Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

Bugfixes and Daniel Berrange's crypto library.

# gpg: Signature made Wed Jul  8 12:12:29 2015 BST using RSA key ID 78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  ossaudio: fix memory leak
  ui: convert VNC to use generic cipher API
  block: convert qcow/qcow2 to use generic cipher API
  ui: convert VNC websockets to use crypto APIs
  block: convert quorum blockdrv to use crypto APIs
  crypto: add a nettle cipher implementation
  crypto: add a gcrypt cipher implementation
  crypto: introduce generic cipher API & built-in implementation
  crypto: move built-in D3DES implementation into crypto/
  crypto: move built-in AES implementation into crypto/
  crypto: introduce new module for computing hash digests
  vl: move rom_load_all after machine init done

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/block.c b/block.c
index 7e130cc..5e80336 100644
--- a/block.c
+++ b/block.c
@@ -1271,7 +1271,7 @@
     QemuOpts *opts = NULL;
     QDict *snapshot_options;
     BlockDriverState *bs_snapshot;
-    Error *local_err;
+    Error *local_err = NULL;
     int ret;
 
     /* if snapshot, we create a temporary backing file and open it
@@ -1841,9 +1841,9 @@
     if (bs->job) {
         block_job_cancel_sync(bs->job);
     }
-    bdrv_drain_all(); /* complete I/O */
+    bdrv_drain(bs); /* complete I/O */
     bdrv_flush(bs);
-    bdrv_drain_all(); /* in case flush left pending I/O */
+    bdrv_drain(bs); /* in case flush left pending I/O */
     notifier_list_notify(&bs->close_notifiers, bs);
 
     if (bs->drv) {
@@ -3906,7 +3906,7 @@
 
 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
 {
-    bdrv_drain_all(); /* ensure there are no in-flight requests */
+    bdrv_drain(bs); /* ensure there are no in-flight requests */
 
     bdrv_detach_aio_context(bs);
 
diff --git a/block/io.c b/block/io.c
index 305e0d9..d4bc83b 100644
--- a/block/io.c
+++ b/block/io.c
@@ -236,12 +236,12 @@
 /*
  * Wait for pending requests to complete on a single BlockDriverState subtree
  *
- * See the warning in bdrv_drain_all().  This function can only be called if
- * you are sure nothing can generate I/O because you have op blockers
- * installed.
- *
  * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
  * AioContext.
+ *
+ * Only this BlockDriverState's AioContext is run, so in-flight requests must
+ * not depend on events in other AioContexts.  If they do, use
+ * bdrv_drain_all() instead.
  */
 void bdrv_drain(BlockDriverState *bs)
 {
@@ -260,12 +260,6 @@
  *
  * This function does not flush data to disk, use bdrv_flush_all() for that
  * after calling this function.
- *
- * Note that completion of an asynchronous I/O operation can trigger any
- * number of other I/O operations on other devices---for example a coroutine
- * can be arbitrarily complex and a constant flow of I/O can come until the
- * coroutine is complete.  Because of this, it is not possible to have a
- * function to drain a single device's I/O queue.
  */
 void bdrv_drain_all(void)
 {
@@ -288,6 +282,12 @@
         }
     }
 
+    /* Note that completion of an asynchronous I/O operation can trigger any
+     * number of other I/O operations on other devices---for example a
+     * coroutine can submit an I/O request to another device in response to
+     * request completion.  Therefore we must keep looping until there is no
+     * more activity rather than simply draining each device independently.
+     */
     while (busy) {
         busy = false;
 
diff --git a/block/mirror.c b/block/mirror.c
index 8888cea..d409337 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -708,6 +708,8 @@
 
     s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
     if (!s->dirty_bitmap) {
+        g_free(s->replaces);
+        block_job_release(bs);
         return;
     }
     bdrv_set_enable_write_cache(s->target, true);
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index ed92a09..53b8afc 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -281,9 +281,6 @@
     i = min_lru_index;
     trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
                                         c == s->l2_table_cache, i);
-    if (i < 0) {
-        return i;
-    }
 
     ret = qcow2_cache_entry_flush(bs, c, i);
     if (ret < 0) {
diff --git a/block/raw-posix.c b/block/raw-posix.c
index cbe6574..855febe 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -2430,7 +2430,8 @@
     struct stat st;
 
     if (strstart(filename, "/dev/fd", NULL) &&
-        !strstart(filename, "/dev/fdset/", NULL)) {
+        !strstart(filename, "/dev/fdset/", NULL) &&
+        !strstart(filename, "/dev/fd/", NULL)) {
         prio = 50;
     }
 
diff --git a/block/snapshot.c b/block/snapshot.c
index 19395ae..49e143e 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -239,7 +239,7 @@
     }
 
     /* drain all pending i/o before deleting snapshot */
-    bdrv_drain_all();
+    bdrv_drain(bs);
 
     if (drv->bdrv_snapshot_delete) {
         return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
diff --git a/blockjob.c b/blockjob.c
index ec46fad..62bb906 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -66,10 +66,7 @@
 
         block_job_set_speed(job, speed, &local_err);
         if (local_err) {
-            bs->job = NULL;
-            bdrv_op_unblock_all(bs, job->blocker);
-            error_free(job->blocker);
-            g_free(job);
+            block_job_release(bs);
             error_propagate(errp, local_err);
             return NULL;
         }
@@ -77,16 +74,23 @@
     return job;
 }
 
+void block_job_release(BlockDriverState *bs)
+{
+    BlockJob *job = bs->job;
+
+    bs->job = NULL;
+    bdrv_op_unblock_all(bs, job->blocker);
+    error_free(job->blocker);
+    g_free(job);
+}
+
 void block_job_completed(BlockJob *job, int ret)
 {
     BlockDriverState *bs = job->bs;
 
     assert(bs->job == job);
     job->cb(job->opaque, ret);
-    bs->job = NULL;
-    bdrv_op_unblock_all(bs, job->blocker);
-    error_free(job->blocker);
-    g_free(job);
+    block_job_release(bs);
 }
 
 void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
diff --git a/configure b/configure
index c878b3c..33b9455 100755
--- a/configure
+++ b/configure
@@ -314,6 +314,7 @@
 bzip2=""
 guest_agent=""
 guest_agent_with_vss="no"
+guest_agent_ntddscsi="no"
 guest_agent_msi=""
 vss_win32_sdk=""
 win_sdk="no"
@@ -732,7 +733,7 @@
   sysconfdir="\${prefix}"
   local_statedir=
   confsuffix=""
-  libs_qga="-lws2_32 -lwinmm -lpowrprof $libs_qga"
+  libs_qga="-lws2_32 -lwinmm -lpowrprof -liphlpapi $libs_qga"
 fi
 
 werror=""
@@ -3862,6 +3863,26 @@
 fi
 
 ##########################################
+# check if mingw environment provides a recent ntddscsi.h
+if test "$mingw32" = "yes" -a "$guest_agent" != "no"; then
+  cat > $TMPC << EOF
+#include <windows.h>
+#include <ntddscsi.h>
+int main(void) {
+#if !defined(IOCTL_SCSI_GET_ADDRESS)
+#error Missing required ioctl definitions
+#endif
+  SCSI_ADDRESS addr = { .Lun = 0, .TargetId = 0, .PathId = 0 };
+  return addr.Lun;
+}
+EOF
+  if compile_prog "" "" ; then
+    guest_agent_ntddscsi=yes
+    libs_qga="-lsetupapi $libs_qga"
+  fi
+fi
+
+##########################################
 # Guest agent Window MSI  package
 
 if test "$guest_agent" != yes; then
@@ -4534,6 +4555,7 @@
 echo "libnfs support    $libnfs"
 echo "build guest agent $guest_agent"
 echo "QGA VSS support   $guest_agent_with_vss"
+echo "QGA w32 disk info $guest_agent_ntddscsi"
 echo "seccomp support   $seccomp"
 echo "coroutine backend $coroutine"
 echo "coroutine pool    $coroutine_pool"
@@ -4610,6 +4632,9 @@
     echo "CONFIG_QGA_VSS=y" >> $config_host_mak
     echo "WIN_SDK=\"$win_sdk\"" >> $config_host_mak
   fi
+  if test "$guest_agent_ntddscsi" = "yes" ; then
+    echo "CONFIG_QGA_NTDDDISK=y" >> $config_host_mak
+  fi
   if test "$guest_agent_msi" != "no"; then
     echo "QEMU_GA_MSI_ENABLED=yes" >> $config_host_mak  
     echo "QEMU_GA_MSI_MINGW_DLL_PATH=${QEMU_GA_MSI_MINGW_DLL_PATH}" >> $config_host_mak
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 91d602c..48b5762 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -16,6 +16,7 @@
 CONFIG_FDC=y
 CONFIG_ACPI=y
 CONFIG_ACPI_X86=y
+CONFIG_ACPI_X86_ICH=y
 CONFIG_ACPI_MEMORY_HOTPLUG=y
 CONFIG_ACPI_CPU_HOTPLUG=y
 CONFIG_APM=y
diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak
index 62575eb..4962ed7 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -17,6 +17,7 @@
 CONFIG_FDC=y
 CONFIG_ACPI=y
 CONFIG_ACPI_X86=y
+CONFIG_ACPI_X86_ICH=y
 CONFIG_ACPI_MEMORY_HOTPLUG=y
 CONFIG_ACPI_CPU_HOTPLUG=y
 CONFIG_APM=y
diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt
index 4c13d48..d92cc48 100644
--- a/docs/qmp/qmp-events.txt
+++ b/docs/qmp/qmp-events.txt
@@ -473,6 +473,20 @@
 { "timestamp": {"seconds": 1290688046, "microseconds": 417172},
   "event": "SPICE_MIGRATE_COMPLETED" }
 
+MIGRATION
+---------
+
+Emitted when a migration event happens.
+
+Data:
+
+ - "status": migration status
+     See MigrationStatus in ~/qapi-schema.json for possible values.
+
+Example:
+
+{"timestamp": {"seconds": 1432121972, "microseconds": 744001},
+ "event": "MIGRATION", "data": {"status": "completed"}}
 
 STOP
 ----
diff --git a/docs/specs/ppc-spapr-hotplug.txt b/docs/specs/ppc-spapr-hotplug.txt
index d35771c..46e0719 100644
--- a/docs/specs/ppc-spapr-hotplug.txt
+++ b/docs/specs/ppc-spapr-hotplug.txt
@@ -284,4 +284,22 @@
     } drc;
 } QEMU_PACKED;
 
+== ibm,lrdr-capacity ==
+
+ibm,lrdr-capacity is a property in the /rtas device tree node that identifies
+the dynamic reconfiguration capabilities of the guest. It is a triple
+consisting of <phys>, <size> and <maxcpus>.
+
+  <phys>, encoded in BE format, represents the maximum address in bytes and
+  hence the maximum memory that can be allocated to the guest.
+
+  <size>, encoded in BE format, represents the size increments in which
+  memory can be hot-plugged to the guest.
+
+  <maxcpus>, a BE-encoded integer, represents the maximum number of
+  processors that the guest can have.
+
+pseries guests use this property to determine the maximum number of CPUs
+allowed for the guest.
+
 [1] http://thread.gmane.org/gmane.linux.ports.ppc.embedded/75350/focus=106867
diff --git a/docs/specs/rocker.txt b/docs/specs/rocker.txt
index 0af5c61..1c74351 100644
--- a/docs/specs/rocker.txt
+++ b/docs/specs/rocker.txt
@@ -637,6 +637,7 @@
 				  (1 << 5): TCP packet
 				  (1 << 6): UDP packet
 				  (1 << 7): TCP/UDP csum good
+				  (1 << 8): Offload forward
 	RX_CSUM		2	IP calculated checksum:
 				  IPv4: IP payload csum
 				  IPv6: header and payload csum
@@ -645,6 +646,9 @@
 	RX_FRAG_MAX_LEN	2	Packet maximum fragment length
 	RX_FRAG_LEN	2	Actual packet fragment length after receive
 
+The offload forward RX_FLAG indicates that the device has already forwarded
+the packet, so the host CPU should not forward it again.
+
 Possible status return codes in descriptor on completion are:
 
 	DESC_COMP_ERR	reason
diff --git a/exec.c b/exec.c
index 251dc79..b7f7f98 100644
--- a/exec.c
+++ b/exec.c
@@ -1414,6 +1414,11 @@
         }
     }
 
+    new_ram_size = MAX(old_ram_size,
+              (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
+    if (new_ram_size > old_ram_size) {
+        migration_bitmap_extend(old_ram_size, new_ram_size);
+    }
     /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
      * QLIST (which has an RCU-friendly variant) does not have insertion at
      * tail, so save the last element in last_block.
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 29d46d8..7d3230c 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -1,4 +1,5 @@
-common-obj-$(CONFIG_ACPI_X86) += core.o piix4.o ich9.o pcihp.o
+common-obj-$(CONFIG_ACPI_X86) += core.o piix4.o pcihp.o
+common-obj-$(CONFIG_ACPI_X86_ICH) += ich9.o tco.o
 common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o
 common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o
 common-obj-$(CONFIG_ACPI) += acpi_interface.o
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index f4dc7a8..5fb7a87 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -30,6 +30,7 @@
 #include "qemu/timer.h"
 #include "sysemu/sysemu.h"
 #include "hw/acpi/acpi.h"
+#include "hw/acpi/tco.h"
 #include "sysemu/kvm.h"
 #include "exec/address-spaces.h"
 
@@ -92,8 +93,16 @@
                             unsigned width)
 {
     ICH9LPCPMRegs *pm = opaque;
+    TCOIORegs *tr = &pm->tco_regs;
+    uint64_t tco_en;
+
     switch (addr) {
     case 0:
+        tco_en = pm->smi_en & ICH9_PMIO_SMI_EN_TCO_EN;
+        /* once TCO_LOCK is set, the TCO_EN bit cannot be overwritten */
+        if (tr->tco.cnt1 & TCO_LOCK) {
+            val = (val & ~ICH9_PMIO_SMI_EN_TCO_EN) | tco_en;
+        }
         pm->smi_en &= ~pm->smi_en_wmask;
         pm->smi_en |= (val & pm->smi_en_wmask);
         break;
@@ -159,6 +168,25 @@
     }
 };
 
+static bool vmstate_test_use_tco(void *opaque)
+{
+    ICH9LPCPMRegs *s = opaque;
+    return s->enable_tco;
+}
+
+static const VMStateDescription vmstate_tco_io_state = {
+    .name = "ich9_pm/tco",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .needed = vmstate_test_use_tco,
+    .fields      = (VMStateField[]) {
+        VMSTATE_STRUCT(tco_regs, ICH9LPCPMRegs, 1, vmstate_tco_io_sts,
+                       TCOIORegs),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 const VMStateDescription vmstate_ich9_pm = {
     .name = "ich9_pm",
     .version_id = 1,
@@ -179,6 +207,7 @@
     .subsections = (const VMStateDescription*[]) {
         &vmstate_memhp_state,
+        &vmstate_tco_io_state,
         NULL
     }
 };
 
@@ -209,7 +241,8 @@
     acpi_pm1_evt_power_down(&pm->acpi_regs);
 }
 
-void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, bool smm_enabled,
+void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
+                  bool smm_enabled, bool enable_tco,
                   qemu_irq sci_irq)
 {
     memory_region_init(&pm->io, OBJECT(lpc_pci), "ich9-pm", ICH9_PMIO_SIZE);
@@ -232,6 +265,12 @@
     memory_region_add_subregion(&pm->io, ICH9_PMIO_SMI_EN, &pm->io_smi);
 
     pm->smm_enabled = smm_enabled;
+
+    pm->enable_tco = enable_tco;
+    if (pm->enable_tco) {
+        acpi_pm_tco_init(&pm->tco_regs, &pm->io);
+    }
+
     pm->irq = sci_irq;
     qemu_register_reset(pm_reset, pm);
     pm->powerdown_notifier.notify = pm_powerdown_req;
@@ -352,6 +391,18 @@
     error_propagate(errp, local_err);
 }
 
+static bool ich9_pm_get_enable_tco(Object *obj, Error **errp)
+{
+    ICH9LPCState *s = ICH9_LPC_DEVICE(obj);
+    return s->pm.enable_tco;
+}
+
+static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp)
+{
+    ICH9LPCState *s = ICH9_LPC_DEVICE(obj);
+    s->pm.enable_tco = value;
+}
+
 void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp)
 {
     static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN;
@@ -383,6 +434,10 @@
                         ich9_pm_get_s4_val,
                         ich9_pm_set_s4_val,
                         NULL, pm, NULL);
+    object_property_add_bool(obj, ACPI_PM_PROP_TCO_ENABLED,
+                             ich9_pm_get_enable_tco,
+                             ich9_pm_set_enable_tco,
+                             NULL);
 }
 
 void ich9_pm_device_plug_cb(ICH9LPCPMRegs *pm, DeviceState *dev, Error **errp)
diff --git a/hw/acpi/tco.c b/hw/acpi/tco.c
new file mode 100644
index 0000000..7a026c2
--- /dev/null
+++ b/hw/acpi/tco.c
@@ -0,0 +1,264 @@
+/*
+ * QEMU ICH9 TCO emulation
+ *
+ * Copyright (c) 2015 Paulo Alcantara <pcacjr@zytor.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu-common.h"
+#include "sysemu/watchdog.h"
+#include "hw/i386/ich9.h"
+
+#include "hw/acpi/tco.h"
+
+//#define DEBUG
+
+#ifdef DEBUG
+#define TCO_DEBUG(fmt, ...)                                     \
+    do {                                                        \
+        fprintf(stderr, "%s "fmt, __func__, ## __VA_ARGS__);    \
+    } while (0)
+#else
+#define TCO_DEBUG(fmt, ...) do { } while (0)
+#endif
+
+enum {
+    TCO_RLD_DEFAULT         = 0x0000,
+    TCO_DAT_IN_DEFAULT      = 0x00,
+    TCO_DAT_OUT_DEFAULT     = 0x00,
+    TCO1_STS_DEFAULT        = 0x0000,
+    TCO2_STS_DEFAULT        = 0x0000,
+    TCO1_CNT_DEFAULT        = 0x0000,
+    TCO2_CNT_DEFAULT        = 0x0008,
+    TCO_MESSAGE1_DEFAULT    = 0x00,
+    TCO_MESSAGE2_DEFAULT    = 0x00,
+    TCO_WDCNT_DEFAULT       = 0x00,
+    TCO_TMR_DEFAULT         = 0x0004,
+    SW_IRQ_GEN_DEFAULT      = 0x03,
+};
+
+static inline void tco_timer_reload(TCOIORegs *tr)
+{
+    tr->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+        ((int64_t)(tr->tco.tmr & TCO_TMR_MASK) * TCO_TICK_NSEC);
+    timer_mod(tr->tco_timer, tr->expire_time);
+}
+
+static inline void tco_timer_stop(TCOIORegs *tr)
+{
+    tr->expire_time = -1;
+}
+
+static void tco_timer_expired(void *opaque)
+{
+    TCOIORegs *tr = opaque;
+    ICH9LPCPMRegs *pm = container_of(tr, ICH9LPCPMRegs, tco_regs);
+    ICH9LPCState *lpc = container_of(pm, ICH9LPCState, pm);
+    uint32_t gcs = pci_get_long(lpc->chip_config + ICH9_CC_GCS);
+
+    tr->tco.rld = 0;
+    tr->tco.sts1 |= TCO_TIMEOUT;
+    if (++tr->timeouts_no == 2) {
+        tr->tco.sts2 |= TCO_SECOND_TO_STS;
+        tr->tco.sts2 |= TCO_BOOT_STS;
+        tr->timeouts_no = 0;
+
+        if (!lpc->pin_strap.spkr_hi && !(gcs & ICH9_CC_GCS_NO_REBOOT)) {
+            watchdog_perform_action();
+            tco_timer_stop(tr);
+            return;
+        }
+    }
+
+    if (pm->smi_en & ICH9_PMIO_SMI_EN_TCO_EN) {
+        ich9_generate_smi();
+    } else {
+        ich9_generate_nmi();
+    }
+    tr->tco.rld = tr->tco.tmr;
+    tco_timer_reload(tr);
+}
+
+/* NOTE: values of 0 or 1 will be ignored by ICH */
+static inline int can_start_tco_timer(TCOIORegs *tr)
+{
+    return !(tr->tco.cnt1 & TCO_TMR_HLT) && tr->tco.tmr > 1;
+}
+
+static uint32_t tco_ioport_readw(TCOIORegs *tr, uint32_t addr)
+{
+    uint16_t rld;
+
+    switch (addr) {
+    case TCO_RLD:
+        if (tr->expire_time != -1) {
+            int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+            int64_t elapsed = (tr->expire_time - now) / TCO_TICK_NSEC;
+            rld = (uint16_t)elapsed | (tr->tco.rld & ~TCO_RLD_MASK);
+        } else {
+            rld = tr->tco.rld;
+        }
+        return rld;
+    case TCO_DAT_IN:
+        return tr->tco.din;
+    case TCO_DAT_OUT:
+        return tr->tco.dout;
+    case TCO1_STS:
+        return tr->tco.sts1;
+    case TCO2_STS:
+        return tr->tco.sts2;
+    case TCO1_CNT:
+        return tr->tco.cnt1;
+    case TCO2_CNT:
+        return tr->tco.cnt2;
+    case TCO_MESSAGE1:
+        return tr->tco.msg1;
+    case TCO_MESSAGE2:
+        return tr->tco.msg2;
+    case TCO_WDCNT:
+        return tr->tco.wdcnt;
+    case TCO_TMR:
+        return tr->tco.tmr;
+    case SW_IRQ_GEN:
+        return tr->sw_irq_gen;
+    }
+    return 0;
+}
+
+static void tco_ioport_writew(TCOIORegs *tr, uint32_t addr, uint32_t val)
+{
+    switch (addr) {
+    case TCO_RLD:
+        tr->timeouts_no = 0;
+        if (can_start_tco_timer(tr)) {
+            tr->tco.rld = tr->tco.tmr;
+            tco_timer_reload(tr);
+        } else {
+            tr->tco.rld = val;
+        }
+        break;
+    case TCO_DAT_IN:
+        tr->tco.din = val;
+        tr->tco.sts1 |= SW_TCO_SMI;
+        ich9_generate_smi();
+        break;
+    case TCO_DAT_OUT:
+        tr->tco.dout = val;
+        tr->tco.sts1 |= TCO_INT_STS;
+        /* TODO: cause an interrupt, as selected by the TCO_INT_SEL bits */
+        break;
+    case TCO1_STS:
+        tr->tco.sts1 = val & TCO1_STS_MASK;
+        break;
+    case TCO2_STS:
+        tr->tco.sts2 = val & TCO2_STS_MASK;
+        break;
+    case TCO1_CNT:
+        val &= TCO1_CNT_MASK;
+        /*
+         * Once the TCO_LOCK bit is set, it cannot be cleared by software; a
+         * reset is required to change this bit from 1 to 0 -- it defaults to 0.
+         */
+        tr->tco.cnt1 = val | (tr->tco.cnt1 & TCO_LOCK);
+        if (can_start_tco_timer(tr)) {
+            tr->tco.rld = tr->tco.tmr;
+            tco_timer_reload(tr);
+        } else {
+            tco_timer_stop(tr);
+        }
+        break;
+    case TCO2_CNT:
+        tr->tco.cnt2 = val;
+        break;
+    case TCO_MESSAGE1:
+        tr->tco.msg1 = val;
+        break;
+    case TCO_MESSAGE2:
+        tr->tco.msg2 = val;
+        break;
+    case TCO_WDCNT:
+        tr->tco.wdcnt = val;
+        break;
+    case TCO_TMR:
+        tr->tco.tmr = val;
+        break;
+    case SW_IRQ_GEN:
+        tr->sw_irq_gen = val;
+        break;
+    }
+}
+
+static uint64_t tco_io_readw(void *opaque, hwaddr addr, unsigned width)
+{
+    TCOIORegs *tr = opaque;
+    return tco_ioport_readw(tr, addr);
+}
+
+static void tco_io_writew(void *opaque, hwaddr addr, uint64_t val,
+                          unsigned width)
+{
+    TCOIORegs *tr = opaque;
+    tco_ioport_writew(tr, addr, val);
+}
+
+static const MemoryRegionOps tco_io_ops = {
+    .read = tco_io_readw,
+    .write = tco_io_writew,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 2,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+void acpi_pm_tco_init(TCOIORegs *tr, MemoryRegion *parent)
+{
+    *tr = (TCOIORegs) {
+        .tco = {
+            .rld      = TCO_RLD_DEFAULT,
+            .din      = TCO_DAT_IN_DEFAULT,
+            .dout     = TCO_DAT_OUT_DEFAULT,
+            .sts1     = TCO1_STS_DEFAULT,
+            .sts2     = TCO2_STS_DEFAULT,
+            .cnt1     = TCO1_CNT_DEFAULT,
+            .cnt2     = TCO2_CNT_DEFAULT,
+            .msg1     = TCO_MESSAGE1_DEFAULT,
+            .msg2     = TCO_MESSAGE2_DEFAULT,
+            .wdcnt    = TCO_WDCNT_DEFAULT,
+            .tmr      = TCO_TMR_DEFAULT,
+        },
+        .sw_irq_gen    = SW_IRQ_GEN_DEFAULT,
+        .tco_timer     = timer_new_ns(QEMU_CLOCK_VIRTUAL, tco_timer_expired, tr),
+        .expire_time   = -1,
+        .timeouts_no   = 0,
+    };
+    memory_region_init_io(&tr->io, memory_region_owner(parent),
+                          &tco_io_ops, tr, "sm-tco", ICH9_PMIO_TCO_LEN);
+    memory_region_add_subregion(parent, ICH9_PMIO_TCO_RLD, &tr->io);
+}
+
+const VMStateDescription vmstate_tco_io_sts = {
+    .name = "tco io device status",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_UINT16(tco.rld, TCOIORegs),
+        VMSTATE_UINT8(tco.din, TCOIORegs),
+        VMSTATE_UINT8(tco.dout, TCOIORegs),
+        VMSTATE_UINT16(tco.sts1, TCOIORegs),
+        VMSTATE_UINT16(tco.sts2, TCOIORegs),
+        VMSTATE_UINT16(tco.cnt1, TCOIORegs),
+        VMSTATE_UINT16(tco.cnt2, TCOIORegs),
+        VMSTATE_UINT8(tco.msg1, TCOIORegs),
+        VMSTATE_UINT8(tco.msg2, TCOIORegs),
+        VMSTATE_UINT8(tco.wdcnt, TCOIORegs),
+        VMSTATE_UINT16(tco.tmr, TCOIORegs),
+        VMSTATE_UINT8(sw_irq_gen, TCOIORegs),
+        VMSTATE_TIMER_PTR(tco_timer, TCOIORegs),
+        VMSTATE_INT64(expire_time, TCOIORegs),
+        VMSTATE_UINT8(timeouts_no, TCOIORegs),
+        VMSTATE_END_OF_LIST()
+    }
+};
diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c
index 4e464bd..36b328b 100644
--- a/hw/char/spapr_vty.c
+++ b/hw/char/spapr_vty.c
@@ -74,7 +74,7 @@
 }
 
 /* Forward declaration */
-static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_put_term_char(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                     target_ulong opcode, target_ulong *args)
 {
     target_ulong reg = args[0];
@@ -101,7 +101,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_get_term_char(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_get_term_char(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                     target_ulong opcode, target_ulong *args)
 {
     target_ulong reg = args[0];
@@ -193,7 +193,7 @@
         DeviceState *iter = kid->child;
 
         /* Only look at VTY devices */
-        if (!object_dynamic_cast(OBJECT(iter), "spapr-vty")) {
+        if (!object_dynamic_cast(OBJECT(iter), TYPE_VIO_SPAPR_VTY_DEVICE)) {
             continue;
         }
 
@@ -214,7 +214,7 @@
     return selected;
 }
 
-VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg)
+VIOsPAPRDevice *vty_lookup(sPAPRMachineState *spapr, target_ulong reg)
 {
     VIOsPAPRDevice *sdev;
 
@@ -228,6 +228,10 @@
         return spapr_vty_get_default(spapr->vio_bus);
     }
 
+    if (!object_dynamic_cast(OBJECT(sdev), TYPE_VIO_SPAPR_VTY_DEVICE)) {
+        return NULL;
+    }
+
     return sdev;
 }
 
diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index 278a2d1..3c58629 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -109,7 +109,13 @@
 
 void sysbus_connect_irq(SysBusDevice *dev, int n, qemu_irq irq)
 {
+    SysBusDeviceClass *sbd = SYS_BUS_DEVICE_GET_CLASS(dev);
+
     qdev_connect_gpio_out_named(DEVICE(dev), SYSBUS_DEVICE_GPIO_IRQ, n, irq);
+
+    if (sbd->connect_irq_notifier) {
+        sbd->connect_irq_notifier(dev, irq);
+    }
 }
 
 /* Check whether an MMIO region exists */
diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c
index f0f25c7..5bc62cf 100644
--- a/hw/display/virtio-gpu-pci.c
+++ b/hw/display/virtio-gpu-pci.c
@@ -17,7 +17,6 @@
 #include "hw/virtio/virtio-gpu.h"
 
 static Property virtio_gpu_pci_properties[] = {
-    DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPUPCI, vdev.conf),
     DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -25,13 +24,21 @@
 static void virtio_gpu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
 {
     VirtIOGPUPCI *vgpu = VIRTIO_GPU_PCI(vpci_dev);
+    VirtIOGPU *g = &vgpu->vdev;
     DeviceState *vdev = DEVICE(&vgpu->vdev);
+    int i;
 
     qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
     /* force virtio-1.0 */
     vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN;
     vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY;
     object_property_set_bool(OBJECT(vdev), true, "realized", errp);
+
+    for (i = 0; i < g->conf.max_outputs; i++) {
+        object_property_set_link(OBJECT(g->scanout[i].con),
+                                 OBJECT(vpci_dev),
+                                 "device", errp);
+    }
 }
 
 static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data)
@@ -49,8 +56,9 @@
 static void virtio_gpu_initfn(Object *obj)
 {
     VirtIOGPUPCI *dev = VIRTIO_GPU_PCI(obj);
-    object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_GPU);
-    object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL);
+
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VIRTIO_GPU);
 }
 
 static const TypeInfo virtio_gpu_pci_info = {
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 8c109b7..990a26b 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -871,7 +871,7 @@
 }
 
 static Property virtio_gpu_properties[] = {
-    DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPU, conf),
+    DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
index 94f9d0e..f7e539f 100644
--- a/hw/display/virtio-vga.c
+++ b/hw/display/virtio-vga.c
@@ -79,6 +79,7 @@
     VirtIOGPU *g = &vvga->vdev;
     VGACommonState *vga = &vvga->vga;
     uint32_t offset;
+    int i;
 
     /* init vga compat bits */
     vga->vram_size_mb = 8;
@@ -120,6 +121,12 @@
 
     vga->con = g->scanout[0].con;
     graphic_console_set_hwops(vga->con, &virtio_vga_ops, vvga);
+
+    for (i = 0; i < g->conf.max_outputs; i++) {
+        object_property_set_link(OBJECT(g->scanout[i].con),
+                                 OBJECT(vpci_dev),
+                                 "device", errp);
+    }
 }
 
 static void virtio_vga_reset(DeviceState *dev)
@@ -131,7 +138,6 @@
 }
 
 static Property virtio_vga_properties[] = {
-    DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOVGA, vdev.conf),
     DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -155,8 +161,9 @@
 static void virtio_vga_inst_initfn(Object *obj)
 {
     VirtIOVGA *dev = VIRTIO_VGA(obj);
-    object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_GPU);
-    object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL);
+
+    virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+                                TYPE_VIRTIO_GPU);
 }
 
 static TypeInfo virtio_vga_info = {
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7959b44..7661ea9 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -293,11 +293,82 @@
     set_boot_dev(opaque, boot_device, errp);
 }
 
+static void pc_cmos_init_floppy(ISADevice *rtc_state, ISADevice *floppy)
+{
+    int val, nb, i;
+    FDriveType fd_type[2] = { FDRIVE_DRV_NONE, FDRIVE_DRV_NONE };
+
+    /* floppy type */
+    if (floppy) {
+        for (i = 0; i < 2; i++) {
+            fd_type[i] = isa_fdc_get_drive_type(floppy, i);
+        }
+    }
+    val = (cmos_get_fd_drive_type(fd_type[0]) << 4) |
+        cmos_get_fd_drive_type(fd_type[1]);
+    rtc_set_memory(rtc_state, 0x10, val);
+
+    val = rtc_get_memory(rtc_state, REG_EQUIPMENT_BYTE);
+    nb = 0;
+    if (fd_type[0] < FDRIVE_DRV_NONE) {
+        nb++;
+    }
+    if (fd_type[1] < FDRIVE_DRV_NONE) {
+        nb++;
+    }
+    switch (nb) {
+    case 0:
+        break;
+    case 1:
+        val |= 0x01; /* 1 drive, ready for boot */
+        break;
+    case 2:
+        val |= 0x41; /* 2 drives, ready for boot */
+        break;
+    }
+    rtc_set_memory(rtc_state, REG_EQUIPMENT_BYTE, val);
+}
+
 typedef struct pc_cmos_init_late_arg {
     ISADevice *rtc_state;
     BusState *idebus[2];
 } pc_cmos_init_late_arg;
 
+typedef struct check_fdc_state {
+    ISADevice *floppy;
+    bool multiple;
+} CheckFdcState;
+
+static int check_fdc(Object *obj, void *opaque)
+{
+    CheckFdcState *state = opaque;
+    Object *fdc;
+    uint32_t iobase;
+    Error *local_err = NULL;
+
+    fdc = object_dynamic_cast(obj, TYPE_ISA_FDC);
+    if (!fdc) {
+        return 0;
+    }
+
+    iobase = object_property_get_int(obj, "iobase", &local_err);
+    if (local_err || iobase != 0x3f0) {
+        error_free(local_err);
+        return 0;
+    }
+
+    if (state->floppy) {
+        state->multiple = true;
+    } else {
+        state->floppy = ISA_DEVICE(obj);
+    }
+    return 0;
+}
+
+static const char * const fdc_container_path[] = {
+    "/unattached", "/peripheral", "/peripheral-anon"
+};
+
 static void pc_cmos_init_late(void *opaque)
 {
     pc_cmos_init_late_arg *arg = opaque;
@@ -306,6 +377,8 @@
     int8_t heads, sectors;
     int val;
     int i, trans;
+    Object *container;
+    CheckFdcState state = { 0 };
 
     val = 0;
     if (ide_get_geometry(arg->idebus[0], 0,
@@ -335,16 +408,32 @@
     }
     rtc_set_memory(s, 0x39, val);
 
+    /*
+     * Locate the FDC at IO address 0x3f0, and configure the CMOS registers
+     * accordingly.
+     */
+    for (i = 0; i < ARRAY_SIZE(fdc_container_path); i++) {
+        container = container_get(qdev_get_machine(), fdc_container_path[i]);
+        object_child_foreach(container, check_fdc, &state);
+    }
+
+    if (state.multiple) {
+        error_report("warning: multiple floppy disk controllers with "
+                     "iobase=0x3f0 have been found;\n"
+                     "the one being picked for CMOS setup might not reflect "
+                     "your intent");
+    }
+    pc_cmos_init_floppy(s, state.floppy);
+
     qemu_unregister_reset(pc_cmos_init_late, opaque);
 }
 
 void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
                   const char *boot_device, MachineState *machine,
-                  ISADevice *floppy, BusState *idebus0, BusState *idebus1,
+                  BusState *idebus0, BusState *idebus1,
                   ISADevice *s)
 {
-    int val, nb, i;
-    FDriveType fd_type[2] = { FDRIVE_DRV_NONE, FDRIVE_DRV_NONE };
+    int val;
     static pc_cmos_init_late_arg arg;
     PCMachineState *pc_machine = PC_MACHINE(machine);
     Error *local_err = NULL;
@@ -401,39 +490,12 @@
         exit(1);
     }
 
-    /* floppy type */
-    if (floppy) {
-        for (i = 0; i < 2; i++) {
-            fd_type[i] = isa_fdc_get_drive_type(floppy, i);
-        }
-    }
-    val = (cmos_get_fd_drive_type(fd_type[0]) << 4) |
-        cmos_get_fd_drive_type(fd_type[1]);
-    rtc_set_memory(s, 0x10, val);
-
     val = 0;
-    nb = 0;
-    if (fd_type[0] < FDRIVE_DRV_NONE) {
-        nb++;
-    }
-    if (fd_type[1] < FDRIVE_DRV_NONE) {
-        nb++;
-    }
-    switch (nb) {
-    case 0:
-        break;
-    case 1:
-        val |= 0x01; /* 1 drive, ready for boot */
-        break;
-    case 2:
-        val |= 0x41; /* 2 drives, ready for boot */
-        break;
-    }
     val |= 0x02; /* FPU is there */
     val |= 0x04; /* PS/2 mouse installed */
     rtc_set_memory(s, REG_EQUIPMENT_BYTE, val);
 
-    /* hard drives */
+    /* hard drives and FDC */
     arg.rtc_state = s;
     arg.idebus[0] = idebus0;
     arg.idebus[1] = idebus1;
@@ -1401,7 +1463,6 @@
 void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
                           ISADevice **rtc_state,
                           bool create_fdctrl,
-                          ISADevice **floppy,
                           bool no_vmport,
                           uint32 hpet_irqs)
 {
@@ -1497,7 +1558,9 @@
         fd[i] = drive_get(IF_FLOPPY, 0, i);
         create_fdctrl |= !!fd[i];
     }
-    *floppy = create_fdctrl ? fdctrl_init_isa(isa_bus, fd) : NULL;
+    if (create_fdctrl) {
+        fdctrl_init_isa(isa_bus, fd);
+    }
 }
 
 void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 56cdcb9..8167b12 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -94,7 +94,6 @@
     DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
     BusState *idebus[MAX_IDE_BUS];
     ISADevice *rtc_state;
-    ISADevice *floppy;
     MemoryRegion *ram_memory;
     MemoryRegion *pci_memory;
     MemoryRegion *rom_memory;
@@ -241,7 +240,7 @@
     }
 
     /* init basic PC hardware */
-    pc_basic_device_init(isa_bus, gsi, &rtc_state, true, &floppy,
+    pc_basic_device_init(isa_bus, gsi, &rtc_state, true,
                          (pc_machine->vmport != ON_OFF_AUTO_ON), 0x4);
 
     pc_nic_init(isa_bus, pci_bus);
@@ -273,7 +272,7 @@
     }
 
     pc_cmos_init(below_4g_mem_size, above_4g_mem_size, machine->boot_order,
-                 machine, floppy, idebus[0], idebus[1], rtc_state);
+                 machine, idebus[0], idebus[1], rtc_state);
 
     if (pci_enabled && usb_enabled()) {
         pci_create_simple(pci_bus, piix3_devfn + 2, "piix3-usb-uhci");
@@ -312,6 +311,8 @@
     if (kvm_enabled()) {
         pcms->smm = ON_OFF_AUTO_OFF;
     }
+    global_state_set_optional();
+    savevm_skip_configuration();
 }
 
 static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 8aa3a67..974aead 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -73,7 +73,6 @@
     PCIDevice *lpc;
     BusState *idebus[MAX_SATA_PORTS];
     ISADevice *rtc_state;
-    ISADevice *floppy;
     MemoryRegion *pci_memory;
     MemoryRegion *rom_memory;
     MemoryRegion *ram_memory;
@@ -249,11 +248,11 @@
     }
 
     /* init basic PC hardware */
-    pc_basic_device_init(isa_bus, gsi, &rtc_state, !mc->no_floppy, &floppy,
+    pc_basic_device_init(isa_bus, gsi, &rtc_state, !mc->no_floppy,
                          (pc_machine->vmport != ON_OFF_AUTO_ON), 0xff0104);
 
     /* connect pm stuff to lpc */
-    ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pc_machine));
+    ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pc_machine), !mc->no_tco);
 
     /* ahci and SATA device, for q35 1 ahci controller is built-in */
     ahci = pci_create_simple_multifunction(host_bus,
@@ -278,7 +277,7 @@
                       8, NULL, 0);
 
     pc_cmos_init(below_4g_mem_size, above_4g_mem_size, machine->boot_order,
-                 machine, floppy, idebus[0], idebus[1], rtc_state);
+                 machine, idebus[0], idebus[1], rtc_state);
 
     /* the rest devices to which pci devfn is automatically assigned */
     pc_vga_init(isa_bus, host_bus);
@@ -295,6 +294,8 @@
     if (kvm_enabled()) {
         pcms->smm = ON_OFF_AUTO_OFF;
     }
+    global_state_set_optional();
+    savevm_skip_configuration();
 }
 
 static void pc_compat_2_2(MachineState *machine)
@@ -397,6 +398,7 @@
     m->default_machine_opts = "firmware=bios-256k.bin";
     m->default_display = "std";
     m->no_floppy = 1;
+    m->no_tco = 0;
     m->alias = "q35";
 }
 
@@ -408,6 +410,7 @@
 {
     pc_q35_2_4_machine_options(m);
     m->no_floppy = 0;
+    m->no_tco = 1;
     m->alias = NULL;
     SET_MACHINE_COMPAT(m, PC_COMPAT_2_3);
 }
diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h
index 9f5b4d2..68d5074 100644
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -166,7 +166,7 @@
 #define AHCI_CMD_HDR_CMD_FIS_LEN           0x1f
 #define AHCI_CMD_HDR_PRDT_LEN              16
 
-#define SATA_SIGNATURE_CDROM               0xeb140000
+#define SATA_SIGNATURE_CDROM               0xeb140101
 #define SATA_SIGNATURE_DISK                0x00000101
 
 #define AHCI_GENERIC_HOST_CONTROL_REGS_MAX_ADDR 0x20
diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index 2cb7d25..f56bff1 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -570,6 +570,12 @@
      */
     i += (GIC_INTERNAL * s->num_cpu);
     qdev_init_gpio_in(dev, kvm_arm_gic_set_irq, i);
+
+    for (i = 0; i < s->num_irq - GIC_INTERNAL; i++) {
+        qemu_irq irq = qdev_get_gpio_in(dev, i);
+        kvm_irqchip_set_qemuirq_gsi(kvm_state, irq, i);
+    }
+
     /* We never use our outbound IRQ/FIQ lines but provide them so that
      * we maintain the same interface as the non-KVM GIC.
      */
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 0fd2a84..924b1ae 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -806,7 +806,7 @@
  * Guest interfaces
  */
 
-static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
@@ -816,7 +816,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_ipi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           target_ulong opcode, target_ulong *args)
 {
     target_ulong server = get_cpu_index_by_dt_id(args[0]);
@@ -830,7 +830,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_xirr(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
@@ -840,7 +840,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                              target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
@@ -852,7 +852,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_eoi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
@@ -862,7 +862,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                             target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
@@ -874,7 +874,7 @@
     return H_SUCCESS;
 }
 
-static void rtas_set_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           uint32_t token,
                           uint32_t nargs, target_ulong args,
                           uint32_t nret, target_ulong rets)
@@ -902,7 +902,7 @@
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
 
-static void rtas_get_xive(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                           uint32_t token,
                           uint32_t nargs, target_ulong args,
                           uint32_t nret, target_ulong rets)
@@ -927,7 +927,7 @@
     rtas_st(rets, 2, ics->irqs[nr - ics->offset].priority);
 }
 
-static void rtas_int_off(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                          uint32_t token,
                          uint32_t nargs, target_ulong args,
                          uint32_t nret, target_ulong rets)
@@ -953,7 +953,7 @@
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
 
-static void rtas_int_on(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                         uint32_t token,
                         uint32_t nargs, target_ulong args,
                         uint32_t nret, target_ulong rets)
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index c15453f..d58729c 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -331,6 +331,15 @@
         abort();
     }
 
+    /*
+     * If we are reusing a parked vCPU fd corresponding to the CPU
+     * which was hot-removed earlier, we don't have to re-enable
+     * the KVM_CAP_IRQ_XICS capability again.
+     */
+    if (ss->cap_irq_xics_enabled) {
+        return;
+    }
+
     if (icpkvm->kernel_xics_fd != -1) {
         int ret;
 
@@ -343,6 +352,7 @@
                     kvm_arch_vcpu_id(cs), strerror(errno));
             exit(1);
         }
+        ss->cap_irq_xics_enabled = true;
     }
 }
 
@@ -368,7 +378,7 @@
     }
 }
 
-static void rtas_dummy(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                        uint32_t token,
                        uint32_t nargs, target_ulong args,
                        uint32_t nret, target_ulong rets)
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index bd655b8..360699f 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -138,6 +138,7 @@
     pci_set_long(c + ICH9_CC_D27IR, ICH9_CC_DIR_DEFAULT);
     pci_set_long(c + ICH9_CC_D26IR, ICH9_CC_DIR_DEFAULT);
     pci_set_long(c + ICH9_CC_D25IR, ICH9_CC_DIR_DEFAULT);
+    pci_set_long(c + ICH9_CC_GCS, ICH9_CC_GCS_DEFAULT);
 
     ich9_cc_update(lpc);
 }
@@ -313,6 +314,16 @@
     return route;
 }
 
+void ich9_generate_smi(void)
+{
+    cpu_interrupt(first_cpu, CPU_INTERRUPT_SMI);
+}
+
+void ich9_generate_nmi(void)
+{
+    cpu_interrupt(first_cpu, CPU_INTERRUPT_NMI);
+}
+
 static int ich9_lpc_sci_irq(ICH9LPCState *lpc)
 {
     switch (lpc->d.config[ICH9_LPC_ACPI_CTRL] &
@@ -357,13 +368,13 @@
     }
 }
 
-void ich9_lpc_pm_init(PCIDevice *lpc_pci, bool smm_enabled)
+void ich9_lpc_pm_init(PCIDevice *lpc_pci, bool smm_enabled, bool enable_tco)
 {
     ICH9LPCState *lpc = ICH9_LPC_DEVICE(lpc_pci);
     qemu_irq sci_irq;
 
     sci_irq = qemu_allocate_irq(ich9_set_sci, lpc, 0);
-    ich9_pm_init(lpc_pci, &lpc->pm, smm_enabled, sci_irq);
+    ich9_pm_init(lpc_pci, &lpc->pm, smm_enabled, enable_tco, sci_irq);
     ich9_lpc_reset(&lpc->d.qdev);
 }
 
@@ -681,6 +692,11 @@
     }
 };
 
+static Property ich9_lpc_properties[] = {
+    DEFINE_PROP_BOOL("noreboot", ICH9LPCState, pin_strap.spkr_hi, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void ich9_lpc_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -692,6 +708,7 @@
     dc->reset = ich9_lpc_reset;
     k->init = ich9_lpc_init;
     dc->vmsd = &vmstate_ich9_lpc;
+    dc->props = ich9_lpc_properties;
     k->config_write = ich9_lpc_config_write;
     dc->desc = "ICH9 LPC bridge";
     k->vendor_id = PCI_VENDOR_ID_INTEL;
diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index e9037b0..e3c0242 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c
@@ -132,8 +132,6 @@
     SysBusDevice *sysbus_dev;
     Error *err = NULL;
 
-    d->config[0x3d] = 0x01; // interrupt on pin 1
-
     object_property_set_bool(OBJECT(&s->cuda), true, "realized", &err);
     if (err) {
         error_propagate(errp, err);
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index bab8e2a..5c6bcd0 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -185,6 +185,9 @@
 {
     s->mac_reg[STATUS] |= E1000_STATUS_LU;
     s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
+
+    /* E1000_STATUS_LU is tested by e1000_can_receive() */
+    qemu_flush_queued_packets(qemu_get_queue(s->nic));
 }
 
 static bool
diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c
index 4d25842..47d080f 100644
--- a/hw/net/rocker/rocker.c
+++ b/hw/net/rocker/rocker.c
@@ -96,7 +96,7 @@
 
 RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
 {
-    RockerSwitch *rocker = g_malloc0(sizeof(*rocker));
+    RockerSwitch *rocker;
     Rocker *r;
 
     r = rocker_find(name);
@@ -106,6 +106,7 @@
         return NULL;
     }
 
+    rocker = g_new0(RockerSwitch, 1);
     rocker->name = g_strdup(r->name);
     rocker->id = r->switch_id;
     rocker->ports = r->fp_ports;
@@ -192,11 +193,13 @@
         if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
             return -ROCKER_EINVAL;
         }
+        break;
     case ROCKER_TX_OFFLOAD_TSO:
         if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
             !tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
             return -ROCKER_EINVAL;
         }
+        break;
     }
 
     if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
@@ -600,7 +603,7 @@
 }
 
 int rx_produce(World *world, uint32_t pport,
-               const struct iovec *iov, int iovcnt)
+               const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
 {
     Rocker *r = world_rocker(world);
     PCIDevice *dev = (PCIDevice *)r;
@@ -643,6 +646,10 @@
         goto out;
     }
 
+    if (copy_to_cpu) {
+        rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
+    }
+
     /* XXX calc rx flags/csum */
 
     tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h
index b3310b6..f9c80f8 100644
--- a/hw/net/rocker/rocker.h
+++ b/hw/net/rocker/rocker.h
@@ -77,7 +77,7 @@
 int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
                                uint16_t vlan_id);
 int rx_produce(World *world, uint32_t pport,
-               const struct iovec *iov, int iovcnt);
+               const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu);
 int rocker_port_eg(Rocker *r, uint32_t pport,
                    const struct iovec *iov, int iovcnt);
 
diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c
index d8d934c..c693ae5 100644
--- a/hw/net/rocker/rocker_fp.c
+++ b/hw/net/rocker/rocker_fp.c
@@ -125,18 +125,21 @@
     return ROCKER_OK;
 }
 
-static int fp_port_can_receive(NetClientState *nc)
-{
-    FpPort *port = qemu_get_nic_opaque(nc);
-
-    return port->enabled;
-}
-
 static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov,
                                    int iovcnt)
 {
     FpPort *port = qemu_get_nic_opaque(nc);
 
+    /* If the port is disabled, we want to drop this pkt
+     * now rather than queuing it for later.  We don't want
+     * any stale pkts getting into the device when the port
+     * transitions to enabled.
+     */
+
+    if (!port->enabled) {
+        return -1;
+    }
+
     return world_ingress(port->world, port->pport, iov, iovcnt);
 }
 
@@ -165,7 +168,6 @@
 static NetClientInfo fp_port_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),
-    .can_receive = fp_port_can_receive,
     .receive = fp_port_receive,
     .receive_iov = fp_port_receive_iov,
     .cleanup = fp_port_cleanup,
diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h
index fe639ba..8c50830 100644
--- a/hw/net/rocker/rocker_hw.h
+++ b/hw/net/rocker/rocker_hw.h
@@ -250,6 +250,7 @@
 #define ROCKER_RX_FLAGS_TCP                     (1 << 5)
 #define ROCKER_RX_FLAGS_UDP                     (1 << 6)
 #define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD       (1 << 7)
+#define ROCKER_RX_FLAGS_FWD_OFFLOAD             (1 << 8)
 
 /* Tx msg */
 enum {
diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c
index b25a17d..874fb01 100644
--- a/hw/net/rocker/rocker_of_dpa.c
+++ b/hw/net/rocker/rocker_of_dpa.c
@@ -825,6 +825,8 @@
 static void of_dpa_output_l2_interface(OfDpaFlowContext *fc,
                                        OfDpaGroup *group)
 {
+    uint8_t copy_to_cpu = fc->action_set.apply.copy_to_cpu;
+
     if (group->l2_interface.pop_vlan) {
         of_dpa_flow_pkt_strip_vlan(fc);
     }
@@ -837,7 +839,8 @@
      */
 
     if (group->l2_interface.out_pport == 0) {
-        rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt);
+        rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt,
+                   copy_to_cpu);
     } else if (group->l2_interface.out_pport != fc->in_pport) {
         rocker_port_eg(world_rocker(fc->of_dpa->world),
                        group->l2_interface.out_pport,
@@ -2525,7 +2528,6 @@
             ngroup->has_set_vlan_id = true;
             ngroup->set_vlan_id = ntohs(group->l2_rewrite.vlan_id);
         }
-        break;
         if (memcmp(group->l2_rewrite.src_mac.a, zero_mac.a, ETH_ALEN)) {
             ngroup->has_set_eth_src = true;
             ngroup->set_eth_src =
@@ -2536,6 +2538,7 @@
             ngroup->set_eth_dst =
                 qemu_mac_strdup_printf(group->l2_rewrite.dst_mac.a);
         }
+        break;
     case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD:
     case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST:
         ngroup->has_vlan_id = true;
diff --git a/hw/net/rocker/rocker_world.c b/hw/net/rocker/rocker_world.c
index b991e87..a6b18f1 100644
--- a/hw/net/rocker/rocker_world.c
+++ b/hw/net/rocker/rocker_world.c
@@ -32,7 +32,7 @@
         return world->ops->ig(world, pport, iov, iovcnt);
     }
 
-    return iov_size(iov, iovcnt);
+    return -1;
 }
 
 int world_do_cmd(World *world, DescInfo *info,
diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c
index 2dd5ec1..1ca5e9c 100644
--- a/hw/net/spapr_llan.c
+++ b/hw/net/spapr_llan.c
@@ -284,7 +284,7 @@
 }
 
 static target_ulong h_register_logical_lan(PowerPCCPU *cpu,
-                                           sPAPREnvironment *spapr,
+                                           sPAPRMachineState *spapr,
                                            target_ulong opcode,
                                            target_ulong *args)
 {
@@ -349,7 +349,8 @@
 }
 
 
-static target_ulong h_free_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_free_logical_lan(PowerPCCPU *cpu,
+                                       sPAPRMachineState *spapr,
                                        target_ulong opcode, target_ulong *args)
 {
     target_ulong reg = args[0];
@@ -371,7 +372,7 @@
 }
 
 static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu,
-                                             sPAPREnvironment *spapr,
+                                             sPAPRMachineState *spapr,
                                              target_ulong opcode,
                                              target_ulong *args)
 {
@@ -421,7 +422,8 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_send_logical_lan(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_send_logical_lan(PowerPCCPU *cpu,
+                                       sPAPRMachineState *spapr,
                                        target_ulong opcode, target_ulong *args)
 {
     target_ulong reg = args[0];
@@ -490,7 +492,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                      target_ulong opcode, target_ulong *args)
 {
     target_ulong reg = args[0];
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 104a0f5..706e060 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1879,6 +1879,12 @@
         return -1;
     }
 
+    if (s->peer_has_vhdr) {
+        vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
+        buf += sizeof(struct virtio_net_hdr);
+        size -= sizeof(struct virtio_net_hdr);
+    }
+
     /* Pad to minimum Ethernet frame length */
     if (size < sizeof(min_buf)) {
         memcpy(min_buf, buf, size);
@@ -1887,12 +1893,6 @@
         size = sizeof(min_buf);
     }
 
-    if (s->peer_has_vhdr) {
-        vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
-        buf += sizeof(struct virtio_net_hdr);
-        size -= sizeof(struct virtio_net_hdr);
-    }
-
     vmxnet_rx_pkt_set_packet_type(s->rx_pkt,
         get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
 
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index 11332d1..fcaa77d 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -45,7 +45,7 @@
 #define DEFAULT_NVRAM_SIZE 65536
 #define MAX_NVRAM_SIZE 1048576
 
-static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_nvram_fetch(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                              uint32_t token, uint32_t nargs,
                              target_ulong args,
                              uint32_t nret, target_ulong rets)
@@ -86,7 +86,7 @@
     rtas_st(rets, 1, len);
 }
 
-static void rtas_nvram_store(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                              uint32_t token, uint32_t nargs,
                              target_ulong args,
                              uint32_t nret, target_ulong rets)
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 14c7711..6e28985 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -78,7 +78,7 @@
                  PCI_EXP_LNK_LS_25);
 
     pci_set_word(exp_cap + PCI_EXP_LNKSTA,
-                 PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25);
+                 PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25 | PCI_EXP_LNKSTA_DLLLA);
 
     pci_set_long(exp_cap + PCI_EXP_DEVCAP2,
                  PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP);
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 0f3e341..77d5c81 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -145,7 +145,6 @@
 static void ppc_core99_init(MachineState *machine)
 {
     ram_addr_t ram_size = machine->ram_size;
-    const char *cpu_model = machine->cpu_model;
     const char *kernel_filename = machine->kernel_filename;
     const char *kernel_cmdline = machine->kernel_cmdline;
     const char *initrd_filename = machine->initrd_filename;
@@ -182,14 +181,15 @@
     linux_boot = (kernel_filename != NULL);
 
     /* init CPUs */
-    if (cpu_model == NULL)
+    if (machine->cpu_model == NULL) {
 #ifdef TARGET_PPC64
-        cpu_model = "970fx";
+        machine->cpu_model = "970fx";
 #else
-        cpu_model = "G4";
+        machine->cpu_model = "G4";
 #endif
+    }
     for (i = 0; i < smp_cpus; i++) {
-        cpu = cpu_ppc_init(cpu_model);
+        cpu = cpu_ppc_init(machine->cpu_model);
         if (cpu == NULL) {
             fprintf(stderr, "Unable to find PowerPC CPU definition\n");
             exit(1);
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 99879dd..06fdbaf 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -75,7 +75,6 @@
 static void ppc_heathrow_init(MachineState *machine)
 {
     ram_addr_t ram_size = machine->ram_size;
-    const char *cpu_model = machine->cpu_model;
     const char *kernel_filename = machine->kernel_filename;
     const char *kernel_cmdline = machine->kernel_cmdline;
     const char *initrd_filename = machine->initrd_filename;
@@ -107,10 +106,10 @@
     linux_boot = (kernel_filename != NULL);
 
     /* init CPUs */
-    if (cpu_model == NULL)
-        cpu_model = "G3";
+    if (machine->cpu_model == NULL)
+        machine->cpu_model = "G3";
     for (i = 0; i < smp_cpus; i++) {
-        cpu = cpu_ppc_init(cpu_model);
+        cpu = cpu_ppc_init(machine->cpu_model);
         if (cpu == NULL) {
             fprintf(stderr, "Unable to find PowerPC CPU definition\n");
             exit(1);
diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c
index 778970a..032fa80 100644
--- a/hw/ppc/ppc440_bamboo.c
+++ b/hw/ppc/ppc440_bamboo.c
@@ -159,7 +159,6 @@
 static void bamboo_init(MachineState *machine)
 {
     ram_addr_t ram_size = machine->ram_size;
-    const char *cpu_model = machine->cpu_model;
     const char *kernel_filename = machine->kernel_filename;
     const char *kernel_cmdline = machine->kernel_cmdline;
     const char *initrd_filename = machine->initrd_filename;
@@ -184,10 +183,10 @@
     int i;
 
     /* Setup CPU. */
-    if (cpu_model == NULL) {
-        cpu_model = "440EP";
+    if (machine->cpu_model == NULL) {
+        machine->cpu_model = "440EP";
     }
-    cpu = cpu_ppc_init(cpu_model);
+    cpu = cpu_ppc_init(machine->cpu_model);
     if (cpu == NULL) {
         fprintf(stderr, "Unable to initialize CPU!\n");
         exit(1);
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 998ee2d..45b5f62 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -506,7 +506,6 @@
 static void ppc_prep_init(MachineState *machine)
 {
     ram_addr_t ram_size = machine->ram_size;
-    const char *cpu_model = machine->cpu_model;
     const char *kernel_filename = machine->kernel_filename;
     const char *kernel_cmdline = machine->kernel_cmdline;
     const char *initrd_filename = machine->initrd_filename;
@@ -536,10 +535,10 @@
     linux_boot = (kernel_filename != NULL);
 
     /* init CPUs */
-    if (cpu_model == NULL)
-        cpu_model = "602";
+    if (machine->cpu_model == NULL)
+        machine->cpu_model = "602";
     for (i = 0; i < smp_cpus; i++) {
-        cpu = cpu_ppc_init(cpu_model);
+        cpu = cpu_ppc_init(machine->cpu_model);
         if (cpu == NULL) {
             fprintf(stderr, "Unable to find PowerPC CPU definition\n");
             exit(1);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f174e5a..a6f1947 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -34,6 +34,7 @@
 #include "sysemu/cpus.h"
 #include "sysemu/kvm.h"
 #include "kvm_ppc.h"
+#include "migration/migration.h"
 #include "mmu-hash64.h"
 #include "qom/cpu.h"
 
@@ -90,25 +91,6 @@
 
 #define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
 
-typedef struct sPAPRMachineState sPAPRMachineState;
-
-#define TYPE_SPAPR_MACHINE      "spapr-machine"
-#define SPAPR_MACHINE(obj) \
-    OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE)
-
-/**
- * sPAPRMachineState:
- */
-struct sPAPRMachineState {
-    /*< private >*/
-    MachineState parent_obj;
-
-    /*< public >*/
-    char *kvm_type;
-};
-
-sPAPREnvironment *spapr;
-
 static XICSState *try_create_xics(const char *type, int nr_servers,
                                   int nr_irqs, Error **errp)
 {
@@ -184,7 +166,28 @@
     return ret;
 }
 
-static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
+static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs)
+{
+    int ret = 0;
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    int index = ppc_get_vcpu_dt_id(cpu);
+    uint32_t associativity[] = {cpu_to_be32(0x5),
+                                cpu_to_be32(0x0),
+                                cpu_to_be32(0x0),
+                                cpu_to_be32(0x0),
+                                cpu_to_be32(cs->numa_node),
+                                cpu_to_be32(index)};
+
+    /* Advertise NUMA via ibm,associativity */
+    if (nb_numa_nodes > 1) {
+        ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
+                          sizeof(associativity));
+    }
+
+    return ret;
+}
+
+static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
 {
     int ret = 0, offset, cpus_offset;
     CPUState *cs;
@@ -196,12 +199,6 @@
         PowerPCCPU *cpu = POWERPC_CPU(cs);
         DeviceClass *dc = DEVICE_GET_CLASS(cs);
         int index = ppc_get_vcpu_dt_id(cpu);
-        uint32_t associativity[] = {cpu_to_be32(0x5),
-                                    cpu_to_be32(0x0),
-                                    cpu_to_be32(0x0),
-                                    cpu_to_be32(0x0),
-                                    cpu_to_be32(cs->numa_node),
-                                    cpu_to_be32(index)};
 
         if ((index % smt) != 0) {
             continue;
@@ -225,20 +222,17 @@
             }
         }
 
-        if (nb_numa_nodes > 1) {
-            ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
-                              sizeof(associativity));
-            if (ret < 0) {
-                return ret;
-            }
-        }
-
         ret = fdt_setprop(fdt, offset, "ibm,pft-size",
                           pft_size_prop, sizeof(pft_size_prop));
         if (ret < 0) {
             return ret;
         }
 
+        ret = spapr_fixup_cpu_numa_dt(fdt, offset, cs);
+        if (ret < 0) {
+            return ret;
+        }
+
         ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
                                      ppc_get_compat_smt_threads(cpu));
         if (ret < 0) {
@@ -284,15 +278,18 @@
 
 static hwaddr spapr_node0_size(void)
 {
+    MachineState *machine = MACHINE(qdev_get_machine());
+
     if (nb_numa_nodes) {
         int i;
         for (i = 0; i < nb_numa_nodes; ++i) {
             if (numa_info[i].node_mem) {
-                return MIN(pow2floor(numa_info[i].node_mem), ram_size);
+                return MIN(pow2floor(numa_info[i].node_mem),
+                           machine->ram_size);
             }
         }
     }
-    return ram_size;
+    return machine->ram_size;
 }
 
 #define _FDT(exp) \
@@ -318,18 +315,13 @@
                                    uint32_t epow_irq)
 {
     void *fdt;
-    CPUState *cs;
     uint32_t start_prop = cpu_to_be32(initrd_base);
     uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
     GString *hypertas = g_string_sized_new(256);
     GString *qemu_hypertas = g_string_sized_new(256);
     uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
-    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
-    int smt = kvmppc_smt_threads();
+    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)};
     unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
-    QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
-    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
-    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
     char *buf;
 
     add_str(hypertas, "hcall-pft");
@@ -415,107 +407,6 @@
 
     _FDT((fdt_end_node(fdt)));
 
-    /* cpus */
-    _FDT((fdt_begin_node(fdt, "cpus")));
-
-    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
-    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
-
-    CPU_FOREACH(cs) {
-        PowerPCCPU *cpu = POWERPC_CPU(cs);
-        CPUPPCState *env = &cpu->env;
-        DeviceClass *dc = DEVICE_GET_CLASS(cs);
-        PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
-        int index = ppc_get_vcpu_dt_id(cpu);
-        char *nodename;
-        uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
-                           0xffffffff, 0xffffffff};
-        uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
-        uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
-        uint32_t page_sizes_prop[64];
-        size_t page_sizes_prop_size;
-
-        if ((index % smt) != 0) {
-            continue;
-        }
-
-        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
-
-        _FDT((fdt_begin_node(fdt, nodename)));
-
-        g_free(nodename);
-
-        _FDT((fdt_property_cell(fdt, "reg", index)));
-        _FDT((fdt_property_string(fdt, "device_type", "cpu")));
-
-        _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR])));
-        _FDT((fdt_property_cell(fdt, "d-cache-block-size",
-                                env->dcache_line_size)));
-        _FDT((fdt_property_cell(fdt, "d-cache-line-size",
-                                env->dcache_line_size)));
-        _FDT((fdt_property_cell(fdt, "i-cache-block-size",
-                                env->icache_line_size)));
-        _FDT((fdt_property_cell(fdt, "i-cache-line-size",
-                                env->icache_line_size)));
-
-        if (pcc->l1_dcache_size) {
-            _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size)));
-        } else {
-            fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
-        }
-        if (pcc->l1_icache_size) {
-            _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size)));
-        } else {
-            fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
-        }
-
-        _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
-        _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
-        _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
-        _FDT((fdt_property_string(fdt, "status", "okay")));
-        _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
-
-        if (env->spr_cb[SPR_PURR].oea_read) {
-            _FDT((fdt_property(fdt, "ibm,purr", NULL, 0)));
-        }
-
-        if (env->mmu_model & POWERPC_MMU_1TSEG) {
-            _FDT((fdt_property(fdt, "ibm,processor-segment-sizes",
-                               segs, sizeof(segs))));
-        }
-
-        /* Advertise VMX/VSX (vector extensions) if available
-         *   0 / no property == no vector extensions
-         *   1               == VMX / Altivec available
-         *   2               == VSX available */
-        if (env->insns_flags & PPC_ALTIVEC) {
-            uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
-
-            _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx)));
-        }
-
-        /* Advertise DFP (Decimal Floating Point) if available
-         *   0 / no property == no DFP
-         *   1               == DFP available */
-        if (env->insns_flags2 & PPC2_DFP) {
-            _FDT((fdt_property_cell(fdt, "ibm,dfp", 1)));
-        }
-
-        page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
-                                                      sizeof(page_sizes_prop));
-        if (page_sizes_prop_size) {
-            _FDT((fdt_property(fdt, "ibm,segment-page-sizes",
-                               page_sizes_prop, page_sizes_prop_size)));
-        }
-
-        _FDT((fdt_property_cell(fdt, "ibm,chip-id",
-                                cs->cpu_index / cpus_per_socket)));
-
-        _FDT((fdt_end_node(fdt)));
-    }
-
-    _FDT((fdt_end_node(fdt)));
-
     /* RTAS */
     _FDT((fdt_begin_node(fdt, "rtas")));
 
@@ -604,7 +495,8 @@
     return fdt;
 }
 
-int spapr_h_cas_compose_response(target_ulong addr, target_ulong size)
+int spapr_h_cas_compose_response(sPAPRMachineState *spapr,
+                                 target_ulong addr, target_ulong size)
 {
     void *fdt, *fdt_skel;
     sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
@@ -665,8 +557,9 @@
                       sizeof(associativity))));
 }
 
-static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
+static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt)
 {
+    MachineState *machine = MACHINE(spapr);
     hwaddr mem_start, node_size;
     int i, nb_nodes = nb_numa_nodes;
     NodeInfo *nodes = numa_info;
@@ -675,7 +568,7 @@
     /* No NUMA nodes, assume there is just one node with whole RAM */
     if (!nb_numa_nodes) {
         nb_nodes = 1;
-        ramnode.node_mem = ram_size;
+        ramnode.node_mem = machine->ram_size;
         nodes = &ramnode;
     }
 
@@ -683,12 +576,12 @@
         if (!nodes[i].node_mem) {
             continue;
         }
-        if (mem_start >= ram_size) {
+        if (mem_start >= machine->ram_size) {
             node_size = 0;
         } else {
             node_size = nodes[i].node_mem;
-            if (node_size > ram_size - mem_start) {
-                node_size = ram_size - mem_start;
+            if (node_size > machine->ram_size - mem_start) {
+                node_size = machine->ram_size - mem_start;
             }
         }
         if (!mem_start) {
@@ -714,7 +607,138 @@
     return 0;
 }
 
-static void spapr_finalize_fdt(sPAPREnvironment *spapr,
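+/* Fill in the device tree properties of a single CPU node: register and
+ * cache geometry, timebase and clock frequencies, optional VMX/VSX and DFP
+ * flags, supported page sizes, chip-id, hash page table size, and the NUMA
+ * and SMT properties. */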
+static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
+                                  sPAPRMachineState *spapr)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+    int index = ppc_get_vcpu_dt_id(cpu);
+    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
+                       0xffffffff, 0xffffffff};
+    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ;
+    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
+    uint32_t page_sizes_prop[64];
+    size_t page_sizes_prop_size;
+    QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
+    unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
+    uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
+    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
+
+    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
+    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
+                           env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
+                           env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
+                           env->icache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
+                           env->icache_line_size)));
+
+    if (pcc->l1_dcache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
+                               pcc->l1_dcache_size)));
+    } else {
+        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
+    }
+    if (pcc->l1_icache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
+                               pcc->l1_icache_size)));
+    } else {
+        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
+    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
+    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
+
+    if (env->spr_cb[SPR_PURR].oea_read) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
+    }
+
+    if (env->mmu_model & POWERPC_MMU_1TSEG) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
+                          segs, sizeof(segs))));
+    }
+
+    /* Advertise VMX/VSX (vector extensions) if available
+     *   0 / no property == no vector extensions
+     *   1               == VMX / Altivec available
+     *   2               == VSX available */
+    if (env->insns_flags & PPC_ALTIVEC) {
+        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
+
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
+    }
+
+    /* Advertise DFP (Decimal Floating Point) if available
+     *   0 / no property == no DFP
+     *   1               == DFP available */
+    if (env->insns_flags2 & PPC2_DFP) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
+    }
+
+    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
+                                                  sizeof(page_sizes_prop));
+    if (page_sizes_prop_size) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
+                          page_sizes_prop, page_sizes_prop_size)));
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
+                           cs->cpu_index / cpus_per_socket)));
+
+    _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
+                      pft_size_prop, sizeof(pft_size_prop))));
+
+    _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cs));
+
+    _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
+                                ppc_get_compat_smt_threads(cpu)));
+}
+
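+/* Build the /cpus container node with one child node per core (the first
+ * thread of each SMT group), populated via spapr_populate_cpu_dt(). */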
+static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
+{
+    CPUState *cs;
+    int cpus_offset;
+    char *nodename;
+    int smt = kvmppc_smt_threads();
+
+    cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
+    _FDT(cpus_offset);
+    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
+    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
+
+    /*
+     * We walk the CPUs in reverse order to ensure that CPU DT nodes
+     * created by fdt_add_subnode() end up in the right order in the FDT,
+     * so that the guest kernel enumerates the CPUs correctly.
+     */
+    CPU_FOREACH_REVERSE(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        int index = ppc_get_vcpu_dt_id(cpu);
+        DeviceClass *dc = DEVICE_GET_CLASS(cs);
+        int offset;
+
+        if ((index % smt) != 0) {
+            continue;
+        }
+
+        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
+        offset = fdt_add_subnode(fdt, cpus_offset, nodename);
+        g_free(nodename);
+        _FDT(offset);
+        spapr_populate_cpu_dt(cs, fdt, offset, spapr);
+    }
+}
+
+static void spapr_finalize_fdt(sPAPRMachineState *spapr,
                                hwaddr fdt_addr,
                                hwaddr rtas_addr,
                                hwaddr rtas_size)
@@ -759,11 +783,8 @@
         fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
     }
 
-    /* Advertise NUMA via ibm,associativity */
-    ret = spapr_fixup_cpu_dt(fdt, spapr);
-    if (ret < 0) {
-        fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
-    }
+    /* cpus */
+    spapr_populate_cpus_dt_node(fdt, spapr);
 
     bootlist = get_boot_devices_list(&cb, true);
     if (cb && bootlist) {
@@ -830,7 +851,7 @@
 #define CLEAN_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
 #define DIRTY_HPTE(_hpte)  ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))
 
-static void spapr_reset_htab(sPAPREnvironment *spapr)
+static void spapr_reset_htab(sPAPRMachineState *spapr)
 {
     long shift;
     int index;
@@ -892,7 +913,7 @@
  * A guest reset will cause spapr->htab_fd to become stale if being used.
  * Reopen the file descriptor to make sure the whole HTAB is properly read.
  */
-static int spapr_check_htab_fd(sPAPREnvironment *spapr)
+static int spapr_check_htab_fd(sPAPRMachineState *spapr)
 {
     int rc = 0;
 
@@ -912,6 +933,7 @@
 
 static void ppc_spapr_reset(void)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     PowerPCCPU *first_ppc_cpu;
     uint32_t rtas_limit;
 
@@ -945,12 +967,13 @@
     first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
     first_ppc_cpu->env.gpr[5] = 0;
     first_cpu->halted = 0;
-    first_ppc_cpu->env.nip = spapr->entry_point;
+    first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT;
 
 }
 
 static void spapr_cpu_reset(void *opaque)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     PowerPCCPU *cpu = opaque;
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
@@ -979,12 +1002,12 @@
      * We have 8 hpte per group, and each hpte is 16 bytes.
      * ie have 128 bytes per hpte entry.
      */
-    env->htab_mask = (1ULL << ((spapr)->htab_shift - 7)) - 1;
+    env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1;
     env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
         (spapr->htab_shift - 18);
 }
 
-static void spapr_create_nvram(sPAPREnvironment *spapr)
+static void spapr_create_nvram(sPAPRMachineState *spapr)
 {
     DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
     DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0);
@@ -998,7 +1021,7 @@
     spapr->nvram = (struct sPAPRNVRAM *)dev;
 }
 
-static void spapr_rtc_create(sPAPREnvironment *spapr)
+static void spapr_rtc_create(sPAPRMachineState *spapr)
 {
     DeviceState *dev = qdev_create(NULL, TYPE_SPAPR_RTC);
 
@@ -1028,7 +1051,7 @@
 
 static int spapr_post_load(void *opaque, int version_id)
 {
-    sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
+    sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
     int err = 0;
 
     /* In earlier versions, there was no separate qdev for the PAPR
@@ -1057,16 +1080,16 @@
         VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4),
 
         /* RTC offset */
-        VMSTATE_UINT64_TEST(rtc_offset, sPAPREnvironment, version_before_3),
+        VMSTATE_UINT64_TEST(rtc_offset, sPAPRMachineState, version_before_3),
 
-        VMSTATE_PPC_TIMEBASE_V(tb, sPAPREnvironment, 2),
+        VMSTATE_PPC_TIMEBASE_V(tb, sPAPRMachineState, 2),
         VMSTATE_END_OF_LIST()
     },
 };
 
 static int htab_save_setup(QEMUFile *f, void *opaque)
 {
-    sPAPREnvironment *spapr = opaque;
+    sPAPRMachineState *spapr = opaque;
 
     /* "Iteration" header */
     qemu_put_be32(f, spapr->htab_shift);
@@ -1090,7 +1113,7 @@
     return 0;
 }
 
-static void htab_save_first_pass(QEMUFile *f, sPAPREnvironment *spapr,
+static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr,
                                  int64_t max_ns)
 {
     int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
@@ -1140,7 +1163,7 @@
     spapr->htab_save_index = index;
 }
 
-static int htab_save_later_pass(QEMUFile *f, sPAPREnvironment *spapr,
+static int htab_save_later_pass(QEMUFile *f, sPAPRMachineState *spapr,
                                 int64_t max_ns)
 {
     bool final = max_ns < 0;
@@ -1222,7 +1245,7 @@
 
 static int htab_save_iterate(QEMUFile *f, void *opaque)
 {
-    sPAPREnvironment *spapr = opaque;
+    sPAPRMachineState *spapr = opaque;
     int rc = 0;
 
     /* Iteration header */
@@ -1257,7 +1280,7 @@
 
 static int htab_save_complete(QEMUFile *f, void *opaque)
 {
-    sPAPREnvironment *spapr = opaque;
+    sPAPRMachineState *spapr = opaque;
 
     /* Iteration header */
     qemu_put_be32(f, 0);
@@ -1292,7 +1315,7 @@
 
 static int htab_load(QEMUFile *f, void *opaque, int version_id)
 {
-    sPAPREnvironment *spapr = opaque;
+    sPAPRMachineState *spapr = opaque;
     uint32_t section_hdr;
     int fd = -1;
 
@@ -1386,16 +1409,42 @@
     machine->boot_order = g_strdup(boot_device);
 }
 
+static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu)
+{
+    CPUPPCState *env = &cpu->env;
+
+    /* Set time-base frequency to 512 MHz */
+    cpu_ppc_tb_init(env, TIMEBASE_FREQ);
+
+    /* PAPR always has exception vectors in RAM not ROM. To ensure this,
+     * MSR[IP] should never be set.
+     */
+    env->msr_mask &= ~(1 << 6);
+
+    /* Tell KVM that we're in PAPR mode */
+    if (kvm_enabled()) {
+        kvmppc_set_papr(cpu);
+    }
+
+    if (cpu->max_compat) {
+        if (ppc_set_compat(cpu, cpu->max_compat) < 0) {
+            exit(1);
+        }
+    }
+
+    xics_cpu_setup(spapr->icp, cpu);
+
+    qemu_register_reset(spapr_cpu_reset, cpu);
+}
+
 /* pSeries LPAR / sPAPR hardware init */
 static void ppc_spapr_init(MachineState *machine)
 {
-    ram_addr_t ram_size = machine->ram_size;
-    const char *cpu_model = machine->cpu_model;
+    sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
     const char *kernel_filename = machine->kernel_filename;
     const char *kernel_cmdline = machine->kernel_cmdline;
     const char *initrd_filename = machine->initrd_filename;
     PowerPCCPU *cpu;
-    CPUPPCState *env;
     PCIHostState *phb;
     int i;
     MemoryRegion *sysmem = get_system_memory();
@@ -1412,7 +1461,6 @@
 
     msi_supported = true;
 
-    spapr = g_malloc0(sizeof(*spapr));
     QLIST_INIT(&spapr->phbs);
 
     cpu_ppc_hypercall = emulate_spapr_hypercall;
@@ -1459,7 +1507,7 @@
      * more than needed for the Linux guests we support. */
     spapr->htab_shift = 18; /* Minimum architected size */
     while (spapr->htab_shift <= 46) {
-        if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
+        if ((1ULL << (spapr->htab_shift + 7)) >= machine->ram_size) {
             break;
         }
         spapr->htab_shift++;
@@ -1467,43 +1515,21 @@
 
     /* Set up Interrupt Controller before we create the VCPUs */
     spapr->icp = xics_system_init(machine,
-                                  smp_cpus * kvmppc_smt_threads() / smp_threads,
+                                  DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(),
+                                               smp_threads),
                                   XICS_IRQS);
 
     /* init CPUs */
-    if (cpu_model == NULL) {
-        cpu_model = kvm_enabled() ? "host" : "POWER7";
+    if (machine->cpu_model == NULL) {
+        machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
     }
     for (i = 0; i < smp_cpus; i++) {
-        cpu = cpu_ppc_init(cpu_model);
+        cpu = cpu_ppc_init(machine->cpu_model);
         if (cpu == NULL) {
             fprintf(stderr, "Unable to find PowerPC CPU definition\n");
             exit(1);
         }
-        env = &cpu->env;
-
-        /* Set time-base frequency to 512 MHz */
-        cpu_ppc_tb_init(env, TIMEBASE_FREQ);
-
-        /* PAPR always has exception vectors in RAM not ROM. To ensure this,
-         * MSR[IP] should never be set.
-         */
-        env->msr_mask &= ~(1 << 6);
-
-        /* Tell KVM that we're in PAPR mode */
-        if (kvm_enabled()) {
-            kvmppc_set_papr(cpu);
-        }
-
-        if (cpu->max_compat) {
-            if (ppc_set_compat(cpu, cpu->max_compat) < 0) {
-                exit(1);
-            }
-        }
-
-        xics_cpu_setup(spapr->icp, cpu);
-
-        qemu_register_reset(spapr_cpu_reset, cpu);
+        spapr_cpu_init(spapr, cpu);
     }
 
     if (kvm_enabled()) {
@@ -1512,9 +1538,8 @@
     }
 
     /* allocate RAM */
-    spapr->ram_limit = ram_size;
     memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram",
-                                         spapr->ram_limit);
+                                         machine->ram_size);
     memory_region_add_subregion(sysmem, 0, ram);
 
     if (rma_alloc_size && rma) {
@@ -1658,8 +1683,9 @@
     }
     g_free(filename);
 
-    spapr->entry_point = 0x100;
-
+    /* FIXME: Should register things through the MachineState's qdev
+     * interface; this is a legacy from the sPAPREnvironment structure
+     * which predated MachineState but had a similar function */
     vmstate_register(NULL, 0, &vmstate_spapr, spapr);
     register_savevm_live(NULL, "spapr/htab", -1, 1,
                          &savevm_htab_handlers, spapr);
@@ -1755,17 +1781,17 @@
 
 static char *spapr_get_kvm_type(Object *obj, Error **errp)
 {
-    sPAPRMachineState *sm = SPAPR_MACHINE(obj);
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
 
-    return g_strdup(sm->kvm_type);
+    return g_strdup(spapr->kvm_type);
 }
 
 static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
 {
-    sPAPRMachineState *sm = SPAPR_MACHINE(obj);
+    sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
 
-    g_free(sm->kvm_type);
-    sm->kvm_type = g_strdup(value);
+    g_free(spapr->kvm_type);
+    spapr->kvm_type = g_strdup(value);
 }
 
 static void spapr_machine_initfn(Object *obj)
@@ -1820,6 +1846,7 @@
     .abstract      = true,
     .instance_size = sizeof(sPAPRMachineState),
     .instance_init = spapr_machine_initfn,
+    .class_size    = sizeof(sPAPRMachineClass),
     .class_init    = spapr_machine_class_init,
     .interfaces = (InterfaceInfo[]) {
         { TYPE_FW_PATH_PROVIDER },
@@ -1851,6 +1878,8 @@
 
 static void spapr_compat_2_3(Object *obj)
 {
+    savevm_skip_section_footers();
+    global_state_set_optional();
 }
 
 static void spapr_compat_2_2(Object *obj)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index fda9e35..f626eb7 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -238,6 +238,7 @@
 
 static void rtas_event_log_queue(int log_type, void *data, bool exception)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1);
 
     g_assert(data);
@@ -250,6 +251,7 @@
 static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
                                                   bool exception)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     sPAPREventLogEntry *entry = NULL;
 
     /* we only queue EPOW events atm. */
@@ -278,6 +280,7 @@
 
 static bool rtas_event_log_contains(uint32_t event_mask, bool exception)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     sPAPREventLogEntry *entry = NULL;
 
     /* we only queue EPOW events atm. */
@@ -314,6 +317,7 @@
 static void spapr_init_maina(struct rtas_event_log_v6_maina *maina,
                              int section_count)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     struct tm tm;
     int year;
 
@@ -336,7 +340,7 @@
 
 static void spapr_powerdown_req(Notifier *n, void *opaque)
 {
-    sPAPREnvironment *spapr = container_of(n, sPAPREnvironment, epow_notifier);
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     struct rtas_error_log *hdr;
     struct rtas_event_log_v6 *v6hdr;
     struct rtas_event_log_v6_maina *maina;
@@ -384,6 +388,7 @@
 
 static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     struct hp_log_full *new_hp;
     struct rtas_error_log *hdr;
     struct rtas_event_log_v6 *v6hdr;
@@ -453,7 +458,7 @@
     spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_REMOVE);
 }
 
-static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                             uint32_t token, uint32_t nargs,
                             target_ulong args,
                             uint32_t nret, target_ulong rets)
@@ -508,7 +513,7 @@
     rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
 }
 
-static void event_scan(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void event_scan(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                        uint32_t token, uint32_t nargs,
                        target_ulong args,
                        uint32_t nret, target_ulong rets)
@@ -548,7 +553,7 @@
     rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
 }
 
-void spapr_events_init(sPAPREnvironment *spapr)
+void spapr_events_init(sPAPRMachineState *spapr)
 {
     QTAILQ_INIT(&spapr->pending_events);
     spapr->check_exception_irq = xics_alloc(spapr->icp, 0, 0, false);
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 4f76f1c..652ddf6 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -84,9 +84,10 @@
     return true;
 }
 
-static target_ulong h_enter(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                             target_ulong opcode, target_ulong *args)
 {
+    MachineState *machine = MACHINE(spapr);
     CPUPPCState *env = &cpu->env;
     target_ulong flags = args[0];
     target_ulong pte_index = args[1];
@@ -118,7 +119,7 @@
 
     raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << page_shift) - 1);
 
-    if (raddr < spapr->ram_limit) {
+    if (raddr < machine->ram_size) {
         /* Regular RAM - should have WIMG=0010 */
         if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) {
             return H_PARAMETER;
@@ -205,7 +206,7 @@
     return REMOVE_SUCCESS;
 }
 
-static target_ulong h_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                              target_ulong opcode, target_ulong *args)
 {
     CPUPPCState *env = &cpu->env;
@@ -252,7 +253,7 @@
 
 #define H_BULK_REMOVE_MAX_BATCH        4
 
-static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                   target_ulong opcode, target_ulong *args)
 {
     CPUPPCState *env = &cpu->env;
@@ -299,7 +300,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_protect(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                               target_ulong opcode, target_ulong *args)
 {
     CPUPPCState *env = &cpu->env;
@@ -337,7 +338,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_read(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
     CPUPPCState *env = &cpu->env;
@@ -367,7 +368,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_set_dabr(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                target_ulong opcode, target_ulong *args)
 {
     /* FIXME: actually implement this */
@@ -506,7 +507,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_register_vpa(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                    target_ulong opcode, target_ulong *args)
 {
     target_ulong flags = args[0];
@@ -551,7 +552,7 @@
     return ret;
 }
 
-static target_ulong h_cede(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_cede(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
     CPUPPCState *env = &cpu->env;
@@ -567,7 +568,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_rtas(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_rtas(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            target_ulong opcode, target_ulong *args)
 {
     target_ulong rtas_r3 = args[0];
@@ -579,7 +580,7 @@
                            nret, rtas_r3 + 12 + 4*nargs);
 }
 
-static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_logical_load(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                    target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
@@ -603,7 +604,7 @@
     return H_PARAMETER;
 }
 
-static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_logical_store(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                     target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
@@ -629,7 +630,7 @@
     return H_PARAMETER;
 }
 
-static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_logical_memop(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                     target_ulong opcode, target_ulong *args)
 {
     CPUState *cs = CPU(cpu);
@@ -698,14 +699,14 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_logical_icbi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_logical_icbi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                    target_ulong opcode, target_ulong *args)
 {
     /* Nothing to do on emulation, KVM will trap this in the kernel */
     return H_SUCCESS;
 }
 
-static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                    target_ulong opcode, target_ulong *args)
 {
     /* Nothing to do on emulation, KVM will trap this in the kernel */
@@ -788,7 +789,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                target_ulong opcode, target_ulong *args)
 {
     target_ulong resource = args[1];
@@ -828,7 +829,7 @@
     ((cpuver) == CPU_POWERPC_LOGICAL_2_07) ? 2070 : 0)
 
 static target_ulong h_client_architecture_support(PowerPCCPU *cpu_,
-                                                  sPAPREnvironment *spapr,
+                                                  sPAPRMachineState *spapr,
                                                   target_ulong opcode,
                                                   target_ulong *args)
 {
@@ -921,7 +922,7 @@
         return H_SUCCESS;
     }
 
-    if (spapr_h_cas_compose_response(args[1], args[2])) {
+    if (spapr_h_cas_compose_response(spapr, args[1], args[2])) {
         qemu_system_reset_request();
     }
 
@@ -952,6 +953,8 @@
 target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
                              target_ulong *args)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
     if ((opcode <= MAX_HCALL_OPCODE)
         && ((opcode & 0x3) == 0)) {
         spapr_hcall_fn fn = papr_hypercall_table[opcode / 4];
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 8cd9dba..f61504e 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -60,6 +60,20 @@
     return NULL;
 }
 
+static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
+{
+    switch (tce & SPAPR_TCE_RW) {
+    case SPAPR_TCE_FAULT:
+        return IOMMU_NONE;
+    case SPAPR_TCE_RO:
+        return IOMMU_RO;
+    case SPAPR_TCE_WO:
+        return IOMMU_WO;
+    default: /* SPAPR_TCE_RW */
+        return IOMMU_RW;
+    }
+}
+
 /* Called from RCU critical section */
 static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
                                                bool is_write)
@@ -82,7 +96,7 @@
         ret.iova = addr & page_mask;
         ret.translated_addr = tce & page_mask;
         ret.addr_mask = ~page_mask;
-        ret.perm = tce & IOMMU_RW;
+        ret.perm = spapr_tce_iommu_access_flags(tce);
     }
     trace_spapr_iommu_xlate(tcet->liobn, addr, ret.iova, ret.perm,
                             ret.addr_mask);
@@ -233,14 +247,14 @@
     entry.iova = ioba & page_mask;
     entry.translated_addr = tce & page_mask;
     entry.addr_mask = ~page_mask;
-    entry.perm = tce & IOMMU_RW;
+    entry.perm = spapr_tce_iommu_access_flags(tce);
     memory_region_notify_iommu(&tcet->iommu, entry);
 
     return H_SUCCESS;
 }
 
 static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
-                                       sPAPREnvironment *spapr,
+                                       sPAPRMachineState *spapr,
                                        target_ulong opcode, target_ulong *args)
 {
     int i;
@@ -267,9 +281,7 @@
     ioba &= page_mask;
 
     for (i = 0; i < npages; ++i, ioba += page_size) {
-        target_ulong off = (tce_list & ~SPAPR_TCE_RW) +
-                                i * sizeof(target_ulong);
-        tce = ldq_be_phys(cs->as, off);
+        tce = ldq_be_phys(cs->as, tce_list + i * sizeof(target_ulong));
 
         ret = put_tce_emu(tcet, ioba, tce);
         if (ret) {
@@ -287,7 +299,7 @@
     return ret;
 }
 
-static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                               target_ulong opcode, target_ulong *args)
 {
     int i;
@@ -326,7 +338,7 @@
     return ret;
 }
 
-static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                               target_ulong opcode, target_ulong *args)
 {
     target_ulong liobn = args[0];
@@ -367,7 +379,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                               target_ulong opcode, target_ulong *args)
 {
     target_ulong liobn = args[0];
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index d4a6150..a8f79d8 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -23,6 +23,7 @@
  * THE SOFTWARE.
  */
 #include "hw/hw.h"
+#include "hw/sysbus.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
@@ -35,6 +36,7 @@
 #include "qemu/error-report.h"
 #include "qapi/qmp/qerror.h"
 
+#include "hw/pci/pci_bridge.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/ppc/spapr_drc.h"
 #include "sysemu/device_tree.h"
@@ -50,6 +52,8 @@
 #define RTAS_TYPE_MSI           1
 #define RTAS_TYPE_MSIX          2
 
+#define FDT_NAME_MAX          128
+
 #define _FDT(exp) \
     do { \
         int ret = (exp);                                           \
@@ -58,7 +62,7 @@
         }                                                          \
     } while (0)
 
-sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid)
+sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid)
 {
     sPAPRPHBState *sphb;
 
@@ -72,7 +76,7 @@
     return NULL;
 }
 
-PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid,
+PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid,
                               uint32_t config_addr)
 {
     sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
@@ -93,7 +97,7 @@
     return ((arg >> 20) & 0xf00) | (arg & 0xff);
 }
 
-static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid,
+static void finish_read_pci_config(sPAPRMachineState *spapr, uint64_t buid,
                                    uint32_t addr, uint32_t size,
                                    target_ulong rets)
 {
@@ -123,7 +127,7 @@
     rtas_st(rets, 1, val);
 }
 
-static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                      uint32_t token, uint32_t nargs,
                                      target_ulong args,
                                      uint32_t nret, target_ulong rets)
@@ -143,7 +147,7 @@
     finish_read_pci_config(spapr, buid, addr, size, rets);
 }
 
-static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                  uint32_t token, uint32_t nargs,
                                  target_ulong args,
                                  uint32_t nret, target_ulong rets)
@@ -161,7 +165,7 @@
     finish_read_pci_config(spapr, 0, addr, size, rets);
 }
 
-static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid,
+static void finish_write_pci_config(sPAPRMachineState *spapr, uint64_t buid,
                                     uint32_t addr, uint32_t size,
                                     uint32_t val, target_ulong rets)
 {
@@ -189,7 +193,7 @@
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
 
-static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                       uint32_t token, uint32_t nargs,
                                       target_ulong args,
                                       uint32_t nret, target_ulong rets)
@@ -210,7 +214,7 @@
     finish_write_pci_config(spapr, buid, addr, size, val, rets);
 }
 
-static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                   uint32_t token, uint32_t nargs,
                                   target_ulong args,
                                   uint32_t nret, target_ulong rets)
@@ -259,7 +263,7 @@
     }
 }
 
-static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                 uint32_t token, uint32_t nargs,
                                 target_ulong args, uint32_t nret,
                                 target_ulong rets)
@@ -377,7 +381,7 @@
 }
 
 static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
-                                                   sPAPREnvironment *spapr,
+                                                   sPAPRMachineState *spapr,
                                                    uint32_t token,
                                                    uint32_t nargs,
                                                    target_ulong args,
@@ -418,13 +422,14 @@
 }
 
 static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
-                                    sPAPREnvironment *spapr,
+                                    sPAPRMachineState *spapr,
                                     uint32_t token, uint32_t nargs,
                                     target_ulong args, uint32_t nret,
                                     target_ulong rets)
 {
     sPAPRPHBState *sphb;
     sPAPRPHBClass *spc;
+    PCIDevice *pdev;
     uint32_t addr, option;
     uint64_t buid;
     int ret;
@@ -442,6 +447,12 @@
         goto param_error_exit;
     }
 
+    pdev = pci_find_device(PCI_HOST_BRIDGE(sphb)->bus,
+                           (addr >> 16) & 0xFF, (addr >> 8) & 0xFF);
+    if (!pdev || !object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+        goto param_error_exit;
+    }
+
     spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
     if (!spc->eeh_set_option) {
         goto param_error_exit;
@@ -456,7 +467,7 @@
 }
 
 static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
-                                           sPAPREnvironment *spapr,
+                                           sPAPRMachineState *spapr,
                                            uint32_t token, uint32_t nargs,
                                            target_ulong args, uint32_t nret,
                                            target_ulong rets)
@@ -512,7 +523,7 @@
 }
 
 static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
-                                            sPAPREnvironment *spapr,
+                                            sPAPRMachineState *spapr,
                                             uint32_t token, uint32_t nargs,
                                             target_ulong args, uint32_t nret,
                                             target_ulong rets)
@@ -556,7 +567,7 @@
 }
 
 static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
-                                    sPAPREnvironment *spapr,
+                                    sPAPRMachineState *spapr,
                                     uint32_t token, uint32_t nargs,
                                     target_ulong args, uint32_t nret,
                                     target_ulong rets)
@@ -592,7 +603,7 @@
 }
 
 static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
-                                  sPAPREnvironment *spapr,
+                                  sPAPRMachineState *spapr,
                                   uint32_t token, uint32_t nargs,
                                   target_ulong args, uint32_t nret,
                                   target_ulong rets)
@@ -627,7 +638,7 @@
 
 /* To support it later */
 static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
-                                       sPAPREnvironment *spapr,
+                                       sPAPRMachineState *spapr,
                                        uint32_t token, uint32_t nargs,
                                        target_ulong args, uint32_t nret,
                                        target_ulong rets)
@@ -718,6 +729,7 @@
 static void spapr_msi_write(void *opaque, hwaddr addr,
                             uint64_t data, unsigned size)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     uint32_t irq = data;
 
     trace_spapr_pci_msi_write(addr, data, irq);
@@ -742,6 +754,60 @@
     return &phb->iommu_as;
 }
 
+static char *spapr_phb_vfio_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev)
+{
+    char *path = NULL, *buf = NULL, *host = NULL;
+
+    /* Get the PCI VFIO host id */
+    host = object_property_get_str(OBJECT(pdev), "host", NULL);
+    if (!host) {
+        goto err_out;
+    }
+
+    /* Construct the path of the file that will give us the DT location */
+    path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host);
+    g_free(host);
+    if (!path || !g_file_get_contents(path, &buf, NULL, NULL)) {
+        goto err_out;
+    }
+    g_free(path);
+
+    /* Construct and read from host device tree the loc-code */
+    path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", buf);
+    g_free(buf);
+    if (!path || !g_file_get_contents(path, &buf, NULL, NULL)) {
+        goto err_out;
+    }
+    return buf;
+
+err_out:
+    g_free(path);
+    return NULL;
+}
+
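+/* Return the "ibm,loc-code" for a PCI device: read it from the host device
+ * tree for VFIO devices, otherwise synthesise one from the device name and
+ * its position in the QEMU PCI topology. */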
+static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev)
+{
+    char *buf;
+    const char *devtype = "qemu";
+    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
+
+    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+        buf = spapr_phb_vfio_get_loc_code(sphb, pdev);
+        if (buf) {
+            return buf;
+        }
+        devtype = "vfio";
+    }
+    /*
+     * For emulated devices, and as a fallback when the VFIO lookup fails,
+     * make up the loc-code.
+     */
+    buf = g_strdup_printf("%s_%s:%04x:%02x:%02x.%x",
+                          devtype, pdev->name, sphb->index, busnr,
+                          PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+    return buf;
+}
+
 /* Macros to operate with address in OF binding to PCI */
 #define b_x(x, p, l)    (((x) & ((1<<(l))-1)) << (p))
 #define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
@@ -786,7 +852,13 @@
  * phys.hi = 0xYYXXXXZZ, where:
  *   0xYY = npt000ss
  *          |||   |
- *          |||   +-- space code: 1 if IO region, 2 if MEM region
+ *          |||   +-- space code
+ *          |||               |
+ *          |||               +  00 if configuration space
+ *          |||               +  01 if IO region
+ *          |||               +  10 if 32-bit MEM region
+ *          |||               +  11 if 64-bit MEM region
+ *          |||
  *          ||+------ for non-relocatable IO: 1 if aliased
  *          ||        for relocatable IO: 1 if below 64KB
  *          ||        for MEM: 1 if below 1MB
@@ -846,6 +918,8 @@
         reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i)));
         if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) {
             reg->phys_hi |= cpu_to_be32(b_ss(1));
+        } else if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+            reg->phys_hi |= cpu_to_be32(b_ss(3));
         } else {
             reg->phys_hi |= cpu_to_be32(b_ss(2));
         }
@@ -870,13 +944,17 @@
     rp->assigned_len = assigned_idx * sizeof(ResourceFields);
 }
 
+static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
+                                            PCIDevice *pdev);
+
 static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
-                                       int phb_index, int drc_index,
-                                       const char *drc_name)
+                                       sPAPRPHBState *sphb)
 {
     ResourceProps rp;
     bool is_bridge = false;
-    int pci_status;
+    int pci_status, err;
+    char *buf = NULL;
+    uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev);
 
     if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
         PCI_HEADER_TYPE_BRIDGE) {
@@ -891,8 +969,7 @@
     _FDT(fdt_setprop_cell(fdt, offset, "revision-id",
                           pci_default_read_config(dev, PCI_REVISION_ID, 1)));
     _FDT(fdt_setprop_cell(fdt, offset, "class-code",
-                          pci_default_read_config(dev, PCI_CLASS_DEVICE, 2)
-                            << 8));
+                          pci_default_read_config(dev, PCI_CLASS_PROG, 3)));
     if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) {
         _FDT(fdt_setprop_cell(fdt, offset, "interrupts",
                  pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)));
@@ -938,8 +1015,21 @@
      * processed by OF beforehand
      */
     _FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
-    _FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name)));
-    _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
+    buf = spapr_phb_get_loc_code(sphb, dev);
+    if (!buf) {
+        error_report("Failed setting the ibm,loc-code");
+        return -1;
+    }
+
+    err = fdt_setprop_string(fdt, offset, "ibm,loc-code", buf);
+    g_free(buf);
+    if (err < 0) {
+        return err;
+    }
+
+    if (drc_index) {
+        _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
+    }
 
     _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
                           RESOURCE_CELLS_ADDRESS));
@@ -957,29 +1047,27 @@
 }
 
 /* create OF node for pci device and required OF DT properties */
-static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
-                                       int drc_index, const char *drc_name,
-                                       int *dt_offset)
+static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
+                                     void *fdt, int node_offset)
 {
-    void *fdt;
-    int offset, ret, fdt_size;
+    int offset, ret;
     int slot = PCI_SLOT(dev->devfn);
     int func = PCI_FUNC(dev->devfn);
-    char nodename[512];
+    char nodename[FDT_NAME_MAX];
 
-    fdt = create_device_tree(&fdt_size);
     if (func != 0) {
-        sprintf(nodename, "pci@%d,%d", slot, func);
+        snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func);
     } else {
-        sprintf(nodename, "pci@%d", slot);
+        snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot);
     }
-    offset = fdt_add_subnode(fdt, 0, nodename);
-    ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index,
-                                      drc_name);
-    g_assert(!ret);
+    offset = fdt_add_subnode(fdt, node_offset, nodename);
+    ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb);
 
-    *dt_offset = offset;
-    return fdt;
+    g_assert(!ret);
+    if (ret) {
+        return 0;
+    }
+    return offset;
 }
 
 static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
@@ -989,22 +1077,21 @@
 {
     sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
     DeviceState *dev = DEVICE(pdev);
-    int drc_index = drck->get_index(drc);
-    const char *drc_name = drck->get_name(drc);
     void *fdt = NULL;
-    int fdt_start_offset = 0;
+    int fdt_start_offset = 0, fdt_size;
 
-    /* boot-time devices get their device tree node created by SLOF, but for
-     * hotplugged devices we need QEMU to generate it so the guest can fetch
-     * it via RTAS
-     */
     if (dev->hotplugged) {
-        fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name,
-                                        &fdt_start_offset);
+        fdt = create_device_tree(&fdt_size);
+        fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
+        if (!fdt_start_offset) {
+            error_setg(errp, "Failed to create pci child device tree node");
+            goto out;
+        }
     }
 
     drck->attach(drc, DEVICE(pdev),
                  fdt, fdt_start_offset, !dev->hotplugged, errp);
+out:
     if (*errp) {
         g_free(fdt);
     }
@@ -1046,6 +1133,20 @@
                                     pdev->devfn);
 }
 
+static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
+                                            PCIDevice *pdev)
+{
+    sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
+    sPAPRDRConnectorClass *drck;
+
+    if (!drc) {
+        return 0;
+    }
+
+    drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+    return drck->get_index(drc);
+}
+
 static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler,
                                      DeviceState *plugged_dev, Error **errp)
 {
@@ -1110,6 +1211,7 @@
 
 static void spapr_phb_realize(DeviceState *dev, Error **errp)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     SysBusDevice *s = SYS_BUS_DEVICE(dev);
     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
     PCIHostState *phb = PCI_HOST_BRIDGE(s);
@@ -1351,34 +1453,28 @@
     },
 };
 
-static void spapr_pci_fill_msi_devs(gpointer key, gpointer value,
-                                    gpointer opaque)
-{
-    sPAPRPHBState *sphb = opaque;
-
-    sphb->msi_devs[sphb->msi_devs_num].key = *(uint32_t *)key;
-    sphb->msi_devs[sphb->msi_devs_num].value = *(spapr_pci_msi *)value;
-    sphb->msi_devs_num++;
-}
-
 static void spapr_pci_pre_save(void *opaque)
 {
     sPAPRPHBState *sphb = opaque;
-    int msi_devs_num;
+    GHashTableIter iter;
+    gpointer key, value;
+    int i;
 
     if (sphb->msi_devs) {
         g_free(sphb->msi_devs);
         sphb->msi_devs = NULL;
     }
-    sphb->msi_devs_num = 0;
-    msi_devs_num = g_hash_table_size(sphb->msi);
-    if (!msi_devs_num) {
+    sphb->msi_devs_num = g_hash_table_size(sphb->msi);
+    if (!sphb->msi_devs_num) {
         return;
     }
-    sphb->msi_devs = g_malloc(msi_devs_num * sizeof(spapr_pci_msi_mig));
+    sphb->msi_devs = g_malloc(sphb->msi_devs_num * sizeof(spapr_pci_msi_mig));
 
-    g_hash_table_foreach(sphb->msi, spapr_pci_fill_msi_devs, sphb);
-    assert(sphb->msi_devs_num == msi_devs_num);
+    g_hash_table_iter_init(&iter, sphb->msi);
+    for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) {
+        sphb->msi_devs[i].key = *(uint32_t *) key;
+        sphb->msi_devs[i].value = *(spapr_pci_msi *) value;
+    }
 }
 
 static int spapr_pci_post_load(void *opaque, int version_id)
@@ -1464,7 +1560,7 @@
     }
 };
 
-PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
+PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index)
 {
     DeviceState *dev;
 
@@ -1475,12 +1571,90 @@
     return PCI_HOST_BRIDGE(dev);
 }
 
+typedef struct sPAPRFDT {
+    void *fdt;
+    int node_off;
+    sPAPRPHBState *sphb;
+} sPAPRFDT;
+
+static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev,
+                                          void *opaque)
+{
+    PCIBus *sec_bus;
+    sPAPRFDT *p = opaque;
+    int offset;
+    sPAPRFDT s_fdt;
+
+    offset = spapr_create_pci_child_dt(p->sphb, pdev, p->fdt, p->node_off);
+    if (!offset) {
+        error_report("Failed to create pci child device tree node");
+        return;
+    }
+
+    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
+         PCI_HEADER_TYPE_BRIDGE)) {
+        return;
+    }
+
+    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
+    if (!sec_bus) {
+        return;
+    }
+
+    s_fdt.fdt = p->fdt;
+    s_fdt.node_off = offset;
+    s_fdt.sphb = p->sphb;
+    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+                        spapr_populate_pci_devices_dt,
+                        &s_fdt);
+}
+
+static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
+                                           void *opaque)
+{
+    unsigned int *bus_no = opaque;
+    unsigned int primary = *bus_no;
+    unsigned int subordinate = 0xff;
+    PCIBus *sec_bus = NULL;
+
+    if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
+         PCI_HEADER_TYPE_BRIDGE)) {
+        return;
+    }
+
+    (*bus_no)++;
+    pci_default_write_config(pdev, PCI_PRIMARY_BUS, primary, 1);
+    pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1);
+    pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
+
+    sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
+    if (!sec_bus) {
+        return;
+    }
+
+    pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, subordinate, 1);
+    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+                        spapr_phb_pci_enumerate_bridge, bus_no);
+    pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
+}
+
+static void spapr_phb_pci_enumerate(sPAPRPHBState *phb)
+{
+    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
+    unsigned int bus_no = 0;
+
+    pci_for_each_device(bus, pci_bus_num(bus),
+                        spapr_phb_pci_enumerate_bridge,
+                        &bus_no);
+
+}
+
 int spapr_populate_pci_dt(sPAPRPHBState *phb,
                           uint32_t xics_phandle,
                           void *fdt)
 {
     int bus_off, i, j, ret;
-    char nodename[256];
+    char nodename[FDT_NAME_MAX];
     uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
     const uint64_t mmiosize = memory_region_size(&phb->memwindow);
     const uint64_t w32max = (1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET;
@@ -1514,9 +1688,11 @@
         cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
     uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
     sPAPRTCETable *tcet;
+    PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus;
+    sPAPRFDT s_fdt;
 
     /* Start populating the FDT */
-    sprintf(nodename, "pci@%" PRIx64, phb->buid);
+    snprintf(nodename, FDT_NAME_MAX, "pci@%" PRIx64, phb->buid);
     bus_off = fdt_add_subnode(fdt, 0, nodename);
     if (bus_off < 0) {
         return bus_off;
@@ -1563,6 +1739,18 @@
                  tcet->liobn, tcet->bus_offset,
                  tcet->nb_table << tcet->page_shift);
 
+    /* Walk the bridges and program the bus numbers */
+    spapr_phb_pci_enumerate(phb);
+    _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));
+
+    /* Populate tree nodes with PCI devices attached */
+    s_fdt.fdt = fdt;
+    s_fdt.node_off = bus_off;
+    s_fdt.sphb = phb;
+    pci_for_each_device(bus, pci_bus_num(bus),
+                        spapr_populate_pci_devices_dt,
+                        &s_fdt);
+
     ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
                                 SPAPR_DR_CONNECTOR_TYPE_PCI);
     if (ret) {
@@ -1631,6 +1819,7 @@
 
 void spapr_pci_switch_vga(bool big_endian)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     sPAPRPHBState *sphb;
 
     /*
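
For reference, the spapr_pci pre_save change above replaces the g_hash_table_foreach() callback with direct GHashTableIter iteration. A minimal standalone sketch of that glib pattern (illustrative names, not part of this series; build against glib-2.0):

    #include <glib.h>
    #include <stdio.h>

    int main(void)
    {
        GHashTable *table = g_hash_table_new(g_direct_hash, g_direct_equal);
        GHashTableIter iter;
        gpointer key, value;
        int i, n;

        g_hash_table_insert(table, GINT_TO_POINTER(1), GINT_TO_POINTER(100));
        g_hash_table_insert(table, GINT_TO_POINTER(2), GINT_TO_POINTER(200));

        /* size the flat array once, then copy entries out in a plain loop */
        n = g_hash_table_size(table);
        int *keys = g_malloc(n * sizeof(int));

        g_hash_table_iter_init(&iter, table);
        for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) {
            keys[i] = GPOINTER_TO_INT(key);
        }

        for (i = 0; i < n; i++) {
            printf("key[%d] = %d\n", i, keys[i]);
        }

        g_free(keys);
        g_hash_table_destroy(table);
        return 0;
    }

The iterator keeps the running index in a local variable, so no counter has to live in device state just for the duration of the copy.
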
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index 99a1be5..cca45ed 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -19,6 +19,7 @@
 
 #include "hw/ppc/spapr.h"
 #include "hw/pci-host/spapr.h"
+#include "hw/pci/msix.h"
 #include "linux/vfio.h"
 #include "hw/vfio/vfio.h"
 
@@ -71,9 +72,26 @@
                                 spapr_tce_get_iommu(tcet));
 }
 
+static void spapr_phb_vfio_eeh_reenable(sPAPRPHBVFIOState *svphb)
+{
+    struct vfio_eeh_pe_op op = {
+        .argsz = sizeof(op),
+        .op    = VFIO_EEH_PE_ENABLE
+    };
+
+    vfio_container_ioctl(&svphb->phb.iommu_as,
+                         svphb->iommugroupid, VFIO_EEH_PE_OP, &op);
+}
+
 static void spapr_phb_vfio_reset(DeviceState *qdev)
 {
-    /* Do nothing */
+    /*
+     * The PE might be in frozen state. Reenabling the EEH
+     * functionality on it clears the frozen state, which
+     * ensures that the contained PCI devices will work properly
+     * after reboot.
+     */
+    spapr_phb_vfio_eeh_reenable(SPAPR_PCI_VFIO_HOST_BRIDGE(qdev));
 }
 
 static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
@@ -142,6 +160,49 @@
     return RTAS_OUT_SUCCESS;
 }
 
+static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus,
+                                              PCIDevice *pdev,
+                                              void *opaque)
+{
+    /* Check if the device is VFIO PCI device */
+    if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+        return;
+    }
+
+    /*
+     * The MSIx table will be cleaned out by reset. We need to
+     * disable it so that it can be reenabled properly. Also,
+     * the cached MSIx table should be cleared as it no longer
+     * reflects the contents in hardware.
+     */
+    if (msix_enabled(pdev)) {
+        uint16_t flags;
+
+        flags = pci_host_config_read_common(pdev,
+                                            pdev->msix_cap + PCI_MSIX_FLAGS,
+                                            pci_config_size(pdev), 2);
+        flags &= ~PCI_MSIX_FLAGS_ENABLE;
+        pci_host_config_write_common(pdev,
+                                     pdev->msix_cap + PCI_MSIX_FLAGS,
+                                     pci_config_size(pdev), flags, 2);
+    }
+
+    msix_reset(pdev);
+}
+
+static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
+{
+    pci_for_each_device(bus, pci_bus_num(bus),
+                        spapr_phb_vfio_eeh_clear_dev_msix, NULL);
+}
+
+static void spapr_phb_vfio_eeh_pre_reset(sPAPRPHBState *sphb)
+{
+    PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
+
+    pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
+}
+
 static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option)
 {
     sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
@@ -153,9 +214,11 @@
         op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
         break;
     case RTAS_SLOT_RESET_HOT:
+        spapr_phb_vfio_eeh_pre_reset(sphb);
         op.op = VFIO_EEH_PE_RESET_HOT;
         break;
     case RTAS_SLOT_RESET_FUNDAMENTAL:
+        spapr_phb_vfio_eeh_pre_reset(sphb);
         op.op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
         break;
     default:
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index fa28d43..2986f94 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -29,6 +29,7 @@
 #include "sysemu/char.h"
 #include "hw/qdev.h"
 #include "sysemu/device_tree.h"
+#include "sysemu/cpus.h"
 
 #include "hw/ppc/spapr.h"
 #include "hw/ppc/spapr_vio.h"
@@ -47,7 +48,7 @@
     do { } while (0)
 #endif
 
-static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPREnvironment *spapr,
+static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPRMachineState *spapr,
                                                     uint32_t drc_index)
 {
     sPAPRConfigureConnectorState *ccs = NULL;
@@ -61,14 +62,14 @@
     return ccs;
 }
 
-static void spapr_ccs_add(sPAPREnvironment *spapr,
+static void spapr_ccs_add(sPAPRMachineState *spapr,
                           sPAPRConfigureConnectorState *ccs)
 {
     g_assert(!spapr_ccs_find(spapr, ccs->drc_index));
     QTAILQ_INSERT_HEAD(&spapr->ccs_list, ccs, next);
 }
 
-static void spapr_ccs_remove(sPAPREnvironment *spapr,
+static void spapr_ccs_remove(sPAPRMachineState *spapr,
                              sPAPRConfigureConnectorState *ccs)
 {
     QTAILQ_REMOVE(&spapr->ccs_list, ccs, next);
@@ -77,7 +78,7 @@
 
 void spapr_ccs_reset_hook(void *opaque)
 {
-    sPAPREnvironment *spapr = opaque;
+    sPAPRMachineState *spapr = opaque;
     sPAPRConfigureConnectorState *ccs, *ccs_tmp;
 
     QTAILQ_FOREACH_SAFE(ccs, &spapr->ccs_list, next, ccs_tmp) {
@@ -85,7 +86,7 @@
     }
 }
 
-static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_display_character(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                    uint32_t token, uint32_t nargs,
                                    target_ulong args,
                                    uint32_t nret, target_ulong rets)
@@ -101,7 +102,7 @@
     }
 }
 
-static void rtas_power_off(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_power_off(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            uint32_t token, uint32_t nargs, target_ulong args,
                            uint32_t nret, target_ulong rets)
 {
@@ -113,7 +114,7 @@
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
 
-static void rtas_system_reboot(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_system_reboot(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                uint32_t token, uint32_t nargs,
                                target_ulong args,
                                uint32_t nret, target_ulong rets)
@@ -127,7 +128,7 @@
 }
 
 static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_,
-                                         sPAPREnvironment *spapr,
+                                         sPAPRMachineState *spapr,
                                          uint32_t token, uint32_t nargs,
                                          target_ulong args,
                                          uint32_t nret, target_ulong rets)
@@ -157,7 +158,7 @@
     rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 }
 
-static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPREnvironment *spapr,
+static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr,
                            uint32_t token, uint32_t nargs,
                            target_ulong args,
                            uint32_t nret, target_ulong rets)
@@ -204,7 +205,7 @@
     rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
 }
 
-static void rtas_stop_self(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_stop_self(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                            uint32_t token, uint32_t nargs,
                            target_ulong args,
                            uint32_t nret, target_ulong rets)
@@ -227,7 +228,7 @@
 }
 
 static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
-                                          sPAPREnvironment *spapr,
+                                          sPAPRMachineState *spapr,
                                           uint32_t token, uint32_t nargs,
                                           target_ulong args,
                                           uint32_t nret, target_ulong rets)
@@ -262,7 +263,7 @@
 }
 
 static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
-                                          sPAPREnvironment *spapr,
+                                          sPAPRMachineState *spapr,
                                           uint32_t token, uint32_t nargs,
                                           target_ulong args,
                                           uint32_t nret, target_ulong rets)
@@ -282,7 +283,7 @@
 }
 
 static void rtas_ibm_os_term(PowerPCCPU *cpu,
-                            sPAPREnvironment *spapr,
+                            sPAPRMachineState *spapr,
                             uint32_t token, uint32_t nargs,
                             target_ulong args,
                             uint32_t nret, target_ulong rets)
@@ -294,7 +295,7 @@
     rtas_st(rets, 0, ret);
 }
 
-static void rtas_set_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_set_power_level(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                  uint32_t token, uint32_t nargs,
                                  target_ulong args, uint32_t nret,
                                  target_ulong rets)
@@ -319,7 +320,7 @@
     rtas_st(rets, 1, 100);
 }
 
-static void rtas_get_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_get_power_level(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                   uint32_t token, uint32_t nargs,
                                   target_ulong args, uint32_t nret,
                                   target_ulong rets)
@@ -356,7 +357,7 @@
     return false;
 }
 
-static void rtas_set_indicator(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                uint32_t token, uint32_t nargs,
                                target_ulong args, uint32_t nret,
                                target_ulong rets)
@@ -427,7 +428,7 @@
     rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
 }
 
-static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                   uint32_t token, uint32_t nargs,
                                   target_ulong args, uint32_t nret,
                                   target_ulong rets)
@@ -481,7 +482,7 @@
 #define CC_WA_LEN 4096
 
 static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
-                                         sPAPREnvironment *spapr,
+                                         sPAPRMachineState *spapr,
                                          uint32_t token, uint32_t nargs,
                                          target_ulong args, uint32_t nret,
                                          target_ulong rets)
@@ -601,7 +602,7 @@
     spapr_rtas_fn fn;
 } rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE];
 
-target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                              uint32_t token, uint32_t nargs, target_ulong args,
                              uint32_t nret, target_ulong rets)
 {
@@ -651,6 +652,8 @@
 {
     int ret;
     int i;
+    uint32_t lrdr_capacity[5];
+    MachineState *machine = MACHINE(qdev_get_machine());
 
     ret = fdt_add_mem_rsv(fdt, rtas_addr, rtas_size);
     if (ret < 0) {
@@ -699,6 +702,19 @@
         }
 
     }
+
+    lrdr_capacity[0] = cpu_to_be32(((uint64_t)machine->maxram_size) >> 32);
+    lrdr_capacity[1] = cpu_to_be32(machine->maxram_size & 0xffffffff);
+    lrdr_capacity[2] = 0;
+    lrdr_capacity[3] = cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE);
+    lrdr_capacity[4] = cpu_to_be32(max_cpus/smp_threads);
+    ret = qemu_fdt_setprop(fdt, "/rtas", "ibm,lrdr-capacity", lrdr_capacity,
+                     sizeof(lrdr_capacity));
+    if (ret < 0) {
+        fprintf(stderr, "Couldn't add ibm,lrdr-capacity rtas property\n");
+        return ret;
+    }
+
     return 0;
 }
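
The ibm,lrdr-capacity property added above packs the 64-bit maxram_size into two big-endian 32-bit cells. A minimal standalone sketch of that split, using htonl() as a stand-in for cpu_to_be32() and a purely illustrative value:

    #include <stdint.h>
    #include <stdio.h>
    #include <arpa/inet.h>

    int main(void)
    {
        uint64_t maxram = 0x240000000ULL;                    /* 9 GiB, illustrative */
        uint32_t cells[2];

        cells[0] = htonl((uint32_t)(maxram >> 32));          /* high cell */
        cells[1] = htonl((uint32_t)(maxram & 0xffffffff));   /* low cell */

        printf("hi=0x%08x lo=0x%08x\n", ntohl(cells[0]), ntohl(cells[1]));
        return 0;
    }
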
 
diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c
index 9da3746..d20b8f2 100644
--- a/hw/ppc/spapr_rtc.c
+++ b/hw/ppc/spapr_rtc.c
@@ -76,7 +76,7 @@
     return 0;
 }
 
-static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_get_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                  uint32_t token, uint32_t nargs,
                                  target_ulong args,
                                  uint32_t nret, target_ulong rets)
@@ -106,7 +106,7 @@
     rtas_st(rets, 7, ns);
 }
 
-static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_set_time_of_day(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                  uint32_t token, uint32_t nargs,
                                  target_ulong args,
                                  uint32_t nret, target_ulong rets)
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 8b59b64..c51eb8e 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -160,7 +160,7 @@
 /*
  * CRQ handling
  */
-static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                               target_ulong opcode, target_ulong *args)
 {
     target_ulong reg = args[0];
@@ -218,7 +218,7 @@
     return H_SUCCESS;
 }
 
-static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                target_ulong opcode, target_ulong *args)
 {
     target_ulong reg = args[0];
@@ -232,7 +232,7 @@
     return free_crq(dev);
 }
 
-static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                target_ulong opcode, target_ulong *args)
 {
     target_ulong reg = args[0];
@@ -255,7 +255,7 @@
     return H_HARDWARE;
 }
 
-static target_ulong h_enable_crq(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_enable_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                  target_ulong opcode, target_ulong *args)
 {
     target_ulong reg = args[0];
@@ -333,7 +333,7 @@
     dev->tcet->bypass = bypass;
 }
 
-static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                 uint32_t token,
                                 uint32_t nargs, target_ulong args,
                                 uint32_t nret, target_ulong rets)
@@ -364,7 +364,7 @@
     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
 
-static void rtas_quiesce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static void rtas_quiesce(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                          uint32_t token,
                          uint32_t nargs, target_ulong args,
                          uint32_t nret, target_ulong rets)
@@ -426,6 +426,7 @@
 
 static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev;
     VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
     char *id;
@@ -491,7 +492,7 @@
     pc->realize(dev, errp);
 }
 
-static target_ulong h_vio_signal(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+static target_ulong h_vio_signal(PowerPCCPU *cpu, sPAPRMachineState *spapr,
                                  target_ulong opcode,
                                  target_ulong *args)
 {
diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
index 439732f..de86f7c 100644
--- a/hw/ppc/virtex_ml507.c
+++ b/hw/ppc/virtex_ml507.c
@@ -197,7 +197,6 @@
 static void virtex_init(MachineState *machine)
 {
     ram_addr_t ram_size = machine->ram_size;
-    const char *cpu_model = machine->cpu_model;
     const char *kernel_filename = machine->kernel_filename;
     const char *kernel_cmdline = machine->kernel_cmdline;
     hwaddr initrd_base = 0;
@@ -214,11 +213,11 @@
     int i;
 
     /* init CPUs */
-    if (cpu_model == NULL) {
-        cpu_model = "440-Xilinx";
+    if (machine->cpu_model == NULL) {
+        machine->cpu_model = "440-Xilinx";
     }
 
-    cpu = ppc440_init_xilinx(&ram_size, 1, cpu_model, 400000000);
+    cpu = ppc440_init_xilinx(&ram_size, 1, machine->cpu_model, 400000000);
     env = &cpu->env;
     qemu_register_reset(main_cpu_reset, cpu);
 
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index d631337..e345a6e 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1316,8 +1316,8 @@
     VirtQueue *vq = virtio_get_queue(vdev, n);
     EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
 
-    return kvm_irqchip_add_irqfd_notifier(kvm_state, notifier, NULL,
-                                          dev->routes.gsi[n]);
+    return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, notifier, NULL,
+                                              dev->routes.gsi[n]);
 }
 
 static void virtio_ccw_remove_irqfd(VirtioCcwDevice *dev, int n)
@@ -1327,8 +1327,8 @@
     EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
     int ret;
 
-    ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, notifier,
-                                            dev->routes.gsi[n]);
+    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, notifier,
+                                                dev->routes.gsi[n]);
     assert(ret == 0);
 }
 
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b1045da..85ee9b0 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -772,11 +772,19 @@
 
     if (QLIST_EMPTY(&container->group_list)) {
         VFIOAddressSpace *space = container->space;
+        VFIOGuestIOMMU *giommu, *tmp;
 
         if (container->iommu_data.release) {
             container->iommu_data.release(container);
         }
         QLIST_REMOVE(container, next);
+
+        QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
+            memory_region_unregister_iommu_notifier(&giommu->n);
+            QLIST_REMOVE(giommu, giommu_next);
+            g_free(giommu);
+        }
+
         trace_vfio_disconnect_container(container->fd);
         close(container->fd);
         g_free(container);
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e0e339a..2ed877f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -597,7 +597,7 @@
         return;
     }
 
-    if (kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->kvm_interrupt,
+    if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
                                        NULL, virq) < 0) {
         kvm_irqchip_release_virq(kvm_state, virq);
         event_notifier_cleanup(&vector->kvm_interrupt);
@@ -609,8 +609,8 @@
 
 static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
 {
-    kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->kvm_interrupt,
-                                      vector->virq);
+    kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
+                                          vector->virq);
     kvm_irqchip_release_virq(kvm_state, vector->virq);
     vector->virq = -1;
     event_notifier_cleanup(&vector->kvm_interrupt);
@@ -939,7 +939,7 @@
     };
     uint64_t size;
     off_t off = 0;
-    size_t bytes;
+    ssize_t bytes;
 
     if (ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
         error_report("vfio: Error getting ROM info: %m");
@@ -2252,6 +2252,33 @@
     vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
     vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
 
+    /*
+     * Test whether the pba_offset extends outside of the specified BAR. If
+     * that is the case, we need to apply a hardware-specific quirk if the
+     * device is known, or we have a broken configuration.
+     */
+    if (vdev->msix->pba_offset >=
+        vdev->bars[vdev->msix->pba_bar].region.size) {
+
+        PCIDevice *pdev = &vdev->pdev;
+        uint16_t vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
+        uint16_t device = pci_get_word(pdev->config + PCI_DEVICE_ID);
+
+        /*
+         * Chelsio T5 Virtual Function devices are encoded as 0x58xx for T5
+         * adapters. The T5 hardware returns an incorrect value of 0x8000 for
+         * the VF PBA offset while the BAR itself is only 8k. The correct value
+         * is 0x1000, so we hard code that here.
+         */
+        if (vendor == PCI_VENDOR_ID_CHELSIO && (device & 0xff00) == 0x5800) {
+            vdev->msix->pba_offset = 0x1000;
+        } else {
+            error_report("vfio: Hardware reports invalid configuration, "
+                         "MSIX PBA outside of specified BAR");
+            return -EINVAL;
+        }
+    }
+
     trace_vfio_early_setup_msix(vdev->vbasedev.name, pos,
                                 vdev->msix->table_bar,
                                 vdev->msix->table_offset,
@@ -2388,7 +2415,7 @@
      * potentially insert a direct-mapped subregion before and after it.
      */
     if (vdev->msix && vdev->msix->table_bar == nr) {
-        size = vdev->msix->table_offset & qemu_host_page_mask;
+        size = vdev->msix->table_offset & qemu_real_host_page_mask;
     }
 
     strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
@@ -2401,8 +2428,9 @@
     if (vdev->msix && vdev->msix->table_bar == nr) {
         uint64_t start;
 
-        start = HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
-                                (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
+        start = REAL_HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
+                                     (vdev->msix->entries *
+                                      PCI_MSIX_ENTRY_SIZE));
 
         size = start < bar->region.size ? bar->region.size - start : 0;
         strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1);
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 5c678b9..60365d1 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -26,6 +26,7 @@
 #include "hw/sysbus.h"
 #include "trace.h"
 #include "hw/platform-bus.h"
+#include "sysemu/kvm.h"
 
 /*
  * Functions used whatever the injection method
@@ -51,6 +52,7 @@
     intp->pin = info.index;
     intp->flags = info.flags;
     intp->state = VFIO_IRQ_INACTIVE;
+    intp->kvm_accel = false;
 
     sysbus_init_irq(sbdev, &intp->qemuirq);
 
@@ -61,6 +63,13 @@
         error_report("vfio: Error: trigger event_notifier_init failed ");
         return NULL;
     }
+    /* Get an eventfd for resample/unmask */
+    ret = event_notifier_init(&intp->unmask, 0);
+    if (ret) {
+        g_free(intp);
+        error_report("vfio: Error: resamplefd event_notifier_init failed");
+        return NULL;
+    }
 
     QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
     return intp;
@@ -315,6 +324,94 @@
     return ret;
 }
 
+/*
+ * Functions used for irqfd
+ */
+
+/**
+ * vfio_set_resample_eventfd - sets the resamplefd for an IRQ
+ * @intp: the IRQ struct handle
+ * programs the VFIO driver to unmask this IRQ when the
+ * intp->unmask eventfd is triggered
+ */
+static int vfio_set_resample_eventfd(VFIOINTp *intp)
+{
+    VFIODevice *vbasedev = &intp->vdev->vbasedev;
+    struct vfio_irq_set *irq_set;
+    int argsz, ret;
+    int32_t *pfd;
+
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
+    irq_set->index = intp->pin;
+    irq_set->start = 0;
+    irq_set->count = 1;
+    pfd = (int32_t *)&irq_set->data;
+    *pfd = event_notifier_get_fd(&intp->unmask);
+    qemu_set_fd_handler(*pfd, NULL, NULL, NULL);
+    ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+    g_free(irq_set);
+    if (ret < 0) {
+        error_report("vfio: Failed to set resample eventfd: %m");
+    }
+    return ret;
+}
+
+static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
+{
+    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
+    VFIOINTp *intp;
+
+    if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
+        !vdev->irqfd_allowed) {
+        return;
+    }
+
+    QLIST_FOREACH(intp, &vdev->intp_list, next) {
+        if (intp->qemuirq == irq) {
+            break;
+        }
+    }
+    assert(intp);
+
+    /* Get to a known interrupt state */
+    qemu_set_fd_handler(event_notifier_get_fd(&intp->interrupt),
+                        NULL, NULL, vdev);
+
+    vfio_mask_single_irqindex(&vdev->vbasedev, intp->pin);
+    qemu_set_irq(intp->qemuirq, 0);
+
+    if (kvm_irqchip_add_irqfd_notifier(kvm_state, &intp->interrupt,
+                                   &intp->unmask, irq) < 0) {
+        goto fail_irqfd;
+    }
+
+    if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
+        goto fail_vfio;
+    }
+    if (vfio_set_resample_eventfd(intp) < 0) {
+        goto fail_vfio;
+    }
+
+    /* Let's resume injection with irqfd setup */
+    vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin);
+
+    intp->kvm_accel = true;
+
+    trace_vfio_platform_start_irqfd_injection(intp->pin,
+                                     event_notifier_get_fd(&intp->interrupt),
+                                     event_notifier_get_fd(&intp->unmask));
+    return;
+fail_vfio:
+    kvm_irqchip_remove_irqfd_notifier(kvm_state, &intp->interrupt, irq);
+fail_irqfd:
+    vfio_start_eventfd_injection(intp);
+    vfio_unmask_single_irqindex(&vdev->vbasedev, intp->pin);
+    return;
+}
+
 /* VFIO skeleton */
 
 static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
@@ -584,17 +681,20 @@
     DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice, vbasedev.allow_mmap, true),
     DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
                        mmap_timeout, 1100),
+    DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
 static void vfio_platform_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
 
     dc->realize = vfio_platform_realize;
     dc->props = vfio_platform_dev_properties;
     dc->vmsd = &vfio_platform_vmstate;
     dc->desc = "VFIO-based platform device assignment";
+    sbc->connect_irq_notifier = vfio_start_irqfd_injection;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
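
The trigger and unmask notifiers wired up above are eventfds underneath. A minimal Linux-only sketch of the signal/consume cycle an irqfd or resamplefd relies on (illustrative, not QEMU code):

    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/eventfd.h>

    int main(void)
    {
        int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
        uint64_t val = 1;
        ssize_t n;

        n = write(fd, &val, sizeof(val));   /* producer side: raise the event */
        val = 0;
        n = read(fd, &val, sizeof(val));    /* consumer side: read resets the counter */
        printf("read %zd bytes, counter was %llu\n", n, (unsigned long long)val);

        close(fd);
        return 0;
    }
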
 
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index 3589185..07fd69c 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -153,22 +153,20 @@
         return true;
     }
 
-    return vring_need_event(vring_used_event(&vring->vr), new, old);
+    return vring_need_event(virtio_tswap16(vdev, vring_used_event(&vring->vr)),
+                            new, old);
 }
 
 
-static int get_desc(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem,
+static int get_desc(Vring *vring, VirtQueueElement *elem,
                     struct vring_desc *desc)
 {
     unsigned *num;
     struct iovec *iov;
     hwaddr *addr;
     MemoryRegion *mr;
-    int is_write = virtio_tswap16(vdev, desc->flags) & VRING_DESC_F_WRITE;
-    uint32_t len = virtio_tswap32(vdev, desc->len);
-    uint64_t desc_addr = virtio_tswap64(vdev, desc->addr);
 
-    if (is_write) {
+    if (desc->flags & VRING_DESC_F_WRITE) {
         num = &elem->in_num;
         iov = &elem->in_sg[*num];
         addr = &elem->in_addr[*num];
@@ -192,17 +190,18 @@
     }
 
     /* TODO handle non-contiguous memory across region boundaries */
-    iov->iov_base = vring_map(&mr, desc_addr, len, is_write);
+    iov->iov_base = vring_map(&mr, desc->addr, desc->len,
+                              desc->flags & VRING_DESC_F_WRITE);
     if (!iov->iov_base) {
         error_report("Failed to map descriptor addr %#" PRIx64 " len %u",
-                     (uint64_t)desc_addr, len);
+                     (uint64_t)desc->addr, desc->len);
         return -EFAULT;
     }
 
     /* The MemoryRegion is looked up again and unref'ed later, leave the
      * ref in place.  */
-    iov->iov_len = len;
-    *addr = desc_addr;
+    iov->iov_len = desc->len;
+    *addr = desc->addr;
     *num += 1;
     return 0;
 }
@@ -224,23 +223,21 @@
     struct vring_desc desc;
     unsigned int i = 0, count, found = 0;
     int ret;
-    uint32_t len = virtio_tswap32(vdev, indirect->len);
-    uint64_t addr = virtio_tswap64(vdev, indirect->addr);
 
     /* Sanity check */
-    if (unlikely(len % sizeof(desc))) {
+    if (unlikely(indirect->len % sizeof(desc))) {
         error_report("Invalid length in indirect descriptor: "
                      "len %#x not multiple of %#zx",
-                     len, sizeof(desc));
+                     indirect->len, sizeof(desc));
         vring->broken = true;
         return -EFAULT;
     }
 
-    count = len / sizeof(desc);
+    count = indirect->len / sizeof(desc);
     /* Buffers are chained via a 16 bit next field, so
      * we can have at most 2^16 of these. */
     if (unlikely(count > USHRT_MAX + 1)) {
-        error_report("Indirect buffer length too big: %d", len);
+        error_report("Indirect buffer length too big: %d", indirect->len);
         vring->broken = true;
         return -EFAULT;
     }
@@ -251,12 +248,12 @@
 
         /* Translate indirect descriptor */
         desc_ptr = vring_map(&mr,
-                             addr + found * sizeof(desc),
+                             indirect->addr + found * sizeof(desc),
                              sizeof(desc), false);
         if (!desc_ptr) {
             error_report("Failed to map indirect descriptor "
                          "addr %#" PRIx64 " len %zu",
-                         (uint64_t)addr + found * sizeof(desc),
+                         (uint64_t)indirect->addr + found * sizeof(desc),
                          sizeof(desc));
             vring->broken = true;
             return -EFAULT;
@@ -274,20 +271,19 @@
             return -EFAULT;
         }
 
-        if (unlikely(virtio_tswap16(vdev, desc.flags)
-                     & VRING_DESC_F_INDIRECT)) {
+        if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
             error_report("Nested indirect descriptor");
             vring->broken = true;
             return -EFAULT;
         }
 
-        ret = get_desc(vdev, vring, elem, &desc);
+        ret = get_desc(vring, elem, &desc);
         if (ret < 0) {
             vring->broken |= (ret == -EFAULT);
             return ret;
         }
-        i = virtio_tswap16(vdev, desc.next);
-    } while (virtio_tswap16(vdev, desc.flags) & VRING_DESC_F_NEXT);
+        i = desc.next;
+    } while (desc.flags & VRING_DESC_F_NEXT);
     return 0;
 }
 
@@ -388,7 +384,7 @@
         /* Ensure descriptor is loaded before accessing fields */
         barrier();
 
-        if (virtio_tswap16(vdev, desc.flags) & VRING_DESC_F_INDIRECT) {
+        if (desc.flags & VRING_DESC_F_INDIRECT) {
             ret = get_indirect(vdev, vring, elem, &desc);
             if (ret < 0) {
                 goto out;
@@ -396,18 +392,19 @@
             continue;
         }
 
-        ret = get_desc(vdev, vring, elem, &desc);
+        ret = get_desc(vring, elem, &desc);
         if (ret < 0) {
             goto out;
         }
 
-        i = virtio_tswap16(vdev, desc.next);
-    } while (virtio_tswap16(vdev, desc.flags) & VRING_DESC_F_NEXT);
+        i = desc.next;
+    } while (desc.flags & VRING_DESC_F_NEXT);
 
     /* On success, increment avail index. */
     vring->last_avail_idx++;
     if (virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
-        vring_avail_event(&vring->vr) = vring->last_avail_idx;
+        vring_avail_event(&vring->vr) =
+            virtio_tswap16(vdev, vring->last_avail_idx);
     }
 
     return head;
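
The notification check above feeds the (now byte-swapped) used_event value into vring_need_event(). A standalone sketch of that wrap-safe comparison, as defined by the virtio specification:

    #include <stdint.h>
    #include <stdio.h>

    /* True if the ring index has moved past event_idx since old_idx. */
    static int need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old_idx)
    {
        return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old_idx);
    }

    int main(void)
    {
        printf("%d\n", need_event(5, 6, 4));   /* 1: crossed index 5 */
        printf("%d\n", need_event(5, 5, 4));   /* 0: not yet */
        return 0;
    }
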
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 6a0174e..ccca2b6 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -443,11 +443,89 @@
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+/* Below are generic functions to do memcpy from/to an address space,
+ * without byteswaps, with input validation.
+ *
+ * As regular address_space_* APIs all do some kind of byteswap at least for
+ * some host/target combinations, we are forced to explicitly convert to a
+ * known-endianness integer value.
+ * It doesn't really matter which endian format to go through, so the code
+ * below selects the endian that causes the least amount of work on the given
+ * host.
+ *
+ * Note: host pointer must be aligned.
+ */
+static
+void virtio_address_space_write(AddressSpace *as, hwaddr addr,
+                                const uint8_t *buf, int len)
+{
+    uint32_t val;
+
+    /* address_space_* APIs assume an aligned address.
+     * As address is under guest control, handle illegal values.
+     */
+    addr &= ~(len - 1);
+
+    /* Make sure caller aligned buf properly */
+    assert(!(((uintptr_t)buf) & (len - 1)));
+
+    switch (len) {
+    case 1:
+        val = pci_get_byte(buf);
+        address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
+        break;
+    case 2:
+        val = pci_get_word(buf);
+        address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
+        break;
+    case 4:
+        val = pci_get_long(buf);
+        address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
+        break;
+    default:
+        /* As length is under guest control, handle illegal values. */
+        break;
+    }
+}
+
+static void
+virtio_address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
+{
+    uint32_t val;
+
+    /* address_space_* APIs assume an aligned address.
+     * As address is under guest control, handle illegal values.
+     */
+    addr &= ~(len - 1);
+
+    /* Make sure caller aligned buf properly */
+    assert(!(((uintptr_t)buf) & (len - 1)));
+
+    switch (len) {
+    case 1:
+        val = address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
+        pci_set_byte(buf, val);
+        break;
+    case 2:
+        val = address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
+        pci_set_word(buf, val);
+        break;
+    case 4:
+        val = address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
+        pci_set_long(buf, val);
+        break;
+    default:
+        /* As length is under guest control, handle illegal values. */
+        break;
+    }
+}
+
 static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
                                 uint32_t val, int len)
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
     VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    struct virtio_pci_cfg_cap *cfg;
 
     pci_default_write_config(pci_dev, address, val, len);
 
@@ -456,6 +534,49 @@
         virtio_pci_stop_ioeventfd(proxy);
         virtio_set_status(vdev, vdev->status & ~VIRTIO_CONFIG_S_DRIVER_OK);
     }
+
+    if (proxy->config_cap &&
+        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
+                                                                  pci_cfg_data),
+                       sizeof cfg->pci_cfg_data)) {
+        uint32_t off;
+        uint32_t len;
+
+        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
+        off = le32_to_cpu(cfg->cap.offset);
+        len = le32_to_cpu(cfg->cap.length);
+
+        if (len <= sizeof cfg->pci_cfg_data) {
+            virtio_address_space_write(&proxy->modern_as, off,
+                                       cfg->pci_cfg_data, len);
+        }
+    }
+}
+
+static uint32_t virtio_read_config(PCIDevice *pci_dev,
+                                   uint32_t address, int len)
+{
+    VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
+    struct virtio_pci_cfg_cap *cfg;
+
+    if (proxy->config_cap &&
+        ranges_overlap(address, len, proxy->config_cap + offsetof(struct virtio_pci_cfg_cap,
+                                                                  pci_cfg_data),
+                       sizeof cfg->pci_cfg_data)) {
+        uint32_t off;
+        uint32_t len;
+
+        cfg = (void *)(proxy->pci_dev.config + proxy->config_cap);
+        off = le32_to_cpu(cfg->cap.offset);
+        len = le32_to_cpu(cfg->cap.length);
+
+        if (len <= sizeof cfg->pci_cfg_data) {
+            virtio_address_space_read(&proxy->modern_as, off,
+                                      cfg->pci_cfg_data, len);
+        }
+    }
+
+    return pci_default_read_config(pci_dev, address, len);
 }
 
 static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
@@ -495,7 +616,7 @@
     VirtQueue *vq = virtio_get_queue(vdev, queue_no);
     EventNotifier *n = virtio_queue_get_guest_notifier(vq);
     int ret;
-    ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, NULL, irqfd->virq);
+    ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq);
     return ret;
 }
 
@@ -509,7 +630,7 @@
     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
     int ret;
 
-    ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq);
+    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq);
     assert(ret == 0);
 }
 
@@ -942,7 +1063,7 @@
     return proxy->nvectors;
 }
 
-static void virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
+static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
                                    struct virtio_pci_cap *cap)
 {
     PCIDevice *dev = &proxy->pci_dev;
@@ -954,6 +1075,8 @@
     assert(cap->cap_len >= sizeof *cap);
     memcpy(dev->config + offset + PCI_CAP_FLAGS, &cap->cap_len,
            cap->cap_len - PCI_CAP_FLAGS);
+
+    return offset;
 }
 
 static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
@@ -1329,6 +1452,11 @@
             .notify_off_multiplier =
                 cpu_to_le32(QEMU_VIRTIO_PCI_QUEUE_MEM_MULT),
         };
+        struct virtio_pci_cfg_cap cfg = {
+            .cap.cap_len = sizeof cfg,
+            .cap.cfg_type = VIRTIO_PCI_CAP_PCI_CFG,
+        };
+        struct virtio_pci_cfg_cap *cfg_mask;
 
         /* TODO: add io access for speed */
 
@@ -1338,11 +1466,19 @@
         virtio_pci_modern_region_map(proxy, &proxy->isr, &cap);
         virtio_pci_modern_region_map(proxy, &proxy->device, &cap);
         virtio_pci_modern_region_map(proxy, &proxy->notify, &notify.cap);
+
         pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar,
                          PCI_BASE_ADDRESS_SPACE_MEMORY |
                          PCI_BASE_ADDRESS_MEM_PREFETCH |
                          PCI_BASE_ADDRESS_MEM_TYPE_64,
                          &proxy->modern_bar);
+
+        proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap);
+        cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap);
+        pci_set_byte(&cfg_mask->cap.bar, ~0x0);
+        pci_set_long((uint8_t *)&cfg_mask->cap.offset, ~0x0);
+        pci_set_long((uint8_t *)&cfg_mask->cap.length, ~0x0);
+        pci_set_long(cfg_mask->pci_cfg_data, ~0x0);
     }
 
     if (proxy->nvectors &&
@@ -1354,6 +1490,7 @@
     }
 
     proxy->pci_dev.config_write = virtio_write_config;
+    proxy->pci_dev.config_read = virtio_read_config;
 
     if (legacy) {
         size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev)
@@ -1424,6 +1561,15 @@
                        2 * QEMU_VIRTIO_PCI_QUEUE_MEM_MULT *
                        VIRTIO_QUEUE_MAX);
 
+    memory_region_init_alias(&proxy->modern_cfg,
+                             OBJECT(proxy),
+                             "virtio-pci-cfg",
+                             &proxy->modern_bar,
+                             0,
+                             memory_region_size(&proxy->modern_bar));
+
+    address_space_init(&proxy->modern_as, &proxy->modern_cfg, "virtio-pci-cfg-as");
+
     virtio_pci_bus_new(&proxy->bus, sizeof(proxy->bus), proxy);
     if (k->realize) {
         k->realize(proxy, errp);
@@ -1432,7 +1578,10 @@
 
 static void virtio_pci_exit(PCIDevice *pci_dev)
 {
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
+
     msix_uninit_exclusive_bar(pci_dev);
+    address_space_destroy(&proxy->modern_as);
 }
 
 static void virtio_pci_reset(DeviceState *qdev)
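
The virtio_address_space_read/write helpers above go through a fixed little-endian representation so the result does not depend on the host byte order. A minimal standalone sketch of that idea on a plain byte buffer (analogous to pci_get_long()/pci_set_long(), not the QEMU helpers themselves):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t le32_load(const uint8_t *buf)
    {
        return (uint32_t)buf[0] | ((uint32_t)buf[1] << 8) |
               ((uint32_t)buf[2] << 16) | ((uint32_t)buf[3] << 24);
    }

    static void le32_store(uint8_t *buf, uint32_t val)
    {
        buf[0] = val;
        buf[1] = val >> 8;
        buf[2] = val >> 16;
        buf[3] = val >> 24;
    }

    int main(void)
    {
        uint8_t buf[4];

        le32_store(buf, 0x12345678);
        printf("%02x %02x %02x %02x -> 0x%08x\n",
               buf[0], buf[1], buf[2], buf[3], le32_load(buf));
        return 0;
    }
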
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index 05d9d24..b6c442f 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -112,9 +112,12 @@
     VirtIOPCIRegion device;
     VirtIOPCIRegion notify;
     MemoryRegion modern_bar;
+    MemoryRegion modern_cfg;
+    AddressSpace modern_as;
     uint32_t legacy_io_bar;
     uint32_t msix_bar;
     uint32_t modern_mem_bar;
+    int config_cap;
     uint32_t flags;
     uint32_t class_code;
     uint32_t nvectors;
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 57d8ef1..dd9d5e6 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -166,6 +166,14 @@
 void block_job_yield(BlockJob *job);
 
 /**
+ * block_job_release:
+ * @bs: The block device.
+ *
+ * Release job resources when an error occurred or job completed.
+ */
+void block_job_release(BlockDriverState *bs);
+
+/**
  * block_job_completed:
  * @job: The job being completed.
  * @ret: The status code.
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 8999634..ea6a9a6 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -183,10 +183,13 @@
 
 /* ??? These should be the larger of uintptr_t and target_ulong.  */
 extern uintptr_t qemu_real_host_page_size;
+extern uintptr_t qemu_real_host_page_mask;
 extern uintptr_t qemu_host_page_size;
 extern uintptr_t qemu_host_page_mask;
 
 #define HOST_PAGE_ALIGN(addr) (((addr) + qemu_host_page_size - 1) & qemu_host_page_mask)
+#define REAL_HOST_PAGE_ALIGN(addr) (((addr) + qemu_real_host_page_size - 1) & \
+                                    qemu_real_host_page_mask)
 
 /* same as PROT_xxx */
 #define PAGE_READ      0x0001
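
REAL_HOST_PAGE_ALIGN() above is the usual round-up-to-page arithmetic, just against the real host page size rather than the (possibly larger) emulated one. A tiny worked example of the same expression, assuming a 4 KiB host page:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uintptr_t page_size = 4096;
        uintptr_t page_mask = ~(page_size - 1);           /* 0x...f000 */
        uintptr_t addr = 0x1234;

        /* (addr + page_size - 1) & page_mask, as in the macro */
        uintptr_t aligned = (addr + page_size - 1) & page_mask;

        printf("0x%lx -> 0x%lx\n", (unsigned long)addr,
               (unsigned long)aligned);                   /* 0x1234 -> 0x2000 */
        return 0;
    }
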
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index d678114..2e74760 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -365,4 +365,7 @@
     return cpu->can_do_io != 0;
 }
 
+#if !defined(CONFIG_USER_ONLY)
+void migration_bitmap_extend(ram_addr_t old, ram_addr_t new);
+#endif
 #endif
diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h
index ac24bbe..345fd8d 100644
--- a/include/hw/acpi/ich9.h
+++ b/include/hw/acpi/ich9.h
@@ -25,6 +25,7 @@
 #include "hw/acpi/cpu_hotplug.h"
 #include "hw/acpi/memory_hotplug.h"
 #include "hw/acpi/acpi_dev_interface.h"
+#include "hw/acpi/tco.h"
 
 typedef struct ICH9LPCPMRegs {
     /*
@@ -55,10 +56,15 @@
     uint8_t disable_s4;
     uint8_t s4_val;
     uint8_t smm_enabled;
+    bool enable_tco;
+    TCOIORegs tco_regs;
 } ICH9LPCPMRegs;
 
 void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
-                  bool smm_enabled, qemu_irq sci_irq);
+                  bool smm_enabled,
+                  bool enable_tco,
+                  qemu_irq sci_irq);
+
 void ich9_pm_iospace_update(ICH9LPCPMRegs *pm, uint32_t pm_io_base);
 extern const VMStateDescription vmstate_ich9_pm;
 
diff --git a/include/hw/acpi/tco.h b/include/hw/acpi/tco.h
new file mode 100644
index 0000000..c63afc8
--- /dev/null
+++ b/include/hw/acpi/tco.h
@@ -0,0 +1,82 @@
+/*
+ * QEMU ICH9 TCO emulation
+ *
+ * Copyright (c) 2015 Paulo Alcantara <pcacjr@zytor.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef HW_ACPI_TCO_H
+#define HW_ACPI_TCO_H
+
+#include "qemu/typedefs.h"
+#include "qemu-common.h"
+
+/* As per ICH9 spec, the internal timer has an error of ~0.6s on every tick */
+#define TCO_TICK_NSEC 600000000LL
+
+/* TCO I/O register offsets */
+enum {
+    TCO_RLD           = 0x00,
+    TCO_DAT_IN        = 0x02,
+    TCO_DAT_OUT       = 0x03,
+    TCO1_STS          = 0x04,
+    TCO2_STS          = 0x06,
+    TCO1_CNT          = 0x08,
+    TCO2_CNT          = 0x0a,
+    TCO_MESSAGE1      = 0x0c,
+    TCO_MESSAGE2      = 0x0d,
+    TCO_WDCNT         = 0x0e,
+    SW_IRQ_GEN        = 0x10,
+    TCO_TMR           = 0x12,
+};
+
+/* TCO I/O register control/status bits */
+enum {
+    SW_TCO_SMI           = 1 << 1,
+    TCO_INT_STS          = 1 << 2,
+    TCO_LOCK             = 1 << 12,
+    TCO_TMR_HLT          = 1 << 11,
+    TCO_TIMEOUT          = 1 << 3,
+    TCO_SECOND_TO_STS    = 1 << 1,
+    TCO_BOOT_STS         = 1 << 2,
+};
+
+/* TCO I/O registers mask bits */
+enum {
+    TCO_RLD_MASK     = 0x3ff,
+    TCO1_STS_MASK    = 0xe870,
+    TCO2_STS_MASK    = 0xfff8,
+    TCO1_CNT_MASK    = 0xfeff,
+    TCO_TMR_MASK     = 0x3ff,
+};
+
+typedef struct TCOIORegs {
+    struct {
+        uint16_t rld;
+        uint8_t din;
+        uint8_t dout;
+        uint16_t sts1;
+        uint16_t sts2;
+        uint16_t cnt1;
+        uint16_t cnt2;
+        uint8_t msg1;
+        uint8_t msg2;
+        uint8_t wdcnt;
+        uint16_t tmr;
+    } tco;
+    uint8_t sw_irq_gen;
+
+    QEMUTimer *tco_timer;
+    int64_t expire_time;
+    uint8_t timeouts_no;
+
+    MemoryRegion io;
+} TCOIORegs;
+
+/* tco.c */
+void acpi_pm_tco_init(TCOIORegs *tr, MemoryRegion *parent);
+
+extern const VMStateDescription vmstate_tco_io_sts;
+
+#endif /* HW_ACPI_TCO_H */
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 6379901..2aec9cb 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -99,7 +99,8 @@
         no_floppy:1,
         no_cdrom:1,
         no_sdcard:1,
-        has_dynamic_sysbus:1;
+        has_dynamic_sysbus:1,
+        no_tco:1;
     int is_default;
     const char *default_machine_opts;
     const char *default_boot_order;
diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h
index b317a48..b9d2b04 100644
--- a/include/hw/i386/ich9.h
+++ b/include/hw/i386/ich9.h
@@ -17,9 +17,12 @@
 void ich9_lpc_set_irq(void *opaque, int irq_num, int level);
 int ich9_lpc_map_irq(PCIDevice *pci_dev, int intx);
 PCIINTxRoute ich9_route_intx_pin_to_irq(void *opaque, int pirq_pin);
-void ich9_lpc_pm_init(PCIDevice *pci_lpc, bool smm_enabled);
+void ich9_lpc_pm_init(PCIDevice *pci_lpc, bool smm_enabled, bool enable_tco);
 I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base);
 
+void ich9_generate_smi(void);
+void ich9_generate_nmi(void);
+
 #define ICH9_CC_SIZE                            (16 * 1024)     /* 16KB */
 
 #define TYPE_ICH9_LPC_DEVICE "ICH9-LPC"
@@ -43,6 +46,11 @@
     ICH9LPCPMRegs pm;
     uint32_t sci_level; /* track sci level */
 
+    /* 2.24 Pin Straps */
+    struct {
+        bool spkr_hi;
+    } pin_strap;
+
     /* 10.1 Chipset Configuration registers(Memory Space)
      which is pointed by RCBA */
     uint8_t chip_config[ICH9_CC_SIZE];
@@ -90,6 +98,9 @@
 #define ICH9_CC_DIR_MASK                        0x7
 #define ICH9_CC_OIC                             0x31FF
 #define ICH9_CC_OIC_AEN                         0x1
+#define ICH9_CC_GCS                             0x3410
+#define ICH9_CC_GCS_DEFAULT                     0x00000020
+#define ICH9_CC_GCS_NO_REBOOT                   (1 << 5)
 
 /* D28:F[0-5] */
 #define ICH9_PCIE_DEV                           28
@@ -186,7 +197,10 @@
 #define ICH9_PMIO_GPE0_LEN                      16
 #define ICH9_PMIO_SMI_EN                        0x30
 #define ICH9_PMIO_SMI_EN_APMC_EN                (1 << 5)
+#define ICH9_PMIO_SMI_EN_TCO_EN                 (1 << 13)
 #define ICH9_PMIO_SMI_STS                       0x34
+#define ICH9_PMIO_TCO_RLD                       0x60
+#define ICH9_PMIO_TCO_LEN                       32
 
 /* FADT ACPI_ENABLE/ACPI_DISABLE */
 #define ICH9_APM_ACPI_ENABLE                    0x2
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 786a1d5..15e3352 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -88,6 +88,7 @@
 #define ACPI_PM_PROP_PM_IO_BASE "pm_io_base"
 #define ACPI_PM_PROP_GPE0_BLK "gpe0_blk"
 #define ACPI_PM_PROP_GPE0_BLK_LEN "gpe0_blk_len"
+#define ACPI_PM_PROP_TCO_ENABLED "enable_tco"
 
 struct PcGuestInfo {
     bool isapc_ram_fw;
@@ -198,13 +199,12 @@
 void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
                           ISADevice **rtc_state,
                           bool create_fdctrl,
-                          ISADevice **floppy,
                           bool no_vmport,
                           uint32 hpet_irqs);
 void pc_init_ne2k_isa(ISABus *bus, NICInfo *nd);
 void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
                   const char *boot_device, MachineState *machine,
-                  ISADevice *floppy, BusState *ide0, BusState *ide1,
+                  BusState *ide0, BusState *ide1,
                   ISADevice *s);
 void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus);
 void pc_pci_device_init(PCIBus *pci_bus);
@@ -293,7 +293,12 @@
 bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
 
 #define PC_COMPAT_2_3 \
-        HW_COMPAT_2_3
+        HW_COMPAT_2_3 \
+        {\
+            .driver   = TYPE_X86_CPU,\
+            .property = "arat",\
+            .value    = "off",\
+        },
 
 #define PC_COMPAT_2_2 \
         PC_COMPAT_2_3 \
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 9dca388..5322b56 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -119,21 +119,23 @@
 
 static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
     return xics_get_qirq(spapr->icp, phb->lsi_table[pin].irq);
 }
 
-PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index);
+PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index);
 
 int spapr_populate_pci_dt(sPAPRPHBState *phb,
                           uint32_t xics_phandle,
                           void *fdt);
 
-void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr);
+void spapr_pci_msi_init(sPAPRMachineState *spapr, hwaddr addr);
 
 void spapr_pci_rtas_init(void);
 
-sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid);
-PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid,
+sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid);
+PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid,
                               uint32_t config_addr);
 
 #endif /* __HW_SPAPR_PCI_H__ */
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index 49c062b..d98e6c9 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -114,6 +114,8 @@
 #define PCI_VENDOR_ID_ENSONIQ            0x1274
 #define PCI_DEVICE_ID_ENSONIQ_ES1370     0x5000
 
+#define PCI_VENDOR_ID_CHELSIO            0x1425
+
 #define PCI_VENDOR_ID_FREESCALE          0x1957
 #define PCI_DEVICE_ID_MPC8533E           0x0030
 
diff --git a/include/hw/pci/pci_regs.h b/include/hw/pci/pci_regs.h
index 57e8c80..ba8cbe9 100644
--- a/include/hw/pci/pci_regs.h
+++ b/include/hw/pci/pci_regs.h
@@ -1,719 +1 @@
-/*
- *	pci_regs.h
- *
- *	PCI standard defines
- *	Copyright 1994, Drew Eckhardt
- *	Copyright 1997--1999 Martin Mares <mj@ucw.cz>
- *
- *	For more information, please consult the following manuals (look at
- *	http://www.pcisig.com/ for how to get them):
- *
- *	PCI BIOS Specification
- *	PCI Local Bus Specification
- *	PCI to PCI Bridge Specification
- *	PCI System Design Guide
- *
- * 	For hypertransport information, please consult the following manuals
- * 	from http://www.hypertransport.org
- *
- *	The Hypertransport I/O Link Specification
- */
-
-#ifndef LINUX_PCI_REGS_H
-#define LINUX_PCI_REGS_H
-
-/*
- * Under PCI, each device has 256 bytes of configuration address space,
- * of which the first 64 bytes are standardized as follows:
- */
-#define PCI_VENDOR_ID		0x00	/* 16 bits */
-#define PCI_DEVICE_ID		0x02	/* 16 bits */
-#define PCI_COMMAND		0x04	/* 16 bits */
-#define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
-#define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
-#define  PCI_COMMAND_MASTER	0x4	/* Enable bus mastering */
-#define  PCI_COMMAND_SPECIAL	0x8	/* Enable response to special cycles */
-#define  PCI_COMMAND_INVALIDATE	0x10	/* Use memory write and invalidate */
-#define  PCI_COMMAND_VGA_PALETTE 0x20	/* Enable palette snooping */
-#define  PCI_COMMAND_PARITY	0x40	/* Enable parity checking */
-#define  PCI_COMMAND_WAIT 	0x80	/* Enable address/data stepping */
-#define  PCI_COMMAND_SERR	0x100	/* Enable SERR */
-#define  PCI_COMMAND_FAST_BACK	0x200	/* Enable back-to-back writes */
-#define  PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */
-
-#define PCI_STATUS		0x06	/* 16 bits */
-#define  PCI_STATUS_INTERRUPT	0x08	/* Interrupt status */
-#define  PCI_STATUS_CAP_LIST	0x10	/* Support Capability List */
-#define  PCI_STATUS_66MHZ	0x20	/* Support 66 Mhz PCI 2.1 bus */
-#define  PCI_STATUS_UDF		0x40	/* Support User Definable Features [obsolete] */
-#define  PCI_STATUS_FAST_BACK	0x80	/* Accept fast-back to back */
-#define  PCI_STATUS_PARITY	0x100	/* Detected parity error */
-#define  PCI_STATUS_DEVSEL_MASK	0x600	/* DEVSEL timing */
-#define  PCI_STATUS_DEVSEL_FAST		0x000
-#define  PCI_STATUS_DEVSEL_MEDIUM	0x200
-#define  PCI_STATUS_DEVSEL_SLOW		0x400
-#define  PCI_STATUS_SIG_TARGET_ABORT	0x800 /* Set on target abort */
-#define  PCI_STATUS_REC_TARGET_ABORT	0x1000 /* Master ack of " */
-#define  PCI_STATUS_REC_MASTER_ABORT	0x2000 /* Set on master abort */
-#define  PCI_STATUS_SIG_SYSTEM_ERROR	0x4000 /* Set when we drive SERR */
-#define  PCI_STATUS_DETECTED_PARITY	0x8000 /* Set on parity error */
-
-#define PCI_CLASS_REVISION	0x08	/* High 24 bits are class, low 8 revision */
-#define PCI_REVISION_ID		0x08	/* Revision ID */
-#define PCI_CLASS_PROG		0x09	/* Reg. Level Programming Interface */
-#define PCI_CLASS_DEVICE	0x0a	/* Device class */
-
-#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
-#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
-#define PCI_HEADER_TYPE		0x0e	/* 8 bits */
-#define  PCI_HEADER_TYPE_NORMAL		0
-#define  PCI_HEADER_TYPE_BRIDGE		1
-#define  PCI_HEADER_TYPE_CARDBUS	2
-
-#define PCI_BIST		0x0f	/* 8 bits */
-#define  PCI_BIST_CODE_MASK	0x0f	/* Return result */
-#define  PCI_BIST_START		0x40	/* 1 to start BIST, 2 secs or less */
-#define  PCI_BIST_CAPABLE	0x80	/* 1 if BIST capable */
-
-/*
- * Base addresses specify locations in memory or I/O space.
- * Decoded size can be determined by writing a value of
- * 0xffffffff to the register, and reading it back.  Only
- * 1 bits are decoded.
- */
-#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
-#define PCI_BASE_ADDRESS_1	0x14	/* 32 bits [htype 0,1 only] */
-#define PCI_BASE_ADDRESS_2	0x18	/* 32 bits [htype 0 only] */
-#define PCI_BASE_ADDRESS_3	0x1c	/* 32 bits */
-#define PCI_BASE_ADDRESS_4	0x20	/* 32 bits */
-#define PCI_BASE_ADDRESS_5	0x24	/* 32 bits */
-#define  PCI_BASE_ADDRESS_SPACE		0x01	/* 0 = memory, 1 = I/O */
-#define  PCI_BASE_ADDRESS_SPACE_IO	0x01
-#define  PCI_BASE_ADDRESS_SPACE_MEMORY	0x00
-#define  PCI_BASE_ADDRESS_MEM_TYPE_MASK	0x06
-#define  PCI_BASE_ADDRESS_MEM_TYPE_32	0x00	/* 32 bit address */
-#define  PCI_BASE_ADDRESS_MEM_TYPE_1M	0x02	/* Below 1M [obsolete] */
-#define  PCI_BASE_ADDRESS_MEM_TYPE_64	0x04	/* 64 bit address */
-#define  PCI_BASE_ADDRESS_MEM_PREFETCH	0x08	/* prefetchable? */
-#define  PCI_BASE_ADDRESS_MEM_MASK	(~0x0fUL)
-#define  PCI_BASE_ADDRESS_IO_MASK	(~0x03UL)
-/* bit 1 is reserved if address_space = 1 */
-
-/* Header type 0 (normal devices) */
-#define PCI_CARDBUS_CIS		0x28
-#define PCI_SUBSYSTEM_VENDOR_ID	0x2c
-#define PCI_SUBSYSTEM_ID	0x2e
-#define PCI_ROM_ADDRESS		0x30	/* Bits 31..11 are address, 10..1 reserved */
-#define  PCI_ROM_ADDRESS_ENABLE	0x01
-#define PCI_ROM_ADDRESS_MASK	(~0x7ffUL)
-
-#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
-
-/* 0x35-0x3b are reserved */
-#define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
-#define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
-#define PCI_MIN_GNT		0x3e	/* 8 bits */
-#define PCI_MAX_LAT		0x3f	/* 8 bits */
-
-/* Header type 1 (PCI-to-PCI bridges) */
-#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
-#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
-#define PCI_SUBORDINATE_BUS	0x1a	/* Highest bus number behind the bridge */
-#define PCI_SEC_LATENCY_TIMER	0x1b	/* Latency timer for secondary interface */
-#define PCI_IO_BASE		0x1c	/* I/O range behind the bridge */
-#define PCI_IO_LIMIT		0x1d
-#define  PCI_IO_RANGE_TYPE_MASK	0x0fUL	/* I/O bridging type */
-#define  PCI_IO_RANGE_TYPE_16	0x00
-#define  PCI_IO_RANGE_TYPE_32	0x01
-#define  PCI_IO_RANGE_MASK	(~0x0fUL)
-#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
-#define PCI_MEMORY_BASE		0x20	/* Memory range behind */
-#define PCI_MEMORY_LIMIT	0x22
-#define  PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
-#define  PCI_MEMORY_RANGE_MASK	(~0x0fUL)
-#define PCI_PREF_MEMORY_BASE	0x24	/* Prefetchable memory range behind */
-#define PCI_PREF_MEMORY_LIMIT	0x26
-#define  PCI_PREF_RANGE_TYPE_MASK 0x0fUL
-#define  PCI_PREF_RANGE_TYPE_32	0x00
-#define  PCI_PREF_RANGE_TYPE_64	0x01
-#define  PCI_PREF_RANGE_MASK	(~0x0fUL)
-#define PCI_PREF_BASE_UPPER32	0x28	/* Upper half of prefetchable memory range */
-#define PCI_PREF_LIMIT_UPPER32	0x2c
-#define PCI_IO_BASE_UPPER16	0x30	/* Upper half of I/O addresses */
-#define PCI_IO_LIMIT_UPPER16	0x32
-/* 0x34 same as for htype 0 */
-/* 0x35-0x3b is reserved */
-#define PCI_ROM_ADDRESS1	0x38	/* Same as PCI_ROM_ADDRESS, but for htype 1 */
-/* 0x3c-0x3d are same as for htype 0 */
-#define PCI_BRIDGE_CONTROL	0x3e
-#define  PCI_BRIDGE_CTL_PARITY	0x01	/* Enable parity detection on secondary interface */
-#define  PCI_BRIDGE_CTL_SERR	0x02	/* The same for SERR forwarding */
-#define  PCI_BRIDGE_CTL_ISA	0x04	/* Enable ISA mode */
-#define  PCI_BRIDGE_CTL_VGA	0x08	/* Forward VGA addresses */
-#define  PCI_BRIDGE_CTL_MASTER_ABORT	0x20  /* Report master aborts */
-#define  PCI_BRIDGE_CTL_BUS_RESET	0x40	/* Secondary bus reset */
-#define  PCI_BRIDGE_CTL_FAST_BACK	0x80	/* Fast Back2Back enabled on secondary interface */
-
-/* Header type 2 (CardBus bridges) */
-#define PCI_CB_CAPABILITY_LIST	0x14
-/* 0x15 reserved */
-#define PCI_CB_SEC_STATUS	0x16	/* Secondary status */
-#define PCI_CB_PRIMARY_BUS	0x18	/* PCI bus number */
-#define PCI_CB_CARD_BUS		0x19	/* CardBus bus number */
-#define PCI_CB_SUBORDINATE_BUS	0x1a	/* Subordinate bus number */
-#define PCI_CB_LATENCY_TIMER	0x1b	/* CardBus latency timer */
-#define PCI_CB_MEMORY_BASE_0	0x1c
-#define PCI_CB_MEMORY_LIMIT_0	0x20
-#define PCI_CB_MEMORY_BASE_1	0x24
-#define PCI_CB_MEMORY_LIMIT_1	0x28
-#define PCI_CB_IO_BASE_0	0x2c
-#define PCI_CB_IO_BASE_0_HI	0x2e
-#define PCI_CB_IO_LIMIT_0	0x30
-#define PCI_CB_IO_LIMIT_0_HI	0x32
-#define PCI_CB_IO_BASE_1	0x34
-#define PCI_CB_IO_BASE_1_HI	0x36
-#define PCI_CB_IO_LIMIT_1	0x38
-#define PCI_CB_IO_LIMIT_1_HI	0x3a
-#define  PCI_CB_IO_RANGE_MASK	(~0x03UL)
-/* 0x3c-0x3d are same as for htype 0 */
-#define PCI_CB_BRIDGE_CONTROL	0x3e
-#define  PCI_CB_BRIDGE_CTL_PARITY	0x01	/* Similar to standard bridge control register */
-#define  PCI_CB_BRIDGE_CTL_SERR		0x02
-#define  PCI_CB_BRIDGE_CTL_ISA		0x04
-#define  PCI_CB_BRIDGE_CTL_VGA		0x08
-#define  PCI_CB_BRIDGE_CTL_MASTER_ABORT	0x20
-#define  PCI_CB_BRIDGE_CTL_CB_RESET	0x40	/* CardBus reset */
-#define  PCI_CB_BRIDGE_CTL_16BIT_INT	0x80	/* Enable interrupt for 16-bit cards */
-#define  PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100	/* Prefetch enable for both memory regions */
-#define  PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200
-#define  PCI_CB_BRIDGE_CTL_POST_WRITES	0x400
-#define PCI_CB_SUBSYSTEM_VENDOR_ID	0x40
-#define PCI_CB_SUBSYSTEM_ID		0x42
-#define PCI_CB_LEGACY_MODE_BASE		0x44	/* 16-bit PC Card legacy mode base address (ExCa) */
-/* 0x48-0x7f reserved */
-
-/* Capability lists */
-
-#define PCI_CAP_LIST_ID		0	/* Capability ID */
-#define  PCI_CAP_ID_PM		0x01	/* Power Management */
-#define  PCI_CAP_ID_AGP		0x02	/* Accelerated Graphics Port */
-#define  PCI_CAP_ID_VPD		0x03	/* Vital Product Data */
-#define  PCI_CAP_ID_SLOTID	0x04	/* Slot Identification */
-#define  PCI_CAP_ID_MSI		0x05	/* Message Signalled Interrupts */
-#define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
-#define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
-#define  PCI_CAP_ID_HT		0x08	/* HyperTransport */
-#define  PCI_CAP_ID_VNDR	0x09	/* Vendor specific */
-#define  PCI_CAP_ID_DBG		0x0A	/* Debug port */
-#define  PCI_CAP_ID_CCRC	0x0B	/* CompactPCI Central Resource Control */
-#define  PCI_CAP_ID_SHPC 	0x0C	/* PCI Standard Hot-Plug Controller */
-#define  PCI_CAP_ID_SSVID	0x0D	/* Bridge subsystem vendor/device ID */
-#define  PCI_CAP_ID_AGP3	0x0E	/* AGP Target PCI-PCI bridge */
-#define  PCI_CAP_ID_EXP 	0x10	/* PCI Express */
-#define  PCI_CAP_ID_MSIX	0x11	/* MSI-X */
-#define  PCI_CAP_ID_SATA	0x12	/* Serial ATA */
-#define  PCI_CAP_ID_AF		0x13	/* PCI Advanced Features */
-#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
-#define PCI_CAP_FLAGS		2	/* Capability defined flags (16 bits) */
-#define PCI_CAP_SIZEOF		4
-
-/* Power Management Registers */
-
-#define PCI_PM_PMC		2	/* PM Capabilities Register */
-#define  PCI_PM_CAP_VER_MASK	0x0007	/* Version */
-#define  PCI_PM_CAP_PME_CLOCK	0x0008	/* PME clock required */
-#define  PCI_PM_CAP_RESERVED    0x0010  /* Reserved field */
-#define  PCI_PM_CAP_DSI		0x0020	/* Device specific initialization */
-#define  PCI_PM_CAP_AUX_POWER	0x01C0	/* Auxiliary power support mask */
-#define  PCI_PM_CAP_D1		0x0200	/* D1 power state support */
-#define  PCI_PM_CAP_D2		0x0400	/* D2 power state support */
-#define  PCI_PM_CAP_PME		0x0800	/* PME pin supported */
-#define  PCI_PM_CAP_PME_MASK	0xF800	/* PME Mask of all supported states */
-#define  PCI_PM_CAP_PME_D0	0x0800	/* PME# from D0 */
-#define  PCI_PM_CAP_PME_D1	0x1000	/* PME# from D1 */
-#define  PCI_PM_CAP_PME_D2	0x2000	/* PME# from D2 */
-#define  PCI_PM_CAP_PME_D3	0x4000	/* PME# from D3 (hot) */
-#define  PCI_PM_CAP_PME_D3cold	0x8000	/* PME# from D3 (cold) */
-#define  PCI_PM_CAP_PME_SHIFT	11	/* Start of the PME Mask in PMC */
-#define PCI_PM_CTRL		4	/* PM control and status register */
-#define  PCI_PM_CTRL_STATE_MASK	0x0003	/* Current power state (D0 to D3) */
-#define  PCI_PM_CTRL_NO_SOFT_RESET	0x0008	/* No reset for D3hot->D0 */
-#define  PCI_PM_CTRL_PME_ENABLE	0x0100	/* PME pin enable */
-#define  PCI_PM_CTRL_DATA_SEL_MASK	0x1e00	/* Data select (??) */
-#define  PCI_PM_CTRL_DATA_SCALE_MASK	0x6000	/* Data scale (??) */
-#define  PCI_PM_CTRL_PME_STATUS	0x8000	/* PME pin status */
-#define PCI_PM_PPB_EXTENSIONS	6	/* PPB support extensions (??) */
-#define  PCI_PM_PPB_B2_B3	0x40	/* Stop clock when in D3hot (??) */
-#define  PCI_PM_BPCC_ENABLE	0x80	/* Bus power/clock control enable (??) */
-#define PCI_PM_DATA_REGISTER	7	/* (??) */
-#define PCI_PM_SIZEOF		8
-
-/* AGP registers */
-
-#define PCI_AGP_VERSION		2	/* BCD version number */
-#define PCI_AGP_RFU		3	/* Rest of capability flags */
-#define PCI_AGP_STATUS		4	/* Status register */
-#define  PCI_AGP_STATUS_RQ_MASK	0xff000000	/* Maximum number of requests - 1 */
-#define  PCI_AGP_STATUS_SBA	0x0200	/* Sideband addressing supported */
-#define  PCI_AGP_STATUS_64BIT	0x0020	/* 64-bit addressing supported */
-#define  PCI_AGP_STATUS_FW	0x0010	/* FW transfers supported */
-#define  PCI_AGP_STATUS_RATE4	0x0004	/* 4x transfer rate supported */
-#define  PCI_AGP_STATUS_RATE2	0x0002	/* 2x transfer rate supported */
-#define  PCI_AGP_STATUS_RATE1	0x0001	/* 1x transfer rate supported */
-#define PCI_AGP_COMMAND		8	/* Control register */
-#define  PCI_AGP_COMMAND_RQ_MASK 0xff000000  /* Master: Maximum number of requests */
-#define  PCI_AGP_COMMAND_SBA	0x0200	/* Sideband addressing enabled */
-#define  PCI_AGP_COMMAND_AGP	0x0100	/* Allow processing of AGP transactions */
-#define  PCI_AGP_COMMAND_64BIT	0x0020 	/* Allow processing of 64-bit addresses */
-#define  PCI_AGP_COMMAND_FW	0x0010 	/* Force FW transfers */
-#define  PCI_AGP_COMMAND_RATE4	0x0004	/* Use 4x rate */
-#define  PCI_AGP_COMMAND_RATE2	0x0002	/* Use 2x rate */
-#define  PCI_AGP_COMMAND_RATE1	0x0001	/* Use 1x rate */
-#define PCI_AGP_SIZEOF		12
-
-/* Vital Product Data */
-
-#define PCI_VPD_ADDR		2	/* Address to access (15 bits!) */
-#define  PCI_VPD_ADDR_MASK	0x7fff	/* Address mask */
-#define  PCI_VPD_ADDR_F		0x8000	/* Write 0, 1 indicates completion */
-#define PCI_VPD_DATA		4	/* 32-bits of data returned here */
-
-/* Slot Identification */
-
-#define PCI_SID_ESR		2	/* Expansion Slot Register */
-#define  PCI_SID_ESR_NSLOTS	0x1f	/* Number of expansion slots available */
-#define  PCI_SID_ESR_FIC	0x20	/* First In Chassis Flag */
-#define PCI_SID_CHASSIS_NR	3	/* Chassis Number */
-
-/* Message Signalled Interrupts registers */
-
-#define PCI_MSI_FLAGS		2	/* Various flags */
-#define  PCI_MSI_FLAGS_64BIT	0x80	/* 64-bit addresses allowed */
-#define  PCI_MSI_FLAGS_QSIZE	0x70	/* Message queue size configured */
-#define  PCI_MSI_FLAGS_QMASK	0x0e	/* Maximum queue size available */
-#define  PCI_MSI_FLAGS_ENABLE	0x01	/* MSI feature enabled */
-#define  PCI_MSI_FLAGS_MASKBIT	0x100	/* 64-bit mask bits allowed */
-#define PCI_MSI_RFU		3	/* Rest of capability flags */
-#define PCI_MSI_ADDRESS_LO	4	/* Lower 32 bits */
-#define PCI_MSI_ADDRESS_HI	8	/* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
-#define PCI_MSI_DATA_32		8	/* 16 bits of data for 32-bit devices */
-#define PCI_MSI_MASK_32		12	/* Mask bits register for 32-bit devices */
-#define PCI_MSI_PENDING_32	16	/* Pending bits register for 32-bit devices */
-#define PCI_MSI_DATA_64		12	/* 16 bits of data for 64-bit devices */
-#define PCI_MSI_MASK_64		16	/* Mask bits register for 64-bit devices */
-#define PCI_MSI_PENDING_64	20	/* Pending bits register for 32-bit devices */
-
-/* MSI-X registers */
-#define PCI_MSIX_FLAGS		2
-#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
-#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
-#define  PCI_MSIX_FLAGS_MASKALL	(1 << 14)
-#define PCI_MSIX_TABLE		4
-#define PCI_MSIX_PBA		8
-#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
-
-/* MSI-X entry's format */
-#define PCI_MSIX_ENTRY_SIZE		16
-#define  PCI_MSIX_ENTRY_LOWER_ADDR	0
-#define  PCI_MSIX_ENTRY_UPPER_ADDR	4
-#define  PCI_MSIX_ENTRY_DATA		8
-#define  PCI_MSIX_ENTRY_VECTOR_CTRL	12
-#define   PCI_MSIX_ENTRY_CTRL_MASKBIT	1
-
-/* CompactPCI Hotswap Register */
-
-#define PCI_CHSWP_CSR		2	/* Control and Status Register */
-#define  PCI_CHSWP_DHA		0x01	/* Device Hiding Arm */
-#define  PCI_CHSWP_EIM		0x02	/* ENUM# Signal Mask */
-#define  PCI_CHSWP_PIE		0x04	/* Pending Insert or Extract */
-#define  PCI_CHSWP_LOO		0x08	/* LED On / Off */
-#define  PCI_CHSWP_PI		0x30	/* Programming Interface */
-#define  PCI_CHSWP_EXT		0x40	/* ENUM# status - extraction */
-#define  PCI_CHSWP_INS		0x80	/* ENUM# status - insertion */
-
-/* PCI Advanced Feature registers */
-
-#define PCI_AF_LENGTH		2
-#define PCI_AF_CAP		3
-#define  PCI_AF_CAP_TP		0x01
-#define  PCI_AF_CAP_FLR		0x02
-#define PCI_AF_CTRL		4
-#define  PCI_AF_CTRL_FLR	0x01
-#define PCI_AF_STATUS		5
-#define  PCI_AF_STATUS_TP	0x01
-
-/* PCI-X registers */
-
-#define PCI_X_CMD		2	/* Modes & Features */
-#define  PCI_X_CMD_DPERR_E	0x0001	/* Data Parity Error Recovery Enable */
-#define  PCI_X_CMD_ERO		0x0002	/* Enable Relaxed Ordering */
-#define  PCI_X_CMD_READ_512	0x0000	/* 512 byte maximum read byte count */
-#define  PCI_X_CMD_READ_1K	0x0004	/* 1Kbyte maximum read byte count */
-#define  PCI_X_CMD_READ_2K	0x0008	/* 2Kbyte maximum read byte count */
-#define  PCI_X_CMD_READ_4K	0x000c	/* 4Kbyte maximum read byte count */
-#define  PCI_X_CMD_MAX_READ	0x000c	/* Max Memory Read Byte Count */
-				/* Max # of outstanding split transactions */
-#define  PCI_X_CMD_SPLIT_1	0x0000	/* Max 1 */
-#define  PCI_X_CMD_SPLIT_2	0x0010	/* Max 2 */
-#define  PCI_X_CMD_SPLIT_3	0x0020	/* Max 3 */
-#define  PCI_X_CMD_SPLIT_4	0x0030	/* Max 4 */
-#define  PCI_X_CMD_SPLIT_8	0x0040	/* Max 8 */
-#define  PCI_X_CMD_SPLIT_12	0x0050	/* Max 12 */
-#define  PCI_X_CMD_SPLIT_16	0x0060	/* Max 16 */
-#define  PCI_X_CMD_SPLIT_32	0x0070	/* Max 32 */
-#define  PCI_X_CMD_MAX_SPLIT	0x0070	/* Max Outstanding Split Transactions */
-#define  PCI_X_CMD_VERSION(x) 	(((x) >> 12) & 3) /* Version */
-#define PCI_X_STATUS		4	/* PCI-X capabilities */
-#define  PCI_X_STATUS_DEVFN	0x000000ff	/* A copy of devfn */
-#define  PCI_X_STATUS_BUS	0x0000ff00	/* A copy of bus nr */
-#define  PCI_X_STATUS_64BIT	0x00010000	/* 64-bit device */
-#define  PCI_X_STATUS_133MHZ	0x00020000	/* 133 MHz capable */
-#define  PCI_X_STATUS_SPL_DISC	0x00040000	/* Split Completion Discarded */
-#define  PCI_X_STATUS_UNX_SPL	0x00080000	/* Unexpected Split Completion */
-#define  PCI_X_STATUS_COMPLEX	0x00100000	/* Device Complexity */
-#define  PCI_X_STATUS_MAX_READ	0x00600000	/* Designed Max Memory Read Count */
-#define  PCI_X_STATUS_MAX_SPLIT	0x03800000	/* Designed Max Outstanding Split Transactions */
-#define  PCI_X_STATUS_MAX_CUM	0x1c000000	/* Designed Max Cumulative Read Size */
-#define  PCI_X_STATUS_SPL_ERR	0x20000000	/* Rcvd Split Completion Error Msg */
-#define  PCI_X_STATUS_266MHZ	0x40000000	/* 266 MHz capable */
-#define  PCI_X_STATUS_533MHZ	0x80000000	/* 533 MHz capable */
-
-/* PCI Bridge Subsystem ID registers */
-
-#define PCI_SSVID_VENDOR_ID     4	/* PCI-Bridge subsystem vendor id register */
-#define PCI_SSVID_DEVICE_ID     6	/* PCI-Bridge subsystem device id register */
-
-/* PCI Express capability registers */
-
-#define PCI_EXP_FLAGS		2	/* Capabilities register */
-#define PCI_EXP_FLAGS_VERS	0x000f	/* Capability version */
-#define PCI_EXP_FLAGS_TYPE	0x00f0	/* Device/Port type */
-#define  PCI_EXP_TYPE_ENDPOINT	0x0	/* Express Endpoint */
-#define  PCI_EXP_TYPE_LEG_END	0x1	/* Legacy Endpoint */
-#define  PCI_EXP_TYPE_ROOT_PORT 0x4	/* Root Port */
-#define  PCI_EXP_TYPE_UPSTREAM	0x5	/* Upstream Port */
-#define  PCI_EXP_TYPE_DOWNSTREAM 0x6	/* Downstream Port */
-#define  PCI_EXP_TYPE_PCI_BRIDGE 0x7	/* PCI/PCI-X Bridge */
-#define  PCI_EXP_TYPE_PCIE_BRIDGE 0x8   /* PCI/PCI-X to PCIE Bridge */
-#define  PCI_EXP_TYPE_RC_END	0x9	/* Root Complex Integrated Endpoint */
-#define  PCI_EXP_TYPE_RC_EC     0xa     /* Root Complex Event Collector */
-#define PCI_EXP_FLAGS_SLOT	0x0100	/* Slot implemented */
-#define PCI_EXP_FLAGS_IRQ	0x3e00	/* Interrupt message number */
-#define PCI_EXP_DEVCAP		4	/* Device capabilities */
-#define  PCI_EXP_DEVCAP_PAYLOAD	0x07	/* Max_Payload_Size */
-#define  PCI_EXP_DEVCAP_PHANTOM	0x18	/* Phantom functions */
-#define  PCI_EXP_DEVCAP_EXT_TAG	0x20	/* Extended tags */
-#define  PCI_EXP_DEVCAP_L0S	0x1c0	/* L0s Acceptable Latency */
-#define  PCI_EXP_DEVCAP_L1	0xe00	/* L1 Acceptable Latency */
-#define  PCI_EXP_DEVCAP_ATN_BUT	0x1000	/* Attention Button Present */
-#define  PCI_EXP_DEVCAP_ATN_IND	0x2000	/* Attention Indicator Present */
-#define  PCI_EXP_DEVCAP_PWR_IND	0x4000	/* Power Indicator Present */
-#define  PCI_EXP_DEVCAP_RBER	0x8000	/* Role-Based Error Reporting */
-#define  PCI_EXP_DEVCAP_PWR_VAL	0x3fc0000 /* Slot Power Limit Value */
-#define  PCI_EXP_DEVCAP_PWR_SCL	0xc000000 /* Slot Power Limit Scale */
-#define  PCI_EXP_DEVCAP_FLR     0x10000000 /* Function Level Reset */
-#define PCI_EXP_DEVCTL		8	/* Device Control */
-#define  PCI_EXP_DEVCTL_CERE	0x0001	/* Correctable Error Reporting En. */
-#define  PCI_EXP_DEVCTL_NFERE	0x0002	/* Non-Fatal Error Reporting Enable */
-#define  PCI_EXP_DEVCTL_FERE	0x0004	/* Fatal Error Reporting Enable */
-#define  PCI_EXP_DEVCTL_URRE	0x0008	/* Unsupported Request Reporting En. */
-#define  PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */
-#define  PCI_EXP_DEVCTL_PAYLOAD	0x00e0	/* Max_Payload_Size */
-#define  PCI_EXP_DEVCTL_EXT_TAG	0x0100	/* Extended Tag Field Enable */
-#define  PCI_EXP_DEVCTL_PHANTOM	0x0200	/* Phantom Functions Enable */
-#define  PCI_EXP_DEVCTL_AUX_PME	0x0400	/* Auxiliary Power PM Enable */
-#define  PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800  /* Enable No Snoop */
-#define  PCI_EXP_DEVCTL_READRQ	0x7000	/* Max_Read_Request_Size */
-#define  PCI_EXP_DEVCTL_BCR_FLR 0x8000  /* Bridge Configuration Retry / FLR */
-#define PCI_EXP_DEVSTA		10	/* Device Status */
-#define  PCI_EXP_DEVSTA_CED	0x01	/* Correctable Error Detected */
-#define  PCI_EXP_DEVSTA_NFED	0x02	/* Non-Fatal Error Detected */
-#define  PCI_EXP_DEVSTA_FED	0x04	/* Fatal Error Detected */
-#define  PCI_EXP_DEVSTA_URD	0x08	/* Unsupported Request Detected */
-#define  PCI_EXP_DEVSTA_AUXPD	0x10	/* AUX Power Detected */
-#define  PCI_EXP_DEVSTA_TRPND	0x20	/* Transactions Pending */
-#define PCI_EXP_LNKCAP		12	/* Link Capabilities */
-#define  PCI_EXP_LNKCAP_SLS	0x0000000f /* Supported Link Speeds */
-#define  PCI_EXP_LNKCAP_MLW	0x000003f0 /* Maximum Link Width */
-#define  PCI_EXP_LNKCAP_ASPMS	0x00000c00 /* ASPM Support */
-#define  PCI_EXP_LNKCAP_L0SEL	0x00007000 /* L0s Exit Latency */
-#define  PCI_EXP_LNKCAP_L1EL	0x00038000 /* L1 Exit Latency */
-#define  PCI_EXP_LNKCAP_CLKPM	0x00040000 /* L1 Clock Power Management */
-#define  PCI_EXP_LNKCAP_SDERC	0x00080000 /* Surprise Down Error Reporting Capable */
-#define  PCI_EXP_LNKCAP_DLLLARC	0x00100000 /* Data Link Layer Link Active Reporting Capable */
-#define  PCI_EXP_LNKCAP_LBNC	0x00200000 /* Link Bandwidth Notification Capability */
-#define  PCI_EXP_LNKCAP_PN	0xff000000 /* Port Number */
-#define PCI_EXP_LNKCTL		16	/* Link Control */
-#define  PCI_EXP_LNKCTL_ASPMC	0x0003	/* ASPM Control */
-#define  PCI_EXP_LNKCTL_RCB	0x0008	/* Read Completion Boundary */
-#define  PCI_EXP_LNKCTL_LD	0x0010	/* Link Disable */
-#define  PCI_EXP_LNKCTL_RL	0x0020	/* Retrain Link */
-#define  PCI_EXP_LNKCTL_CCC	0x0040	/* Common Clock Configuration */
-#define  PCI_EXP_LNKCTL_ES	0x0080	/* Extended Synch */
-#define  PCI_EXP_LNKCTL_CLKREQ_EN 0x100	/* Enable clkreq */
-#define  PCI_EXP_LNKCTL_HAWD	0x0200	/* Hardware Autonomous Width Disable */
-#define  PCI_EXP_LNKCTL_LBMIE	0x0400	/* Link Bandwidth Management Interrupt Enable */
-#define  PCI_EXP_LNKCTL_LABIE	0x0800	/* Lnk Autonomous Bandwidth Interrupt Enable */
-#define PCI_EXP_LNKSTA		18	/* Link Status */
-#define  PCI_EXP_LNKSTA_CLS	0x000f	/* Current Link Speed */
-#define  PCI_EXP_LNKSTA_CLS_2_5GB 0x01	/* Current Link Speed 2.5GT/s */
-#define  PCI_EXP_LNKSTA_CLS_5_0GB 0x02	/* Current Link Speed 5.0GT/s */
-#define  PCI_EXP_LNKSTA_NLW	0x03f0	/* Nogotiated Link Width */
-#define  PCI_EXP_LNKSTA_NLW_SHIFT 4	/* start of NLW mask in link status */
-#define  PCI_EXP_LNKSTA_LT	0x0800	/* Link Training */
-#define  PCI_EXP_LNKSTA_SLC	0x1000	/* Slot Clock Configuration */
-#define  PCI_EXP_LNKSTA_DLLLA	0x2000	/* Data Link Layer Link Active */
-#define  PCI_EXP_LNKSTA_LBMS	0x4000	/* Link Bandwidth Management Status */
-#define  PCI_EXP_LNKSTA_LABS	0x8000	/* Link Autonomous Bandwidth Status */
-#define PCI_EXP_SLTCAP		20	/* Slot Capabilities */
-#define  PCI_EXP_SLTCAP_ABP	0x00000001 /* Attention Button Present */
-#define  PCI_EXP_SLTCAP_PCP	0x00000002 /* Power Controller Present */
-#define  PCI_EXP_SLTCAP_MRLSP	0x00000004 /* MRL Sensor Present */
-#define  PCI_EXP_SLTCAP_AIP	0x00000008 /* Attention Indicator Present */
-#define  PCI_EXP_SLTCAP_PIP	0x00000010 /* Power Indicator Present */
-#define  PCI_EXP_SLTCAP_HPS	0x00000020 /* Hot-Plug Surprise */
-#define  PCI_EXP_SLTCAP_HPC	0x00000040 /* Hot-Plug Capable */
-#define  PCI_EXP_SLTCAP_SPLV	0x00007f80 /* Slot Power Limit Value */
-#define  PCI_EXP_SLTCAP_SPLS	0x00018000 /* Slot Power Limit Scale */
-#define  PCI_EXP_SLTCAP_EIP	0x00020000 /* Electromechanical Interlock Present */
-#define  PCI_EXP_SLTCAP_NCCS	0x00040000 /* No Command Completed Support */
-#define  PCI_EXP_SLTCAP_PSN	0xfff80000 /* Physical Slot Number */
-#define PCI_EXP_SLTCTL		24	/* Slot Control */
-#define  PCI_EXP_SLTCTL_ABPE	0x0001	/* Attention Button Pressed Enable */
-#define  PCI_EXP_SLTCTL_PFDE	0x0002	/* Power Fault Detected Enable */
-#define  PCI_EXP_SLTCTL_MRLSCE	0x0004	/* MRL Sensor Changed Enable */
-#define  PCI_EXP_SLTCTL_PDCE	0x0008	/* Presence Detect Changed Enable */
-#define  PCI_EXP_SLTCTL_CCIE	0x0010	/* Command Completed Interrupt Enable */
-#define  PCI_EXP_SLTCTL_HPIE	0x0020	/* Hot-Plug Interrupt Enable */
-#define  PCI_EXP_SLTCTL_AIC	0x00c0	/* Attention Indicator Control */
-#define  PCI_EXP_SLTCTL_PIC	0x0300	/* Power Indicator Control */
-#define  PCI_EXP_SLTCTL_PCC	0x0400	/* Power Controller Control */
-#define  PCI_EXP_SLTCTL_EIC	0x0800	/* Electromechanical Interlock Control */
-#define  PCI_EXP_SLTCTL_DLLSCE	0x1000	/* Data Link Layer State Changed Enable */
-#define PCI_EXP_SLTSTA		26	/* Slot Status */
-#define  PCI_EXP_SLTSTA_ABP	0x0001	/* Attention Button Pressed */
-#define  PCI_EXP_SLTSTA_PFD	0x0002	/* Power Fault Detected */
-#define  PCI_EXP_SLTSTA_MRLSC	0x0004	/* MRL Sensor Changed */
-#define  PCI_EXP_SLTSTA_PDC	0x0008	/* Presence Detect Changed */
-#define  PCI_EXP_SLTSTA_CC	0x0010	/* Command Completed */
-#define  PCI_EXP_SLTSTA_MRLSS	0x0020	/* MRL Sensor State */
-#define  PCI_EXP_SLTSTA_PDS	0x0040	/* Presence Detect State */
-#define  PCI_EXP_SLTSTA_EIS	0x0080	/* Electromechanical Interlock Status */
-#define  PCI_EXP_SLTSTA_DLLSC	0x0100	/* Data Link Layer State Changed */
-#define PCI_EXP_RTCTL		28	/* Root Control */
-#define  PCI_EXP_RTCTL_SECEE	0x01	/* System Error on Correctable Error */
-#define  PCI_EXP_RTCTL_SENFEE	0x02	/* System Error on Non-Fatal Error */
-#define  PCI_EXP_RTCTL_SEFEE	0x04	/* System Error on Fatal Error */
-#define  PCI_EXP_RTCTL_PMEIE	0x08	/* PME Interrupt Enable */
-#define  PCI_EXP_RTCTL_CRSSVE	0x10	/* CRS Software Visibility Enable */
-#define PCI_EXP_RTCAP		30	/* Root Capabilities */
-#define PCI_EXP_RTSTA		32	/* Root Status */
-#define PCI_EXP_RTSTA_PME	0x10000 /* PME status */
-#define PCI_EXP_RTSTA_PENDING	0x20000 /* PME pending */
-#define PCI_EXP_DEVCAP2		36	/* Device Capabilities 2 */
-#define  PCI_EXP_DEVCAP2_ARI	0x20	/* Alternative Routing-ID */
-#define  PCI_EXP_DEVCAP2_LTR	0x800	/* Latency tolerance reporting */
-#define  PCI_EXP_OBFF_MASK	0xc0000 /* OBFF support mechanism */
-#define  PCI_EXP_OBFF_MSG	0x40000 /* New message signaling */
-#define  PCI_EXP_OBFF_WAKE	0x80000 /* Re-use WAKE# for OBFF */
-#define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
-#define  PCI_EXP_DEVCTL2_ARI	0x20	/* Alternative Routing-ID */
-#define  PCI_EXP_IDO_REQ_EN	0x100	/* ID-based ordering request enable */
-#define  PCI_EXP_IDO_CMP_EN	0x200	/* ID-based ordering completion enable */
-#define  PCI_EXP_LTR_EN		0x400	/* Latency tolerance reporting */
-#define  PCI_EXP_OBFF_MSGA_EN	0x2000	/* OBFF enable with Message type A */
-#define  PCI_EXP_OBFF_MSGB_EN	0x4000	/* OBFF enable with Message type B */
-#define  PCI_EXP_OBFF_WAKE_EN	0x6000	/* OBFF using WAKE# signaling */
-#define PCI_EXP_LNKCTL2		48	/* Link Control 2 */
-#define PCI_EXP_SLTCTL2		56	/* Slot Control 2 */
-
-/* Extended Capabilities (PCI-X 2.0 and Express) */
-#define PCI_EXT_CAP_ID(header)		(header & 0x0000ffff)
-#define PCI_EXT_CAP_VER(header)		((header >> 16) & 0xf)
-#define PCI_EXT_CAP_NEXT(header)	((header >> 20) & 0xffc)
-
-#define PCI_EXT_CAP_ID_ERR	1
-#define PCI_EXT_CAP_ID_VC	2
-#define PCI_EXT_CAP_ID_DSN	3
-#define PCI_EXT_CAP_ID_PWR	4
-#define PCI_EXT_CAP_ID_VNDR	11
-#define PCI_EXT_CAP_ID_ACS	13
-#define PCI_EXT_CAP_ID_ARI	14
-#define PCI_EXT_CAP_ID_ATS	15
-#define PCI_EXT_CAP_ID_SRIOV	16
-#define PCI_EXT_CAP_ID_LTR	24
-
-/* Advanced Error Reporting */
-#define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
-#define  PCI_ERR_UNC_TRAIN	0x00000001	/* Training */
-#define  PCI_ERR_UNC_DLP	0x00000010	/* Data Link Protocol */
-#define  PCI_ERR_UNC_POISON_TLP	0x00001000	/* Poisoned TLP */
-#define  PCI_ERR_UNC_FCP	0x00002000	/* Flow Control Protocol */
-#define  PCI_ERR_UNC_COMP_TIME	0x00004000	/* Completion Timeout */
-#define  PCI_ERR_UNC_COMP_ABORT	0x00008000	/* Completer Abort */
-#define  PCI_ERR_UNC_UNX_COMP	0x00010000	/* Unexpected Completion */
-#define  PCI_ERR_UNC_RX_OVER	0x00020000	/* Receiver Overflow */
-#define  PCI_ERR_UNC_MALF_TLP	0x00040000	/* Malformed TLP */
-#define  PCI_ERR_UNC_ECRC	0x00080000	/* ECRC Error Status */
-#define  PCI_ERR_UNC_UNSUP	0x00100000	/* Unsupported Request */
-#define PCI_ERR_UNCOR_MASK	8	/* Uncorrectable Error Mask */
-	/* Same bits as above */
-#define PCI_ERR_UNCOR_SEVER	12	/* Uncorrectable Error Severity */
-	/* Same bits as above */
-#define PCI_ERR_COR_STATUS	16	/* Correctable Error Status */
-#define  PCI_ERR_COR_RCVR	0x00000001	/* Receiver Error Status */
-#define  PCI_ERR_COR_BAD_TLP	0x00000040	/* Bad TLP Status */
-#define  PCI_ERR_COR_BAD_DLLP	0x00000080	/* Bad DLLP Status */
-#define  PCI_ERR_COR_REP_ROLL	0x00000100	/* REPLAY_NUM Rollover */
-#define  PCI_ERR_COR_REP_TIMER	0x00001000	/* Replay Timer Timeout */
-#define PCI_ERR_COR_MASK	20	/* Correctable Error Mask */
-	/* Same bits as above */
-#define PCI_ERR_CAP		24	/* Advanced Error Capabilities */
-#define  PCI_ERR_CAP_FEP(x)	((x) & 31)	/* First Error Pointer */
-#define  PCI_ERR_CAP_ECRC_GENC	0x00000020	/* ECRC Generation Capable */
-#define  PCI_ERR_CAP_ECRC_GENE	0x00000040	/* ECRC Generation Enable */
-#define  PCI_ERR_CAP_ECRC_CHKC	0x00000080	/* ECRC Check Capable */
-#define  PCI_ERR_CAP_ECRC_CHKE	0x00000100	/* ECRC Check Enable */
-#define PCI_ERR_HEADER_LOG	28	/* Header Log Register (16 bytes) */
-#define PCI_ERR_ROOT_COMMAND	44	/* Root Error Command */
-/* Correctable Err Reporting Enable */
-#define PCI_ERR_ROOT_CMD_COR_EN		0x00000001
-/* Non-fatal Err Reporting Enable */
-#define PCI_ERR_ROOT_CMD_NONFATAL_EN	0x00000002
-/* Fatal Err Reporting Enable */
-#define PCI_ERR_ROOT_CMD_FATAL_EN	0x00000004
-#define PCI_ERR_ROOT_STATUS	48
-#define PCI_ERR_ROOT_COR_RCV		0x00000001	/* ERR_COR Received */
-/* Multi ERR_COR Received */
-#define PCI_ERR_ROOT_MULTI_COR_RCV	0x00000002
-/* ERR_FATAL/NONFATAL Recevied */
-#define PCI_ERR_ROOT_UNCOR_RCV		0x00000004
-/* Multi ERR_FATAL/NONFATAL Recevied */
-#define PCI_ERR_ROOT_MULTI_UNCOR_RCV	0x00000008
-#define PCI_ERR_ROOT_FIRST_FATAL	0x00000010	/* First Fatal */
-#define PCI_ERR_ROOT_NONFATAL_RCV	0x00000020	/* Non-Fatal Received */
-#define PCI_ERR_ROOT_FATAL_RCV		0x00000040	/* Fatal Received */
-#define PCI_ERR_ROOT_ERR_SRC	52	/* Error Source Identification */
-
-/* Virtual Channel */
-#define PCI_VC_PORT_REG1	4
-#define PCI_VC_PORT_REG2	8
-#define PCI_VC_PORT_CTRL	12
-#define PCI_VC_PORT_STATUS	14
-#define PCI_VC_RES_CAP		16
-#define PCI_VC_RES_CTRL		20
-#define PCI_VC_RES_STATUS	26
-
-/* Power Budgeting */
-#define PCI_PWR_DSR		4	/* Data Select Register */
-#define PCI_PWR_DATA		8	/* Data Register */
-#define  PCI_PWR_DATA_BASE(x)	((x) & 0xff)	    /* Base Power */
-#define  PCI_PWR_DATA_SCALE(x)	(((x) >> 8) & 3)    /* Data Scale */
-#define  PCI_PWR_DATA_PM_SUB(x)	(((x) >> 10) & 7)   /* PM Sub State */
-#define  PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */
-#define  PCI_PWR_DATA_TYPE(x)	(((x) >> 15) & 7)   /* Type */
-#define  PCI_PWR_DATA_RAIL(x)	(((x) >> 18) & 7)   /* Power Rail */
-#define PCI_PWR_CAP		12	/* Capability */
-#define  PCI_PWR_CAP_BUDGET(x)	((x) & 1)	/* Included in system budget */
-
-/*
- * Hypertransport sub capability types
- *
- * Unfortunately there are both 3 bit and 5 bit capability types defined
- * in the HT spec, catering for that is a little messy. You probably don't
- * want to use these directly, just use pci_find_ht_capability() and it
- * will do the right thing for you.
- */
-#define HT_3BIT_CAP_MASK	0xE0
-#define HT_CAPTYPE_SLAVE	0x00	/* Slave/Primary link configuration */
-#define HT_CAPTYPE_HOST		0x20	/* Host/Secondary link configuration */
-
-#define HT_5BIT_CAP_MASK	0xF8
-#define HT_CAPTYPE_IRQ		0x80	/* IRQ Configuration */
-#define HT_CAPTYPE_REMAPPING_40	0xA0	/* 40 bit address remapping */
-#define HT_CAPTYPE_REMAPPING_64 0xA2	/* 64 bit address remapping */
-#define HT_CAPTYPE_UNITID_CLUMP	0x90	/* Unit ID clumping */
-#define HT_CAPTYPE_EXTCONF	0x98	/* Extended Configuration Space Access */
-#define HT_CAPTYPE_MSI_MAPPING	0xA8	/* MSI Mapping Capability */
-#define  HT_MSI_FLAGS		0x02		/* Offset to flags */
-#define  HT_MSI_FLAGS_ENABLE	0x1		/* Mapping enable */
-#define  HT_MSI_FLAGS_FIXED	0x2		/* Fixed mapping only */
-#define  HT_MSI_FIXED_ADDR	0x00000000FEE00000ULL	/* Fixed addr */
-#define  HT_MSI_ADDR_LO		0x04		/* Offset to low addr bits */
-#define  HT_MSI_ADDR_LO_MASK	0xFFF00000	/* Low address bit mask */
-#define  HT_MSI_ADDR_HI		0x08		/* Offset to high addr bits */
-#define HT_CAPTYPE_DIRECT_ROUTE	0xB0	/* Direct routing configuration */
-#define HT_CAPTYPE_VCSET	0xB8	/* Virtual Channel configuration */
-#define HT_CAPTYPE_ERROR_RETRY	0xC0	/* Retry on error configuration */
-#define HT_CAPTYPE_GEN3		0xD0	/* Generation 3 hypertransport configuration */
-#define HT_CAPTYPE_PM		0xE0	/* Hypertransport powermanagement configuration */
-
-/* Alternative Routing-ID Interpretation */
-#define PCI_ARI_CAP		0x04	/* ARI Capability Register */
-#define  PCI_ARI_CAP_MFVC	0x0001	/* MFVC Function Groups Capability */
-#define  PCI_ARI_CAP_ACS	0x0002	/* ACS Function Groups Capability */
-#define  PCI_ARI_CAP_NFN(x)	(((x) >> 8) & 0xff) /* Next Function Number */
-#define PCI_ARI_CTRL		0x06	/* ARI Control Register */
-#define  PCI_ARI_CTRL_MFVC	0x0001	/* MFVC Function Groups Enable */
-#define  PCI_ARI_CTRL_ACS	0x0002	/* ACS Function Groups Enable */
-#define  PCI_ARI_CTRL_FG(x)	(((x) >> 4) & 7) /* Function Group */
-
-/* Address Translation Service */
-#define PCI_ATS_CAP		0x04	/* ATS Capability Register */
-#define  PCI_ATS_CAP_QDEP(x)	((x) & 0x1f)	/* Invalidate Queue Depth */
-#define  PCI_ATS_MAX_QDEP	32	/* Max Invalidate Queue Depth */
-#define PCI_ATS_CTRL		0x06	/* ATS Control Register */
-#define  PCI_ATS_CTRL_ENABLE	0x8000	/* ATS Enable */
-#define  PCI_ATS_CTRL_STU(x)	((x) & 0x1f)	/* Smallest Translation Unit */
-#define  PCI_ATS_MIN_STU	12	/* shift of minimum STU block */
-
-/* Single Root I/O Virtualization */
-#define PCI_SRIOV_CAP		0x04	/* SR-IOV Capabilities */
-#define  PCI_SRIOV_CAP_VFM	0x01	/* VF Migration Capable */
-#define  PCI_SRIOV_CAP_INTR(x)	((x) >> 21) /* Interrupt Message Number */
-#define PCI_SRIOV_CTRL		0x08	/* SR-IOV Control */
-#define  PCI_SRIOV_CTRL_VFE	0x01	/* VF Enable */
-#define  PCI_SRIOV_CTRL_VFM	0x02	/* VF Migration Enable */
-#define  PCI_SRIOV_CTRL_INTR	0x04	/* VF Migration Interrupt Enable */
-#define  PCI_SRIOV_CTRL_MSE	0x08	/* VF Memory Space Enable */
-#define  PCI_SRIOV_CTRL_ARI	0x10	/* ARI Capable Hierarchy */
-#define PCI_SRIOV_STATUS	0x0a	/* SR-IOV Status */
-#define  PCI_SRIOV_STATUS_VFM	0x01	/* VF Migration Status */
-#define PCI_SRIOV_INITIAL_VF	0x0c	/* Initial VFs */
-#define PCI_SRIOV_TOTAL_VF	0x0e	/* Total VFs */
-#define PCI_SRIOV_NUM_VF	0x10	/* Number of VFs */
-#define PCI_SRIOV_FUNC_LINK	0x12	/* Function Dependency Link */
-#define PCI_SRIOV_VF_OFFSET	0x14	/* First VF Offset */
-#define PCI_SRIOV_VF_STRIDE	0x16	/* Following VF Stride */
-#define PCI_SRIOV_VF_DID	0x1a	/* VF Device ID */
-#define PCI_SRIOV_SUP_PGSIZE	0x1c	/* Supported Page Sizes */
-#define PCI_SRIOV_SYS_PGSIZE	0x20	/* System Page Size */
-#define PCI_SRIOV_BAR		0x24	/* VF BAR0 */
-#define  PCI_SRIOV_NUM_BARS	6	/* Number of VF BARs */
-#define PCI_SRIOV_VFM		0x3c	/* VF Migration State Array Offset*/
-#define  PCI_SRIOV_VFM_BIR(x)	((x) & 7)	/* State BIR */
-#define  PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7)	/* State Offset */
-#define  PCI_SRIOV_VFM_UA	0x0	/* Inactive.Unavailable */
-#define  PCI_SRIOV_VFM_MI	0x1	/* Dormant.MigrateIn */
-#define  PCI_SRIOV_VFM_MO	0x2	/* Active.MigrateOut */
-#define  PCI_SRIOV_VFM_AV	0x3	/* Active.Available */
-
-#define PCI_LTR_MAX_SNOOP_LAT	0x4
-#define PCI_LTR_MAX_NOSNOOP_LAT	0x6
-#define  PCI_LTR_VALUE_MASK	0x000003ff
-#define  PCI_LTR_SCALE_MASK	0x00001c00
-#define  PCI_LTR_SCALE_SHIFT	10
-
-/* Access Control Service */
-#define PCI_ACS_CAP		0x04	/* ACS Capability Register */
-#define  PCI_ACS_SV		0x01	/* Source Validation */
-#define  PCI_ACS_TB		0x02	/* Translation Blocking */
-#define  PCI_ACS_RR		0x04	/* P2P Request Redirect */
-#define  PCI_ACS_CR		0x08	/* P2P Completion Redirect */
-#define  PCI_ACS_UF		0x10	/* Upstream Forwarding */
-#define  PCI_ACS_EC		0x20	/* P2P Egress Control */
-#define  PCI_ACS_DT		0x40	/* Direct Translated P2P */
-#define PCI_ACS_CTRL		0x06	/* ACS Control Register */
-#define PCI_ACS_EGRESS_CTL_V	0x08	/* ACS Egress Control Vector */
-
-#endif /* LINUX_PCI_REGS_H */
+#include "standard-headers/linux/pci_regs.h"
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7b4b1bb..91a61ab 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -2,6 +2,7 @@
 #define __HW_SPAPR_H__
 
 #include "sysemu/dma.h"
+#include "hw/boards.h"
 #include "hw/ppc/xics.h"
 #include "hw/ppc/spapr_drc.h"
 
@@ -12,15 +13,42 @@
 typedef struct sPAPREventLogEntry sPAPREventLogEntry;
 
 #define HPTE64_V_HPTE_DIRTY     0x0000000000000040ULL
+#define SPAPR_ENTRY_POINT       0x100
 
-typedef struct sPAPREnvironment {
+typedef struct sPAPRMachineClass sPAPRMachineClass;
+typedef struct sPAPRMachineState sPAPRMachineState;
+
+#define TYPE_SPAPR_MACHINE      "spapr-machine"
+#define SPAPR_MACHINE(obj) \
+    OBJECT_CHECK(sPAPRMachineState, (obj), TYPE_SPAPR_MACHINE)
+#define SPAPR_MACHINE_GET_CLASS(obj) \
+    OBJECT_GET_CLASS(sPAPRMachineClass, obj, TYPE_SPAPR_MACHINE)
+#define SPAPR_MACHINE_CLASS(klass) \
+    OBJECT_CLASS_CHECK(sPAPRMachineClass, klass, TYPE_SPAPR_MACHINE)
+
+/**
+ * sPAPRMachineClass:
+ */
+struct sPAPRMachineClass {
+    /*< private >*/
+    MachineClass parent_class;
+
+    /*< public >*/
+};
+
+/**
+ * sPAPRMachineState:
+ */
+struct sPAPRMachineState {
+    /*< private >*/
+    MachineState parent_obj;
+
     struct VIOsPAPRBus *vio_bus;
     QLIST_HEAD(, sPAPRPHBState) phbs;
     struct sPAPRNVRAM *nvram;
     XICSState *icp;
     DeviceState *rtc;
 
-    hwaddr ram_limit;
     void *htab;
     uint32_t htab_shift;
     hwaddr rma_size;
@@ -29,7 +57,6 @@
     ssize_t rtas_size;
     void *rtas_blob;
     void *fdt_skel;
-    target_ulong entry_point;
     uint64_t rtc_offset; /* Now used only during incoming migration */
     struct PPCTimebase tb;
     bool has_graphics;
@@ -46,7 +73,10 @@
 
     /* RTAS state */
     QTAILQ_HEAD(, sPAPRConfigureConnectorState) ccs_list;
-} sPAPREnvironment;
+
+    /*< public >*/
+    char *kvm_type;
+};
 
 #define H_SUCCESS         0
 #define H_BUSY            1        /* Hardware busy -- retry later */
@@ -319,8 +349,6 @@
 #define KVMPPC_H_CAS            (KVMPPC_HCALL_BASE + 0x2)
 #define KVMPPC_HCALL_MAX        KVMPPC_H_CAS
 
-extern sPAPREnvironment *spapr;
-
 typedef struct sPAPRDeviceTreeUpdateHeader {
     uint32_t version_id;
 } sPAPRDeviceTreeUpdateHeader;
@@ -335,7 +363,7 @@
     do { } while (0)
 #endif
 
-typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+typedef target_ulong (*spapr_hcall_fn)(PowerPCCPU *cpu, sPAPRMachineState *sm,
                                        target_ulong opcode,
                                        target_ulong *args);
 
@@ -490,12 +518,12 @@
     rtas_st_buffer_direct(phys + 2, phys_len - 2, buffer, buffer_len);
 }
 
-typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+typedef void (*spapr_rtas_fn)(PowerPCCPU *cpu, sPAPRMachineState *sm,
                               uint32_t token,
                               uint32_t nargs, target_ulong args,
                               uint32_t nret, target_ulong rets);
 void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn);
-target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPRMachineState *sm,
                              uint32_t token, uint32_t nargs, target_ulong args,
                              uint32_t nret, target_ulong rets);
 int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
@@ -546,9 +574,10 @@
     QTAILQ_ENTRY(sPAPREventLogEntry) next;
 };
 
-void spapr_events_init(sPAPREnvironment *spapr);
+void spapr_events_init(sPAPRMachineState *sm);
 void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
-int spapr_h_cas_compose_response(target_ulong addr, target_ulong size);
+int spapr_h_cas_compose_response(sPAPRMachineState *sm,
+                                 target_ulong addr, target_ulong size);
 sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
                                    uint64_t bus_offset,
                                    uint32_t page_shift,
@@ -578,4 +607,6 @@
 void spapr_rtc_read(DeviceState *dev, struct tm *tm, uint32_t *ns);
 int spapr_rtc_import_offset(DeviceState *dev, int64_t legacy_offset);
 
+#define SPAPR_MEMORY_BLOCK_SIZE (1 << 28) /* 256MB */
+
 #endif /* !defined (__HW_SPAPR_H__) */
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index f95016a..2299a54 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -88,6 +88,8 @@
 
 static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev)
 {
+    sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
     return xics_get_qirq(spapr->icp, dev->irq);
 }
 
@@ -126,7 +128,7 @@
 
 int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq);
 
-VIOsPAPRDevice *vty_lookup(sPAPREnvironment *spapr, target_ulong reg);
+VIOsPAPRDevice *vty_lookup(sPAPRMachineState *spapr, target_ulong reg);
 void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len);
 void spapr_vty_create(VIOsPAPRBus *bus, CharDriverState *chardev);
 void spapr_vlan_create(VIOsPAPRBus *bus, NICInfo *nd);
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index a214dd7..355a966 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -109,6 +109,7 @@
     uint8_t pending_priority;
     uint8_t mfrr;
     qemu_irq output;
+    bool cap_irq_xics_enabled;
 };
 
 #define TYPE_ICS "ics"
diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h
index 34f93c3..cc1dba4 100644
--- a/include/hw/sysbus.h
+++ b/include/hw/sysbus.h
@@ -58,6 +58,7 @@
      * omitted then. (This is not considered a fatal error.)
      */
     char *(*explicit_ofw_unit_address)(const SysBusDevice *dev);
+    void (*connect_irq_notifier)(SysBusDevice *dev, qemu_irq irq);
 } SysBusDeviceClass;
 
 struct SysBusDevice {
diff --git a/include/hw/vfio/vfio-platform.h b/include/hw/vfio/vfio-platform.h
index 26b2ad6..c5cf1d7 100644
--- a/include/hw/vfio/vfio-platform.h
+++ b/include/hw/vfio/vfio-platform.h
@@ -41,6 +41,7 @@
     int state; /* inactive, pending, active */
     uint8_t pin; /* index */
     uint32_t flags; /* IRQ info flags */
+    bool kvm_accel; /* set when QEMU bypass through KVM enabled */
 } VFIOINTp;
 
 /* function type for user side eventfd handler */
@@ -57,6 +58,7 @@
     uint32_t mmap_timeout; /* delay to re-enable mmaps after interrupt */
     QEMUTimer *mmap_timer; /* allows fast-path resume after IRQ hit */
     QemuMutex intp_mutex; /* protect the intp_list IRQ state */
+    bool irqfd_allowed; /* debug option to force irqfd on/off */
 } VFIOPlatformDevice;
 
 typedef struct VFIOPlatformDeviceClass {
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index b8c9244..8896761 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -112,9 +112,6 @@
                     VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false), \
     DEFINE_PROP_UINT32("vectors", _state, nvectors, 3)
 
-#define DEFINE_VIRTIO_GPU_PROPERTIES(_state, _conf_field)               \
-    DEFINE_PROP_UINT32("max_outputs", _state, _conf_field.max_outputs, 1)
-
 #define VIRTIO_GPU_FILL_CMD(out) do {                                   \
         size_t s;                                                       \
         s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0,          \
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index 280dacf..60b11d5 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -21,9 +21,6 @@
 #define VIRTIO_NET(obj) \
         OBJECT_CHECK(VirtIONet, (obj), TYPE_VIRTIO_NET)
 
-#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Control channel offload
-                                         * configuration support */
-
 #define TX_TIMER_INTERVAL 150000 /* 150 us */
 
 /* Limit the number of packets that can be sent via a single flush
@@ -100,15 +97,6 @@
     int announce_counter;
 } VirtIONet;
 
-/*
- * Control network offloads
- *
- * Dynamic offloads are available with the
- * VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature bit.
- */
-#define VIRTIO_NET_CTRL_GUEST_OFFLOADS   5
-#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET        0
-
 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                    const char *type);
 
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
index 38f29fb..ed5fd3e 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -195,7 +195,7 @@
 
 #define IOREQ_TYPE_PCI_CONFIG 2
 
-typedef uint32_t ioservid_t;
+typedef uint16_t ioservid_t;
 
 static inline void xen_map_memory_section(XenXC xc, domid_t dom,
                                           ioservid_t ioservid,
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 9387c8c..b2711ef 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -34,6 +34,7 @@
 #define QEMU_VM_SECTION_FULL         0x04
 #define QEMU_VM_SUBSECTION           0x05
 #define QEMU_VM_VMDESCRIPTION        0x06
+#define QEMU_VM_CONFIGURATION        0x07
 #define QEMU_VM_SECTION_FOOTER       0x7e
 
 struct MigrationParams {
@@ -176,10 +177,11 @@
 int migrate_compress_level(void);
 int migrate_compress_threads(void);
 int migrate_decompress_threads(void);
+bool migrate_use_events(void);
 
 void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
 void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
-void ram_control_load_hook(QEMUFile *f, uint64_t flags);
+void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
 
 /* Whenever this is found in the data stream, the flags
  * will be passed to ram_control_load_hook in the incoming-migration
@@ -197,4 +199,7 @@
 
 void ram_mig_init(void);
 void savevm_skip_section_footers(void);
+void register_global_state(void);
+void global_state_set_optional(void);
+void savevm_skip_configuration(void);
 #endif
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 4f67d79..ea49f33 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -63,16 +63,20 @@
 /*
  * This function provides hooks around different
  * stages of RAM migration.
+ * 'opaque' is the backend specific data in QEMUFile
+ * 'data' is call specific data associated with the 'flags' value
  */
-typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags);
+typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags,
+                              void *data);
 
 /*
  * Constants used by ram_control_* hooks
  */
-#define RAM_CONTROL_SETUP    0
-#define RAM_CONTROL_ROUND    1
-#define RAM_CONTROL_HOOK     2
-#define RAM_CONTROL_FINISH   3
+#define RAM_CONTROL_SETUP     0
+#define RAM_CONTROL_ROUND     1
+#define RAM_CONTROL_HOOK      2
+#define RAM_CONTROL_FINISH    3
+#define RAM_CONTROL_BLOCK_REG 4
 
 /*
  * This function allows override of where the RAM page
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 0695d7c..f51ff69 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -820,6 +820,8 @@
 void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
                         void *opaque, QJSON *vmdesc);
 
+bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque);
+
 int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
                                    const VMStateDescription *vmsd,
                                    void *base, int alias_id,
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 39f0f19..42f42f5 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -323,6 +323,8 @@
 #define CPU_FOREACH(cpu) QTAILQ_FOREACH(cpu, &cpus, node)
 #define CPU_FOREACH_SAFE(cpu, next_cpu) \
     QTAILQ_FOREACH_SAFE(cpu, &cpus, node, next_cpu)
+#define CPU_FOREACH_REVERSE(cpu) \
+    QTAILQ_FOREACH_REVERSE(cpu, &cpus, CPUTailQ, node)
 #define first_cpu QTAILQ_FIRST(&cpus)
 
 DECLARE_TLS(CPUState *, current_cpu);
diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
new file mode 100644
index 0000000..57e8c80
--- /dev/null
+++ b/include/standard-headers/linux/pci_regs.h
@@ -0,0 +1,719 @@
+/*
+ *	pci_regs.h
+ *
+ *	PCI standard defines
+ *	Copyright 1994, Drew Eckhardt
+ *	Copyright 1997--1999 Martin Mares <mj@ucw.cz>
+ *
+ *	For more information, please consult the following manuals (look at
+ *	http://www.pcisig.com/ for how to get them):
+ *
+ *	PCI BIOS Specification
+ *	PCI Local Bus Specification
+ *	PCI to PCI Bridge Specification
+ *	PCI System Design Guide
+ *
+ * 	For hypertransport information, please consult the following manuals
+ * 	from http://www.hypertransport.org
+ *
+ *	The Hypertransport I/O Link Specification
+ */
+
+#ifndef LINUX_PCI_REGS_H
+#define LINUX_PCI_REGS_H
+
+/*
+ * Under PCI, each device has 256 bytes of configuration address space,
+ * of which the first 64 bytes are standardized as follows:
+ */
+#define PCI_VENDOR_ID		0x00	/* 16 bits */
+#define PCI_DEVICE_ID		0x02	/* 16 bits */
+#define PCI_COMMAND		0x04	/* 16 bits */
+#define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
+#define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
+#define  PCI_COMMAND_MASTER	0x4	/* Enable bus mastering */
+#define  PCI_COMMAND_SPECIAL	0x8	/* Enable response to special cycles */
+#define  PCI_COMMAND_INVALIDATE	0x10	/* Use memory write and invalidate */
+#define  PCI_COMMAND_VGA_PALETTE 0x20	/* Enable palette snooping */
+#define  PCI_COMMAND_PARITY	0x40	/* Enable parity checking */
+#define  PCI_COMMAND_WAIT 	0x80	/* Enable address/data stepping */
+#define  PCI_COMMAND_SERR	0x100	/* Enable SERR */
+#define  PCI_COMMAND_FAST_BACK	0x200	/* Enable back-to-back writes */
+#define  PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */
+
+#define PCI_STATUS		0x06	/* 16 bits */
+#define  PCI_STATUS_INTERRUPT	0x08	/* Interrupt status */
+#define  PCI_STATUS_CAP_LIST	0x10	/* Support Capability List */
+#define  PCI_STATUS_66MHZ	0x20	/* Support 66 Mhz PCI 2.1 bus */
+#define  PCI_STATUS_UDF		0x40	/* Support User Definable Features [obsolete] */
+#define  PCI_STATUS_FAST_BACK	0x80	/* Accept fast-back to back */
+#define  PCI_STATUS_PARITY	0x100	/* Detected parity error */
+#define  PCI_STATUS_DEVSEL_MASK	0x600	/* DEVSEL timing */
+#define  PCI_STATUS_DEVSEL_FAST		0x000
+#define  PCI_STATUS_DEVSEL_MEDIUM	0x200
+#define  PCI_STATUS_DEVSEL_SLOW		0x400
+#define  PCI_STATUS_SIG_TARGET_ABORT	0x800 /* Set on target abort */
+#define  PCI_STATUS_REC_TARGET_ABORT	0x1000 /* Master ack of " */
+#define  PCI_STATUS_REC_MASTER_ABORT	0x2000 /* Set on master abort */
+#define  PCI_STATUS_SIG_SYSTEM_ERROR	0x4000 /* Set when we drive SERR */
+#define  PCI_STATUS_DETECTED_PARITY	0x8000 /* Set on parity error */
+
+#define PCI_CLASS_REVISION	0x08	/* High 24 bits are class, low 8 revision */
+#define PCI_REVISION_ID		0x08	/* Revision ID */
+#define PCI_CLASS_PROG		0x09	/* Reg. Level Programming Interface */
+#define PCI_CLASS_DEVICE	0x0a	/* Device class */
+
+#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
+#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
+#define PCI_HEADER_TYPE		0x0e	/* 8 bits */
+#define  PCI_HEADER_TYPE_NORMAL		0
+#define  PCI_HEADER_TYPE_BRIDGE		1
+#define  PCI_HEADER_TYPE_CARDBUS	2
+
+#define PCI_BIST		0x0f	/* 8 bits */
+#define  PCI_BIST_CODE_MASK	0x0f	/* Return result */
+#define  PCI_BIST_START		0x40	/* 1 to start BIST, 2 secs or less */
+#define  PCI_BIST_CAPABLE	0x80	/* 1 if BIST capable */
+
+/*
+ * Base addresses specify locations in memory or I/O space.
+ * Decoded size can be determined by writing a value of
+ * 0xffffffff to the register, and reading it back.  Only
+ * 1 bits are decoded.
+ */
+#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
+#define PCI_BASE_ADDRESS_1	0x14	/* 32 bits [htype 0,1 only] */
+#define PCI_BASE_ADDRESS_2	0x18	/* 32 bits [htype 0 only] */
+#define PCI_BASE_ADDRESS_3	0x1c	/* 32 bits */
+#define PCI_BASE_ADDRESS_4	0x20	/* 32 bits */
+#define PCI_BASE_ADDRESS_5	0x24	/* 32 bits */
+#define  PCI_BASE_ADDRESS_SPACE		0x01	/* 0 = memory, 1 = I/O */
+#define  PCI_BASE_ADDRESS_SPACE_IO	0x01
+#define  PCI_BASE_ADDRESS_SPACE_MEMORY	0x00
+#define  PCI_BASE_ADDRESS_MEM_TYPE_MASK	0x06
+#define  PCI_BASE_ADDRESS_MEM_TYPE_32	0x00	/* 32 bit address */
+#define  PCI_BASE_ADDRESS_MEM_TYPE_1M	0x02	/* Below 1M [obsolete] */
+#define  PCI_BASE_ADDRESS_MEM_TYPE_64	0x04	/* 64 bit address */
+#define  PCI_BASE_ADDRESS_MEM_PREFETCH	0x08	/* prefetchable? */
+#define  PCI_BASE_ADDRESS_MEM_MASK	(~0x0fUL)
+#define  PCI_BASE_ADDRESS_IO_MASK	(~0x03UL)
+/* bit 1 is reserved if address_space = 1 */
+
+/* Header type 0 (normal devices) */
+#define PCI_CARDBUS_CIS		0x28
+#define PCI_SUBSYSTEM_VENDOR_ID	0x2c
+#define PCI_SUBSYSTEM_ID	0x2e
+#define PCI_ROM_ADDRESS		0x30	/* Bits 31..11 are address, 10..1 reserved */
+#define  PCI_ROM_ADDRESS_ENABLE	0x01
+#define PCI_ROM_ADDRESS_MASK	(~0x7ffUL)
+
+#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
+
+/* 0x35-0x3b are reserved */
+#define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
+#define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
+#define PCI_MIN_GNT		0x3e	/* 8 bits */
+#define PCI_MAX_LAT		0x3f	/* 8 bits */
+
+/* Header type 1 (PCI-to-PCI bridges) */
+#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
+#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
+#define PCI_SUBORDINATE_BUS	0x1a	/* Highest bus number behind the bridge */
+#define PCI_SEC_LATENCY_TIMER	0x1b	/* Latency timer for secondary interface */
+#define PCI_IO_BASE		0x1c	/* I/O range behind the bridge */
+#define PCI_IO_LIMIT		0x1d
+#define  PCI_IO_RANGE_TYPE_MASK	0x0fUL	/* I/O bridging type */
+#define  PCI_IO_RANGE_TYPE_16	0x00
+#define  PCI_IO_RANGE_TYPE_32	0x01
+#define  PCI_IO_RANGE_MASK	(~0x0fUL)
+#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
+#define PCI_MEMORY_BASE		0x20	/* Memory range behind */
+#define PCI_MEMORY_LIMIT	0x22
+#define  PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
+#define  PCI_MEMORY_RANGE_MASK	(~0x0fUL)
+#define PCI_PREF_MEMORY_BASE	0x24	/* Prefetchable memory range behind */
+#define PCI_PREF_MEMORY_LIMIT	0x26
+#define  PCI_PREF_RANGE_TYPE_MASK 0x0fUL
+#define  PCI_PREF_RANGE_TYPE_32	0x00
+#define  PCI_PREF_RANGE_TYPE_64	0x01
+#define  PCI_PREF_RANGE_MASK	(~0x0fUL)
+#define PCI_PREF_BASE_UPPER32	0x28	/* Upper half of prefetchable memory range */
+#define PCI_PREF_LIMIT_UPPER32	0x2c
+#define PCI_IO_BASE_UPPER16	0x30	/* Upper half of I/O addresses */
+#define PCI_IO_LIMIT_UPPER16	0x32
+/* 0x34 same as for htype 0 */
+/* 0x35-0x3b is reserved */
+#define PCI_ROM_ADDRESS1	0x38	/* Same as PCI_ROM_ADDRESS, but for htype 1 */
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_BRIDGE_CONTROL	0x3e
+#define  PCI_BRIDGE_CTL_PARITY	0x01	/* Enable parity detection on secondary interface */
+#define  PCI_BRIDGE_CTL_SERR	0x02	/* The same for SERR forwarding */
+#define  PCI_BRIDGE_CTL_ISA	0x04	/* Enable ISA mode */
+#define  PCI_BRIDGE_CTL_VGA	0x08	/* Forward VGA addresses */
+#define  PCI_BRIDGE_CTL_MASTER_ABORT	0x20  /* Report master aborts */
+#define  PCI_BRIDGE_CTL_BUS_RESET	0x40	/* Secondary bus reset */
+#define  PCI_BRIDGE_CTL_FAST_BACK	0x80	/* Fast Back2Back enabled on secondary interface */
+
+/* Header type 2 (CardBus bridges) */
+#define PCI_CB_CAPABILITY_LIST	0x14
+/* 0x15 reserved */
+#define PCI_CB_SEC_STATUS	0x16	/* Secondary status */
+#define PCI_CB_PRIMARY_BUS	0x18	/* PCI bus number */
+#define PCI_CB_CARD_BUS		0x19	/* CardBus bus number */
+#define PCI_CB_SUBORDINATE_BUS	0x1a	/* Subordinate bus number */
+#define PCI_CB_LATENCY_TIMER	0x1b	/* CardBus latency timer */
+#define PCI_CB_MEMORY_BASE_0	0x1c
+#define PCI_CB_MEMORY_LIMIT_0	0x20
+#define PCI_CB_MEMORY_BASE_1	0x24
+#define PCI_CB_MEMORY_LIMIT_1	0x28
+#define PCI_CB_IO_BASE_0	0x2c
+#define PCI_CB_IO_BASE_0_HI	0x2e
+#define PCI_CB_IO_LIMIT_0	0x30
+#define PCI_CB_IO_LIMIT_0_HI	0x32
+#define PCI_CB_IO_BASE_1	0x34
+#define PCI_CB_IO_BASE_1_HI	0x36
+#define PCI_CB_IO_LIMIT_1	0x38
+#define PCI_CB_IO_LIMIT_1_HI	0x3a
+#define  PCI_CB_IO_RANGE_MASK	(~0x03UL)
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_CB_BRIDGE_CONTROL	0x3e
+#define  PCI_CB_BRIDGE_CTL_PARITY	0x01	/* Similar to standard bridge control register */
+#define  PCI_CB_BRIDGE_CTL_SERR		0x02
+#define  PCI_CB_BRIDGE_CTL_ISA		0x04
+#define  PCI_CB_BRIDGE_CTL_VGA		0x08
+#define  PCI_CB_BRIDGE_CTL_MASTER_ABORT	0x20
+#define  PCI_CB_BRIDGE_CTL_CB_RESET	0x40	/* CardBus reset */
+#define  PCI_CB_BRIDGE_CTL_16BIT_INT	0x80	/* Enable interrupt for 16-bit cards */
+#define  PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100	/* Prefetch enable for both memory regions */
+#define  PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200
+#define  PCI_CB_BRIDGE_CTL_POST_WRITES	0x400
+#define PCI_CB_SUBSYSTEM_VENDOR_ID	0x40
+#define PCI_CB_SUBSYSTEM_ID		0x42
+#define PCI_CB_LEGACY_MODE_BASE		0x44	/* 16-bit PC Card legacy mode base address (ExCa) */
+/* 0x48-0x7f reserved */
+
+/* Capability lists */
+
+#define PCI_CAP_LIST_ID		0	/* Capability ID */
+#define  PCI_CAP_ID_PM		0x01	/* Power Management */
+#define  PCI_CAP_ID_AGP		0x02	/* Accelerated Graphics Port */
+#define  PCI_CAP_ID_VPD		0x03	/* Vital Product Data */
+#define  PCI_CAP_ID_SLOTID	0x04	/* Slot Identification */
+#define  PCI_CAP_ID_MSI		0x05	/* Message Signalled Interrupts */
+#define  PCI_CAP_ID_CHSWP	0x06	/* CompactPCI HotSwap */
+#define  PCI_CAP_ID_PCIX	0x07	/* PCI-X */
+#define  PCI_CAP_ID_HT		0x08	/* HyperTransport */
+#define  PCI_CAP_ID_VNDR	0x09	/* Vendor specific */
+#define  PCI_CAP_ID_DBG		0x0A	/* Debug port */
+#define  PCI_CAP_ID_CCRC	0x0B	/* CompactPCI Central Resource Control */
+#define  PCI_CAP_ID_SHPC 	0x0C	/* PCI Standard Hot-Plug Controller */
+#define  PCI_CAP_ID_SSVID	0x0D	/* Bridge subsystem vendor/device ID */
+#define  PCI_CAP_ID_AGP3	0x0E	/* AGP Target PCI-PCI bridge */
+#define  PCI_CAP_ID_EXP 	0x10	/* PCI Express */
+#define  PCI_CAP_ID_MSIX	0x11	/* MSI-X */
+#define  PCI_CAP_ID_SATA	0x12	/* Serial ATA */
+#define  PCI_CAP_ID_AF		0x13	/* PCI Advanced Features */
+#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
+#define PCI_CAP_FLAGS		2	/* Capability defined flags (16 bits) */
+#define PCI_CAP_SIZEOF		4
+
+/* Power Management Registers */
+
+#define PCI_PM_PMC		2	/* PM Capabilities Register */
+#define  PCI_PM_CAP_VER_MASK	0x0007	/* Version */
+#define  PCI_PM_CAP_PME_CLOCK	0x0008	/* PME clock required */
+#define  PCI_PM_CAP_RESERVED    0x0010  /* Reserved field */
+#define  PCI_PM_CAP_DSI		0x0020	/* Device specific initialization */
+#define  PCI_PM_CAP_AUX_POWER	0x01C0	/* Auxiliary power support mask */
+#define  PCI_PM_CAP_D1		0x0200	/* D1 power state support */
+#define  PCI_PM_CAP_D2		0x0400	/* D2 power state support */
+#define  PCI_PM_CAP_PME		0x0800	/* PME pin supported */
+#define  PCI_PM_CAP_PME_MASK	0xF800	/* PME Mask of all supported states */
+#define  PCI_PM_CAP_PME_D0	0x0800	/* PME# from D0 */
+#define  PCI_PM_CAP_PME_D1	0x1000	/* PME# from D1 */
+#define  PCI_PM_CAP_PME_D2	0x2000	/* PME# from D2 */
+#define  PCI_PM_CAP_PME_D3	0x4000	/* PME# from D3 (hot) */
+#define  PCI_PM_CAP_PME_D3cold	0x8000	/* PME# from D3 (cold) */
+#define  PCI_PM_CAP_PME_SHIFT	11	/* Start of the PME Mask in PMC */
+#define PCI_PM_CTRL		4	/* PM control and status register */
+#define  PCI_PM_CTRL_STATE_MASK	0x0003	/* Current power state (D0 to D3) */
+#define  PCI_PM_CTRL_NO_SOFT_RESET	0x0008	/* No reset for D3hot->D0 */
+#define  PCI_PM_CTRL_PME_ENABLE	0x0100	/* PME pin enable */
+#define  PCI_PM_CTRL_DATA_SEL_MASK	0x1e00	/* Data select (??) */
+#define  PCI_PM_CTRL_DATA_SCALE_MASK	0x6000	/* Data scale (??) */
+#define  PCI_PM_CTRL_PME_STATUS	0x8000	/* PME pin status */
+#define PCI_PM_PPB_EXTENSIONS	6	/* PPB support extensions (??) */
+#define  PCI_PM_PPB_B2_B3	0x40	/* Stop clock when in D3hot (??) */
+#define  PCI_PM_BPCC_ENABLE	0x80	/* Bus power/clock control enable (??) */
+#define PCI_PM_DATA_REGISTER	7	/* (??) */
+#define PCI_PM_SIZEOF		8
+
+/* AGP registers */
+
+#define PCI_AGP_VERSION		2	/* BCD version number */
+#define PCI_AGP_RFU		3	/* Rest of capability flags */
+#define PCI_AGP_STATUS		4	/* Status register */
+#define  PCI_AGP_STATUS_RQ_MASK	0xff000000	/* Maximum number of requests - 1 */
+#define  PCI_AGP_STATUS_SBA	0x0200	/* Sideband addressing supported */
+#define  PCI_AGP_STATUS_64BIT	0x0020	/* 64-bit addressing supported */
+#define  PCI_AGP_STATUS_FW	0x0010	/* FW transfers supported */
+#define  PCI_AGP_STATUS_RATE4	0x0004	/* 4x transfer rate supported */
+#define  PCI_AGP_STATUS_RATE2	0x0002	/* 2x transfer rate supported */
+#define  PCI_AGP_STATUS_RATE1	0x0001	/* 1x transfer rate supported */
+#define PCI_AGP_COMMAND		8	/* Control register */
+#define  PCI_AGP_COMMAND_RQ_MASK 0xff000000  /* Master: Maximum number of requests */
+#define  PCI_AGP_COMMAND_SBA	0x0200	/* Sideband addressing enabled */
+#define  PCI_AGP_COMMAND_AGP	0x0100	/* Allow processing of AGP transactions */
+#define  PCI_AGP_COMMAND_64BIT	0x0020 	/* Allow processing of 64-bit addresses */
+#define  PCI_AGP_COMMAND_FW	0x0010 	/* Force FW transfers */
+#define  PCI_AGP_COMMAND_RATE4	0x0004	/* Use 4x rate */
+#define  PCI_AGP_COMMAND_RATE2	0x0002	/* Use 2x rate */
+#define  PCI_AGP_COMMAND_RATE1	0x0001	/* Use 1x rate */
+#define PCI_AGP_SIZEOF		12
+
+/* Vital Product Data */
+
+#define PCI_VPD_ADDR		2	/* Address to access (15 bits!) */
+#define  PCI_VPD_ADDR_MASK	0x7fff	/* Address mask */
+#define  PCI_VPD_ADDR_F		0x8000	/* Write 0, 1 indicates completion */
+#define PCI_VPD_DATA		4	/* 32-bits of data returned here */
+
+/* Slot Identification */
+
+#define PCI_SID_ESR		2	/* Expansion Slot Register */
+#define  PCI_SID_ESR_NSLOTS	0x1f	/* Number of expansion slots available */
+#define  PCI_SID_ESR_FIC	0x20	/* First In Chassis Flag */
+#define PCI_SID_CHASSIS_NR	3	/* Chassis Number */
+
+/* Message Signalled Interrupts registers */
+
+#define PCI_MSI_FLAGS		2	/* Various flags */
+#define  PCI_MSI_FLAGS_64BIT	0x80	/* 64-bit addresses allowed */
+#define  PCI_MSI_FLAGS_QSIZE	0x70	/* Message queue size configured */
+#define  PCI_MSI_FLAGS_QMASK	0x0e	/* Maximum queue size available */
+#define  PCI_MSI_FLAGS_ENABLE	0x01	/* MSI feature enabled */
+#define  PCI_MSI_FLAGS_MASKBIT	0x100	/* 64-bit mask bits allowed */
+#define PCI_MSI_RFU		3	/* Rest of capability flags */
+#define PCI_MSI_ADDRESS_LO	4	/* Lower 32 bits */
+#define PCI_MSI_ADDRESS_HI	8	/* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
+#define PCI_MSI_DATA_32		8	/* 16 bits of data for 32-bit devices */
+#define PCI_MSI_MASK_32		12	/* Mask bits register for 32-bit devices */
+#define PCI_MSI_PENDING_32	16	/* Pending bits register for 32-bit devices */
+#define PCI_MSI_DATA_64		12	/* 16 bits of data for 64-bit devices */
+#define PCI_MSI_MASK_64		16	/* Mask bits register for 64-bit devices */
+#define PCI_MSI_PENDING_64	20	/* Pending bits register for 64-bit devices */
+
+/* MSI-X registers */
+#define PCI_MSIX_FLAGS		2
+#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
+#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
+#define  PCI_MSIX_FLAGS_MASKALL	(1 << 14)
+#define PCI_MSIX_TABLE		4
+#define PCI_MSIX_PBA		8
+#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
+
+/* MSI-X entry's format */
+#define PCI_MSIX_ENTRY_SIZE		16
+#define  PCI_MSIX_ENTRY_LOWER_ADDR	0
+#define  PCI_MSIX_ENTRY_UPPER_ADDR	4
+#define  PCI_MSIX_ENTRY_DATA		8
+#define  PCI_MSIX_ENTRY_VECTOR_CTRL	12
+#define   PCI_MSIX_ENTRY_CTRL_MASKBIT	1
+
+/* CompactPCI Hotswap Register */
+
+#define PCI_CHSWP_CSR		2	/* Control and Status Register */
+#define  PCI_CHSWP_DHA		0x01	/* Device Hiding Arm */
+#define  PCI_CHSWP_EIM		0x02	/* ENUM# Signal Mask */
+#define  PCI_CHSWP_PIE		0x04	/* Pending Insert or Extract */
+#define  PCI_CHSWP_LOO		0x08	/* LED On / Off */
+#define  PCI_CHSWP_PI		0x30	/* Programming Interface */
+#define  PCI_CHSWP_EXT		0x40	/* ENUM# status - extraction */
+#define  PCI_CHSWP_INS		0x80	/* ENUM# status - insertion */
+
+/* PCI Advanced Feature registers */
+
+#define PCI_AF_LENGTH		2
+#define PCI_AF_CAP		3
+#define  PCI_AF_CAP_TP		0x01
+#define  PCI_AF_CAP_FLR		0x02
+#define PCI_AF_CTRL		4
+#define  PCI_AF_CTRL_FLR	0x01
+#define PCI_AF_STATUS		5
+#define  PCI_AF_STATUS_TP	0x01
+
+/* PCI-X registers */
+
+#define PCI_X_CMD		2	/* Modes & Features */
+#define  PCI_X_CMD_DPERR_E	0x0001	/* Data Parity Error Recovery Enable */
+#define  PCI_X_CMD_ERO		0x0002	/* Enable Relaxed Ordering */
+#define  PCI_X_CMD_READ_512	0x0000	/* 512 byte maximum read byte count */
+#define  PCI_X_CMD_READ_1K	0x0004	/* 1Kbyte maximum read byte count */
+#define  PCI_X_CMD_READ_2K	0x0008	/* 2Kbyte maximum read byte count */
+#define  PCI_X_CMD_READ_4K	0x000c	/* 4Kbyte maximum read byte count */
+#define  PCI_X_CMD_MAX_READ	0x000c	/* Max Memory Read Byte Count */
+				/* Max # of outstanding split transactions */
+#define  PCI_X_CMD_SPLIT_1	0x0000	/* Max 1 */
+#define  PCI_X_CMD_SPLIT_2	0x0010	/* Max 2 */
+#define  PCI_X_CMD_SPLIT_3	0x0020	/* Max 3 */
+#define  PCI_X_CMD_SPLIT_4	0x0030	/* Max 4 */
+#define  PCI_X_CMD_SPLIT_8	0x0040	/* Max 8 */
+#define  PCI_X_CMD_SPLIT_12	0x0050	/* Max 12 */
+#define  PCI_X_CMD_SPLIT_16	0x0060	/* Max 16 */
+#define  PCI_X_CMD_SPLIT_32	0x0070	/* Max 32 */
+#define  PCI_X_CMD_MAX_SPLIT	0x0070	/* Max Outstanding Split Transactions */
+#define  PCI_X_CMD_VERSION(x) 	(((x) >> 12) & 3) /* Version */
+#define PCI_X_STATUS		4	/* PCI-X capabilities */
+#define  PCI_X_STATUS_DEVFN	0x000000ff	/* A copy of devfn */
+#define  PCI_X_STATUS_BUS	0x0000ff00	/* A copy of bus nr */
+#define  PCI_X_STATUS_64BIT	0x00010000	/* 64-bit device */
+#define  PCI_X_STATUS_133MHZ	0x00020000	/* 133 MHz capable */
+#define  PCI_X_STATUS_SPL_DISC	0x00040000	/* Split Completion Discarded */
+#define  PCI_X_STATUS_UNX_SPL	0x00080000	/* Unexpected Split Completion */
+#define  PCI_X_STATUS_COMPLEX	0x00100000	/* Device Complexity */
+#define  PCI_X_STATUS_MAX_READ	0x00600000	/* Designed Max Memory Read Count */
+#define  PCI_X_STATUS_MAX_SPLIT	0x03800000	/* Designed Max Outstanding Split Transactions */
+#define  PCI_X_STATUS_MAX_CUM	0x1c000000	/* Designed Max Cumulative Read Size */
+#define  PCI_X_STATUS_SPL_ERR	0x20000000	/* Rcvd Split Completion Error Msg */
+#define  PCI_X_STATUS_266MHZ	0x40000000	/* 266 MHz capable */
+#define  PCI_X_STATUS_533MHZ	0x80000000	/* 533 MHz capable */
+
+/* PCI Bridge Subsystem ID registers */
+
+#define PCI_SSVID_VENDOR_ID     4	/* PCI-Bridge subsystem vendor id register */
+#define PCI_SSVID_DEVICE_ID     6	/* PCI-Bridge subsystem device id register */
+
+/* PCI Express capability registers */
+
+#define PCI_EXP_FLAGS		2	/* Capabilities register */
+#define PCI_EXP_FLAGS_VERS	0x000f	/* Capability version */
+#define PCI_EXP_FLAGS_TYPE	0x00f0	/* Device/Port type */
+#define  PCI_EXP_TYPE_ENDPOINT	0x0	/* Express Endpoint */
+#define  PCI_EXP_TYPE_LEG_END	0x1	/* Legacy Endpoint */
+#define  PCI_EXP_TYPE_ROOT_PORT 0x4	/* Root Port */
+#define  PCI_EXP_TYPE_UPSTREAM	0x5	/* Upstream Port */
+#define  PCI_EXP_TYPE_DOWNSTREAM 0x6	/* Downstream Port */
+#define  PCI_EXP_TYPE_PCI_BRIDGE 0x7	/* PCI/PCI-X Bridge */
+#define  PCI_EXP_TYPE_PCIE_BRIDGE 0x8   /* PCI/PCI-X to PCIE Bridge */
+#define  PCI_EXP_TYPE_RC_END	0x9	/* Root Complex Integrated Endpoint */
+#define  PCI_EXP_TYPE_RC_EC     0xa     /* Root Complex Event Collector */
+#define PCI_EXP_FLAGS_SLOT	0x0100	/* Slot implemented */
+#define PCI_EXP_FLAGS_IRQ	0x3e00	/* Interrupt message number */
+#define PCI_EXP_DEVCAP		4	/* Device capabilities */
+#define  PCI_EXP_DEVCAP_PAYLOAD	0x07	/* Max_Payload_Size */
+#define  PCI_EXP_DEVCAP_PHANTOM	0x18	/* Phantom functions */
+#define  PCI_EXP_DEVCAP_EXT_TAG	0x20	/* Extended tags */
+#define  PCI_EXP_DEVCAP_L0S	0x1c0	/* L0s Acceptable Latency */
+#define  PCI_EXP_DEVCAP_L1	0xe00	/* L1 Acceptable Latency */
+#define  PCI_EXP_DEVCAP_ATN_BUT	0x1000	/* Attention Button Present */
+#define  PCI_EXP_DEVCAP_ATN_IND	0x2000	/* Attention Indicator Present */
+#define  PCI_EXP_DEVCAP_PWR_IND	0x4000	/* Power Indicator Present */
+#define  PCI_EXP_DEVCAP_RBER	0x8000	/* Role-Based Error Reporting */
+#define  PCI_EXP_DEVCAP_PWR_VAL	0x3fc0000 /* Slot Power Limit Value */
+#define  PCI_EXP_DEVCAP_PWR_SCL	0xc000000 /* Slot Power Limit Scale */
+#define  PCI_EXP_DEVCAP_FLR     0x10000000 /* Function Level Reset */
+#define PCI_EXP_DEVCTL		8	/* Device Control */
+#define  PCI_EXP_DEVCTL_CERE	0x0001	/* Correctable Error Reporting En. */
+#define  PCI_EXP_DEVCTL_NFERE	0x0002	/* Non-Fatal Error Reporting Enable */
+#define  PCI_EXP_DEVCTL_FERE	0x0004	/* Fatal Error Reporting Enable */
+#define  PCI_EXP_DEVCTL_URRE	0x0008	/* Unsupported Request Reporting En. */
+#define  PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */
+#define  PCI_EXP_DEVCTL_PAYLOAD	0x00e0	/* Max_Payload_Size */
+#define  PCI_EXP_DEVCTL_EXT_TAG	0x0100	/* Extended Tag Field Enable */
+#define  PCI_EXP_DEVCTL_PHANTOM	0x0200	/* Phantom Functions Enable */
+#define  PCI_EXP_DEVCTL_AUX_PME	0x0400	/* Auxiliary Power PM Enable */
+#define  PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800  /* Enable No Snoop */
+#define  PCI_EXP_DEVCTL_READRQ	0x7000	/* Max_Read_Request_Size */
+#define  PCI_EXP_DEVCTL_BCR_FLR 0x8000  /* Bridge Configuration Retry / FLR */
+#define PCI_EXP_DEVSTA		10	/* Device Status */
+#define  PCI_EXP_DEVSTA_CED	0x01	/* Correctable Error Detected */
+#define  PCI_EXP_DEVSTA_NFED	0x02	/* Non-Fatal Error Detected */
+#define  PCI_EXP_DEVSTA_FED	0x04	/* Fatal Error Detected */
+#define  PCI_EXP_DEVSTA_URD	0x08	/* Unsupported Request Detected */
+#define  PCI_EXP_DEVSTA_AUXPD	0x10	/* AUX Power Detected */
+#define  PCI_EXP_DEVSTA_TRPND	0x20	/* Transactions Pending */
+#define PCI_EXP_LNKCAP		12	/* Link Capabilities */
+#define  PCI_EXP_LNKCAP_SLS	0x0000000f /* Supported Link Speeds */
+#define  PCI_EXP_LNKCAP_MLW	0x000003f0 /* Maximum Link Width */
+#define  PCI_EXP_LNKCAP_ASPMS	0x00000c00 /* ASPM Support */
+#define  PCI_EXP_LNKCAP_L0SEL	0x00007000 /* L0s Exit Latency */
+#define  PCI_EXP_LNKCAP_L1EL	0x00038000 /* L1 Exit Latency */
+#define  PCI_EXP_LNKCAP_CLKPM	0x00040000 /* L1 Clock Power Management */
+#define  PCI_EXP_LNKCAP_SDERC	0x00080000 /* Surprise Down Error Reporting Capable */
+#define  PCI_EXP_LNKCAP_DLLLARC	0x00100000 /* Data Link Layer Link Active Reporting Capable */
+#define  PCI_EXP_LNKCAP_LBNC	0x00200000 /* Link Bandwidth Notification Capability */
+#define  PCI_EXP_LNKCAP_PN	0xff000000 /* Port Number */
+#define PCI_EXP_LNKCTL		16	/* Link Control */
+#define  PCI_EXP_LNKCTL_ASPMC	0x0003	/* ASPM Control */
+#define  PCI_EXP_LNKCTL_RCB	0x0008	/* Read Completion Boundary */
+#define  PCI_EXP_LNKCTL_LD	0x0010	/* Link Disable */
+#define  PCI_EXP_LNKCTL_RL	0x0020	/* Retrain Link */
+#define  PCI_EXP_LNKCTL_CCC	0x0040	/* Common Clock Configuration */
+#define  PCI_EXP_LNKCTL_ES	0x0080	/* Extended Synch */
+#define  PCI_EXP_LNKCTL_CLKREQ_EN 0x100	/* Enable clkreq */
+#define  PCI_EXP_LNKCTL_HAWD	0x0200	/* Hardware Autonomous Width Disable */
+#define  PCI_EXP_LNKCTL_LBMIE	0x0400	/* Link Bandwidth Management Interrupt Enable */
+#define  PCI_EXP_LNKCTL_LABIE	0x0800	/* Lnk Autonomous Bandwidth Interrupt Enable */
+#define PCI_EXP_LNKSTA		18	/* Link Status */
+#define  PCI_EXP_LNKSTA_CLS	0x000f	/* Current Link Speed */
+#define  PCI_EXP_LNKSTA_CLS_2_5GB 0x01	/* Current Link Speed 2.5GT/s */
+#define  PCI_EXP_LNKSTA_CLS_5_0GB 0x02	/* Current Link Speed 5.0GT/s */
+#define  PCI_EXP_LNKSTA_NLW	0x03f0	/* Negotiated Link Width */
+#define  PCI_EXP_LNKSTA_NLW_SHIFT 4	/* start of NLW mask in link status */
+#define  PCI_EXP_LNKSTA_LT	0x0800	/* Link Training */
+#define  PCI_EXP_LNKSTA_SLC	0x1000	/* Slot Clock Configuration */
+#define  PCI_EXP_LNKSTA_DLLLA	0x2000	/* Data Link Layer Link Active */
+#define  PCI_EXP_LNKSTA_LBMS	0x4000	/* Link Bandwidth Management Status */
+#define  PCI_EXP_LNKSTA_LABS	0x8000	/* Link Autonomous Bandwidth Status */
+#define PCI_EXP_SLTCAP		20	/* Slot Capabilities */
+#define  PCI_EXP_SLTCAP_ABP	0x00000001 /* Attention Button Present */
+#define  PCI_EXP_SLTCAP_PCP	0x00000002 /* Power Controller Present */
+#define  PCI_EXP_SLTCAP_MRLSP	0x00000004 /* MRL Sensor Present */
+#define  PCI_EXP_SLTCAP_AIP	0x00000008 /* Attention Indicator Present */
+#define  PCI_EXP_SLTCAP_PIP	0x00000010 /* Power Indicator Present */
+#define  PCI_EXP_SLTCAP_HPS	0x00000020 /* Hot-Plug Surprise */
+#define  PCI_EXP_SLTCAP_HPC	0x00000040 /* Hot-Plug Capable */
+#define  PCI_EXP_SLTCAP_SPLV	0x00007f80 /* Slot Power Limit Value */
+#define  PCI_EXP_SLTCAP_SPLS	0x00018000 /* Slot Power Limit Scale */
+#define  PCI_EXP_SLTCAP_EIP	0x00020000 /* Electromechanical Interlock Present */
+#define  PCI_EXP_SLTCAP_NCCS	0x00040000 /* No Command Completed Support */
+#define  PCI_EXP_SLTCAP_PSN	0xfff80000 /* Physical Slot Number */
+#define PCI_EXP_SLTCTL		24	/* Slot Control */
+#define  PCI_EXP_SLTCTL_ABPE	0x0001	/* Attention Button Pressed Enable */
+#define  PCI_EXP_SLTCTL_PFDE	0x0002	/* Power Fault Detected Enable */
+#define  PCI_EXP_SLTCTL_MRLSCE	0x0004	/* MRL Sensor Changed Enable */
+#define  PCI_EXP_SLTCTL_PDCE	0x0008	/* Presence Detect Changed Enable */
+#define  PCI_EXP_SLTCTL_CCIE	0x0010	/* Command Completed Interrupt Enable */
+#define  PCI_EXP_SLTCTL_HPIE	0x0020	/* Hot-Plug Interrupt Enable */
+#define  PCI_EXP_SLTCTL_AIC	0x00c0	/* Attention Indicator Control */
+#define  PCI_EXP_SLTCTL_PIC	0x0300	/* Power Indicator Control */
+#define  PCI_EXP_SLTCTL_PCC	0x0400	/* Power Controller Control */
+#define  PCI_EXP_SLTCTL_EIC	0x0800	/* Electromechanical Interlock Control */
+#define  PCI_EXP_SLTCTL_DLLSCE	0x1000	/* Data Link Layer State Changed Enable */
+#define PCI_EXP_SLTSTA		26	/* Slot Status */
+#define  PCI_EXP_SLTSTA_ABP	0x0001	/* Attention Button Pressed */
+#define  PCI_EXP_SLTSTA_PFD	0x0002	/* Power Fault Detected */
+#define  PCI_EXP_SLTSTA_MRLSC	0x0004	/* MRL Sensor Changed */
+#define  PCI_EXP_SLTSTA_PDC	0x0008	/* Presence Detect Changed */
+#define  PCI_EXP_SLTSTA_CC	0x0010	/* Command Completed */
+#define  PCI_EXP_SLTSTA_MRLSS	0x0020	/* MRL Sensor State */
+#define  PCI_EXP_SLTSTA_PDS	0x0040	/* Presence Detect State */
+#define  PCI_EXP_SLTSTA_EIS	0x0080	/* Electromechanical Interlock Status */
+#define  PCI_EXP_SLTSTA_DLLSC	0x0100	/* Data Link Layer State Changed */
+#define PCI_EXP_RTCTL		28	/* Root Control */
+#define  PCI_EXP_RTCTL_SECEE	0x01	/* System Error on Correctable Error */
+#define  PCI_EXP_RTCTL_SENFEE	0x02	/* System Error on Non-Fatal Error */
+#define  PCI_EXP_RTCTL_SEFEE	0x04	/* System Error on Fatal Error */
+#define  PCI_EXP_RTCTL_PMEIE	0x08	/* PME Interrupt Enable */
+#define  PCI_EXP_RTCTL_CRSSVE	0x10	/* CRS Software Visibility Enable */
+#define PCI_EXP_RTCAP		30	/* Root Capabilities */
+#define PCI_EXP_RTSTA		32	/* Root Status */
+#define PCI_EXP_RTSTA_PME	0x10000 /* PME status */
+#define PCI_EXP_RTSTA_PENDING	0x20000 /* PME pending */
+#define PCI_EXP_DEVCAP2		36	/* Device Capabilities 2 */
+#define  PCI_EXP_DEVCAP2_ARI	0x20	/* Alternative Routing-ID */
+#define  PCI_EXP_DEVCAP2_LTR	0x800	/* Latency tolerance reporting */
+#define  PCI_EXP_OBFF_MASK	0xc0000 /* OBFF support mechanism */
+#define  PCI_EXP_OBFF_MSG	0x40000 /* New message signaling */
+#define  PCI_EXP_OBFF_WAKE	0x80000 /* Re-use WAKE# for OBFF */
+#define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
+#define  PCI_EXP_DEVCTL2_ARI	0x20	/* Alternative Routing-ID */
+#define  PCI_EXP_IDO_REQ_EN	0x100	/* ID-based ordering request enable */
+#define  PCI_EXP_IDO_CMP_EN	0x200	/* ID-based ordering completion enable */
+#define  PCI_EXP_LTR_EN		0x400	/* Latency tolerance reporting */
+#define  PCI_EXP_OBFF_MSGA_EN	0x2000	/* OBFF enable with Message type A */
+#define  PCI_EXP_OBFF_MSGB_EN	0x4000	/* OBFF enable with Message type B */
+#define  PCI_EXP_OBFF_WAKE_EN	0x6000	/* OBFF using WAKE# signaling */
+#define PCI_EXP_LNKCTL2		48	/* Link Control 2 */
+#define PCI_EXP_SLTCTL2		56	/* Slot Control 2 */
+
+/* Extended Capabilities (PCI-X 2.0 and Express) */
+#define PCI_EXT_CAP_ID(header)		(header & 0x0000ffff)
+#define PCI_EXT_CAP_VER(header)		((header >> 16) & 0xf)
+#define PCI_EXT_CAP_NEXT(header)	((header >> 20) & 0xffc)
+
+#define PCI_EXT_CAP_ID_ERR	1
+#define PCI_EXT_CAP_ID_VC	2
+#define PCI_EXT_CAP_ID_DSN	3
+#define PCI_EXT_CAP_ID_PWR	4
+#define PCI_EXT_CAP_ID_VNDR	11
+#define PCI_EXT_CAP_ID_ACS	13
+#define PCI_EXT_CAP_ID_ARI	14
+#define PCI_EXT_CAP_ID_ATS	15
+#define PCI_EXT_CAP_ID_SRIOV	16
+#define PCI_EXT_CAP_ID_LTR	24
+
+/* Advanced Error Reporting */
+#define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
+#define  PCI_ERR_UNC_TRAIN	0x00000001	/* Training */
+#define  PCI_ERR_UNC_DLP	0x00000010	/* Data Link Protocol */
+#define  PCI_ERR_UNC_POISON_TLP	0x00001000	/* Poisoned TLP */
+#define  PCI_ERR_UNC_FCP	0x00002000	/* Flow Control Protocol */
+#define  PCI_ERR_UNC_COMP_TIME	0x00004000	/* Completion Timeout */
+#define  PCI_ERR_UNC_COMP_ABORT	0x00008000	/* Completer Abort */
+#define  PCI_ERR_UNC_UNX_COMP	0x00010000	/* Unexpected Completion */
+#define  PCI_ERR_UNC_RX_OVER	0x00020000	/* Receiver Overflow */
+#define  PCI_ERR_UNC_MALF_TLP	0x00040000	/* Malformed TLP */
+#define  PCI_ERR_UNC_ECRC	0x00080000	/* ECRC Error Status */
+#define  PCI_ERR_UNC_UNSUP	0x00100000	/* Unsupported Request */
+#define PCI_ERR_UNCOR_MASK	8	/* Uncorrectable Error Mask */
+	/* Same bits as above */
+#define PCI_ERR_UNCOR_SEVER	12	/* Uncorrectable Error Severity */
+	/* Same bits as above */
+#define PCI_ERR_COR_STATUS	16	/* Correctable Error Status */
+#define  PCI_ERR_COR_RCVR	0x00000001	/* Receiver Error Status */
+#define  PCI_ERR_COR_BAD_TLP	0x00000040	/* Bad TLP Status */
+#define  PCI_ERR_COR_BAD_DLLP	0x00000080	/* Bad DLLP Status */
+#define  PCI_ERR_COR_REP_ROLL	0x00000100	/* REPLAY_NUM Rollover */
+#define  PCI_ERR_COR_REP_TIMER	0x00001000	/* Replay Timer Timeout */
+#define PCI_ERR_COR_MASK	20	/* Correctable Error Mask */
+	/* Same bits as above */
+#define PCI_ERR_CAP		24	/* Advanced Error Capabilities */
+#define  PCI_ERR_CAP_FEP(x)	((x) & 31)	/* First Error Pointer */
+#define  PCI_ERR_CAP_ECRC_GENC	0x00000020	/* ECRC Generation Capable */
+#define  PCI_ERR_CAP_ECRC_GENE	0x00000040	/* ECRC Generation Enable */
+#define  PCI_ERR_CAP_ECRC_CHKC	0x00000080	/* ECRC Check Capable */
+#define  PCI_ERR_CAP_ECRC_CHKE	0x00000100	/* ECRC Check Enable */
+#define PCI_ERR_HEADER_LOG	28	/* Header Log Register (16 bytes) */
+#define PCI_ERR_ROOT_COMMAND	44	/* Root Error Command */
+/* Correctable Err Reporting Enable */
+#define PCI_ERR_ROOT_CMD_COR_EN		0x00000001
+/* Non-fatal Err Reporting Enable */
+#define PCI_ERR_ROOT_CMD_NONFATAL_EN	0x00000002
+/* Fatal Err Reporting Enable */
+#define PCI_ERR_ROOT_CMD_FATAL_EN	0x00000004
+#define PCI_ERR_ROOT_STATUS	48
+#define PCI_ERR_ROOT_COR_RCV		0x00000001	/* ERR_COR Received */
+/* Multi ERR_COR Received */
+#define PCI_ERR_ROOT_MULTI_COR_RCV	0x00000002
+/* ERR_FATAL/NONFATAL Received */
+#define PCI_ERR_ROOT_UNCOR_RCV		0x00000004
+/* Multi ERR_FATAL/NONFATAL Received */
+#define PCI_ERR_ROOT_MULTI_UNCOR_RCV	0x00000008
+#define PCI_ERR_ROOT_FIRST_FATAL	0x00000010	/* First Fatal */
+#define PCI_ERR_ROOT_NONFATAL_RCV	0x00000020	/* Non-Fatal Received */
+#define PCI_ERR_ROOT_FATAL_RCV		0x00000040	/* Fatal Received */
+#define PCI_ERR_ROOT_ERR_SRC	52	/* Error Source Identification */
+
+/* Virtual Channel */
+#define PCI_VC_PORT_REG1	4
+#define PCI_VC_PORT_REG2	8
+#define PCI_VC_PORT_CTRL	12
+#define PCI_VC_PORT_STATUS	14
+#define PCI_VC_RES_CAP		16
+#define PCI_VC_RES_CTRL		20
+#define PCI_VC_RES_STATUS	26
+
+/* Power Budgeting */
+#define PCI_PWR_DSR		4	/* Data Select Register */
+#define PCI_PWR_DATA		8	/* Data Register */
+#define  PCI_PWR_DATA_BASE(x)	((x) & 0xff)	    /* Base Power */
+#define  PCI_PWR_DATA_SCALE(x)	(((x) >> 8) & 3)    /* Data Scale */
+#define  PCI_PWR_DATA_PM_SUB(x)	(((x) >> 10) & 7)   /* PM Sub State */
+#define  PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */
+#define  PCI_PWR_DATA_TYPE(x)	(((x) >> 15) & 7)   /* Type */
+#define  PCI_PWR_DATA_RAIL(x)	(((x) >> 18) & 7)   /* Power Rail */
+#define PCI_PWR_CAP		12	/* Capability */
+#define  PCI_PWR_CAP_BUDGET(x)	((x) & 1)	/* Included in system budget */
+
+/*
+ * Hypertransport sub capability types
+ *
+ * Unfortunately there are both 3 bit and 5 bit capability types defined
+ * in the HT spec, catering for that is a little messy. You probably don't
+ * want to use these directly, just use pci_find_ht_capability() and it
+ * will do the right thing for you.
+ */
+#define HT_3BIT_CAP_MASK	0xE0
+#define HT_CAPTYPE_SLAVE	0x00	/* Slave/Primary link configuration */
+#define HT_CAPTYPE_HOST		0x20	/* Host/Secondary link configuration */
+
+#define HT_5BIT_CAP_MASK	0xF8
+#define HT_CAPTYPE_IRQ		0x80	/* IRQ Configuration */
+#define HT_CAPTYPE_REMAPPING_40	0xA0	/* 40 bit address remapping */
+#define HT_CAPTYPE_REMAPPING_64 0xA2	/* 64 bit address remapping */
+#define HT_CAPTYPE_UNITID_CLUMP	0x90	/* Unit ID clumping */
+#define HT_CAPTYPE_EXTCONF	0x98	/* Extended Configuration Space Access */
+#define HT_CAPTYPE_MSI_MAPPING	0xA8	/* MSI Mapping Capability */
+#define  HT_MSI_FLAGS		0x02		/* Offset to flags */
+#define  HT_MSI_FLAGS_ENABLE	0x1		/* Mapping enable */
+#define  HT_MSI_FLAGS_FIXED	0x2		/* Fixed mapping only */
+#define  HT_MSI_FIXED_ADDR	0x00000000FEE00000ULL	/* Fixed addr */
+#define  HT_MSI_ADDR_LO		0x04		/* Offset to low addr bits */
+#define  HT_MSI_ADDR_LO_MASK	0xFFF00000	/* Low address bit mask */
+#define  HT_MSI_ADDR_HI		0x08		/* Offset to high addr bits */
+#define HT_CAPTYPE_DIRECT_ROUTE	0xB0	/* Direct routing configuration */
+#define HT_CAPTYPE_VCSET	0xB8	/* Virtual Channel configuration */
+#define HT_CAPTYPE_ERROR_RETRY	0xC0	/* Retry on error configuration */
+#define HT_CAPTYPE_GEN3		0xD0	/* Generation 3 hypertransport configuration */
+#define HT_CAPTYPE_PM		0xE0	/* Hypertransport powermanagement configuration */
+
+/* Alternative Routing-ID Interpretation */
+#define PCI_ARI_CAP		0x04	/* ARI Capability Register */
+#define  PCI_ARI_CAP_MFVC	0x0001	/* MFVC Function Groups Capability */
+#define  PCI_ARI_CAP_ACS	0x0002	/* ACS Function Groups Capability */
+#define  PCI_ARI_CAP_NFN(x)	(((x) >> 8) & 0xff) /* Next Function Number */
+#define PCI_ARI_CTRL		0x06	/* ARI Control Register */
+#define  PCI_ARI_CTRL_MFVC	0x0001	/* MFVC Function Groups Enable */
+#define  PCI_ARI_CTRL_ACS	0x0002	/* ACS Function Groups Enable */
+#define  PCI_ARI_CTRL_FG(x)	(((x) >> 4) & 7) /* Function Group */
+
+/* Address Translation Service */
+#define PCI_ATS_CAP		0x04	/* ATS Capability Register */
+#define  PCI_ATS_CAP_QDEP(x)	((x) & 0x1f)	/* Invalidate Queue Depth */
+#define  PCI_ATS_MAX_QDEP	32	/* Max Invalidate Queue Depth */
+#define PCI_ATS_CTRL		0x06	/* ATS Control Register */
+#define  PCI_ATS_CTRL_ENABLE	0x8000	/* ATS Enable */
+#define  PCI_ATS_CTRL_STU(x)	((x) & 0x1f)	/* Smallest Translation Unit */
+#define  PCI_ATS_MIN_STU	12	/* shift of minimum STU block */
+
+/* Single Root I/O Virtualization */
+#define PCI_SRIOV_CAP		0x04	/* SR-IOV Capabilities */
+#define  PCI_SRIOV_CAP_VFM	0x01	/* VF Migration Capable */
+#define  PCI_SRIOV_CAP_INTR(x)	((x) >> 21) /* Interrupt Message Number */
+#define PCI_SRIOV_CTRL		0x08	/* SR-IOV Control */
+#define  PCI_SRIOV_CTRL_VFE	0x01	/* VF Enable */
+#define  PCI_SRIOV_CTRL_VFM	0x02	/* VF Migration Enable */
+#define  PCI_SRIOV_CTRL_INTR	0x04	/* VF Migration Interrupt Enable */
+#define  PCI_SRIOV_CTRL_MSE	0x08	/* VF Memory Space Enable */
+#define  PCI_SRIOV_CTRL_ARI	0x10	/* ARI Capable Hierarchy */
+#define PCI_SRIOV_STATUS	0x0a	/* SR-IOV Status */
+#define  PCI_SRIOV_STATUS_VFM	0x01	/* VF Migration Status */
+#define PCI_SRIOV_INITIAL_VF	0x0c	/* Initial VFs */
+#define PCI_SRIOV_TOTAL_VF	0x0e	/* Total VFs */
+#define PCI_SRIOV_NUM_VF	0x10	/* Number of VFs */
+#define PCI_SRIOV_FUNC_LINK	0x12	/* Function Dependency Link */
+#define PCI_SRIOV_VF_OFFSET	0x14	/* First VF Offset */
+#define PCI_SRIOV_VF_STRIDE	0x16	/* Following VF Stride */
+#define PCI_SRIOV_VF_DID	0x1a	/* VF Device ID */
+#define PCI_SRIOV_SUP_PGSIZE	0x1c	/* Supported Page Sizes */
+#define PCI_SRIOV_SYS_PGSIZE	0x20	/* System Page Size */
+#define PCI_SRIOV_BAR		0x24	/* VF BAR0 */
+#define  PCI_SRIOV_NUM_BARS	6	/* Number of VF BARs */
+#define PCI_SRIOV_VFM		0x3c	/* VF Migration State Array Offset*/
+#define  PCI_SRIOV_VFM_BIR(x)	((x) & 7)	/* State BIR */
+#define  PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7)	/* State Offset */
+#define  PCI_SRIOV_VFM_UA	0x0	/* Inactive.Unavailable */
+#define  PCI_SRIOV_VFM_MI	0x1	/* Dormant.MigrateIn */
+#define  PCI_SRIOV_VFM_MO	0x2	/* Active.MigrateOut */
+#define  PCI_SRIOV_VFM_AV	0x3	/* Active.Available */
+
+#define PCI_LTR_MAX_SNOOP_LAT	0x4
+#define PCI_LTR_MAX_NOSNOOP_LAT	0x6
+#define  PCI_LTR_VALUE_MASK	0x000003ff
+#define  PCI_LTR_SCALE_MASK	0x00001c00
+#define  PCI_LTR_SCALE_SHIFT	10
+
+/* Access Control Service */
+#define PCI_ACS_CAP		0x04	/* ACS Capability Register */
+#define  PCI_ACS_SV		0x01	/* Source Validation */
+#define  PCI_ACS_TB		0x02	/* Translation Blocking */
+#define  PCI_ACS_RR		0x04	/* P2P Request Redirect */
+#define  PCI_ACS_CR		0x08	/* P2P Completion Redirect */
+#define  PCI_ACS_UF		0x10	/* Upstream Forwarding */
+#define  PCI_ACS_EC		0x20	/* P2P Egress Control */
+#define  PCI_ACS_DT		0x40	/* Direct Translated P2P */
+#define PCI_ACS_CTRL		0x06	/* ACS Control Register */
+#define PCI_ACS_EGRESS_CTL_V	0x08	/* ACS Egress Control Vector */
+
+#endif /* LINUX_PCI_REGS_H */
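
As a rough illustration of how the capability-list constants above are normally used (a minimal sketch, not part of this header; cfg_read8() is a hypothetical accessor for the device's configuration space):

    #include <stdint.h>

    /* Hypothetical accessor for a device's PCI configuration space. */
    uint8_t cfg_read8(void *dev, uint8_t offset);

    /* Return the offset of the first capability with the given ID, or 0.
     * The guard bounds the walk so a malformed, cyclic list cannot hang us. */
    static uint8_t find_capability(void *dev, uint8_t cap_id)
    {
        uint8_t pos = cfg_read8(dev, PCI_CAPABILITY_LIST) & ~3;
        int guard = 48;

        while (pos && guard--) {
            if (cfg_read8(dev, pos + PCI_CAP_LIST_ID) == cap_id) {
                return pos;
            }
            pos = cfg_read8(dev, pos + PCI_CAP_LIST_NEXT) & ~3;
        }
        return 0;
    }

For example, find_capability(dev, PCI_CAP_ID_MSIX) would locate the MSI-X capability before PCI_MSIX_FLAGS is touched.
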
diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h
index 3209c90..a78f33e 100644
--- a/include/standard-headers/linux/virtio_net.h
+++ b/include/standard-headers/linux/virtio_net.h
@@ -34,6 +34,7 @@
 /* The feature bitmap for virtio net */
 #define VIRTIO_NET_F_CSUM	0	/* Host handles pkts w/ partial csum */
 #define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial csum */
+#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */
 #define VIRTIO_NET_F_MAC	5	/* Host has given MAC address. */
 #define VIRTIO_NET_F_GUEST_TSO4	7	/* Guest can handle TSOv4 in. */
 #define VIRTIO_NET_F_GUEST_TSO6	8	/* Guest can handle TSOv6 in. */
@@ -226,4 +227,19 @@
  #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
  #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
 
+/*
+ * Control network offloads
+ *
+ * Reconfigures the network offloads that Guest can handle.
+ *
+ * Available with the VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature bit.
+ *
+ * Command data format matches the feature bit mask exactly.
+ *
+ * See VIRTIO_NET_F_GUEST_* for the list of offloads
+ * that can be enabled/disabled.
+ */
+#define VIRTIO_NET_CTRL_GUEST_OFFLOADS   5
+#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET        0
+
 #endif /* _LINUX_VIRTIO_NET_H */
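
A minimal sketch (not part of this header) of the payload a driver might build for the new command; per the comment above the data format is exactly the feature bit mask, so the VIRTIO_NET_F_GUEST_* bit positions are reused directly:

    #include <stdint.h>

    /* Offload mask for a hypothetical VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET
     * command that keeps guest checksum and TSOv4 handling enabled; actually
     * submitting it over the control virtqueue (class/command header,
     * endianness conversion) is omitted here. */
    static uint64_t guest_offloads_payload(void)
    {
        return (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
               (1ULL << VIRTIO_NET_F_GUEST_TSO4);
    }
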
diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h
index ecdc133..9262acd 100644
--- a/include/standard-headers/linux/virtio_pci.h
+++ b/include/standard-headers/linux/virtio_pci.h
@@ -157,6 +157,12 @@
 	uint32_t queue_used_hi;		/* read-write */
 };
 
+/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */
+struct virtio_pci_cfg_cap {
+	struct virtio_pci_cap cap;
+	uint8_t pci_cfg_data[4]; /* Data for BAR access. */
+};
+
 /* Macro versions of offsets for the Old Timers! */
 #define VIRTIO_PCI_CAP_VNDR		0
 #define VIRTIO_PCI_CAP_NEXT		1
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index f459fbd..983e99e 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -19,6 +19,7 @@
 #include "qemu/queue.h"
 #include "qom/cpu.h"
 #include "exec/memattrs.h"
+#include "hw/irq.h"
 
 #ifdef CONFIG_KVM
 #include <linux/kvm.h>
@@ -151,6 +152,7 @@
 #define kvm_halt_in_kernel() (false)
 #define kvm_eventfds_enabled() (false)
 #define kvm_irqfds_enabled() (false)
+#define kvm_resamplefds_enabled() (false)
 #define kvm_msi_via_irqfd_enabled() (false)
 #define kvm_gsi_routing_allowed() (false)
 #define kvm_gsi_direct_mapping() (false)
@@ -416,9 +418,15 @@
 
 int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter);
 
+int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+                                       EventNotifier *rn, int virq);
+int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+                                          int virq);
 int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
-                                   EventNotifier *rn, int virq);
-int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
+                                   EventNotifier *rn, qemu_irq irq);
+int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n,
+                                      qemu_irq irq);
+void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi);
 void kvm_pc_gsi_handler(void *opaque, int n, int level);
 void kvm_pc_setup_irq_routing(bool pci_enabled);
 void kvm_init_irq_routing(KVMState *s);
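
A hedged sketch of how the split API above fits together (the function below and its name are illustrative, not part of this patch): the platform interrupt controller publishes the qemu_irq-to-GSI mapping once, after which device code can attach an irqfd using the qemu_irq it already holds instead of a raw GSI number:

    /* Illustrative only; on failure the caller would fall back to injecting
     * the interrupt from user space. */
    static void wire_irqfd_example(KVMState *s, qemu_irq irq, int gsi,
                                   EventNotifier *n, EventNotifier *rn)
    {
        kvm_irqchip_set_qemuirq_gsi(s, irq, gsi);

        if (kvm_irqchip_add_irqfd_notifier(s, n, rn, irq) < 0) {
            /* no mapping registered or no irqfd support */
        }
    }
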
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index df80951..44570d1 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -28,6 +28,7 @@
 void runstate_set(RunState new_state);
 int runstate_is_running(void);
 bool runstate_needs_reset(void);
+bool runstate_store(char *str, size_t size);
 typedef struct vm_change_state_entry VMChangeStateEntry;
 typedef void VMChangeStateHandler(void *opaque, int running, RunState state);
 
diff --git a/kvm-all.c b/kvm-all.c
index edff01c..06e06f2 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -35,6 +35,7 @@
 #include "exec/address-spaces.h"
 #include "qemu/event_notifier.h"
 #include "trace.h"
+#include "hw/irq.h"
 
 #include "hw/boards.h"
 
@@ -84,6 +85,7 @@
      * unsigned, and treating them as signed here can break things */
     unsigned irq_set_ioctl;
     unsigned int sigmask_len;
+    GHashTable *gsimap;
 #ifdef KVM_CAP_IRQ_ROUTING
     struct kvm_irq_routing *irq_routes;
     int nr_allocated_irq_routes;
@@ -1332,19 +1334,49 @@
 }
 #endif /* !KVM_CAP_IRQ_ROUTING */
 
-int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
-                                   EventNotifier *rn, int virq)
+int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+                                       EventNotifier *rn, int virq)
 {
     return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n),
            rn ? event_notifier_get_fd(rn) : -1, virq, true);
 }
 
-int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
+int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+                                          int virq)
 {
     return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), -1, virq,
            false);
 }
 
+int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
+                                   EventNotifier *rn, qemu_irq irq)
+{
+    gpointer key, gsi;
+    gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi);
+
+    if (!found) {
+        return -ENXIO;
+    }
+    return kvm_irqchip_add_irqfd_notifier_gsi(s, n, rn, GPOINTER_TO_INT(gsi));
+}
+
+int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n,
+                                      qemu_irq irq)
+{
+    gpointer key, gsi;
+    gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi);
+
+    if (!found) {
+        return -ENXIO;
+    }
+    return kvm_irqchip_remove_irqfd_notifier_gsi(s, n, GPOINTER_TO_INT(gsi));
+}
+
+void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi)
+{
+    g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi));
+}
+
 static void kvm_irqchip_create(MachineState *machine, KVMState *s)
 {
     int ret;
@@ -1380,6 +1412,8 @@
     kvm_halt_in_kernel_allowed = true;
 
     kvm_init_irq_routing(s);
+
+    s->gsimap = g_hash_table_new(g_direct_hash, g_direct_equal);
 }
 
 /* Find number of supported CPUs using the recommended
diff --git a/kvm-stub.c b/kvm-stub.c
index 7ba90c5..d9ad624 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -24,6 +24,7 @@
 bool kvm_async_interrupts_allowed;
 bool kvm_eventfds_allowed;
 bool kvm_irqfds_allowed;
+bool kvm_resamplefds_allowed;
 bool kvm_msi_via_irqfd_allowed;
 bool kvm_gsi_routing_allowed;
 bool kvm_gsi_direct_mapping;
@@ -137,13 +138,14 @@
     return -ENOSYS;
 }
 
-int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
-                                   EventNotifier *rn, int virq)
+int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+                                       EventNotifier *rn, int virq)
 {
     return -ENOSYS;
 }
 
-int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
+int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+                                          int virq)
 {
     return -ENOSYS;
 }
diff --git a/linux-user/main.c b/linux-user/main.c
index c855bcc..6c5c2ef 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -1424,8 +1424,7 @@
 #ifdef TARGET_PPC
 static inline uint64_t cpu_ppc_get_tb(CPUPPCState *env)
 {
-    /* TO FIX */
-    return 0;
+    return cpu_get_real_ticks();
 }
 
 uint64_t cpu_ppc_load_tbl(CPUPPCState *env)
diff --git a/migration/block.c b/migration/block.c
index ddb59cc..ed865ed 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -457,7 +457,7 @@
         blk_mig_lock();
         if (bmds_aio_inflight(bmds, sector)) {
             blk_mig_unlock();
-            bdrv_drain_all();
+            bdrv_drain(bmds->bs);
         } else {
             blk_mig_unlock();
         }
diff --git a/migration/migration.c b/migration/migration.c
index c6ac08a..45719a0 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -26,6 +26,8 @@
 #include "qemu/thread.h"
 #include "qmp-commands.h"
 #include "trace.h"
+#include "qapi/util.h"
+#include "qapi-event.h"
 
 #define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */
 
@@ -97,6 +99,120 @@
     mis_current = NULL;
 }
 
+
+typedef struct {
+    bool optional;
+    uint32_t size;
+    uint8_t runstate[100];
+} GlobalState;
+
+static GlobalState global_state;
+
+static int global_state_store(void)
+{
+    if (!runstate_store((char *)global_state.runstate,
+                        sizeof(global_state.runstate))) {
+        error_report("runstate name too big: %s", global_state.runstate);
+        trace_migrate_state_too_big();
+        return -EINVAL;
+    }
+    return 0;
+}
+
+static char *global_state_get_runstate(void)
+{
+    return (char *)global_state.runstate;
+}
+
+void global_state_set_optional(void)
+{
+    global_state.optional = true;
+}
+
+static bool global_state_needed(void *opaque)
+{
+    GlobalState *s = opaque;
+    char *runstate = (char *)s->runstate;
+
+    /* If it is not optional, it is mandatory */
+
+    if (s->optional == false) {
+        return true;
+    }
+
+    /* If state is running or paused, it is not needed */
+
+    if (strcmp(runstate, "running") == 0 ||
+        strcmp(runstate, "paused") == 0) {
+        return false;
+    }
+
+    /* for any other state it is needed */
+    return true;
+}
+
+static int global_state_post_load(void *opaque, int version_id)
+{
+    GlobalState *s = opaque;
+    int ret = 0;
+    char *runstate = (char *)s->runstate;
+
+    trace_migrate_global_state_post_load(runstate);
+
+    if (strcmp(runstate, "running") != 0) {
+        Error *local_err = NULL;
+        int r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX,
+                                -1, &local_err);
+
+        if (r == -1) {
+            if (local_err) {
+                error_report_err(local_err);
+            }
+            return -EINVAL;
+        }
+        ret = vm_stop_force_state(r);
+    }
+
+    return ret;
+}
+
+static void global_state_pre_save(void *opaque)
+{
+    GlobalState *s = opaque;
+
+    trace_migrate_global_state_pre_save((char *)s->runstate);
+    s->size = strlen((char *)s->runstate) + 1;
+}
+
+static const VMStateDescription vmstate_globalstate = {
+    .name = "globalstate",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .post_load = global_state_post_load,
+    .pre_save = global_state_pre_save,
+    .needed = global_state_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(size, GlobalState),
+        VMSTATE_BUFFER(runstate, GlobalState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+void register_global_state(void)
+{
+    /* We will use this state regardless of whether we receive it over the wire */
+    strcpy((char *)&global_state.runstate, "");
+    vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
+}
+
+static void migrate_generate_event(int new_state)
+{
+    if (migrate_use_events()) {
+        qapi_event_send_migration(new_state, &error_abort);
+        trace_migrate_set_state(new_state);
+    }
+}
+
 /*
  * Called on -incoming with a defer: uri.
  * The migration can be started later after any parameters have been
@@ -114,6 +230,7 @@
 {
     const char *p;
 
+    qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
     if (!strcmp(uri, "defer")) {
         deferred_incoming_migration(errp);
     } else if (strstart(uri, "tcp:", &p)) {
@@ -142,7 +259,7 @@
     int ret;
 
     migration_incoming_state_new(f);
-
+    migrate_generate_event(MIGRATION_STATUS_ACTIVE);
     ret = qemu_loadvm_state(f);
 
     qemu_fclose(f);
@@ -150,10 +267,12 @@
     migration_incoming_state_destroy();
 
     if (ret < 0) {
+        migrate_generate_event(MIGRATION_STATUS_FAILED);
         error_report("load of migration failed: %s", strerror(-ret));
         migrate_decompress_threads_join();
         exit(EXIT_FAILURE);
     }
+    migrate_generate_event(MIGRATION_STATUS_COMPLETED);
     qemu_announce_self();
 
     /* Make sure all file formats flush their mutable metadata */
@@ -164,10 +283,20 @@
         exit(EXIT_FAILURE);
     }
 
-    if (autostart) {
+    /* runstate == "" means that we haven't received it through the
+     * wire, so we obey autostart.  runstate == "running" means that we
+     * need to start the VM, and we need to make sure that we do it after
+     * everything else has finished.  Every other state change is done
+     * in the post_load function */
+
+    if (strcmp(global_state_get_runstate(), "running") == 0) {
         vm_start();
-    } else {
-        runstate_set(RUN_STATE_PAUSED);
+    } else if (strcmp(global_state_get_runstate(), "") == 0) {
+        if (autostart) {
+            vm_start();
+        } else {
+            runstate_set(RUN_STATE_PAUSED);
+        }
     }
     migrate_decompress_threads_join();
 }
@@ -392,8 +521,8 @@
 
 static void migrate_set_state(MigrationState *s, int old_state, int new_state)
 {
-    if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
-        trace_migrate_set_state(new_state);
+    if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) {
+        migrate_generate_event(new_state);
     }
 }
 
@@ -432,8 +561,7 @@
 {
     trace_migrate_fd_error();
     assert(s->file == NULL);
-    s->state = MIGRATION_STATUS_FAILED;
-    trace_migrate_set_state(MIGRATION_STATUS_FAILED);
+    migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
     notifier_list_notify(&migration_state_notifiers, s);
 }
 
@@ -517,8 +645,7 @@
     s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
                decompress_thread_count;
     s->bandwidth_limit = bandwidth_limit;
-    s->state = MIGRATION_STATUS_SETUP;
-    trace_migrate_set_state(MIGRATION_STATUS_SETUP);
+    migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
 
     s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     return s;
@@ -577,7 +704,6 @@
         error_setg(errp, QERR_MIGRATION_ACTIVE);
         return;
     }
-
     if (runstate_check(RUN_STATE_INMIGRATE)) {
         error_setg(errp, "Guest is waiting for an incoming migration");
         return;
@@ -592,6 +718,12 @@
         return;
     }
 
+    /* We are starting a new migration, so we want to start in a clean
+       state.  This change is only needed if previous migration
+       failed/was cancelled.  We don't use migrate_set_state() because
+       we are setting the initial state, not changing it. */
+    s->state = MIGRATION_STATUS_NONE;
+
     s = migrate_init(&params);
 
     if (strstart(uri, "tcp:", &p)) {
@@ -611,7 +743,7 @@
     } else {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
                    "a valid migration protocol");
-        s->state = MIGRATION_STATUS_FAILED;
+        migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
         return;
     }
 
@@ -740,6 +872,15 @@
     return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
 }
 
+bool migrate_use_events(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
+}
+
 int migrate_use_xbzrle(void)
 {
     MigrationState *s;
@@ -793,10 +934,13 @@
                 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
                 old_vm_running = runstate_is_running();
 
-                ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
-                if (ret >= 0) {
-                    qemu_file_set_rate_limit(s->file, INT64_MAX);
-                    qemu_savevm_state_complete(s->file);
+                ret = global_state_store();
+                if (!ret) {
+                    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+                    if (ret >= 0) {
+                        qemu_file_set_rate_limit(s->file, INT64_MAX);
+                        qemu_savevm_state_complete(s->file);
+                    }
                 }
                 qemu_mutex_unlock_iothread();
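
The migrate_set_state() hunk above also fixes the compare-and-swap test: the transition (and the new MIGRATION event) should fire only when the CAS actually replaced old_state. A sketch of the intended idiom, assuming QEMU's atomic_cmpxchg() returns the value the location held before the operation:

    static void set_state_sketch(int *state, int old_state, int new_state)
    {
        /* The returned value equals old_state exactly when this thread
         * performed the transition, so only the winner reports it. */
        if (atomic_cmpxchg(state, old_state, new_state) == old_state) {
            /* report the state change / emit the event here */
        }
    }
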
 
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 557c1c1..6bb3dc1 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -129,7 +129,7 @@
     int ret = 0;
 
     if (f->ops->before_ram_iterate) {
-        ret = f->ops->before_ram_iterate(f, f->opaque, flags);
+        ret = f->ops->before_ram_iterate(f, f->opaque, flags, NULL);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
         }
@@ -141,24 +141,30 @@
     int ret = 0;
 
     if (f->ops->after_ram_iterate) {
-        ret = f->ops->after_ram_iterate(f, f->opaque, flags);
+        ret = f->ops->after_ram_iterate(f, f->opaque, flags, NULL);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
         }
     }
 }
 
-void ram_control_load_hook(QEMUFile *f, uint64_t flags)
+void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data)
 {
     int ret = -EINVAL;
 
     if (f->ops->hook_ram_load) {
-        ret = f->ops->hook_ram_load(f, f->opaque, flags);
+        ret = f->ops->hook_ram_load(f, f->opaque, flags, data);
         if (ret < 0) {
             qemu_file_set_error(f, ret);
         }
     } else {
-        qemu_file_set_error(f, ret);
+        /*
+         * The source sends RAM_CONTROL_HOOK specifically because it expects
+         * a hook on the destination, so only that flag is treated as an error.
+         */
+        if (flags == RAM_CONTROL_HOOK) {
+            qemu_file_set_error(f, ret);
+        }
     }
 }
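
Judging from the call site above, a destination-side hook now receives the extra data pointer as well; a hedged sketch of what an implementation might look like (the function name is hypothetical and the signature is inferred from the call):

    static int example_hook_ram_load(QEMUFile *f, void *opaque,
                                     uint64_t flags, void *data)
    {
        if (flags == RAM_CONTROL_BLOCK_REG) {
            const char *block_name = data;   /* block->idstr from the source */
            /* look up the named RAMBlock and register it with the transport */
            (void)block_name;
        }
        return 0;
    }
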
 
diff --git a/migration/ram.c b/migration/ram.c
index 57368e1..c696814 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -222,6 +222,7 @@
 static RAMBlock *last_sent_block;
 static ram_addr_t last_offset;
 static unsigned long *migration_bitmap;
+static QemuMutex migration_bitmap_mutex;
 static uint64_t migration_dirty_pages;
 static uint32_t last_version;
 static bool ram_bulk_stage;
@@ -494,6 +495,7 @@
     return 1;
 }
 
+/* Called with rcu_read_lock() to protect migration_bitmap */
 static inline
 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                  ram_addr_t start)
@@ -502,26 +504,31 @@
     unsigned long nr = base + (start >> TARGET_PAGE_BITS);
     uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
     unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
+    unsigned long *bitmap;
 
     unsigned long next;
 
+    bitmap = atomic_rcu_read(&migration_bitmap);
     if (ram_bulk_stage && nr > base) {
         next = nr + 1;
     } else {
-        next = find_next_bit(migration_bitmap, size, nr);
+        next = find_next_bit(bitmap, size, nr);
     }
 
     if (next < size) {
-        clear_bit(next, migration_bitmap);
+        clear_bit(next, bitmap);
         migration_dirty_pages--;
     }
     return (next - base) << TARGET_PAGE_BITS;
 }
 
+/* Called with rcu_read_lock() to protect migration_bitmap */
 static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
 {
+    unsigned long *bitmap;
+    bitmap = atomic_rcu_read(&migration_bitmap);
     migration_dirty_pages +=
-        cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
+        cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
 }
 
 
@@ -563,11 +570,13 @@
     trace_migration_bitmap_sync_start();
     address_space_sync_dirty_bitmap(&address_space_memory);
 
+    qemu_mutex_lock(&migration_bitmap_mutex);
     rcu_read_lock();
     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
     }
     rcu_read_unlock();
+    qemu_mutex_unlock(&migration_bitmap_mutex);
 
     trace_migration_bitmap_sync_end(migration_dirty_pages
                                     - num_dirty_pages_init);
@@ -1017,10 +1026,15 @@
 
 static void migration_end(void)
 {
-    if (migration_bitmap) {
+    /* The caller holds the iothread lock or runs in a bottom half, so
+     * there is no write race against this migration_bitmap
+     */
+    unsigned long *bitmap = migration_bitmap;
+    atomic_rcu_set(&migration_bitmap, NULL);
+    if (bitmap) {
         memory_global_dirty_log_stop();
-        g_free(migration_bitmap);
-        migration_bitmap = NULL;
+        synchronize_rcu();
+        g_free(bitmap);
     }
 
     XBZRLE_cache_lock();
@@ -1051,6 +1065,30 @@
 
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
 
+void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
+{
+    /* Called from the QEMU main thread, so there is
+     * no write race against this migration_bitmap
+     */
+    if (migration_bitmap) {
+        unsigned long *old_bitmap = migration_bitmap, *bitmap;
+        bitmap = bitmap_new(new);
+
+        /* Prevent migration_bitmap_sync_range() from setting bits in the
+         * old bitmap while we copy it.  Bits being cleared concurrently
+         * (by migration_bitmap_find_and_reset_dirty()) remain safe for
+         * migration.
+         */
+        qemu_mutex_lock(&migration_bitmap_mutex);
+        bitmap_copy(bitmap, old_bitmap, old);
+        bitmap_set(bitmap, old, new - old);
+        atomic_rcu_set(&migration_bitmap, bitmap);
+        qemu_mutex_unlock(&migration_bitmap_mutex);
+        migration_dirty_pages += new - old;
+        synchronize_rcu();
+        g_free(old_bitmap);
+    }
+}
 
 /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
  * long-running RCU critical section.  When rcu-reclaims in the code
@@ -1067,6 +1105,7 @@
     dirty_rate_high_cnt = 0;
     bitmap_sync_count = 0;
     migration_bitmap_sync_init();
+    qemu_mutex_init(&migration_bitmap_mutex);
 
     if (migrate_use_xbzrle()) {
         XBZRLE_cache_lock();
@@ -1477,6 +1516,8 @@
                                 error_report_err(local_err);
                             }
                         }
+                        ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
+                                              block->idstr);
                         break;
                     }
                 }
@@ -1545,7 +1586,7 @@
             break;
         default:
             if (flags & RAM_SAVE_FLAG_HOOK) {
-                ram_control_load_hook(f, flags);
+                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
             } else {
                 error_report("Unknown combination of migration flags: %#x",
                              flags);
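
The bitmap handling above follows the usual RCU pointer-swap shape: readers dereference migration_bitmap with atomic_rcu_read() under rcu_read_lock(), while the writer publishes a replacement and frees the old copy only after a grace period. A generic sketch of that shape (not the patch code itself):

    static void rcu_swap_sketch(unsigned long **slot, unsigned long *new_copy)
    {
        unsigned long *old_copy = *slot;

        atomic_rcu_set(slot, new_copy);   /* readers now observe new_copy  */
        synchronize_rcu();                /* wait out pre-existing readers */
        g_free(old_copy);                 /* safe: no reader can still hold it */
    }
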
diff --git a/migration/rdma.c b/migration/rdma.c
index b777273..f106b2a 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -215,17 +215,19 @@
  * the information. It's small anyway, so a list is overkill.
  */
 typedef struct RDMALocalBlock {
-    uint8_t  *local_host_addr; /* local virtual address */
-    uint64_t remote_host_addr; /* remote virtual address */
-    uint64_t offset;
-    uint64_t length;
-    struct   ibv_mr **pmr;     /* MRs for chunk-level registration */
-    struct   ibv_mr *mr;       /* MR for non-chunk-level registration */
-    uint32_t *remote_keys;     /* rkeys for chunk-level registration */
-    uint32_t remote_rkey;      /* rkeys for non-chunk-level registration */
-    int      index;            /* which block are we */
-    bool     is_ram_block;
-    int      nb_chunks;
+    char          *block_name;
+    uint8_t       *local_host_addr; /* local virtual address */
+    uint64_t       remote_host_addr; /* remote virtual address */
+    uint64_t       offset;
+    uint64_t       length;
+    struct         ibv_mr **pmr;    /* MRs for chunk-level registration */
+    struct         ibv_mr *mr;      /* MR for non-chunk-level registration */
+    uint32_t      *remote_keys;     /* rkeys for chunk-level registration */
+    uint32_t       remote_rkey;     /* rkeys for non-chunk-level registration */
+    int            index;           /* which block are we */
+    unsigned int   src_index;       /* (Only used on dest) */
+    bool           is_ram_block;
+    int            nb_chunks;
     unsigned long *transit_bitmap;
     unsigned long *unregister_bitmap;
 } RDMALocalBlock;
@@ -353,6 +355,9 @@
     RDMALocalBlocks local_ram_blocks;
     RDMADestBlock  *dest_blocks;
 
+    /* Index of the next RAMBlock received during block registration */
+    unsigned int    next_src_index;
+
     /*
      * Migration on *destination* started.
      * Then use coroutine yield function.
@@ -411,7 +416,7 @@
  */
 typedef struct QEMU_PACKED {
     union QEMU_PACKED {
-        uint64_t current_addr;  /* offset into the ramblock of the chunk */
+        uint64_t current_addr;  /* offset into the ram_addr_t space */
         uint64_t chunk;         /* chunk to lookup if unregistering */
     } key;
     uint32_t current_index; /* which ramblock the chunk belongs to */
@@ -419,8 +424,19 @@
     uint64_t chunks;            /* how many sequential chunks to register */
 } RDMARegister;
 
-static void register_to_network(RDMARegister *reg)
+static void register_to_network(RDMAContext *rdma, RDMARegister *reg)
 {
+    RDMALocalBlock *local_block;
+    local_block  = &rdma->local_ram_blocks.block[reg->current_index];
+
+    if (local_block->is_ram_block) {
+        /*
+         * current_addr as passed in is an address in the local ram_addr_t
+         * space, we need to translate this for the destination
+         */
+        reg->key.current_addr -= local_block->offset;
+        reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset;
+    }
     reg->key.current_addr = htonll(reg->key.current_addr);
     reg->current_index = htonl(reg->current_index);
     reg->chunks = htonll(reg->chunks);
@@ -436,13 +452,19 @@
 typedef struct QEMU_PACKED {
     uint32_t value;     /* if zero, we will madvise() */
     uint32_t block_idx; /* which ram block index */
-    uint64_t offset;    /* where in the remote ramblock this chunk */
+    uint64_t offset;    /* Address in remote ram_addr_t space */
     uint64_t length;    /* length of the chunk */
 } RDMACompress;
 
-static void compress_to_network(RDMACompress *comp)
+static void compress_to_network(RDMAContext *rdma, RDMACompress *comp)
 {
     comp->value = htonl(comp->value);
+    /*
+     * comp->offset as passed in is an address in the local ram_addr_t
+     * space, we need to translate this for the destination
+     */
+    comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset;
+    comp->offset += rdma->dest_blocks[comp->block_idx].offset;
     comp->block_idx = htonl(comp->block_idx);
     comp->offset = htonll(comp->offset);
     comp->length = htonll(comp->length);
@@ -511,27 +533,27 @@
     return result;
 }
 
-static int rdma_add_block(RDMAContext *rdma, void *host_addr,
+static int rdma_add_block(RDMAContext *rdma, const char *block_name,
+                         void *host_addr,
                          ram_addr_t block_offset, uint64_t length)
 {
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
-    RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap,
-        (void *)(uintptr_t)block_offset);
+    RDMALocalBlock *block;
     RDMALocalBlock *old = local->block;
 
-    assert(block == NULL);
-
     local->block = g_malloc0(sizeof(RDMALocalBlock) * (local->nb_blocks + 1));
 
     if (local->nb_blocks) {
         int x;
 
-        for (x = 0; x < local->nb_blocks; x++) {
-            g_hash_table_remove(rdma->blockmap,
-                                (void *)(uintptr_t)old[x].offset);
-            g_hash_table_insert(rdma->blockmap,
-                                (void *)(uintptr_t)old[x].offset,
-                                &local->block[x]);
+        if (rdma->blockmap) {
+            for (x = 0; x < local->nb_blocks; x++) {
+                g_hash_table_remove(rdma->blockmap,
+                                    (void *)(uintptr_t)old[x].offset);
+                g_hash_table_insert(rdma->blockmap,
+                                    (void *)(uintptr_t)old[x].offset,
+                                    &local->block[x]);
+            }
         }
         memcpy(local->block, old, sizeof(RDMALocalBlock) * local->nb_blocks);
         g_free(old);
@@ -539,10 +561,12 @@
 
     block = &local->block[local->nb_blocks];
 
+    block->block_name = g_strdup(block_name);
     block->local_host_addr = host_addr;
     block->offset = block_offset;
     block->length = length;
     block->index = local->nb_blocks;
+    block->src_index = ~0U; /* Filled in by the receipt of the block list */
     block->nb_chunks = ram_chunk_index(host_addr, host_addr + length) + 1UL;
     block->transit_bitmap = bitmap_new(block->nb_chunks);
     bitmap_clear(block->transit_bitmap, 0, block->nb_chunks);
@@ -552,9 +576,12 @@
 
     block->is_ram_block = local->init ? false : true;
 
-    g_hash_table_insert(rdma->blockmap, (void *) block_offset, block);
+    if (rdma->blockmap) {
+        g_hash_table_insert(rdma->blockmap, (void *) block_offset, block);
+    }
 
-    trace_rdma_add_block(local->nb_blocks, (uintptr_t) block->local_host_addr,
+    trace_rdma_add_block(block_name, local->nb_blocks,
+                         (uintptr_t) block->local_host_addr,
                          block->offset, block->length,
                          (uintptr_t) (block->local_host_addr + block->length),
                          BITS_TO_LONGS(block->nb_chunks) *
@@ -574,7 +601,7 @@
 static int qemu_rdma_init_one_block(const char *block_name, void *host_addr,
     ram_addr_t block_offset, ram_addr_t length, void *opaque)
 {
-    return rdma_add_block(opaque, host_addr, block_offset, length);
+    return rdma_add_block(opaque, block_name, host_addr, block_offset, length);
 }
 
 /*
@@ -587,7 +614,6 @@
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
 
     assert(rdma->blockmap == NULL);
-    rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal);
     memset(local, 0, sizeof *local);
     qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma);
     trace_qemu_rdma_init_ram_blocks(local->nb_blocks);
@@ -597,16 +623,19 @@
     return 0;
 }
 
-static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
+/*
+ * Note: If used outside of cleanup, the caller must ensure that the destination
+ * block structures are also updated
+ */
+static int rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block)
 {
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
-    RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap,
-        (void *) block_offset);
     RDMALocalBlock *old = local->block;
     int x;
 
-    assert(block);
-
+    if (rdma->blockmap) {
+        g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)block->offset);
+    }
     if (block->pmr) {
         int j;
 
@@ -636,8 +665,14 @@
     g_free(block->remote_keys);
     block->remote_keys = NULL;
 
-    for (x = 0; x < local->nb_blocks; x++) {
-        g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)old[x].offset);
+    g_free(block->block_name);
+    block->block_name = NULL;
+
+    if (rdma->blockmap) {
+        for (x = 0; x < local->nb_blocks; x++) {
+            g_hash_table_remove(rdma->blockmap,
+                                (void *)(uintptr_t)old[x].offset);
+        }
     }
 
     if (local->nb_blocks > 1) {
@@ -659,8 +694,7 @@
         local->block = NULL;
     }
 
-    trace_rdma_delete_block(local->nb_blocks,
-                           (uintptr_t)block->local_host_addr,
+    trace_rdma_delete_block(block, (uintptr_t)block->local_host_addr,
                            block->offset, block->length,
                             (uintptr_t)(block->local_host_addr + block->length),
                            BITS_TO_LONGS(block->nb_chunks) *
@@ -670,7 +704,7 @@
 
     local->nb_blocks--;
 
-    if (local->nb_blocks) {
+    if (local->nb_blocks && rdma->blockmap) {
         for (x = 0; x < local->nb_blocks; x++) {
             g_hash_table_insert(rdma->blockmap,
                                 (void *)(uintptr_t)local->block[x].offset,
@@ -1223,7 +1257,7 @@
 
 /*
  * Perform a non-optimized memory unregistration after every transfer
- * for demonsration purposes, only if pin-all is not requested.
+ * for demonstration purposes, only if pin-all is not requested.
  *
  * Potential optimizations:
  * 1. Start a new thread to run this function continuously
@@ -1289,7 +1323,7 @@
         rdma->total_registrations--;
 
         reg.key.chunk = chunk;
-        register_to_network(&reg);
+        register_to_network(rdma, &reg);
         ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg,
                                 &resp, NULL, NULL);
         if (ret < 0) {
@@ -1910,7 +1944,7 @@
                 trace_qemu_rdma_write_one_zero(chunk, sge.length,
                                                current_index, current_addr);
 
-                compress_to_network(&comp);
+                compress_to_network(rdma, &comp);
                 ret = qemu_rdma_exchange_send(rdma, &head,
                                 (uint8_t *) &comp, NULL, NULL, NULL);
 
@@ -1937,7 +1971,7 @@
             trace_qemu_rdma_write_one_sendreg(chunk, sge.length, current_index,
                                               current_addr);
 
-            register_to_network(&reg);
+            register_to_network(rdma, &reg);
             ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) &reg,
                                     &resp, &reg_result_idx, NULL);
             if (ret < 0) {
@@ -2198,7 +2232,7 @@
 
     if (rdma->local_ram_blocks.block) {
         while (rdma->local_ram_blocks.nb_blocks) {
-            rdma_delete_block(rdma, rdma->local_ram_blocks.block->offset);
+            rdma_delete_block(rdma, &rdma->local_ram_blocks.block[0]);
         }
     }
 
@@ -2271,6 +2305,14 @@
         goto err_rdma_source_init;
     }
 
+    /* Build the hash that maps from offset to RAMBlock */
+    rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal);
+    for (idx = 0; idx < rdma->local_ram_blocks.nb_blocks; idx++) {
+        g_hash_table_insert(rdma->blockmap,
+                (void *)(uintptr_t)rdma->local_ram_blocks.block[idx].offset,
+                &rdma->local_ram_blocks.block[idx]);
+    }
+
     for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
         ret = qemu_rdma_reg_control(rdma, idx);
         if (ret) {
@@ -2880,6 +2922,14 @@
     return ret;
 }
 
+static int dest_ram_sort_func(const void *a, const void *b)
+{
+    unsigned int a_index = ((const RDMALocalBlock *)a)->src_index;
+    unsigned int b_index = ((const RDMALocalBlock *)b)->src_index;
+
+    return (a_index < b_index) ? -1 : (a_index != b_index);
+}
+
 /*
  * During each iteration of the migration, we listen for instructions
  * by the source VM to perform dynamic page registrations before they
@@ -2889,8 +2939,7 @@
  *
  * Keep doing this until the source tells us to stop.
  */
-static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
-                                         uint64_t flags)
+static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque)
 {
     RDMAControlHeader reg_resp = { .len = sizeof(RDMARegisterResult),
                                .type = RDMA_CONTROL_REGISTER_RESULT,
@@ -2920,7 +2969,7 @@
     CHECK_ERROR_STATE();
 
     do {
-        trace_qemu_rdma_registration_handle_wait(flags);
+        trace_qemu_rdma_registration_handle_wait();
 
         ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_NONE);
 
@@ -2943,6 +2992,13 @@
             trace_qemu_rdma_registration_handle_compress(comp->length,
                                                          comp->block_idx,
                                                          comp->offset);
+            if (comp->block_idx >= rdma->local_ram_blocks.nb_blocks) {
+                error_report("rdma: 'compress' bad block index %u (vs %d)",
+                             (unsigned int)comp->block_idx,
+                             rdma->local_ram_blocks.nb_blocks);
+                ret = -EIO;
+                break;
+            }
             block = &(rdma->local_ram_blocks.block[comp->block_idx]);
 
             host_addr = block->local_host_addr +
@@ -2958,6 +3014,13 @@
         case RDMA_CONTROL_RAM_BLOCKS_REQUEST:
             trace_qemu_rdma_registration_handle_ram_blocks();
 
+            /* Sort our local RAM Block list into the same order as the source's;
+             * we can do this since we've filled in a src_index in the list
+             * as we received the RAMBlock list earlier.
+             */
+            qsort(rdma->local_ram_blocks.block,
+                  rdma->local_ram_blocks.nb_blocks,
+                  sizeof(RDMALocalBlock), dest_ram_sort_func);
             if (rdma->pin_all) {
                 ret = qemu_rdma_reg_whole_ram_blocks(rdma);
                 if (ret) {
@@ -2985,6 +3048,12 @@
                 rdma->dest_blocks[i].length = local->block[i].length;
 
                 dest_block_to_network(&rdma->dest_blocks[i]);
+                trace_qemu_rdma_registration_handle_ram_blocks_loop(
+                    local->block[i].block_name,
+                    local->block[i].offset,
+                    local->block[i].length,
+                    local->block[i].local_host_addr,
+                    local->block[i].src_index);
             }
 
             blocks.len = rdma->local_ram_blocks.nb_blocks
@@ -3018,8 +3087,23 @@
                 trace_qemu_rdma_registration_handle_register_loop(count,
                          reg->current_index, reg->key.current_addr, reg->chunks);
 
+                if (reg->current_index >= rdma->local_ram_blocks.nb_blocks) {
+                    error_report("rdma: 'register' bad block index %u (vs %d)",
+                                 (unsigned int)reg->current_index,
+                                 rdma->local_ram_blocks.nb_blocks);
+                    ret = -ENOENT;
+                    break;
+                }
                 block = &(rdma->local_ram_blocks.block[reg->current_index]);
                 if (block->is_ram_block) {
+                    if (block->offset > reg->key.current_addr) {
+                        error_report("rdma: bad register address for block %s"
+                            " offset: %" PRIx64 " current_addr: %" PRIx64,
+                            block->block_name, block->offset,
+                            reg->key.current_addr);
+                        ret = -ERANGE;
+                        break;
+                    }
                     host_addr = (block->local_host_addr +
                                 (reg->key.current_addr - block->offset));
                     chunk = ram_chunk_index(block->local_host_addr,
@@ -3028,6 +3112,14 @@
                     chunk = reg->key.chunk;
                     host_addr = block->local_host_addr +
                         (reg->key.chunk * (1UL << RDMA_REG_CHUNK_SHIFT));
+                    /* Check for particularly bad chunk value */
+                    if (host_addr < (void *)block->local_host_addr) {
+                        error_report("rdma: bad chunk for block %s"
+                            " chunk: %" PRIx64,
+                            block->block_name, reg->key.chunk);
+                        ret = -ERANGE;
+                        break;
+                    }
                 }
                 chunk_start = ram_chunk_start(block, chunk);
                 chunk_end = ram_chunk_end(block, chunk + reg->chunks);
@@ -3108,8 +3200,56 @@
     return ret;
 }
 
+/* Destination:
+ * Called via a ram_control_load_hook during the initial RAM load section which
+ * lists the RAMBlocks by name.  This lets us know the order of the RAMBlocks
+ * on the source.
+ * We've already built our local RAMBlock list, but not yet sent the list to
+ * the source.
+ */
+static int rdma_block_notification_handle(QEMUFileRDMA *rfile, const char *name)
+{
+    RDMAContext *rdma = rfile->rdma;
+    int curr;
+    int found = -1;
+
+    /* Find the matching RAMBlock in our local list */
+    for (curr = 0; curr < rdma->local_ram_blocks.nb_blocks; curr++) {
+        if (!strcmp(rdma->local_ram_blocks.block[curr].block_name, name)) {
+            found = curr;
+            break;
+        }
+    }
+
+    if (found == -1) {
+        error_report("RAMBlock '%s' not found on destination", name);
+        return -ENOENT;
+    }
+
+    rdma->local_ram_blocks.block[curr].src_index = rdma->next_src_index;
+    trace_rdma_block_notification_handle(name, rdma->next_src_index);
+    rdma->next_src_index++;
+
+    return 0;
+}
+
+static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data)
+{
+    switch (flags) {
+    case RAM_CONTROL_BLOCK_REG:
+        return rdma_block_notification_handle(opaque, data);
+
+    case RAM_CONTROL_HOOK:
+        return qemu_rdma_registration_handle(f, opaque);
+
+    default:
+        /* Shouldn't be called with any other values */
+        abort();
+    }
+}
+
 static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
-                                        uint64_t flags)
+                                        uint64_t flags, void *data)
 {
     QEMUFileRDMA *rfile = opaque;
     RDMAContext *rdma = rfile->rdma;
@@ -3128,7 +3268,7 @@
  * First, flush writes, if any.
  */
 static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
-                                       uint64_t flags)
+                                       uint64_t flags, void *data)
 {
     Error *local_err = NULL, **errp = &local_err;
     QEMUFileRDMA *rfile = opaque;
@@ -3148,7 +3288,7 @@
     if (flags == RAM_CONTROL_SETUP) {
         RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT };
         RDMALocalBlocks *local = &rdma->local_ram_blocks;
-        int reg_result_idx, i, j, nb_dest_blocks;
+        int reg_result_idx, i, nb_dest_blocks;
 
         head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
         trace_qemu_rdma_registration_stop_ram();
@@ -3184,9 +3324,11 @@
          */
 
         if (local->nb_blocks != nb_dest_blocks) {
-            ERROR(errp, "ram blocks mismatch #1! "
+            ERROR(errp, "ram blocks mismatch (Number of blocks %d vs %d) "
                         "Your QEMU command line parameters are probably "
-                        "not identical on both the source and destination.");
+                        "not identical on both the source and destination.",
+                        local->nb_blocks, nb_dest_blocks);
+            rdma->error_state = -EINVAL;
             return -EINVAL;
         }
 
@@ -3196,30 +3338,18 @@
         for (i = 0; i < nb_dest_blocks; i++) {
             network_to_dest_block(&rdma->dest_blocks[i]);
 
-            /* search local ram blocks */
-            for (j = 0; j < local->nb_blocks; j++) {
-                if (rdma->dest_blocks[i].offset != local->block[j].offset) {
-                    continue;
-                }
-
-                if (rdma->dest_blocks[i].length != local->block[j].length) {
-                    ERROR(errp, "ram blocks mismatch #2! "
-                        "Your QEMU command line parameters are probably "
-                        "not identical on both the source and destination.");
-                    return -EINVAL;
-                }
-                local->block[j].remote_host_addr =
-                        rdma->dest_blocks[i].remote_host_addr;
-                local->block[j].remote_rkey = rdma->dest_blocks[i].remote_rkey;
-                break;
-            }
-
-            if (j >= local->nb_blocks) {
-                ERROR(errp, "ram blocks mismatch #3! "
-                        "Your QEMU command line parameters are probably "
-                        "not identical on both the source and destination.");
+            /* We require that the blocks are in the same order */
+            if (rdma->dest_blocks[i].length != local->block[i].length) {
+                ERROR(errp, "Block %s/%d has a different length %" PRIu64
+                            " vs %" PRIu64, local->block[i].block_name, i,
+                            local->block[i].length,
+                            rdma->dest_blocks[i].length);
+                rdma->error_state = -EINVAL;
                 return -EINVAL;
             }
+            local->block[i].remote_host_addr =
+                    rdma->dest_blocks[i].remote_host_addr;
+            local->block[i].remote_rkey = rdma->dest_blocks[i].remote_rkey;
         }
     }
 
@@ -3250,7 +3380,7 @@
     .get_buffer    = qemu_rdma_get_buffer,
     .get_fd        = qemu_rdma_get_fd,
     .close         = qemu_rdma_close,
-    .hook_ram_load = qemu_rdma_registration_handle,
+    .hook_ram_load = rdma_load_hook,
 };
 
 static const QEMUFileOps rdma_write_ops = {
@@ -3263,12 +3393,13 @@
 
 static void *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
 {
-    QEMUFileRDMA *r = g_malloc0(sizeof(QEMUFileRDMA));
+    QEMUFileRDMA *r;
 
     if (qemu_file_mode_is_not_valid(mode)) {
         return NULL;
     }
 
+    r = g_malloc0(sizeof(QEMUFileRDMA));
     r->rdma = rdma;
 
     if (mode[0] == 'w') {
@@ -3287,7 +3418,7 @@
     QEMUFile *f;
     Error *local_err = NULL, **errp = &local_err;
 
-    trace_qemu_dma_accept_incoming_migration();
+    trace_qemu_rdma_accept_incoming_migration();
     ret = qemu_rdma_accept(rdma);
 
     if (ret) {
@@ -3295,7 +3426,7 @@
         return;
     }
 
-    trace_qemu_dma_accept_incoming_migration_accepted();
+    trace_qemu_rdma_accept_incoming_migration_accepted();
 
     f = qemu_fopen_rdma(rdma, "rb");
     if (f == NULL) {
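
For illustration only (not in the patch): the destination now records the
position each RAMBlock name had in the source's list (src_index) as the names
arrive, sorts its local list into that order before replying, and both sides
then match blocks purely by index instead of by ram_addr_t offset.  A minimal
standalone sketch of that ordering trick, with made-up block names:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct {
        const char *name;
        unsigned int src_index;   /* position in the sender's name list */
    } Blk;

    static int by_src_index(const void *a, const void *b)
    {
        unsigned int ai = ((const Blk *)a)->src_index;
        unsigned int bi = ((const Blk *)b)->src_index;
        return (ai < bi) ? -1 : (ai != bi);
    }

    int main(void)
    {
        Blk local[] = { { "vga.vram", 0 }, { "pc.ram", 0 }, { "pc.bios", 0 } };
        const char *src_order[] = { "pc.ram", "pc.bios", "vga.vram" };
        unsigned int next = 0, i, j;

        for (i = 0; i < 3; i++) {                 /* names as they arrive */
            for (j = 0; j < 3; j++) {
                if (!strcmp(local[j].name, src_order[i])) {
                    local[j].src_index = next++;
                }
            }
        }
        qsort(local, 3, sizeof(Blk), by_src_index);
        for (i = 0; i < 3; i++) {
            printf("%s\n", local[i].name);        /* pc.ram, pc.bios, vga.vram */
        }
        return 0;
    }
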
diff --git a/migration/savevm.c b/migration/savevm.c
index 9e0e286..86735fc 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -246,11 +246,55 @@
 typedef struct SaveState {
     QTAILQ_HEAD(, SaveStateEntry) handlers;
     int global_section_id;
+    bool skip_configuration;
+    uint32_t len;
+    const char *name;
 } SaveState;
 
 static SaveState savevm_state = {
     .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
     .global_section_id = 0,
+    .skip_configuration = false,
+};
+
+void savevm_skip_configuration(void)
+{
+    savevm_state.skip_configuration = true;
+}
+
+
+static void configuration_pre_save(void *opaque)
+{
+    SaveState *state = opaque;
+    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
+
+    state->len = strlen(current_name);
+    state->name = current_name;
+}
+
+static int configuration_post_load(void *opaque, int version_id)
+{
+    SaveState *state = opaque;
+    const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
+
+    if (strncmp(state->name, current_name, state->len) != 0) {
+        error_report("Machine type received is '%s' and local is '%s'",
+                     state->name, current_name);
+        return -EINVAL;
+    }
+    return 0;
+}
+
+static const VMStateDescription vmstate_configuration = {
+    .name = "configuration",
+    .version_id = 1,
+    .post_load = configuration_post_load,
+    .pre_save = configuration_pre_save,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(len, SaveState),
+        VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, 0, len),
+        VMSTATE_END_OF_LIST()
+    },
 };
 
 static void dump_vmstate_vmsd(FILE *out_file,
@@ -653,41 +697,6 @@
     }
 }
 
-/*
- * Read a footer off the wire and check that it matches the expected section
- *
- * Returns: true if the footer was good
- *          false if there is a problem (and calls error_report to say why)
- */
-static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
-{
-    uint8_t read_mark;
-    uint32_t read_section_id;
-
-    if (skip_section_footers) {
-        /* No footer to check */
-        return true;
-    }
-
-    read_mark = qemu_get_byte(f);
-
-    if (read_mark != QEMU_VM_SECTION_FOOTER) {
-        error_report("Missing section footer for %s", se->idstr);
-        return false;
-    }
-
-    read_section_id = qemu_get_be32(f);
-    if (read_section_id != se->section_id) {
-        error_report("Mismatched section id in footer for %s -"
-                     " read 0x%x expected 0x%x",
-                     se->idstr, read_section_id, se->section_id);
-        return false;
-    }
-
-    /* All good */
-    return true;
-}
-
 bool qemu_savevm_state_blocked(Error **errp)
 {
     SaveStateEntry *se;
@@ -723,6 +732,11 @@
         se->ops->set_params(params, se->opaque);
     }
 
+    if (!savevm_state.skip_configuration) {
+        qemu_put_byte(f, QEMU_VM_CONFIGURATION);
+        vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
+    }
+
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->save_live_setup) {
             continue;
@@ -836,6 +850,11 @@
         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
             continue;
         }
+        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
+            trace_savevm_section_skip(se->idstr, se->section_id);
+            continue;
+        }
+
         trace_savevm_section_start(se->idstr, se->section_id);
 
         json_start_object(vmdesc, NULL);
@@ -949,6 +968,9 @@
         if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
             continue;
         }
+        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
+            continue;
+        }
 
         save_section_header(f, se, QEMU_VM_SECTION_FULL);
 
@@ -989,6 +1011,41 @@
     int version_id;
 };
 
+/*
+ * Read a footer off the wire and check that it matches the expected section
+ *
+ * Returns: true if the footer was good
+ *          false if there is a problem (and calls error_report to say why)
+ */
+static bool check_section_footer(QEMUFile *f, LoadStateEntry *le)
+{
+    uint8_t read_mark;
+    uint32_t read_section_id;
+
+    if (skip_section_footers) {
+        /* No footer to check */
+        return true;
+    }
+
+    read_mark = qemu_get_byte(f);
+
+    if (read_mark != QEMU_VM_SECTION_FOOTER) {
+        error_report("Missing section footer for %s", le->se->idstr);
+        return false;
+    }
+
+    read_section_id = qemu_get_be32(f);
+    if (read_section_id != le->section_id) {
+        error_report("Mismatched section id in footer for %s -"
+                     " read 0x%x expected 0x%x",
+                     le->se->idstr, read_section_id, le->section_id);
+        return false;
+    }
+
+    /* All good */
+    return true;
+}
+
 void loadvm_free_handlers(MigrationIncomingState *mis)
 {
     LoadStateEntry *le, *new_le;
@@ -1029,6 +1086,18 @@
         return -ENOTSUP;
     }
 
+    if (!savevm_state.skip_configuration) {
+        if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
+            error_report("Configuration section missing");
+            return -EINVAL;
+        }
+        ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
+
+        if (ret) {
+            return ret;
+        }
+    }
+
     while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
         uint32_t instance_id, version_id, section_id;
         SaveStateEntry *se;
@@ -1082,7 +1151,7 @@
                              " device '%s'", instance_id, idstr);
                 goto out;
             }
-            if (!check_section_footer(f, le->se)) {
+            if (!check_section_footer(f, le)) {
                 ret = -EINVAL;
                 goto out;
             }
@@ -1109,7 +1178,7 @@
                              section_id, le->se->idstr);
                 goto out;
             }
-            if (!check_section_footer(f, le->se)) {
+            if (!check_section_footer(f, le)) {
                 ret = -EINVAL;
                 goto out;
             }
@@ -1127,16 +1196,35 @@
      * Try to read in the VMDESC section as well, so that dumping tools that
      * intercept our migration stream have the chance to see it.
      */
-    if (qemu_get_byte(f) == QEMU_VM_VMDESCRIPTION) {
-        uint32_t size = qemu_get_be32(f);
-        uint8_t *buf = g_malloc(0x1000);
 
-        while (size > 0) {
-            uint32_t read_chunk = MIN(size, 0x1000);
-            qemu_get_buffer(f, buf, read_chunk);
-            size -= read_chunk;
+    /* We've got to be careful; if we don't read the data and just shut the fd
+     * then the sender can error if we close while it's still sending.
+     * We also mustn't read data that isn't there; some transports (RDMA)
+     * will stall waiting for that data when the source has already closed.
+     */
+    if (should_send_vmdesc()) {
+        uint8_t *buf;
+        uint32_t size;
+        section_type = qemu_get_byte(f);
+
+        if (section_type != QEMU_VM_VMDESCRIPTION) {
+            error_report("Expected vmdescription section, but got %d",
+                         section_type);
+            /*
+             * It doesn't seem worth failing at this point since
+             * we apparently have an otherwise valid VM state
+             */
+        } else {
+            buf = g_malloc(0x1000);
+            size = qemu_get_be32(f);
+
+            while (size > 0) {
+                uint32_t read_chunk = MIN(size, 0x1000);
+                qemu_get_buffer(f, buf, read_chunk);
+                size -= read_chunk;
+            }
+            g_free(buf);
         }
-        g_free(buf);
     }
 
     cpu_synchronize_all_post_init();
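
Illustration (not part of the patch): the new "configuration" section is just a
length-prefixed machine-type name, and loading fails if it does not match the
local machine.  A standalone sketch mirroring configuration_post_load(), with
example machine names:

    #include <stdio.h>
    #include <string.h>

    /* The received name is length-prefixed on the wire and not necessarily
     * NUL-terminated, hence the strncmp() with the received length. */
    static int check_machine_type(const char *received, unsigned int len,
                                  const char *local)
    {
        if (strncmp(received, local, len) != 0) {
            fprintf(stderr, "Machine type received is '%.*s' and local is '%s'\n",
                    (int)len, received, local);
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        /* e.g. a pc-i440fx-2.4 stream loaded by a pc-q35-2.4 machine fails */
        return check_machine_type("pc-i440fx-2.4", 13, "pc-q35-2.4") ? 1 : 0;
    }
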
diff --git a/migration/vmstate.c b/migration/vmstate.c
index 6138d1a..e8ccf22 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -276,6 +276,17 @@
     json_end_object(vmdesc);
 }
 
+
+bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque)
+{
+    if (vmsd->needed && !vmsd->needed(opaque)) {
+        /* optional section not needed */
+        return false;
+    }
+    return true;
+}
+
+
 void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
                         void *opaque, QJSON *vmdesc)
 {
diff --git a/pc-bios/README b/pc-bios/README
index 63e7254..05cf042 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -17,7 +17,7 @@
 - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware
   implementation for certain IBM POWER hardware.  The sources are at
   https://github.com/aik/SLOF, and the image currently in qemu is
-  built from git tag qemu-slof-20150313.
+  built from git tag qemu-slof-20150429.
 
 - sgabios (the Serial Graphics Adapter option ROM) provides a means for
   legacy x86 software to communicate with an attached serial console as
diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin
index ab72cba..0398ac6 100644
--- a/pc-bios/slof.bin
+++ b/pc-bios/slof.bin
Binary files differ
diff --git a/qapi-schema.json b/qapi-schema.json
index 106008c..1285b8c 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -523,6 +523,9 @@
 #          minimize migration traffic. The feature is disabled by default.
 #          (since 2.4 )
 #
+# @events: generate events for each migration state change
+#          (since 2.4 )
+#
 # @auto-converge: If enabled, QEMU will automatically throttle down the guest
 #          to speed up convergence of RAM migration. (since 1.6)
 #
@@ -530,7 +533,7 @@
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
-           'compress'] }
+           'compress', 'events'] }
 
 ##
 # @MigrationCapabilityStatus
diff --git a/qapi/event.json b/qapi/event.json
index 378dda5..f0cef01 100644
--- a/qapi/event.json
+++ b/qapi/event.json
@@ -243,6 +243,18 @@
 { 'event': 'SPICE_MIGRATE_COMPLETED' }
 
 ##
+# @MIGRATION
+#
+# Emitted when a migration event happens
+#
+# @status: @MigrationStatus describing the current migration status.
+#
+# Since: 2.4
+##
+{ 'event': 'MIGRATION',
+  'data': {'status': 'MigrationStatus'}}
+
+##
 # @ACPI_DEVICE_OST
 #
 # Emitted when guest executes ACPI _OST method.
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index befd00b..675f4b4 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -154,6 +154,8 @@
 
     /* If user has passed a time, validate and set it. */
     if (has_time) {
+        GDate date = { 0, };
+
         /* year-2038 will overflow in case time_t is 32bit */
         if (time_ns / 1000000000 != (time_t)(time_ns / 1000000000)) {
             error_setg(errp, "Time %" PRId64 " is too large", time_ns);
@@ -162,6 +164,11 @@
 
         tv.tv_sec = time_ns / 1000000000;
         tv.tv_usec = (time_ns % 1000000000) / 1000;
+        g_date_set_time_t(&date, tv.tv_sec);
+        if (date.year < 1970 || date.year >= 2070) {
+            error_setg_errno(errp, errno, "Invalid time");
+            return;
+        }
 
         ret = settimeofday(&tv, NULL);
         if (ret < 0) {
@@ -1325,18 +1332,18 @@
 /*
  * Walk list of mounted file systems in the guest, and trim them.
  */
-void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
+GuestFilesystemTrimResponse *
+qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
 {
+    GuestFilesystemTrimResponse *response;
+    GuestFilesystemTrimResultList *list;
+    GuestFilesystemTrimResult *result;
     int ret = 0;
     FsMountList mounts;
     struct FsMount *mount;
     int fd;
     Error *local_err = NULL;
-    struct fstrim_range r = {
-        .start = 0,
-        .len = -1,
-        .minlen = has_minimum ? minimum : 0,
-    };
+    struct fstrim_range r;
 
     slog("guest-fstrim called");
 
@@ -1344,36 +1351,59 @@
     build_fs_mount_list(&mounts, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
-        return;
+        return NULL;
     }
 
+    response = g_malloc0(sizeof(*response));
+
     QTAILQ_FOREACH(mount, &mounts, next) {
+        result = g_malloc0(sizeof(*result));
+        result->path = g_strdup(mount->dirname);
+
+        list = g_malloc0(sizeof(*list));
+        list->value = result;
+        list->next = response->paths;
+        response->paths = list;
+
         fd = qemu_open(mount->dirname, O_RDONLY);
         if (fd == -1) {
-            error_setg_errno(errp, errno, "failed to open %s", mount->dirname);
-            goto error;
+            result->error = g_strdup_printf("failed to open: %s",
+                                            strerror(errno));
+            result->has_error = true;
+            continue;
         }
 
         /* We try to cull filesytems we know won't work in advance, but other
          * filesytems may not implement fstrim for less obvious reasons.  These
-         * will report EOPNOTSUPP; we simply ignore these errors.  Any other
-         * error means an unexpected error, so return it in those cases.  In
-         * some other cases ENOTTY will be reported (e.g. CD-ROMs).
+         * will report EOPNOTSUPP; while in some other cases ENOTTY will be
+         * reported (e.g. CD-ROMs).
+         * Any other error means an unexpected error.
          */
+        r.start = 0;
+        r.len = -1;
+        r.minlen = has_minimum ? minimum : 0;
         ret = ioctl(fd, FITRIM, &r);
         if (ret == -1) {
-            if (errno != ENOTTY && errno != EOPNOTSUPP) {
-                error_setg_errno(errp, errno, "failed to trim %s",
-                                 mount->dirname);
-                close(fd);
-                goto error;
+            result->has_error = true;
+            if (errno == ENOTTY || errno == EOPNOTSUPP) {
+                result->error = g_strdup("trim not supported");
+            } else {
+                result->error = g_strdup_printf("failed to trim: %s",
+                                                strerror(errno));
             }
+            close(fd);
+            continue;
         }
+
+        result->has_minimum = true;
+        result->minimum = r.minlen;
+        result->has_trimmed = true;
+        result->trimmed = r.len;
         close(fd);
     }
 
-error:
     free_fs_mount_list(&mounts);
+    return response;
 }
 #endif /* CONFIG_FSTRIM */
 
@@ -2402,9 +2432,11 @@
 #endif /* CONFIG_FSFREEZE */
 
 #if !defined(CONFIG_FSTRIM)
-void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
+GuestFilesystemTrimResponse *
+qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
 {
     error_setg(errp, QERR_UNSUPPORTED);
+    return NULL;
 }
 #endif
 
diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index fbddc8b..a7822d5 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -16,11 +16,22 @@
 #include <powrprof.h>
 #include <stdio.h>
 #include <string.h>
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#include <iptypes.h>
+#include <iphlpapi.h>
+#ifdef CONFIG_QGA_NTDDSCSI
+#include <winioctl.h>
+#include <ntddscsi.h>
+#include <setupapi.h>
+#include <initguid.h>
+#endif
 #include "qga/guest-agent-core.h"
 #include "qga/vss-win32.h"
 #include "qga-qmp-commands.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/queue.h"
+#include "qemu/host-utils.h"
 
 #ifndef SHTDN_REASON_FLAG_PLANNED
 #define SHTDN_REASON_FLAG_PLANNED 0x80000000
@@ -382,10 +393,305 @@
     QTAILQ_INIT(&guest_file_state.filehandles);
 }
 
+#ifdef CONFIG_QGA_NTDDSCSI
+
+static GuestDiskBusType win2qemu[] = {
+    [BusTypeUnknown] = GUEST_DISK_BUS_TYPE_UNKNOWN,
+    [BusTypeScsi] = GUEST_DISK_BUS_TYPE_SCSI,
+    [BusTypeAtapi] = GUEST_DISK_BUS_TYPE_IDE,
+    [BusTypeAta] = GUEST_DISK_BUS_TYPE_IDE,
+    [BusType1394] = GUEST_DISK_BUS_TYPE_IEEE1394,
+    [BusTypeSsa] = GUEST_DISK_BUS_TYPE_SSA,
+    [BusTypeFibre] = GUEST_DISK_BUS_TYPE_FIBRE,
+    [BusTypeUsb] = GUEST_DISK_BUS_TYPE_USB,
+    [BusTypeRAID] = GUEST_DISK_BUS_TYPE_RAID,
+#if (_WIN32_WINNT >= 0x0600)
+    [BusTypeiScsi] = GUEST_DISK_BUS_TYPE_ISCSI,
+    [BusTypeSas] = GUEST_DISK_BUS_TYPE_SAS,
+    [BusTypeSata] = GUEST_DISK_BUS_TYPE_SATA,
+    [BusTypeSd] =  GUEST_DISK_BUS_TYPE_SD,
+    [BusTypeMmc] = GUEST_DISK_BUS_TYPE_MMC,
+#endif
+#if (_WIN32_WINNT >= 0x0601)
+    [BusTypeVirtual] = GUEST_DISK_BUS_TYPE_VIRTUAL,
+    [BusTypeFileBackedVirtual] = GUEST_DISK_BUS_TYPE_FILE_BACKED_VIRTUAL,
+#endif
+};
+
+static GuestDiskBusType find_bus_type(STORAGE_BUS_TYPE bus)
+{
+    if (bus >= ARRAY_SIZE(win2qemu) || (int)bus < 0) {
+        return GUEST_DISK_BUS_TYPE_UNKNOWN;
+    }
+    return win2qemu[(int)bus];
+}
+
+DEFINE_GUID(GUID_DEVINTERFACE_VOLUME,
+        0x53f5630dL, 0xb6bf, 0x11d0, 0x94, 0xf2,
+        0x00, 0xa0, 0xc9, 0x1e, 0xfb, 0x8b);
+
+static GuestPCIAddress *get_pci_info(char *guid, Error **errp)
+{
+    HDEVINFO dev_info;
+    SP_DEVINFO_DATA dev_info_data;
+    DWORD size = 0;
+    int i;
+    char dev_name[MAX_PATH];
+    char *buffer = NULL;
+    GuestPCIAddress *pci = NULL;
+    char *name = g_strdup(&guid[4]);
+
+    if (!QueryDosDevice(name, dev_name, ARRAY_SIZE(dev_name))) {
+        error_setg_win32(errp, GetLastError(), "failed to get dos device name");
+        goto out;
+    }
+
+    dev_info = SetupDiGetClassDevs(&GUID_DEVINTERFACE_VOLUME, 0, 0,
+                                   DIGCF_PRESENT | DIGCF_DEVICEINTERFACE);
+    if (dev_info == INVALID_HANDLE_VALUE) {
+        error_setg_win32(errp, GetLastError(), "failed to get devices tree");
+        goto out;
+    }
+
+    dev_info_data.cbSize = sizeof(SP_DEVINFO_DATA);
+    for (i = 0; SetupDiEnumDeviceInfo(dev_info, i, &dev_info_data); i++) {
+        DWORD addr, bus, slot, func, dev, data, size2;
+        while (!SetupDiGetDeviceRegistryProperty(dev_info, &dev_info_data,
+                                            SPDRP_PHYSICAL_DEVICE_OBJECT_NAME,
+                                            &data, (PBYTE)buffer, size,
+                                            &size2)) {
+            size = MAX(size, size2);
+            if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
+                g_free(buffer);
+                /* Double the size to avoid problems on
+                 * W2k MBCS systems per KB 888609.
+                 * https://support.microsoft.com/en-us/kb/259695 */
+                buffer = g_malloc(size * 2);
+            } else {
+                error_setg_win32(errp, GetLastError(),
+                        "failed to get device name");
+                goto out;
+            }
+        }
+
+        if (g_strcmp0(buffer, dev_name)) {
+            continue;
+        }
+
+        /* There is no need to allocate a buffer for the next functions. The
+         * size is known to be a ULONG, according to
+         * https://support.microsoft.com/en-us/kb/253232
+         * https://msdn.microsoft.com/en-us/library/windows/hardware/ff543095(v=vs.85).aspx
+         */
+        if (!SetupDiGetDeviceRegistryProperty(dev_info, &dev_info_data,
+                   SPDRP_BUSNUMBER, &data, (PBYTE)&bus, size, NULL)) {
+            break;
+        }
+
+        /* The function retrieves the device's address. This value will be
+         * transformed into device function and number */
+        if (!SetupDiGetDeviceRegistryProperty(dev_info, &dev_info_data,
+                   SPDRP_ADDRESS, &data, (PBYTE)&addr, size, NULL)) {
+            break;
+        }
+
+        /* This call returns UINumber of DEVICE_CAPABILITIES structure.
+         * This number is typically a user-perceived slot number. */
+        if (!SetupDiGetDeviceRegistryProperty(dev_info, &dev_info_data,
+                   SPDRP_UI_NUMBER, &data, (PBYTE)&slot, size, NULL)) {
+            break;
+        }
+
+        /* SetupApi gives us the same information as driver with
+         * IoGetDeviceProperty. According to Microsoft
+         * https://support.microsoft.com/en-us/kb/253232
+         * FunctionNumber = (USHORT)((propertyAddress) & 0x0000FFFF);
+         * DeviceNumber = (USHORT)(((propertyAddress) >> 16) & 0x0000FFFF);
+         * SPDRP_ADDRESS is propertyAddress, so we do the same.*/
+
+        func = addr & 0x0000FFFF;
+        dev = (addr >> 16) & 0x0000FFFF;
+        pci = g_malloc0(sizeof(*pci));
+        pci->domain = dev;
+        pci->slot = slot;
+        pci->function = func;
+        pci->bus = bus;
+        break;
+    }
+out:
+    g_free(buffer);
+    g_free(name);
+    return pci;
+}
+
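
As a worked example of the SPDRP_ADDRESS decoding above (per KB 253232): an
address of 0x00050002 splits into function 2 (low word) and device 5 (high
word).
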
+static int get_disk_bus_type(HANDLE vol_h, Error **errp)
+{
+    STORAGE_PROPERTY_QUERY query;
+    STORAGE_DEVICE_DESCRIPTOR *dev_desc, buf;
+    DWORD received;
+
+    dev_desc = &buf;
+    dev_desc->Size = sizeof(buf);
+    query.PropertyId = StorageDeviceProperty;
+    query.QueryType = PropertyStandardQuery;
+
+    if (!DeviceIoControl(vol_h, IOCTL_STORAGE_QUERY_PROPERTY, &query,
+                         sizeof(STORAGE_PROPERTY_QUERY), dev_desc,
+                         dev_desc->Size, &received, NULL)) {
+        error_setg_win32(errp, GetLastError(), "failed to get bus type");
+        return -1;
+    }
+
+    return dev_desc->BusType;
+}
+
+/* The VSS provider works with volumes, so it makes no difference whether
+ * the volume consists of spanned disks. Info about the first disk in the
+ * volume is returned for the spanned disk group (LVM) */
+static GuestDiskAddressList *build_guest_disk_info(char *guid, Error **errp)
+{
+    GuestDiskAddressList *list = NULL;
+    GuestDiskAddress *disk;
+    SCSI_ADDRESS addr, *scsi_ad;
+    DWORD len;
+    int bus;
+    HANDLE vol_h;
+
+    scsi_ad = &addr;
+    char *name = g_strndup(guid, strlen(guid)-1);
+
+    vol_h = CreateFile(name, 0, FILE_SHARE_READ, NULL, OPEN_EXISTING,
+                       0, NULL);
+    if (vol_h == INVALID_HANDLE_VALUE) {
+        error_setg_win32(errp, GetLastError(), "failed to open volume");
+        goto out_free;
+    }
+
+    bus = get_disk_bus_type(vol_h, errp);
+    if (bus < 0) {
+        goto out_close;
+    }
+
+    disk = g_malloc0(sizeof(*disk));
+    disk->bus_type = find_bus_type(bus);
+    if (bus == BusTypeScsi || bus == BusTypeAta || bus == BusTypeRAID
+#if (_WIN32_WINNT >= 0x0600)
+            /* This bus type is not supported before Windows Server 2003 SP1 */
+            || bus == BusTypeSas
+#endif
+        ) {
+        /* We are able to use the same ioctls for different bus types
+         * according to Microsoft docs
+         * https://technet.microsoft.com/en-us/library/ee851589(v=ws.10).aspx */
+        if (DeviceIoControl(vol_h, IOCTL_SCSI_GET_ADDRESS, NULL, 0, scsi_ad,
+                            sizeof(SCSI_ADDRESS), &len, NULL)) {
+            disk->unit = addr.Lun;
+            disk->target = addr.TargetId;
+            disk->bus = addr.PathId;
+            disk->pci_controller = get_pci_info(name, errp);
+        }
+        /* We do not set an error in this case, because we still have enough
+         * information about the volume. */
+    } else {
+        disk->pci_controller = NULL;
+    }
+
+    list = g_malloc0(sizeof(*list));
+    list->value = disk;
+    list->next = NULL;
+out_close:
+    CloseHandle(vol_h);
+out_free:
+    g_free(name);
+    return list;
+}
+
+#else
+
+static GuestDiskAddressList *build_guest_disk_info(char *guid, Error **errp)
+{
+    return NULL;
+}
+
+#endif /* CONFIG_QGA_NTDDSCSI */
+
+static GuestFilesystemInfo *build_guest_fsinfo(char *guid, Error **errp)
+{
+    DWORD info_size;
+    char mnt, *mnt_point;
+    char fs_name[32];
+    char vol_info[MAX_PATH+1];
+    size_t len;
+    GuestFilesystemInfo *fs = NULL;
+
+    GetVolumePathNamesForVolumeName(guid, (LPCH)&mnt, 0, &info_size);
+    if (GetLastError() != ERROR_MORE_DATA) {
+        error_setg_win32(errp, GetLastError(), "failed to get volume name");
+        return NULL;
+    }
+
+    mnt_point = g_malloc(info_size + 1);
+    if (!GetVolumePathNamesForVolumeName(guid, mnt_point, info_size,
+                                         &info_size)) {
+        error_setg_win32(errp, GetLastError(), "failed to get volume name");
+        goto free;
+    }
+
+    len = strlen(mnt_point);
+    mnt_point[len] = '\\';
+    mnt_point[len+1] = 0;
+    if (!GetVolumeInformation(mnt_point, vol_info, sizeof(vol_info), NULL, NULL,
+                              NULL, (LPSTR)&fs_name, sizeof(fs_name))) {
+        if (GetLastError() != ERROR_NOT_READY) {
+            error_setg_win32(errp, GetLastError(), "failed to get volume info");
+        }
+        goto free;
+    }
+
+    fs_name[sizeof(fs_name) - 1] = 0;
+    fs = g_malloc(sizeof(*fs));
+    fs->name = g_strdup(guid);
+    if (len == 0) {
+        fs->mountpoint = g_strdup("System Reserved");
+    } else {
+        fs->mountpoint = g_strndup(mnt_point, len);
+    }
+    fs->type = g_strdup(fs_name);
+    fs->disk = build_guest_disk_info(guid, errp);
+free:
+    g_free(mnt_point);
+    return fs;
+}
+
 GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
 {
-    error_setg(errp, QERR_UNSUPPORTED);
-    return NULL;
+    HANDLE vol_h;
+    GuestFilesystemInfoList *new, *ret = NULL;
+    char guid[256];
+
+    vol_h = FindFirstVolume(guid, sizeof(guid));
+    if (vol_h == INVALID_HANDLE_VALUE) {
+        error_setg_win32(errp, GetLastError(), "failed to find any volume");
+        return NULL;
+    }
+
+    do {
+        GuestFilesystemInfo *info = build_guest_fsinfo(guid, errp);
+        if (info == NULL) {
+            continue;
+        }
+        new = g_malloc(sizeof(*ret));
+        new->value = info;
+        new->next = ret;
+        ret = new;
+    } while (FindNextVolume(vol_h, guid, sizeof(guid)));
+
+    if (GetLastError() != ERROR_NO_MORE_FILES) {
+        error_setg_win32(errp, GetLastError(), "failed to find next volume");
+    }
+
+    FindVolumeClose(vol_h);
+    return ret;
 }
 
 /*
@@ -493,9 +799,11 @@
  * Walk list of mounted file systems in the guest, and discard unused
  * areas.
  */
-void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
+GuestFilesystemTrimResponse *
+qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
 {
     error_setg(errp, QERR_UNSUPPORTED);
+    return NULL;
 }
 
 typedef enum {
@@ -589,10 +897,218 @@
     error_setg(errp, QERR_UNSUPPORTED);
 }
 
+static IP_ADAPTER_ADDRESSES *guest_get_adapters_addresses(Error **errp)
+{
+    IP_ADAPTER_ADDRESSES *adptr_addrs = NULL;
+    ULONG adptr_addrs_len = 0;
+    DWORD ret;
+
+    /* Call the first time to get the adptr_addrs_len. */
+    GetAdaptersAddresses(AF_UNSPEC, GAA_FLAG_INCLUDE_PREFIX,
+                         NULL, adptr_addrs, &adptr_addrs_len);
+
+    adptr_addrs = g_malloc(adptr_addrs_len);
+    ret = GetAdaptersAddresses(AF_UNSPEC, GAA_FLAG_INCLUDE_PREFIX,
+                               NULL, adptr_addrs, &adptr_addrs_len);
+    if (ret != ERROR_SUCCESS) {
+        error_setg_win32(errp, ret, "failed to get adapters addresses");
+        g_free(adptr_addrs);
+        adptr_addrs = NULL;
+    }
+    return adptr_addrs;
+}
+
+static char *guest_wctomb_dup(WCHAR *wstr)
+{
+    char *str;
+    size_t i;
+
+    i = wcslen(wstr) + 1;
+    str = g_malloc(i);
+    WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK,
+                        wstr, -1, str, i, NULL, NULL);
+    return str;
+}
+
+static char *guest_addr_to_str(IP_ADAPTER_UNICAST_ADDRESS *ip_addr,
+                               Error **errp)
+{
+    char addr_str[INET6_ADDRSTRLEN + INET_ADDRSTRLEN];
+    DWORD len;
+    int ret;
+
+    if (ip_addr->Address.lpSockaddr->sa_family == AF_INET ||
+            ip_addr->Address.lpSockaddr->sa_family == AF_INET6) {
+        len = sizeof(addr_str);
+        ret = WSAAddressToString(ip_addr->Address.lpSockaddr,
+                                 ip_addr->Address.iSockaddrLength,
+                                 NULL,
+                                 addr_str,
+                                 &len);
+        if (ret != 0) {
+            error_setg_win32(errp, WSAGetLastError(),
+                "failed address presentation form conversion");
+            return NULL;
+        }
+        return g_strdup(addr_str);
+    }
+    return NULL;
+}
+
+#if (_WIN32_WINNT >= 0x0600)
+static int64_t guest_ip_prefix(IP_ADAPTER_UNICAST_ADDRESS *ip_addr)
+{
+    /* For Windows Vista/2008 and newer, use the OnLinkPrefixLength
+     * field to obtain the prefix.
+     */
+    return ip_addr->OnLinkPrefixLength;
+}
+#else
+/* When using the Windows XP and 2003 build environment, do the best we can to
+ * figure out the prefix.
+ */
+static IP_ADAPTER_INFO *guest_get_adapters_info(void)
+{
+    IP_ADAPTER_INFO *adptr_info = NULL;
+    ULONG adptr_info_len = 0;
+    DWORD ret;
+
+    /* Call the first time to get the adptr_info_len. */
+    GetAdaptersInfo(adptr_info, &adptr_info_len);
+
+    adptr_info = g_malloc(adptr_info_len);
+    ret = GetAdaptersInfo(adptr_info, &adptr_info_len);
+    if (ret != ERROR_SUCCESS) {
+        g_free(adptr_info);
+        adptr_info = NULL;
+    }
+    return adptr_info;
+}
+
+static int64_t guest_ip_prefix(IP_ADAPTER_UNICAST_ADDRESS *ip_addr)
+{
+    int64_t prefix = -1; /* Use for AF_INET6 and unknown/undetermined values. */
+    IP_ADAPTER_INFO *adptr_info, *info;
+    IP_ADDR_STRING *ip;
+    struct in_addr *p;
+
+    if (ip_addr->Address.lpSockaddr->sa_family != AF_INET) {
+        return prefix;
+    }
+    adptr_info = guest_get_adapters_info();
+    if (adptr_info == NULL) {
+        return prefix;
+    }
+
+    /* Match up the passed-in ip_addr with one found in adptr_info.
+     * The matching one in adptr_info will have the netmask.
+     */
+    p = &((struct sockaddr_in *)ip_addr->Address.lpSockaddr)->sin_addr;
+    for (info = adptr_info; info; info = info->Next) {
+        for (ip = &info->IpAddressList; ip; ip = ip->Next) {
+            if (p->S_un.S_addr == inet_addr(ip->IpAddress.String)) {
+                prefix = ctpop32(inet_addr(ip->IpMask.String));
+                goto out;
+            }
+        }
+    }
+out:
+    g_free(adptr_info);
+    return prefix;
+}
+#endif
+
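
Illustration (not part of the patch): on this pre-Vista path the prefix length
is simply the number of set bits in the netmask, for example:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* 255.255.255.0 has 24 bits set; __builtin_popcount stands in for
         * QEMU's ctpop32() used above. */
        uint32_t mask = 0xffffff00u;
        printf("prefix = /%d\n", __builtin_popcount(mask));   /* /24 */
        return 0;
    }
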
 GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
 {
-    error_setg(errp, QERR_UNSUPPORTED);
-    return NULL;
+    IP_ADAPTER_ADDRESSES *adptr_addrs, *addr;
+    IP_ADAPTER_UNICAST_ADDRESS *ip_addr = NULL;
+    GuestNetworkInterfaceList *head = NULL, *cur_item = NULL;
+    GuestIpAddressList *head_addr, *cur_addr;
+    GuestNetworkInterfaceList *info;
+    GuestIpAddressList *address_item = NULL;
+    unsigned char *mac_addr;
+    char *addr_str;
+    WORD wsa_version;
+    WSADATA wsa_data;
+    int ret;
+
+    adptr_addrs = guest_get_adapters_addresses(errp);
+    if (adptr_addrs == NULL) {
+        return NULL;
+    }
+
+    /* Make WSA APIs available. */
+    wsa_version = MAKEWORD(2, 2);
+    ret = WSAStartup(wsa_version, &wsa_data);
+    if (ret != 0) {
+        error_setg_win32(errp, ret, "failed socket startup");
+        goto out;
+    }
+
+    for (addr = adptr_addrs; addr; addr = addr->Next) {
+        info = g_malloc0(sizeof(*info));
+
+        if (cur_item == NULL) {
+            head = cur_item = info;
+        } else {
+            cur_item->next = info;
+            cur_item = info;
+        }
+
+        info->value = g_malloc0(sizeof(*info->value));
+        info->value->name = guest_wctomb_dup(addr->FriendlyName);
+
+        if (addr->PhysicalAddressLength != 0) {
+            mac_addr = addr->PhysicalAddress;
+
+            info->value->hardware_address =
+                g_strdup_printf("%02x:%02x:%02x:%02x:%02x:%02x",
+                                (int) mac_addr[0], (int) mac_addr[1],
+                                (int) mac_addr[2], (int) mac_addr[3],
+                                (int) mac_addr[4], (int) mac_addr[5]);
+
+            info->value->has_hardware_address = true;
+        }
+
+        head_addr = NULL;
+        cur_addr = NULL;
+        for (ip_addr = addr->FirstUnicastAddress;
+                ip_addr;
+                ip_addr = ip_addr->Next) {
+            addr_str = guest_addr_to_str(ip_addr, errp);
+            if (addr_str == NULL) {
+                continue;
+            }
+
+            address_item = g_malloc0(sizeof(*address_item));
+
+            if (!cur_addr) {
+                head_addr = cur_addr = address_item;
+            } else {
+                cur_addr->next = address_item;
+                cur_addr = address_item;
+            }
+
+            address_item->value = g_malloc0(sizeof(*address_item->value));
+            address_item->value->ip_address = addr_str;
+            address_item->value->prefix = guest_ip_prefix(ip_addr);
+            if (ip_addr->Address.lpSockaddr->sa_family == AF_INET) {
+                address_item->value->ip_address_type =
+                    GUEST_IP_ADDRESS_TYPE_IPV4;
+            } else if (ip_addr->Address.lpSockaddr->sa_family == AF_INET6) {
+                address_item->value->ip_address_type =
+                    GUEST_IP_ADDRESS_TYPE_IPV6;
+            }
+        }
+        if (head_addr) {
+            info->value->has_ip_addresses = true;
+            info->value->ip_addresses = head_addr;
+        }
+    }
+    WSACleanup();
+out:
+    g_free(adptr_addrs);
+    return head;
 }
 
 int64_t qmp_guest_get_time(Error **errp)
@@ -707,12 +1223,12 @@
 GList *ga_command_blacklist_init(GList *blacklist)
 {
     const char *list_unsupported[] = {
-        "guest-suspend-hybrid", "guest-network-get-interfaces",
+        "guest-suspend-hybrid",
         "guest-get-vcpus", "guest-set-vcpus",
         "guest-set-user-password",
         "guest-get-memory-blocks", "guest-set-memory-blocks",
         "guest-get-memory-block-size",
-        "guest-fsfreeze-freeze-list", "guest-get-fsinfo",
+        "guest-fsfreeze-freeze-list",
         "guest-fstrim", NULL};
     char **p = (char **)list_unsupported;
 
diff --git a/qga/main.c b/qga/main.c
index 23cde01..791982e 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -274,7 +274,7 @@
 
     level &= G_LOG_LEVEL_MASK;
 #ifndef _WIN32
-    if (domain && strcmp(domain, "syslog") == 0) {
+    if (g_strcmp0(domain, "syslog") == 0) {
         syslog(LOG_INFO, "%s: %s", level_str, msg);
     } else if (level & s->log_level) {
 #else
diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index b446dc7..8a9b818 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -425,6 +425,30 @@
   'returns': 'int' }
 
 ##
+# @GuestFilesystemTrimResult
+#
+# @path: path that was trimmed
+# @error: an error message when trim failed
+# @trimmed: bytes trimmed for this path
+# @minimum: reported effective minimum for this path
+#
+# Since: 2.4
+##
+{ 'struct': 'GuestFilesystemTrimResult',
+  'data': {'path': 'str',
+           '*trimmed': 'int', '*minimum': 'int', '*error': 'str'} }
+
+##
+# @GuestFilesystemTrimResponse
+#
+# @paths: list of @GuestFilesystemTrimResult per path that was trimmed
+#
+# Since: 2.4
+##
+{ 'struct': 'GuestFilesystemTrimResponse',
+  'data': {'paths': ['GuestFilesystemTrimResult']} }
+
+##
 # @guest-fstrim:
 #
 # Discard (or "trim") blocks which are not in use by the filesystem.
@@ -437,12 +461,14 @@
 #       fragmented free space, although not all blocks will be discarded.
 #       The default value is zero, meaning "discard every free block".
 #
-# Returns: Nothing.
+# Returns: A @GuestFilesystemTrimResponse which contains the
+#          status of all trimmed paths. (since 2.4)
 #
 # Since: 1.2
 ##
 { 'command': 'guest-fstrim',
-  'data': { '*minimum': 'int' } }
+  'data': { '*minimum': 'int' },
+  'returns': 'GuestFilesystemTrimResponse' }
 
 ##
 # @guest-suspend-disk
@@ -677,12 +703,24 @@
 # @uml: UML disks
 # @sata: SATA disks
 # @sd: SD cards
+# @unknown: Unknown bus type
+# @ieee1394: Win IEEE 1394 bus type
+# @ssa: Win SSA bus type
+# @fibre: Win Fibre Channel bus type
+# @raid: Win RAID bus type
+# @iscsi: Win iSCSI bus type
+# @sas: Win serial-attached SCSI bus type
+# @mmc: Win multimedia card (MMC) bus type
+# @virtual: Win virtual bus type
+# @file-backed-virtual: Win file-backed virtual bus type
 #
 # Since: 2.2
 ##
 { 'enum': 'GuestDiskBusType',
   'data': [ 'ide', 'fdc', 'scsi', 'virtio', 'xen', 'usb', 'uml', 'sata',
-            'sd' ] }
+            'sd', 'unknown', 'ieee1394', 'ssa', 'fibre', 'raid', 'iscsi',
+            'sas', 'mmc', 'virtual', 'file-backed-virtual' ] }
+
 
 ##
 # @GuestPCIAddress:
diff --git a/roms/SLOF b/roms/SLOF
index c89b0df..7d766a3 160000
--- a/roms/SLOF
+++ b/roms/SLOF
@@ -1 +1 @@
-Subproject commit c89b0df661c0a6bfa9ff0ed4a371f631f5ee38b0
+Subproject commit 7d766a3ac9b2474f6c7da0084d43590cbbf047bf
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index baf4220..47378d9 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -31,7 +31,7 @@
 cp_virtio() {
     from=$1
     to=$2
-    virtio=$(find "$from" -name '*virtio*h' -o -name "input.h")
+    virtio=$(find "$from" -name '*virtio*h' -o -name "input.h" -o -name "pci_regs.h")
     if [ "$virtio" ]; then
         rm -rf "$to"
         mkdir -p "$to"
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 36b07f9..b4f9461 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -286,6 +286,17 @@
     NULL, NULL, NULL, NULL,
 };
 
+static const char *cpuid_6_feature_name[] = {
+    NULL, NULL, "arat", NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL,
+};
+
 #define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE)
 #define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \
           CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC)
@@ -341,6 +352,7 @@
           CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
           CPUID_7_0_EBX_RDSEED */
 #define TCG_APM_FEATURES 0
+#define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
 
 
 typedef struct FeatureWordInfo {
@@ -410,6 +422,11 @@
         .cpuid_reg = R_EAX,
         .tcg_features = 0,
     },
+    [FEAT_6_EAX] = {
+        .feat_names = cpuid_6_feature_name,
+        .cpuid_eax = 6, .cpuid_reg = R_EAX,
+        .tcg_features = TCG_6_EAX_FEATURES,
+    },
 };
 
 typedef struct X86RegisterInfo32 {
@@ -1003,6 +1020,8 @@
             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
         .features[FEAT_8000_0001_ECX] =
             CPUID_EXT3_LAHF_LM,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Westmere E56xx/L56xx/X56xx (Nehalem-C)",
     },
@@ -1032,6 +1051,8 @@
             CPUID_EXT3_LAHF_LM,
         .features[FEAT_XSAVE] =
             CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Intel Xeon E312xx (Sandy Bridge)",
     },
@@ -1064,6 +1085,8 @@
             CPUID_EXT3_LAHF_LM,
         .features[FEAT_XSAVE] =
             CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Intel Xeon E3-12xx v2 (Ivy Bridge)",
     },
@@ -1098,6 +1121,8 @@
             CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID,
         .features[FEAT_XSAVE] =
             CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Intel Core Processor (Haswell, no TSX)",
     },    {
@@ -1132,6 +1157,8 @@
             CPUID_7_0_EBX_RTM,
         .features[FEAT_XSAVE] =
             CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Intel Core Processor (Haswell)",
     },
@@ -1168,6 +1195,8 @@
             CPUID_7_0_EBX_SMAP,
         .features[FEAT_XSAVE] =
             CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Intel Core Processor (Broadwell, no TSX)",
     },
@@ -1204,6 +1233,8 @@
             CPUID_7_0_EBX_SMAP,
         .features[FEAT_XSAVE] =
             CPUID_XSAVE_XSAVEOPT,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
         .xlevel = 0x8000000A,
         .model_id = "Intel Core Processor (Broadwell)",
     },
@@ -2359,7 +2390,7 @@
         break;
     case 6:
         /* Thermal and Power Leaf */
-        *eax = 0;
+        *eax = env->features[FEAT_6_EAX];
         *ebx = 0;
         *ecx = 0;
         *edx = 0;
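
The new FEAT_6_EAX word surfaces inside the guest as CPUID leaf 6 (Thermal and Power Management), EAX bit 2: ARAT, the "APIC timer always running" flag. Purely as an illustration (not part of this patch), a guest-side probe could look like this, assuming GCC/Clang's <cpuid.h>:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 6, EAX bit 2 is ARAT, the bit now exposed via FEAT_6_EAX. */
        if (__get_cpuid(6, &eax, &ebx, &ecx, &edx)) {
            printf("ARAT: %s\n", (eax & (1u << 2)) ? "present" : "absent");
        }
        return 0;
    }
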
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index ac39291..14dced0 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -415,6 +415,7 @@
     FEAT_KVM,           /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */
     FEAT_SVM,           /* CPUID[8000_000A].EDX */
     FEAT_XSAVE,         /* CPUID[EAX=0xd,ECX=1].EAX */
+    FEAT_6_EAX,         /* CPUID[6].EAX */
     FEATURE_WORDS,
 } FeatureWord;
 
@@ -580,6 +581,8 @@
 #define CPUID_XSAVE_XGETBV1    (1U << 2)
 #define CPUID_XSAVE_XSAVES     (1U << 3)
 
+#define CPUID_6_EAX_ARAT       (1U << 2)
+
 /* CPUID[0x80000007].EDX flags: */
 #define CPUID_APM_INVTSC       (1U << 8)
 
@@ -959,7 +962,7 @@
     uint8_t has_error_code;
     uint32_t sipi_vector;
     bool tsc_valid;
-    int tsc_khz;
+    int64_t tsc_khz;
     void *kvm_xsave_buf;
 
     uint64_t mcg_cap;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 9038bf7..066d03d 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -238,6 +238,8 @@
         if (!kvm_irqchip_in_kernel()) {
             ret &= ~CPUID_EXT_X2APIC;
         }
+    } else if (function == 6 && reg == R_EAX) {
+        ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */
     } else if (function == 0x80000001 && reg == R_EDX) {
         /* On Intel, kvm returns cpuid according to the Intel spec,
          * so add missing bits according to the AMD spec:
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index ddf469f..110436d 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -40,6 +40,7 @@
 #include "trace.h"
 #include "exec/gdbstub.h"
 #include "exec/memattrs.h"
+#include "sysemu/hostmem.h"
 
 //#define DEBUG_KVM
 
@@ -303,16 +304,11 @@
     kvm_get_fallback_smmu_info(cpu, info);
 }
 
-static long getrampagesize(void)
+static long gethugepagesize(const char *mem_path)
 {
     struct statfs fs;
     int ret;
 
-    if (!mem_path) {
-        /* guest RAM is backed by normal anonymous pages */
-        return getpagesize();
-    }
-
     do {
         ret = statfs(mem_path, &fs);
     } while (ret != 0 && errno == EINTR);
@@ -334,6 +330,55 @@
     return fs.f_bsize;
 }
 
+static int find_max_supported_pagesize(Object *obj, void *opaque)
+{
+    char *mem_path;
+    long *hpsize_min = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
+        mem_path = object_property_get_str(obj, "mem-path", NULL);
+        if (mem_path) {
+            long hpsize = gethugepagesize(mem_path);
+            if (hpsize < *hpsize_min) {
+                *hpsize_min = hpsize;
+            }
+        } else {
+            *hpsize_min = getpagesize();
+        }
+    }
+
+    return 0;
+}
+
+static long getrampagesize(void)
+{
+    long hpsize = LONG_MAX;
+    Object *memdev_root;
+
+    if (mem_path) {
+        return gethugepagesize(mem_path);
+    }
+
+    /* It's possible we have memory-backend objects with
+     * hugepage-backed RAM.  These may get mapped into system
+     * address space via -numa parameters or memory hotplug
+     * hooks.  We want to take these into account, but we
+     * also want to make sure the supported hugepage
+     * sizes are applicable across the entire range of memory
+     * we may boot from, so we take the minimum across all
+     * backends, and assume normal pages in cases where a
+     * backend isn't backed by hugepages.
+     */
+    memdev_root = object_resolve_path("/objects", NULL);
+    if (!memdev_root) {
+        return getpagesize();
+    }
+
+    object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
+
+    return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
+}
+
 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 {
     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
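
For context on the new gethugepagesize()/getrampagesize() split, here is a standalone sketch of the same statfs()-based detection, with the hugetlbfs magic value written out as an assumption rather than taken from a kernel header:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/vfs.h>

    /* Assumed value of Linux's HUGETLBFS_MAGIC; illustrative only. */
    #define HUGETLBFS_MAGIC_SKETCH 0x958458f6

    static long page_size_for_path(const char *mem_path)
    {
        struct statfs fs;

        if (statfs(mem_path, &fs) != 0) {
            return getpagesize();      /* fall back to the normal page size */
        }
        if (fs.f_type != HUGETLBFS_MAGIC_SKETCH) {
            fprintf(stderr, "%s is not on hugetlbfs\n", mem_path);
        }
        return fs.f_bsize;             /* hugetlbfs reports its page size here */
    }

getrampagesize() above then takes the minimum of this value across all memory backends, and falls back to getpagesize() for backends that are not file-backed.
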
diff --git a/target-s390x/int_helper.c b/target-s390x/int_helper.c
index 2c2b3f6..a46c736 100644
--- a/target-s390x/int_helper.c
+++ b/target-s390x/int_helper.c
@@ -121,11 +121,12 @@
     return clz64(v);
 }
 
-uint64_t HELPER(cvd)(int32_t bin)
+uint64_t HELPER(cvd)(int32_t reg)
 {
     /* positive 0 */
     uint64_t dec = 0x0c;
-    int shift = 4;
+    int64_t bin = reg;
+    int shift;
 
     if (bin < 0) {
         bin = -bin;
@@ -133,9 +134,7 @@
     }
 
     for (shift = 4; (shift < 64) && bin; shift += 4) {
-        int current_number = bin % 10;
-
-        dec |= (current_number) << shift;
+        dec |= (bin % 10) << shift;
         bin /= 10;
     }
 
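
The CVD change widens the binary operand to int64_t before negating, so converting INT32_MIN no longer overflows, and folds the digit extraction into a single statement. A standalone sketch mirroring the patched helper makes the packed-decimal layout concrete: digits are placed from bit 4 upward, with the sign nibble (0xc non-negative, 0xd negative, as in the helper) at the bottom, so -123 becomes 0x123d:

    #include <assert.h>
    #include <stdint.h>

    /* Illustrative mirror of HELPER(cvd), outside QEMU. */
    static uint64_t cvd_sketch(int32_t reg)
    {
        uint64_t dec = 0x0c;            /* positive sign nibble */
        int64_t bin = reg;              /* widen first: -INT32_MIN fits */
        int shift;

        if (bin < 0) {
            bin = -bin;
            dec = 0x0d;                 /* negative sign nibble */
        }
        for (shift = 4; (shift < 64) && bin; shift += 4) {
            dec |= (bin % 10) << shift;
            bin /= 10;
        }
        return dec;
    }

    int main(void)
    {
        assert(cvd_sketch(-123) == 0x123d);
        assert(cvd_sketch(0) == 0x0c);
        return 0;
    }
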
diff --git a/target-s390x/mem_helper.c b/target-s390x/mem_helper.c
index 3ccbeb9..6f8bd79 100644
--- a/target-s390x/mem_helper.c
+++ b/target-s390x/mem_helper.c
@@ -482,6 +482,7 @@
         case 0xc00:
             helper_tr(env, l, get_address(env, 0, b1, d1),
                       get_address(env, 0, b2, d2));
+            break;
         case 0xd00:
             cc = helper_trt(env, l, get_address(env, 0, b1, d1),
                             get_address(env, 0, b2, d2));
@@ -550,7 +551,7 @@
     uint64_t dest = get_address_31fix(env, r1);
     uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
     uint64_t src = get_address_31fix(env, r2);
-    uint8_t pad = src >> 24;
+    uint8_t pad = env->regs[r2 + 1] >> 24;
     uint8_t v;
     uint32_t cc;
 
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 669fafe..921991e 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -1643,8 +1643,10 @@
 
     base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
 
-    label_ptr = s->code_ptr + 1;
-    tcg_out_insn(s, RI, BRC, S390_CC_NE, 0);
+    /* We need to keep the offset unchanged for retranslation.  */
+    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
+    label_ptr = s->code_ptr;
+    s->code_ptr += 1;
 
     tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
 
@@ -1669,8 +1671,10 @@
 
     base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
 
-    label_ptr = s->code_ptr + 1;
-    tcg_out_insn(s, RI, BRC, S390_CC_NE, 0);
+    /* We need to keep the offset unchanged for retranslation.  */
+    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
+    label_ptr = s->code_ptr;
+    s->code_ptr += 1;
 
     tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
 
diff --git a/tests/Makefile b/tests/Makefile
index 09838ac..2c4b8dc 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -137,6 +137,9 @@
 gcov-files-pci-y += hw/display/vga.c
 gcov-files-pci-y += hw/display/cirrus_vga.c
 gcov-files-pci-y += hw/display/vga-pci.c
+gcov-files-pci-y += hw/display/virtio-gpu.c
+gcov-files-pci-y += hw/display/virtio-gpu-pci.c
+gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c
 check-qtest-pci-y += tests/intel-hda-test$(EXESUF)
 gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c
 
@@ -154,6 +157,7 @@
 check-qtest-i386-y += tests/fw_cfg-test$(EXESUF)
 check-qtest-i386-y += tests/drive_del-test$(EXESUF)
 check-qtest-i386-y += tests/wdt_ib700-test$(EXESUF)
+check-qtest-i386-y += tests/tco-test$(EXESUF)
 gcov-files-i386-y += hw/watchdog/watchdog.c hw/watchdog/wdt_ib700.c
 check-qtest-i386-y += $(check-qtest-pci-y)
 gcov-files-i386-y += $(gcov-files-pci-y)
@@ -374,6 +378,7 @@
 tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o
 tests/ne2000-test$(EXESUF): tests/ne2000-test.o
 tests/wdt_ib700-test$(EXESUF): tests/wdt_ib700-test.o
+tests/tco-test$(EXESUF): tests/tco-test.o $(libqos-pc-obj-y)
 tests/virtio-balloon-test$(EXESUF): tests/virtio-balloon-test.o
 tests/virtio-blk-test$(EXESUF): tests/virtio-blk-test.o $(libqos-virtio-obj-y)
 tests/virtio-net-test$(EXESUF): tests/virtio-net-test.o $(libqos-pc-obj-y)
diff --git a/tests/display-vga-test.c b/tests/display-vga-test.c
index 17f5910..7694344 100644
--- a/tests/display-vga-test.c
+++ b/tests/display-vga-test.c
@@ -36,6 +36,20 @@
     qtest_end();
 }
 
+static void pci_virtio_gpu(void)
+{
+    qtest_start("-vga none -device virtio-gpu-pci");
+    qtest_end();
+}
+
+#ifdef CONFIG_VIRTIO_VGA
+static void pci_virtio_vga(void)
+{
+    qtest_start("-vga none -device virtio-vga");
+    qtest_end();
+}
+#endif
+
 int main(int argc, char **argv)
 {
     int ret;
@@ -46,6 +60,10 @@
     qtest_add_func("/display/pci/stdvga", pci_stdvga);
     qtest_add_func("/display/pci/secondary", pci_secondary);
     qtest_add_func("/display/pci/multihead", pci_multihead);
+    qtest_add_func("/display/pci/virtio-gpu", pci_virtio_gpu);
+#ifdef CONFIG_VIRTIO_VGA
+    qtest_add_func("/display/pci/virtio-vga", pci_virtio_vga);
+#endif
     ret = g_test_run();
 
     return ret;
diff --git a/tests/libqos/ahci.c b/tests/libqos/ahci.c
index 33ecd2a..cf66b3e 100644
--- a/tests/libqos/ahci.c
+++ b/tests/libqos/ahci.c
@@ -545,16 +545,18 @@
     ahci->port[port].prdtl[slot] = 0;
 }
 
-void ahci_write_fis(AHCIQState *ahci, RegH2DFIS *fis, uint64_t addr)
+void ahci_write_fis(AHCIQState *ahci, AHCICommand *cmd)
 {
-    RegH2DFIS tmp = *fis;
+    RegH2DFIS tmp = cmd->fis;
+    uint64_t addr = cmd->header.ctba;
 
-    /* The auxiliary FIS fields are defined per-command and are not
-     * currently implemented in libqos/ahci.o, but may or may not need
-     * to be flipped. */
-
-    /* All other FIS fields are 8 bit and do not need to be flipped. */
-    tmp.count = cpu_to_le16(tmp.count);
+    /* NCQ commands use exclusively 8-bit fields and need no adjustment.
+     * Only the count field needs to be adjusted for non-NCQ commands.
+     * The auxiliary FIS fields are defined per-command and are not currently
+     * implemented in libqos/ahci.o, but may or may not need to be flipped. */
+    if (!cmd->props->ncq) {
+        tmp.count = cpu_to_le16(tmp.count);
+    }
 
     memwrite(addr, &tmp, sizeof(tmp));
 }
@@ -877,7 +879,7 @@
 
     /* Commit the command header and command FIS */
     ahci_set_command_header(ahci, port, cmd->slot, &(cmd->header));
-    ahci_write_fis(ahci, &(cmd->fis), table_ptr);
+    ahci_write_fis(ahci, cmd);
 
     /* Construct and write the PRDs to the command table */
     g_assert_cmphex(prdtl, ==, cmd->header.prdtl);
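
As the comment in ahci_write_fis() notes, the 16-bit count is the only multi-byte field libqos fills in the H2D FIS, so it is now the only one byte-swapped, and only for non-NCQ commands. A minimal sketch of that swap, independent of libqos (the real code uses QEMU's cpu_to_le16()):

    #include <stdint.h>

    /* Store a 16-bit value in the little-endian order the FIS expects,
     * regardless of host endianness.  On little-endian hosts, a no-op. */
    static inline uint16_t to_le16_sketch(uint16_t v)
    {
    #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        return (uint16_t)((v >> 8) | (v << 8));
    #else
        return v;
    #endif
    }
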
diff --git a/tests/libqos/ahci.h b/tests/libqos/ahci.h
index a08a9dd..cffc2c3 100644
--- a/tests/libqos/ahci.h
+++ b/tests/libqos/ahci.h
@@ -548,7 +548,7 @@
 void ahci_set_command_header(AHCIQState *ahci, uint8_t port,
                              uint8_t slot, AHCICommandHeader *cmd);
 void ahci_destroy_command(AHCIQState *ahci, uint8_t port, uint8_t slot);
-void ahci_write_fis(AHCIQState *ahci, RegH2DFIS *fis, uint64_t addr);
+void ahci_write_fis(AHCIQState *ahci, AHCICommand *cmd);
 unsigned ahci_pick_cmd(AHCIQState *ahci, uint8_t port);
 unsigned size_to_prdtl(unsigned bytes, unsigned bytes_per_prd);
 void ahci_guest_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd,
diff --git a/tests/rocker/bridge-vlan b/tests/rocker/bridge-vlan
index ef9e5f5..897d82c 100755
--- a/tests/rocker/bridge-vlan
+++ b/tests/rocker/bridge-vlan
@@ -20,8 +20,8 @@
 
 # add both ports to VLAN 57
 
-simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1 master self"
-simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2 master self"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2"
 
 # turn off learning and flooding in SW
 
diff --git a/tests/rocker/bridge-vlan-stp b/tests/rocker/bridge-vlan-stp
index c660312..85d2646 100755
--- a/tests/rocker/bridge-vlan-stp
+++ b/tests/rocker/bridge-vlan-stp
@@ -21,8 +21,8 @@
 
 # add both ports to VLAN 57
 
-simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1 master self"
-simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2 master self"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2"
 
 # turn off learning and flooding in SW
 
diff --git a/tests/tco-test.c b/tests/tco-test.c
new file mode 100644
index 0000000..419f7cf
--- /dev/null
+++ b/tests/tco-test.c
@@ -0,0 +1,465 @@
+/*
+ * QEMU ICH9 TCO emulation tests
+ *
+ * Copyright (c) 2015 Paulo Alcantara <pcacjr@zytor.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include <glib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "libqtest.h"
+#include "libqos/pci.h"
+#include "libqos/pci-pc.h"
+#include "hw/pci/pci_regs.h"
+#include "hw/i386/ich9.h"
+#include "hw/acpi/ich9.h"
+#include "hw/acpi/tco.h"
+
+#define RCBA_BASE_ADDR    0xfed1c000
+#define PM_IO_BASE_ADDR   0xb000
+
+enum {
+    TCO_RLD_DEFAULT         = 0x0000,
+    TCO_DAT_IN_DEFAULT      = 0x00,
+    TCO_DAT_OUT_DEFAULT     = 0x00,
+    TCO1_STS_DEFAULT        = 0x0000,
+    TCO2_STS_DEFAULT        = 0x0000,
+    TCO1_CNT_DEFAULT        = 0x0000,
+    TCO2_CNT_DEFAULT        = 0x0008,
+    TCO_MESSAGE1_DEFAULT    = 0x00,
+    TCO_MESSAGE2_DEFAULT    = 0x00,
+    TCO_WDCNT_DEFAULT       = 0x00,
+    TCO_TMR_DEFAULT         = 0x0004,
+    SW_IRQ_GEN_DEFAULT      = 0x03,
+};
+
+#define TCO_SECS_TO_TICKS(secs)     (((secs) * 10) / 6)
+#define TCO_TICKS_TO_SECS(ticks)    (((ticks) * 6) / 10)
+
+typedef struct {
+    const char *args;
+    bool noreboot;
+    QPCIDevice *dev;
+    void *tco_io_base;
+} TestData;
+
+static void test_init(TestData *d)
+{
+    QPCIBus *bus;
+    QTestState *qs;
+    char *s;
+
+    s = g_strdup_printf("-machine q35 %s %s",
+                        d->noreboot ? "" : "-global ICH9-LPC.noreboot=false",
+                        !d->args ? "" : d->args);
+    qs = qtest_start(s);
+    qtest_irq_intercept_in(qs, "ioapic");
+    g_free(s);
+
+    bus = qpci_init_pc();
+    d->dev = qpci_device_find(bus, QPCI_DEVFN(0x1f, 0x00));
+    g_assert(d->dev != NULL);
+
+    qpci_device_enable(d->dev);
+
+    /* set ACPI PM I/O space base address */
+    qpci_config_writel(d->dev, ICH9_LPC_PMBASE, PM_IO_BASE_ADDR | 0x1);
+    /* enable ACPI I/O */
+    qpci_config_writeb(d->dev, ICH9_LPC_ACPI_CTRL, 0x80);
+    /* set Root Complex BAR */
+    qpci_config_writel(d->dev, ICH9_LPC_RCBA, RCBA_BASE_ADDR | 0x1);
+
+    d->tco_io_base = (void *)((uintptr_t)PM_IO_BASE_ADDR + 0x60);
+}
+
+static void stop_tco(const TestData *d)
+{
+    uint32_t val;
+
+    val = qpci_io_readw(d->dev, d->tco_io_base + TCO1_CNT);
+    val |= TCO_TMR_HLT;
+    qpci_io_writew(d->dev, d->tco_io_base + TCO1_CNT, val);
+}
+
+static void start_tco(const TestData *d)
+{
+    uint32_t val;
+
+    val = qpci_io_readw(d->dev, d->tco_io_base + TCO1_CNT);
+    val &= ~TCO_TMR_HLT;
+    qpci_io_writew(d->dev, d->tco_io_base + TCO1_CNT, val);
+}
+
+static void load_tco(const TestData *d)
+{
+    qpci_io_writew(d->dev, d->tco_io_base + TCO_RLD, 4);
+}
+
+static void set_tco_timeout(const TestData *d, uint16_t ticks)
+{
+    qpci_io_writew(d->dev, d->tco_io_base + TCO_TMR, ticks);
+}
+
+static void clear_tco_status(const TestData *d)
+{
+    qpci_io_writew(d->dev, d->tco_io_base + TCO1_STS, 0x0008);
+    qpci_io_writew(d->dev, d->tco_io_base + TCO2_STS, 0x0002);
+    qpci_io_writew(d->dev, d->tco_io_base + TCO2_STS, 0x0004);
+}
+
+static void reset_on_second_timeout(bool enable)
+{
+    uint32_t val;
+
+    val = readl(RCBA_BASE_ADDR + ICH9_CC_GCS);
+    if (enable) {
+        val &= ~ICH9_CC_GCS_NO_REBOOT;
+    } else {
+        val |= ICH9_CC_GCS_NO_REBOOT;
+    }
+    writel(RCBA_BASE_ADDR + ICH9_CC_GCS, val);
+}
+
+static void test_tco_defaults(void)
+{
+    TestData d;
+
+    d.args = NULL;
+    d.noreboot = true;
+    test_init(&d);
+    g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO_RLD), ==,
+                    TCO_RLD_DEFAULT);
+    /* TCO_DAT_IN & TCO_DAT_OUT */
+    g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO_DAT_IN), ==,
+                    (TCO_DAT_OUT_DEFAULT << 8) | TCO_DAT_IN_DEFAULT);
+    /* TCO1_STS & TCO2_STS */
+    g_assert_cmpint(qpci_io_readl(d.dev, d.tco_io_base + TCO1_STS), ==,
+                    (TCO2_STS_DEFAULT << 16) | TCO1_STS_DEFAULT);
+    /* TCO1_CNT & TCO2_CNT */
+    g_assert_cmpint(qpci_io_readl(d.dev, d.tco_io_base + TCO1_CNT), ==,
+                    (TCO2_CNT_DEFAULT << 16) | TCO1_CNT_DEFAULT);
+    /* TCO_MESSAGE1 & TCO_MESSAGE2 */
+    g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO_MESSAGE1), ==,
+                    (TCO_MESSAGE2_DEFAULT << 8) | TCO_MESSAGE1_DEFAULT);
+    g_assert_cmpint(qpci_io_readb(d.dev, d.tco_io_base + TCO_WDCNT), ==,
+                    TCO_WDCNT_DEFAULT);
+    g_assert_cmpint(qpci_io_readb(d.dev, d.tco_io_base + SW_IRQ_GEN), ==,
+                    SW_IRQ_GEN_DEFAULT);
+    g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO_TMR), ==,
+                    TCO_TMR_DEFAULT);
+    qtest_end();
+}
+
+static void test_tco_timeout(void)
+{
+    TestData d;
+    const uint16_t ticks = TCO_SECS_TO_TICKS(4);
+    uint32_t val;
+    int ret;
+
+    d.args = NULL;
+    d.noreboot = true;
+    test_init(&d);
+
+    stop_tco(&d);
+    clear_tco_status(&d);
+    reset_on_second_timeout(false);
+    set_tco_timeout(&d, ticks);
+    load_tco(&d);
+    start_tco(&d);
+    clock_step(ticks * TCO_TICK_NSEC);
+
+    /* test first timeout */
+    val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS);
+    ret = val & TCO_TIMEOUT ? 1 : 0;
+    g_assert(ret == 1);
+
+    /* test clearing timeout bit */
+    val |= TCO_TIMEOUT;
+    qpci_io_writew(d.dev, d.tco_io_base + TCO1_STS, val);
+    val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS);
+    ret = val & TCO_TIMEOUT ? 1 : 0;
+    g_assert(ret == 0);
+
+    /* test second timeout */
+    clock_step(ticks * TCO_TICK_NSEC);
+    val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS);
+    ret = val & TCO_TIMEOUT ? 1 : 0;
+    g_assert(ret == 1);
+    val = qpci_io_readw(d.dev, d.tco_io_base + TCO2_STS);
+    ret = val & TCO_SECOND_TO_STS ? 1 : 0;
+    g_assert(ret == 1);
+
+    stop_tco(&d);
+    qtest_end();
+}
+
+static void test_tco_max_timeout(void)
+{
+    TestData d;
+    const uint16_t ticks = 0xffff;
+    uint32_t val;
+    int ret;
+
+    d.args = NULL;
+    d.noreboot = true;
+    test_init(&d);
+
+    stop_tco(&d);
+    clear_tco_status(&d);
+    reset_on_second_timeout(false);
+    set_tco_timeout(&d, ticks);
+    load_tco(&d);
+    start_tco(&d);
+    clock_step(((ticks & TCO_TMR_MASK) - 1) * TCO_TICK_NSEC);
+
+    val = qpci_io_readw(d.dev, d.tco_io_base + TCO_RLD);
+    g_assert_cmpint(val & TCO_RLD_MASK, ==, 1);
+    val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS);
+    ret = val & TCO_TIMEOUT ? 1 : 0;
+    g_assert(ret == 0);
+    clock_step(TCO_TICK_NSEC);
+    val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS);
+    ret = val & TCO_TIMEOUT ? 1 : 0;
+    g_assert(ret == 1);
+
+    stop_tco(&d);
+    qtest_end();
+}
+
+static QDict *get_watchdog_action(void)
+{
+    QDict *ev = qmp("");
+    QDict *data;
+    g_assert(!strcmp(qdict_get_str(ev, "event"), "WATCHDOG"));
+
+    data = qdict_get_qdict(ev, "data");
+    QINCREF(data);
+    QDECREF(ev);
+    return data;
+}
+
+static void test_tco_second_timeout_pause(void)
+{
+    TestData td;
+    const uint16_t ticks = TCO_SECS_TO_TICKS(32);
+    QDict *ad;
+
+    td.args = "-watchdog-action pause";
+    td.noreboot = false;
+    test_init(&td);
+
+    stop_tco(&td);
+    clear_tco_status(&td);
+    reset_on_second_timeout(true);
+    set_tco_timeout(&td, TCO_SECS_TO_TICKS(16));
+    load_tco(&td);
+    start_tco(&td);
+    clock_step(ticks * TCO_TICK_NSEC * 2);
+    ad = get_watchdog_action();
+    g_assert(!strcmp(qdict_get_str(ad, "action"), "pause"));
+    QDECREF(ad);
+
+    stop_tco(&td);
+    qtest_end();
+}
+
+static void test_tco_second_timeout_reset(void)
+{
+    TestData td;
+    const uint16_t ticks = TCO_SECS_TO_TICKS(16);
+    QDict *ad;
+
+    td.args = "-watchdog-action reset";
+    td.noreboot = false;
+    test_init(&td);
+
+    stop_tco(&td);
+    clear_tco_status(&td);
+    reset_on_second_timeout(true);
+    set_tco_timeout(&td, TCO_SECS_TO_TICKS(16));
+    load_tco(&td);
+    start_tco(&td);
+    clock_step(ticks * TCO_TICK_NSEC * 2);
+    ad = get_watchdog_action();
+    g_assert(!strcmp(qdict_get_str(ad, "action"), "reset"));
+    QDECREF(ad);
+
+    stop_tco(&td);
+    qtest_end();
+}
+
+static void test_tco_second_timeout_shutdown(void)
+{
+    TestData td;
+    const uint16_t ticks = TCO_SECS_TO_TICKS(128);
+    QDict *ad;
+
+    td.args = "-watchdog-action shutdown";
+    td.noreboot = false;
+    test_init(&td);
+
+    stop_tco(&td);
+    clear_tco_status(&td);
+    reset_on_second_timeout(true);
+    set_tco_timeout(&td, ticks);
+    load_tco(&td);
+    start_tco(&td);
+    clock_step(ticks * TCO_TICK_NSEC * 2);
+    ad = get_watchdog_action();
+    g_assert(!strcmp(qdict_get_str(ad, "action"), "shutdown"));
+    QDECREF(ad);
+
+    stop_tco(&td);
+    qtest_end();
+}
+
+static void test_tco_second_timeout_none(void)
+{
+    TestData td;
+    const uint16_t ticks = TCO_SECS_TO_TICKS(256);
+    QDict *ad;
+
+    td.args = "-watchdog-action none";
+    td.noreboot = false;
+    test_init(&td);
+
+    stop_tco(&td);
+    clear_tco_status(&td);
+    reset_on_second_timeout(true);
+    set_tco_timeout(&td, ticks);
+    load_tco(&td);
+    start_tco(&td);
+    clock_step(ticks * TCO_TICK_NSEC * 2);
+    ad = get_watchdog_action();
+    g_assert(!strcmp(qdict_get_str(ad, "action"), "none"));
+    QDECREF(ad);
+
+    stop_tco(&td);
+    qtest_end();
+}
+
+static void test_tco_ticks_counter(void)
+{
+    TestData d;
+    uint16_t ticks = TCO_SECS_TO_TICKS(8);
+    uint16_t rld;
+
+    d.args = NULL;
+    d.noreboot = true;
+    test_init(&d);
+
+    stop_tco(&d);
+    clear_tco_status(&d);
+    reset_on_second_timeout(false);
+    set_tco_timeout(&d, ticks);
+    load_tco(&d);
+    start_tco(&d);
+
+    do {
+        rld = qpci_io_readw(d.dev, d.tco_io_base + TCO_RLD) & TCO_RLD_MASK;
+        g_assert_cmpint(rld, ==, ticks);
+        clock_step(TCO_TICK_NSEC);
+        ticks--;
+    } while (!(qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS) & TCO_TIMEOUT));
+
+    stop_tco(&d);
+    qtest_end();
+}
+
+static void test_tco1_control_bits(void)
+{
+    TestData d;
+    uint16_t val;
+
+    d.args = NULL;
+    d.noreboot = true;
+    test_init(&d);
+
+    val = TCO_LOCK;
+    qpci_io_writew(d.dev, d.tco_io_base + TCO1_CNT, val);
+    val &= ~TCO_LOCK;
+    qpci_io_writew(d.dev, d.tco_io_base + TCO1_CNT, val);
+    g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO1_CNT), ==,
+                    TCO_LOCK);
+    qtest_end();
+}
+
+static void test_tco1_status_bits(void)
+{
+    TestData d;
+    uint16_t ticks = 8;
+    uint16_t val;
+    int ret;
+
+    d.args = NULL;
+    d.noreboot = true;
+    test_init(&d);
+
+    stop_tco(&d);
+    clear_tco_status(&d);
+    reset_on_second_timeout(false);
+    set_tco_timeout(&d, ticks);
+    load_tco(&d);
+    start_tco(&d);
+    clock_step(ticks * TCO_TICK_NSEC);
+
+    qpci_io_writeb(d.dev, d.tco_io_base + TCO_DAT_IN, 0);
+    qpci_io_writeb(d.dev, d.tco_io_base + TCO_DAT_OUT, 0);
+    val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS);
+    ret = val & (TCO_TIMEOUT | SW_TCO_SMI | TCO_INT_STS) ? 1 : 0;
+    g_assert(ret == 1);
+    qpci_io_writew(d.dev, d.tco_io_base + TCO1_STS, val);
+    g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS), ==, 0);
+    qtest_end();
+}
+
+static void test_tco2_status_bits(void)
+{
+    TestData d;
+    uint16_t ticks = 8;
+    uint16_t val;
+    int ret;
+
+    d.args = NULL;
+    d.noreboot = true;
+    test_init(&d);
+
+    stop_tco(&d);
+    clear_tco_status(&d);
+    reset_on_second_timeout(true);
+    set_tco_timeout(&d, ticks);
+    load_tco(&d);
+    start_tco(&d);
+    clock_step(ticks * TCO_TICK_NSEC * 2);
+
+    val = qpci_io_readw(d.dev, d.tco_io_base + TCO2_STS);
+    ret = val & (TCO_SECOND_TO_STS | TCO_BOOT_STS) ? 1 : 0;
+    g_assert(ret == 1);
+    qpci_io_writew(d.dev, d.tco_io_base + TCO2_STS, val);
+    g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO2_STS), ==, 0);
+    qtest_end();
+}
+
+int main(int argc, char **argv)
+{
+    g_test_init(&argc, &argv, NULL);
+
+    qtest_add_func("tco/defaults", test_tco_defaults);
+    qtest_add_func("tco/timeout/no_action", test_tco_timeout);
+    qtest_add_func("tco/timeout/no_action/max", test_tco_max_timeout);
+    qtest_add_func("tco/second_timeout/pause", test_tco_second_timeout_pause);
+    qtest_add_func("tco/second_timeout/reset", test_tco_second_timeout_reset);
+    qtest_add_func("tco/second_timeout/shutdown",
+                   test_tco_second_timeout_shutdown);
+    qtest_add_func("tco/second_timeout/none", test_tco_second_timeout_none);
+    qtest_add_func("tco/counter", test_tco_ticks_counter);
+    qtest_add_func("tco/tco1_control/bits", test_tco1_control_bits);
+    qtest_add_func("tco/tco1_status/bits", test_tco1_status_bits);
+    qtest_add_func("tco/tco2_status/bits", test_tco2_status_bits);
+    return g_test_run();
+}
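
The TCO_SECS_TO_TICKS()/TCO_TICKS_TO_SECS() macros at the top of the new test convert at 10 ticks per 6 seconds, one TCO tick being 0.6 s (the same unit as TCO_TICK_NSEC used with clock_step()); the integer division rounds down. A quick standalone check of the arithmetic, for illustration only:

    #include <assert.h>

    #define TCO_SECS_TO_TICKS(secs)     (((secs) * 10) / 6)
    #define TCO_TICKS_TO_SECS(ticks)    (((ticks) * 6) / 10)

    int main(void)
    {
        assert(TCO_SECS_TO_TICKS(4) == 6);   /* 4 s -> 6 ticks, i.e. 3.6 s */
        assert(TCO_TICKS_TO_SECS(6) == 3);   /* the 0.6 s remainder is dropped */
        return 0;
    }
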
diff --git a/trace-events b/trace-events
index 52b7efa..2d395c5 100644
--- a/trace-events
+++ b/trace-events
@@ -1191,6 +1191,7 @@
 qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"
 savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u"
 savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d"
+savevm_section_skip(const char *id, unsigned int section_id) "%s, section_id %u"
 savevm_state_begin(void) ""
 savevm_state_header(void) ""
 savevm_state_iterate(void) ""
@@ -1403,10 +1404,13 @@
 migrate_fd_cancel(void) ""
 migrate_pending(uint64_t size, uint64_t max) "pending size %" PRIu64 " max %" PRIu64
 migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64
+migrate_state_too_big(void) ""
+migrate_global_state_post_load(const char *state) "loaded state: %s"
+migrate_global_state_pre_save(const char *state) "saved state: %s"
 
 # migration/rdma.c
-qemu_dma_accept_incoming_migration(void) ""
-qemu_dma_accept_incoming_migration_accepted(void) ""
+qemu_rdma_accept_incoming_migration(void) ""
+qemu_rdma_accept_incoming_migration_accepted(void) ""
 qemu_rdma_accept_pin_state(bool pin) "%d"
 qemu_rdma_accept_pin_verbsc(void *verbs) "Verbs context after listen: %p"
 qemu_rdma_block_for_wrid_miss(const char *wcompstr, int wcomp, const char *gcompstr, uint64_t req) "A Wanted wrid %s (%d) but got %s (%" PRIu64 ")"
@@ -1433,13 +1437,14 @@
 qemu_rdma_registration_handle_compress(int64_t length, int index, int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64
 qemu_rdma_registration_handle_finished(void) ""
 qemu_rdma_registration_handle_ram_blocks(void) ""
+qemu_rdma_registration_handle_ram_blocks_loop(const char *name, uint64_t offset, uint64_t length, void *local_host_addr, unsigned int src_index) "%s: @%" PRIx64 "/%" PRIu64 " host:@%p src_index: %u"
 qemu_rdma_registration_handle_register(int requests) "%d requests"
 qemu_rdma_registration_handle_register_loop(int req, int index, uint64_t addr, uint64_t chunks) "Registration request (%d): index %d, current_addr %" PRIu64 " chunks: %" PRIu64
 qemu_rdma_registration_handle_register_rkey(int rkey) "%x"
 qemu_rdma_registration_handle_unregister(int requests) "%d requests"
 qemu_rdma_registration_handle_unregister_loop(int count, int index, uint64_t chunk) "Unregistration request (%d): index %d, chunk %" PRIu64
 qemu_rdma_registration_handle_unregister_success(uint64_t chunk) "%" PRIu64
-qemu_rdma_registration_handle_wait(uint64_t flags) "Waiting for next request %" PRIu64
+qemu_rdma_registration_handle_wait(void) ""
 qemu_rdma_registration_start(uint64_t flags) "%" PRIu64
 qemu_rdma_registration_stop(uint64_t flags) "%" PRIu64
 qemu_rdma_registration_stop_ram(void) ""
@@ -1458,8 +1463,9 @@
 qemu_rdma_write_one_sendreg(uint64_t chunk, int len, int index, int64_t offset) "Sending registration request chunk %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64
 qemu_rdma_write_one_top(uint64_t chunks, uint64_t size) "Writing %" PRIu64 " chunks, (%" PRIu64 " MB)"
 qemu_rdma_write_one_zero(uint64_t chunk, int len, int index, int64_t offset) "Entire chunk is zero, sending compress: %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64
-rdma_add_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
-rdma_delete_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
+rdma_add_block(const char *block_name, int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: '%s':%d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
+rdma_block_notification_handle(const char *name, int index) "%s at %d"
+rdma_delete_block(void *block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %p, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
 rdma_start_incoming_migration(void) ""
 rdma_start_incoming_migration_after_dest_init(void) ""
 rdma_start_incoming_migration_after_rdma_listen(void) ""
@@ -1594,6 +1600,7 @@
 vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)"
 vfio_platform_populate_interrupts(int pin, int count, int flags) "- IRQ index %d: count %d, flags=0x%x"
 vfio_intp_interrupt_set_pending(int index) "irq %d is set PENDING"
+vfio_platform_start_irqfd_injection(int index, int fd, int resamplefd) "IRQ index=%d, fd = %d, resamplefd = %d"
 
 #hw/acpi/memory_hotplug.c
 mhp_acpi_invalid_slot_selected(uint32_t slot) "0x%"PRIx32
diff --git a/translate-all.c b/translate-all.c
index 412bc90..50d53fd 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -118,6 +118,7 @@
 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
 
 uintptr_t qemu_real_host_page_size;
+uintptr_t qemu_real_host_page_mask;
 uintptr_t qemu_host_page_size;
 uintptr_t qemu_host_page_mask;
 
@@ -307,6 +308,7 @@
     /* NOTE: we can always suppose that qemu_host_page_size >=
        TARGET_PAGE_SIZE */
     qemu_real_host_page_size = getpagesize();
+    qemu_real_host_page_mask = ~(qemu_real_host_page_size - 1);
     if (qemu_host_page_size == 0) {
         qemu_host_page_size = qemu_real_host_page_size;
     }
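
The new qemu_real_host_page_mask follows the usual power-of-two mask idiom: with the low bits of the page size cleared, ANDing an address with the mask rounds it down to a host-page boundary. A small worked example (assuming a 4 KiB page):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uintptr_t page_size = 4096;              /* assumed 4 KiB host page */
        uintptr_t page_mask = ~(page_size - 1);  /* ...fffff000 */

        /* Round an address down to the start of its page. */
        assert((0x12345678u & page_mask) == 0x12345000u);
        return 0;
    }
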
diff --git a/vl.c b/vl.c
index 3eea5c4..3f269dc 100644
--- a/vl.c
+++ b/vl.c
@@ -574,8 +574,14 @@
     { RUN_STATE_DEBUG, RUN_STATE_RUNNING },
     { RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE },
 
-    { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
+    { RUN_STATE_INMIGRATE, RUN_STATE_INTERNAL_ERROR },
+    { RUN_STATE_INMIGRATE, RUN_STATE_IO_ERROR },
     { RUN_STATE_INMIGRATE, RUN_STATE_PAUSED },
+    { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
+    { RUN_STATE_INMIGRATE, RUN_STATE_SHUTDOWN },
+    { RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED },
+    { RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG },
+    { RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED },
 
     { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
     { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
@@ -635,6 +641,18 @@
     return current_run_state == state;
 }
 
+bool runstate_store(char *str, size_t size)
+{
+    const char *state = RunState_lookup[current_run_state];
+    size_t len = strlen(state) + 1;
+
+    if (len > size) {
+        return false;
+    }
+    memcpy(str, state, len);
+    return true;
+}
+
 static void runstate_init(void)
 {
     const RunStateTransition *p;
@@ -4610,6 +4628,7 @@
         return 0;
     }
 
+    register_global_state();
     if (incoming) {
         Error *local_err = NULL;
         qemu_start_incoming_migration(incoming, &local_err);
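
runstate_store() deliberately reports failure instead of truncating when the destination buffer is too small. A standalone sketch of the same bounded-copy pattern (hypothetical helper name, outside QEMU):

    #include <stdbool.h>
    #include <string.h>

    /* Copy src (including its NUL terminator) into dst only if it fits. */
    static bool store_string(const char *src, char *dst, size_t size)
    {
        size_t len = strlen(src) + 1;

        if (len > size) {
            return false;            /* caller must handle the failure */
        }
        memcpy(dst, src, len);
        return true;
    }
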