Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

* KVM error improvement from Laurent
* CONFIG_PARALLEL fix from Mirek
* Atomic/optimized dirty bitmap access from myself and Stefan
* BUILD_DIR convenience/bugfix from Peter C
* Memory leak fix from Shannon
* SMM improvements (though still TCG only) from myself and Gerd, acked by mst

# gpg: Signature made Fri Jun  5 18:45:20 2015 BST using RSA key ID 78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (62 commits)
  update Linux headers from kvm/next
  atomics: add explicit compiler fence in __atomic memory barriers
  ich9: implement SMI_LOCK
  q35: implement TSEG
  q35: add test for SMRAM.D_LCK
  q35: implement SMRAM.D_LCK
  q35: add config space wmask for SMRAM and ESMRAMC
  q35: fix ESMRAMC default
  q35: implement high SMRAM
  hw/i386: remove smram_update
  target-i386: use memory API to implement SMRAM
  hw/i386: add a separate region that tracks the SMRAME bit
  target-i386: create a separate AddressSpace for each CPU
  vl: run "late" notifiers immediately
  qom: add object_property_add_const_link
  vl: allow full-blown QemuOpts syntax for -global
  pflash_cfi01: add secure property
  pflash_cfi01: change to new-style MMIO accessors
  pflash_cfi01: change big-endian property to BIT type
  target-i386: wake up processors that receive an SMI
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index d8a8edd..6e79459 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -324,7 +324,7 @@
 /* Intel 82078 floppy disk controller emulation          */
 
 static void fdctrl_reset(FDCtrl *fdctrl, int do_irq);
-static void fdctrl_reset_fifo(FDCtrl *fdctrl);
+static void fdctrl_to_command_phase(FDCtrl *fdctrl);
 static int fdctrl_transfer_handler (void *opaque, int nchan,
                                     int dma_pos, int dma_len);
 static void fdctrl_raise_irq(FDCtrl *fdctrl);
@@ -495,6 +495,33 @@
     FD_DIR_DSKCHG   = 0x80,
 };
 
+/*
+ * See chapter 5.0 "Controller phases" of the spec:
+ *
+ * Command phase:
+ * The host writes a command and its parameters into the FIFO. The command
+ * phase is completed when all parameters for the command have been supplied,
+ * and execution phase is entered.
+ *
+ * Execution phase:
+ * Data transfers, either DMA or non-DMA. For non-DMA transfers, the FIFO
+ * contains the payload now, otherwise it's unused. When all bytes of the
+ * required data have been transferred, the state is switched to either result
+ * phase (if the command produces status bytes) or directly back into the
+ * command phase for the next command.
+ *
+ * Result phase:
+ * The host reads out the FIFO, which contains one or more result bytes now.
+ */
+enum {
+    /* Only for migration: reconstruct phase from registers like qemu 2.3 */
+    FD_PHASE_RECONSTRUCT    = 0,
+
+    FD_PHASE_COMMAND        = 1,
+    FD_PHASE_EXECUTION      = 2,
+    FD_PHASE_RESULT         = 3,
+};
+
 #define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI)
 #define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT)
 
@@ -504,6 +531,7 @@
     /* Controller state */
     QEMUTimer *result_timer;
     int dma_chann;
+    uint8_t phase;
     /* Controller's identification */
     uint8_t version;
     /* HW */
@@ -744,6 +772,28 @@
     }
 };
 
+/*
+ * Reconstructs the phase from register values according to the logic that was
+ * implemented in qemu 2.3. This is the default value that is used if the phase
+ * subsection is not present on migration.
+ *
+ * Don't change this function to reflect newer qemu versions, it is part of
+ * the migration ABI.
+ */
+static int reconstruct_phase(FDCtrl *fdctrl)
+{
+    if (fdctrl->msr & FD_MSR_NONDMA) {
+        return FD_PHASE_EXECUTION;
+    } else if ((fdctrl->msr & FD_MSR_RQM) == 0) {
+        /* qemu 2.3 disabled RQM only during DMA transfers */
+        return FD_PHASE_EXECUTION;
+    } else if (fdctrl->msr & FD_MSR_DIO) {
+        return FD_PHASE_RESULT;
+    } else {
+        return FD_PHASE_COMMAND;
+    }
+}
+
 static void fdc_pre_save(void *opaque)
 {
     FDCtrl *s = opaque;
@@ -751,12 +801,24 @@
     s->dor_vmstate = s->dor | GET_CUR_DRV(s);
 }
 
+static int fdc_pre_load(void *opaque)
+{
+    FDCtrl *s = opaque;
+    s->phase = FD_PHASE_RECONSTRUCT;
+    return 0;
+}
+
 static int fdc_post_load(void *opaque, int version_id)
 {
     FDCtrl *s = opaque;
 
     SET_CUR_DRV(s, s->dor_vmstate & FD_DOR_SELMASK);
     s->dor = s->dor_vmstate & ~FD_DOR_SELMASK;
+
+    if (s->phase == FD_PHASE_RECONSTRUCT) {
+        s->phase = reconstruct_phase(s);
+    }
+
     return 0;
 }
 
@@ -794,11 +856,29 @@
     }
 };
 
+static bool fdc_phase_needed(void *opaque)
+{
+    FDCtrl *fdctrl = opaque;
+
+    return reconstruct_phase(fdctrl) != fdctrl->phase;
+}
+
+static const VMStateDescription vmstate_fdc_phase = {
+    .name = "fdc/phase",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(phase, FDCtrl),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_fdc = {
     .name = "fdc",
     .version_id = 2,
     .minimum_version_id = 2,
     .pre_save = fdc_pre_save,
+    .pre_load = fdc_pre_load,
     .post_load = fdc_post_load,
     .fields = (VMStateField[]) {
         /* Controller State */
@@ -839,6 +919,9 @@
             .vmsd = &vmstate_fdc_result_timer,
             .needed = fdc_result_timer_needed,
         } , {
+            .vmsd = &vmstate_fdc_phase,
+            .needed = fdc_phase_needed,
+        } , {
             /* empty */
         }
     }
@@ -918,7 +1001,7 @@
     fdctrl->data_dir = FD_DIR_WRITE;
     for (i = 0; i < MAX_FD; i++)
         fd_recalibrate(&fdctrl->drives[i]);
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
     if (do_irq) {
         fdctrl->status0 |= FD_SR0_RDYCHG;
         fdctrl_raise_irq(fdctrl);
@@ -1134,17 +1217,22 @@
     return retval;
 }
 
-/* FIFO state control */
-static void fdctrl_reset_fifo(FDCtrl *fdctrl)
+/* Clear the FIFO and update the state for receiving the next command */
+static void fdctrl_to_command_phase(FDCtrl *fdctrl)
 {
+    fdctrl->phase = FD_PHASE_COMMAND;
     fdctrl->data_dir = FD_DIR_WRITE;
     fdctrl->data_pos = 0;
+    fdctrl->data_len = 1; /* Accept command byte, adjust for params later */
     fdctrl->msr &= ~(FD_MSR_CMDBUSY | FD_MSR_DIO);
+    fdctrl->msr |= FD_MSR_RQM;
 }
 
-/* Set FIFO status for the host to read */
-static void fdctrl_set_fifo(FDCtrl *fdctrl, int fifo_len)
+/* Update the state to allow the guest to read out the command status.
+ * @fifo_len is the number of result bytes to be read out. */
+static void fdctrl_to_result_phase(FDCtrl *fdctrl, int fifo_len)
 {
+    fdctrl->phase = FD_PHASE_RESULT;
     fdctrl->data_dir = FD_DIR_READ;
     fdctrl->data_len = fifo_len;
     fdctrl->data_pos = 0;
@@ -1157,7 +1245,7 @@
     qemu_log_mask(LOG_UNIMP, "fdc: unimplemented command 0x%02x\n",
                   fdctrl->fifo[0]);
     fdctrl->fifo[0] = FD_SR0_INVCMD;
-    fdctrl_set_fifo(fdctrl, 1);
+    fdctrl_to_result_phase(fdctrl, 1);
 }
 
 /* Seek to next sector
@@ -1238,7 +1326,7 @@
     fdctrl->msr |= FD_MSR_RQM | FD_MSR_DIO;
     fdctrl->msr &= ~FD_MSR_NONDMA;
 
-    fdctrl_set_fifo(fdctrl, 7);
+    fdctrl_to_result_phase(fdctrl, 7);
     fdctrl_raise_irq(fdctrl);
 }
 
@@ -1352,7 +1440,7 @@
         }
     }
     FLOPPY_DPRINTF("start non-DMA transfer\n");
-    fdctrl->msr |= FD_MSR_NONDMA;
+    fdctrl->msr |= FD_MSR_NONDMA | FD_MSR_RQM;
     if (direction != FD_DIR_WRITE)
         fdctrl->msr |= FD_MSR_DIO;
     /* IO based transfer: calculate len */
@@ -1505,9 +1593,16 @@
         FLOPPY_DPRINTF("error: controller not ready for reading\n");
         return 0;
     }
+
+    /* If data_len spans multiple sectors, the current position in the FIFO
+     * wraps around while fdctrl->data_pos is the real position in the whole
+     * request. */
     pos = fdctrl->data_pos;
     pos %= FD_SECTOR_LEN;
-    if (fdctrl->msr & FD_MSR_NONDMA) {
+
+    switch (fdctrl->phase) {
+    case FD_PHASE_EXECUTION:
+        assert(fdctrl->msr & FD_MSR_NONDMA);
         if (pos == 0) {
             if (fdctrl->data_pos != 0)
                 if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) {
@@ -1523,20 +1618,28 @@
                 memset(fdctrl->fifo, 0, FD_SECTOR_LEN);
             }
         }
-    }
-    retval = fdctrl->fifo[pos];
-    if (++fdctrl->data_pos == fdctrl->data_len) {
-        fdctrl->data_pos = 0;
-        /* Switch from transfer mode to status mode
-         * then from status mode to command mode
-         */
-        if (fdctrl->msr & FD_MSR_NONDMA) {
+
+        if (++fdctrl->data_pos == fdctrl->data_len) {
+            fdctrl->msr &= ~FD_MSR_RQM;
             fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
-        } else {
-            fdctrl_reset_fifo(fdctrl);
+        }
+        break;
+
+    case FD_PHASE_RESULT:
+        assert(!(fdctrl->msr & FD_MSR_NONDMA));
+        if (++fdctrl->data_pos == fdctrl->data_len) {
+            fdctrl->msr &= ~FD_MSR_RQM;
+            fdctrl_to_command_phase(fdctrl);
             fdctrl_reset_irq(fdctrl);
         }
+        break;
+
+    case FD_PHASE_COMMAND:
+    default:
+        abort();
     }
+
+    retval = fdctrl->fifo[pos];
     FLOPPY_DPRINTF("data register: 0x%02x\n", retval);
 
     return retval;
@@ -1606,7 +1709,7 @@
 {
     fdctrl->lock = (fdctrl->fifo[0] & 0x80) ? 1 : 0;
     fdctrl->fifo[0] = fdctrl->lock << 4;
-    fdctrl_set_fifo(fdctrl, 1);
+    fdctrl_to_result_phase(fdctrl, 1);
 }
 
 static void fdctrl_handle_dumpreg(FDCtrl *fdctrl, int direction)
@@ -1631,20 +1734,20 @@
         (cur_drv->perpendicular << 2);
     fdctrl->fifo[8] = fdctrl->config;
     fdctrl->fifo[9] = fdctrl->precomp_trk;
-    fdctrl_set_fifo(fdctrl, 10);
+    fdctrl_to_result_phase(fdctrl, 10);
 }
 
 static void fdctrl_handle_version(FDCtrl *fdctrl, int direction)
 {
     /* Controller's version */
     fdctrl->fifo[0] = fdctrl->version;
-    fdctrl_set_fifo(fdctrl, 1);
+    fdctrl_to_result_phase(fdctrl, 1);
 }
 
 static void fdctrl_handle_partid(FDCtrl *fdctrl, int direction)
 {
     fdctrl->fifo[0] = 0x41; /* Stepping 1 */
-    fdctrl_set_fifo(fdctrl, 1);
+    fdctrl_to_result_phase(fdctrl, 1);
 }
 
 static void fdctrl_handle_restore(FDCtrl *fdctrl, int direction)
@@ -1667,7 +1770,7 @@
     fdctrl->config = fdctrl->fifo[11];
     fdctrl->precomp_trk = fdctrl->fifo[12];
     fdctrl->pwrd = fdctrl->fifo[13];
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
 }
 
 static void fdctrl_handle_save(FDCtrl *fdctrl, int direction)
@@ -1697,7 +1800,7 @@
     fdctrl->fifo[12] = fdctrl->pwrd;
     fdctrl->fifo[13] = 0;
     fdctrl->fifo[14] = 0;
-    fdctrl_set_fifo(fdctrl, 15);
+    fdctrl_to_result_phase(fdctrl, 15);
 }
 
 static void fdctrl_handle_readid(FDCtrl *fdctrl, int direction)
@@ -1746,7 +1849,7 @@
     else
         fdctrl->dor |= FD_DOR_DMAEN;
     /* No result back */
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
 }
 
 static void fdctrl_handle_sense_drive_status(FDCtrl *fdctrl, int direction)
@@ -1762,7 +1865,7 @@
         (cur_drv->head << 2) |
         GET_CUR_DRV(fdctrl) |
         0x28;
-    fdctrl_set_fifo(fdctrl, 1);
+    fdctrl_to_result_phase(fdctrl, 1);
 }
 
 static void fdctrl_handle_recalibrate(FDCtrl *fdctrl, int direction)
@@ -1772,7 +1875,7 @@
     SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
     cur_drv = get_cur_drv(fdctrl);
     fd_recalibrate(cur_drv);
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
     /* Raise Interrupt */
     fdctrl->status0 |= FD_SR0_SEEK;
     fdctrl_raise_irq(fdctrl);
@@ -1788,7 +1891,7 @@
         fdctrl->reset_sensei--;
     } else if (!(fdctrl->sra & FD_SRA_INTPEND)) {
         fdctrl->fifo[0] = FD_SR0_INVCMD;
-        fdctrl_set_fifo(fdctrl, 1);
+        fdctrl_to_result_phase(fdctrl, 1);
         return;
     } else {
         fdctrl->fifo[0] =
@@ -1797,7 +1900,7 @@
     }
 
     fdctrl->fifo[1] = cur_drv->track;
-    fdctrl_set_fifo(fdctrl, 2);
+    fdctrl_to_result_phase(fdctrl, 2);
     fdctrl_reset_irq(fdctrl);
     fdctrl->status0 = FD_SR0_RDYCHG;
 }
@@ -1808,7 +1911,7 @@
 
     SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
     cur_drv = get_cur_drv(fdctrl);
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
     /* The seek command just sends step pulses to the drive and doesn't care if
      * there is a medium inserted of if it's banging the head against the drive.
      */
@@ -1825,7 +1928,7 @@
     if (fdctrl->fifo[1] & 0x80)
         cur_drv->perpendicular = fdctrl->fifo[1] & 0x7;
     /* No result back */
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
 }
 
 static void fdctrl_handle_configure(FDCtrl *fdctrl, int direction)
@@ -1833,20 +1936,20 @@
     fdctrl->config = fdctrl->fifo[2];
     fdctrl->precomp_trk =  fdctrl->fifo[3];
     /* No result back */
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
 }
 
 static void fdctrl_handle_powerdown_mode(FDCtrl *fdctrl, int direction)
 {
     fdctrl->pwrd = fdctrl->fifo[1];
     fdctrl->fifo[0] = fdctrl->fifo[1];
-    fdctrl_set_fifo(fdctrl, 1);
+    fdctrl_to_result_phase(fdctrl, 1);
 }
 
 static void fdctrl_handle_option(FDCtrl *fdctrl, int direction)
 {
     /* No result back */
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
 }
 
 static void fdctrl_handle_drive_specification_command(FDCtrl *fdctrl, int direction)
@@ -1862,15 +1965,15 @@
             fdctrl->fifo[0] = fdctrl->fifo[1];
             fdctrl->fifo[2] = 0;
             fdctrl->fifo[3] = 0;
-            fdctrl_set_fifo(fdctrl, 4);
+            fdctrl_to_result_phase(fdctrl, 4);
         } else {
-            fdctrl_reset_fifo(fdctrl);
+            fdctrl_to_command_phase(fdctrl);
         }
     } else if (fdctrl->data_len > 7) {
         /* ERROR */
         fdctrl->fifo[0] = 0x80 |
             (cur_drv->head << 2) | GET_CUR_DRV(fdctrl);
-        fdctrl_set_fifo(fdctrl, 1);
+        fdctrl_to_result_phase(fdctrl, 1);
     }
 }
 
@@ -1887,7 +1990,7 @@
         fd_seek(cur_drv, cur_drv->head,
                 cur_drv->track + fdctrl->fifo[2], cur_drv->sect, 1);
     }
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
     /* Raise Interrupt */
     fdctrl->status0 |= FD_SR0_SEEK;
     fdctrl_raise_irq(fdctrl);
@@ -1905,20 +2008,25 @@
         fd_seek(cur_drv, cur_drv->head,
                 cur_drv->track - fdctrl->fifo[2], cur_drv->sect, 1);
     }
-    fdctrl_reset_fifo(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
     /* Raise Interrupt */
     fdctrl->status0 |= FD_SR0_SEEK;
     fdctrl_raise_irq(fdctrl);
 }
 
-static const struct {
+/*
+ * Handlers for the execution phase of each command
+ */
+typedef struct FDCtrlCommand {
     uint8_t value;
     uint8_t mask;
     const char* name;
     int parameters;
     void (*handler)(FDCtrl *fdctrl, int direction);
     int direction;
-} handlers[] = {
+} FDCtrlCommand;
+
+static const FDCtrlCommand handlers[] = {
     { FD_CMD_READ, 0x1f, "READ", 8, fdctrl_start_transfer, FD_DIR_READ },
     { FD_CMD_WRITE, 0x3f, "WRITE", 8, fdctrl_start_transfer, FD_DIR_WRITE },
     { FD_CMD_SEEK, 0xff, "SEEK", 2, fdctrl_handle_seek },
@@ -1955,9 +2063,19 @@
 /* Associate command to an index in the 'handlers' array */
 static uint8_t command_to_handler[256];
 
+static const FDCtrlCommand *get_command(uint8_t cmd)
+{
+    int idx;
+
+    idx = command_to_handler[cmd];
+    FLOPPY_DPRINTF("%s command\n", handlers[idx].name);
+    return &handlers[idx];
+}
+
 static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
 {
     FDrive *cur_drv;
+    const FDCtrlCommand *cmd;
     uint32_t pos;
 
     /* Reset mode */
@@ -1970,12 +2088,27 @@
         return;
     }
     fdctrl->dsr &= ~FD_DSR_PWRDOWN;
-    /* Is it write command time ? */
-    if (fdctrl->msr & FD_MSR_NONDMA) {
+
+    FLOPPY_DPRINTF("%s: %02x\n", __func__, value);
+
+    /* If data_len spans multiple sectors, the current position in the FIFO
+     * wraps around while fdctrl->data_pos is the real position in the whole
+     * request. */
+    pos = fdctrl->data_pos++;
+    pos %= FD_SECTOR_LEN;
+    fdctrl->fifo[pos] = value;
+
+    if (fdctrl->data_pos == fdctrl->data_len) {
+        fdctrl->msr &= ~FD_MSR_RQM;
+    }
+
+    switch (fdctrl->phase) {
+    case FD_PHASE_EXECUTION:
+        /* For DMA requests, RQM should be cleared during execution phase, so
+         * we would have errored out above. */
+        assert(fdctrl->msr & FD_MSR_NONDMA);
+
         /* FIFO data write */
-        pos = fdctrl->data_pos++;
-        pos %= FD_SECTOR_LEN;
-        fdctrl->fifo[pos] = value;
         if (pos == FD_SECTOR_LEN - 1 ||
             fdctrl->data_pos == fdctrl->data_len) {
             cur_drv = get_cur_drv(fdctrl);
@@ -1983,45 +2116,54 @@
                 < 0) {
                 FLOPPY_DPRINTF("error writing sector %d\n",
                                fd_sector(cur_drv));
-                return;
+                break;
             }
             if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) {
                 FLOPPY_DPRINTF("error seeking to next sector %d\n",
                                fd_sector(cur_drv));
-                return;
+                break;
             }
         }
-        /* Switch from transfer mode to status mode
-         * then from status mode to command mode
-         */
-        if (fdctrl->data_pos == fdctrl->data_len)
-            fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
-        return;
-    }
-    if (fdctrl->data_pos == 0) {
-        /* Command */
-        pos = command_to_handler[value & 0xff];
-        FLOPPY_DPRINTF("%s command\n", handlers[pos].name);
-        fdctrl->data_len = handlers[pos].parameters + 1;
-        fdctrl->msr |= FD_MSR_CMDBUSY;
-    }
 
-    FLOPPY_DPRINTF("%s: %02x\n", __func__, value);
-    pos = fdctrl->data_pos++;
-    pos %= FD_SECTOR_LEN;
-    fdctrl->fifo[pos] = value;
-    if (fdctrl->data_pos == fdctrl->data_len) {
-        /* We now have all parameters
-         * and will be able to treat the command
-         */
-        if (fdctrl->data_state & FD_STATE_FORMAT) {
-            fdctrl_format_sector(fdctrl);
-            return;
+        /* Switch to result phase when done with the transfer */
+        if (fdctrl->data_pos == fdctrl->data_len) {
+            fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
+        }
+        break;
+
+    case FD_PHASE_COMMAND:
+        assert(!(fdctrl->msr & FD_MSR_NONDMA));
+        assert(fdctrl->data_pos < FD_SECTOR_LEN);
+
+        if (pos == 0) {
+            /* The first byte specifies the command. Now we start reading
+             * as many parameters as this command requires. */
+            cmd = get_command(value);
+            fdctrl->data_len = cmd->parameters + 1;
+            if (cmd->parameters) {
+                fdctrl->msr |= FD_MSR_RQM;
+            }
+            fdctrl->msr |= FD_MSR_CMDBUSY;
         }
 
-        pos = command_to_handler[fdctrl->fifo[0] & 0xff];
-        FLOPPY_DPRINTF("treat %s command\n", handlers[pos].name);
-        (*handlers[pos].handler)(fdctrl, handlers[pos].direction);
+        if (fdctrl->data_pos == fdctrl->data_len) {
+            /* We have all parameters now, execute the command */
+            fdctrl->phase = FD_PHASE_EXECUTION;
+
+            if (fdctrl->data_state & FD_STATE_FORMAT) {
+                fdctrl_format_sector(fdctrl);
+                break;
+            }
+
+            cmd = get_command(fdctrl->fifo[0]);
+            FLOPPY_DPRINTF("Calling handler for '%s'\n", cmd->name);
+            cmd->handler(fdctrl, cmd->direction);
+        }
+        break;
+
+    case FD_PHASE_RESULT:
+    default:
+        abort();
     }
 }
 
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 585a27b..dd52d50 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -51,8 +51,15 @@
 
 #define MACIO_PAGE_SIZE 4096
 
+/*
+ * Unaligned DMA read/write access functions required for OS X/Darwin which
+ * don't perform DMA transactions on sector boundaries. These functions are
+ * modelled on bdrv_co_do_preadv()/bdrv_co_do_pwritev() and so should be
+ * easy to remove if the unaligned block APIs are ever exposed.
+ */
+
 static void pmac_dma_read(BlockBackend *blk,
-                          int64_t sector_num, int nb_sectors,
+                          int64_t offset, unsigned int bytes,
                           void (*cb)(void *opaque, int ret), void *opaque)
 {
     DBDMA_io *io = opaque;
@@ -60,76 +67,48 @@
     IDEState *s = idebus_active_if(&m->bus);
     dma_addr_t dma_addr, dma_len;
     void *mem;
-    int nsector, remainder;
+    int64_t sector_num;
+    int nsector;
+    uint64_t align = BDRV_SECTOR_SIZE;
+    size_t head_bytes, tail_bytes;
 
     qemu_iovec_destroy(&io->iov);
     qemu_iovec_init(&io->iov, io->len / MACIO_PAGE_SIZE + 1);
 
-    if (io->remainder_len > 0) {
-        /* Return remainder of request */
-        int transfer = MIN(io->remainder_len, io->len);
+    sector_num = (offset >> 9);
+    nsector = (io->len >> 9);
 
-        MACIO_DPRINTF("--- DMA read pop     - bounce addr: %p addr: %"
-                      HWADDR_PRIx " remainder_len: %x\n",
-                      &io->remainder + (0x200 - transfer), io->addr,
-                      io->remainder_len);
-
-        cpu_physical_memory_write(io->addr,
-                                  &io->remainder + (0x200 - transfer),
-                                  transfer);
-
-        io->remainder_len -= transfer;
-        io->len -= transfer;
-        io->addr += transfer;
-
-        s->io_buffer_index += transfer;
-        s->io_buffer_size -= transfer;
-
-        if (io->remainder_len != 0) {
-            /* Still waiting for remainder */
-            return;
-        }
-
-        if (io->len == 0) {
-            MACIO_DPRINTF("--- finished all read processing; go and finish\n");
-            cb(opaque, 0);
-            return;
-        }
-    }
-
-    if (s->drive_kind == IDE_CD) {
-        sector_num = (int64_t)(s->lba << 2) + (s->io_buffer_index >> 9);
-    } else {
-        sector_num = ide_get_sector(s) + (s->io_buffer_index >> 9);
-    }
-
-    nsector = ((io->len + 0x1ff) >> 9);
-    remainder = (nsector << 9) - io->len;
-
-    MACIO_DPRINTF("--- DMA read transfer - addr: %" HWADDR_PRIx " len: %x\n",
-                  io->addr, io->len);
+    MACIO_DPRINTF("--- DMA read transfer (0x%" HWADDR_PRIx ",0x%x): "
+                  "sector_num: %" PRId64 ", nsector: %d\n", io->addr, io->len,
+                  sector_num, nsector);
 
     dma_addr = io->addr;
     dma_len = io->len;
     mem = dma_memory_map(&address_space_memory, dma_addr, &dma_len,
                          DMA_DIRECTION_FROM_DEVICE);
 
-    if (!remainder) {
-        MACIO_DPRINTF("--- DMA read aligned - addr: %" HWADDR_PRIx
-                      " len: %x\n", io->addr, io->len);
-        qemu_iovec_add(&io->iov, mem, io->len);
-    } else {
-        MACIO_DPRINTF("--- DMA read unaligned - addr: %" HWADDR_PRIx
-                      " len: %x\n", io->addr, io->len);
-        qemu_iovec_add(&io->iov, mem, io->len);
+    if (offset & (align - 1)) {
+        head_bytes = offset & (align - 1);
 
-        MACIO_DPRINTF("--- DMA read push    - bounce addr: %p "
-                      "remainder_len: %x\n",
-                      &io->remainder + 0x200 - remainder, remainder);
-        qemu_iovec_add(&io->iov, &io->remainder + 0x200 - remainder,
-                       remainder);
+        MACIO_DPRINTF("--- DMA unaligned head: sector %" PRId64 ", "
+                      "discarding %zu bytes\n", sector_num, head_bytes);
 
-        io->remainder_len = remainder;
+        qemu_iovec_add(&io->iov, &io->head_remainder, head_bytes);
+
+        bytes += offset & (align - 1);
+        offset = offset & ~(align - 1);
+    }
+
+    qemu_iovec_add(&io->iov, mem, io->len);
+
+    if ((offset + bytes) & (align - 1)) {
+        tail_bytes = (offset + bytes) & (align - 1);
+
+        MACIO_DPRINTF("--- DMA unaligned tail: sector %" PRId64 ", "
+                      "discarding bytes %zu\n", sector_num, tail_bytes);
+
+        qemu_iovec_add(&io->iov, &io->tail_remainder, align - tail_bytes);
+        bytes = ROUND_UP(bytes, align);
     }
 
     s->io_buffer_size -= io->len;
@@ -137,15 +116,15 @@
 
     io->len = 0;
 
-    MACIO_DPRINTF("--- Block read transfer   - sector_num: %"PRIx64"  "
-                  "nsector: %x\n",
-                  sector_num, nsector);
+    MACIO_DPRINTF("--- Block read transfer - sector_num: %" PRIx64 "  "
+                  "nsector: %x\n", (offset >> 9), (bytes >> 9));
 
-    m->aiocb = blk_aio_readv(blk, sector_num, &io->iov, nsector, cb, io);
+    m->aiocb = blk_aio_readv(blk, (offset >> 9), &io->iov, (bytes >> 9),
+                             cb, io);
 }
 
 static void pmac_dma_write(BlockBackend *blk,
-                         int64_t sector_num, int nb_sectors,
+                         int64_t offset, int bytes,
                          void (*cb)(void *opaque, int ret), void *opaque)
 {
     DBDMA_io *io = opaque;
@@ -153,53 +132,20 @@
     IDEState *s = idebus_active_if(&m->bus);
     dma_addr_t dma_addr, dma_len;
     void *mem;
-    int nsector, remainder;
-    int extra = 0;
+    int64_t sector_num;
+    int nsector;
+    uint64_t align = BDRV_SECTOR_SIZE;
+    size_t head_bytes, tail_bytes;
+    bool unaligned_head = false, unaligned_tail = false;
 
     qemu_iovec_destroy(&io->iov);
     qemu_iovec_init(&io->iov, io->len / MACIO_PAGE_SIZE + 1);
 
-    if (io->remainder_len > 0) {
-        /* Return remainder of request */
-        int transfer = MIN(io->remainder_len, io->len);
-
-        MACIO_DPRINTF("--- processing write remainder %x\n", transfer);
-        cpu_physical_memory_read(io->addr,
-                                 &io->remainder + (0x200 - transfer),
-                                 transfer);
-
-        io->remainder_len -= transfer;
-        io->len -= transfer;
-        io->addr += transfer;
-
-        s->io_buffer_index += transfer;
-        s->io_buffer_size -= transfer;
-
-        if (io->remainder_len != 0) {
-            /* Still waiting for remainder */
-            return;
-        }
-
-        MACIO_DPRINTF("--> prepending bounce buffer with size 0x200\n");
-
-        /* Sector transfer complete - prepend to request */
-        qemu_iovec_add(&io->iov, &io->remainder, 0x200);
-        extra = 1;
-    }
-
-    if (s->drive_kind == IDE_CD) {
-        sector_num = (int64_t)(s->lba << 2) + (s->io_buffer_index >> 9);
-    } else {
-        sector_num = ide_get_sector(s) + (s->io_buffer_index >> 9);
-    }
-
+    sector_num = (offset >> 9);
     nsector = (io->len >> 9);
-    remainder = io->len - (nsector << 9);
 
-    MACIO_DPRINTF("--- DMA write transfer - addr: %" HWADDR_PRIx " len: %x\n",
-                  io->addr, io->len);
-    MACIO_DPRINTF("xxx remainder: %x\n", remainder);
-    MACIO_DPRINTF("xxx sector_num: %"PRIx64"   nsector: %x\n",
+    MACIO_DPRINTF("--- DMA write transfer (0x%" HWADDR_PRIx ",0x%x): "
+                  "sector_num: %" PRId64 ", nsector: %d\n", io->addr, io->len,
                   sector_num, nsector);
 
     dma_addr = io->addr;
@@ -207,36 +153,59 @@
     mem = dma_memory_map(&address_space_memory, dma_addr, &dma_len,
                          DMA_DIRECTION_TO_DEVICE);
 
-    if (!remainder) {
-        MACIO_DPRINTF("--- DMA write aligned - addr: %" HWADDR_PRIx
-                      " len: %x\n", io->addr, io->len);
+    if (offset & (align - 1)) {
+        head_bytes = offset & (align - 1);
+        sector_num = ((offset & ~(align - 1)) >> 9);
+
+        MACIO_DPRINTF("--- DMA unaligned head: pre-reading head sector %"
+                      PRId64 "\n", sector_num);
+
+        blk_pread(s->blk, (sector_num << 9), &io->head_remainder, align);
+
+        qemu_iovec_add(&io->iov, &io->head_remainder, head_bytes);
         qemu_iovec_add(&io->iov, mem, io->len);
-    } else {
-        /* Write up to last complete sector */
-        MACIO_DPRINTF("--- DMA write unaligned - addr: %" HWADDR_PRIx
-                      " len: %x\n", io->addr, (nsector << 9));
-        qemu_iovec_add(&io->iov, mem, (nsector << 9));
 
-        MACIO_DPRINTF("--- DMA write read    - bounce addr: %p "
-                      "remainder_len: %x\n", &io->remainder, remainder);
-        cpu_physical_memory_read(io->addr + (nsector << 9), &io->remainder,
-                                 remainder);
+        bytes += offset & (align - 1);
+        offset = offset & ~(align - 1);
 
-        io->remainder_len = 0x200 - remainder;
-
-        MACIO_DPRINTF("xxx remainder_len: %x\n", io->remainder_len);
+        unaligned_head = true;
     }
 
-    s->io_buffer_size -= ((nsector + extra) << 9);
-    s->io_buffer_index += ((nsector + extra) << 9);
+    if ((offset + bytes) & (align - 1)) {
+        tail_bytes = (offset + bytes) & (align - 1);
+        sector_num = (((offset + bytes) & ~(align - 1)) >> 9);
+
+        MACIO_DPRINTF("--- DMA unaligned tail: pre-reading tail sector %"
+                      PRId64 "\n", sector_num);
+
+        blk_pread(s->blk, (sector_num << 9), &io->tail_remainder, align);
+
+        if (!unaligned_head) {
+            qemu_iovec_add(&io->iov, mem, io->len);
+        }
+
+        qemu_iovec_add(&io->iov, &io->tail_remainder + tail_bytes,
+                       align - tail_bytes);
+
+        bytes = ROUND_UP(bytes, align);
+
+        unaligned_tail = true;
+    }
+
+    if (!unaligned_head && !unaligned_tail) {
+        qemu_iovec_add(&io->iov, mem, io->len);
+    }
+
+    s->io_buffer_size -= io->len;
+    s->io_buffer_index += io->len;
 
     io->len = 0;
 
-    MACIO_DPRINTF("--- Block write transfer   - sector_num: %"PRIx64"  "
-                  "nsector: %x\n", sector_num, nsector + extra);
+    MACIO_DPRINTF("--- Block write transfer - sector_num: %" PRIx64 "  "
+                  "nsector: %x\n", (offset >> 9), (bytes >> 9));
 
-    m->aiocb = blk_aio_writev(blk, sector_num, &io->iov, nsector + extra, cb,
-                              io);
+    m->aiocb = blk_aio_writev(blk, (offset >> 9), &io->iov, (bytes >> 9),
+                              cb, io);
 }
 
 static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
@@ -244,19 +213,12 @@
     DBDMA_io *io = opaque;
     MACIOIDEState *m = io->opaque;
     IDEState *s = idebus_active_if(&m->bus);
-    int64_t sector_num;
-    int nsector, remainder;
+    int64_t offset;
 
-    MACIO_DPRINTF("\ns is %p\n", s);
-    MACIO_DPRINTF("io_buffer_index: %x\n", s->io_buffer_index);
-    MACIO_DPRINTF("io_buffer_size: %x   packet_transfer_size: %x\n",
-                  s->io_buffer_size, s->packet_transfer_size);
-    MACIO_DPRINTF("lba: %x\n", s->lba);
-    MACIO_DPRINTF("io_addr: %" HWADDR_PRIx "  io_len: %x\n", io->addr,
-                  io->len);
+    MACIO_DPRINTF("pmac_ide_atapi_transfer_cb\n");
 
     if (ret < 0) {
-        MACIO_DPRINTF("THERE WAS AN ERROR!  %d\n", ret);
+        MACIO_DPRINTF("DMA error: %d\n", ret);
         ide_atapi_io_error(s, ret);
         goto done;
     }
@@ -270,6 +232,7 @@
     }
 
     if (s->io_buffer_size <= 0) {
+        MACIO_DPRINTF("End of IDE transfer\n");
         ide_atapi_cmd_ok(s);
         m->dma_active = false;
         goto done;
@@ -289,19 +252,13 @@
         goto done;
     }
 
-    /* Calculate number of sectors */
-    sector_num = (int64_t)(s->lba << 2) + (s->io_buffer_index >> 9);
-    nsector = (io->len + 0x1ff) >> 9;
-    remainder = io->len & 0x1ff;
+    /* Calculate current offset */
+    offset = (int64_t)(s->lba << 11) + s->io_buffer_index;
 
-    MACIO_DPRINTF("nsector: %d   remainder: %x\n", nsector, remainder);
-    MACIO_DPRINTF("sector: %"PRIx64"   %zx\n", sector_num, io->iov.size / 512);
-
-    pmac_dma_read(s->blk, sector_num, nsector, pmac_ide_atapi_transfer_cb, io);
+    pmac_dma_read(s->blk, offset, io->len, pmac_ide_atapi_transfer_cb, io);
     return;
 
 done:
-    MACIO_DPRINTF("done DMA\n\n");
     block_acct_done(blk_get_stats(s->blk), &s->acct);
     io->dma_end(opaque);
 
@@ -313,16 +270,14 @@
     DBDMA_io *io = opaque;
     MACIOIDEState *m = io->opaque;
     IDEState *s = idebus_active_if(&m->bus);
-    int64_t sector_num;
-    int nsector, remainder;
+    int64_t offset;
 
     MACIO_DPRINTF("pmac_ide_transfer_cb\n");
 
     if (ret < 0) {
-        MACIO_DPRINTF("DMA error\n");
+        MACIO_DPRINTF("DMA error: %d\n", ret);
         m->aiocb = NULL;
         ide_dma_error(s);
-        io->remainder_len = 0;
         goto done;
     }
 
@@ -335,7 +290,7 @@
     }
 
     if (s->io_buffer_size <= 0) {
-        MACIO_DPRINTF("end of transfer\n");
+        MACIO_DPRINTF("End of IDE transfer\n");
         s->status = READY_STAT | SEEK_STAT;
         ide_set_irq(s->bus);
         m->dma_active = false;
@@ -348,24 +303,16 @@
     }
 
     /* Calculate number of sectors */
-    sector_num = ide_get_sector(s) + (s->io_buffer_index >> 9);
-    nsector = (io->len + 0x1ff) >> 9;
-    remainder = io->len & 0x1ff;
-
-    s->nsector -= nsector;
-
-    MACIO_DPRINTF("nsector: %d   remainder: %x\n", nsector, remainder);
-    MACIO_DPRINTF("sector: %"PRIx64"   %x\n", sector_num, nsector);
+    offset = (ide_get_sector(s) << 9) + s->io_buffer_index;
 
     switch (s->dma_cmd) {
     case IDE_DMA_READ:
-        pmac_dma_read(s->blk, sector_num, nsector, pmac_ide_transfer_cb, io);
+        pmac_dma_read(s->blk, offset, io->len, pmac_ide_transfer_cb, io);
         break;
     case IDE_DMA_WRITE:
-        pmac_dma_write(s->blk, sector_num, nsector, pmac_ide_transfer_cb, io);
+        pmac_dma_write(s->blk, offset, io->len, pmac_ide_transfer_cb, io);
         break;
     case IDE_DMA_TRIM:
-        MACIO_DPRINTF("TRIM command issued!");
         break;
     }
 
@@ -561,15 +508,12 @@
                             BlockCompletionFunc *cb)
 {
     MACIOIDEState *m = container_of(dma, MACIOIDEState, dma);
-    DBDMAState *dbdma = m->dbdma;
-    DBDMA_io *io;
-    int i;
 
     s->io_buffer_index = 0;
     if (s->drive_kind == IDE_CD) {
         s->io_buffer_size = s->packet_transfer_size;
     } else {
-        s->io_buffer_size = s->nsector * 0x200;
+        s->io_buffer_size = s->nsector * BDRV_SECTOR_SIZE;
     }
 
     MACIO_DPRINTF("\n\n------------ IDE transfer\n");
@@ -578,15 +522,6 @@
     MACIO_DPRINTF("lba: %x    size: %x\n", s->lba, s->io_buffer_size);
     MACIO_DPRINTF("-------------------------\n");
 
-    for (i = 0; i < DBDMA_CHANNELS; i++) {
-        io = &dbdma->channels[i].io;
-
-        if (io->opaque == m) {
-            io->remainder_len = 0;
-        }
-    }
-
-    MACIO_DPRINTF("\n");
     m->dma_active = true;
     DBDMA_kick(m->dbdma);
 }
diff --git a/include/hw/ppc/mac_dbdma.h b/include/hw/ppc/mac_dbdma.h
index c580327..c687021 100644
--- a/include/hw/ppc/mac_dbdma.h
+++ b/include/hw/ppc/mac_dbdma.h
@@ -40,8 +40,8 @@
     /* DMA is in progress, don't start another one */
     bool processing;
     /* unaligned last sector of a request */
-    uint8_t remainder[0x200];
-    int remainder_len;
+    uint8_t head_remainder[0x200];
+    uint8_t tail_remainder[0x200];
     QEMUIOVector iov;
 };
 
diff --git a/monitor.c b/monitor.c
index c7baa91..9afee7b 100644
--- a/monitor.c
+++ b/monitor.c
@@ -4955,6 +4955,8 @@
                           "arguments", "object");
                 return NULL;
             }
+        } else if (!strcmp(arg_name, "id")) {
+            /* Any string is acceptable as "id", so nothing to check */
         } else {
             error_set(errp, QERR_QMP_EXTRA_MEMBER, arg_name);
             return NULL;
diff --git a/qemu-options.hx b/qemu-options.hx
index 3085c1f..4be98f7 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -246,8 +246,7 @@
 DEF("m", HAS_ARG, QEMU_OPTION_m,
     "-m[emory] [size=]megs[,slots=n,maxmem=size]\n"
     "                configure guest RAM\n"
-    "                size: initial amount of guest memory (default: "
-    stringify(DEFAULT_RAM_SIZE) "MiB)\n"
+    "                size: initial amount of guest memory\n"
     "                slots: number of hotplug slots (default: none)\n"
     "                maxmem: maximum amount of guest memory (default: none)\n"
     "NOTE: Some architectures might enforce a specific granularity\n",
diff --git a/tests/fdc-test.c b/tests/fdc-test.c
index 3c6c83c..416394f 100644
--- a/tests/fdc-test.c
+++ b/tests/fdc-test.c
@@ -218,6 +218,10 @@
         inb(FLOPPY_BASE + reg_fifo);
     }
 
+    msr = inb(FLOPPY_BASE + reg_msr);
+    assert_bit_set(msr, BUSY | RQM | DIO);
+    g_assert(get_irq(FLOPPY_IRQ));
+
     st0 = floppy_recv();
     if (st0 != expected_st0) {
         ret = 1;
@@ -228,8 +232,15 @@
     floppy_recv();
     floppy_recv();
     floppy_recv();
+    g_assert(get_irq(FLOPPY_IRQ));
     floppy_recv();
 
+    /* Check that we're back in command phase */
+    msr = inb(FLOPPY_BASE + reg_msr);
+    assert_bit_clear(msr, BUSY | DIO);
+    assert_bit_set(msr, RQM);
+    g_assert(!get_irq(FLOPPY_IRQ));
+
     return ret;
 }
 
@@ -403,6 +414,7 @@
     uint8_t head = 0;
     uint8_t cyl;
     uint8_t st0;
+    uint8_t msr;
 
     /* Seek to track 0 and check with READ ID */
     send_seek(0);
@@ -411,18 +423,29 @@
     g_assert(!get_irq(FLOPPY_IRQ));
     floppy_send(head << 2 | drive);
 
+    msr = inb(FLOPPY_BASE + reg_msr);
+    if (!get_irq(FLOPPY_IRQ)) {
+        assert_bit_set(msr, BUSY);
+        assert_bit_clear(msr, RQM);
+    }
+
     while (!get_irq(FLOPPY_IRQ)) {
         /* qemu involves a timer with READ ID... */
         clock_step(1000000000LL / 50);
     }
 
+    msr = inb(FLOPPY_BASE + reg_msr);
+    assert_bit_set(msr, BUSY | RQM | DIO);
+
     st0 = floppy_recv();
     floppy_recv();
     floppy_recv();
     cyl = floppy_recv();
     head = floppy_recv();
     floppy_recv();
+    g_assert(get_irq(FLOPPY_IRQ));
     floppy_recv();
+    g_assert(!get_irq(FLOPPY_IRQ));
 
     g_assert_cmpint(cyl, ==, 0);
     g_assert_cmpint(head, ==, 0);
@@ -443,18 +466,29 @@
     g_assert(!get_irq(FLOPPY_IRQ));
     floppy_send(head << 2 | drive);
 
+    msr = inb(FLOPPY_BASE + reg_msr);
+    if (!get_irq(FLOPPY_IRQ)) {
+        assert_bit_set(msr, BUSY);
+        assert_bit_clear(msr, RQM);
+    }
+
     while (!get_irq(FLOPPY_IRQ)) {
         /* qemu involves a timer with READ ID... */
         clock_step(1000000000LL / 50);
     }
 
+    msr = inb(FLOPPY_BASE + reg_msr);
+    assert_bit_set(msr, BUSY | RQM | DIO);
+
     st0 = floppy_recv();
     floppy_recv();
     floppy_recv();
     cyl = floppy_recv();
     head = floppy_recv();
     floppy_recv();
+    g_assert(get_irq(FLOPPY_IRQ));
     floppy_recv();
+    g_assert(!get_irq(FLOPPY_IRQ));
 
     g_assert_cmpint(cyl, ==, 8);
     g_assert_cmpint(head, ==, 1);