Fix VM state change handlers running out of order
When a VM state change handler changes VM state, other VM state change
handlers can see the state transitions out of order.
bmdma_map(), scsi_disk_init() and virtio_blk_init() install VM state
change handlers to restart DMA. These handlers call vm_stop() when
they run into a write error on a drive with werror=stop. That throws
the other VM state change handler callbacks into disarray. Here's an
example case I observed:
0. The virtual IDE drive goes south. All future writes return errors.
1. Something encounters a write error, and duly stops the VM with
vm_stop().
2. vm_stop() calls vm_state_notify(0).
3. vm_state_notify() runs the callbacks in list vm_change_state_head.
It contains ide_dma_restart_cb() installed by bmdma_map(). It also
contains audio_vm_change_state_handler() installed by audio_init().
4. audio_vm_change_state_handler() stops audio stuff.
5. User continues VM with monitor command "c". This runs vm_start().
6. vm_start() calls vm_state_notify(1).
7. vm_state_notify() runs the callbacks in vm_change_state_head.
8. ide_dma_restart_cb() happens to come first. It does its work, runs
into a write error, and duly stops the VM with vm_stop().
9. vm_stop() runs vm_state_notify(0).
10. vm_state_notify() runs the callbacks in vm_change_state_head.
11. audio_vm_change_state_handler() stops audio stuff. Which isn't
running.
12. vm_stop() finishes, ide_dma_restart_cb() finishes, step 7's
vm_state_notify() resumes running handlers.
13. audio_vm_change_state_handler() starts audio stuff. Oopsie.
Fix this by moving the actual work (restarting the DMA, and with it
the write that can fail again) out of each VM state change handler
into a new bottom half (suggested by Gleb Natapov). The handler now
only schedules the bottom half; the restart runs later, outside
vm_state_notify().
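For illustration only, here is a minimal, self-contained sketch of the
deferral pattern; it is not part of this patch. The qemu_bh_new(),
qemu_bh_schedule() and qemu_bh_delete() functions below are toy
stand-ins for QEMU's real bottom-half API and main loop, and
FakeDMAState, pending_bhs, dma_restart_cb() and dma_restart_bh() are
names invented for the example; only the shape of the fix matches the
real code.

    /* Toy one-shot "bottom half" scheduler; a stand-in for QEMU's
     * qemu_bh_* API and main loop, which this sketch does not use. */
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct QEMUBH QEMUBH;
    struct QEMUBH {
        void (*cb)(void *opaque);
        void *opaque;
        QEMUBH *next;
    };

    static QEMUBH *pending_bhs;

    static QEMUBH *qemu_bh_new(void (*cb)(void *), void *opaque)
    {
        QEMUBH *bh = calloc(1, sizeof(*bh));
        bh->cb = cb;
        bh->opaque = opaque;
        return bh;
    }

    static void qemu_bh_schedule(QEMUBH *bh)
    {
        bh->next = pending_bhs;
        pending_bhs = bh;
    }

    /* Only safe here once the bh has been popped off pending_bhs. */
    static void qemu_bh_delete(QEMUBH *bh)
    {
        free(bh);
    }

    typedef struct {
        QEMUBH *bh;
    } FakeDMAState;

    static void dma_restart_bh(void *opaque)
    {
        FakeDMAState *s = opaque;

        qemu_bh_delete(s->bh);
        s->bh = NULL;
        /* Restart the DMA here.  A write error may stop the VM again,
         * but no handler list is being walked at this point. */
        printf("DMA restarted outside vm_state_notify()\n");
    }

    static void dma_restart_cb(void *opaque, int running, int reason)
    {
        FakeDMAState *s = opaque;

        (void)reason;               /* unused, as in the real handlers */
        if (!running)
            return;
        if (!s->bh) {               /* schedule at most one bottom half */
            s->bh = qemu_bh_new(dma_restart_bh, s);
            qemu_bh_schedule(s->bh);
        }
    }

    int main(void)
    {
        FakeDMAState s = { NULL };

        dma_restart_cb(&s, 1, 0);   /* as if from vm_state_notify(1) */
        dma_restart_cb(&s, 1, 0);   /* repeat notification: no second bh */

        while (pending_bhs) {       /* main loop drains bottom halves */
            QEMUBH *bh = pending_bhs;
            pending_bhs = bh->next;
            bh->cb(bh->opaque);
        }
        return 0;
    }

The point is that the state change handler merely schedules work; the
work itself, which may stop the VM again, runs from the main loop only
after vm_state_notify() has finished walking its handler list.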
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/hw/ide.c b/hw/ide.c
index 1e56786..2eea438 100644
--- a/hw/ide.c
+++ b/hw/ide.c
@@ -501,6 +501,7 @@
QEMUIOVector qiov;
int64_t sector_num;
uint32_t nsector;
+ QEMUBH *bh;
} BMDMAState;
typedef struct PCIIDEState {
@@ -1109,11 +1110,13 @@
}
}
-static void ide_dma_restart_cb(void *opaque, int running, int reason)
+static void ide_dma_restart_bh(void *opaque)
{
BMDMAState *bm = opaque;
- if (!running)
- return;
+
+ qemu_bh_delete(bm->bh);
+ bm->bh = NULL;
+
if (bm->status & BM_STATUS_DMA_RETRY) {
bm->status &= ~BM_STATUS_DMA_RETRY;
ide_dma_restart(bm->ide_if);
@@ -1123,6 +1126,19 @@
}
}
+static void ide_dma_restart_cb(void *opaque, int running, int reason)
+{
+ BMDMAState *bm = opaque;
+
+ if (!running)
+ return;
+
+ if (!bm->bh) {
+ bm->bh = qemu_bh_new(ide_dma_restart_bh, bm);
+ qemu_bh_schedule(bm->bh);
+ }
+}
+
static void ide_write_dma_cb(void *opaque, int ret)
{
BMDMAState *bm = opaque;
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 8b6426f..5b825c9 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -74,6 +74,7 @@
scsi_completionfn completion;
void *opaque;
char drive_serial_str[21];
+ QEMUBH *bh;
};
/* Global pool of SCSIRequest structures. */
@@ -308,12 +309,13 @@
return 0;
}
-static void scsi_dma_restart_cb(void *opaque, int running, int reason)
+static void scsi_dma_restart_bh(void *opaque)
{
SCSIDeviceState *s = opaque;
SCSIRequest *r = s->requests;
- if (!running)
- return;
+
+ qemu_bh_delete(s->bh);
+ s->bh = NULL;
while (r) {
if (r->status & SCSI_REQ_STATUS_RETRY) {
@@ -324,6 +326,19 @@
}
}
+static void scsi_dma_restart_cb(void *opaque, int running, int reason)
+{
+ SCSIDeviceState *s = opaque;
+
+ if (!running)
+ return;
+
+ if (!s->bh) {
+ s->bh = qemu_bh_new(scsi_dma_restart_bh, s);
+ qemu_bh_schedule(s->bh);
+ }
+}
+
/* Return a pointer to the data buffer. */
static uint8_t *scsi_get_buf(SCSIDevice *d, uint32_t tag)
{
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 2beff52..5036b5b 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -26,6 +26,7 @@
VirtQueue *vq;
void *rq;
char serial_str[BLOCK_SERIAL_STRLEN + 1];
+ QEMUBH *bh;
} VirtIOBlock;
static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
@@ -302,13 +303,13 @@
*/
}
-static void virtio_blk_dma_restart_cb(void *opaque, int running, int reason)
+static void virtio_blk_dma_restart_bh(void *opaque)
{
VirtIOBlock *s = opaque;
VirtIOBlockReq *req = s->rq;
- if (!running)
- return;
+ qemu_bh_delete(s->bh);
+ s->bh = NULL;
s->rq = NULL;
@@ -318,6 +319,19 @@
}
}
+static void virtio_blk_dma_restart_cb(void *opaque, int running, int reason)
+{
+ VirtIOBlock *s = opaque;
+
+ if (!running)
+ return;
+
+ if (!s->bh) {
+ s->bh = qemu_bh_new(virtio_blk_dma_restart_bh, s);
+ qemu_bh_schedule(s->bh);
+ }
+}
+
static void virtio_blk_reset(VirtIODevice *vdev)
{
/*