block/iscsi: handle BUSY condition
this patch adds handling of BUSY status reponse from an iSCSI target.
Currently, we fail with -EIO in case of SCSI_STATUS_BUSY while the
obvious reaction would be to retry the operation after some time.
The retry time is randomly choosen from a range with exponential
growth increasing with each retry.
This patch includes most of the changes by a an upcoming patch
from Stefan Hajnoczi:
iscsi: implement .bdrv_detach/attach_aio_context()
because I also need the reference to the aio_context for
the retry timer to work. I included the changes to maintain
better mergeability.
Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/block/iscsi.c b/block/iscsi.c
index 6f87605..e64659f 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -26,6 +26,7 @@
#include "config-host.h"
#include <poll.h>
+#include <math.h>
#include <arpa/inet.h>
#include "qemu-common.h"
#include "qemu/config-file.h"
@@ -75,6 +76,7 @@
Coroutine *co;
QEMUBH *bh;
IscsiLun *iscsilun;
+ QEMUTimer retry_timer;
} IscsiTask;
typedef struct IscsiAIOCB {
@@ -86,7 +88,6 @@
uint8_t *buf;
int status;
int canceled;
- int retries;
int64_t sector_num;
int nb_sectors;
#ifdef __linux__
@@ -96,7 +97,8 @@
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
-#define ISCSI_CMD_RETRIES 5
+#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
+static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048};
/* this threshhold is a trade-off knob to choose between
* the potential additional overhead of an extra GET_LBA_STATUS request
@@ -146,6 +148,19 @@
qemu_coroutine_enter(iTask->co, NULL);
}
+static void iscsi_retry_timer_expired(void *opaque)
+{
+ struct IscsiTask *iTask = opaque;
+ if (iTask->co) {
+ qemu_coroutine_enter(iTask->co, NULL);
+ }
+}
+
+static inline unsigned exp_random(double mean)
+{
+ return -mean * log((double)rand() / RAND_MAX);
+}
+
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
@@ -158,14 +173,30 @@
iTask->do_retry = 0;
iTask->task = task;
- if (iTask->retries-- > 0 && status == SCSI_STATUS_CHECK_CONDITION
- && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
- error_report("iSCSI CheckCondition: %s", iscsi_get_error(iscsi));
- iTask->do_retry = 1;
- goto out;
- }
-
if (status != SCSI_STATUS_GOOD) {
+ if (iTask->retries++ < ISCSI_CMD_RETRIES) {
+ if (status == SCSI_STATUS_CHECK_CONDITION
+ && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
+ error_report("iSCSI CheckCondition: %s",
+ iscsi_get_error(iscsi));
+ iTask->do_retry = 1;
+ goto out;
+ }
+ if (status == SCSI_STATUS_BUSY) {
+ unsigned retry_time =
+ exp_random(iscsi_retry_times[iTask->retries - 1]);
+ error_report("iSCSI Busy (retry #%u in %u ms): %s",
+ iTask->retries, retry_time,
+ iscsi_get_error(iscsi));
+ aio_timer_init(iTask->iscsilun->aio_context,
+ &iTask->retry_timer, QEMU_CLOCK_REALTIME,
+ SCALE_MS, iscsi_retry_timer_expired, iTask);
+ timer_mod(&iTask->retry_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
+ iTask->do_retry = 1;
+ return;
+ }
+ }
error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
}
@@ -180,9 +211,9 @@
static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
{
*iTask = (struct IscsiTask) {
- .co = qemu_coroutine_self(),
- .retries = ISCSI_CMD_RETRIES,
- .iscsilun = iscsilun,
+ .co = qemu_coroutine_self(),
+ .retries = ISCSI_CMD_RETRIES,
+ .iscsilun = iscsilun,
};
}