Fix a race condition and non-leaf imagesgrowing in VMDK chains, by Igor
Lvovsky.
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@2987 c046a42c-6fe2-441c-8c8c-71466251a162
diff --git a/block-vmdk.c b/block-vmdk.c
index 28df8ae..e595eab 100644
--- a/block-vmdk.c
+++ b/block-vmdk.c
@@ -75,8 +75,25 @@
unsigned int cluster_sectors;
uint32_t parent_cid;
+ int is_parent;
} BDRVVmdkState;
+typedef struct VmdkMetaData {
+ uint32_t offset;
+ unsigned int l1_index;
+ unsigned int l2_index;
+ unsigned int l2_offset;
+ int valid;
+} VmdkMetaData;
+
+typedef struct ActiveBDRVState{
+ BlockDriverState *hd; // active image handler
+ uint64_t cluster_offset; // current write offset
+}ActiveBDRVState;
+
+static ActiveBDRVState activeBDRV;
+
+
static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
{
uint32_t magic;
@@ -305,7 +322,7 @@
bdrv_close(bs->backing_hd);
}
-
+int parent_open = 0;
static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
{
BDRVVmdkState *s = bs->opaque;
@@ -339,8 +356,10 @@
bdrv_close(s->hd);
return -1;
}
- if (bdrv_open(s->hd->backing_hd, parent_img_name, 0) < 0)
+ parent_open = 1;
+ if (bdrv_open(s->hd->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0)
goto failure;
+ parent_open = 0;
}
return 0;
@@ -352,6 +371,11 @@
uint32_t magic;
int l1_size, i, ret;
+ if (parent_open)
+ // Parent must be opened as RO.
+ flags = BDRV_O_RDONLY;
+ fprintf(stderr, "(VMDK) image open: flags=0x%x filename=%s\n", flags, bs->filename);
+
ret = bdrv_file_open(&s->hd, filename, flags);
if (ret < 0)
return ret;
@@ -387,6 +411,11 @@
s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
+ if (parent_open)
+ s->is_parent = 1;
+ else
+ s->is_parent = 0;
+
// try to open parent images, if exist
if (vmdk_parent_open(bs, filename) != 0)
goto fail;
@@ -430,7 +459,8 @@
return -1;
}
-static uint64_t get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate);
+static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
+ uint64_t offset, int allocate);
static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
uint64_t offset, int allocate)
@@ -446,27 +476,55 @@
if (!vmdk_is_cid_valid(bs))
return -1;
- parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, offset, allocate);
- if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) !=
- ps->cluster_sectors*512)
- return -1;
- if (bdrv_pwrite(s->hd, cluster_offset << 9, whole_grain, sizeof(whole_grain)) !=
- sizeof(whole_grain))
- return -1;
+ parent_cluster_offset = get_cluster_offset(s->hd->backing_hd, NULL, offset, allocate);
+
+ if (parent_cluster_offset) {
+ BDRVVmdkState *act_s = activeBDRV.hd->opaque;
+
+ if (bdrv_pread(ps->hd, parent_cluster_offset, whole_grain, ps->cluster_sectors*512) != ps->cluster_sectors*512)
+ return -1;
+
+ //Write grain only into the active image
+ if (bdrv_pwrite(act_s->hd, activeBDRV.cluster_offset << 9, whole_grain, sizeof(whole_grain)) != sizeof(whole_grain))
+ return -1;
+ }
}
return 0;
}
-static uint64_t get_cluster_offset(BlockDriverState *bs,
+static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
+{
+ BDRVVmdkState *s = bs->opaque;
+
+ /* update L2 table */
+ if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
+ &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
+ return -1;
+ /* update backup L2 table */
+ if (s->l1_backup_table_offset != 0) {
+ m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
+ if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
+ &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
+ return -1;
+ }
+
+ return 0;
+}
+
+static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
uint64_t offset, int allocate)
{
BDRVVmdkState *s = bs->opaque;
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
- uint32_t min_count, *l2_table, tmp;
+ uint32_t min_count, *l2_table, tmp = 0;
uint64_t cluster_offset;
-
+ int status;
+
+ if (m_data)
+ m_data->valid = 0;
+
l1_index = (offset >> 9) / s->l1_entry_sectors;
if (l1_index >= s->l1_size)
return 0;
@@ -504,32 +562,45 @@
found:
l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
cluster_offset = le32_to_cpu(l2_table[l2_index]);
+
if (!cluster_offset) {
struct stat file_buf;
if (!allocate)
return 0;
- stat(s->hd->filename, &file_buf);
- cluster_offset = file_buf.st_size;
- bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
-
- cluster_offset >>= 9;
- /* update L2 table */
- tmp = cpu_to_le32(cluster_offset);
- l2_table[l2_index] = tmp;
- if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)),
- &tmp, sizeof(tmp)) != sizeof(tmp))
- return 0;
- /* update backup L2 table */
- if (s->l1_backup_table_offset != 0) {
- l2_offset = s->l1_backup_table[l1_index];
- if (bdrv_pwrite(s->hd, ((int64_t)l2_offset * 512) + (l2_index * sizeof(tmp)),
- &tmp, sizeof(tmp)) != sizeof(tmp))
+ // Avoid the L2 tables update for the images that have snapshots.
+ if (!s->is_parent) {
+ status = stat(s->hd->filename, &file_buf);
+ if (status == -1) {
+ fprintf(stderr, "(VMDK) Fail file stat: filename =%s size=0x%lx errno=%s\n",
+ s->hd->filename, (uint64_t)file_buf.st_size, strerror(errno));
return 0;
- }
+ }
+ cluster_offset = file_buf.st_size;
+ bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
+ cluster_offset >>= 9;
+ tmp = cpu_to_le32(cluster_offset);
+ l2_table[l2_index] = tmp;
+ // Save the active image state
+ activeBDRV.cluster_offset = cluster_offset;
+ activeBDRV.hd = bs;
+ }
+ /* First of all we write grain itself, to avoid race condition
+ * that may to corrupt the image.
+ * This problem may occur because of insufficient space on host disk
+ * or inappropriate VM shutdown.
+ */
if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
return 0;
+
+ if (m_data) {
+ m_data->offset = tmp;
+ m_data->l1_index = l1_index;
+ m_data->l2_index = l2_index;
+ m_data->l2_offset = l2_offset;
+ m_data->valid = 1;
+ }
}
cluster_offset <<= 9;
return cluster_offset;
@@ -542,7 +613,7 @@
int index_in_cluster, n;
uint64_t cluster_offset;
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 0);
+ cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
index_in_cluster = sector_num % s->cluster_sectors;
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
@@ -559,7 +630,7 @@
uint64_t cluster_offset;
while (nb_sectors > 0) {
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 0);
+ cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
index_in_cluster = sector_num % s->cluster_sectors;
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
@@ -590,20 +661,34 @@
const uint8_t *buf, int nb_sectors)
{
BDRVVmdkState *s = bs->opaque;
+ VmdkMetaData m_data;
int index_in_cluster, n;
uint64_t cluster_offset;
static int cid_update = 0;
+ if (sector_num > bs->total_sectors) {
+ fprintf(stderr,
+ "(VMDK) Wrong offset: sector_num=0x%lx total_sectors=0x%lx\n",
+ sector_num, bs->total_sectors);
+ return -1;
+ }
+
while (nb_sectors > 0) {
index_in_cluster = sector_num & (s->cluster_sectors - 1);
n = s->cluster_sectors - index_in_cluster;
if (n > nb_sectors)
n = nb_sectors;
- cluster_offset = get_cluster_offset(bs, sector_num << 9, 1);
+ cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
if (!cluster_offset)
return -1;
+
if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
return -1;
+ if (m_data.valid) {
+ /* update L2 tables */
+ if (vmdk_L2update(bs, &m_data) == -1)
+ return -1;
+ }
nb_sectors -= n;
sector_num += n;
buf += n * 512;