Merge tag 'ide-pull-request' of https://gitlab.com/jsnow/qemu into staging
IDE Pull request
# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEE+ber27ys35W+dsvQfe+BBqr8OQ4FAmT5RpYACgkQfe+BBqr8
# OQ7GuA//S/gyyqsnltz4W9D0liaan1a2YsSx7Q2gcKdotdmFwgEHWWuVKorCteQt
# 1AtkFiA1bawF9ZSRQIpQzMNDOkSJHOs/0HXhdbNRs6JZ6C+c/aLnNSpxIfFpkP3I
# Wcrmi98F8zHlRc+KGqvZFHW+woqWJxTvglG4OmpMhMWCZRuqADeaxWaywgSXxlK+
# MtmpsslPeTxHdwa6ijXCJd2ghP59z391Ulo4kZ7YOMou/YLEd/AnezBDtepDGnbb
# TnyDcvGf+Dp5nJ4Rcp22frZdcxb44+wt2QlQFDp+h6r7KzIEwGIK2LL37sN8VHwU
# B8GbYkjoPnau2cOaLgmpC1reWkdwaiXfaI+1B/35/jg6hwYHFe6F03+JstMWXHXt
# ++Wy4MKDx5wRt7cmOu6htS776UC15NMcZB0AzxQuE5mL+eSNp1n5Nw5UW2iD/USL
# LD2dlMO05acdqn2iXoMTX/K1cUo1wRkEns7PISk+F2ve0PTS1RJUvuiNXs+aDrt9
# +AfE/e025YMQY8CWLiaihfNH7/QY8vS874SrcDr5rtfhitu16nqq5JpjnyzkqgbR
# PE+5JWT3QGBOcDMQeNUDfxFlcCVDm3ffIKo/7/PDCfeKQsJkG/nVGF7OmlAVmoUD
# GsvIlKBegIQvpp8LRabzfeTfbj7NGKFwaShQ6wcqxOakjy+iKx8=
# =ZRVt
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 06 Sep 2023 23:42:14 EDT
# gpg: using RSA key F9B7ABDBBCACDF95BE76CBD07DEF8106AAFC390E
# gpg: Good signature from "John Snow (John Huston) <jsnow@redhat.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: FAEB 9711 A12C F475 812F 18F2 88A9 064D 1835 61EB
# Subkey fingerprint: F9B7 ABDB BCAC DF95 BE76 CBD0 7DEF 8106 AAFC 390E
* tag 'ide-pull-request' of https://gitlab.com/jsnow/qemu:
hw/ide/ahci: fix broken SError handling
hw/ide/ahci: fix ahci_write_fis_sdb()
hw/ide/ahci: PxCI should not get cleared when ERR_STAT is set
hw/ide/ahci: PxSACT and PxCI is cleared when PxCMD.ST is cleared
hw/ide/ahci: simplify and document PxCI handling
hw/ide/ahci: write D2H FIS when processing NCQ command
hw/ide/core: set ERR_STAT in unsupported command completion
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index 3b29568..b471973 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3693,6 +3693,7 @@
F: block/parallels.c
F: block/parallels-ext.c
F: docs/interop/parallels.txt
+T: git https://src.openvz.org/scm/~den/qemu.git parallels
qed
M: Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/block/parallels.c b/block/parallels.c
index 18e34ae..48c32d6 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -136,6 +136,12 @@
return MIN(nb_sectors, ret);
}
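+/*
+ * Map a host file offset to an index in the used-cluster bitmap built by
+ * parallels_check_duplicate(). The offset is expected to lie at or beyond
+ * the start of the data area.
+ */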
+static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off)
+{
+ off -= s->data_start << BDRV_SECTOR_BITS;
+ return off / s->cluster_size;
+}
+
static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
int nb_sectors, int *pnum)
{
@@ -188,7 +194,8 @@
idx = sector_num / s->tracks;
to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
- /* This function is called only by parallels_co_writev(), which will never
+ /*
+ * This function is called only by parallels_co_writev(), which will never
* pass a sector_num at or beyond the end of the image (because the block
* layer never passes such a sector_num to that function). Therefore, idx
* is always below s->bat_size.
@@ -196,7 +203,8 @@
* exceed the image end. Therefore, idx + to_allocate cannot exceed
* s->bat_size.
* Note that s->bat_size is an unsigned int, therefore idx + to_allocate
- * will always fit into a uint32_t. */
+ * will always fit into a uint32_t.
+ */
assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
space = to_allocate * s->tracks;
@@ -230,13 +238,15 @@
}
}
- /* Try to read from backing to fill empty clusters
+ /*
+ * Try to read from backing to fill empty clusters
* FIXME: 1. previous write_zeroes may be redundant
* 2. most of data we read from backing will be rewritten by
* parallels_co_writev. On aligned-to-cluster write we do not need
* this read at all.
* 3. it would be good to combine write of data from backing and new
- * data into one write call */
+ * data into one write call.
+ */
if (bs->backing) {
int64_t nb_cow_sectors = to_allocate * s->tracks;
int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS;
@@ -440,6 +450,81 @@
}
}
+/*
+ * Returns true if data_off is correct, otherwise false. In both cases
+ * correct_offset is set to the proper value.
+ */
+static bool parallels_test_data_off(BDRVParallelsState *s,
+ int64_t file_nb_sectors,
+ uint32_t *correct_offset)
+{
+ uint32_t data_off, min_off;
+ bool old_magic;
+
+ /*
+     * There are two slightly different image formats: with "WithoutFreeSpace"
+     * or "WithouFreSpacExt" magic words. We refer to the first one as "old
+     * magic". In such images the data_off field can be zero. In this case the
+     * offset is calculated as the end of the BAT table plus some padding to
+     * ensure sector size alignment.
+ */
+ old_magic = !memcmp(s->header->magic, HEADER_MAGIC, 16);
+
+ min_off = DIV_ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
+ if (!old_magic) {
+ min_off = ROUND_UP(min_off, s->cluster_size / BDRV_SECTOR_SIZE);
+ }
+
+ if (correct_offset) {
+ *correct_offset = min_off;
+ }
+
+ data_off = le32_to_cpu(s->header->data_off);
+ if (data_off == 0 && old_magic) {
+ return true;
+ }
+
+ if (data_off < min_off || data_off > file_nb_sectors) {
+ return false;
+ }
+
+ if (correct_offset) {
+ *correct_offset = data_off;
+ }
+
+ return true;
+}
+
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix)
+{
+ BDRVParallelsState *s = bs->opaque;
+ int64_t file_size;
+ uint32_t data_off;
+
+ file_size = bdrv_co_nb_sectors(bs->file->bs);
+ if (file_size < 0) {
+ res->check_errors++;
+ return file_size;
+ }
+
+ if (parallels_test_data_off(s, file_size, &data_off)) {
+ return 0;
+ }
+
+ res->corruptions++;
+ if (fix & BDRV_FIX_ERRORS) {
+ s->header->data_off = cpu_to_le32(data_off);
+ res->corruptions_fixed++;
+ }
+
+ fprintf(stderr, "%s data_off field has incorrect value\n",
+ fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
+
+ return 0;
+}
+
static int coroutine_fn GRAPH_RDLOCK
parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix)
@@ -484,13 +569,13 @@
static int coroutine_fn GRAPH_RDLOCK
parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix)
+ BdrvCheckMode fix, bool explicit)
{
BDRVParallelsState *s = bs->opaque;
int64_t size;
int ret;
- size = bdrv_getlength(bs->file->bs);
+ size = bdrv_co_getlength(bs->file->bs);
if (size < 0) {
res->check_errors++;
return size;
@@ -499,10 +584,13 @@
if (size > res->image_end_offset) {
int64_t count;
count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size);
- fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
- fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
- size - res->image_end_offset);
- res->leaks += count;
+ if (explicit) {
+ fprintf(stderr,
+ "%s space leaked at the end of the image %" PRId64 "\n",
+ fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
+ size - res->image_end_offset);
+ res->leaks += count;
+ }
if (fix & BDRV_FIX_LEAKS) {
Error *local_err = NULL;
@@ -517,13 +605,148 @@
res->check_errors++;
return ret;
}
- res->leaks_fixed += count;
+ if (explicit) {
+ res->leaks_fixed += count;
+ }
}
}
return 0;
}
+static int coroutine_fn GRAPH_RDLOCK
+parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix)
+{
+ BDRVParallelsState *s = bs->opaque;
+ int64_t host_off, host_sector, guest_sector;
+ unsigned long *bitmap;
+ uint32_t i, bitmap_size, cluster_index, bat_entry;
+ int n, ret = 0;
+ uint64_t *buf = NULL;
+ bool fixed = false;
+
+ /*
+ * Create a bitmap of used clusters.
+ * If a bit is set, there is a BAT entry pointing to this cluster.
+ * Loop through the BAT entries, check bits relevant to an entry offset.
+ * If bit is set, this entry is duplicated. Otherwise set the bit.
+ *
+ * We shouldn't worry about newly allocated clusters outside the image
+     * because they are created higher than any existing cluster pointed
+     * to by a BAT entry.
+ */
+ bitmap_size = host_cluster_index(s, res->image_end_offset);
+ if (bitmap_size == 0) {
+ return 0;
+ }
+ if (res->image_end_offset % s->cluster_size) {
+        /* An unaligned image end would leave the bitmap one bit short */
+ bitmap_size++;
+ }
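+    /*
+     * e.g. if the data area holds two full clusters plus a partial third,
+     * host_cluster_index() returns 2 while bits 0..2 are needed, hence
+     * the increment above.
+     */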
+
+ bitmap = bitmap_new(bitmap_size);
+
+ buf = qemu_blockalign(bs, s->cluster_size);
+
+ for (i = 0; i < s->bat_size; i++) {
+ host_off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+ if (host_off == 0) {
+ continue;
+ }
+
+ cluster_index = host_cluster_index(s, host_off);
+ assert(cluster_index < bitmap_size);
+ if (!test_bit(cluster_index, bitmap)) {
+ bitmap_set(bitmap, cluster_index, 1);
+ continue;
+ }
+
+ /* this cluster duplicates another one */
+ fprintf(stderr, "%s duplicate offset in BAT entry %u\n",
+ fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
+
+ res->corruptions++;
+
+ if (!(fix & BDRV_FIX_ERRORS)) {
+ continue;
+ }
+
+ /*
+ * Reset the entry and allocate a new cluster
+ * for the relevant guest offset. In this way we let
+     * the lower layer place the new cluster properly.
+     * Copy the original cluster to the allocated one.
+     * But first save the old offset value so the entry
+     * can be repaired if an error occurs.
+ */
+ bat_entry = s->bat_bitmap[i];
+ parallels_set_bat_entry(s, i, 0);
+
+ ret = bdrv_co_pread(bs->file, host_off, s->cluster_size, buf, 0);
+ if (ret < 0) {
+ res->check_errors++;
+ goto out_repair_bat;
+ }
+
+ guest_sector = (i * (int64_t)s->cluster_size) >> BDRV_SECTOR_BITS;
+ host_sector = allocate_clusters(bs, guest_sector, s->tracks, &n);
+ if (host_sector < 0) {
+ res->check_errors++;
+ goto out_repair_bat;
+ }
+ host_off = host_sector << BDRV_SECTOR_BITS;
+
+ ret = bdrv_co_pwrite(bs->file, host_off, s->cluster_size, buf, 0);
+ if (ret < 0) {
+ res->check_errors++;
+ goto out_repair_bat;
+ }
+
+ if (host_off + s->cluster_size > res->image_end_offset) {
+ res->image_end_offset = host_off + s->cluster_size;
+ }
+
+ /*
+         * In the future allocate_clusters() may reuse offsets of holes
+         * inside the image. Keep the used-clusters bitmap content
+         * consistent for the newly allocated clusters too.
+ *
+ * Note, clusters allocated outside the current image are not
+ * considered, and the bitmap size doesn't change.
+ */
+ cluster_index = host_cluster_index(s, host_off);
+ if (cluster_index < bitmap_size) {
+ bitmap_set(bitmap, cluster_index, 1);
+ }
+
+ fixed = true;
+ res->corruptions_fixed++;
+
+ }
+
+ if (fixed) {
+ /*
+ * When new clusters are allocated, the file size increases by
+         * 128 MB. We need to truncate the file to the right size. Let
+         * the leak-fixing code do its job without changing res.
+ */
+ ret = parallels_check_leak(bs, res, fix, false);
+ }
+
+out_free:
+ g_free(buf);
+ g_free(bitmap);
+ return ret;
+/*
+ * We can get here only from places where i and bat_entry have
+ * meaningful values.
+ */
+out_repair_bat:
+ s->bat_bitmap[i] = bat_entry;
+ goto out_free;
+}
+
static void parallels_collect_statistics(BlockDriverState *bs,
BdrvCheckResult *res,
BdrvCheckMode fix)
@@ -565,12 +788,22 @@
WITH_QEMU_LOCK_GUARD(&s->lock) {
parallels_check_unclean(bs, res, fix);
+ ret = parallels_check_data_off(bs, res, fix);
+ if (ret < 0) {
+ return ret;
+ }
+
ret = parallels_check_outside_image(bs, res, fix);
if (ret < 0) {
return ret;
}
- ret = parallels_check_leak(bs, res, fix);
+ ret = parallels_check_leak(bs, res, fix, true);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = parallels_check_duplicate(bs, res, fix);
if (ret < 0) {
return ret;
}
@@ -798,10 +1031,12 @@
BDRVParallelsState *s = bs->opaque;
ParallelsHeader ph;
int ret, size, i;
- int64_t file_nb_sectors;
+ int64_t file_nb_sectors, sector;
+ uint32_t data_start;
QemuOpts *opts = NULL;
Error *local_err = NULL;
char *buf;
+ bool data_off_is_correct;
ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
if (ret < 0) {
@@ -859,15 +1094,6 @@
ret = -ENOMEM;
goto fail;
}
- s->data_end = le32_to_cpu(ph.data_off);
- if (s->data_end == 0) {
- s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
- }
- if (s->data_end < s->header_size) {
- /* there is not enough unused space to fit to block align between BAT
- and actual data. We can't avoid read-modify-write... */
- s->header_size = size;
- }
ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0);
if (ret < 0) {
@@ -875,33 +1101,20 @@
}
s->bat_bitmap = (uint32_t *)(s->header + 1);
- for (i = 0; i < s->bat_size; i++) {
- int64_t off = bat2sect(s, i);
- if (off >= file_nb_sectors) {
- if (flags & BDRV_O_CHECK) {
- continue;
- }
- error_setg(errp, "parallels: Offset %" PRIi64 " in BAT[%d] entry "
- "is larger than file size (%" PRIi64 ")",
- off << BDRV_SECTOR_BITS, i,
- file_nb_sectors << BDRV_SECTOR_BITS);
- ret = -EINVAL;
- goto fail;
- }
- if (off >= s->data_end) {
- s->data_end = off + s->tracks;
- }
+ if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
+ s->header_unclean = true;
}
- if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
- /* Image was not closed correctly. The check is mandatory */
- s->header_unclean = true;
- if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
- error_setg(errp, "parallels: Image was not closed correctly; "
- "cannot be opened read/write");
- ret = -EACCES;
- goto fail;
- }
+ data_off_is_correct = parallels_test_data_off(s, file_nb_sectors,
+ &data_start);
+ s->data_start = data_start;
+ s->data_end = s->data_start;
+ if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) {
+ /*
+         * There is not enough unused space to fit a block-aligned gap
+         * between the BAT and the actual data. We can't avoid
+         * read-modify-write...
+ */
+ s->header_size = size;
}
opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, errp);
@@ -962,10 +1175,41 @@
bdrv_get_device_or_node_name(bs));
ret = migrate_add_blocker(s->migration_blocker, errp);
if (ret < 0) {
- error_free(s->migration_blocker);
+ error_setg(errp, "Migration blocker error");
goto fail;
}
qemu_co_mutex_init(&s->lock);
+
+ for (i = 0; i < s->bat_size; i++) {
+ sector = bat2sect(s, i);
+ if (sector + s->tracks > s->data_end) {
+ s->data_end = sector + s->tracks;
+ }
+ }
+
+ /*
+ * We don't repair the image here if it's opened for checks. Also we don't
+     * want to change inactive images and can't change read-only images.
+ */
+ if ((flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) || !(flags & BDRV_O_RDWR)) {
+ return 0;
+ }
+
+ /*
+ * Repair the image if it's dirty or
+ * out-of-image corruption was detected.
+ */
+ if (s->data_end > file_nb_sectors || s->header_unclean
+ || !data_off_is_correct) {
+ BdrvCheckResult res;
+ ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not repair corrupted image");
+ migrate_del_blocker(s->migration_blocker);
+ goto fail;
+ }
+ }
+
return 0;
fail_format:
@@ -973,6 +1217,12 @@
fail_options:
ret = -EINVAL;
fail:
+ /*
+     * The "s" object was allocated by g_malloc0, so we can safely
+     * free its fields even if they were never allocated.
+ */
+ error_free(s->migration_blocker);
+ g_free(s->bat_dirty_bmap);
qemu_vfree(s->header);
return ret;
}
diff --git a/block/parallels.h b/block/parallels.h
index f22f43f..4e53e95 100644
--- a/block/parallels.h
+++ b/block/parallels.h
@@ -75,6 +75,7 @@
uint32_t *bat_bitmap;
unsigned int bat_size;
+ int64_t data_start;
int64_t data_end;
uint64_t prealloc_size;
ParallelsPreallocMode prealloc_mode;
diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c
index e536b3e..9b10e90 100644
--- a/hw/intc/pnv_xive.c
+++ b/hw/intc/pnv_xive.c
@@ -85,28 +85,6 @@
}
/*
- * Remote access to controllers. HW uses MMIOs. For now, a simple scan
- * of the chips is good enough.
- *
- * TODO: Block scope support
- */
-static PnvXive *pnv_xive_get_remote(uint8_t blk)
-{
- PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
- int i;
-
- for (i = 0; i < pnv->num_chips; i++) {
- Pnv9Chip *chip9 = PNV9_CHIP(pnv->chips[i]);
- PnvXive *xive = &chip9->xive;
-
- if (pnv_xive_block_id(xive) == blk) {
- return xive;
- }
- }
- return NULL;
-}
-
-/*
* VST accessors for SBE, EAT, ENDT, NVT
*
* Indirect VST tables are arrays of VSDs pointing to a page (of same
@@ -209,6 +187,42 @@
return pnv_xive_vst_addr_direct(xive, type, vsd, (idx % vst_per_page));
}
+/*
+ * This is a simplified model of operation forwarding on a remote IC.
+ *
+ * A PC MMIO address is built to identify the NVT structure. The load
+ * on the remote IC will return the address of the structure in RAM,
+ * which will then be used by pnv_xive_vst_write/read to perform the
+ * RAM operation.
+ */
+static uint64_t pnv_xive_vst_addr_remote(PnvXive *xive, uint32_t type,
+ uint64_t vsd, uint8_t blk,
+ uint32_t idx)
+{
+ const XiveVstInfo *info = &vst_infos[type];
+ uint64_t remote_addr = vsd & VSD_ADDRESS_MASK;
+ uint64_t vst_addr;
+ MemTxResult result;
+
+ if (type != VST_TSEL_VPDT) {
+ xive_error(xive, "VST: invalid access on remote VST %s %x/%x !?",
+ info->name, blk, idx);
+ return 0;
+ }
+
+ remote_addr |= idx << xive->pc_shift;
+
+ vst_addr = address_space_ldq_be(&address_space_memory, remote_addr,
+ MEMTXATTRS_UNSPECIFIED, &result);
+ if (result != MEMTX_OK) {
+ xive_error(xive, "VST: read failed at @0x%" HWADDR_PRIx
+ " for NVT %x/%x\n", remote_addr, blk, idx);
+ return 0;
+ }
+
+ return vst_addr;
+}
+
static uint64_t pnv_xive_vst_addr(PnvXive *xive, uint32_t type, uint8_t blk,
uint32_t idx)
{
@@ -225,9 +239,7 @@
/* Remote VST access */
if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) {
- xive = pnv_xive_get_remote(blk);
-
- return xive ? pnv_xive_vst_addr(xive, type, blk, idx) : 0;
+ return pnv_xive_vst_addr_remote(xive, type, vsd, blk, idx);
}
if (VSD_INDIRECT & vsd) {
@@ -242,12 +254,20 @@
{
const XiveVstInfo *info = &vst_infos[type];
uint64_t addr = pnv_xive_vst_addr(xive, type, blk, idx);
+ MemTxResult result;
if (!addr) {
return -1;
}
- cpu_physical_memory_read(addr, data, info->size);
+ result = address_space_read(&address_space_memory, addr,
+ MEMTXATTRS_UNSPECIFIED, data,
+ info->size);
+ if (result != MEMTX_OK) {
+ xive_error(xive, "VST: read failed at @0x%" HWADDR_PRIx
+ " for VST %s %x/%x\n", addr, info->name, blk, idx);
+ return -1;
+ }
return 0;
}
@@ -258,16 +278,27 @@
{
const XiveVstInfo *info = &vst_infos[type];
uint64_t addr = pnv_xive_vst_addr(xive, type, blk, idx);
+ MemTxResult result;
if (!addr) {
return -1;
}
if (word_number == XIVE_VST_WORD_ALL) {
- cpu_physical_memory_write(addr, data, info->size);
+ result = address_space_write(&address_space_memory, addr,
+ MEMTXATTRS_UNSPECIFIED, data,
+ info->size);
} else {
- cpu_physical_memory_write(addr + word_number * 4,
- data + word_number * 4, 4);
+ result = address_space_write(&address_space_memory,
+ addr + word_number * 4,
+ MEMTXATTRS_UNSPECIFIED,
+ data + word_number * 4, 4);
+ }
+
+ if (result != MEMTX_OK) {
+ xive_error(xive, "VST: write failed at @0x%" HWADDR_PRIx
+                   " for VST %s %x/%x\n", addr, info->name, blk, idx);
+ return -1;
}
return 0;
}
@@ -275,12 +306,26 @@
static int pnv_xive_get_end(XiveRouter *xrtr, uint8_t blk, uint32_t idx,
XiveEND *end)
{
+ PnvXive *xive = PNV_XIVE(xrtr);
+
+ if (pnv_xive_block_id(xive) != blk) {
+ xive_error(xive, "VST: END %x/%x is remote !?", blk, idx);
+ return -1;
+ }
+
return pnv_xive_vst_read(PNV_XIVE(xrtr), VST_TSEL_EQDT, blk, idx, end);
}
static int pnv_xive_write_end(XiveRouter *xrtr, uint8_t blk, uint32_t idx,
XiveEND *end, uint8_t word_number)
{
+ PnvXive *xive = PNV_XIVE(xrtr);
+
+ if (pnv_xive_block_id(xive) != blk) {
+ xive_error(xive, "VST: END %x/%x is remote !?", blk, idx);
+ return -1;
+ }
+
return pnv_xive_vst_write(PNV_XIVE(xrtr), VST_TSEL_EQDT, blk, idx, end,
word_number);
}
@@ -1349,6 +1394,50 @@
#define PNV_XIVE_SYNC_PUSH 0xf00 /* Sync push context */
#define PNV_XIVE_SYNC_VPC 0xf80 /* Sync remove VPC store */
+static void pnv_xive_end_notify(XiveRouter *xrtr, XiveEAS *eas)
+{
+ PnvXive *xive = PNV_XIVE(xrtr);
+ uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
+ uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+ uint32_t end_data = xive_get_field64(EAS_END_DATA, eas->w);
+ uint64_t end_vsd = xive->vsds[VST_TSEL_EQDT][end_blk];
+
+ switch (GETFIELD(VSD_MODE, end_vsd)) {
+ case VSD_MODE_EXCLUSIVE:
+ /* Perform the END notification on the local IC. */
+ xive_router_end_notify(xrtr, eas);
+ break;
+
+ case VSD_MODE_FORWARD: {
+ MemTxResult result;
+ uint64_t notif_port = end_vsd & VSD_ADDRESS_MASK;
+ uint64_t data = XIVE_TRIGGER_END | XIVE_TRIGGER_PQ |
+ be64_to_cpu(eas->w);
+
+ /* Forward the store on the remote IC notify page. */
+ address_space_stq_be(&address_space_memory, notif_port, data,
+ MEMTXATTRS_UNSPECIFIED, &result);
+ if (result != MEMTX_OK) {
+ xive_error(xive, "IC: Forward notif END %x/%x [%x] failed @%"
+ HWADDR_PRIx, end_blk, end_idx, end_data, notif_port);
+ return;
+ }
+ break;
+ }
+
+ case VSD_MODE_INVALID:
+ default:
+ /* Set FIR */
+ xive_error(xive, "IC: Invalid END VSD for block %x", end_blk);
+ return;
+ }
+}
+
+/*
+ * The notify page can either be used to receive trigger events from
+ * the HW controllers (PHB, PSI) or to reroute interrupts between
+ * Interrupt controllers.
+ */
static void pnv_xive_ic_hw_trigger(PnvXive *xive, hwaddr addr, uint64_t val)
{
uint8_t blk;
@@ -1357,8 +1446,8 @@
trace_pnv_xive_ic_hw_trigger(addr, val);
if (val & XIVE_TRIGGER_END) {
- xive_error(xive, "IC: END trigger at @0x%"HWADDR_PRIx" data 0x%"PRIx64,
- addr, val);
+ val = cpu_to_be64(val);
+ pnv_xive_end_notify(XIVE_ROUTER(xive), (XiveEAS *) &val);
return;
}
@@ -1703,16 +1792,20 @@
};
/*
- * Presenter Controller MMIO region. The Virtualization Controller
- * updates the IPB in the NVT table when required. Not modeled.
+ * Presenter Controller MMIO region. Points to the NVT sets.
+ *
+ * HW implements all possible mem ops to the underlying NVT structure
+ * but QEMU does not need to be so precise. The model implementation
+ * simply returns the RAM address of the NVT structure which is then
+ * used by pnv_xive_vst_write/read to perform the RAM operation.
*/
-static uint64_t pnv_xive_pc_read(void *opaque, hwaddr addr,
- unsigned size)
+static uint64_t pnv_xive_pc_read(void *opaque, hwaddr offset, unsigned size)
{
PnvXive *xive = PNV_XIVE(opaque);
+ uint32_t nvt_idx = offset >> xive->pc_shift;
+ uint8_t blk = pnv_xive_block_id(xive); /* TODO: VDT -> block xlate */
- xive_error(xive, "PC: invalid read @%"HWADDR_PRIx, addr);
- return -1;
+ return pnv_xive_vst_addr(xive, VST_TSEL_VPDT, blk, nvt_idx);
}
static void pnv_xive_pc_write(void *opaque, hwaddr addr,
@@ -1898,6 +1991,7 @@
memory_region_init_io(&xive->ic_notify_mmio, OBJECT(dev),
&pnv_xive_ic_notify_ops,
xive, "xive-ic-notify", 1 << xive->ic_shift);
+ xive->ic_notify_mmio.disable_reentrancy_guard = true;
/* The Pervasive LSI trigger and EOI pages (not modeled) */
memory_region_init_io(&xive->ic_lsi_mmio, OBJECT(dev), &pnv_xive_ic_lsi_ops,
@@ -1933,6 +2027,7 @@
/* Presenter Controller MMIO region (not modeled) */
memory_region_init_io(&xive->pc_mmio, OBJECT(xive), &pnv_xive_pc_ops, xive,
"xive-pc", PNV9_XIVE_PC_SIZE);
+ xive->pc_mmio.disable_reentrancy_guard = true;
/* Thread Interrupt Management Area (Direct) */
memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &pnv_xive_tm_ops,
@@ -1998,6 +2093,7 @@
xrc->get_nvt = pnv_xive_get_nvt;
xrc->write_nvt = pnv_xive_write_nvt;
xrc->get_block_id = pnv_xive_get_block_id;
+ xrc->end_notify = pnv_xive_end_notify;
xnc->notify = pnv_xive_notify;
xpc->match_nvt = pnv_xive_match_nvt;
diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c
index bbb44a5..4b8d0a5 100644
--- a/hw/intc/pnv_xive2.c
+++ b/hw/intc/pnv_xive2.c
@@ -240,12 +240,20 @@
{
const XiveVstInfo *info = &vst_infos[type];
uint64_t addr = pnv_xive2_vst_addr(xive, type, blk, idx);
+ MemTxResult result;
if (!addr) {
return -1;
}
- cpu_physical_memory_read(addr, data, info->size);
+ result = address_space_read(&address_space_memory, addr,
+ MEMTXATTRS_UNSPECIFIED, data,
+ info->size);
+ if (result != MEMTX_OK) {
+ xive2_error(xive, "VST: read failed at @0x%" HWADDR_PRIx
+ " for VST %s %x/%x\n", addr, info->name, blk, idx);
+ return -1;
+ }
return 0;
}
@@ -256,16 +264,27 @@
{
const XiveVstInfo *info = &vst_infos[type];
uint64_t addr = pnv_xive2_vst_addr(xive, type, blk, idx);
+ MemTxResult result;
if (!addr) {
return -1;
}
if (word_number == XIVE_VST_WORD_ALL) {
- cpu_physical_memory_write(addr, data, info->size);
+ result = address_space_write(&address_space_memory, addr,
+ MEMTXATTRS_UNSPECIFIED, data,
+ info->size);
} else {
- cpu_physical_memory_write(addr + word_number * 4,
- data + word_number * 4, 4);
+ result = address_space_write(&address_space_memory,
+ addr + word_number * 4,
+ MEMTXATTRS_UNSPECIFIED,
+ data + word_number * 4, 4);
+ }
+
+ if (result != MEMTX_OK) {
+ xive2_error(xive, "VST: write failed at @0x%" HWADDR_PRIx
+                    " for VST %s %x/%x\n", addr, info->name, blk, idx);
+ return -1;
}
return 0;
}
diff --git a/hw/intc/pnv_xive_regs.h b/hw/intc/pnv_xive_regs.h
index c78f030..7938476 100644
--- a/hw/intc/pnv_xive_regs.h
+++ b/hw/intc/pnv_xive_regs.h
@@ -228,6 +228,7 @@
* VSD and is only meant to be used in indirect mode !
*/
#define VSD_MODE PPC_BITMASK(0, 1)
+#define VSD_MODE_INVALID 0
#define VSD_MODE_SHARED 1
#define VSD_MODE_EXCLUSIVE 2
#define VSD_MODE_FORWARD 3
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 56670b2..df3ee04 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -1518,6 +1518,13 @@
assert(xrtr->xfb);
}
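+
+/*
+ * Dispatch an END trigger to the router implementation. By default this is
+ * the local xive_router_end_notify(), but the PowerNV model overrides the
+ * hook to forward triggers to remote interrupt controllers.
+ */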
+static void xive_router_end_notify_handler(XiveRouter *xrtr, XiveEAS *eas)
+{
+ XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+    xrc->end_notify(xrtr, eas);
+}
+
/*
* Encode the HW CAM line in the block group mode format :
*
@@ -1664,8 +1671,7 @@
* another chip. We don't model the PowerBus but the END trigger
* message has the same parameters as the function below.
*/
-static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
- uint32_t end_idx, uint32_t end_data)
+void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas)
{
XiveEND end;
uint8_t priority;
@@ -1675,6 +1681,10 @@
XiveNVT nvt;
bool found;
+ uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
+ uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
+ uint32_t end_data = xive_get_field64(EAS_END_DATA, eas->w);
+
/* END cache lookup */
if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
@@ -1817,10 +1827,7 @@
/*
* The END trigger becomes an Escalation trigger
*/
- xive_router_end_notify(xrtr,
- xive_get_field32(END_W4_ESC_END_BLOCK, end.w4),
- xive_get_field32(END_W4_ESC_END_INDEX, end.w4),
- xive_get_field32(END_W5_ESC_END_DATA, end.w5));
+ xive_router_end_notify_handler(xrtr, (XiveEAS *) &end.w4);
}
void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked)
@@ -1871,10 +1878,7 @@
/*
* The event trigger becomes an END trigger
*/
- xive_router_end_notify(xrtr,
- xive_get_field64(EAS_END_BLOCK, eas.w),
- xive_get_field64(EAS_END_INDEX, eas.w),
- xive_get_field64(EAS_END_DATA, eas.w));
+ xive_router_end_notify_handler(xrtr, &eas);
}
static Property xive_router_properties[] = {
@@ -1887,12 +1891,16 @@
{
DeviceClass *dc = DEVICE_CLASS(klass);
XiveNotifierClass *xnc = XIVE_NOTIFIER_CLASS(klass);
+ XiveRouterClass *xrc = XIVE_ROUTER_CLASS(klass);
dc->desc = "XIVE Router Engine";
device_class_set_props(dc, xive_router_properties);
/* Parent is SysBusDeviceClass. No need to call its realize hook */
dc->realize = xive_router_realize;
xnc->notify = xive_router_notify;
+
+ /* By default, the router handles END triggers locally */
+ xrc->end_notify = xive_router_end_notify;
}
static const TypeInfo xive_router_info = {
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 67793a8..d5b6820 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -712,7 +712,7 @@
p->kernel_base = kernel_base;
p->kernel_size = kernel_size;
- qemu_register_reset(ppce500_reset_device_tree, p);
+ qemu_register_reset_nosnapshotload(ppce500_reset_device_tree, p);
p->notifier.notify = ppce500_init_notify;
qemu_add_machine_init_done_notifier(&p->notifier);
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 510ff0e..9acc7ad 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -81,6 +81,7 @@
{
PowerPCCPU *cpu = opaque;
+ cpu_ppc_tb_reset(&cpu->env);
cpu_reset(CPU(cpu));
}
diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
index 075367d..bd397cf 100644
--- a/hw/ppc/pegasos2.c
+++ b/hw/ppc/pegasos2.c
@@ -99,6 +99,7 @@
cpu->env.gpr[1] = 2 * VOF_STACK_SIZE - 0x20;
cpu->env.nip = 0x100;
}
+ cpu_ppc_tb_reset(&cpu->env);
}
static void pegasos2_pci_irq(void *opaque, int n, int level)
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 9b39d52..8c7afe0 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -61,6 +61,8 @@
hreg_compute_hflags(env);
ppc_maybe_interrupt(env);
+ cpu_ppc_tb_reset(env);
+
pcc->intc_reset(pc->chip, cpu);
}
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 0e0a3d9..aeb116d 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -32,6 +32,7 @@
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "sysemu/kvm.h"
+#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "kvm_ppc.h"
#include "migration/vmstate.h"
@@ -58,7 +59,9 @@
if (old_pending != env->pending_interrupts) {
ppc_maybe_interrupt(env);
- kvmppc_set_interrupt(cpu, irq, level);
+ if (kvm_enabled()) {
+ kvmppc_set_interrupt(cpu, irq, level);
+ }
}
trace_ppc_irq_set_exit(env, irq, level, env->pending_interrupts,
@@ -482,10 +485,32 @@
/*****************************************************************************/
/* PowerPC time base and decrementer emulation */
+/*
+ * Conversion between QEMU_CLOCK_VIRTUAL ns and timebase (TB) ticks:
+ * TB ticks are computed by multiplying the ns value by tb_freq and
+ * dividing by NANOSECONDS_PER_SECOND, rounding down. TB ticks drive
+ * all clocks and timers in the target machine.
+ *
+ * Converting TB intervals to ns for the purpose of setting a
+ * QEMU_CLOCK_VIRTUAL timer should go the other way, but rounding
+ * up. Rounding down could cause the timer to fire before the TB
+ * value has been reached.
+ */
+static uint64_t ns_to_tb(uint32_t freq, int64_t clock)
+{
+ return muldiv64(clock, freq, NANOSECONDS_PER_SECOND);
+}
+
+/* virtual clock in TB ticks, not adjusted by TB offset */
+static int64_t tb_to_ns_round_up(uint32_t freq, uint64_t tb)
+{
+ return muldiv64_round_up(tb, NANOSECONDS_PER_SECOND, freq);
+}
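+
+/*
+ * Rounding example: at tb_freq = 512 MHz, 513 TB ticks span ~1001.95 ns.
+ * tb_to_ns_round_up() returns 1002 ns, and ns_to_tb() applied to 1001 ns
+ * rounds back down to 512 ticks, so a timer armed from a tick count never
+ * fires before that tick is reached.
+ */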
+
uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset)
{
/* TB time in tb periods */
- return muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND) + tb_offset;
+ return ns_to_tb(tb_env->tb_freq, vmclk) + tb_offset;
}
uint64_t cpu_ppc_load_tbl (CPUPPCState *env)
@@ -497,7 +522,8 @@
return env->spr[SPR_TBL];
}
- tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset);
+ tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+ tb_env->tb_offset);
trace_ppc_tb_load(tb);
return tb;
@@ -508,7 +534,8 @@
ppc_tb_t *tb_env = env->tb_env;
uint64_t tb;
- tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset);
+ tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+ tb_env->tb_offset);
trace_ppc_tb_load(tb);
return tb >> 32;
@@ -526,8 +553,7 @@
static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk,
int64_t *tb_offsetp, uint64_t value)
{
- *tb_offsetp = value -
- muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND);
+ *tb_offsetp = value - ns_to_tb(tb_env->tb_freq, vmclk);
trace_ppc_tb_store(value, *tb_offsetp);
}
@@ -565,7 +591,8 @@
ppc_tb_t *tb_env = env->tb_env;
uint64_t tb;
- tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset);
+ tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+ tb_env->atb_offset);
trace_ppc_tb_load(tb);
return tb;
@@ -576,7 +603,8 @@
ppc_tb_t *tb_env = env->tb_env;
uint64_t tb;
- tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset);
+ tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+ tb_env->atb_offset);
trace_ppc_tb_load(tb);
return tb >> 32;
@@ -683,64 +711,77 @@
return ((tb_env->flags & flags) == PPC_DECR_UNDERFLOW_TRIGGERED);
}
-static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
+static inline int64_t __cpu_ppc_load_decr(CPUPPCState *env, int64_t now,
+ uint64_t next)
{
ppc_tb_t *tb_env = env->tb_env;
- int64_t decr, diff;
+ uint64_t n;
+ int64_t decr;
- diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- if (diff >= 0) {
- decr = muldiv64(diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
- } else if (tb_env->flags & PPC_TIMER_BOOKE) {
+ n = ns_to_tb(tb_env->decr_freq, now);
+ if (next > n && tb_env->flags & PPC_TIMER_BOOKE) {
decr = 0;
- } else {
- decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
+ } else {
+ decr = next - n;
}
+
trace_ppc_decr_load(decr);
return decr;
}
-target_ulong cpu_ppc_load_decr(CPUPPCState *env)
+static target_ulong _cpu_ppc_load_decr(CPUPPCState *env, int64_t now)
{
ppc_tb_t *tb_env = env->tb_env;
uint64_t decr;
- if (kvm_enabled()) {
- return env->spr[SPR_DECR];
- }
-
- decr = _cpu_ppc_load_decr(env, tb_env->decr_next);
+ decr = __cpu_ppc_load_decr(env, now, tb_env->decr_next);
/*
* If the large decrementer is enabled then the decrementer is sign
* extended to 64 bits, otherwise it is a 32-bit value.
*/
if (env->spr[SPR_LPCR] & LPCR_LD) {
- return decr;
+ PowerPCCPU *cpu = env_archcpu(env);
+ PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+ return sextract64(decr, 0, pcc->lrg_decr_bits);
}
return (uint32_t) decr;
}
-target_ulong cpu_ppc_load_hdecr(CPUPPCState *env)
+target_ulong cpu_ppc_load_decr(CPUPPCState *env)
+{
+ if (kvm_enabled()) {
+ return env->spr[SPR_DECR];
+ } else {
+ return _cpu_ppc_load_decr(env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+ }
+}
+
+static target_ulong _cpu_ppc_load_hdecr(CPUPPCState *env, int64_t now)
{
PowerPCCPU *cpu = env_archcpu(env);
PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
ppc_tb_t *tb_env = env->tb_env;
uint64_t hdecr;
- hdecr = _cpu_ppc_load_decr(env, tb_env->hdecr_next);
+ hdecr = __cpu_ppc_load_decr(env, now, tb_env->hdecr_next);
/*
* If we have a large decrementer (POWER9 or later) then hdecr is sign
* extended to 64 bits, otherwise it is 32 bits.
*/
if (pcc->lrg_decr_bits > 32) {
- return hdecr;
+ return sextract64(hdecr, 0, pcc->lrg_decr_bits);
}
return (uint32_t) hdecr;
}
+target_ulong cpu_ppc_load_hdecr(CPUPPCState *env)
+{
+ return _cpu_ppc_load_hdecr(env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+}
+
uint64_t cpu_ppc_load_purr (CPUPPCState *env)
{
ppc_tb_t *tb_env = env->tb_env;
@@ -785,7 +826,7 @@
ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0);
}
-static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
+static void __cpu_ppc_store_decr(PowerPCCPU *cpu, int64_t now, uint64_t *nextp,
QEMUTimer *timer,
void (*raise_excp)(void *),
void (*lower_excp)(PowerPCCPU *),
@@ -794,7 +835,7 @@
{
CPUPPCState *env = &cpu->env;
ppc_tb_t *tb_env = env->tb_env;
- uint64_t now, next;
+ uint64_t next;
int64_t signed_value;
int64_t signed_decr;
@@ -806,10 +847,14 @@
trace_ppc_decr_store(nr_bits, decr, value);
- if (kvm_enabled()) {
- /* KVM handles decrementer exceptions, we don't need our own timer */
- return;
- }
+ /*
+ * Calculate the next decrementer event and set a timer.
+ * decr_next is in timebase units to keep rounding simple. Note it is
+     * not adjusted by tb_offset because if the TB changes via a change of
+     * tb_offset, the decrementer does not change, so it is not directly
+     * comparable with the TB.
+ */
+ next = ns_to_tb(tb_env->decr_freq, now) + value;
+ *nextp = next; /* nextp is in timebase units */
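+    /*
+     * e.g. storing DECR = 100 at virtual time 1 s with a 512 MHz clock
+     * gives next = 512000000 + 100 ticks; the timer is then armed below
+     * at tb_to_ns_round_up(decr_freq, next) nanoseconds.
+     */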
/*
* Going from 1 -> 0 or 0 -> -1 is the event to generate a DEC interrupt.
@@ -832,21 +877,17 @@
(*lower_excp)(cpu);
}
- /* Calculate the next timer event */
- now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- next = now + muldiv64(value, NANOSECONDS_PER_SECOND, tb_env->decr_freq);
- *nextp = next;
-
/* Adjust timer */
- timer_mod(timer, next);
+ timer_mod(timer, tb_to_ns_round_up(tb_env->decr_freq, next));
}
-static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, target_ulong decr,
- target_ulong value, int nr_bits)
+static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, int64_t now,
+ target_ulong decr, target_ulong value,
+ int nr_bits)
{
ppc_tb_t *tb_env = cpu->env.tb_env;
- __cpu_ppc_store_decr(cpu, &tb_env->decr_next, tb_env->decr_timer,
+ __cpu_ppc_store_decr(cpu, now, &tb_env->decr_next, tb_env->decr_timer,
tb_env->decr_timer->cb, &cpu_ppc_decr_lower,
tb_env->flags, decr, value, nr_bits);
}
@@ -855,13 +896,22 @@
{
PowerPCCPU *cpu = env_archcpu(env);
PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+ int64_t now;
+ target_ulong decr;
int nr_bits = 32;
+ if (kvm_enabled()) {
+ /* KVM handles decrementer exceptions, we don't need our own timer */
+ return;
+ }
+
if (env->spr[SPR_LPCR] & LPCR_LD) {
nr_bits = pcc->lrg_decr_bits;
}
- _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value, nr_bits);
+ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ decr = _cpu_ppc_load_decr(env, now);
+ _cpu_ppc_store_decr(cpu, now, decr, value, nr_bits);
}
static void cpu_ppc_decr_cb(void *opaque)
@@ -871,14 +921,15 @@
cpu_ppc_decr_excp(cpu);
}
-static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, target_ulong hdecr,
- target_ulong value, int nr_bits)
+static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, int64_t now,
+ target_ulong hdecr, target_ulong value,
+ int nr_bits)
{
ppc_tb_t *tb_env = cpu->env.tb_env;
if (tb_env->hdecr_timer != NULL) {
/* HDECR (Book3S 64bit) is edge-based, not level like DECR */
- __cpu_ppc_store_decr(cpu, &tb_env->hdecr_next, tb_env->hdecr_timer,
+ __cpu_ppc_store_decr(cpu, now, &tb_env->hdecr_next, tb_env->hdecr_timer,
tb_env->hdecr_timer->cb, &cpu_ppc_hdecr_lower,
PPC_DECR_UNDERFLOW_TRIGGERED,
hdecr, value, nr_bits);
@@ -889,9 +940,12 @@
{
PowerPCCPU *cpu = env_archcpu(env);
PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+ int64_t now;
+ target_ulong hdecr;
- _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value,
- pcc->lrg_decr_bits);
+ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ hdecr = _cpu_ppc_load_hdecr(env, now);
+ _cpu_ppc_store_hdecr(cpu, now, hdecr, value, pcc->lrg_decr_bits);
}
static void cpu_ppc_hdecr_cb(void *opaque)
@@ -901,29 +955,16 @@
cpu_ppc_hdecr_excp(cpu);
}
-void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value)
+static void _cpu_ppc_store_purr(CPUPPCState *env, int64_t now, uint64_t value)
{
ppc_tb_t *tb_env = env->tb_env;
- cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
- &tb_env->purr_offset, value);
+ cpu_ppc_store_tb(tb_env, now, &tb_env->purr_offset, value);
}
-static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq)
+void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value)
{
- CPUPPCState *env = opaque;
- PowerPCCPU *cpu = env_archcpu(env);
- ppc_tb_t *tb_env = env->tb_env;
-
- tb_env->tb_freq = freq;
- tb_env->decr_freq = freq;
- /* There is a bug in Linux 2.4 kernels:
- * if a decrementer exception is pending when it enables msr_ee at startup,
- * it's not ready to handle it...
- */
- _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32);
- _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32);
- cpu_ppc_store_purr(env, 0x0000000000000000ULL);
+ _cpu_ppc_store_purr(env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), value);
}
static void timebase_save(PPCTimebase *tb)
@@ -936,8 +977,14 @@
return;
}
- /* not used anymore, we keep it for compatibility */
- tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST);
+ if (replay_mode == REPLAY_MODE_NONE) {
+ /* not used anymore, we keep it for compatibility */
+ tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST);
+ } else {
+ /* simpler for record-replay to avoid this event, compat not needed */
+ tb->time_of_the_day_ns = 0;
+ }
+
/*
* tb_offset is only expected to be changed by QEMU so
* there is no need to update it from KVM here
@@ -1027,7 +1074,7 @@
};
/* Set up (once) timebase frequency (in Hz) */
-clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq)
+void cpu_ppc_tb_init(CPUPPCState *env, uint32_t freq)
{
PowerPCCPU *cpu = env_archcpu(env);
ppc_tb_t *tb_env;
@@ -1040,16 +1087,41 @@
tb_env->flags |= PPC_DECR_UNDERFLOW_LEVEL;
}
/* Create new timer */
- tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_decr_cb, cpu);
+ tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ &cpu_ppc_decr_cb, cpu);
if (env->has_hv_mode && !cpu->vhyp) {
- tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_hdecr_cb,
- cpu);
+ tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ &cpu_ppc_hdecr_cb, cpu);
} else {
tb_env->hdecr_timer = NULL;
}
- cpu_ppc_set_tb_clk(env, freq);
- return &cpu_ppc_set_tb_clk;
+ tb_env->tb_freq = freq;
+ tb_env->decr_freq = freq;
+}
+
+void cpu_ppc_tb_reset(CPUPPCState *env)
+{
+ PowerPCCPU *cpu = env_archcpu(env);
+ ppc_tb_t *tb_env = env->tb_env;
+
+ timer_del(tb_env->decr_timer);
+ ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 0);
+ tb_env->decr_next = 0;
+ if (tb_env->hdecr_timer != NULL) {
+ timer_del(tb_env->hdecr_timer);
+ ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0);
+ tb_env->hdecr_next = 0;
+ }
+
+ /*
+ * There is a bug in Linux 2.4 kernels:
+ * if a decrementer exception is pending when it enables msr_ee at startup,
+ * it's not ready to handle it...
+ */
+ cpu_ppc_store_decr(env, -1);
+ cpu_ppc_store_hdecr(env, -1);
+ cpu_ppc_store_purr(env, 0x0000000000000000ULL);
}
void cpu_ppc_tb_free(CPUPPCState *env)
@@ -1125,9 +1197,7 @@
/* Cannot occur, but makes gcc happy */
return;
}
- next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->tb_freq);
- if (next == now)
- next++;
+ next = now + tb_to_ns_round_up(tb_env->tb_freq, next);
timer_mod(ppc40x_timer->fit_timer, next);
env->spr[SPR_40x_TSR] |= 1 << 26;
if ((env->spr[SPR_40x_TCR] >> 23) & 0x1) {
@@ -1153,14 +1223,15 @@
} else {
trace_ppc4xx_pit_start(ppc40x_timer->pit_reload);
now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- next = now + muldiv64(ppc40x_timer->pit_reload,
- NANOSECONDS_PER_SECOND, tb_env->decr_freq);
- if (is_excp)
- next += tb_env->decr_next - now;
- if (next == now)
- next++;
+
+ if (is_excp) {
+ tb_env->decr_next += ppc40x_timer->pit_reload;
+ } else {
+ tb_env->decr_next = ns_to_tb(tb_env->decr_freq, now)
+ + ppc40x_timer->pit_reload;
+ }
+ next = tb_to_ns_round_up(tb_env->decr_freq, tb_env->decr_next);
timer_mod(tb_env->decr_timer, next);
- tb_env->decr_next = next;
}
}
@@ -1213,9 +1284,7 @@
/* Cannot occur, but makes gcc happy */
return;
}
- next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq);
- if (next == now)
- next++;
+ next = now + tb_to_ns_round_up(tb_env->decr_freq, next);
trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]);
switch ((env->spr[SPR_40x_TSR] >> 30) & 0x3) {
case 0x0:
@@ -1465,5 +1534,7 @@
CPUPPCState *env = &cpu->env;
env->irq_input_state = 0;
- kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0);
+ if (kvm_enabled()) {
+ kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0);
+ }
}
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index d9231c7..f6fd35f 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -67,6 +67,7 @@
PowerPCCPU *cpu = opaque;
cpu_reset(CPU(cpu));
+ cpu_ppc_tb_reset(&cpu->env);
}
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 07e91e3..f7cc6a8 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1022,7 +1022,6 @@
{
MachineState *machine = MACHINE(spapr);
SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
- uint8_t rng_seed[32];
int chosen;
_FDT(chosen = fdt_add_subnode(fdt, 0, "chosen"));
@@ -1100,8 +1099,7 @@
spapr_dt_ov5_platform_support(spapr, fdt, chosen);
}
- qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
- _FDT(fdt_setprop(fdt, chosen, "rng-seed", rng_seed, sizeof(rng_seed)));
+ _FDT(fdt_setprop(fdt, chosen, "rng-seed", spapr->fdt_rng_seed, 32));
_FDT(spapr_dt_ovec(fdt, chosen, spapr->ov5_cas, "ibm,architecture-vec-5"));
}
@@ -1322,6 +1320,22 @@
}
}
+/* May be used when the machine is not running */
+void spapr_init_all_lpcrs(target_ulong value, target_ulong mask)
+{
+ CPUState *cs;
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ CPUPPCState *env = &cpu->env;
+ target_ulong lpcr;
+
+ lpcr = env->spr[SPR_LPCR];
+        lpcr = (lpcr & ~mask) | (value & mask);
+ ppc_store_lpcr(cpu, lpcr);
+ }
+}
+
static bool spapr_get_pate(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu,
target_ulong lpid, ppc_v3_pate_t *entry)
{
@@ -1583,7 +1597,7 @@
}
/* We're setting up a hash table, so that means we're not radix */
spapr->patb_entry = 0;
- spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT);
+ spapr_init_all_lpcrs(0, LPCR_HR | LPCR_UPRT);
return 0;
}
@@ -1638,6 +1652,14 @@
void *fdt;
int rc;
+ if (reason != SHUTDOWN_CAUSE_SNAPSHOT_LOAD) {
+ /*
+         * Record-replay snapshot load must not consume randomness; it was
+         * already replayed from the initial machine reset.
+ */
+ qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32);
+ }
+
pef_kvm_reset(machine->cgs, &error_fatal);
spapr_caps_apply(spapr);
@@ -1661,7 +1683,7 @@
spapr_ovec_cleanup(spapr->ov5_cas);
spapr->ov5_cas = spapr_ovec_new();
- ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
+ ppc_init_compat_all(spapr->max_compat_pvr, &error_fatal);
/*
* This is fixing some of the default configuration of the XIVE
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index b482d97..91fae56 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -74,6 +74,8 @@
kvm_check_mmu(cpu, &error_fatal);
+ cpu_ppc_tb_reset(env);
+
spapr_irq_cpu_intc_reset(spapr, cpu);
}
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 9b1f225..b7dc388 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -3,6 +3,7 @@
#include "qapi/error.h"
#include "sysemu/hw_accel.h"
#include "sysemu/runstate.h"
+#include "sysemu/tcg.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
@@ -789,6 +790,54 @@
return H_SUCCESS;
}
+static target_ulong h_set_mode_resource_set_ciabr(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ target_ulong mflags,
+ target_ulong value1,
+ target_ulong value2)
+{
+ CPUPPCState *env = &cpu->env;
+
+ assert(tcg_enabled()); /* KVM will have handled this */
+
+ if (mflags) {
+ return H_UNSUPPORTED_FLAG;
+ }
+ if (value2) {
+ return H_P4;
+ }
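+    /* the guest cannot set the CIABR privilege field to hypervisor (0b11) */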
+ if ((value1 & PPC_BITMASK(62, 63)) == 0x3) {
+ return H_P3;
+ }
+
+ ppc_store_ciabr(env, value1);
+
+ return H_SUCCESS;
+}
+
+static target_ulong h_set_mode_resource_set_dawr0(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ target_ulong mflags,
+ target_ulong value1,
+ target_ulong value2)
+{
+ CPUPPCState *env = &cpu->env;
+
+ assert(tcg_enabled()); /* KVM will have handled this */
+
+ if (mflags) {
+ return H_UNSUPPORTED_FLAG;
+ }
+ if (value2 & PPC_BIT(61)) {
+ return H_P4;
+ }
+
+ ppc_store_dawr0(env, value1);
+ ppc_store_dawrx0(env, value2);
+
+ return H_SUCCESS;
+}
+
static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu,
SpaprMachineState *spapr,
target_ulong mflags,
@@ -858,6 +907,14 @@
target_ulong ret = H_P2;
switch (resource) {
+ case H_SET_MODE_RESOURCE_SET_CIABR:
+ ret = h_set_mode_resource_set_ciabr(cpu, spapr, args[0], args[2],
+ args[3]);
+ break;
+ case H_SET_MODE_RESOURCE_SET_DAWR0:
+ ret = h_set_mode_resource_set_dawr0(cpu, spapr, args[0], args[2],
+ args[3]);
+ break;
case H_SET_MODE_RESOURCE_LE:
ret = h_set_mode_resource_le(cpu, spapr, args[0], args[2], args[3]);
break;
diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c
index 18c3f92..e3b430a 100644
--- a/hw/ppc/vof.c
+++ b/hw/ppc/vof.c
@@ -1024,6 +1024,8 @@
}
vof->claimed = NULL;
vof->of_instances = NULL;
+ vof->of_instance_last = 0;
+ vof->claimed_base = 0;
}
void vof_build_dt(void *fdt, Vof *vof)
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
index e095c00..17a8dfc 100644
--- a/include/hw/ppc/ppc.h
+++ b/include/hw/ppc/ppc.h
@@ -54,7 +54,8 @@
*/
uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset);
-clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq);
+void cpu_ppc_tb_init(CPUPPCState *env, uint32_t freq);
+void cpu_ppc_tb_reset(CPUPPCState *env);
void cpu_ppc_tb_free(CPUPPCState *env);
void cpu_ppc_hdecr_init(CPUPPCState *env);
void cpu_ppc_hdecr_exit(CPUPPCState *env);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 538b2df..f4bd204 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -204,6 +204,7 @@
uint32_t fdt_size;
uint32_t fdt_initial_size;
void *fdt_blob;
+ uint8_t fdt_rng_seed[32];
long kernel_size;
bool kernel_le;
uint64_t kernel_addr;
@@ -1012,6 +1013,7 @@
#define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */
void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
+void spapr_init_all_lpcrs(target_ulong value, target_ulong mask);
hwaddr spapr_get_rtas_addr(void);
bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr);
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 9f580a2..f120874 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -401,6 +401,7 @@
int (*write_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
XiveNVT *nvt, uint8_t word_number);
uint8_t (*get_block_id)(XiveRouter *xrtr);
+ void (*end_notify)(XiveRouter *xrtr, XiveEAS *eas);
};
int xive_router_get_eas(XiveRouter *xrtr, uint8_t eas_blk, uint32_t eas_idx,
@@ -414,6 +415,7 @@
int xive_router_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
XiveNVT *nvt, uint8_t word_number);
void xive_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked);
+void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas);
/*
* XIVE Presenter
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 0116183..ead97d3 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -56,6 +56,11 @@
return (__int128_t)a * b / c;
}
+static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c)
+{
+ return ((__int128_t)a * b + c - 1) / c;
+}
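+
+/* e.g. muldiv64(10, 10, 3) == 33 while muldiv64_round_up(10, 10, 3) == 34 */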
+
static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
uint64_t divisor)
{
@@ -83,7 +88,8 @@
uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
-static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+static inline uint64_t muldiv64_rounding(uint64_t a, uint32_t b, uint32_t c,
+ bool round_up)
{
union {
uint64_t ll;
@@ -99,12 +105,25 @@
u.ll = a;
rl = (uint64_t)u.l.low * (uint64_t)b;
+ if (round_up) {
+ rl += c - 1;
+ }
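+    /*
+     * Adding c - 1 to the low partial product turns the floor division
+     * below into a ceiling division; the carry propagates via rl >> 32.
+     */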
rh = (uint64_t)u.l.high * (uint64_t)b;
rh += (rl >> 32);
res.l.high = rh / c;
res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
return res.ll;
}
+
+static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+ return muldiv64_rounding(a, b, c, false);
+}
+
+static inline uint64_t muldiv64_round_up(uint64_t a, uint32_t b, uint32_t c)
+{
+ return muldiv64_rounding(a, b, c, true);
+}
#endif
/**
diff --git a/target/ppc/compat.c b/target/ppc/compat.c
index 7949a24..ebef2cc 100644
--- a/target/ppc/compat.c
+++ b/target/ppc/compat.c
@@ -229,6 +229,25 @@
return 0;
}
+/* To be used when the machine is not running */
+int ppc_init_compat_all(uint32_t compat_pvr, Error **errp)
+{
+ CPUState *cs;
+
+ CPU_FOREACH(cs) {
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ int ret;
+
+ ret = ppc_set_compat(cpu, compat_pvr, errp);
+
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
int ppc_compat_max_vthreads(PowerPCCPU *cpu)
{
const CompatInfo *compat = compat_by_pvr(cpu->compat_pvr);
diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c
index 424f2e1..e3ad8e0 100644
--- a/target/ppc/cpu.c
+++ b/target/ppc/cpu.c
@@ -59,6 +59,7 @@
env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
env->vscr_sat.u64[1] = 0;
set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
+ set_flush_inputs_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
}
uint32_t ppc_get_vscr(CPUPPCState *env)
@@ -102,6 +103,92 @@
ppc_maybe_interrupt(env);
}
+
+#if defined(TARGET_PPC64)
+void ppc_update_ciabr(CPUPPCState *env)
+{
+ CPUState *cs = env_cpu(env);
+ target_ulong ciabr = env->spr[SPR_CIABR];
+ target_ulong ciea, priv;
+
+ ciea = ciabr & PPC_BITMASK(0, 61);
+ priv = ciabr & PPC_BITMASK(62, 63);
+
+ if (env->ciabr_breakpoint) {
+ cpu_breakpoint_remove_by_ref(cs, env->ciabr_breakpoint);
+ env->ciabr_breakpoint = NULL;
+ }
+
+ if (priv) {
+ cpu_breakpoint_insert(cs, ciea, BP_CPU, &env->ciabr_breakpoint);
+ }
+}
+
+void ppc_store_ciabr(CPUPPCState *env, target_ulong val)
+{
+ env->spr[SPR_CIABR] = val;
+ ppc_update_ciabr(env);
+}
+
+void ppc_update_daw0(CPUPPCState *env)
+{
+ CPUState *cs = env_cpu(env);
+ target_ulong deaw = env->spr[SPR_DAWR0] & PPC_BITMASK(0, 60);
+ uint32_t dawrx = env->spr[SPR_DAWRX0];
+ int mrd = extract32(dawrx, PPC_BIT_NR(48), 54 - 48);
+ bool dw = extract32(dawrx, PPC_BIT_NR(57), 1);
+ bool dr = extract32(dawrx, PPC_BIT_NR(58), 1);
+ bool hv = extract32(dawrx, PPC_BIT_NR(61), 1);
+ bool sv = extract32(dawrx, PPC_BIT_NR(62), 1);
+    bool pr = extract32(dawrx, PPC_BIT_NR(63), 1);
+ vaddr len;
+ int flags;
+
+ if (env->dawr0_watchpoint) {
+ cpu_watchpoint_remove_by_ref(cs, env->dawr0_watchpoint);
+ env->dawr0_watchpoint = NULL;
+ }
+
+ if (!dr && !dw) {
+ return;
+ }
+
+ if (!hv && !sv && !pr) {
+ return;
+ }
+
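+    /* MRD encodes the watched range as a count of doublewords minus one */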
+ len = (mrd + 1) * 8;
+ flags = BP_CPU | BP_STOP_BEFORE_ACCESS;
+ if (dr) {
+ flags |= BP_MEM_READ;
+ }
+ if (dw) {
+ flags |= BP_MEM_WRITE;
+ }
+
+ cpu_watchpoint_insert(cs, deaw, len, flags, &env->dawr0_watchpoint);
+}
+
+void ppc_store_dawr0(CPUPPCState *env, target_ulong val)
+{
+ env->spr[SPR_DAWR0] = val;
+ ppc_update_daw0(env);
+}
+
+void ppc_store_dawrx0(CPUPPCState *env, uint32_t val)
+{
+ int hrammc = extract32(val, PPC_BIT_NR(56), 1);
+
+ if (hrammc) {
+ /* This might be done with a second watchpoint at the xor of DEAW[0] */
+ qemu_log_mask(LOG_UNIMP, "%s: DAWRX0[HRAMMC] is unimplemented\n",
+ __func__);
+ }
+
+ env->spr[SPR_DAWRX0] = val;
+ ppc_update_daw0(env);
+}
+#endif
#endif
static inline void fpscr_set_rounding_mode(CPUPPCState *env)
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 25fac95..173e4c3 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1121,7 +1121,9 @@
target_ulong reserve_addr; /* Reservation address */
target_ulong reserve_length; /* Reservation larx op size (bytes) */
target_ulong reserve_val; /* Reservation value */
+#if defined(TARGET_PPC64)
target_ulong reserve_val2;
+#endif
/* These are used in supervisor mode only */
target_ulong msr; /* machine state register */
@@ -1137,6 +1139,8 @@
/* MMU context, only relevant for full system emulation */
#if defined(TARGET_PPC64)
ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */
+ struct CPUBreakpoint *ciabr_breakpoint;
+ struct CPUWatchpoint *dawr0_watchpoint;
#endif
target_ulong sr[32]; /* segment registers */
uint32_t nb_BATs; /* number of BATs */
@@ -1403,6 +1407,11 @@
#if !defined(CONFIG_USER_ONLY)
void ppc_store_sdr1(CPUPPCState *env, target_ulong value);
void ppc_store_lpcr(PowerPCCPU *cpu, target_ulong val);
+void ppc_update_ciabr(CPUPPCState *env);
+void ppc_store_ciabr(CPUPPCState *env, target_ulong value);
+void ppc_update_daw0(CPUPPCState *env);
+void ppc_store_dawr0(CPUPPCState *env, target_ulong value);
+void ppc_store_dawrx0(CPUPPCState *env, uint32_t value);
#endif /* !defined(CONFIG_USER_ONLY) */
void ppc_store_msr(CPUPPCState *env, target_ulong value);
@@ -1495,6 +1504,7 @@
#if !defined(CONFIG_USER_ONLY)
int ppc_set_compat_all(uint32_t compat_pvr, Error **errp);
+int ppc_init_compat_all(uint32_t compat_pvr, Error **errp);
#endif
int ppc_compat_max_vthreads(PowerPCCPU *cpu);
void ppc_compat_add_property(Object *obj, const char *name,
@@ -1897,7 +1907,9 @@
#define SPR_PSSCR (0x357)
#define SPR_440_INV0 (0x370)
#define SPR_440_INV1 (0x371)
+#define SPR_TRIG1 (0x371)
#define SPR_440_INV2 (0x372)
+#define SPR_TRIG2 (0x372)
#define SPR_440_INV3 (0x373)
#define SPR_440_ITV0 (0x374)
#define SPR_440_ITV1 (0x375)
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 02b7aad..7ab5ee9 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -5117,17 +5117,17 @@
spr_register_kvm_hv(env, SPR_DAWR0, "DAWR0",
SPR_NOACCESS, SPR_NOACCESS,
SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
+ &spr_read_generic, &spr_write_dawr0,
KVM_REG_PPC_DAWR, 0x00000000);
spr_register_kvm_hv(env, SPR_DAWRX0, "DAWRX0",
SPR_NOACCESS, SPR_NOACCESS,
SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic32,
+ &spr_read_generic, &spr_write_dawrx0,
KVM_REG_PPC_DAWRX, 0x00000000);
spr_register_kvm_hv(env, SPR_CIABR, "CIABR",
SPR_NOACCESS, SPR_NOACCESS,
SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
+ &spr_read_generic, &spr_write_ciabr,
KVM_REG_PPC_CIABR, 0x00000000);
}
@@ -5660,6 +5660,16 @@
SPR_NOACCESS, SPR_NOACCESS,
&spr_read_tfmr, &spr_write_tfmr,
0x00000000);
+ spr_register_hv(env, SPR_TRIG1, "TRIG1",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_access_nop, &spr_write_generic,
+ &spr_access_nop, &spr_write_generic,
+ 0x00000000);
+ spr_register_hv(env, SPR_TRIG2, "TRIG2",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_access_nop, &spr_write_generic,
+ &spr_access_nop, &spr_write_generic,
+ 0x00000000);
#endif
}
@@ -7149,6 +7159,8 @@
env->nip = env->hreset_vector | env->excp_prefix;
if (tcg_enabled()) {
+ cpu_breakpoint_remove_all(s, BP_CPU);
+ cpu_watchpoint_remove_all(s, BP_CPU);
if (env->mmu_model != POWERPC_MMU_REAL) {
ppc_tlb_invalidate_all(env);
}
@@ -7336,6 +7348,9 @@
.cpu_exec_exit = ppc_cpu_exec_exit,
.do_unaligned_access = ppc_cpu_do_unaligned_access,
.do_transaction_failed = ppc_cpu_do_transaction_failed,
+ .debug_excp_handler = ppc_cpu_debug_excp_handler,
+ .debug_check_breakpoint = ppc_cpu_debug_check_breakpoint,
+ .debug_check_watchpoint = ppc_cpu_debug_check_watchpoint,
#endif /* !CONFIG_USER_ONLY */
};
#endif /* CONFIG_TCG */
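
For readers not fluent in cpu_init.c's registration tables: the accessor rows
of spr_register_kvm_hv() are, in order, the user, supervisor and hypervisor
read/write hooks (layout assumed from target/ppc/cpu_init.c), so the hunk
above swaps only the hypervisor-mode write paths. Annotated:

    spr_register_kvm_hv(env, SPR_DAWR0, "DAWR0",
                        SPR_NOACCESS, SPR_NOACCESS, /* user read/write */
                        SPR_NOACCESS, SPR_NOACCESS, /* supervisor read/write */
                        &spr_read_generic,          /* hypervisor read */
                        &spr_write_dawr0,           /* hypervisor write -> TCG helper */
                        KVM_REG_PPC_DAWR,           /* KVM one-reg id */
                        0x00000000);                /* reset value */
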
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 9aa8e46..72ec2be 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -843,6 +843,7 @@
PPCVirtualHypervisorClass *vhc =
PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
vhc->hypercall(cpu->vhyp, cpu);
+ powerpc_reset_excp_state(cpu);
return;
}
@@ -1014,6 +1015,7 @@
PPCVirtualHypervisorClass *vhc =
PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
vhc->hypercall(cpu->vhyp, cpu);
+ powerpc_reset_excp_state(cpu);
return;
}
@@ -1526,6 +1528,7 @@
PPCVirtualHypervisorClass *vhc =
PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
vhc->hypercall(cpu->vhyp, cpu);
+ powerpc_reset_excp_state(cpu);
return;
}
if (env->insns_flags2 & PPC2_ISA310) {
@@ -1571,9 +1574,11 @@
}
}
break;
+ case POWERPC_EXCP_TRACE: /* Trace exception */
+ msr |= env->error_code;
+ /* fall through */
case POWERPC_EXCP_DSEG: /* Data segment exception */
case POWERPC_EXCP_ISEG: /* Instruction segment exception */
- case POWERPC_EXCP_TRACE: /* Trace exception */
case POWERPC_EXCP_SDOOR: /* Doorbell interrupt */
case POWERPC_EXCP_PERFM: /* Performance monitor interrupt */
break;
@@ -3168,6 +3173,18 @@
}
#endif /* TARGET_PPC64 */
+/* Single-step tracing */
+void helper_book3s_trace(CPUPPCState *env, target_ulong prev_ip)
+{
+ uint32_t error_code = 0;
+ if (env->insns_flags2 & PPC2_ISA207S) {
+ /* Load/store reporting (SRR1[35, 36], SDAR) is not implemented. */
+ env->spr[SPR_POWER_SIAR] = prev_ip;
+ error_code = PPC_BIT(33);
+ }
+ raise_exception_err(env, POWERPC_EXCP_TRACE, error_code);
+}
+
void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
MMUAccessType access_type,
int mmu_idx, uintptr_t retaddr)
@@ -3243,5 +3260,97 @@
cs->exception_index = POWERPC_EXCP_MCHECK;
cpu_loop_exit_restore(cs, retaddr);
}
+
+void ppc_cpu_debug_excp_handler(CPUState *cs)
+{
+#if defined(TARGET_PPC64)
+ CPUPPCState *env = cs->env_ptr;
+
+ if (env->insns_flags2 & PPC2_ISA207S) {
+ if (cs->watchpoint_hit) {
+ if (cs->watchpoint_hit->flags & BP_CPU) {
+ env->spr[SPR_DAR] = cs->watchpoint_hit->hitaddr;
+ env->spr[SPR_DSISR] = PPC_BIT(41);
+ cs->watchpoint_hit = NULL;
+ raise_exception(env, POWERPC_EXCP_DSI);
+ }
+ cs->watchpoint_hit = NULL;
+ } else if (cpu_breakpoint_test(cs, env->nip, BP_CPU)) {
+ raise_exception_err(env, POWERPC_EXCP_TRACE,
+ PPC_BIT(33) | PPC_BIT(43));
+ }
+ }
+#endif
+}
+
+bool ppc_cpu_debug_check_breakpoint(CPUState *cs)
+{
+#if defined(TARGET_PPC64)
+ CPUPPCState *env = cs->env_ptr;
+
+ if (env->insns_flags2 & PPC2_ISA207S) {
+ target_ulong priv;
+
+ priv = env->spr[SPR_CIABR] & PPC_BITMASK(62, 63);
+ switch (priv) {
+ case 0x1: /* problem */
+ return env->msr & ((target_ulong)1 << MSR_PR);
+ case 0x2: /* supervisor */
+ return (!(env->msr & ((target_ulong)1 << MSR_PR)) &&
+ !(env->msr & ((target_ulong)1 << MSR_HV)));
+ case 0x3: /* hypervisor */
+ return (!(env->msr & ((target_ulong)1 << MSR_PR)) &&
+ (env->msr & ((target_ulong)1 << MSR_HV)));
+ default:
+ g_assert_not_reached();
+ }
+ }
+#endif
+
+ return false;
+}
+
+bool ppc_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp)
+{
+#if defined(TARGET_PPC64)
+ CPUPPCState *env = cs->env_ptr;
+
+ if (env->insns_flags2 & PPC2_ISA207S) {
+ if (wp == env->dawr0_watchpoint) {
+ uint32_t dawrx = env->spr[SPR_DAWRX0];
+ bool wt = extract32(dawrx, PPC_BIT_NR(59), 1);
+ bool wti = extract32(dawrx, PPC_BIT_NR(60), 1);
+ bool hv = extract32(dawrx, PPC_BIT_NR(61), 1);
+ bool sv = extract32(dawrx, PPC_BIT_NR(62), 1);
+ bool pr = extract32(dawrx, PPC_BIT_NR(63), 1); /* PR is bit 63, not 62 */
+
+ if ((env->msr & ((target_ulong)1 << MSR_PR)) && !pr) {
+ return false;
+ } else if ((env->msr & ((target_ulong)1 << MSR_HV)) && !hv) {
+ return false;
+ } else if (!sv) {
+ return false;
+ }
+
+ if (!wti) {
+ if (env->msr & ((target_ulong)1 << MSR_DR)) {
+ if (!wt) {
+ return false;
+ }
+ } else {
+ if (wt) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+ }
+#endif
+
+ return false;
+}
+
#endif /* CONFIG_TCG */
#endif /* !CONFIG_USER_ONLY */
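
The wt/wti ladder in ppc_cpu_debug_check_watchpoint() above collapses to a
one-line predicate. A restatement of the same logic in plain C (names invented
here for illustration):

    #include <stdbool.h>

    /*
     * DAWRX0 translation gating: WTI makes the match ignore translation;
     * otherwise WT must agree with the current MSR[DR] (data relocation)
     * state.
     */
    static bool dawrx_translation_match(bool wt, bool wti, bool msr_dr)
    {
        return wti || (wt == msr_dr);
    }
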
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index abec6fe..86f97ee 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -25,6 +25,9 @@
DEF_HELPER_2(rfebb, void, env, tl)
DEF_HELPER_2(store_lpcr, void, env, tl)
DEF_HELPER_2(store_pcr, void, env, tl)
+DEF_HELPER_2(store_ciabr, void, env, tl)
+DEF_HELPER_2(store_dawr0, void, env, tl)
+DEF_HELPER_2(store_dawrx0, void, env, tl)
DEF_HELPER_2(store_mmcr0, void, env, tl)
DEF_HELPER_2(store_mmcr1, void, env, tl)
DEF_HELPER_3(store_pmc, void, env, i32, i64)
@@ -32,6 +35,7 @@
DEF_HELPER_2(insns_inc, void, env, i32)
DEF_HELPER_1(handle_pmc5_overflow, void, env)
#endif
+DEF_HELPER_2(book3s_trace, void, env, tl)
DEF_HELPER_1(check_tlb_flush_local, void, env)
DEF_HELPER_1(check_tlb_flush_global, void, env)
#endif
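
For orientation: DEF_HELPER_2(store_ciabr, void, env, tl) declares a helper
taking a CPU env pointer and one target_ulong. Roughly, it yields the
following declarations (a sketch of the helper-gen pattern, not the literal
macro expansion):

    /* C entry point, implemented in misc_helper.c below: */
    void helper_store_ciabr(CPUPPCState *env, target_ulong val);

    /* Translate-time emitter, called from translate.c: */
    void gen_helper_store_ciabr(TCGv_env env, TCGv val);
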
diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 57acb32..15803bc 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -301,6 +301,9 @@
MMUAccessType access_type,
int mmu_idx, MemTxAttrs attrs,
MemTxResult response, uintptr_t retaddr);
+void ppc_cpu_debug_excp_handler(CPUState *cs);
+bool ppc_cpu_debug_check_breakpoint(CPUState *cs);
+bool ppc_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp);
#endif
FIELD(GER_MSK, XMSK, 0, 4)
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 7698501..51112bd 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1320,7 +1320,7 @@
return 0;
}
- if (!kvm_enabled() || !cap_interrupt_unset) {
+ if (!cap_interrupt_unset) {
return 0;
}
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index 1270a1f..68cbdff 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -9,6 +9,7 @@
#include "qapi/error.h"
#include "kvm_ppc.h"
#include "power8-pmu.h"
+#include "sysemu/replay.h"
static void post_load_update_msr(CPUPPCState *env)
{
@@ -208,6 +209,14 @@
/* Used to retain migration compatibility for pre 6.0 for 601 machines. */
env->hflags_compat_nmsr = 0;
+ if (tcg_enabled()) {
+ /*
+ * TCG does not maintain the DECR spr (unlike KVM), so it has to be
+ * saved here.
+ */
+ env->spr[SPR_DECR] = cpu_ppc_load_decr(env);
+ }
+
return 0;
}
@@ -313,6 +322,17 @@
post_load_update_msr(env);
if (tcg_enabled()) {
+ /* Re-set breaks based on regs */
+#if defined(TARGET_PPC64)
+ ppc_update_ciabr(env);
+ ppc_update_daw0(env);
+#endif
+ /*
+ * TCG needs to re-start the decrementer timer and/or raise the
+ * interrupt. This works for a level-triggered decrementer. Edge-
+ * triggered types (including HDEC) would need to carry more state.
+ */
+ cpu_ppc_store_decr(env, env->spr[SPR_DECR]);
pmu_mmcr01_updated(env);
}
@@ -670,6 +690,27 @@
}
};
+static bool reservation_needed(void *opaque)
+{
+ return (replay_mode != REPLAY_MODE_NONE);
+}
+
+static const VMStateDescription vmstate_reservation = {
+ .name = "cpu/reservation",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = reservation_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINTTL(env.reserve_addr, PowerPCCPU),
+ VMSTATE_UINTTL(env.reserve_length, PowerPCCPU),
+ VMSTATE_UINTTL(env.reserve_val, PowerPCCPU),
+#if defined(TARGET_PPC64)
+ VMSTATE_UINTTL(env.reserve_val2, PowerPCCPU),
+#endif
+ VMSTATE_END_OF_LIST()
+ }
+};
+
const VMStateDescription vmstate_ppc_cpu = {
.name = "cpu",
.version_id = 5,
@@ -691,8 +732,7 @@
VMSTATE_UINTTL_ARRAY(env.spr, PowerPCCPU, 1024),
VMSTATE_UINT64(env.spe_acc, PowerPCCPU),
- /* Reservation */
- VMSTATE_UINTTL(env.reserve_addr, PowerPCCPU),
+ VMSTATE_UNUSED(sizeof(target_ulong)), /* was env.reserve_addr */
/* Supervisor mode architected state */
VMSTATE_UINTTL(env.msr, PowerPCCPU),
@@ -721,6 +761,7 @@
&vmstate_tlbemb,
&vmstate_tlbmas,
&vmstate_compat,
+ &vmstate_reservation,
NULL
}
};
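
Two migration-format points in the machine.c hunks are worth spelling out: the
retired top-level field keeps its bytes in the version-5 stream via
VMSTATE_UNUSED, and the new subsection travels only when its .needed callback
returns true (here: only under record/replay). A generic sketch of the
subsection pattern, with placeholder names:

    /* Placeholder device state, for illustration only. */
    typedef struct MyDev { uint32_t optional_counter; } MyDev;

    static bool my_subsection_needed(void *opaque)
    {
        /* Decided at save time; absent subsections cost nothing. */
        return false;
    }

    static const VMStateDescription vmstate_my_subsection = {
        .name = "mydev/optional",
        .version_id = 1,
        .minimum_version_id = 1,
        .needed = my_subsection_needed,
        .fields = (VMStateField[]) {
            VMSTATE_UINT32(optional_counter, MyDev),
            VMSTATE_END_OF_LIST()
        }
    };
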
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index 692d058..a05bdf7 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -199,6 +199,21 @@
env->spr[SPR_PCR] = value & pcc->pcr_mask;
}
+void helper_store_ciabr(CPUPPCState *env, target_ulong value)
+{
+ ppc_store_ciabr(env, value);
+}
+
+void helper_store_dawr0(CPUPPCState *env, target_ulong value)
+{
+ ppc_store_dawr0(env, value);
+}
+
+void helper_store_dawrx0(CPUPPCState *env, target_ulong value)
+{
+ ppc_store_dawrx0(env, value);
+}
+
/*
* DPDES register is shared. Each bit reflects the state of the
* doorbell interrupt of a thread of the same core.
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 920084b..5823e03 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -219,27 +219,25 @@
return false;
}
-static void ppc_radix64_set_rc(PowerPCCPU *cpu, MMUAccessType access_type,
- uint64_t pte, hwaddr pte_addr, int *prot)
+static int ppc_radix64_check_rc(MMUAccessType access_type, uint64_t pte)
{
- CPUState *cs = CPU(cpu);
- uint64_t npte;
+ switch (access_type) {
+ case MMU_DATA_STORE:
+ if (!(pte & R_PTE_C)) {
+ break;
+ }
+ /* fall through */
+ case MMU_INST_FETCH:
+ case MMU_DATA_LOAD:
+ if (!(pte & R_PTE_R)) {
+ break;
+ }
- npte = pte | R_PTE_R; /* Always set reference bit */
-
- if (access_type == MMU_DATA_STORE) { /* Store/Write */
- npte |= R_PTE_C; /* Set change bit */
- } else {
- /*
- * Treat the page as read-only for now, so that a later write
- * will pass through this function again to set the C bit.
- */
- *prot &= ~PAGE_WRITE;
+ /* R/C bits are already set appropriately for this access */
+ return 0;
}
- if (pte ^ npte) { /* If pte has changed then write it back */
- stq_phys(cs->as, pte_addr, npte);
- }
+ return 1;
}
static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls)
@@ -380,7 +378,8 @@
ppc_v3_pate_t pate,
hwaddr *h_raddr, int *h_prot,
int *h_page_size, bool pde_addr,
- int mmu_idx, bool guest_visible)
+ int mmu_idx, uint64_t lpid,
+ bool guest_visible)
{
MMUAccessType access_type = orig_access_type;
int fault_cause = 0;
@@ -418,7 +417,24 @@
}
if (guest_visible) {
- ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, h_prot);
+ if (ppc_radix64_check_rc(access_type, pte)) {
+ /*
+ * Per ISA 3.1 Book III, 7.5.3 and 7.5.5, failure to set R/C during
+ * partition-scoped translation when effLPID = 0 results in normal
+ * (non-Hypervisor) Data and Instruction Storage Interrupts
+ * respectively.
+ *
+ * ISA 3.0 is ambiguous about this, but tests on POWER9 hardware
+ * seem to exhibit the same behavior.
+ */
+ if (lpid > 0) {
+ ppc_radix64_raise_hsi(cpu, access_type, eaddr, g_raddr,
+ DSISR_ATOMIC_RC);
+ } else {
+ ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_ATOMIC_RC);
+ }
+ return 1;
+ }
}
return 0;
@@ -447,7 +463,8 @@
vaddr eaddr, uint64_t pid,
ppc_v3_pate_t pate, hwaddr *g_raddr,
int *g_prot, int *g_page_size,
- int mmu_idx, bool guest_visible)
+ int mmu_idx, uint64_t lpid,
+ bool guest_visible)
{
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
@@ -497,7 +514,7 @@
ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
prtbe_addr, pate, &h_raddr,
&h_prot, &h_page_size, true,
- 5, guest_visible);
+ 5, lpid, guest_visible);
if (ret) {
return ret;
}
@@ -539,7 +556,8 @@
ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
pte_addr, pate, &h_raddr,
&h_prot, &h_page_size,
- true, 5, guest_visible);
+ true, 5, lpid,
+ guest_visible);
if (ret) {
return ret;
}
@@ -580,7 +598,11 @@
}
if (guest_visible) {
- ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, g_prot);
+ /* R/C bits not appropriately set for access */
+ if (ppc_radix64_check_rc(access_type, pte)) {
+ ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_ATOMIC_RC);
+ return 1;
+ }
}
return 0;
@@ -695,7 +717,8 @@
if (relocation) {
int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid,
pate, &g_raddr, &prot,
- &psize, mmu_idx, guest_visible);
+ &psize, mmu_idx, lpid,
+ guest_visible);
if (ret) {
return false;
}
@@ -719,7 +742,8 @@
ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
g_raddr, pate, raddr,
&prot, &psize, false,
- mmu_idx, guest_visible);
+ mmu_idx, lpid,
+ guest_visible);
if (ret) {
return false;
}
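
The fall-through switch in the new ppc_radix64_check_rc() encodes a simple
rule: every access needs the reference bit, and stores additionally need the
change bit. The same predicate in one expression (bit values assumed from
mmu-radix64.h):

    #include <stdbool.h>
    #include <stdint.h>

    #define R_PTE_R 0x0000000000000100ULL /* reference bit */
    #define R_PTE_C 0x0000000000000080ULL /* change bit */

    /* true iff the R/C bits already permit this access */
    static bool radix_rc_bits_ok(uint64_t pte, bool is_store)
    {
        return (pte & R_PTE_R) && (!is_store || (pte & R_PTE_C));
    }
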
diff --git a/target/ppc/spr_common.h b/target/ppc/spr_common.h
index 5995070..8a9d6cd 100644
--- a/target/ppc/spr_common.h
+++ b/target/ppc/spr_common.h
@@ -159,6 +159,9 @@
#ifdef TARGET_PPC64
void spr_read_cfar(DisasContext *ctx, int gprn, int sprn);
void spr_write_cfar(DisasContext *ctx, int sprn, int gprn);
+void spr_write_ciabr(DisasContext *ctx, int sprn, int gprn);
+void spr_write_dawr0(DisasContext *ctx, int sprn, int gprn);
+void spr_write_dawrx0(DisasContext *ctx, int sprn, int gprn);
void spr_write_ureg(DisasContext *ctx, int sprn, int gprn);
void spr_read_purr(DisasContext *ctx, int gprn, int sprn);
void spr_write_purr(DisasContext *ctx, int sprn, int gprn);
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 7111b34..6b242ae 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -75,7 +75,9 @@
static TCGv cpu_reserve;
static TCGv cpu_reserve_length;
static TCGv cpu_reserve_val;
+#if defined(TARGET_PPC64)
static TCGv cpu_reserve_val2;
+#endif
static TCGv cpu_fpscr;
static TCGv_i32 cpu_access_type;
@@ -149,9 +151,11 @@
cpu_reserve_val = tcg_global_mem_new(cpu_env,
offsetof(CPUPPCState, reserve_val),
"reserve_val");
+#if defined(TARGET_PPC64)
cpu_reserve_val2 = tcg_global_mem_new(cpu_env,
offsetof(CPUPPCState, reserve_val2),
"reserve_val2");
+#endif
cpu_fpscr = tcg_global_mem_new(cpu_env,
offsetof(CPUPPCState, fpscr), "fpscr");
@@ -336,8 +340,9 @@
* The exception can be either POWERPC_EXCP_TRACE (on most PowerPCs) or
* POWERPC_EXCP_DEBUG (on BookE).
*/
-static uint32_t gen_prep_dbgex(DisasContext *ctx)
+static void gen_debug_exception(DisasContext *ctx, bool rfi_type)
{
+#if !defined(CONFIG_USER_ONLY)
if (ctx->flags & POWERPC_FLAG_DE) {
target_ulong dbsr = 0;
if (ctx->singlestep_enabled & CPU_SINGLE_STEP) {
@@ -350,16 +355,18 @@
gen_load_spr(t0, SPR_BOOKE_DBSR);
tcg_gen_ori_tl(t0, t0, dbsr);
gen_store_spr(SPR_BOOKE_DBSR, t0);
- return POWERPC_EXCP_DEBUG;
+ gen_helper_raise_exception(cpu_env,
+ tcg_constant_i32(POWERPC_EXCP_DEBUG));
+ ctx->base.is_jmp = DISAS_NORETURN;
} else {
- return POWERPC_EXCP_TRACE;
+ if (!rfi_type) { /* BookS does not single-step rfi-type instructions */
+ TCGv t0 = tcg_temp_new();
+ tcg_gen_movi_tl(t0, ctx->cia);
+ gen_helper_book3s_trace(cpu_env, t0);
+ ctx->base.is_jmp = DISAS_NORETURN;
+ }
}
-}
-
-static void gen_debug_exception(DisasContext *ctx)
-{
- gen_helper_raise_exception(cpu_env, tcg_constant_i32(gen_prep_dbgex(ctx)));
- ctx->base.is_jmp = DISAS_NORETURN;
+#endif
}
static inline void gen_inval_exception(DisasContext *ctx, uint32_t error)
@@ -554,8 +561,9 @@
tcg_gen_mov_tl(cpu_lr, cpu_gpr[gprn]);
}
-/* CFAR */
#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
+/* Debug facilities */
+/* CFAR */
void spr_read_cfar(DisasContext *ctx, int gprn, int sprn)
{
tcg_gen_mov_tl(cpu_gpr[gprn], cpu_cfar);
@@ -565,6 +573,26 @@
{
tcg_gen_mov_tl(cpu_cfar, cpu_gpr[gprn]);
}
+
+/* Breakpoint */
+void spr_write_ciabr(DisasContext *ctx, int sprn, int gprn)
+{
+ translator_io_start(&ctx->base);
+ gen_helper_store_ciabr(cpu_env, cpu_gpr[gprn]);
+}
+
+/* Watchpoint */
+void spr_write_dawr0(DisasContext *ctx, int sprn, int gprn)
+{
+ translator_io_start(&ctx->base);
+ gen_helper_store_dawr0(cpu_env, cpu_gpr[gprn]);
+}
+
+void spr_write_dawrx0(DisasContext *ctx, int sprn, int gprn)
+{
+ translator_io_start(&ctx->base);
+ gen_helper_store_dawrx0(cpu_env, cpu_gpr[gprn]);
+}
#endif /* defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) */
/* CTR */
@@ -4182,7 +4210,7 @@
static void gen_lookup_and_goto_ptr(DisasContext *ctx)
{
if (unlikely(ctx->singlestep_enabled)) {
- gen_debug_exception(ctx);
+ gen_debug_exception(ctx, false);
} else {
/*
* tcg_gen_lookup_and_goto_ptr will exit the TB if
@@ -7408,8 +7436,9 @@
}
/* Honor single stepping. */
- if (unlikely(ctx->singlestep_enabled & CPU_SINGLE_STEP)
- && (nip <= 0x100 || nip > 0xf00)) {
+ if (unlikely(ctx->singlestep_enabled & CPU_SINGLE_STEP)) {
+ bool rfi_type = false;
+
switch (is_jmp) {
case DISAS_TOO_MANY:
case DISAS_EXIT_UPDATE:
@@ -7418,12 +7447,19 @@
break;
case DISAS_EXIT:
case DISAS_CHAIN:
+ /*
+ * This is a heuristic, to put it kindly. The rfi class of
+ * instructions is, branches aside, among the few that change
+ * NIP without taking an interrupt. Single-step trace interrupts
+ * do not fire on completion of these instructions.
+ */
+ rfi_type = true;
break;
default:
g_assert_not_reached();
}
- gen_debug_exception(ctx);
+ gen_debug_exception(ctx, rfi_type);
return;
}
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc
index 4ce02fd..7ff7e1e 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -71,7 +71,7 @@
{
#if defined(TARGET_PPC64)
TCGv ea;
- TCGv_i64 low_addr_gpr, high_addr_gpr;
+ TCGv_i64 lo, hi;
TCGv_i128 t16;
REQUIRE_INSNS_FLAGS(ctx, 64BX);
@@ -94,21 +94,21 @@
gen_set_access_type(ctx, ACCESS_INT);
ea = do_ea_calc(ctx, a->ra, tcg_constant_tl(a->si));
- if (prefixed || !ctx->le_mode) {
- low_addr_gpr = cpu_gpr[a->rt];
- high_addr_gpr = cpu_gpr[a->rt + 1];
+ if (ctx->le_mode && prefixed) {
+ lo = cpu_gpr[a->rt];
+ hi = cpu_gpr[a->rt + 1];
} else {
- low_addr_gpr = cpu_gpr[a->rt + 1];
- high_addr_gpr = cpu_gpr[a->rt];
+ lo = cpu_gpr[a->rt + 1];
+ hi = cpu_gpr[a->rt];
}
t16 = tcg_temp_new_i128();
if (store) {
- tcg_gen_concat_i64_i128(t16, low_addr_gpr, high_addr_gpr);
+ tcg_gen_concat_i64_i128(t16, lo, hi);
tcg_gen_qemu_st_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128));
} else {
tcg_gen_qemu_ld_i128(t16, ea, ctx->mem_idx, DEF_MEMOP(MO_128));
- tcg_gen_extr_i128_i64(low_addr_gpr, high_addr_gpr, t16);
+ tcg_gen_extr_i128_i64(lo, hi, t16);
}
#else
qemu_build_not_reached();
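
The lo/hi rename above is more than cosmetic: with the i128 memory ops, the
GPR pairing is chosen by position within the 128-bit value, not by guest
address. Signatures as assumed from tcg/tcg-op.h, where 'lo' is always the
least-significant doubleword:

    void tcg_gen_concat_i64_i128(TCGv_i128 ret, TCGv_i64 lo, TCGv_i64 hi);
    void tcg_gen_extr_i128_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i128 arg);
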
diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py
index 79c607b..a186105 100644
--- a/tests/avocado/replay_kernel.py
+++ b/tests/avocado/replay_kernel.py
@@ -255,8 +255,7 @@ def test_ppc64_pseries(self):
kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=hvc0'
- # icount is not good enough for PPC64 for complete boot yet
- console_pattern = 'Kernel command line: %s' % kernel_command_line
+ console_pattern = 'VFS: Cannot open root device'
self.run_rr(kernel_path, kernel_command_line, console_pattern)
def test_ppc64_powernv(self):
diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py
index 680c314..fc47874 100644
--- a/tests/avocado/reverse_debugging.py
+++ b/tests/avocado/reverse_debugging.py
@@ -150,16 +150,33 @@ def reverse_debugging(self, shift=7, args=None):
self.check_pc(g, addr)
logger.info('found position %x' % addr)
- logger.info('seeking to the end (icount %s)' % (last_icount - 1))
- vm.qmp('replay-break', icount=last_icount - 1)
- # continue - will return after pausing
- g.cmd(b'c', b'T02thread:01;')
+ # visit the recorded instructions in forward order
+ logger.info('stepping forward')
+ for addr in steps:
+ self.check_pc(g, addr)
+ self.gdb_step(g)
+ logger.info('found position %x' % addr)
+ # set breakpoints for the instructions just stepped over
logger.info('setting breakpoints')
for addr in steps:
# hardware breakpoint at addr with len=1
g.cmd(b'Z1,%x,1' % addr, b'OK')
+ # this may hit a breakpoint if the first instructions are
+ # executed again
+ logger.info('continuing execution')
+ vm.qmp('replay-break', icount=last_icount - 1)
+ # continue - will return after pausing
+ # This could stop at the end with a T02 return, or stop by
+ # re-executing one of the breakpoints with a T05 return.
+ g.cmd(b'c')
+ if self.vm_get_icount(vm) == last_icount - 1:
+ logger.info('reached the end (icount %s)' % (last_icount - 1))
+ else:
+ logger.info('hit a breakpoint again at %x (icount %s)' %
+ (self.get_pc(g), self.vm_get_icount(vm)))
+
logger.info('running reverse continue to reach %x' % steps[-1])
# reverse continue - will return after stopping at the breakpoint
g.cmd(b'bc', b'T05thread:01;')
@@ -216,3 +233,32 @@ def test_aarch64_virt(self):
self.reverse_debugging(
args=('-kernel', kernel_path))
+
+class ReverseDebugging_ppc64(ReverseDebugging):
+ """
+ :avocado: tags=accel:tcg
+ """
+
+ REG_PC = 0x40
+
+ # unidentified GitLab timeout problem
+ @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
+ def test_ppc64_pseries(self):
+ """
+ :avocado: tags=arch:ppc64
+ :avocado: tags=machine:pseries
+ """
+ # SLOF branches back to its entry point, which causes this test
+ # to take the 'hit a breakpoint again' path. That's not a problem,
+ # just slightly different from the other machines.
+ self.endian_is_le = False
+ self.reverse_debugging()
+
+ @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab')
+ def test_ppc64_powernv(self):
+ """
+ :avocado: tags=arch:ppc64
+ :avocado: tags=machine:powernv
+ """
+ self.endian_is_le = False
+ self.reverse_debugging()
diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131
index a847692..304bbb3 100755
--- a/tests/qemu-iotests/131
+++ b/tests/qemu-iotests/131
@@ -44,31 +44,35 @@
inuse_offset=$((0x2c))
size=$((64 * 1024 * 1024))
-CLUSTER_SIZE=64k
IMGFMT=parallels
_make_test_img $size
-echo == read empty image ==
-{ $QEMU_IO -c "read -P 0 32k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-echo == write more than 1 block in a row ==
-{ $QEMU_IO -c "write -P 0x11 32k 128k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-echo == read less than block ==
-{ $QEMU_IO -c "read -P 0x11 32k 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-echo == read exactly 1 block ==
-{ $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-echo == read more than 1 block ==
-{ $QEMU_IO -c "read -P 0x11 32k 128k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-echo == check that there is no trash after written ==
-{ $QEMU_IO -c "read -P 0 160k 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-echo == check that there is no trash before written ==
-{ $QEMU_IO -c "read -P 0 0 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+# get cluster size in sectors from "tracks" header field
+CLUSTER_SIZE_OFFSET=28
+CLUSTER_SIZE=$(peek_file_le $TEST_IMG $CLUSTER_SIZE_OFFSET 4)
+CLUSTER_SIZE=$((CLUSTER_SIZE * 512))
+CLUSTER_HALF_SIZE=$((CLUSTER_SIZE / 2))
+CLUSTER_DBL_SIZE=$((CLUSTER_SIZE * 2))
-echo "== Corrupt image =="
+echo == read empty image ==
+{ $QEMU_IO -c "read -P 0 $CLUSTER_HALF_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == write more than 1 block in a row ==
+{ $QEMU_IO -c "write -P 0x11 $CLUSTER_HALF_SIZE $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == read less than block ==
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_HALF_SIZE $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == read exactly 1 block ==
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == read more than 1 block ==
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_HALF_SIZE $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == check that there is no trash after written ==
+{ $QEMU_IO -c "read -P 0 $((CLUSTER_HALF_SIZE + CLUSTER_DBL_SIZE)) $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo == check that there is no trash before written ==
+{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== corrupt image =="
poke_file "$TEST_IMG" "$inuse_offset" "\x59\x6e\x6f\x74"
-{ $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
-_check_test_img
-_check_test_img -r all
-{ $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+echo "== read corrupted image with repairing =="
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
echo "== allocate with backing =="
# Verify that allocating clusters works fine even when there is a backing image.
@@ -83,7 +87,7 @@
# Write some data to the base image (which would trigger an assertion failure if
# interpreted as a QEMUIOVector)
-$QEMU_IO -c 'write -P 42 0 64k' "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -P 42 0 $CLUSTER_SIZE" "$TEST_IMG.base" | _filter_qemu_io
# Parallels does not seem to support storing a backing filename in the image
# itself, so we need to build our backing chain on the command line
@@ -99,8 +103,8 @@
QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT \
$QEMU_IO --image-opts "$imgopts" \
-c 'read -P 1 0 64' \
- -c "read -P 42 64 $((64 * 1024 - 64))" \
- -c "read -P 0 64k $((size - 64 * 1024))" \
+ -c "read -P 42 64 $((CLUSTER_SIZE - 64))" \
+ -c "read -P 0 $CLUSTER_SIZE $((size - CLUSTER_SIZE))" \
| _filter_qemu_io
# success, all done
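
The magic offsets used by the rewritten test (28 for the cluster size, 0x2c
for the in-use marker, 48 for data_off, 64 for the BAT) fall out of the
on-disk parallels header. A sketch of its layout (field names and types as
assumed from block/parallels.h; all fields little-endian):

    #include <stdint.h>

    /*
     * 64-byte parallels image header; 'tracks' is the cluster size in
     * 512-byte sectors, and the BAT starts right after the header.
     */
    typedef struct __attribute__((packed)) ParallelsHeader {
        char     magic[16];   /* offset  0 */
        uint32_t version;     /* offset 16 */
        uint32_t heads;       /* offset 20 */
        uint32_t cylinders;   /* offset 24 */
        uint32_t tracks;      /* offset 28: cluster size in sectors */
        uint32_t bat_entries; /* offset 32 */
        uint64_t nb_sectors;  /* offset 36 (unaligned) */
        uint32_t inuse;       /* offset 44 (0x2c): dirty marker */
        uint32_t data_off;    /* offset 48: start of the data area */
        char     padding[12]; /* offsets 52..63; BAT begins at 64 */
    } ParallelsHeader;
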
diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out
index de5ef7a..d290457 100644
--- a/tests/qemu-iotests/131.out
+++ b/tests/qemu-iotests/131.out
@@ -1,53 +1,42 @@
QA output created by 131
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
== read empty image ==
-read 65536/65536 bytes at offset 32768
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 524288
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== write more than 1 block in a row ==
-wrote 131072/131072 bytes at offset 32768
-128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 2097152/2097152 bytes at offset 524288
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== read less than block ==
-read 32768/32768 bytes at offset 32768
-32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 524288
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== read exactly 1 block ==
-read 65536/65536 bytes at offset 65536
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== read more than 1 block ==
-read 131072/131072 bytes at offset 32768
-128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 2097152/2097152 bytes at offset 524288
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== check that there is no trash after written ==
-read 32768/32768 bytes at offset 163840
-32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 524288/524288 bytes at offset 2621440
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== check that there is no trash before written ==
-read 32768/32768 bytes at offset 0
-32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-== Corrupt image ==
-qemu-io: can't open device TEST_DIR/t.parallels: parallels: Image was not closed correctly; cannot be opened read/write
-ERROR image was not closed correctly
-
-1 errors were found on the image.
-Data may be corrupted, or further writes to the image may corrupt it.
+read 524288/524288 bytes at offset 0
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== corrupt image ==
+== read corrupted image with repairing ==
Repairing image was not closed correctly
-The following inconsistencies were found and repaired:
-
- 0 leaked clusters
- 1 corruptions
-
-Double checking the fixed image now...
-No errors were found on the image.
-read 65536/65536 bytes at offset 65536
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
== allocate with backing ==
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
-wrote 65536/65536 bytes at offset 0
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 64/64 bytes at offset 0
64 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 64/64 bytes at offset 0
64 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65472/65472 bytes at offset 64
-63.938 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 67043328/67043328 bytes at offset 65536
-63.938 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048512/1048512 bytes at offset 64
+1023.938 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 66060288/66060288 bytes at offset 1048576
+63 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
*** done
diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks
new file mode 100755
index 0000000..a7a1b35
--- /dev/null
+++ b/tests/qemu-iotests/tests/parallels-checks
@@ -0,0 +1,145 @@
+#!/usr/bin/env bash
+# group: rw quick
+#
+# Test qemu-img check for parallels format
+#
+# Copyright (C) 2022 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=alexander.ivanov@virtuozzo.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ../common.rc
+. ../common.filter
+
+_supported_fmt parallels
+_supported_proto file
+_supported_os Linux
+
+SIZE=$((4 * 1024 * 1024))
+IMGFMT=parallels
+CLUSTER_SIZE_OFFSET=28
+DATA_OFF_OFFSET=48
+BAT_OFFSET=64
+
+_make_test_img $SIZE
+
+CLUSTER_SIZE=$(peek_file_le $TEST_IMG $CLUSTER_SIZE_OFFSET 4)
+CLUSTER_SIZE=$((CLUSTER_SIZE * 512))
+LAST_CLUSTER_OFF=$((SIZE - CLUSTER_SIZE))
+LAST_CLUSTER=$((LAST_CLUSTER_OFF/CLUSTER_SIZE))
+
+echo "== TEST OUT OF IMAGE CHECK =="
+
+echo "== write pattern =="
+{ $QEMU_IO -c "write -P 0x11 0 $SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== corrupt image =="
+cluster=$(($LAST_CLUSTER + 2))
+poke_file "$TEST_IMG" "$BAT_OFFSET" "\x$cluster\x00\x00\x00"
+
+echo "== read corrupted image with repairing =="
+{ $QEMU_IO -c "read -P 0x00 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+# Clear image
+_make_test_img $SIZE
+
+echo "== TEST LEAK CHECK =="
+
+echo "== write pattern to last cluster =="
+echo "write -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE"
+{ $QEMU_IO -c "write -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+file_size=`stat --printf="%s" "$TEST_IMG"`
+echo "file size: $file_size"
+
+echo "== extend image by 1 cluster =="
+fallocate -xl $((file_size + CLUSTER_SIZE)) "$TEST_IMG"
+
+file_size=`stat --printf="%s" "$TEST_IMG"`
+echo "file size: $file_size"
+
+echo "== repair image =="
+_check_test_img -r all
+
+file_size=`stat --printf="%s" "$TEST_IMG"`
+echo "file size: $file_size"
+
+echo "== check last cluster =="
+{ $QEMU_IO -c "read -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+# Clear image
+_make_test_img $SIZE
+
+echo "== TEST DUPLICATION CHECK =="
+
+echo "== write pattern to whole image =="
+{ $QEMU_IO -c "write -P 0x11 0 $SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== write another pattern to second cluster =="
+{ $QEMU_IO -c "write -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check second cluster =="
+{ $QEMU_IO -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== corrupt image =="
+poke_file "$TEST_IMG" "$(($BAT_OFFSET + 4))" "\x01\x00\x00\x00"
+
+echo "== check second cluster =="
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== repair image =="
+_check_test_img -r all
+
+echo "== check second cluster =="
+{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== check first cluster on host =="
+printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1`
+
+echo "== check second cluster on host =="
+printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1`
+
+# Clear image
+_make_test_img $SIZE
+
+echo "== TEST DATA_OFF CHECK =="
+
+echo "== write pattern to first cluster =="
+{ $QEMU_IO -c "write -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo "== spoil data_off field =="
+poke_file "$TEST_IMG" "$DATA_OFF_OFFSET" "\xff\xff\xff\xff"
+
+echo "== check first cluster =="
+{ $QEMU_IO -c "read -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/tests/parallels-checks.out b/tests/qemu-iotests/tests/parallels-checks.out
new file mode 100644
index 0000000..98a3a7f
--- /dev/null
+++ b/tests/qemu-iotests/tests/parallels-checks.out
@@ -0,0 +1,75 @@
+QA output created by parallels-checks
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== TEST OUT OF IMAGE CHECK ==
+== write pattern ==
+wrote 4194304/4194304 bytes at offset 0
+4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== corrupt image ==
+== read corrupted image with repairing ==
+Repairing cluster 0 is outside image
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== TEST LEAK CHECK ==
+== write pattern to last cluster ==
+write -P 0x11 3145728 1048576
+wrote 1048576/1048576 bytes at offset 3145728
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+file size: 2097152
+== extend image by 1 cluster ==
+file size: 3145728
+== repair image ==
+Repairing space leaked at the end of the image 1048576
+The following inconsistencies were found and repaired:
+
+ 1 leaked clusters
+ 0 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+file size: 2097152
+== check last cluster ==
+read 1048576/1048576 bytes at offset 3145728
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== TEST DUPLICATION CHECK ==
+== write pattern to whole image ==
+wrote 4194304/4194304 bytes at offset 0
+4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== write another pattern to second cluster ==
+wrote 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check second cluster ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== corrupt image ==
+== check second cluster ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== repair image ==
+Repairing duplicate offset in BAT entry 1
+The following inconsistencies were found and repaired:
+
+ 0 leaked clusters
+ 1 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+== check second cluster ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== check first cluster on host ==
+content: 0x11
+== check second cluster on host ==
+content: 0x55
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== TEST DATA_OFF CHECK ==
+== write pattern to first cluster ==
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== spoil data_off field ==
+== check first cluster ==
+Repairing data_off field has incorrect value
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done