Merge tag 'pull-vfio-20231106' of https://github.com/legoater/qemu into staging
vfio queue:
* Support for non 64b IOVA space
* Introduction of a PCIIOMMUOps callback structure to ease future
extensions
* Fix for a buffer overrun when writing the VF token
* PPC cleanups preparing ground for IOMMUFD support
# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEoPZlSPBIlev+awtgUaNDx8/77KEFAmVI+bIACgkQUaNDx8/7
# 7KHW4g/9FmgX0k2Elm1BAul3slJtuBT8/iHKfK19rhXICxhxS5xBWJA8FmosTWAT
# 91YqQJhOHARxLd9VROfv8Fq8sAo+Ys8bP3PTXh5satjY5gR9YtmMSVqvsAVLn7lv
# a/0xp7wPJt2UeKzvRNUqFXNr7yHPwxFxbJbmmAJbNte8p+TfE2qvojbJnu7BjJbg
# sTtS/vFWNJwtuNYTkMRoiZaUKEoEZ8LnslOqKUjgeO59g4i3Dq8e2JCmHANPFWUK
# cWmr7AqcXgXEnLSDWTtfN53bjcSCYkFVb4WV4Wv1/7hUF5jQ4UR0l3B64xWe0M3/
# Prak3bWOM/o7JwLBsgaWPngXA9V0WFBTXVF4x5qTwhuR1sSV8MxUvTKxI+qqiEzA
# FjU89oSZ+zXId/hEUuTL6vn1Th8/6mwD0L9ORchNOQUKzCjBzI4MVPB09nM3AdPC
# LGThlufsZktdoU2KjMHpc+gMIXQYsxkgvm07K5iZTZ5eJ4tV5KB0aPvTZppGUxe1
# YY9og9F3hxjDHQtEuSY2rzBQI7nrUpd1ZI5ut/3ZgDWkqD6aGRtMme4n4GsGsYb2
# Ht9+d2RL9S8uPUh+7rV8K/N3+vXgXRaEYTuAScKtflEbA7YnZA5nUdMng8x0kMTQ
# Y73XCd4UGWDfSSZsgaIHGkM/MRIHgmlrfcwPkWqWW9vF+92O6Hw=
# =/Du0
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 06 Nov 2023 22:35:30 HKT
# gpg: using RSA key A0F66548F04895EBFE6B0B6051A343C7CFFBECA1
# gpg: Good signature from "Cédric Le Goater <clg@redhat.com>" [unknown]
# gpg: aka "Cédric Le Goater <clg@kaod.org>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: A0F6 6548 F048 95EB FE6B 0B60 51A3 43C7 CFFB ECA1
* tag 'pull-vfio-20231106' of https://github.com/legoater/qemu: (22 commits)
vfio/common: Move vfio_host_win_add/del into spapr.c
vfio/spapr: Make vfio_spapr_create/remove_window static
vfio/container: Move spapr specific init/deinit into spapr.c
vfio/container: Move vfio_container_add/del_section_window into spapr.c
vfio/container: Move IBM EEH related functions into spapr_pci_vfio.c
util/uuid: Define UUID_STR_LEN from UUID_NONE string
util/uuid: Remove UUID_FMT_LEN
vfio/pci: Fix buffer overrun when writing the VF token
util/uuid: Add UUID_STR_LEN definition
hw/pci: modify pci_setup_iommu() to set PCIIOMMUOps
test: Add some tests for range and resv-mem helpers
virtio-iommu: Consolidate host reserved regions and property set ones
virtio-iommu: Implement set_iova_ranges() callback
virtio-iommu: Record whether a probe request has been issued
range: Introduce range_inverse_array()
virtio-iommu: Introduce per IOMMUDevice reserved regions
util/reserved-region: Add new ReservedRegion helpers
range: Make range_compare() public
virtio-iommu: Rename reserved_regions into prop_resv_regions
vfio: Collect container iova range info
...
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/block/parallels-ext.c b/block/parallels-ext.c
index 8a109f0..4d8ecf5 100644
--- a/block/parallels-ext.c
+++ b/block/parallels-ext.c
@@ -130,7 +130,7 @@
g_autofree uint64_t *l1_table = NULL;
BdrvDirtyBitmap *bitmap;
QemuUUID uuid;
- char uuidstr[UUID_FMT_LEN + 1];
+ char uuidstr[UUID_STR_LEN];
int i;
if (data_size < sizeof(bf)) {
diff --git a/block/vdi.c b/block/vdi.c
index c647d72..7cfd12b 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -239,7 +239,7 @@
static void vdi_header_print(VdiHeader *header)
{
- char uuidstr[37];
+ char uuidstr[UUID_STR_LEN];
QemuUUID uuid;
logout("text %s", header->text);
logout("signature 0x%08x\n", header->signature);
diff --git a/docs/devel/index-api.rst b/docs/devel/index-api.rst
index 539ad29..fe01b2b 100644
--- a/docs/devel/index-api.rst
+++ b/docs/devel/index-api.rst
@@ -11,6 +11,7 @@
loads-stores
memory
modules
+ pci
qom-api
qdev-api
ui
diff --git a/docs/devel/pci.rst b/docs/devel/pci.rst
new file mode 100644
index 0000000..6873933
--- /dev/null
+++ b/docs/devel/pci.rst
@@ -0,0 +1,8 @@
+=============
+PCI subsystem
+=============
+
+API Reference
+-------------
+
+.. kernel-doc:: include/hw/pci/pci.h
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index 49a8055..e8711ae 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -738,6 +738,10 @@
return &s->pchip.iommu_as;
}
+static const PCIIOMMUOps typhoon_iommu_ops = {
+ .get_address_space = typhoon_pci_dma_iommu,
+};
+
static void typhoon_set_irq(void *opaque, int irq, int level)
{
TyphoonState *s = opaque;
@@ -897,7 +901,7 @@
"iommu-typhoon", UINT64_MAX);
address_space_init(&s->pchip.iommu_as, MEMORY_REGION(&s->pchip.iommu),
"pchip0-pci");
- pci_setup_iommu(b, typhoon_pci_dma_iommu, s);
+ pci_setup_iommu(b, &typhoon_iommu_ops, s);
/* Pchip0 PCI special/interrupt acknowledge, 0x801.F800.0000, 64MB. */
memory_region_init_io(&s->pchip.reg_iack, OBJECT(s), &alpha_pci_iack_ops,
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index f35ae9a..9a8ac45 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -605,6 +605,10 @@
return &sdev->as;
}
+static const PCIIOMMUOps smmu_ops = {
+ .get_address_space = smmu_find_add_as,
+};
+
IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid)
{
uint8_t bus_n, devfn;
@@ -661,7 +665,7 @@
s->smmu_pcibus_by_busptr = g_hash_table_new(NULL, NULL);
if (s->primary_bus) {
- pci_setup_iommu(s->primary_bus, smmu_find_add_as, s);
+ pci_setup_iommu(s->primary_bus, &smmu_ops, s);
} else {
error_setg(errp, "SMMU is not attached to any PCI bus!");
}
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 2f1dbb3..b46d16c 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -705,7 +705,7 @@
int rc;
rc = snprintf(buffer, sizeof(buffer), "0x%"PRIx64":0x%"PRIx64":%u",
- rr->low, rr->high, rr->type);
+ range_lob(&rr->range), range_upb(&rr->range), rr->type);
assert(rc < sizeof(buffer));
visit_type_str(v, name, &p, errp);
@@ -717,6 +717,7 @@
Property *prop = opaque;
ReservedRegion *rr = object_field_prop_ptr(obj, prop);
const char *endptr;
+ uint64_t lob, upb;
char *str;
int ret;
@@ -724,7 +725,7 @@
return;
}
- ret = qemu_strtou64(str, &endptr, 16, &rr->low);
+ ret = qemu_strtou64(str, &endptr, 16, &lob);
if (ret) {
error_setg(errp, "start address of '%s'"
" must be a hexadecimal integer", name);
@@ -734,7 +735,7 @@
goto separator_error;
}
- ret = qemu_strtou64(endptr + 1, &endptr, 16, &rr->high);
+ ret = qemu_strtou64(endptr + 1, &endptr, 16, &upb);
if (ret) {
error_setg(errp, "end address of '%s'"
" must be a hexadecimal integer", name);
@@ -744,6 +745,8 @@
goto separator_error;
}
+ range_set_bounds(&rr->range, lob, upb);
+
ret = qemu_strtoui(endptr + 1, &endptr, 10, &rr->type);
if (ret) {
error_setg(errp, "type of '%s'"
@@ -1111,7 +1114,7 @@
{
Property *prop = opaque;
QemuUUID *uuid = object_field_prop_ptr(obj, prop);
- char buffer[UUID_FMT_LEN + 1];
+ char buffer[UUID_STR_LEN];
char *p = buffer;
qemu_uuid_unparse(uuid, buffer);
diff --git a/hw/hyperv/vmbus.c b/hw/hyperv/vmbus.c
index 271289f..c64eaa5 100644
--- a/hw/hyperv/vmbus.c
+++ b/hw/hyperv/vmbus.c
@@ -2271,7 +2271,7 @@
VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
BusChild *child;
Error *err = NULL;
- char idstr[UUID_FMT_LEN + 1];
+ char idstr[UUID_STR_LEN];
assert(!qemu_uuid_is_null(&vdev->instanceid));
@@ -2467,7 +2467,7 @@
static char *vmbus_get_fw_dev_path(DeviceState *dev)
{
VMBusDevice *vdev = VMBUS_DEVICE(dev);
- char uuid[UUID_FMT_LEN + 1];
+ char uuid[UUID_STR_LEN];
qemu_uuid_unparse(&vdev->instanceid, uuid);
return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 7965415..4203144 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1450,6 +1450,10 @@
return &iommu_as[devfn]->as;
}
+static const PCIIOMMUOps amdvi_iommu_ops = {
+ .get_address_space = amdvi_host_dma_iommu,
+};
+
static const MemoryRegionOps mmio_mem_ops = {
.read = amdvi_mmio_read,
.write = amdvi_mmio_write,
@@ -1581,7 +1585,7 @@
AMDVI_MMIO_SIZE);
memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR,
&s->mmio);
- pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
+ pci_setup_iommu(bus, &amdvi_iommu_ops, s);
amdvi_init(s);
}
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1c6c186..a6f1a7a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -4088,6 +4088,10 @@
return &vtd_as->as;
}
+static PCIIOMMUOps vtd_iommu_ops = {
+ .get_address_space = vtd_host_dma_iommu,
+};
+
static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
{
X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
@@ -4210,7 +4214,7 @@
s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
g_free, g_free);
vtd_init(s);
- pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
+ pci_setup_iommu(bus, &vtd_iommu_ops, dev);
/* Pseudo address space under root PCI bus. */
x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
diff --git a/hw/pci-host/astro.c b/hw/pci-host/astro.c
index 4b2d7ca..84e0ca1 100644
--- a/hw/pci-host/astro.c
+++ b/hw/pci-host/astro.c
@@ -345,6 +345,10 @@
return &s->astro->iommu_as;
}
+static const PCIIOMMUOps elroy_pcihost_iommu_ops = {
+ .get_address_space = elroy_pcihost_set_iommu,
+};
+
/*
* Encoding in IOSAPIC:
* base_addr == 0xfffa0000, we want to get 0xa0ff0000.
@@ -834,7 +838,7 @@
&elroy->pci_io);
/* Host memory as seen from the PCI side, via the IOMMU. */
- pci_setup_iommu(PCI_HOST_BRIDGE(elroy)->bus, elroy_pcihost_set_iommu,
+ pci_setup_iommu(PCI_HOST_BRIDGE(elroy)->bus, &elroy_pcihost_iommu_ops,
elroy);
}
}
diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c
index 6f5442f..f477f97 100644
--- a/hw/pci-host/designware.c
+++ b/hw/pci-host/designware.c
@@ -663,6 +663,10 @@
return &s->pci.address_space;
}
+static const PCIIOMMUOps designware_iommu_ops = {
+ .get_address_space = designware_pcie_host_set_iommu,
+};
+
static void designware_pcie_host_realize(DeviceState *dev, Error **errp)
{
PCIHostState *pci = PCI_HOST_BRIDGE(dev);
@@ -705,7 +709,7 @@
address_space_init(&s->pci.address_space,
&s->pci.address_space_root,
"pcie-bus-address-space");
- pci_setup_iommu(pci->bus, designware_pcie_host_set_iommu, s);
+ pci_setup_iommu(pci->bus, &designware_iommu_ops, s);
qdev_realize(DEVICE(&s->root), BUS(pci->bus), &error_fatal);
}
diff --git a/hw/pci-host/dino.c b/hw/pci-host/dino.c
index 8250322..5b0947a 100644
--- a/hw/pci-host/dino.c
+++ b/hw/pci-host/dino.c
@@ -354,6 +354,10 @@
return &s->bm_as;
}
+static const PCIIOMMUOps dino_iommu_ops = {
+ .get_address_space = dino_pcihost_set_iommu,
+};
+
/*
* Dino interrupts are connected as shown on Page 78, Table 23
* (Little-endian bit numbers)
@@ -481,7 +485,7 @@
g_free(name);
}
- pci_setup_iommu(phb->bus, dino_pcihost_set_iommu, s);
+ pci_setup_iommu(phb->bus, &dino_iommu_ops, s);
sysbus_init_mmio(sbd, &s->this_mem);
diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
index c5e58f4..2a74dbe 100644
--- a/hw/pci-host/pnv_phb3.c
+++ b/hw/pci-host/pnv_phb3.c
@@ -968,6 +968,10 @@
return &ds->dma_as;
}
+static PCIIOMMUOps pnv_phb3_iommu_ops = {
+ .get_address_space = pnv_phb3_dma_iommu,
+};
+
static void pnv_phb3_instance_init(Object *obj)
{
PnvPHB3 *phb = PNV_PHB3(obj);
@@ -1012,7 +1016,7 @@
object_property_set_int(OBJECT(pci->bus), "chip-id", phb->chip_id,
&error_abort);
- pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb);
+ pci_setup_iommu(pci->bus, &pnv_phb3_iommu_ops, phb);
}
static void pnv_phb3_realize(DeviceState *dev, Error **errp)
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 29cb11a..37c7afc 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1518,6 +1518,10 @@
&phb->phb_regs_mr);
}
+static PCIIOMMUOps pnv_phb4_iommu_ops = {
+ .get_address_space = pnv_phb4_dma_iommu,
+};
+
static void pnv_phb4_instance_init(Object *obj)
{
PnvPHB4 *phb = PNV_PHB4(obj);
@@ -1557,7 +1561,7 @@
object_property_set_int(OBJECT(pci->bus), "chip-id", phb->chip_id,
&error_abort);
- pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
+ pci_setup_iommu(pci->bus, &pnv_phb4_iommu_ops, phb);
pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
}
diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c
index 3881424..453a4e6 100644
--- a/hw/pci-host/ppce500.c
+++ b/hw/pci-host/ppce500.c
@@ -435,6 +435,10 @@
return &s->bm_as;
}
+static const PCIIOMMUOps ppce500_iommu_ops = {
+ .get_address_space = e500_pcihost_set_iommu,
+};
+
static void e500_pcihost_realize(DeviceState *dev, Error **errp)
{
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
@@ -469,7 +473,7 @@
memory_region_init(&s->bm, OBJECT(s), "bm-e500", UINT64_MAX);
memory_region_add_subregion(&s->bm, 0x0, &s->busmem);
address_space_init(&s->bm_as, &s->bm, "pci-bm");
- pci_setup_iommu(b, e500_pcihost_set_iommu, s);
+ pci_setup_iommu(b, &ppce500_iommu_ops, s);
pci_create_simple(b, 0, "e500-host-bridge");
diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c
index 9a11ac4..86c3a49 100644
--- a/hw/pci-host/raven.c
+++ b/hw/pci-host/raven.c
@@ -223,6 +223,10 @@
return &s->bm_as;
}
+static const PCIIOMMUOps raven_iommu_ops = {
+ .get_address_space = raven_pcihost_set_iommu,
+};
+
static void raven_change_gpio(void *opaque, int n, int level)
{
PREPPCIState *s = opaque;
@@ -320,7 +324,7 @@
memory_region_add_subregion(&s->bm, 0 , &s->bm_pci_memory_alias);
memory_region_add_subregion(&s->bm, 0x80000000, &s->bm_ram_alias);
address_space_init(&s->bm_as, &s->bm, "raven-bm");
- pci_setup_iommu(&s->pci_bus, raven_pcihost_set_iommu, s);
+ pci_setup_iommu(&s->pci_bus, &raven_iommu_ops, s);
h->bus = &s->pci_bus;
diff --git a/hw/pci-host/sabre.c b/hw/pci-host/sabre.c
index dcb2e23..d0851b4 100644
--- a/hw/pci-host/sabre.c
+++ b/hw/pci-host/sabre.c
@@ -112,6 +112,10 @@
return &is->iommu_as;
}
+static const PCIIOMMUOps sabre_iommu_ops = {
+ .get_address_space = sabre_pci_dma_iommu,
+};
+
static void sabre_config_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
@@ -384,7 +388,7 @@
/* IOMMU */
memory_region_add_subregion_overlap(&s->sabre_config, 0x200,
sysbus_mmio_get_region(SYS_BUS_DEVICE(s->iommu), 0), 1);
- pci_setup_iommu(phb->bus, sabre_pci_dma_iommu, s->iommu);
+ pci_setup_iommu(phb->bus, &sabre_iommu_ops, s->iommu);
/* APB secondary busses */
pci_dev = pci_new_multifunction(PCI_DEVFN(1, 0), TYPE_SIMBA_PCI_BRIDGE);
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 885c04b..c49417a 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2678,7 +2678,7 @@
PCIBus *iommu_bus = bus;
uint8_t devfn = dev->devfn;
- while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
+ while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) {
PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);
/*
@@ -2717,15 +2717,23 @@
iommu_bus = parent_bus;
}
- if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) {
- return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
+ if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
+ return iommu_bus->iommu_ops->get_address_space(bus,
+ iommu_bus->iommu_opaque, devfn);
}
return &address_space_memory;
}
-void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
+void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
{
- bus->iommu_fn = fn;
+ /*
+ * If called, pci_setup_iommu() should provide a minimum set of
+ * useful callbacks for the bus.
+ */
+ assert(ops);
+ assert(ops->get_address_space);
+
+ bus->iommu_ops = ops;
bus->iommu_opaque = opaque;
}
diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c
index 672090d..df4ee37 100644
--- a/hw/ppc/ppc440_pcix.c
+++ b/hw/ppc/ppc440_pcix.c
@@ -449,6 +449,10 @@
return &s->bm_as;
}
+static const PCIIOMMUOps ppc440_iommu_ops = {
+ .get_address_space = ppc440_pcix_set_iommu,
+};
+
/*
* Some guests on sam460ex write all kinds of garbage here such as
* missing enable bit and low bits set and still expect this to work
@@ -503,7 +507,7 @@
memory_region_init(&s->bm, OBJECT(s), "bm-ppc440-pcix", UINT64_MAX);
memory_region_add_subregion(&s->bm, 0x0, &s->busmem);
address_space_init(&s->bm_as, &s->bm, "pci-bm");
- pci_setup_iommu(h->bus, ppc440_pcix_set_iommu, s);
+ pci_setup_iommu(h->bus, &ppc440_iommu_ops, s);
memory_region_init(&s->container, OBJECT(s), "pci-container", PCI_ALL_SIZE);
memory_region_init_io(&h->conf_mem, OBJECT(s), &ppc440_pcix_host_conf_ops,
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 370c5a9..a27024e 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -780,6 +780,10 @@
return &phb->iommu_as;
}
+static const PCIIOMMUOps spapr_iommu_ops = {
+ .get_address_space = spapr_pci_dma_iommu,
+};
+
static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev)
{
g_autofree char *path = NULL;
@@ -1978,7 +1982,7 @@
memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
&sphb->msiwindow);
- pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
+ pci_setup_iommu(bus, &spapr_iommu_ops, sphb);
pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index 9016720..f283f7e 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -18,14 +18,112 @@
*/
#include "qemu/osdep.h"
+#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "hw/ppc/spapr.h"
#include "hw/pci-host/spapr.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_device.h"
-#include "hw/vfio/vfio.h"
+#include "hw/vfio/vfio-common.h"
#include "qemu/error-report.h"
+/*
+ * Interfaces for IBM EEH (Enhanced Error Handling)
+ */
+static bool vfio_eeh_container_ok(VFIOContainer *container)
+{
+ /*
+ * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
+ * implementation is broken if there are multiple groups in a
+ * container. The hardware works in units of Partitionable
+ * Endpoints (== IOMMU groups) and the EEH operations naively
+ * iterate across all groups in the container, without any logic
+ * to make sure the groups have their state synchronized. For
+ * certain operations (ENABLE) that might be ok, until an error
+ * occurs, but for others (GET_STATE) it's clearly broken.
+ */
+
+ /*
+ * XXX Once fixed kernels exist, test for them here
+ */
+
+ if (QLIST_EMPTY(&container->group_list)) {
+ return false;
+ }
+
+ if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
+ return false;
+ }
+
+ return true;
+}
+
+static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
+{
+ struct vfio_eeh_pe_op pe_op = {
+ .argsz = sizeof(pe_op),
+ .op = op,
+ };
+ int ret;
+
+ if (!vfio_eeh_container_ok(container)) {
+ error_report("vfio/eeh: EEH_PE_OP 0x%x: "
+ "kernel requires a container with exactly one group", op);
+ return -EPERM;
+ }
+
+ ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
+ if (ret < 0) {
+ error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
+ return -errno;
+ }
+
+ return ret;
+}
+
+static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
+{
+ VFIOAddressSpace *space = vfio_get_address_space(as);
+ VFIOContainer *container = NULL;
+
+ if (QLIST_EMPTY(&space->containers)) {
+ /* No containers to act on */
+ goto out;
+ }
+
+ container = QLIST_FIRST(&space->containers);
+
+ if (QLIST_NEXT(container, next)) {
+ /*
+ * We don't yet have logic to synchronize EEH state across
+ * multiple containers
+ */
+ container = NULL;
+ goto out;
+ }
+
+out:
+ vfio_put_address_space(space);
+ return container;
+}
+
+static bool vfio_eeh_as_ok(AddressSpace *as)
+{
+ VFIOContainer *container = vfio_eeh_as_container(as);
+
+ return (container != NULL) && vfio_eeh_container_ok(container);
+}
+
+static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
+{
+ VFIOContainer *container = vfio_eeh_as_container(as);
+
+ if (!container) {
+ return -ENODEV;
+ }
+ return vfio_eeh_container_op(container, op);
+}
+
bool spapr_phb_eeh_available(SpaprPhbState *sphb)
{
return vfio_eeh_as_ok(&sphb->iommu_as);
diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c
index 1391dd7..7c56aad 100644
--- a/hw/remote/iommu.c
+++ b/hw/remote/iommu.c
@@ -100,6 +100,10 @@
iommu->elem_by_devfn = NULL;
}
+static const PCIIOMMUOps remote_iommu_ops = {
+ .get_address_space = remote_iommu_find_add_as,
+};
+
void remote_iommu_setup(PCIBus *pci_bus)
{
RemoteIommu *iommu = NULL;
@@ -108,7 +112,7 @@
iommu = REMOTE_IOMMU(object_new(TYPE_REMOTE_IOMMU));
- pci_setup_iommu(pci_bus, remote_iommu_find_add_as, iommu);
+ pci_setup_iommu(pci_bus, &remote_iommu_ops, iommu);
object_property_add_child(OBJECT(pci_bus), "remote-iommu", OBJECT(iommu));
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 2ca36f9..347580e 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -652,6 +652,10 @@
return &iommu->as;
}
+static const PCIIOMMUOps s390_iommu_ops = {
+ .get_address_space = s390_pci_dma_iommu,
+};
+
static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
{
uint8_t expected, actual;
@@ -839,7 +843,7 @@
b = pci_register_root_bus(dev, NULL, s390_pci_set_irq, s390_pci_map_irq,
NULL, get_system_memory(), get_system_io(), 0,
64, TYPE_PCI_BUS);
- pci_setup_iommu(b, s390_pci_dma_iommu, s);
+ pci_setup_iommu(b, &s390_iommu_ops, s);
bus = BUS(b);
qbus_set_hotplug_handler(bus, OBJECT(dev));
@@ -1058,7 +1062,7 @@
pdev = PCI_DEVICE(dev);
pci_bridge_map_irq(pb, dev->id, s390_pci_map_irq);
- pci_setup_iommu(&pb->sec_bus, s390_pci_dma_iommu, s);
+ pci_setup_iommu(&pb->sec_bus, &s390_iommu_ops, s);
qbus_set_hotplug_handler(BUS(&pb->sec_bus), OBJECT(s));
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 5f257bf..bbf69ff 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -14,7 +14,6 @@
#include <linux/vfio.h>
#include <sys/ioctl.h>
#include "qapi/error.h"
-#include "hw/vfio/vfio.h"
#include "hw/vfio/vfio-common.h"
#include "hw/s390x/ap-device.h"
#include "qemu/error-report.h"
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 6623ae2..d857bb8 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -20,7 +20,6 @@
#include <sys/ioctl.h>
#include "qapi/error.h"
-#include "hw/vfio/vfio.h"
#include "hw/vfio/vfio-common.h"
#include "hw/s390x/s390-ccw.h"
#include "hw/s390x/vfio-ccw.h"
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index d806057..e70fdf5 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -26,7 +26,6 @@
#include <linux/vfio.h>
#include "hw/vfio/vfio-common.h"
-#include "hw/vfio/vfio.h"
#include "hw/vfio/pci.h"
#include "exec/address-spaces.h"
#include "exec/memory.h"
@@ -246,44 +245,6 @@
return true;
}
-void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova,
- hwaddr max_iova, uint64_t iova_pgsizes)
-{
- VFIOHostDMAWindow *hostwin;
-
- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
- if (ranges_overlap(hostwin->min_iova,
- hostwin->max_iova - hostwin->min_iova + 1,
- min_iova,
- max_iova - min_iova + 1)) {
- hw_error("%s: Overlapped IOMMU are not enabled", __func__);
- }
- }
-
- hostwin = g_malloc0(sizeof(*hostwin));
-
- hostwin->min_iova = min_iova;
- hostwin->max_iova = max_iova;
- hostwin->iova_pgsizes = iova_pgsizes;
- QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next);
-}
-
-int vfio_host_win_del(VFIOContainer *container,
- hwaddr min_iova, hwaddr max_iova)
-{
- VFIOHostDMAWindow *hostwin;
-
- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
- if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) {
- QLIST_REMOVE(hostwin, hostwin_next);
- g_free(hostwin);
- return 0;
- }
- }
-
- return -1;
-}
-
static bool vfio_listener_skipped_section(MemoryRegionSection *section)
{
return (!memory_region_is_ram(section->mr) &&
@@ -532,22 +493,6 @@
g_free(vrdl);
}
-static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container,
- hwaddr iova, hwaddr end)
-{
- VFIOHostDMAWindow *hostwin;
- bool hostwin_found = false;
-
- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
- if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
- hostwin_found = true;
- break;
- }
- }
-
- return hostwin_found ? hostwin : NULL;
-}
-
static bool vfio_known_safe_misalignment(MemoryRegionSection *section)
{
MemoryRegion *mr = section->mr;
@@ -626,7 +571,6 @@
Int128 llend, llsize;
void *vaddr;
int ret;
- VFIOHostDMAWindow *hostwin;
Error *err = NULL;
if (!vfio_listener_valid_section(section, "region_add")) {
@@ -648,13 +592,6 @@
goto fail;
}
- hostwin = vfio_find_hostwin(container, iova, end);
- if (!hostwin) {
- error_setg(&err, "Container %p can't map guest IOVA region"
- " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, iova, end);
- goto fail;
- }
-
memory_region_ref(section->mr);
if (memory_region_is_iommu(section->mr)) {
@@ -693,6 +630,15 @@
goto fail;
}
+ if (container->iova_ranges) {
+ ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr,
+ container->iova_ranges, &err);
+ if (ret) {
+ g_free(giommu);
+ goto fail;
+ }
+ }
+
ret = memory_region_register_iommu_notifier(section->mr, &giommu->n,
&err);
if (ret) {
@@ -726,7 +672,7 @@
llsize = int128_sub(llend, int128_make64(iova));
if (memory_region_is_ram_device(section->mr)) {
- hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
+ hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1;
if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
trace_vfio_listener_region_add_no_dma_map(
@@ -825,12 +771,8 @@
if (memory_region_is_ram_device(section->mr)) {
hwaddr pgmask;
- VFIOHostDMAWindow *hostwin;
- hostwin = vfio_find_hostwin(container, iova, end);
- assert(hostwin); /* or region_add() would have failed */
-
- pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1;
+ pgmask = (1ULL << ctz64(container->pgsizes)) - 1;
try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
} else if (memory_region_has_ram_discard_manager(section->mr)) {
vfio_unregister_ram_discard_listener(container, section);
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index adc4672..2420100 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -20,20 +20,15 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
-#ifdef CONFIG_KVM
-#include <linux/kvm.h>
-#endif
#include <linux/vfio.h>
#include "hw/vfio/vfio-common.h"
-#include "hw/vfio/vfio.h"
#include "exec/address-spaces.h"
#include "exec/memory.h"
#include "exec/ram_addr.h"
#include "hw/hw.h"
#include "qemu/error-report.h"
#include "qemu/range.h"
-#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "trace.h"
#include "qapi/error.h"
@@ -205,92 +200,6 @@
return -errno;
}
-int vfio_container_add_section_window(VFIOContainer *container,
- MemoryRegionSection *section,
- Error **errp)
-{
- VFIOHostDMAWindow *hostwin;
- hwaddr pgsize = 0;
- int ret;
-
- if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
- return 0;
- }
-
- /* For now intersections are not allowed, we may relax this later */
- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
- if (ranges_overlap(hostwin->min_iova,
- hostwin->max_iova - hostwin->min_iova + 1,
- section->offset_within_address_space,
- int128_get64(section->size))) {
- error_setg(errp,
- "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing"
- "host DMA window [0x%"PRIx64",0x%"PRIx64"]",
- section->offset_within_address_space,
- section->offset_within_address_space +
- int128_get64(section->size) - 1,
- hostwin->min_iova, hostwin->max_iova);
- return -EINVAL;
- }
- }
-
- ret = vfio_spapr_create_window(container, section, &pgsize);
- if (ret) {
- error_setg_errno(errp, -ret, "Failed to create SPAPR window");
- return ret;
- }
-
- vfio_host_win_add(container, section->offset_within_address_space,
- section->offset_within_address_space +
- int128_get64(section->size) - 1, pgsize);
-#ifdef CONFIG_KVM
- if (kvm_enabled()) {
- VFIOGroup *group;
- IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
- struct kvm_vfio_spapr_tce param;
- struct kvm_device_attr attr = {
- .group = KVM_DEV_VFIO_GROUP,
- .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
- .addr = (uint64_t)(unsigned long)¶m,
- };
-
- if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD,
- ¶m.tablefd)) {
- QLIST_FOREACH(group, &container->group_list, container_next) {
- param.groupfd = group->fd;
- if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
- error_setg_errno(errp, errno,
- "vfio: failed GROUP_SET_SPAPR_TCE for "
- "KVM VFIO device %d and group fd %d",
- param.tablefd, param.groupfd);
- return -errno;
- }
- trace_vfio_spapr_group_attach(param.groupfd, param.tablefd);
- }
- }
- }
-#endif
- return 0;
-}
-
-void vfio_container_del_section_window(VFIOContainer *container,
- MemoryRegionSection *section)
-{
- if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
- return;
- }
-
- vfio_spapr_remove_window(container,
- section->offset_within_address_space);
- if (vfio_host_win_del(container,
- section->offset_within_address_space,
- section->offset_within_address_space +
- int128_get64(section->size) - 1) < 0) {
- hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx,
- __func__, section->offset_within_address_space);
- }
-}
-
int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
{
int ret;
@@ -355,14 +264,6 @@
return ret;
}
-static void vfio_listener_release(VFIOContainer *container)
-{
- memory_listener_unregister(&container->listener);
- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
- memory_listener_unregister(&container->prereg_listener);
- }
-}
-
static struct vfio_info_cap_header *
vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
{
@@ -382,7 +283,7 @@
/* If the capability cannot be found, assume no DMA limiting */
hdr = vfio_get_iommu_type1_info_cap(info,
VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL);
- if (hdr == NULL) {
+ if (!hdr) {
return false;
}
@@ -394,6 +295,32 @@
return true;
}
+static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
+ VFIOContainer *container)
+{
+ struct vfio_info_cap_header *hdr;
+ struct vfio_iommu_type1_info_cap_iova_range *cap;
+
+ hdr = vfio_get_iommu_type1_info_cap(info,
+ VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
+ if (!hdr) {
+ return false;
+ }
+
+ cap = (void *)hdr;
+
+ for (int i = 0; i < cap->nr_iovas; i++) {
+ Range *range = g_new(Range, 1);
+
+ range_set_bounds(range, cap->iova_ranges[i].start,
+ cap->iova_ranges[i].end);
+ container->iova_ranges =
+ range_list_insert(container->iova_ranges, range);
+ }
+
+ return true;
+}
+
static void vfio_kvm_device_add_group(VFIOGroup *group)
{
Error *err = NULL;
@@ -535,6 +462,12 @@
}
}
+static void vfio_free_container(VFIOContainer *container)
+{
+ g_list_free_full(container->iova_ranges, g_free);
+ g_free(container);
+}
+
static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
Error **errp)
{
@@ -616,8 +549,8 @@
container->error = NULL;
container->dirty_pages_supported = false;
container->dma_max_mappings = 0;
+ container->iova_ranges = NULL;
QLIST_INIT(&container->giommu_list);
- QLIST_INIT(&container->hostwin_list);
QLIST_INIT(&container->vrdl_list);
ret = vfio_init_container(container, group->fd, errp);
@@ -652,84 +585,21 @@
if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) {
container->dma_max_mappings = 65535;
}
+
+ vfio_get_info_iova_range(info, container);
+
vfio_get_iommu_info_migration(container, info);
g_free(info);
-
- /*
- * FIXME: We should parse VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE
- * information to get the actual window extent rather than assume
- * a 64-bit IOVA address space.
- */
- vfio_host_win_add(container, 0, (hwaddr)-1, container->pgsizes);
-
break;
}
case VFIO_SPAPR_TCE_v2_IOMMU:
case VFIO_SPAPR_TCE_IOMMU:
{
- struct vfio_iommu_spapr_tce_info info;
- bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
-
- /*
- * The host kernel code implementing VFIO_IOMMU_DISABLE is called
- * when container fd is closed so we do not call it explicitly
- * in this file.
- */
- if (!v2) {
- ret = ioctl(fd, VFIO_IOMMU_ENABLE);
- if (ret) {
- error_setg_errno(errp, errno, "failed to enable container");
- ret = -errno;
- goto enable_discards_exit;
- }
- } else {
- container->prereg_listener = vfio_prereg_listener;
-
- memory_listener_register(&container->prereg_listener,
- &address_space_memory);
- if (container->error) {
- memory_listener_unregister(&container->prereg_listener);
- ret = -1;
- error_propagate_prepend(errp, container->error,
- "RAM memory listener initialization failed: ");
- goto enable_discards_exit;
- }
- }
-
- info.argsz = sizeof(info);
- ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+ ret = vfio_spapr_container_init(container, errp);
if (ret) {
- error_setg_errno(errp, errno,
- "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed");
- ret = -errno;
- if (v2) {
- memory_listener_unregister(&container->prereg_listener);
- }
goto enable_discards_exit;
}
-
- if (v2) {
- container->pgsizes = info.ddw.pgsizes;
- /*
- * There is a default window in just created container.
- * To make region_add/del simpler, we better remove this
- * window now and let those iommu_listener callbacks
- * create/remove them when needed.
- */
- ret = vfio_spapr_remove_window(container, info.dma32_window_start);
- if (ret) {
- error_setg_errno(errp, -ret,
- "failed to remove existing window");
- goto enable_discards_exit;
- }
- } else {
- /* The default table uses 4K pages */
- container->pgsizes = 0x1000;
- vfio_host_win_add(container, info.dma32_window_start,
- info.dma32_window_start +
- info.dma32_window_size - 1,
- 0x1000);
- }
+ break;
}
}
@@ -759,13 +629,17 @@
QLIST_REMOVE(group, container_next);
QLIST_REMOVE(container, next);
vfio_kvm_device_del_group(group);
- vfio_listener_release(container);
+ memory_listener_unregister(&container->listener);
+ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
+ container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
+ vfio_spapr_container_deinit(container);
+ }
enable_discards_exit:
vfio_ram_block_discard_disable(container, false);
free_container_exit:
- g_free(container);
+ vfio_free_container(container);
close_fd_exit:
close(fd);
@@ -789,7 +663,11 @@
* group.
*/
if (QLIST_EMPTY(&container->group_list)) {
- vfio_listener_release(container);
+ memory_listener_unregister(&container->listener);
+ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
+ container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
+ vfio_spapr_container_deinit(container);
+ }
}
if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
@@ -800,7 +678,6 @@
if (QLIST_EMPTY(&container->group_list)) {
VFIOAddressSpace *space = container->space;
VFIOGuestIOMMU *giommu, *tmp;
- VFIOHostDMAWindow *hostwin, *next;
QLIST_REMOVE(container, next);
@@ -811,15 +688,9 @@
g_free(giommu);
}
- QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next,
- next) {
- QLIST_REMOVE(hostwin, hostwin_next);
- g_free(hostwin);
- }
-
trace_vfio_disconnect_container(container->fd);
close(container->fd);
- g_free(container);
+ vfio_free_container(container);
vfio_put_address_space(space);
}
@@ -975,103 +846,6 @@
close(vbasedev->fd);
}
-/*
- * Interfaces for IBM EEH (Enhanced Error Handling)
- */
-static bool vfio_eeh_container_ok(VFIOContainer *container)
-{
- /*
- * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
- * implementation is broken if there are multiple groups in a
- * container. The hardware works in units of Partitionable
- * Endpoints (== IOMMU groups) and the EEH operations naively
- * iterate across all groups in the container, without any logic
- * to make sure the groups have their state synchronized. For
- * certain operations (ENABLE) that might be ok, until an error
- * occurs, but for others (GET_STATE) it's clearly broken.
- */
-
- /*
- * XXX Once fixed kernels exist, test for them here
- */
-
- if (QLIST_EMPTY(&container->group_list)) {
- return false;
- }
-
- if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
- return false;
- }
-
- return true;
-}
-
-static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
-{
- struct vfio_eeh_pe_op pe_op = {
- .argsz = sizeof(pe_op),
- .op = op,
- };
- int ret;
-
- if (!vfio_eeh_container_ok(container)) {
- error_report("vfio/eeh: EEH_PE_OP 0x%x: "
- "kernel requires a container with exactly one group", op);
- return -EPERM;
- }
-
- ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
- if (ret < 0) {
- error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
- return -errno;
- }
-
- return ret;
-}
-
-static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
-{
- VFIOAddressSpace *space = vfio_get_address_space(as);
- VFIOContainer *container = NULL;
-
- if (QLIST_EMPTY(&space->containers)) {
- /* No containers to act on */
- goto out;
- }
-
- container = QLIST_FIRST(&space->containers);
-
- if (QLIST_NEXT(container, next)) {
- /*
- * We don't yet have logic to synchronize EEH state across
- * multiple containers
- */
- container = NULL;
- goto out;
- }
-
-out:
- vfio_put_address_space(space);
- return container;
-}
-
-bool vfio_eeh_as_ok(AddressSpace *as)
-{
- VFIOContainer *container = vfio_eeh_as_container(as);
-
- return (container != NULL) && vfio_eeh_container_ok(container);
-}
-
-int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
-{
- VFIOContainer *container = vfio_eeh_as_container(as);
-
- if (!container) {
- return -ENODEV;
- }
- return vfio_eeh_container_op(container, op);
-}
-
static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
{
char *tmp, group_path[PATH_MAX], *group_name;
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 7e5da21..168847e 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -23,7 +23,6 @@
#include <sys/ioctl.h>
#include "hw/vfio/vfio-common.h"
-#include "hw/vfio/vfio.h"
#include "hw/hw.h"
#include "trace.h"
#include "qapi/error.h"
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index b27011c..c62c02f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3081,7 +3081,7 @@
struct stat st;
int i, ret;
bool is_mdev;
- char uuid[UUID_FMT_LEN];
+ char uuid[UUID_STR_LEN];
char *name;
if (!vbasedev->sysfsdev) {
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 9ec1e95..83da2f7 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -11,6 +11,11 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
+#ifdef CONFIG_KVM
+#include <linux/kvm.h>
+#endif
+#include "sysemu/kvm.h"
+#include "exec/address-spaces.h"
#include "hw/vfio/vfio-common.h"
#include "hw/hw.h"
@@ -135,15 +140,90 @@
trace_vfio_prereg_unregister(reg.vaddr, reg.size, ret ? -errno : 0);
}
-const MemoryListener vfio_prereg_listener = {
+static const MemoryListener vfio_prereg_listener = {
.name = "vfio-pre-reg",
.region_add = vfio_prereg_listener_region_add,
.region_del = vfio_prereg_listener_region_del,
};
-int vfio_spapr_create_window(VFIOContainer *container,
- MemoryRegionSection *section,
- hwaddr *pgsize)
+static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova,
+ hwaddr max_iova, uint64_t iova_pgsizes)
+{
+ VFIOHostDMAWindow *hostwin;
+
+ QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+ if (ranges_overlap(hostwin->min_iova,
+ hostwin->max_iova - hostwin->min_iova + 1,
+ min_iova,
+ max_iova - min_iova + 1)) {
+ hw_error("%s: Overlapped IOMMU are not enabled", __func__);
+ }
+ }
+
+ hostwin = g_malloc0(sizeof(*hostwin));
+
+ hostwin->min_iova = min_iova;
+ hostwin->max_iova = max_iova;
+ hostwin->iova_pgsizes = iova_pgsizes;
+ QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next);
+}
+
+static int vfio_host_win_del(VFIOContainer *container,
+ hwaddr min_iova, hwaddr max_iova)
+{
+ VFIOHostDMAWindow *hostwin;
+
+ QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+ if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) {
+ QLIST_REMOVE(hostwin, hostwin_next);
+ g_free(hostwin);
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container,
+ hwaddr iova, hwaddr end)
+{
+ VFIOHostDMAWindow *hostwin;
+ bool hostwin_found = false;
+
+ QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+ if (hostwin->min_iova <= iova && end <= hostwin->max_iova) {
+ hostwin_found = true;
+ break;
+ }
+ }
+
+ return hostwin_found ? hostwin : NULL;
+}
+
+static int vfio_spapr_remove_window(VFIOContainer *container,
+ hwaddr offset_within_address_space)
+{
+ struct vfio_iommu_spapr_tce_remove remove = {
+ .argsz = sizeof(remove),
+ .start_addr = offset_within_address_space,
+ };
+ int ret;
+
+ ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
+ if (ret) {
+ error_report("Failed to remove window at %"PRIx64,
+ (uint64_t)remove.start_addr);
+ return -errno;
+ }
+
+ trace_vfio_spapr_remove_window(offset_within_address_space);
+
+ return 0;
+}
+
+static int vfio_spapr_create_window(VFIOContainer *container,
+ MemoryRegionSection *section,
+ hwaddr *pgsize)
{
int ret = 0;
IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
@@ -233,23 +313,195 @@
return 0;
}
-int vfio_spapr_remove_window(VFIOContainer *container,
- hwaddr offset_within_address_space)
+int vfio_container_add_section_window(VFIOContainer *container,
+ MemoryRegionSection *section,
+ Error **errp)
{
- struct vfio_iommu_spapr_tce_remove remove = {
- .argsz = sizeof(remove),
- .start_addr = offset_within_address_space,
- };
+ VFIOHostDMAWindow *hostwin;
+ hwaddr pgsize = 0;
int ret;
- ret = ioctl(container->fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
- if (ret) {
- error_report("Failed to remove window at %"PRIx64,
- (uint64_t)remove.start_addr);
- return -errno;
+ /*
+ * VFIO_SPAPR_TCE_IOMMU supports a single host window between
+ * [dma32_window_start, dma32_window_size), we need to ensure
+ * the section fall in this range.
+ */
+ if (container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
+ hwaddr iova, end;
+
+ iova = section->offset_within_address_space;
+ end = iova + int128_get64(section->size) - 1;
+
+ if (!vfio_find_hostwin(container, iova, end)) {
+ error_setg(errp, "Container %p can't map guest IOVA region"
+ " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container,
+ iova, end);
+ return -EINVAL;
+ }
+ return 0;
}
- trace_vfio_spapr_remove_window(offset_within_address_space);
+ if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
+ return 0;
+ }
+
+ /* For now intersections are not allowed, we may relax this later */
+ QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
+ if (ranges_overlap(hostwin->min_iova,
+ hostwin->max_iova - hostwin->min_iova + 1,
+ section->offset_within_address_space,
+ int128_get64(section->size))) {
+ error_setg(errp,
+ "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing"
+ "host DMA window [0x%"PRIx64",0x%"PRIx64"]",
+ section->offset_within_address_space,
+ section->offset_within_address_space +
+ int128_get64(section->size) - 1,
+ hostwin->min_iova, hostwin->max_iova);
+ return -EINVAL;
+ }
+ }
+
+ ret = vfio_spapr_create_window(container, section, &pgsize);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to create SPAPR window");
+ return ret;
+ }
+
+ vfio_host_win_add(container, section->offset_within_address_space,
+ section->offset_within_address_space +
+ int128_get64(section->size) - 1, pgsize);
+#ifdef CONFIG_KVM
+ if (kvm_enabled()) {
+ VFIOGroup *group;
+ IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
+ struct kvm_vfio_spapr_tce param;
+ struct kvm_device_attr attr = {
+ .group = KVM_DEV_VFIO_GROUP,
+ .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE,
+ .addr = (uint64_t)(unsigned long)¶m,
+ };
+
+ if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD,
+ ¶m.tablefd)) {
+ QLIST_FOREACH(group, &container->group_list, container_next) {
+ param.groupfd = group->fd;
+ if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) {
+ error_setg_errno(errp, errno,
+ "vfio: failed GROUP_SET_SPAPR_TCE for "
+ "KVM VFIO device %d and group fd %d",
+ param.tablefd, param.groupfd);
+ return -errno;
+ }
+ trace_vfio_spapr_group_attach(param.groupfd, param.tablefd);
+ }
+ }
+ }
+#endif
+ return 0;
+}
+
+void vfio_container_del_section_window(VFIOContainer *container,
+ MemoryRegionSection *section)
+{
+ if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
+ return;
+ }
+
+ vfio_spapr_remove_window(container,
+ section->offset_within_address_space);
+ if (vfio_host_win_del(container,
+ section->offset_within_address_space,
+ section->offset_within_address_space +
+ int128_get64(section->size) - 1) < 0) {
+ hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx,
+ __func__, section->offset_within_address_space);
+ }
+}
+
+int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
+{
+ struct vfio_iommu_spapr_tce_info info;
+ bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
+ int ret, fd = container->fd;
+
+ QLIST_INIT(&container->hostwin_list);
+
+ /*
+ * The host kernel code implementing VFIO_IOMMU_DISABLE is called
+ * when container fd is closed so we do not call it explicitly
+ * in this file.
+ */
+ if (!v2) {
+ ret = ioctl(fd, VFIO_IOMMU_ENABLE);
+ if (ret) {
+ error_setg_errno(errp, errno, "failed to enable container");
+ return -errno;
+ }
+ } else {
+ container->prereg_listener = vfio_prereg_listener;
+
+ memory_listener_register(&container->prereg_listener,
+ &address_space_memory);
+ if (container->error) {
+ ret = -1;
+ error_propagate_prepend(errp, container->error,
+ "RAM memory listener initialization failed: ");
+ goto listener_unregister_exit;
+ }
+ }
+
+ info.argsz = sizeof(info);
+ ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+ if (ret) {
+ error_setg_errno(errp, errno,
+ "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed");
+ ret = -errno;
+ goto listener_unregister_exit;
+ }
+
+ if (v2) {
+ container->pgsizes = info.ddw.pgsizes;
+ /*
+ * There is a default window in just created container.
+ * To make region_add/del simpler, we better remove this
+ * window now and let those iommu_listener callbacks
+ * create/remove them when needed.
+ */
+ ret = vfio_spapr_remove_window(container, info.dma32_window_start);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "failed to remove existing window");
+ goto listener_unregister_exit;
+ }
+ } else {
+ /* The default table uses 4K pages */
+ container->pgsizes = 0x1000;
+ vfio_host_win_add(container, info.dma32_window_start,
+ info.dma32_window_start +
+ info.dma32_window_size - 1,
+ 0x1000);
+ }
return 0;
+
+listener_unregister_exit:
+ if (v2) {
+ memory_listener_unregister(&container->prereg_listener);
+ }
+ return ret;
+}
+
+void vfio_spapr_container_deinit(VFIOContainer *container)
+{
+ VFIOHostDMAWindow *hostwin, *next;
+
+ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+ memory_listener_unregister(&container->prereg_listener);
+ }
+ QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next,
+ next) {
+ QLIST_REMOVE(hostwin, hostwin_next);
+ g_free(hostwin);
+ }
}
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 0af7a28..637cac4 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -135,6 +135,7 @@
virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s"
virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
virtio_iommu_freeze_granule(uint64_t page_size_mask) "granule set to 0x%"PRIx64
+virtio_iommu_host_resv_regions(const char *name, uint32_t index, uint64_t lob, uint64_t upb) "mr=%s host-resv-reg[%d] = [0x%"PRIx64",0x%"PRIx64"]"
# virtio-mem.c
virtio_mem_send_response(uint16_t type) "type=%" PRIu16
diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c
index 7ef2f9d..9459fbf 100644
--- a/hw/virtio/virtio-iommu-pci.c
+++ b/hw/virtio/virtio-iommu-pci.c
@@ -37,7 +37,7 @@
static Property virtio_iommu_pci_properties[] = {
DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0),
DEFINE_PROP_ARRAY("reserved-regions", VirtIOIOMMUPCI,
- vdev.nb_reserved_regions, vdev.reserved_regions,
+ vdev.nr_prop_resv_regions, vdev.prop_resv_regions,
qdev_prop_reserved_region, ReservedRegion),
DEFINE_PROP_END_OF_LIST(),
};
@@ -54,9 +54,9 @@
"for the virtio-iommu-pci device");
return;
}
- for (int i = 0; i < s->nb_reserved_regions; i++) {
- if (s->reserved_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_RESERVED &&
- s->reserved_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_MSI) {
+ for (int i = 0; i < s->nr_prop_resv_regions; i++) {
+ if (s->prop_resv_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_RESERVED &&
+ s->prop_resv_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_MSI) {
error_setg(errp, "reserved region %d has an invalid type", i);
error_append_hint(errp, "Valid values are 0 and 1\n");
return;
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index be51635..89fb576 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -20,12 +20,15 @@
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/iov.h"
+#include "qemu/range.h"
+#include "qemu/reserved-region.h"
#include "exec/target_page.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio.h"
#include "sysemu/kvm.h"
#include "sysemu/reset.h"
#include "sysemu/sysemu.h"
+#include "qemu/reserved-region.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "trace.h"
@@ -378,6 +381,19 @@
g_free(domain);
}
+static void add_prop_resv_regions(IOMMUDevice *sdev)
+{
+ VirtIOIOMMU *s = sdev->viommu;
+ int i;
+
+ for (i = 0; i < s->nr_prop_resv_regions; i++) {
+ ReservedRegion *reg = g_new0(ReservedRegion, 1);
+
+ *reg = s->prop_resv_regions[i];
+ sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
+ }
+}
+
static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
int devfn)
{
@@ -408,6 +424,7 @@
memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
+ add_prop_resv_regions(sdev);
/*
* Build the IOMMU disabled container with aliases to the
@@ -444,6 +461,10 @@
return &sdev->as;
}
+static const PCIIOMMUOps virtio_iommu_ops = {
+ .get_address_space = virtio_iommu_find_add_as,
+};
+
static int virtio_iommu_attach(VirtIOIOMMU *s,
struct virtio_iommu_req_attach *req)
{
@@ -624,29 +645,30 @@
return ret;
}
-static ssize_t virtio_iommu_fill_resv_mem_prop(VirtIOIOMMU *s, uint32_t ep,
+static ssize_t virtio_iommu_fill_resv_mem_prop(IOMMUDevice *sdev, uint32_t ep,
uint8_t *buf, size_t free)
{
struct virtio_iommu_probe_resv_mem prop = {};
size_t size = sizeof(prop), length = size - sizeof(prop.head), total;
- int i;
+ GList *l;
- total = size * s->nb_reserved_regions;
-
+ total = size * g_list_length(sdev->resv_regions);
if (total > free) {
return -ENOSPC;
}
- for (i = 0; i < s->nb_reserved_regions; i++) {
- unsigned subtype = s->reserved_regions[i].type;
+ for (l = sdev->resv_regions; l; l = l->next) {
+ ReservedRegion *reg = l->data;
+ unsigned subtype = reg->type;
+ Range *range = ®->range;
assert(subtype == VIRTIO_IOMMU_RESV_MEM_T_RESERVED ||
subtype == VIRTIO_IOMMU_RESV_MEM_T_MSI);
prop.head.type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM);
prop.head.length = cpu_to_le16(length);
prop.subtype = subtype;
- prop.start = cpu_to_le64(s->reserved_regions[i].low);
- prop.end = cpu_to_le64(s->reserved_regions[i].high);
+ prop.start = cpu_to_le64(range_lob(range));
+ prop.end = cpu_to_le64(range_upb(range));
memcpy(buf, &prop, size);
@@ -666,19 +688,27 @@
uint8_t *buf)
{
uint32_t ep_id = le32_to_cpu(req->endpoint);
+ IOMMUMemoryRegion *iommu_mr = virtio_iommu_mr(s, ep_id);
size_t free = VIOMMU_PROBE_SIZE;
+ IOMMUDevice *sdev;
ssize_t count;
- if (!virtio_iommu_mr(s, ep_id)) {
+ if (!iommu_mr) {
return VIRTIO_IOMMU_S_NOENT;
}
- count = virtio_iommu_fill_resv_mem_prop(s, ep_id, buf, free);
+ sdev = container_of(iommu_mr, IOMMUDevice, iommu_mr);
+ if (!sdev) {
+ return -EINVAL;
+ }
+
+ count = virtio_iommu_fill_resv_mem_prop(sdev, ep_id, buf, free);
if (count < 0) {
return VIRTIO_IOMMU_S_INVAL;
}
buf += count;
free -= count;
+ sdev->probe_done = true;
return VIRTIO_IOMMU_S_OK;
}
@@ -856,7 +886,7 @@
bool bypass_allowed;
int granule;
bool found;
- int i;
+ GList *l;
interval.low = addr;
interval.high = addr + 1;
@@ -894,10 +924,10 @@
goto unlock;
}
- for (i = 0; i < s->nb_reserved_regions; i++) {
- ReservedRegion *reg = &s->reserved_regions[i];
+ for (l = sdev->resv_regions; l; l = l->next) {
+ ReservedRegion *reg = l->data;
- if (addr >= reg->low && addr <= reg->high) {
+ if (range_contains(®->range, addr)) {
switch (reg->type) {
case VIRTIO_IOMMU_RESV_MEM_T_MSI:
entry.perm = flag;
@@ -1131,6 +1161,106 @@
return 0;
}
+/**
+ * rebuild_resv_regions: rebuild resv regions with both the
+ * info of host resv ranges and property set resv ranges
+ */
+static int rebuild_resv_regions(IOMMUDevice *sdev)
+{
+ GList *l;
+ int i = 0;
+
+ /* free the existing list and rebuild it from scratch */
+ g_list_free_full(sdev->resv_regions, g_free);
+ sdev->resv_regions = NULL;
+
+ /* First add host reserved regions if any, all tagged as RESERVED */
+ for (l = sdev->host_resv_ranges; l; l = l->next) {
+ ReservedRegion *reg = g_new0(ReservedRegion, 1);
+ Range *r = (Range *)l->data;
+
+ reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
+ range_set_bounds(®->range, range_lob(r), range_upb(r));
+ sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
+ trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
+ range_lob(®->range),
+ range_upb(®->range));
+ i++;
+ }
+ /*
+ * then add higher priority reserved regions set by the machine
+ * through properties
+ */
+ add_prop_resv_regions(sdev);
+ return 0;
+}
+
+/**
+ * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
+ *
+ * The function turns those into reserved ranges. Once some
+ * reserved ranges have been set, new reserved regions cannot be
+ * added outside of the original ones.
+ *
+ * @mr: IOMMU MR
+ * @iova_ranges: list of usable IOVA ranges
+ * @errp: error handle
+ */
+static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr,
+ GList *iova_ranges,
+ Error **errp)
+{
+ IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
+ GList *current_ranges = sdev->host_resv_ranges;
+ GList *l, *tmp, *new_ranges = NULL;
+ int ret = -EINVAL;
+
+ /* check that each new resv region is included in an existing one */
+ if (sdev->host_resv_ranges) {
+ range_inverse_array(iova_ranges,
+ &new_ranges,
+ 0, UINT64_MAX);
+
+ for (tmp = new_ranges; tmp; tmp = tmp->next) {
+ Range *newr = (Range *)tmp->data;
+ bool included = false;
+
+ for (l = current_ranges; l; l = l->next) {
+ Range * r = (Range *)l->data;
+
+ if (range_contains_range(r, newr)) {
+ included = true;
+ break;
+ }
+ }
+ if (!included) {
+ goto error;
+ }
+ }
+ /* all new reserved ranges are included in existing ones */
+ ret = 0;
+ goto out;
+ }
+
+ if (sdev->probe_done) {
+ warn_report("%s: Notified about new host reserved regions after probe",
+ mr->parent_obj.name);
+ }
+
+ range_inverse_array(iova_ranges,
+ &sdev->host_resv_ranges,
+ 0, UINT64_MAX);
+ rebuild_resv_regions(sdev);
+
+ return 0;
+error:
+ error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!",
+ mr->parent_obj.name);
+out:
+ g_list_free_full(new_ranges, g_free);
+ return ret;
+}
+
static void virtio_iommu_system_reset(void *opaque)
{
VirtIOIOMMU *s = opaque;
@@ -1206,7 +1336,7 @@
s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
if (s->primary_bus) {
- pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s);
+ pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s);
} else {
error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!");
}
@@ -1426,6 +1556,7 @@
imrc->replay = virtio_iommu_replay;
imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
+ imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges;
}
static const TypeInfo virtio_iommu_info = {
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 9087d02..831f7c9 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -24,6 +24,7 @@
#include "qemu/bswap.h"
#include "qemu/queue.h"
#include "qemu/int128.h"
+#include "qemu/range.h"
#include "qemu/notify.h"
#include "qom/object.h"
#include "qemu/rcu.h"
@@ -79,8 +80,7 @@
typedef struct MemoryRegionOps MemoryRegionOps;
struct ReservedRegion {
- hwaddr low;
- hwaddr high;
+ Range range;
unsigned type;
};
@@ -527,6 +527,26 @@
int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu,
uint64_t page_size_mask,
Error **errp);
+ /**
+ * @iommu_set_iova_ranges:
+ *
+ * Propagate information about the usable IOVA ranges for a given IOMMU
+ * memory region. Used for example to propagate host physical device
+ * reserved memory region constraints to the virtual IOMMU.
+ *
+ * Optional method: if this method is not provided, then the default IOVA
+ * aperture is used.
+ *
+ * @iommu: the IOMMUMemoryRegion
+ *
+ * @iova_ranges: list of ordered IOVA ranges (at least one range)
+ *
+ * Returns 0 on success, or a negative error. In case of failure, the error
+ * object must be created.
+ */
+ int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu,
+ GList *iova_ranges,
+ Error **errp);
};
typedef struct RamDiscardListener RamDiscardListener;
@@ -1857,6 +1877,18 @@
Error **errp);
/**
+ * memory_region_iommu_set_iova_ranges - Set the usable IOVA ranges
+ * for a given IOMMU MR region
+ *
+ * @iommu: IOMMU memory region
+ * @iova_ranges: list of ordered IOVA ranges (at least one range)
+ * @errp: pointer to Error*, to store an error if it happens.
+ */
+int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu,
+ GList *iova_ranges,
+ Error **errp);
+
+/**
* memory_region_name: get a memory region's name
*
* Returns the string that was used to initialize the memory region.
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index ea5aff1..fa6313a 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -363,10 +363,42 @@
void pci_device_deassert_intx(PCIDevice *dev);
-typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
+
+/**
+ * struct PCIIOMMUOps: callbacks structure for specific IOMMU handlers
+ * of a PCIBus
+ *
+ * Allows to modify the behavior of some IOMMU operations of the PCI
+ * framework for a set of devices on a PCI bus.
+ */
+typedef struct PCIIOMMUOps {
+ /**
+ * @get_address_space: get the address space for a set of devices
+ * on a PCI bus.
+ *
+ * Mandatory callback which returns a pointer to an #AddressSpace
+ *
+ * @bus: the #PCIBus being accessed.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number
+ */
+ AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+} PCIIOMMUOps;
AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
-void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
+
+/**
+ * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
+ *
+ * Let PCI host bridges define specific operations.
+ *
+ * @bus: the #PCIBus being updated.
+ * @ops: the #PCIIOMMUOps
+ * @opaque: passed to callbacks of the @ops structure.
+ */
+void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque);
pcibus_t pci_bar_address(PCIDevice *d,
int reg, uint8_t type, pcibus_t size);
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 5653175..2261312 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -33,7 +33,7 @@
struct PCIBus {
BusState qbus;
enum PCIBusFlags flags;
- PCIIOMMUFunc iommu_fn;
+ const PCIIOMMUOps *iommu_ops;
void *iommu_opaque;
uint8_t devfn_min;
uint32_t slot_reserved_mask;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 7780b90..a4a22ac 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -99,6 +99,7 @@
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
QLIST_ENTRY(VFIOContainer) next;
QLIST_HEAD(, VFIODevice) device_list;
+ GList *iova_ranges;
} VFIOContainer;
typedef struct VFIOGuestIOMMU {
@@ -206,11 +207,6 @@
hwaddr pages;
} VFIOBitmap;
-void vfio_host_win_add(VFIOContainer *container,
- hwaddr min_iova, hwaddr max_iova,
- uint64_t iova_pgsizes);
-int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova,
- hwaddr max_iova);
VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
void vfio_put_address_space(VFIOAddressSpace *space);
bool vfio_devices_all_running_and_saving(VFIOContainer *container);
@@ -224,11 +220,14 @@
int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
hwaddr iova, hwaddr size);
+/* SPAPR specific */
int vfio_container_add_section_window(VFIOContainer *container,
MemoryRegionSection *section,
Error **errp);
void vfio_container_del_section_window(VFIOContainer *container,
MemoryRegionSection *section);
+int vfio_spapr_container_init(VFIOContainer *container, Error **errp);
+void vfio_spapr_container_deinit(VFIOContainer *container);
void vfio_disable_irqindex(VFIODevice *vbasedev, int index);
void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index);
@@ -288,13 +287,6 @@
struct vfio_info_cap_header *
vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id);
#endif
-extern const MemoryListener vfio_prereg_listener;
-
-int vfio_spapr_create_window(VFIOContainer *container,
- MemoryRegionSection *section,
- hwaddr *pgsize);
-int vfio_spapr_remove_window(VFIOContainer *container,
- hwaddr offset_within_address_space);
bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
void vfio_migration_exit(VFIODevice *vbasedev);
diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h
deleted file mode 100644
index 86248f54..0000000
--- a/include/hw/vfio/vfio.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef HW_VFIO_H
-#define HW_VFIO_H
-
-bool vfio_eeh_as_ok(AddressSpace *as);
-int vfio_eeh_as_op(AddressSpace *as, uint32_t op);
-
-#endif
diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index a93fc53..781ebae 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -39,6 +39,9 @@
AddressSpace as;
MemoryRegion root; /* The root container of the device */
MemoryRegion bypass_mr; /* The alias of shared memory MR */
+ GList *resv_regions;
+ GList *host_resv_ranges;
+ bool probe_done;
} IOMMUDevice;
typedef struct IOMMUPciBus {
@@ -55,8 +58,8 @@
GHashTable *as_by_busptr;
IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX];
PCIBus *primary_bus;
- ReservedRegion *reserved_regions;
- uint32_t nb_reserved_regions;
+ ReservedRegion *prop_resv_regions;
+ uint32_t nr_prop_resv_regions;
GTree *domains;
QemuRecMutex mutex;
GTree *endpoints;
diff --git a/include/qemu/range.h b/include/qemu/range.h
index 7e2b1cc..205e1da 100644
--- a/include/qemu/range.h
+++ b/include/qemu/range.h
@@ -217,6 +217,20 @@
return !(last2 < first1 || last1 < first2);
}
+/*
+ * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap.
+ * Both @a and @b must not be empty.
+ */
+int range_compare(Range *a, Range *b);
+
GList *range_list_insert(GList *list, Range *data);
+/*
+ * Inverse an array of sorted ranges over the [low, high] span, ie.
+ * original ranges becomes holes in the newly allocated inv_ranges
+ */
+void range_inverse_array(GList *in_ranges,
+ GList **out_ranges,
+ uint64_t low, uint64_t high);
+
#endif
diff --git a/include/qemu/reserved-region.h b/include/qemu/reserved-region.h
new file mode 100644
index 0000000..8e6f0a9
--- /dev/null
+++ b/include/qemu/reserved-region.h
@@ -0,0 +1,32 @@
+/*
+ * QEMU ReservedRegion helpers
+ *
+ * Copyright (c) 2023 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_RESERVED_REGION_H
+#define QEMU_RESERVED_REGION_H
+
+#include "exec/memory.h"
+
+/*
+ * Insert a new region into a sorted list of reserved regions. In case
+ * there is overlap with existing regions, the new added region has
+ * higher priority and replaces the overlapped segment.
+ */
+GList *resv_region_list_insert(GList *list, ReservedRegion *reg);
+
+#endif
diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h
index e24a109..869f84a 100644
--- a/include/qemu/uuid.h
+++ b/include/qemu/uuid.h
@@ -78,9 +78,10 @@
"%02hhx%02hhx-" \
"%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
-#define UUID_FMT_LEN 36
-
#define UUID_NONE "00000000-0000-0000-0000-000000000000"
+QEMU_BUILD_BUG_ON(sizeof(UUID_NONE) - 1 != 36);
+
+#define UUID_STR_LEN sizeof(UUID_NONE)
void qemu_uuid_generate(QemuUUID *out);
diff --git a/migration/savevm.c b/migration/savevm.c
index bc98c2e..eec5503 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -471,8 +471,8 @@
static int vmstate_uuid_post_load(void *opaque, int version_id)
{
SaveState *state = opaque;
- char uuid_src[UUID_FMT_LEN + 1];
- char uuid_dst[UUID_FMT_LEN + 1];
+ char uuid_src[UUID_STR_LEN];
+ char uuid_dst[UUID_STR_LEN];
if (!qemu_uuid_set) {
/*
diff --git a/system/memory.c b/system/memory.c
index 4928f25..304fa84 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -1921,6 +1921,19 @@
return ret;
}
+int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu_mr,
+ GList *iova_ranges,
+ Error **errp)
+{
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+ int ret = 0;
+
+ if (imrc->iommu_set_iova_ranges) {
+ ret = imrc->iommu_set_iova_ranges(iommu_mr, iova_ranges, errp);
+ }
+ return ret;
+}
+
int memory_region_register_iommu_notifier(MemoryRegion *mr,
IOMMUNotifier *n, Error **errp)
{
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index f33ae64..e6c51e7 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -21,6 +21,7 @@
'test-opts-visitor': [testqapi],
'test-visitor-serialization': [testqapi],
'test-bitmap': [],
+ 'test-resv-mem': [],
# all code tested by test-x86-cpuid is inside topology.h
'test-x86-cpuid': [],
'test-cutils': [],
diff --git a/tests/unit/test-resv-mem.c b/tests/unit/test-resv-mem.c
new file mode 100644
index 0000000..5963274
--- /dev/null
+++ b/tests/unit/test-resv-mem.c
@@ -0,0 +1,316 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * reserved-region/range.c unit-tests.
+ *
+ * Copyright (C) 2023, Red Hat, Inc.
+ *
+ * Author: Eric Auger <eric.auger@redhat.com>
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/range.h"
+#include "exec/memory.h"
+#include "qemu/reserved-region.h"
+
+#define DEBUG 0
+
+#if DEBUG
+static void print_ranges(const char *prefix, GList *ranges)
+{
+ GList *l;
+ int i = 0;
+
+ if (!g_list_length(ranges)) {
+ printf("%s is void\n", prefix);
+ return;
+ }
+ for (l = ranges; l; l = l->next) {
+ Range *r = (Range *)l->data;
+
+ printf("%s rev[%i] = [0x%"PRIx64",0x%"PRIx64"]\n",
+ prefix, i, range_lob(r), range_upb(r));
+ i++;
+ }
+}
+#endif
+
+static void compare_ranges(const char *prefix, GList *ranges,
+ GList *expected)
+{
+ GList *l, *e;
+
+#if DEBUG
+ print_ranges("out", ranges);
+ print_ranges("expected", expected);
+#endif
+ g_assert_cmpint(g_list_length(ranges), ==, g_list_length(expected));
+ for (l = ranges, e = expected; l ; l = l->next, e = e->next) {
+ Range *r = (Range *)l->data;
+ Range *er = (Range *)e->data;
+
+ g_assert_true(range_lob(r) == range_lob(er) &&
+ range_upb(r) == range_upb(er));
+ }
+}
+
+static GList *insert_sorted_range(GList *list, uint64_t lob, uint64_t upb)
+{
+ Range *new = g_new0(Range, 1);
+
+ range_set_bounds(new, lob, upb);
+ return range_list_insert(list, new);
+}
+
+static void reset(GList **in, GList **out, GList **expected)
+{
+ g_list_free_full(*in, g_free);
+ g_list_free_full(*out, g_free);
+ g_list_free_full(*expected, g_free);
+ *in = NULL;
+ *out = NULL;
+ *expected = NULL;
+}
+
+static void
+run_range_inverse_array(const char *prefix, GList **in, GList **expected,
+ uint64_t low, uint64_t high)
+{
+ GList *out = NULL;
+ range_inverse_array(*in, &out, low, high);
+ compare_ranges(prefix, out, *expected);
+ reset(in, &out, expected);
+}
+
+static void check_range_reverse_array(void)
+{
+ GList *in = NULL, *expected = NULL;
+
+ /* test 1 */
+
+ in = insert_sorted_range(in, 0x10000, UINT64_MAX);
+ expected = insert_sorted_range(expected, 0x0, 0xFFFF);
+ run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX);
+
+ /* test 2 */
+
+ in = insert_sorted_range(in, 0x10000, 0xFFFFFFFFFFFF);
+ expected = insert_sorted_range(expected, 0x0, 0xFFFF);
+ expected = insert_sorted_range(expected, 0x1000000000000, UINT64_MAX);
+ run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX);
+
+ /* test 3 */
+
+ in = insert_sorted_range(in, 0x0, 0xFFFF);
+ in = insert_sorted_range(in, 0x10000, 0x2FFFF);
+ expected = insert_sorted_range(expected, 0x30000, UINT64_MAX);
+ run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX);
+
+ /* test 4 */
+
+ in = insert_sorted_range(in, 0x50000, 0x5FFFF);
+ in = insert_sorted_range(in, 0x60000, 0xFFFFFFFFFFFF);
+ expected = insert_sorted_range(expected, 0x0, 0x4FFFF);
+ expected = insert_sorted_range(expected, 0x1000000000000, UINT64_MAX);
+ run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX);
+
+ /* test 5 */
+
+ in = insert_sorted_range(in, 0x0, UINT64_MAX);
+ run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX);
+
+ /* test 6 */
+ in = insert_sorted_range(in, 0x10000, 0x1FFFF);
+ in = insert_sorted_range(in, 0x30000, 0x6FFFF);
+ in = insert_sorted_range(in, 0x90000, UINT64_MAX);
+ expected = insert_sorted_range(expected, 0x0, 0xFFFF);
+ expected = insert_sorted_range(expected, 0x20000, 0x2FFFF);
+ expected = insert_sorted_range(expected, 0x70000, 0x8FFFF);
+ run_range_inverse_array("test1", &in, &expected, 0x0, UINT64_MAX);
+}
+
+static void check_range_reverse_array_low_end(void)
+{
+ GList *in = NULL, *expected = NULL;
+
+ /* test 1 */
+ in = insert_sorted_range(in, 0x0, UINT64_MAX);
+ run_range_inverse_array("test1", &in, &expected, 0x10000, 0xFFFFFF);
+
+ /* test 2 */
+
+ in = insert_sorted_range(in, 0x0, 0xFFFF);
+ in = insert_sorted_range(in, 0x20000, 0x2FFFF);
+ expected = insert_sorted_range(expected, 0x40000, 0xFFFFFFFFFFFF);
+ run_range_inverse_array("test2", &in, &expected, 0x40000, 0xFFFFFFFFFFFF);
+
+ /* test 3 */
+ in = insert_sorted_range(in, 0x0, 0xFFFF);
+ in = insert_sorted_range(in, 0x20000, 0x2FFFF);
+ in = insert_sorted_range(in, 0x1000000000000, UINT64_MAX);
+ expected = insert_sorted_range(expected, 0x40000, 0xFFFFFFFFFFFF);
+ run_range_inverse_array("test3", &in, &expected, 0x40000, 0xFFFFFFFFFFFF);
+
+ /* test 4 */
+
+ in = insert_sorted_range(in, 0x0, 0xFFFF);
+ in = insert_sorted_range(in, 0x20000, 0x2FFFF);
+ in = insert_sorted_range(in, 0x1000000000000, UINT64_MAX);
+ expected = insert_sorted_range(expected, 0x30000, 0xFFFFFFFFFFFF);
+ run_range_inverse_array("test4", &in, &expected, 0x20000, 0xFFFFFFFFFFFF);
+
+ /* test 5 */
+
+ in = insert_sorted_range(in, 0x2000, 0xFFFF);
+ in = insert_sorted_range(in, 0x20000, 0x2FFFF);
+ in = insert_sorted_range(in, 0x100000000, 0x1FFFFFFFF);
+ expected = insert_sorted_range(expected, 0x1000, 0x1FFF);
+ expected = insert_sorted_range(expected, 0x10000, 0x1FFFF);
+ expected = insert_sorted_range(expected, 0x30000, 0xFFFFFFFF);
+ expected = insert_sorted_range(expected, 0x200000000, 0xFFFFFFFFFFFF);
+ run_range_inverse_array("test5", &in, &expected, 0x1000, 0xFFFFFFFFFFFF);
+
+ /* test 6 */
+
+ in = insert_sorted_range(in, 0x10000000 , 0x1FFFFFFF);
+ in = insert_sorted_range(in, 0x100000000, 0x1FFFFFFFF);
+ expected = insert_sorted_range(expected, 0x0, 0xFFFF);
+ run_range_inverse_array("test6", &in, &expected, 0x0, 0xFFFF);
+}
+
+static ReservedRegion *alloc_resv_mem(unsigned type, uint64_t lob, uint64_t upb)
+{
+ ReservedRegion *r;
+
+ r = g_new0(ReservedRegion, 1);
+ r->type = type;
+ range_set_bounds(&r->range, lob, upb);
+ return r;
+}
+
+static void print_resv_region_list(const char *prefix, GList *list,
+ uint32_t expected_length)
+{
+ int i = g_list_length(list);
+
+ g_assert_cmpint(i, ==, expected_length);
+#if DEBUG
+ i = 0;
+ for (GList *l = list; l; l = l->next) {
+ ReservedRegion *r = (ReservedRegion *)l->data;
+ Range *range = &r->range;
+
+ printf("%s item[%d]=[0x%x, 0x%"PRIx64", 0x%"PRIx64"]\n",
+ prefix, i++, r->type, range_lob(range), range_upb(range));
+ }
+#endif
+}
+
+static void free_resv_region(gpointer data)
+{
+ ReservedRegion *reg = (ReservedRegion *)data;
+
+ g_free(reg);
+}
+
+static void check_resv_region_list_insert(void)
+{
+ ReservedRegion *r[10];
+ GList *l = NULL;
+
+ r[0] = alloc_resv_mem(0xA, 0, 0xFFFF);
+ r[1] = alloc_resv_mem(0xA, 0x20000, 0x2FFFF);
+ l = resv_region_list_insert(l, r[0]);
+ l = resv_region_list_insert(l, r[1]);
+ print_resv_region_list("test1", l, 2);
+
+ /* adjacent on left */
+ r[2] = alloc_resv_mem(0xB, 0x0, 0xFFF);
+ l = resv_region_list_insert(l, r[2]);
+ /* adjacent on right */
+ r[3] = alloc_resv_mem(0xC, 0x21000, 0x2FFFF);
+ l = resv_region_list_insert(l, r[3]);
+ print_resv_region_list("test2", l, 4);
+
+ /* exact overlap of D into C*/
+ r[4] = alloc_resv_mem(0xD, 0x21000, 0x2FFFF);
+ l = resv_region_list_insert(l, r[4]);
+ print_resv_region_list("test3", l, 4);
+
+ /* in the middle */
+ r[5] = alloc_resv_mem(0xE, 0x22000, 0x23FFF);
+ l = resv_region_list_insert(l, r[5]);
+ print_resv_region_list("test4", l, 6);
+
+ /* overwrites several existing ones */
+ r[6] = alloc_resv_mem(0xF, 0x10000, 0x2FFFF);
+ l = resv_region_list_insert(l, r[6]);
+ print_resv_region_list("test5", l, 3);
+
+ /* contiguous at the end */
+ r[7] = alloc_resv_mem(0x0, 0x30000, 0x40000);
+ l = resv_region_list_insert(l, r[7]);
+ print_resv_region_list("test6", l, 4);
+
+ g_list_free_full(l, free_resv_region);
+ l = NULL;
+
+ r[0] = alloc_resv_mem(0x0, 0x10000, 0x1FFFF);
+ l = resv_region_list_insert(l, r[0]);
+ /* insertion before the 1st item */
+ r[1] = alloc_resv_mem(0x1, 0x0, 0xFF);
+ l = resv_region_list_insert(l, r[1]);
+ print_resv_region_list("test8", l, 2);
+
+ /* collision on the left side */
+ r[2] = alloc_resv_mem(0xA, 0x1200, 0x11FFF);
+ l = resv_region_list_insert(l, r[2]);
+ print_resv_region_list("test9", l, 3);
+
+ /* collision on the right side */
+ r[3] = alloc_resv_mem(0xA, 0x1F000, 0x2FFFF);
+ l = resv_region_list_insert(l, r[3]);
+ print_resv_region_list("test10", l, 4);
+
+ /* override everything */
+ r[4] = alloc_resv_mem(0xF, 0x0, UINT64_MAX);
+ l = resv_region_list_insert(l, r[4]);
+ print_resv_region_list("test11", l, 1);
+
+ g_list_free_full(l, free_resv_region);
+ l = NULL;
+
+ r[0] = alloc_resv_mem(0xF, 0x1000000000000, UINT64_MAX);
+ l = resv_region_list_insert(l, r[0]);
+ print_resv_region_list("test12", l, 1);
+
+ r[1] = alloc_resv_mem(0xA, 0x0, 0xFFFFFFF);
+ l = resv_region_list_insert(l, r[1]);
+ print_resv_region_list("test12", l, 2);
+
+ r[2] = alloc_resv_mem(0xB, 0x100000000, 0x1FFFFFFFF);
+ l = resv_region_list_insert(l, r[2]);
+ print_resv_region_list("test12", l, 3);
+
+ r[3] = alloc_resv_mem(0x0, 0x010000000, 0x2FFFFFFFF);
+ l = resv_region_list_insert(l, r[3]);
+ print_resv_region_list("test12", l, 3);
+
+ g_list_free_full(l, free_resv_region);
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+
+ g_test_add_func("/resv-mem/range_reverse_array",
+ check_range_reverse_array);
+ g_test_add_func("/resv-mem/range_reverse_array_low_end",
+ check_range_reverse_array_low_end);
+ g_test_add_func("/resv-mem/resv_region_list_insert",
+ check_resv_region_list_insert);
+
+ g_test_run();
+
+ return 0;
+}
diff --git a/tests/unit/test-uuid.c b/tests/unit/test-uuid.c
index aedc125..739b915 100644
--- a/tests/unit/test-uuid.c
+++ b/tests/unit/test-uuid.c
@@ -145,7 +145,7 @@
int i;
for (i = 0; i < ARRAY_SIZE(uuid_test_data); i++) {
- char out[37];
+ char out[UUID_STR_LEN];
if (!uuid_test_data[i].check_unparse) {
continue;
diff --git a/util/meson.build b/util/meson.build
index 769b24f..c3a24af 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -52,6 +52,7 @@
util_ss.add(files('qht.c'))
util_ss.add(files('qsp.c'))
util_ss.add(files('range.c'))
+util_ss.add(files('reserved-region.c'))
util_ss.add(files('stats64.c'))
util_ss.add(files('systemd.c'))
util_ss.add(files('transactions.c'))
diff --git a/util/range.c b/util/range.c
index 098d9d2..9605ccf 100644
--- a/util/range.c
+++ b/util/range.c
@@ -20,11 +20,7 @@
#include "qemu/osdep.h"
#include "qemu/range.h"
-/*
- * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap.
- * Both @a and @b must not be empty.
- */
-static inline int range_compare(Range *a, Range *b)
+int range_compare(Range *a, Range *b)
{
assert(!range_is_empty(a) && !range_is_empty(b));
@@ -70,3 +66,58 @@
return list;
}
+
+static inline
+GList *append_new_range(GList *list, uint64_t lob, uint64_t upb)
+{
+ Range *new = g_new0(Range, 1);
+
+ range_set_bounds(new, lob, upb);
+ return g_list_append(list, new);
+}
+
+
+void range_inverse_array(GList *in, GList **rev,
+ uint64_t low, uint64_t high)
+{
+ Range *r, *rn;
+ GList *l = in, *out = *rev;
+
+ for (l = in; l && range_upb(l->data) < low; l = l->next) {
+ continue;
+ }
+
+ if (!l) {
+ out = append_new_range(out, low, high);
+ goto exit;
+ }
+ r = (Range *)l->data;
+
+ /* first range lob is greater than min, insert a first range */
+ if (range_lob(r) > low) {
+ out = append_new_range(out, low, MIN(range_lob(r) - 1, high));
+ }
+
+ /* insert a range inbetween each original range until we reach high */
+ for (; l->next; l = l->next) {
+ r = (Range *)l->data;
+ rn = (Range *)l->next->data;
+ if (range_lob(r) >= high) {
+ goto exit;
+ }
+ if (range_compare(r, rn)) {
+ out = append_new_range(out, range_upb(r) + 1,
+ MIN(range_lob(rn) - 1, high));
+ }
+ }
+
+ /* last range */
+ r = (Range *)l->data;
+
+ /* last range upb is less than max, insert a last range */
+ if (range_upb(r) < high) {
+ out = append_new_range(out, range_upb(r) + 1, high);
+ }
+exit:
+ *rev = out;
+}
diff --git a/util/reserved-region.c b/util/reserved-region.c
new file mode 100644
index 0000000..18f83eb
--- /dev/null
+++ b/util/reserved-region.c
@@ -0,0 +1,91 @@
+/*
+ * QEMU ReservedRegion helpers
+ *
+ * Copyright (c) 2023 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/range.h"
+#include "qemu/reserved-region.h"
+
+GList *resv_region_list_insert(GList *list, ReservedRegion *reg)
+{
+ ReservedRegion *resv_iter, *new_reg;
+ Range *r = ®->range;
+ Range *range_iter;
+ GList *l;
+
+ for (l = list; l ; ) {
+ resv_iter = (ReservedRegion *)l->data;
+ range_iter = &resv_iter->range;
+
+ /* Skip all list elements strictly less than range to add */
+ if (range_compare(range_iter, r) < 0) {
+ l = l->next;
+ } else if (range_compare(range_iter, r) > 0) {
+ return g_list_insert_before(list, l, reg);
+ } else { /* there is an overlap */
+ if (range_contains_range(r, range_iter)) {
+ /* new range contains current item, simply remove this latter */
+ GList *prev = l->prev;
+ g_free(l->data);
+ list = g_list_delete_link(list, l);
+ if (prev) {
+ l = prev->next;
+ } else {
+ l = list;
+ }
+ } else if (range_contains_range(range_iter, r)) {
+ /* new region is included in the current region */
+ if (range_lob(range_iter) == range_lob(r)) {
+ /* adjacent on the left side, derives into 2 regions */
+ range_set_bounds(range_iter, range_upb(r) + 1,
+ range_upb(range_iter));
+ return g_list_insert_before(list, l, reg);
+ } else if (range_upb(range_iter) == range_upb(r)) {
+ /* adjacent on the right side, derives into 2 regions */
+ range_set_bounds(range_iter, range_lob(range_iter),
+ range_lob(r) - 1);
+ l = l->next;
+ } else {
+ uint64_t lob = range_lob(range_iter);
+ /*
+ * the new range is in the middle of an existing one,
+ * split this latter into 3 regs instead
+ */
+ range_set_bounds(range_iter, range_upb(r) + 1,
+ range_upb(range_iter));
+ new_reg = g_new0(ReservedRegion, 1);
+ new_reg->type = resv_iter->type;
+ range_set_bounds(&new_reg->range,
+ lob, range_lob(r) - 1);
+ list = g_list_insert_before(list, l, new_reg);
+ return g_list_insert_before(list, l, reg);
+ }
+ } else if (range_lob(r) < range_lob(range_iter)) {
+ range_set_bounds(range_iter, range_upb(r) + 1,
+ range_upb(range_iter));
+ return g_list_insert_before(list, l, reg);
+ } else { /* intersection on the upper range */
+ range_set_bounds(range_iter, range_lob(range_iter),
+ range_lob(r) - 1);
+ l = l->next;
+ }
+ } /* overlap */
+ }
+ return g_list_append(list, reg);
+}
+
diff --git a/util/uuid.c b/util/uuid.c
index d71aa79..234619d 100644
--- a/util/uuid.c
+++ b/util/uuid.c
@@ -51,7 +51,7 @@
void qemu_uuid_unparse(const QemuUUID *uuid, char *out)
{
const unsigned char *uu = &uuid->data[0];
- snprintf(out, UUID_FMT_LEN + 1, UUID_FMT,
+ snprintf(out, UUID_STR_LEN, UUID_FMT,
uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7],
uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
}