Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-5.0-20200203' into staging
ppc patch queue 2020-02-03
This pull request supersedes ppc-for-5.0-20200131. The only changes
are one extra patch to suppress some irritating warnings during tests
under TCG, and an extra Tested-by in one of the other patches.
Here's the next batch of patches for ppc and associated machine types.
Highlights include:
* Remove the deprecated "prep" machine type and its OpenHackware
firmware
* Add TCG emulation of the msgsndp etc. supervisor privileged
doorbell instructions
* Allow "pnv" machine type to run Hostboot style firmwares
* Add a virtual TPM device for spapr machines
* Implement devices for POWER8 PHB3 and POWER9 PHB4 host bridges for
the pnv machine type
* Use faster Spectre mitigation by default for POWER9 DD2.3 machines
* Introduce Firmware Assisted NMI dump facility for spapr machines
* Fix a performance regression with load/store multiple instructions
in TCG
as well as some other assorted cleanups and fixes.
# gpg: Signature made Mon 03 Feb 2020 03:30:24 GMT
# gpg: using RSA key 75F46586AE61A66CC44E87DC6C38CACA20D9B392
# gpg: Good signature from "David Gibson <david@gibson.dropbear.id.au>" [full]
# gpg: aka "David Gibson (Red Hat) <dgibson@redhat.com>" [full]
# gpg: aka "David Gibson (ozlabs.org) <dgibson@ozlabs.org>" [full]
# gpg: aka "David Gibson (kernel.org) <dwg@kernel.org>" [unknown]
# Primary key fingerprint: 75F4 6586 AE61 A66C C44E 87DC 6C38 CACA 20D9 B392
* remotes/dgibson/tags/ppc-for-5.0-20200203: (35 commits)
tests: Silence various warnings with pseries
target/ppc: Use probe_write for DCBZ
target/ppc: Remove redundant mask in DCBZ
target/ppc: Use probe_access for LMW, STMW
target/ppc: Use probe_access for LSW, STSW
ppc: spapr: Activate the FWNMI functionality
migration: Include migration support for machine check handling
ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS calls
target/ppc: Build rtas error log upon an MCE
target/ppc: Handle NMI guest exit
ppc: spapr: Introduce FWNMI capability
Wrapper function to wait on condition for the main loop mutex
target/ppc/cpu.h: Put macro parameter in parentheses
spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine
ppc/pnv: change the PowerNV machine devices to be non user creatable
ppc/pnv: Add models for POWER8 PHB3 PCIe Host bridge
ppc/pnv: Add models for POWER9 PHB4 PCIe Host bridge
docs/specs/tpm: reST-ify TPM documentation
hw/ppc/Kconfig: Enable TPM_SPAPR as part of PSERIES config
tpm_spapr: Support suspend and resume
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/.gitmodules b/.gitmodules
index 19792c9..9c0501a 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -10,9 +10,6 @@
[submodule "roms/openbios"]
path = roms/openbios
url = https://git.qemu.org/git/openbios.git
-[submodule "roms/openhackware"]
- path = roms/openhackware
- url = https://git.qemu.org/git/openhackware.git
[submodule "roms/qemu-palcode"]
path = roms/qemu-palcode
url = https://git.qemu.org/git/qemu-palcode.git
diff --git a/MAINTAINERS b/MAINTAINERS
index 4ceb1ad..faffd44 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1103,7 +1103,6 @@
F: hw/rtc/m48t59-isa.c
F: include/hw/isa/pc87312.h
F: include/hw/rtc/m48t59.h
-F: pc-bios/ppc_rom.bin
F: tests/acceptance/ppc_prep_40p.py
sPAPR
diff --git a/Makefile b/Makefile
index 9a5a1e6..3b21c0e 100644
--- a/Makefile
+++ b/Makefile
@@ -784,7 +784,7 @@
BLOBS=bios.bin bios-256k.bin bios-microvm.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin vgabios-virtio.bin \
vgabios-ramfb.bin vgabios-bochs-display.bin vgabios-ati.bin \
-ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
+openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \
diff --git a/cpus.c b/cpus.c
index b612116..b4f8b84 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1839,6 +1839,11 @@
qemu_mutex_unlock(&qemu_global_mutex);
}
+void qemu_cond_wait_iothread(QemuCond *cond)
+{
+ qemu_cond_wait(cond, &qemu_global_mutex);
+}
+
static bool all_vcpus_paused(void)
{
CPUState *cpu;
diff --git a/docs/interop/firmware.json b/docs/interop/firmware.json
index 8ffb785..240f565 100644
--- a/docs/interop/firmware.json
+++ b/docs/interop/firmware.json
@@ -27,8 +27,7 @@
#
# @openfirmware: The interface is defined by the (historical) IEEE
# 1275-1994 standard. Examples for firmware projects that
-# provide this interface are: OpenBIOS, OpenHackWare,
-# SLOF.
+# provide this interface are: OpenBIOS and SLOF.
#
# @uboot: Firmware interface defined by the U-Boot project.
#
diff --git a/docs/specs/index.rst b/docs/specs/index.rst
index 984ba44..de46a8b 100644
--- a/docs/specs/index.rst
+++ b/docs/specs/index.rst
@@ -13,3 +13,4 @@
ppc-xive
ppc-spapr-xive
acpi_hw_reduced_hotplug
+ tpm
diff --git a/docs/specs/tpm.rst b/docs/specs/tpm.rst
new file mode 100644
index 0000000..2bdf637
--- /dev/null
+++ b/docs/specs/tpm.rst
@@ -0,0 +1,503 @@
+===============
+QEMU TPM Device
+===============
+
+Guest-side hardware interface
+=============================
+
+TIS interface
+-------------
+
+The QEMU TPM emulation implements a TPM TIS hardware interface
+following the Trusted Computing Group's specification "TCG PC Client
+Specific TPM Interface Specification (TIS)", Specification Version
+1.3, 21 March 2013. (see the `TIS specification`_, or a later version
+of it).
+
+The TIS interface makes a memory mapped IO region in the area
+0xfed40000-0xfed44fff available to the guest operating system.
+
+QEMU files related to TPM TIS interface:
+ - ``hw/tpm/tpm_tis.c``
+ - ``hw/tpm/tpm_tis.h``
+
+CRB interface
+-------------
+
+QEMU also implements a TPM CRB interface following the Trusted
+Computing Group's specification "TCG PC Client Platform TPM Profile
+(PTP) Specification", Family "2.0", Level 00 Revision 01.03 v22, May
+22, 2017. (see the `CRB specification`_, or a later version of it)
+
+The CRB interface makes a memory mapped IO region in the area
+0xfed40000-0xfed40fff (1 locality) available to the guest
+operating system.
+
+QEMU files related to TPM CRB interface:
+ - ``hw/tpm/tpm_crb.c``
+
+SPAPR interface
+---------------
+
+pSeries (ppc64) machines offer a tpm-spapr device model.
+
+QEMU files related to the SPAPR interface:
+ - ``hw/tpm/tpm_spapr.c``
+
+fw_cfg interface
+================
+
+The bios/firmware may read the ``"etc/tpm/config"`` fw_cfg entry for
+configuring the guest appropriately.
+
+The entry of 6 bytes has the following content, in little-endian:
+
+.. code-block:: c
+
+ #define TPM_VERSION_UNSPEC 0
+ #define TPM_VERSION_1_2 1
+ #define TPM_VERSION_2_0 2
+
+ #define TPM_PPI_VERSION_NONE 0
+ #define TPM_PPI_VERSION_1_30 1
+
+ struct FwCfgTPMConfig {
+ uint32_t tpmppi_address; /* PPI memory location */
+ uint8_t tpm_version; /* TPM version */
+ uint8_t tpmppi_version; /* PPI version */
+ };
+
+ACPI interface
+==============
+
+The TPM device is defined with ACPI ID "PNP0C31". QEMU builds a SSDT
+and passes it into the guest through the fw_cfg device. The device
+description contains the base address of the TIS interface 0xfed40000
+and the size of the MMIO area (0x5000). In case a TPM2 is used by
+QEMU, a TPM2 ACPI table is also provided. The device is described to
+be used in polling mode rather than interrupt mode primarily because
+no unused IRQ could be found.
+
+To support measurement logs to be written by the firmware,
+e.g. SeaBIOS, a TCPA table is implemented. This table provides a 64kb
+buffer into which the firmware can write its log. For TPM 2, only a
+more recent version of the TPM2 table provides support for
+measurement logs, and a TCPA table does not need to be created.
+
+The TCPA and TPM2 ACPI tables follow the Trusted Computing Group
+specification "TCG ACPI Specification" Family "1.2" and "2.0", Level
+00 Revision 00.37. (see the `ACPI specification`_, or a later version
+of it)
+
+ACPI PPI Interface
+------------------
+
+QEMU supports the Physical Presence Interface (PPI) for TPM 1.2 and
+TPM 2. This interface requires ACPI and firmware support. (see the
+`PPI specification`_)
+
+PPI enables a system administrator (root) to request a modification to
+the TPM upon reboot. The PPI specification defines the operation
+requests and the actions the firmware has to take. The system
+administrator passes the operation request number to the firmware
+through an ACPI interface which writes this number to a memory
+location that the firmware knows. Upon reboot, the firmware finds the
+number and sends commands to the TPM. The firmware writes the TPM
+result code and the operation request number to a memory location that
+ACPI can read from and pass the result on to the administrator.
+
+The PPI specification defines a set of mandatory and optional
+operations for the firmware to implement. The ACPI interface also
+allows an administrator to list the supported operations. In QEMU the
+ACPI code is generated by QEMU, yet the firmware needs to implement
+support on a per-operations basis, and different firmwares may support
+a different subset. Therefore, QEMU introduces the virtual memory
+device for PPI where the firmware can indicate which operations it
+supports and ACPI can enable the ones that are supported and disable
+all others. This interface lies in main memory and has the following
+layout:
+
+ +-------------+--------+--------+-------------------------------------------+
+ | Field | Length | Offset | Description |
+ +=============+========+========+===========================================+
+ | ``func`` | 0x100 | 0x000 | Firmware sets values for each supported |
+ | | | | operation. See defined values below. |
+ +-------------+--------+--------+-------------------------------------------+
+ | ``ppin`` | 0x1 | 0x100 | SMI interrupt to use. Set by firmware. |
+ | | | | Not supported. |
+ +-------------+--------+--------+-------------------------------------------+
+ | ``ppip`` | 0x4 | 0x101 | ACPI function index to pass to SMM code. |
+ | | | | Set by ACPI. Not supported. |
+ +-------------+--------+--------+-------------------------------------------+
+ | ``pprp`` | 0x4 | 0x105 | Result of last executed operation. Set by |
+ | | | | firmware. See function index 5 for values.|
+ +-------------+--------+--------+-------------------------------------------+
+ | ``pprq`` | 0x4 | 0x109 | Operation request number to execute. See |
+ | | | | 'Physical Presence Interface Operation |
+ | | | | Summary' tables in specs. Set by ACPI. |
+ +-------------+--------+--------+-------------------------------------------+
+ | ``pprm`` | 0x4 | 0x10d | Operation request optional parameter. |
+ | | | | Values depend on operation. Set by ACPI. |
+ +-------------+--------+--------+-------------------------------------------+
+ | ``lppr`` | 0x4 | 0x111 | Last executed operation request number. |
+ | | | | Copied from pprq field by firmware. |
+ +-------------+--------+--------+-------------------------------------------+
+ | ``fret`` | 0x4 | 0x115 | Result code from SMM function. |
+ | | | | Not supported. |
+ +-------------+--------+--------+-------------------------------------------+
+ | ``res1`` | 0x40 | 0x119 | Reserved for future use |
+ +-------------+--------+--------+-------------------------------------------+
+ |``next_step``| 0x1 | 0x159 | Operation to execute after reboot by |
+ | | | | firmware. Used by firmware. |
+ +-------------+--------+--------+-------------------------------------------+
+ | ``movv`` | 0x1 | 0x15a | Memory overwrite variable |
+ +-------------+--------+--------+-------------------------------------------+
+
+The following values are supported for the ``func`` field. They
+correspond to the values used by ACPI function index 8.
+
+ +----------+-------------------------------------------------------------+
+ | Value | Description |
+ +==========+=============================================================+
+ | 0 | Operation is not implemented. |
+ +----------+-------------------------------------------------------------+
+ | 1 | Operation is only accessible through firmware. |
+ +----------+-------------------------------------------------------------+
+ | 2 | Operation is blocked for OS by firmware configuration. |
+ +----------+-------------------------------------------------------------+
+ | 3 | Operation is allowed and physically present user required. |
+ +----------+-------------------------------------------------------------+
+ | 4 | Operation is allowed and physically present user is not |
+ | | required. |
+ +----------+-------------------------------------------------------------+
+
+The location of the table is given by the fw_cfg ``tpmppi_address``
+field. The PPI memory region size is 0x400 (``TPM_PPI_ADDR_SIZE``) to
+leave enough room for future updates.
+
+QEMU files related to TPM ACPI tables:
+ - ``hw/i386/acpi-build.c``
+ - ``include/hw/acpi/tpm.h``
+
+TPM backend devices
+===================
+
+The TPM implementation is split into two parts, frontend and
+backend. The frontend part is the hardware interface, such as the TPM
+TIS interface described earlier, and the other part is the TPM backend
+interface. The backend interfaces implement the interaction with a TPM
+device, which may be a physical or an emulated device. The split
+between the front- and backend devices allows a frontend to be
+connected with any available backend. This enables the TIS interface
+to be used with the passthrough backend or the swtpm backend.
+
+QEMU files related to TPM backends:
+ - ``backends/tpm.c``
+ - ``include/sysemu/tpm_backend.h``
+ - ``include/sysemu/tpm_backend_int.h``
+
+The QEMU TPM passthrough device
+-------------------------------
+
+In case QEMU is run on Linux as the host operating system it is
+possible to make the hardware TPM device available to a single QEMU
+guest. In this case the user must make sure that no other program is
+using the device, e.g., /dev/tpm0, before trying to start QEMU with
+it.
+
+The passthrough driver uses the host's TPM device for sending TPM
+commands and receiving responses. Besides that it accesses the
+TPM device's sysfs entry for support of command cancellation. Since
+none of the state of a hardware TPM can be migrated between hosts,
+virtual machine migration is disabled when the TPM passthrough driver
+is used.
+
+Since the host's TPM device will already be initialized by the host's
+firmware, certain commands, e.g. ``TPM_Startup()``, sent by the
+virtual firmware for device initialization, will fail. In this case
+the firmware should not use the TPM.
+
+Sharing the device with the host is generally not a recommended usage
+scenario for a TPM device. The primary reason for this is that two
+operating systems can then access the device's single set of
+resources, such as platform configuration registers
+(PCRs). Applications or kernel security subsystems, such as the Linux
+Integrity Measurement Architecture (IMA), are not expecting to share
+PCRs.
+
+QEMU files related to the TPM passthrough device:
+ - ``hw/tpm/tpm_passthrough.c``
+ - ``hw/tpm/tpm_util.c``
+ - ``hw/tpm/tpm_util.h``
+
+
+Command line to start QEMU with the TPM passthrough device using the host's
+hardware TPM ``/dev/tpm0``:
+
+.. code-block:: console
+
+ qemu-system-x86_64 -display sdl -accel kvm \
+ -m 1024 -boot d -bios bios-256k.bin -boot menu=on \
+ -tpmdev passthrough,id=tpm0,path=/dev/tpm0 \
+ -device tpm-tis,tpmdev=tpm0 test.img
+
+
+The following commands should result in similar output inside the VM
+with a Linux kernel that either has the TPM TIS driver built-in or
+available as a module:
+
+.. code-block:: console
+
+ # dmesg | grep -i tpm
+ [ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1)
+
+ # dmesg | grep TCPA
+ [ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \
+ BXPCTCPA 0000001 BXPC 00000001)
+
+ # ls -l /dev/tpm*
+ crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0
+
+ # find /sys/devices/ | grep pcrs$ | xargs cat
+ PCR-00: 35 4E 3B CE 23 9F 38 59 ...
+ ...
+ PCR-23: 00 00 00 00 00 00 00 00 ...
+
+The QEMU TPM emulator device
+----------------------------
+
+The TPM emulator device uses an external TPM emulator called 'swtpm'
+for sending TPM commands to and receiving responses from. The swtpm
+program must have been started before trying to access it through the
+TPM emulator with QEMU.
+
+The TPM emulator implements a command channel for transferring TPM
+commands and responses as well as a control channel over which control
+commands can be sent. (see the `SWTPM protocol`_ specification)
+
+The control channel serves the purpose of resetting, initializing, and
+migrating the TPM state, among other things.
+
+The swtpm program behaves like a hardware TPM and therefore needs to
+be initialized by the firmware running inside the QEMU virtual
+machine. One necessary step for initializing the device is to send
+the TPM_Startup command to it. SeaBIOS, for example, has been
+instrumented to initialize a TPM 1.2 or TPM 2 device using this
+command.
+
+QEMU files related to the TPM emulator device:
+ - ``hw/tpm/tpm_emulator.c``
+ - ``hw/tpm/tpm_util.c``
+ - ``hw/tpm/tpm_util.h``
+
+The following commands start the swtpm with a UnixIO control channel over
+a socket interface. They do not need to be run as root.
+
+.. code-block:: console
+
+ mkdir /tmp/mytpm1
+ swtpm socket --tpmstate dir=/tmp/mytpm1 \
+ --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \
+ --log level=20
+
+Command line to start QEMU with the TPM emulator device communicating
+with the swtpm (x86):
+
+.. code-block:: console
+
+ qemu-system-x86_64 -display sdl -accel kvm \
+ -m 1024 -boot d -bios bios-256k.bin -boot menu=on \
+ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \
+ -tpmdev emulator,id=tpm0,chardev=chrtpm \
+ -device tpm-tis,tpmdev=tpm0 test.img
+
+In case a pSeries machine is emulated, use the following command line:
+
+.. code-block:: console
+
+ qemu-system-ppc64 -display sdl -machine pseries,accel=kvm \
+ -m 1024 -bios slof.bin -boot menu=on \
+ -nodefaults -device VGA -device pci-ohci -device usb-kbd \
+ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \
+ -tpmdev emulator,id=tpm0,chardev=chrtpm \
+ -device tpm-spapr,tpmdev=tpm0 \
+ -device spapr-vscsi,id=scsi0,reg=0x00002000 \
+ -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \
+ -drive file=test.img,format=raw,if=none,id=drive-virtio-disk0
+
+In case SeaBIOS is used as firmware, it should show the TPM menu item
+after entering the menu with 'ESC'.
+
+.. code-block:: console
+
+ Select boot device:
+ 1. DVD/CD [ata1-0: QEMU DVD-ROM ATAPI-4 DVD/CD]
+ [...]
+ 5. Legacy option rom
+
+ t. TPM Configuration
+
+The following commands should result in similar output inside the VM
+with a Linux kernel that either has the TPM TIS driver built-in or
+available as a module:
+
+.. code-block:: console
+
+ # dmesg | grep -i tpm
+ [ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1)
+
+ # dmesg | grep TCPA
+ [ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \
+ BXPCTCPA 0000001 BXPC 00000001)
+
+ # ls -l /dev/tpm*
+ crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0
+
+ # find /sys/devices/ | grep pcrs$ | xargs cat
+ PCR-00: 35 4E 3B CE 23 9F 38 59 ...
+ ...
+ PCR-23: 00 00 00 00 00 00 00 00 ...
+
+Migration with the TPM emulator
+===============================
+
+The TPM emulator supports the following types of virtual machine
+migration:
+
+- VM save / restore (migration into a file)
+- Network migration
+- Snapshotting (migration into storage like QCOW2 or QED)
+
+The following command sequences can be used to test VM save / restore.
+
+In a 1st terminal start an instance of a swtpm using the following command:
+
+.. code-block:: console
+
+ mkdir /tmp/mytpm1
+ swtpm socket --tpmstate dir=/tmp/mytpm1 \
+ --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \
+ --log level=20 --tpm2
+
+In a 2nd terminal start the VM:
+
+.. code-block:: console
+
+ qemu-system-x86_64 -display sdl -accel kvm \
+ -m 1024 -boot d -bios bios-256k.bin -boot menu=on \
+ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \
+ -tpmdev emulator,id=tpm0,chardev=chrtpm \
+ -device tpm-tis,tpmdev=tpm0 \
+ -monitor stdio \
+ test.img
+
+Verify that the attached TPM is working as expected using applications
+inside the VM.
+
+To store the state of the VM use the following command in the QEMU
+monitor in the 2nd terminal:
+
+.. code-block:: console
+
+ (qemu) migrate "exec:cat > testvm.bin"
+ (qemu) quit
+
+At this point a file called ``testvm.bin`` should exist and the swtpm
+and QEMU processes should have ended.
+
+To test 'VM restore' you have to start the swtpm with the same
+parameters as before. If previously a TPM 2 [--tpm2] was saved, --tpm2
+must now be passed again on the command line.
+
+In the 1st terminal restart the swtpm with the same command line as
+before:
+
+.. code-block:: console
+
+ swtpm socket --tpmstate dir=/tmp/mytpm1 \
+ --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \
+ --log level=20 --tpm2
+
+In the 2nd terminal restore the state of the VM using the additional
+'-incoming' option.
+
+.. code-block:: console
+
+ qemu-system-x86_64 -display sdl -accel kvm \
+ -m 1024 -boot d -bios bios-256k.bin -boot menu=on \
+ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \
+ -tpmdev emulator,id=tpm0,chardev=chrtpm \
+ -device tpm-tis,tpmdev=tpm0 \
+ -incoming "exec:cat < testvm.bin" \
+ test.img
+
+Troubleshooting migration
+-------------------------
+
+There are several reasons why migration may fail. In case of problems,
+please ensure that the command lines adhere to the following rules
+and, if possible, that identical versions of QEMU and swtpm are used
+at all times.
+
+VM save and restore:
+
+ - QEMU command line parameters should be identical apart from the
+ '-incoming' option on VM restore
+
+ - swtpm command line parameters should be identical
+
+VM migration to 'localhost':
+
+ - QEMU command line parameters should be identical apart from the
+ '-incoming' option on the destination side
+
+ - swtpm command line parameters should point to two different
+ directories on the source and destination swtpm (--tpmstate dir=...)
+ (especially if different versions of libtpms were to be used on the
+ same machine).
+
+VM migration across the network:
+
+ - QEMU command line parameters should be identical apart from the
+ '-incoming' option on the destination side
+
+ - swtpm command line parameters should be identical
+
+VM Snapshotting:
+ - QEMU command line parameters should be identical
+
+ - swtpm command line parameters should be identical
+
+
+Besides that, migration failure reasons on the swtpm level may include
+the following:
+
+ - the versions of the swtpm on the source and destination sides are
+ incompatible
+
+ - downgrading of TPM state may not be supported
+
+ - the source and destination libtpms were compiled with different
+ compile-time options and the destination side refuses to accept the
+ state
+
+ - different migration keys are used on the source and destination side
+ and the destination side cannot decrypt the migrated state
+ (swtpm ... --migration-key ... )
+
+
+.. _TIS specification:
+ https://trustedcomputinggroup.org/pc-client-work-group-pc-client-specific-tpm-interface-specification-tis/
+
+.. _CRB specification:
+ https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/
+
+
+.. _ACPI specification:
+ https://trustedcomputinggroup.org/tcg-acpi-specification/
+
+.. _PPI specification:
+ https://trustedcomputinggroup.org/resource/tcg-physical-presence-interface-specification/
+
+.. _SWTPM protocol:
+ https://github.com/stefanberger/swtpm/blob/master/man/man3/swtpm_ioctls.pod
diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt
deleted file mode 100644
index 9c8cca0..0000000
--- a/docs/specs/tpm.txt
+++ /dev/null
@@ -1,427 +0,0 @@
-QEMU TPM Device
-===============
-
-= Guest-side Hardware Interface =
-
-The QEMU TPM emulation implements a TPM TIS hardware interface following the
-Trusted Computing Group's specification "TCG PC Client Specific TPM Interface
-Specification (TIS)", Specification Version 1.3, 21 March 2013. This
-specification, or a later version of it, can be accessed from the following
-URL:
-
-https://trustedcomputinggroup.org/pc-client-work-group-pc-client-specific-tpm-interface-specification-tis/
-
-The TIS interface makes a memory mapped IO region in the area 0xfed40000 -
-0xfed44fff available to the guest operating system.
-
-
-QEMU files related to TPM TIS interface:
- - hw/tpm/tpm_tis.c
- - hw/tpm/tpm_tis.h
-
-
-QEMU also implements a TPM CRB interface following the Trusted Computing
-Group's specification "TCG PC Client Platform TPM Profile (PTP)
-Specification", Family "2.0", Level 00 Revision 01.03 v22, May 22, 2017.
-This specification, or a later version of it, can be accessed from the
-following URL:
-
-https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/
-
-The CRB interface makes a memory mapped IO region in the area 0xfed40000 -
-0xfed40fff (1 locality) available to the guest operating system.
-
-QEMU files related to TPM CRB interface:
- - hw/tpm/tpm_crb.c
-
-= fw_cfg interface =
-
-The bios/firmware may read the "etc/tpm/config" fw_cfg entry for
-configuring the guest appropriately.
-
-The entry of 6 bytes has the following content, in little-endian:
-
- #define TPM_VERSION_UNSPEC 0
- #define TPM_VERSION_1_2 1
- #define TPM_VERSION_2_0 2
-
- #define TPM_PPI_VERSION_NONE 0
- #define TPM_PPI_VERSION_1_30 1
-
- struct FwCfgTPMConfig {
- uint32_t tpmppi_address; /* PPI memory location */
- uint8_t tpm_version; /* TPM version */
- uint8_t tpmppi_version; /* PPI version */
- };
-
-= ACPI Interface =
-
-The TPM device is defined with ACPI ID "PNP0C31". QEMU builds a SSDT and passes
-it into the guest through the fw_cfg device. The device description contains
-the base address of the TIS interface 0xfed40000 and the size of the MMIO area
-(0x5000). In case a TPM2 is used by QEMU, a TPM2 ACPI table is also provided.
-The device is described to be used in polling mode rather than interrupt mode
-primarily because no unused IRQ could be found.
-
-To support measurement logs to be written by the firmware, e.g. SeaBIOS, a TCPA
-table is implemented. This table provides a 64kb buffer where the firmware can
-write its log into. For TPM 2 only a more recent version of the TPM2 table
-provides support for measurements logs and a TCPA table does not need to be
-created.
-
-The TCPA and TPM2 ACPI tables follow the Trusted Computing Group specification
-"TCG ACPI Specification" Family "1.2" and "2.0", Level 00 Revision 00.37. This
-specification, or a later version of it, can be accessed from the following
-URL:
-
-https://trustedcomputinggroup.org/tcg-acpi-specification/
-
-== ACPI PPI Interface ==
-
-QEMU supports the Physical Presence Interface (PPI) for TPM 1.2 and TPM 2. This
-interface requires ACPI and firmware support. The specification can be found at
-the following URL:
-
-https://trustedcomputinggroup.org/resource/tcg-physical-presence-interface-specification/
-
-PPI enables a system administrator (root) to request a modification to the
-TPM upon reboot. The PPI specification defines the operation requests and the
-actions the firmware has to take. The system administrator passes the operation
-request number to the firmware through an ACPI interface which writes this
-number to a memory location that the firmware knows. Upon reboot, the firmware
-finds the number and sends commands to the TPM. The firmware writes the TPM
-result code and the operation request number to a memory location that ACPI can
-read from and pass the result on to the administrator.
-
-The PPI specification defines a set of mandatory and optional operations for
-the firmware to implement. The ACPI interface also allows an administrator to
-list the supported operations. In QEMU the ACPI code is generated by QEMU, yet
-the firmware needs to implement support on a per-operations basis, and
-different firmwares may support a different subset. Therefore, QEMU introduces
-the virtual memory device for PPI where the firmware can indicate which
-operations it supports and ACPI can enable the ones that are supported and
-disable all others. This interface lies in main memory and has the following
-layout:
-
- +----------+--------+--------+-------------------------------------------+
- | Field | Length | Offset | Description |
- +----------+--------+--------+-------------------------------------------+
- | func | 0x100 | 0x000 | Firmware sets values for each supported |
- | | | | operation. See defined values below. |
- +----------+--------+--------+-------------------------------------------+
- | ppin | 0x1 | 0x100 | SMI interrupt to use. Set by firmware. |
- | | | | Not supported. |
- +----------+--------+--------+-------------------------------------------+
- | ppip | 0x4 | 0x101 | ACPI function index to pass to SMM code. |
- | | | | Set by ACPI. Not supported. |
- +----------+--------+--------+-------------------------------------------+
- | pprp | 0x4 | 0x105 | Result of last executed operation. Set by |
- | | | | firmware. See function index 5 for values.|
- +----------+--------+--------+-------------------------------------------+
- | pprq | 0x4 | 0x109 | Operation request number to execute. See |
- | | | | 'Physical Presence Interface Operation |
- | | | | Summary' tables in specs. Set by ACPI. |
- +----------+--------+--------+-------------------------------------------+
- | pprm | 0x4 | 0x10d | Operation request optional parameter. |
- | | | | Values depend on operation. Set by ACPI. |
- +----------+--------+--------+-------------------------------------------+
- | lppr | 0x4 | 0x111 | Last executed operation request number. |
- | | | | Copied from pprq field by firmware. |
- +----------+--------+--------+-------------------------------------------+
- | fret | 0x4 | 0x115 | Result code from SMM function. |
- | | | | Not supported. |
- +----------+--------+--------+-------------------------------------------+
- | res1 | 0x40 | 0x119 | Reserved for future use |
- +----------+--------+--------+-------------------------------------------+
- | next_step| 0x1 | 0x159 | Operation to execute after reboot by |
- | | | | firmware. Used by firmware. |
- +----------+--------+--------+-------------------------------------------+
- | movv | 0x1 | 0x15a | Memory overwrite variable |
- +----------+--------+--------+-------------------------------------------+
-
- The following values are supported for the 'func' field. They correspond
- to the values used by ACPI function index 8.
-
- +----------+-------------------------------------------------------------+
- | value | Description |
- +----------+-------------------------------------------------------------+
- | 0 | Operation is not implemented. |
- +----------+-------------------------------------------------------------+
- | 1 | Operation is only accessible through firmware. |
- +----------+-------------------------------------------------------------+
- | 2 | Operation is blocked for OS by firmware configuration. |
- +----------+-------------------------------------------------------------+
- | 3 | Operation is allowed and physically present user required. |
- +----------+-------------------------------------------------------------+
- | 4 | Operation is allowed and physically present user is not |
- | | required. |
- +----------+-------------------------------------------------------------+
-
-The location of the table is given by the fw_cfg tpmppi_address field.
-The PPI memory region size is 0x400 (TPM_PPI_ADDR_SIZE) to leave
-enough room for future updates.
-
-
-QEMU files related to TPM ACPI tables:
- - hw/i386/acpi-build.c
- - include/hw/acpi/tpm.h
-
-
-= TPM backend devices =
-
-The TPM implementation is split into two parts, frontend and backend. The
-frontend part is the hardware interface, such as the TPM TIS interface
-described earlier, and the other part is the TPM backend interface. The backend
-interfaces implement the interaction with a TPM device, which may be a physical
-or an emulated device. The split between the front- and backend devices allows
-a frontend to be connected with any available backend. This enables the TIS
-interface to be used with the passthrough backend or the (future) swtpm backend.
-
-
-QEMU files related to TPM backends:
- - backends/tpm.c
- - include/sysemu/tpm_backend.h
- - include/sysemu/tpm_backend_int.h
-
-
-== The QEMU TPM passthrough device ==
-
-In case QEMU is run on Linux as the host operating system it is possible to
-make the hardware TPM device available to a single QEMU guest. In this case the
-user must make sure that no other program is using the device, e.g., /dev/tpm0,
-before trying to start QEMU with it.
-
-The passthrough driver uses the host's TPM device for sending TPM commands
-and receiving responses from. Besides that it accesses the TPM device's sysfs
-entry for support of command cancellation. Since none of the state of a
-hardware TPM can be migrated between hosts, virtual machine migration is
-disabled when the TPM passthrough driver is used.
-
-Since the host's TPM device will already be initialized by the host's firmware,
-certain commands, e.g. TPM_Startup(), sent by the virtual firmware for device
-initialization, will fail. In this case the firmware should not use the TPM.
-
-Sharing the device with the host is generally not a recommended usage scenario
-for a TPM device. The primary reason for this is that two operating systems can
-then access the device's single set of resources, such as platform configuration
-registers (PCRs). Applications or kernel security subsystems, such as the
-Linux Integrity Measurement Architecture (IMA), are not expecting to share PCRs.
-
-
-QEMU files related to the TPM passthrough device:
- - hw/tpm/tpm_passthrough.c
- - hw/tpm/tpm_util.c
- - hw/tpm/tpm_util.h
-
-
-Command line to start QEMU with the TPM passthrough device using the host's
-hardware TPM /dev/tpm0:
-
-qemu-system-x86_64 -display sdl -accel kvm \
- -m 1024 -boot d -bios bios-256k.bin -boot menu=on \
- -tpmdev passthrough,id=tpm0,path=/dev/tpm0 \
- -device tpm-tis,tpmdev=tpm0 test.img
-
-The following commands should result in similar output inside the VM with a
-Linux kernel that either has the TPM TIS driver built-in or available as a
-module:
-
-#> dmesg | grep -i tpm
-[ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1)
-
-#> dmesg | grep TCPA
-[ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \
- BXPCTCPA 0000001 BXPC 00000001)
-
-#> ls -l /dev/tpm*
-crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0
-
-#> find /sys/devices/ | grep pcrs$ | xargs cat
-PCR-00: 35 4E 3B CE 23 9F 38 59 ...
-...
-PCR-23: 00 00 00 00 00 00 00 00 ...
-
-
-== The QEMU TPM emulator device ==
-
-The TPM emulator device uses an external TPM emulator called 'swtpm' for
-sending TPM commands to and receiving responses from. The swtpm program
-must have been started before trying to access it through the TPM emulator
-with QEMU.
-
-The TPM emulator implements a command channel for transferring TPM commands
-and responses as well as a control channel over which control commands can
-be sent. The specification for the control channel can be found here:
-
-https://github.com/stefanberger/swtpm/blob/master/man/man3/swtpm_ioctls.pod
-
-
-The control channel serves the purpose of resetting, initializing, and
-migrating the TPM state, among other things.
-
-The swtpm program behaves like a hardware TPM and therefore needs to be
-initialized by the firmware running inside the QEMU virtual machine.
-One necessary step for initializing the device is to send the TPM_Startup
-command to it. SeaBIOS, for example, has been instrumented to initialize
-a TPM 1.2 or TPM 2 device using this command.
-
-
-QEMU files related to the TPM emulator device:
- - hw/tpm/tpm_emulator.c
- - hw/tpm/tpm_util.c
- - hw/tpm/tpm_util.h
-
-
-The following commands start the swtpm with a UnixIO control channel over
-a socket interface. They do not need to be run as root.
-
-mkdir /tmp/mytpm1
-swtpm socket --tpmstate dir=/tmp/mytpm1 \
- --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \
- --log level=20
-
-Command line to start QEMU with the TPM emulator device communicating with
-the swtpm:
-
-qemu-system-x86_64 -display sdl -accel kvm \
- -m 1024 -boot d -bios bios-256k.bin -boot menu=on \
- -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \
- -tpmdev emulator,id=tpm0,chardev=chrtpm \
- -device tpm-tis,tpmdev=tpm0 test.img
-
-
-In case SeaBIOS is used as firmware, it should show the TPM menu item
-after entering the menu with 'ESC'.
-
-Select boot device:
-1. DVD/CD [ata1-0: QEMU DVD-ROM ATAPI-4 DVD/CD]
-[...]
-5. Legacy option rom
-
-t. TPM Configuration
-
-
-The following commands should result in similar output inside the VM with a
-Linux kernel that either has the TPM TIS driver built-in or available as a
-module:
-
-#> dmesg | grep -i tpm
-[ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1)
-
-#> dmesg | grep TCPA
-[ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \
- BXPCTCPA 0000001 BXPC 00000001)
-
-#> ls -l /dev/tpm*
-crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0
-
-#> find /sys/devices/ | grep pcrs$ | xargs cat
-PCR-00: 35 4E 3B CE 23 9F 38 59 ...
-...
-PCR-23: 00 00 00 00 00 00 00 00 ...
-
-
-=== Migration with the TPM emulator ===
-
-The TPM emulator supports the following types of virtual machine migration:
-
-- VM save / restore (migration into a file)
-- Network migration
-- Snapshotting (migration into storage like QoW2 or QED)
-
-The following command sequences can be used to test VM save / restore.
-
-
-In a 1st terminal start an instance of a swtpm using the following command:
-
-mkdir /tmp/mytpm1
-swtpm socket --tpmstate dir=/tmp/mytpm1 \
- --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \
- --log level=20 --tpm2
-
-In a 2nd terminal start the VM:
-
-qemu-system-x86_64 -display sdl -accel kvm \
- -m 1024 -boot d -bios bios-256k.bin -boot menu=on \
- -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \
- -tpmdev emulator,id=tpm0,chardev=chrtpm \
- -device tpm-tis,tpmdev=tpm0 \
- -monitor stdio \
- test.img
-
-Verify that the attached TPM is working as expected using applications inside
-the VM.
-
-To store the state of the VM use the following command in the QEMU monitor in
-the 2nd terminal:
-
-(qemu) migrate "exec:cat > testvm.bin"
-(qemu) quit
-
-At this point a file called 'testvm.bin' should exists and the swtpm and QEMU
-processes should have ended.
-
-To test 'VM restore' you have to start the swtpm with the same parameters
-as before. If previously a TPM 2 [--tpm2] was saved, --tpm2 must now be
-passed again on the command line.
-
-In the 1st terminal restart the swtpm with the same command line as before:
-
-swtpm socket --tpmstate dir=/tmp/mytpm1 \
- --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \
- --log level=20 --tpm2
-
-In the 2nd terminal restore the state of the VM using the additional
-'-incoming' option.
-
-qemu-system-x86_64 -display sdl -accel kvm \
- -m 1024 -boot d -bios bios-256k.bin -boot menu=on \
- -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \
- -tpmdev emulator,id=tpm0,chardev=chrtpm \
- -device tpm-tis,tpmdev=tpm0 \
- -incoming "exec:cat < testvm.bin" \
- test.img
-
-
-Troubleshooting migration:
-
-There are several reasons why migration may fail. In case of problems,
-please ensure that the command lines adhere to the following rules and,
-if possible, that identical versions of QEMU and swtpm are used at all
-times.
-
-VM save and restore:
- - QEMU command line parameters should be identical apart from the
- '-incoming' option on VM restore
- - swtpm command line parameters should be identical
-
-VM migration to 'localhost':
- - QEMU command line parameters should be identical apart from the
- '-incoming' option on the destination side
- - swtpm command line parameters should point to two different
- directories on the source and destination swtpm (--tpmstate dir=...)
- (especially if different versions of libtpms were to be used on the
- same machine).
-
-VM migration across the network:
- - QEMU command line parameters should be identical apart from the
- '-incoming' option on the destination side
- - swtpm command line parameters should be identical
-
-VM Snapshotting:
- - QEMU command line parameters should be identical
- - swtpm command line parameters should be identical
-
-
-Besides that, migration failure reasons on the swtpm level may include
-the following:
-
- - the versions of the swtpm on the source and destination sides are
- incompatible
- - downgrading of TPM state may not be supported
- - the source and destination libtpms were compiled with different
- compile-time options and the destination side refuses to accept the
- state
- - different migration keys are used on the source and destination side
- and the destination side cannot decrypt the migrated state
- (swtpm ... --migration-key ... )
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index 785b607..c5d507e 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -217,7 +217,7 @@
}
}
-static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
+void icp_irq(ICSState *ics, int server, int nr, uint8_t priority)
{
ICPState *icp = xics_icp_get(ics->xics, server);
@@ -512,8 +512,14 @@
static void ics_reject(ICSState *ics, uint32_t nr)
{
+ ICSStateClass *isc = ICS_GET_CLASS(ics);
ICSIRQState *irq = ics->irqs + nr - ics->offset;
+ if (isc->reject) {
+ isc->reject(ics, nr);
+ return;
+ }
+
trace_xics_ics_reject(nr, nr - ics->offset);
if (irq->flags & XICS_FLAGS_IRQ_MSI) {
irq->status |= XICS_STATUS_REJECTED;
@@ -524,8 +530,14 @@
void ics_resend(ICSState *ics)
{
+ ICSStateClass *isc = ICS_GET_CLASS(ics);
int i;
+ if (isc->resend) {
+ isc->resend(ics);
+ return;
+ }
+
for (i = 0; i < ics->nr_irqs; i++) {
/* FIXME: filter by server#? */
if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) {
diff --git a/hw/pci-host/Makefile.objs b/hw/pci-host/Makefile.objs
index 9c466fa..8c87e84 100644
--- a/hw/pci-host/Makefile.objs
+++ b/hw/pci-host/Makefile.objs
@@ -20,3 +20,5 @@
common-obj-$(CONFIG_PCI_EXPRESS_XILINX) += xilinx-pcie.o
common-obj-$(CONFIG_PCI_EXPRESS_DESIGNWARE) += designware.o
+obj-$(CONFIG_POWERNV) += pnv_phb4.o pnv_phb4_pec.o
+obj-$(CONFIG_POWERNV) += pnv_phb3.o pnv_phb3_msi.o pnv_phb3_pbcq.o
diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
new file mode 100644
index 0000000..74618fa
--- /dev/null
+++ b/hw/pci-host/pnv_phb3.c
@@ -0,0 +1,1197 @@
+/*
+ * QEMU PowerPC PowerNV (POWER8) PHB3 model
+ *
+ * Copyright (c) 2014-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/visitor.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "hw/pci-host/pnv_phb3_regs.h"
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/pci/pcie_host.h"
+#include "hw/pci/pcie_port.h"
+#include "hw/ppc/pnv.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+
+/*
+ * Log a guest error through qemu_log_mask(LOG_GUEST_ERROR, ...).  The
+ * message is prefixed with "phb3[<chip_id>:<phb_id>]: " taken from @phb
+ * and a trailing newline is appended to @fmt.
+ */
+#define phb3_error(phb, fmt, ...) \
+ qemu_log_mask(LOG_GUEST_ERROR, "phb3[%d:%d]: " fmt "\n", \
+ (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__)
+
+/*
+ * Look up the PCI device currently addressed by the PHB_CONFIG_ADDRESS
+ * register.
+ *
+ * Bit 63 of the register must be set, otherwise NULL is returned.  The
+ * bus number is taken from bits 59:52 and the devfn from bits 51:44; the
+ * result is whatever pci_find_device() yields for them on the PHB bus.
+ */
+static PCIDevice *pnv_phb3_find_cfg_dev(PnvPHB3 *phb)
+{
+    PCIHostState *host = PCI_HOST_BRIDGE(phb);
+    uint64_t cfg_reg = phb->regs[PHB_CONFIG_ADDRESS >> 3];
+    uint8_t bus_num, dev_fn;
+
+    if ((cfg_reg >> 63) == 0) {
+        return NULL;
+    }
+
+    bus_num = (uint8_t)(cfg_reg >> 52);
+    dev_fn = (uint8_t)(cfg_reg >> 44);
+    return pci_find_device(host->bus, bus_num, dev_fn);
+}
+
+/*
+ * Write @val (@size bytes wide) into the config space of the device
+ * selected by PHB_CONFIG_ADDRESS.  The target offset is the register
+ * number held in PHB_CONFIG_ADDRESS combined with the byte offset @off.
+ * The write is silently dropped when no device is selected or when the
+ * offset lies outside the device's config space.
+ *
+ * The CONFIG_DATA register expects little endian accesses, but as the
+ * region is big endian, the value is byte-swapped before being passed on.
+ */
+static void pnv_phb3_config_write(PnvPHB3 *phb, unsigned off,
+                                  unsigned size, uint64_t val)
+{
+    PCIDevice *target = pnv_phb3_find_cfg_dev(phb);
+    uint32_t reg, cfg_len;
+
+    if (target == NULL) {
+        return;
+    }
+
+    reg = ((phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc) | off;
+    cfg_len = pci_config_size(target);
+    if (reg >= cfg_len) {
+        /*
+         * conventional pci device can be behind pcie-to-pci bridge.
+         * 256 <= addr < 4K has no effects.
+         */
+        return;
+    }
+
+    if (size == 2) {
+        val = bswap16(val);
+    } else if (size == 4) {
+        val = bswap32(val);
+    } else if (size != 1) {
+        g_assert_not_reached();
+    }
+
+    pci_host_config_write_common(target, reg, cfg_len, val, size);
+}
+
+/*
+ * Read @size bytes from the config space of the device selected by
+ * PHB_CONFIG_ADDRESS, at the register number held in that register
+ * combined with the byte offset @off.
+ *
+ * Returns all ones when no device is selected or when the offset lies
+ * outside the device's config space.  Multi-byte values are byte-swapped
+ * on the way out, mirroring what the write path does.
+ */
+static uint64_t pnv_phb3_config_read(PnvPHB3 *phb, unsigned off,
+                                     unsigned size)
+{
+    PCIDevice *target = pnv_phb3_find_cfg_dev(phb);
+    uint32_t reg, cfg_len;
+    uint64_t data;
+
+    if (target == NULL) {
+        return ~0ull;
+    }
+
+    reg = ((phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc) | off;
+    cfg_len = pci_config_size(target);
+    if (reg >= cfg_len) {
+        /*
+         * conventional pci device can be behind pcie-to-pci bridge.
+         * 256 <= addr < 4K has no effects.
+         */
+        return ~0ull;
+    }
+
+    data = pci_host_config_read_common(target, reg, cfg_len, size);
+    if (size == 1) {
+        return data;
+    }
+    if (size == 2) {
+        return bswap16(data);
+    }
+    if (size == 4) {
+        return bswap32(data);
+    }
+    g_assert_not_reached();
+}
+
+/*
+ * Re-evaluate the mapping of the M32 window.
+ *
+ * Any existing mapping is removed first.  If M32 is enabled in
+ * PHB_PHB3_CONFIG and the window described by the M32 base/mask registers
+ * fits entirely inside one of the mapped PBCQ MMIO BARs, an alias onto
+ * the PCI MMIO space (starting at PHB_M32_START_ADDR) is installed in
+ * that BAR.  Otherwise the window stays unmapped.
+ */
+static void pnv_phb3_check_m32(PnvPHB3 *phb)
+{
+    PnvPBCQState *pbcq = &phb->pbcq;
+    MemoryRegion *m32 = &phb->mr_m32;
+    MemoryRegion *container;
+    uint64_t bar, pci_start, len;
+
+    if (memory_region_is_mapped(m32)) {
+        memory_region_del_subregion(m32->container, m32);
+    }
+
+    if ((phb->regs[PHB_PHB3_CONFIG >> 3] & PHB_PHB3C_M32_EN) == 0) {
+        return;
+    }
+
+    /* Window geometry as programmed in the registers */
+    bar = phb->regs[PHB_M32_BASE_ADDR >> 3];
+    pci_start = phb->regs[PHB_M32_START_ADDR >> 3];
+    len = ~(phb->regs[PHB_M32_BASE_MASK >> 3] | 0xfffc000000000000ull) + 1;
+
+    /* Find the enabled PBCQ MMIO region that fully contains the window */
+    if (memory_region_is_mapped(&pbcq->mmbar0) &&
+        bar >= pbcq->mmio0_base &&
+        (bar + len) <= (pbcq->mmio0_base + pbcq->mmio0_size)) {
+        container = &pbcq->mmbar0;
+        bar -= pbcq->mmio0_base;
+    } else if (memory_region_is_mapped(&pbcq->mmbar1) &&
+               bar >= pbcq->mmio1_base &&
+               (bar + len) <= (pbcq->mmio1_base + pbcq->mmio1_size)) {
+        container = &pbcq->mmbar1;
+        bar -= pbcq->mmio1_base;
+    } else {
+        return;
+    }
+
+    /* Alias the PCI MMIO range at the BAR-relative offset */
+    memory_region_init_alias(m32, OBJECT(phb), "phb3-m32",
+                             &phb->pci_mmio, pci_start, len);
+    memory_region_add_subregion(container, bar, m32);
+}
+
+/*
+ * Re-evaluate the mapping of M64 window @index.
+ *
+ * Any existing mapping of that window is removed first.  If the M64BT
+ * entry is enabled and the window it describes fits entirely inside one
+ * of the mapped PBCQ MMIO BARs, an alias onto the PCI MMIO space is
+ * installed in that BAR.  The PCI-side start address is the window base
+ * OR-ed with PHB_M64_UPPER_BITS.
+ */
+static void pnv_phb3_check_m64(PnvPHB3 *phb, uint32_t index)
+{
+    PnvPBCQState *pbcq = &phb->pbcq;
+    MemoryRegion *window = &phb->mr_m64[index];
+    MemoryRegion *container;
+    uint64_t entry, bar, pci_start, len;
+
+    if (memory_region_is_mapped(window)) {
+        /* Should we destroy it in RCU friendly way... ? */
+        memory_region_del_subregion(window->container, window);
+    }
+
+    entry = phb->ioda_M64BT[index];
+    if ((entry & IODA2_M64BT_ENABLE) == 0) {
+        return;
+    }
+
+    /* Window geometry: base and mask fields are in units of 1 << 20 */
+    bar = GETFIELD(IODA2_M64BT_BASE, entry) << 20;
+    if (entry & IODA2_M64BT_SINGLE_PE) {
+        bar &= ~0x1ffffffull;
+    }
+    len = GETFIELD(IODA2_M64BT_MASK, entry) << 20;
+    len = ~(len | 0xfffc000000000000ull) + 1;
+    pci_start = bar | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
+
+    /* Find the enabled PBCQ MMIO region that fully contains the window */
+    if (memory_region_is_mapped(&pbcq->mmbar0) &&
+        bar >= pbcq->mmio0_base &&
+        (bar + len) <= (pbcq->mmio0_base + pbcq->mmio0_size)) {
+        container = &pbcq->mmbar0;
+        bar -= pbcq->mmio0_base;
+    } else if (memory_region_is_mapped(&pbcq->mmbar1) &&
+               bar >= pbcq->mmio1_base &&
+               (bar + len) <= (pbcq->mmio1_base + pbcq->mmio1_size)) {
+        container = &pbcq->mmbar1;
+        bar -= pbcq->mmio1_base;
+    } else {
+        return;
+    }
+
+    /* Alias the PCI MMIO range at the BAR-relative offset */
+    memory_region_init_alias(window, OBJECT(phb), "phb3-m64",
+                             &phb->pci_mmio, pci_start, len);
+    memory_region_add_subregion(container, bar, window);
+}
+
+/* Re-evaluate the mapping of every one of the PNV_PHB3_NUM_M64 windows. */
+static void pnv_phb3_check_all_m64s(PnvPHB3 *phb)
+{
+    uint64_t win = 0;
+
+    while (win < PNV_PHB3_NUM_M64) {
+        pnv_phb3_check_m64(phb, win);
+        win++;
+    }
+}
+
+/*
+ * Handle a write to LXIVT entry @idx.
+ *
+ * Only the server, priority and node id fields of @val are kept in the
+ * table; the server and priority are then forwarded to the LSI ICS via
+ * ics_write_xive().
+ */
+static void pnv_phb3_lxivt_write(PnvPHB3 *phb, unsigned idx, uint64_t val)
+{
+    const uint64_t kept = IODA2_LXIVT_SERVER |
+                          IODA2_LXIVT_PRIORITY |
+                          IODA2_LXIVT_NODE_ID;
+    uint8_t irq_server = GETFIELD(IODA2_LXIVT_SERVER, val);
+    uint8_t irq_prio = GETFIELD(IODA2_LXIVT_PRIORITY, val);
+
+    phb->ioda_LXIVT[idx] = val & kept;
+
+    /*
+     * The low order 2 bits are the link pointer (Type II interrupts).
+     * Shift back to get a valid IRQ server.
+     */
+    irq_server >>= 2;
+
+    ics_write_xive(&phb->lsis, idx, irq_server, irq_prio, irq_prio);
+}
+
+/*
+ * Resolve the IODA table entry currently selected by PHB_IODA_ADDR.
+ *
+ * The table selector and index are decoded from the address register and
+ * the index is wrapped with a per-table mask.  When @out_table / @out_idx
+ * are non-NULL they receive the selected table and the wrapped index.  If
+ * the auto-increment bit is set, the index stored back in PHB_IODA_ADDR
+ * is advanced by one (wrapping with the same mask).
+ *
+ * Returns a pointer to the entry, or NULL when the table has no backing
+ * array here.  For an unknown table selector a guest error is logged and
+ * NULL is returned before the output parameters or the address register
+ * are touched.
+ */
+static uint64_t *pnv_phb3_ioda_access(PnvPHB3 *phb,
+                                      unsigned *out_table, unsigned *out_idx)
+{
+    uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
+    unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
+    unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
+    unsigned int mask;
+    uint64_t *tptr = NULL;
+
+    switch (table) {
+    case IODA2_TBL_LIST:
+        tptr = phb->ioda_LIST;
+        mask = 7;
+        break;
+    case IODA2_TBL_LXIVT:
+        tptr = phb->ioda_LXIVT;
+        mask = 7;
+        break;
+    case IODA2_TBL_IVC_CAM:
+    case IODA2_TBL_RBA:
+        mask = 31;
+        break;
+    case IODA2_TBL_RCAM:
+        mask = 63;
+        break;
+    case IODA2_TBL_MRT:
+        mask = 7;
+        break;
+    case IODA2_TBL_PESTA:
+    case IODA2_TBL_PESTB:
+        mask = 255;
+        break;
+    case IODA2_TBL_TVT:
+        tptr = phb->ioda_TVT;
+        mask = 511;
+        break;
+    case IODA2_TBL_TCAM:
+    case IODA2_TBL_TDR:
+        mask = 63;
+        break;
+    case IODA2_TBL_M64BT:
+        tptr = phb->ioda_M64BT;
+        mask = 15;
+        break;
+    case IODA2_TBL_M32DT:
+        tptr = phb->ioda_MDT;
+        mask = 255;
+        break;
+    case IODA2_TBL_PEEV:
+        tptr = phb->ioda_PEEV;
+        mask = 3;
+        break;
+    default:
+        /* table is unsigned: print it with %u rather than %d */
+        phb3_error(phb, "invalid IODA table %u", table);
+        return NULL;
+    }
+
+    index &= mask;
+    if (out_idx) {
+        *out_idx = index;
+    }
+    if (out_table) {
+        *out_table = table;
+    }
+    if (tptr) {
+        tptr += index;
+    }
+
+    /* The returned pointer refers to the entry before auto-increment */
+    if (adreg & PHB_IODA_AD_AUTOINC) {
+        index = (index + 1) & mask;
+        adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
+    }
+    phb->regs[PHB_IODA_ADDR >> 3] = adreg;
+    return tptr;
+}
+
+/*
+ * Read the IODA table entry selected by PHB_IODA_ADDR (the access may
+ * auto-increment the table index as a side effect).
+ *
+ * Neither the table id nor the index is needed here, so NULL is passed
+ * for both output parameters of pnv_phb3_ioda_access().
+ */
+static uint64_t pnv_phb3_ioda_read(PnvPHB3 *phb)
+{
+    uint64_t *tptr = pnv_phb3_ioda_access(phb, NULL, NULL);
+
+    /* Return 0 on unsupported tables, not ff's */
+    return tptr ? *tptr : 0;
+}
+
+/*
+ * Write @val to the IODA table entry selected by PHB_IODA_ADDR.
+ *
+ * Writes to tables without backing storage are ignored.  LXIVT writes go
+ * through pnv_phb3_lxivt_write(), which stores a filtered value itself;
+ * every other table stores @val as is, and an M64BT write additionally
+ * re-evaluates the mapping of the affected M64 window.
+ */
+static void pnv_phb3_ioda_write(PnvPHB3 *phb, uint64_t val)
+{
+    unsigned tbl, entry;
+    uint64_t *slot = pnv_phb3_ioda_access(phb, &tbl, &entry);
+
+    if (slot == NULL) {
+        return;
+    }
+
+    if (tbl == IODA2_TBL_LXIVT) {
+        pnv_phb3_lxivt_write(phb, entry, val);
+        return;
+    }
+
+    *slot = val;
+    if (tbl == IODA2_TBL_M64BT) {
+        pnv_phb3_check_m64(phb, entry);
+    }
+}
+
+/*
+ * Recompute the interrupt source layout of the PHB.
+ *
+ * This is called whenever the PHB LSI, MSI source ID register or
+ * the PBCQ irq filters are written.
+ *
+ * The LSI ICS offset is reset to 0 while the IRSN BAR is not enabled for
+ * both RX and TX, or while the PHB and PBCQ LSI source ids disagree.
+ * Otherwise the LSI offset and the MSI configuration are derived from
+ * the PBCQ IRSN compare/mask registers.  Failed sanity checks are only
+ * logged as guest errors; the remapping still takes place.
+ */
+void pnv_phb3_remap_irqs(PnvPHB3 *phb)
+{
+    ICSState *ics = &phb->lsis;
+    uint32_t local, global, count, mask, comp;
+    uint64_t baren;
+    PnvPBCQState *pbcq = &phb->pbcq;
+
+    /*
+     * First check if we are enabled. Unlike real HW we don't separate
+     * TX and RX so we enable if both are set
+     */
+    baren = pbcq->nest_regs[PBCQ_NEST_BAR_EN];
+    if (!(baren & PBCQ_NEST_BAR_EN_IRSN_RX) ||
+        !(baren & PBCQ_NEST_BAR_EN_IRSN_TX)) {
+        ics->offset = 0;
+        return;
+    }
+
+    /* Grab local LSI source ID */
+    local = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]) << 3;
+
+    /* Grab global one and compare */
+    global = GETFIELD(PBCQ_NEST_LSI_SRC,
+                      pbcq->nest_regs[PBCQ_NEST_LSI_SRC_ID]) << 3;
+    if (global != local) {
+        /*
+         * This happens during initialization, let's come back when we
+         * are properly configured
+         */
+        ics->offset = 0;
+        return;
+    }
+
+    /* Get the base on the powerbus */
+    comp = GETFIELD(PBCQ_NEST_IRSN_COMP,
+                    pbcq->nest_regs[PBCQ_NEST_IRSN_COMPARE]);
+    mask = GETFIELD(PBCQ_NEST_IRSN_COMP,
+                    pbcq->nest_regs[PBCQ_NEST_IRSN_MASK]);
+    count = ((~mask) + 1) & 0x7ffff;
+    phb->total_irq = count;
+
+    /* Sanity checks (values are uint32_t, hence %u and not %d) */
+    if ((global + PNV_PHB3_NUM_LSI) > count) {
+        phb3_error(phb, "LSIs out of reach: LSI base=%u total irq=%u", global,
+                   count);
+    }
+
+    if (count > 2048) {
+        phb3_error(phb, "More interrupts than supported: %u", count);
+    }
+
+    if ((comp & mask) != comp) {
+        phb3_error(phb, "IRQ compare bits not in mask: comp=0x%x mask=0x%x",
+                   comp, mask);
+        comp &= mask;
+    }
+    /* Setup LSI offset */
+    ics->offset = comp + global;
+
+    /* Setup MSI offset */
+    pnv_phb3_msi_update_config(&phb->msis, comp, count - PNV_PHB3_NUM_LSI);
+}
+
+/*
+ * Store @val in PHB_LSI_SOURCE_ID, keeping only the PHB_LSI_SRC_ID bits,
+ * then recompute the interrupt source mapping.
+ */
+static void pnv_phb3_lsi_src_id_write(PnvPHB3 *phb, uint64_t val)
+{
+    phb->regs[PHB_LSI_SOURCE_ID >> 3] = val & PHB_LSI_SRC_ID;
+    pnv_phb3_remap_irqs(phb);
+}
+
+/*
+ * Handle a write to PHB_RTC_INVALIDATE.  @val is currently ignored: the
+ * PE number recorded in every DMA space of the PHB is reset to
+ * PHB_INVALID_PE.
+ */
+static void pnv_phb3_rtc_invalidate(PnvPHB3 *phb, uint64_t val)
+{
+    PnvPhb3DMASpace *space;
+
+    /* Always invalidate all for now ... */
+    QLIST_FOREACH(space, &phb->dma_spaces, list) {
+        space->pe_num = PHB_INVALID_PE;
+    }
+}
+
+
+/*
+ * Bring the MSI windows of DMA space @ds in line with PHB_PHB3_CONFIG.
+ *
+ * The 32-bit MSI region lives at offset 0xffff0000 and the 64-bit one at
+ * offset 1 << 60 of the DMA memory region.  Each is added when its enable
+ * bit is set and it is not mapped yet, and removed when the bit is clear
+ * and it is still mapped.
+ */
+static void pnv_phb3_update_msi_regions(PnvPhb3DMASpace *ds)
+{
+    uint64_t config = ds->phb->regs[PHB_PHB3_CONFIG >> 3];
+    bool want32 = (config & PHB_PHB3C_32BIT_MSI_EN) != 0;
+    bool have32 = memory_region_is_mapped(&ds->msi32_mr);
+    bool want64, have64;
+
+    if (want32 && !have32) {
+        memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
+                                    0xffff0000, &ds->msi32_mr);
+    } else if (!want32 && have32) {
+        memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
+                                    &ds->msi32_mr);
+    }
+
+    want64 = (config & PHB_PHB3C_64BIT_MSI_EN) != 0;
+    have64 = memory_region_is_mapped(&ds->msi64_mr);
+
+    if (want64 && !have64) {
+        memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
+                                    (1ull << 60), &ds->msi64_mr);
+    } else if (!want64 && have64) {
+        memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
+                                    &ds->msi64_mr);
+    }
+}
+
+/* Refresh the MSI window mappings of every DMA space of the PHB. */
+static void pnv_phb3_update_all_msi_regions(PnvPHB3 *phb)
+{
+    PnvPhb3DMASpace *space;
+
+    QLIST_FOREACH(space, &phb->dma_spaces, list) {
+        pnv_phb3_update_msi_regions(space);
+    }
+}
+
+/*
+ * Write handler for the PHB3 register space.
+ *
+ * Accesses that fall in the PHB_CONFIG_DATA word are forwarded to PCI
+ * config space.  Every other register only accepts aligned 8-byte
+ * accesses; anything else is logged as a guest error and dropped.
+ *
+ * The value is first filtered per register, then stored in the register
+ * cache, and finally the register specific side effects are run.  The
+ * LEM AND/OR mask registers are the exception: they update their target
+ * register directly and are not stored themselves.
+ */
+void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size)
+{
+    PnvPHB3 *phb = opaque;
+    uint64_t *reg;
+    bool modified;
+
+    /* Config data is the only register that takes sub-word accesses */
+    if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+        pnv_phb3_config_write(phb, off & 0x3, size, val);
+        return;
+    }
+
+    /* Other registers are 64-bit only */
+    if (size != 8 || (off & 0x7) != 0) {
+        phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
+                   off, size);
+        return;
+    }
+
+    /* Filter the value, or handle registers that are never stored */
+    switch (off) {
+    case PHB_M64_UPPER_BITS:
+        val &= 0xfffc000000000000ull;
+        break;
+    case PHB_Q_DMA_R:
+        /*
+         * This is enough logic to make SW happy but we aren't actually
+         * quiescing the DMAs
+         */
+        val = (val & PHB_Q_DMA_R_AUTORESET) ?
+              0 : (val & PHB_Q_DMA_R_QUIESCE_DMA);
+        break;
+    /* LEM stuff */
+    case PHB_LEM_FIR_AND_MASK:
+        phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
+        return;
+    case PHB_LEM_FIR_OR_MASK:
+        phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
+        return;
+    case PHB_LEM_ERROR_AND_MASK:
+        phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
+        return;
+    case PHB_LEM_ERROR_OR_MASK:
+        phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
+        return;
+    case PHB_LEM_WOF:
+        val = 0;
+        break;
+    }
+
+    /* Update the register cache, remembering whether the value changed */
+    reg = &phb->regs[off >> 3];
+    modified = (*reg != val);
+    *reg = val;
+
+    /* Handle side effects */
+    switch (off) {
+    case PHB_PHB3_CONFIG:
+        /* The config register controls both the MSI and M32 windows */
+        if (modified) {
+            pnv_phb3_update_all_msi_regions(phb);
+            pnv_phb3_check_m32(phb);
+        }
+        break;
+    case PHB_M32_BASE_ADDR:
+    case PHB_M32_BASE_MASK:
+    case PHB_M32_START_ADDR:
+        if (modified) {
+            pnv_phb3_check_m32(phb);
+        }
+        break;
+    case PHB_M64_UPPER_BITS:
+        if (modified) {
+            pnv_phb3_check_all_m64s(phb);
+        }
+        break;
+    case PHB_LSI_SOURCE_ID:
+        if (modified) {
+            pnv_phb3_lsi_src_id_write(phb, val);
+        }
+        break;
+
+    /* IODA table accesses */
+    case PHB_IODA_DATA0:
+        pnv_phb3_ioda_write(phb, val);
+        break;
+
+    /* RTC invalidation */
+    case PHB_RTC_INVALIDATE:
+        pnv_phb3_rtc_invalidate(phb, val);
+        break;
+
+    /* FFI request */
+    case PHB_FFI_REQUEST:
+        pnv_phb3_msi_ffi(&phb->msis, val);
+        break;
+
+    /* Silent simple writes */
+    case PHB_CONFIG_ADDRESS:
+    case PHB_IODA_ADDR:
+    case PHB_TCE_KILL:
+    case PHB_TCE_SPEC_CTL:
+    case PHB_PEST_BAR:
+    case PHB_PELTV_BAR:
+    case PHB_RTT_BAR:
+    case PHB_RBA_BAR:
+    case PHB_IVT_BAR:
+    case PHB_FFI_LOCK:
+    case PHB_LEM_FIR_ACCUM:
+    case PHB_LEM_ERROR_MASK:
+    case PHB_LEM_ACTION0:
+    case PHB_LEM_ACTION1:
+        break;
+
+    /* Noise on anything else */
+    default:
+        qemu_log_mask(LOG_UNIMP, "phb3: reg_write 0x%"PRIx64"=%"PRIx64"\n",
+                      off, val);
+    }
+}
+
+/*
+ * MMIO read handler for the PHB3 register space.
+ *
+ * @opaque: the PnvPHB3 being accessed
+ * @off:    byte offset of the access
+ * @size:   access width in bytes
+ *
+ * Reads of the PHB_CONFIG_DATA window are forwarded to
+ * pnv_phb3_config_read(). Other registers must be read as aligned 64-bit
+ * quantities; an invalid access is logged and returns all ones. Unless a
+ * register is special-cased below, the cached value is returned.
+ */
+uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size)
+{
+    PnvPHB3 *phb = opaque;
+    PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+    uint64_t cached;
+
+    if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+        return pnv_phb3_config_read(phb, off & 0x3, size);
+    }
+
+    /* Every other register only takes aligned 8-byte accesses */
+    if ((off & 0x7) != 0 || size != 8) {
+        phb3_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
+                   off, size);
+        return ~0ull;
+    }
+
+    /* Value held in the register cache */
+    cached = phb->regs[off >> 3];
+
+    switch (off) {
+    /* Fixed values: simulate venice DD2.0 */
+    case PHB_VERSION:
+        return 0x000000a300000005ull;
+    case PHB_PCIE_SYSTEM_CONFIG:
+        return 0x441100fc30000000;
+
+    /* IODA table access */
+    case PHB_IODA_DATA0:
+        return pnv_phb3_ioda_read(phb);
+
+    /* The link reads as trained whenever a device sits at devfn 1.0 */
+    case PHB_PCIE_DLP_TRAIN_CTL:
+        return pci_find_device(pci->bus, 1, 0)
+            ? (PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TC_DL_LINKACT)
+            : 0;
+
+    /* FFI lock: reading takes the lock and yields the previous state */
+    case PHB_FFI_LOCK:
+        phb->regs[off >> 3] = cached | PHB_FFI_LOCK_STATE;
+        return cached;
+
+    /* DMA read sync always reports completion */
+    case PHB_DMARD_SYNC:
+        return PHB_DMARD_SYNC_COMPLETE;
+
+    /* Plain cached registers */
+    case PHB_PHB3_CONFIG:
+    case PHB_M32_BASE_ADDR:
+    case PHB_M32_BASE_MASK:
+    case PHB_M32_START_ADDR:
+    case PHB_CONFIG_ADDRESS:
+    case PHB_IODA_ADDR:
+    case PHB_RTC_INVALIDATE:
+    case PHB_TCE_KILL:
+    case PHB_TCE_SPEC_CTL:
+    case PHB_PEST_BAR:
+    case PHB_PELTV_BAR:
+    case PHB_RTT_BAR:
+    case PHB_RBA_BAR:
+    case PHB_IVT_BAR:
+    case PHB_M64_UPPER_BITS:
+    case PHB_LEM_FIR_ACCUM:
+    case PHB_LEM_ERROR_MASK:
+    case PHB_LEM_ACTION0:
+    case PHB_LEM_ACTION1:
+        break;
+
+    /* Anything else is reported as unimplemented */
+    default:
+        qemu_log_mask(LOG_UNIMP, "phb3: reg_read 0x%"PRIx64"=%"PRIx64"\n",
+                      off, cached);
+    }
+    return cached;
+}
+
+/* Access callbacks for the PHB3 register space: big-endian, 1 to 8 bytes */
+static const MemoryRegionOps pnv_phb3_reg_ops = {
+    .endianness = DEVICE_BIG_ENDIAN,
+    .read = pnv_phb3_reg_read,
+    .write = pnv_phb3_reg_write,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+};
+
+/*
+ * Map an interrupt number to one of four LSIs by keeping its low two bits.
+ * @pci_dev is not used.
+ */
+static int pnv_phb3_map_irq(PCIDevice *pci_dev, int irq_num)
+{
+    /* TODO: check that this mapping is the right one */
+    const int lsi = irq_num & 3;
+
+    return lsi;
+}
+
+/*
+ * Drive one of the PHB LSI lines.
+ *
+ * @opaque:  the PnvPHB3
+ * @irq_num: LSI index, expected to be 0..3
+ * @level:   new level of the line
+ *
+ * A request outside 0..3 is logged and dropped so that it can never be
+ * used to index phb->qirqs[].
+ */
+static void pnv_phb3_set_irq(void *opaque, int irq_num, int level)
+{
+    PnvPHB3 *phb = opaque;
+
+    /* LSI only ... */
+    if (irq_num < 0 || irq_num > 3) {
+        phb3_error(phb, "Unknown IRQ to set %d", irq_num);
+        return;
+    }
+    qemu_set_irq(phb->qirqs[irq_num], level);
+}
+
+/*
+ * Resolve and cache the PE number of a DMA space.
+ *
+ * The PE# is looked up in the RTT, a table of big-endian 16-bit entries in
+ * guest memory located by the PHB_RTT_BAR register and indexed by the
+ * requester ID (bus number << 8 | devfn). A successful lookup is cached in
+ * ds->pe_num so later calls return immediately.
+ *
+ * Returns true when ds->pe_num holds a valid PE#, false when the RTT BAR
+ * is disabled, the entry cannot be read or it holds an out-of-range PE#.
+ */
+static bool pnv_phb3_resolve_pe(PnvPhb3DMASpace *ds)
+{
+    uint64_t rtt, addr;
+    uint32_t rid;
+    uint16_t rte;
+    int bus_num;
+
+    /* Already resolved ? */
+    if (ds->pe_num != PHB_INVALID_PE) {
+        return true;
+    }
+
+    /* We need to lookup the RTT */
+    rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
+    if (!(rtt & PHB_RTT_BAR_ENABLE)) {
+        phb3_error(ds->phb, "DMA with RTT BAR disabled !");
+        /* Set error bits ? fence ? ... */
+        return false;
+    }
+
+    /* Read RTE: two bytes per requester ID */
+    bus_num = pci_bus_num(ds->bus);
+    rid = (bus_num << 8) | ds->devfn;
+    addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
+    addr += 2 * rid;
+    if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) {
+        phb3_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
+        /* Set error bits ? fence ? ... */
+        return false;
+    }
+    rte = be16_to_cpu(rte);
+
+    /* Fail upon reading of invalid PE# */
+    if (rte >= PNV_PHB3_NUM_PE) {
+        /* Report the full requester ID, not just the devfn */
+        phb3_error(ds->phb, "RTE for RID 0x%x invalid (%04x)", rid, rte);
+        /* Set error bits ? fence ? ... */
+        return false;
+    }
+    ds->pe_num = rte;
+    return true;
+}
+
+/*
+ * Translate a DMA address through one TVE.
+ *
+ * @ds:       DMA space of the requesting device
+ * @addr:     bus address to translate
+ * @is_write: true for a write access
+ * @tve:      raw TVE value selected by the caller
+ * @tlb:      entry to fill in; not modified on the paths that return early
+ *
+ * A TVE whose IO page size field is 0 maps the address without any TCE
+ * lookup (only accepted when TVE bit 51 is set). Otherwise the TCE table
+ * is walked in guest memory, one big-endian 64-bit read per level, and the
+ * final TCE provides the translated address and the permissions.
+ */
+static void pnv_phb3_translate_tve(PnvPhb3DMASpace *ds, hwaddr addr,
+                                   bool is_write, uint64_t tve,
+                                   IOMMUTLBEntry *tlb)
+{
+    uint64_t tta = GETFIELD(IODA2_TVT_TABLE_ADDR, tve);
+    int32_t lev = GETFIELD(IODA2_TVT_NUM_LEVELS, tve);
+    uint32_t tts = GETFIELD(IODA2_TVT_TCE_TABLE_SIZE, tve);
+    uint32_t tps = GETFIELD(IODA2_TVT_IO_PSIZE, tve);
+    PnvPHB3 *phb = ds->phb;
+
+    /* Invalid levels */
+    if (lev > 4) {
+        phb3_error(phb, "Invalid #levels in TVE %d", lev);
+        return;
+    }
+
+    /* IO Page Size of 0 means untranslated, else use TCEs */
+    if (tps == 0) {
+        /*
+         * We only support non-translate in top window.
+         *
+         * TODO: Venice/Murano support it on bottom window above 4G and
+         * Naples supports it on everything
+         */
+        if (!(tve & PPC_BIT(51))) {
+            phb3_error(phb, "xlate for invalid non-translate TVE");
+            return;
+        }
+        /* TODO: Handle boundaries */
+
+        /* Use 4k pages like q35 ... for now */
+        tlb->iova = addr & 0xfffffffffffff000ull;
+        tlb->translated_addr = addr & 0x0003fffffffff000ull;
+        tlb->addr_mask = 0xfffull;
+        tlb->perm = IOMMU_RW;
+    } else {
+        uint32_t tce_shift, tbl_shift, sh;
+        uint64_t base, taddr, tce, tce_mask, idx;
+
+        /* TVE disabled ? */
+        if (tts == 0) {
+            phb3_error(phb, "xlate for invalid translated TVE");
+            return;
+        }
+
+        /* Address bits per bottom level TCE entry */
+        tce_shift = tps + 11;
+
+        /* Address bits per table level */
+        tbl_shift = tts + 8;
+
+        /* Top level table base address */
+        base = tta << 12;
+
+        /* Total shift to first level */
+        sh = tbl_shift * lev + tce_shift;
+
+        /* TODO: Multi-level untested */
+        while ((lev--) >= 0) {
+            /*
+             * Index into the table of this level. The mask is built with
+             * a 64-bit constant because tbl_shift may exceed the width of
+             * "long" on some hosts. For multi-level tables the total shift
+             * can reach 64 or more, which is undefined for a 64-bit shift;
+             * there are no address bits that high, so the index is 0.
+             */
+            idx = sh < 64 ? (addr >> sh) & ((1ull << tbl_shift) - 1) : 0;
+
+            /* Grab the TCE address: entries are 8 bytes wide */
+            taddr = base | (idx << 3);
+            if (dma_memory_read(&address_space_memory, taddr, &tce,
+                                sizeof(tce))) {
+                phb3_error(phb, "Failed to read TCE at 0x%"PRIx64, taddr);
+                return;
+            }
+            tce = be64_to_cpu(tce);
+
+            /* Check permission for indirect TCE */
+            if ((lev >= 0) && !(tce & 3)) {
+                phb3_error(phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
+                phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
+                           is_write ? 'W' : 'R', tve);
+                phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
+                           tta, lev, tts, tps);
+                return;
+            }
+            sh -= tbl_shift;
+            base = tce & ~0xfffull;
+        }
+
+        /* We exit the loop with TCE being the final TCE */
+        tce_mask = ~((1ull << tce_shift) - 1);
+        tlb->iova = addr & tce_mask;
+        tlb->translated_addr = tce & tce_mask;
+        tlb->addr_mask = ~tce_mask;
+        tlb->perm = tce & 3;
+
+        /* TCE bit 1 grants write access, bit 0 grants read access */
+        if ((is_write && !(tce & 2)) || (!is_write && !(tce & 1))) {
+            phb3_error(phb, "TCE access fault at 0x%"PRIx64, taddr);
+            phb3_error(phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
+                       is_write ? 'W' : 'R', tve);
+            phb3_error(phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
+                       tta, lev, tts, tps);
+        }
+    }
+}
+
+/*
+ * IOMMU translate callback of the PHB3 DMA address space.
+ *
+ * The top four address bits select the kind of access: 0 is regular DMA
+ * (translated through one of the two TVEs of the device's PE, picked by
+ * address bit 59), anything else is rejected. The returned entry has
+ * IOMMU_NONE permission unless the TVE translation filled it in.
+ */
+static IOMMUTLBEntry pnv_phb3_translate_iommu(IOMMUMemoryRegion *iommu,
+                                              hwaddr addr,
+                                              IOMMUAccessFlags flag,
+                                              int iommu_idx)
+{
+    PnvPhb3DMASpace *ds = container_of(iommu, PnvPhb3DMASpace, dma_mr);
+    PnvPHB3 *phb = ds->phb;
+    IOMMUTLBEntry entry = {
+        .target_as = &address_space_memory,
+        .iova = addr,
+        .translated_addr = 0,
+        .addr_mask = ~(hwaddr)0,
+        .perm = IOMMU_NONE,
+    };
+    uint64_t config, tve;
+    int window;
+
+    /* The PE# of the device is needed to pick the TVE */
+    if (!pnv_phb3_resolve_pe(ds)) {
+        phb3_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+                   ds->bus, pci_bus_num(ds->bus), ds->devfn);
+        return entry;
+    }
+
+    /* Dispatch on the top address bits */
+    switch (addr >> 60) {
+    case 0:
+        /* Either DMA or, when enabled, the 32-bit MSI window */
+        config = phb->regs[PHB_PHB3_CONFIG >> 3];
+        if ((config & PHB_PHB3C_32BIT_MSI_EN) &&
+            ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
+            phb3_error(phb, "xlate on 32-bit MSI region");
+            return entry;
+        }
+        /* Choose TVE XXX Use PHB3 Control Register */
+        window = (addr >> 59) & 1;
+        tve = phb->ioda_TVT[ds->pe_num * 2 + window];
+        pnv_phb3_translate_tve(ds, addr, flag & IOMMU_WO, tve, &entry);
+        break;
+    case 1:
+        phb3_error(phb, "xlate on 64-bit MSI region");
+        break;
+    default:
+        phb3_error(phb, "xlate on unsupported address 0x%"PRIx64, addr);
+    }
+    return entry;
+}
+
+/* QOM type name and checked cast for the PHB3 IOMMU memory region */
+#define TYPE_PNV_PHB3_IOMMU_MEMORY_REGION "pnv-phb3-iommu-memory-region"
+#define PNV_PHB3_IOMMU_MEMORY_REGION(o) \
+    OBJECT_CHECK(IOMMUMemoryRegion, (o), TYPE_PNV_PHB3_IOMMU_MEMORY_REGION)
+
+/* Class initializer: install the PHB3 translate callback */
+static void pnv_phb3_iommu_memory_region_class_init(ObjectClass *klass,
+                                                    void *data)
+{
+    IOMMU_MEMORY_REGION_CLASS(klass)->translate = pnv_phb3_translate_iommu;
+}
+
+/* QOM type description of the PHB3 IOMMU memory region */
+static const TypeInfo pnv_phb3_iommu_memory_region_info = {
+    .name = TYPE_PNV_PHB3_IOMMU_MEMORY_REGION,
+    .parent = TYPE_IOMMU_MEMORY_REGION,
+    .class_init = pnv_phb3_iommu_memory_region_class_init,
+};
+
+/*
+ * MSI/MSIX memory region implementation.
+ * The same handler serves both MSI and MSIX.
+ *
+ * A write to the region is turned into an MSI for the PE of the device
+ * owning the DMA space; nothing is sent when the PE# cannot be resolved.
+ */
+static void pnv_phb3_msi_write(void *opaque, hwaddr addr,
+                               uint64_t data, unsigned size)
+{
+    PnvPhb3DMASpace *ds = opaque;
+
+    if (pnv_phb3_resolve_pe(ds)) {
+        pnv_phb3_msi_send(&ds->phb->msis, addr, data, ds->pe_num);
+        return;
+    }
+
+    phb3_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+               ds->bus, pci_bus_num(ds->bus), ds->devfn);
+}
+
+/*
+ * Reads of the MSI region are not meaningful (the original note here says
+ * the result is undefined by the PCI spec): log the access and return all
+ * ones.
+ */
+static uint64_t pnv_phb3_msi_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvPhb3DMASpace *ds = opaque;
+    const uint64_t all_ones = (uint64_t)-1;
+
+    phb3_error(ds->phb, "invalid read @ 0x%" HWADDR_PRIx, addr);
+    return all_ones;
+}
+
+/* Access callbacks of the per-device MSI regions (little-endian) */
+static const MemoryRegionOps pnv_phb3_msi_ops = {
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .write = pnv_phb3_msi_write,
+    .read = pnv_phb3_msi_read,
+};
+
+/*
+ * PCI IOMMU hook: return the DMA address space of device (@bus, @devfn).
+ *
+ * DMA spaces are created on first use and kept on phb->dma_spaces. A new
+ * space gets an IOMMU memory region, an address space on top of it and two
+ * MSI regions, and starts with an unresolved PE#.
+ */
+static AddressSpace *pnv_phb3_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+    PnvPHB3 *phb = opaque;
+    PnvPhb3DMASpace *ds;
+
+    /* Reuse the space if this device already has one */
+    QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+        if (ds->bus == bus && ds->devfn == devfn) {
+            return &ds->dma_as;
+        }
+    }
+
+    /* First request for this device: build a new space */
+    ds = g_malloc0(sizeof(PnvPhb3DMASpace));
+    ds->phb = phb;
+    ds->bus = bus;
+    ds->devfn = devfn;
+    ds->pe_num = PHB_INVALID_PE;
+
+    memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
+                             TYPE_PNV_PHB3_IOMMU_MEMORY_REGION,
+                             OBJECT(phb), "phb3_iommu", UINT64_MAX);
+    address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr),
+                       "phb3_iommu");
+
+    memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb3_msi_ops,
+                          ds, "msi32", 0x10000);
+    memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb3_msi_ops,
+                          ds, "msi64", 0x100000);
+    pnv_phb3_update_msi_regions(ds);
+
+    QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
+
+    return &ds->dma_as;
+}
+
+/*
+ * Instance initializer: set up the DMA space list and create the child
+ * objects (LSI sources, MSI sources, PBCQ and root port).
+ */
+static void pnv_phb3_instance_init(Object *obj)
+{
+    PnvPHB3 *phb = PNV_PHB3(obj);
+    DeviceState *root_port;
+
+    QLIST_INIT(&phb->dma_spaces);
+
+    /* LSI sources; the offset is a default that HW inits will fix */
+    object_initialize_child(obj, "lsi", &phb->lsis, sizeof(phb->lsis),
+                            TYPE_ICS, &error_abort, NULL);
+    phb->lsis.offset = 0;
+
+    /* MSI sources */
+    object_initialize_child(obj, "msi", &phb->msis, sizeof(phb->msis),
+                            TYPE_PHB3_MSI, &error_abort, NULL);
+
+    /* Power Bus Common Queue */
+    object_initialize_child(obj, "pbcq", &phb->pbcq, sizeof(phb->pbcq),
+                            TYPE_PNV_PBCQ, &error_abort, NULL);
+
+    /* Root port: single function device at devfn 0.0 */
+    object_initialize_child(obj, "root", &phb->root, sizeof(phb->root),
+                            TYPE_PNV_PHB3_ROOT_PORT, &error_abort, NULL);
+    root_port = DEVICE(&phb->root);
+    qdev_prop_set_int32(root_port, "addr", PCI_DEVFN(0, 0));
+    qdev_prop_set_bit(root_port, "multifunction", false);
+}
+
+/*
+ * Realize the PHB3: validate the PHB index, realize the interrupt sources
+ * and the PBCQ, create the register and PCI memory regions, register the
+ * root bus with its IOMMU hook and finally plug the root port.
+ *
+ * Errors from realizing the children are propagated through @errp.
+ */
+static void pnv_phb3_realize(DeviceState *dev, Error **errp)
+{
+    PnvPHB3 *phb = PNV_PHB3(dev);
+    PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+    PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
+    Object *lsis = OBJECT(&phb->lsis);
+    Object *msis = OBJECT(&phb->msis);
+    Object *pbcq = OBJECT(&phb->pbcq);
+    DeviceState *root;
+    Error *err = NULL;
+    int n;
+
+    if (phb->phb_id >= PNV8_CHIP_PHB3_MAX) {
+        error_setg(errp, "invalid PHB index: %d", phb->phb_id);
+        return;
+    }
+
+    /* LSI sources */
+    object_property_set_link(lsis, OBJECT(pnv), "xics", &error_abort);
+    object_property_set_int(lsis, PNV_PHB3_NUM_LSI, "nr-irqs", &error_abort);
+    object_property_set_bool(lsis, true, "realized", &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    for (n = 0; n < phb->lsis.nr_irqs; n++) {
+        ics_set_irq_type(&phb->lsis, n, true);
+    }
+
+    phb->qirqs = qemu_allocate_irqs(ics_set_irq, &phb->lsis,
+                                    phb->lsis.nr_irqs);
+
+    /* MSI sources */
+    object_property_set_link(msis, OBJECT(phb), "phb", &error_abort);
+    object_property_set_link(msis, OBJECT(pnv), "xics", &error_abort);
+    object_property_set_int(msis, PHB3_MAX_MSI, "nr-irqs", &error_abort);
+    object_property_set_bool(msis, true, "realized", &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /* Power Bus Common Queue */
+    object_property_set_link(pbcq, OBJECT(phb), "phb", &error_abort);
+    object_property_set_bool(pbcq, true, "realized", &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /* Controller registers */
+    memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb3_reg_ops, phb,
+                          "phb3-regs", 0x1000);
+
+    /*
+     * PHB3 has no IO space, but qemu gets very upset without an IO region
+     * to anchor IO BARs onto, so one is initialized and never hooked up
+     * to anything.
+     */
+    memory_region_init(&phb->pci_io, OBJECT(phb), "pci-io", 0x10000);
+    memory_region_init(&phb->pci_mmio, OBJECT(phb), "pci-mmio",
+                       PCI_MMIO_TOTAL_SIZE);
+
+    pci->bus = pci_register_root_bus(dev, "root-bus",
+                                     pnv_phb3_set_irq, pnv_phb3_map_irq, phb,
+                                     &phb->pci_mmio, &phb->pci_io,
+                                     0, 4, TYPE_PNV_PHB3_ROOT_BUS);
+
+    pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb);
+
+    /* Add the single root port: chassis is the chip, slot is the PHB */
+    root = DEVICE(&phb->root);
+    qdev_prop_set_uint8(root, "chassis", phb->chip_id);
+    qdev_prop_set_uint16(root, "slot", phb->phb_id);
+    qdev_set_parent_bus(root, BUS(pci->bus));
+    qdev_init_nofail(root);
+}
+
+/*
+ * Bring the PHB mappings in line with the state of the PBCQ BARs: the
+ * register region is always unmapped first and mapped again at offset 0
+ * of the PHB BAR when that BAR is mapped; the M32 window (if mapped) and
+ * all M64 windows are then re-checked.
+ */
+void pnv_phb3_update_regions(PnvPHB3 *phb)
+{
+    MemoryRegion *phbbar = &phb->pbcq.phbbar;
+
+    /* Always start from an unmapped register region */
+    if (memory_region_is_mapped(&phb->mr_regs)) {
+        memory_region_del_subregion(phbbar, &phb->mr_regs);
+    }
+
+    /* Map the registers when the PHB BAR is enabled */
+    if (memory_region_is_mapped(phbbar)) {
+        /* TODO: We should use the PHB BAR 2 register but we don't ... */
+        memory_region_add_subregion(phbbar, 0, &phb->mr_regs);
+    }
+
+    /* Check/update m32, then every m64 */
+    if (memory_region_is_mapped(&phb->mr_m32)) {
+        pnv_phb3_check_m32(phb);
+    }
+    pnv_phb3_check_all_m64s(phb);
+}
+
+/*
+ * Return the root bus path, formatted as "00<chip-id>:<phb-id>" in hex
+ * into the buffer kept in the PHB itself. @rootbus is not used.
+ */
+static const char *pnv_phb3_root_bus_path(PCIHostState *host_bridge,
+                                          PCIBus *rootbus)
+{
+    PnvPHB3 *phb = PNV_PHB3(host_bridge);
+    char *path = phb->bus_path;
+
+    snprintf(path, sizeof(phb->bus_path), "00%02x:%02x",
+             phb->chip_id, phb->phb_id);
+    return path;
+}
+
+static Property pnv_phb3_properties[] = { /* qdev properties of the PHB3 */
+ DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0), /* PHB index, checked against PNV8_CHIP_PHB3_MAX at realize */
+ DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0), /* chip id, used for the bus path and the root port chassis */
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initializer of the PHB3 host bridge: hook realize, the properties
+ * and the root bus path callback. The device is a bridge and cannot be
+ * created by the user.
+ */
+static void pnv_phb3_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
+
+    dc->realize = pnv_phb3_realize;
+    dc->user_creatable = false;
+    device_class_set_props(dc, pnv_phb3_properties);
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+
+    hc->root_bus_path = pnv_phb3_root_bus_path;
+}
+
+/* QOM type description of the PHB3 host bridge */
+static const TypeInfo pnv_phb3_type_info = {
+    .name = TYPE_PNV_PHB3,
+    .parent = TYPE_PCIE_HOST_BRIDGE,
+    .instance_size = sizeof(PnvPHB3),
+    .instance_init = pnv_phb3_instance_init,
+    .class_init = pnv_phb3_class_init,
+};
+
+/*
+ * Class initializer of the PHB3 root bus. PHB3 has only a single root
+ * complex, so the bus is limited to one device.
+ */
+static void pnv_phb3_root_bus_class_init(ObjectClass *klass, void *data)
+{
+    BUS_CLASS(klass)->max_dev = 1;
+}
+
+/* QOM type description of the PHB3 root bus (a PCIe bus) */
+static const TypeInfo pnv_phb3_root_bus_info = {
+    .name = TYPE_PNV_PHB3_ROOT_BUS,
+    .parent = TYPE_PCIE_BUS,
+    .interfaces = (InterfaceInfo[]) {
+        { INTERFACE_PCIE_DEVICE },
+        { }
+    },
+    .class_init = pnv_phb3_root_bus_class_init,
+};
+
+/*
+ * Realize the PHB3 root port: chain to the parent class realize handler
+ * and hand any error it reports to the caller.
+ */
+static void pnv_phb3_root_port_realize(DeviceState *dev, Error **errp)
+{
+    PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
+    Error *err = NULL;
+
+    rpc->parent_realize(dev, &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+    }
+}
+
+/*
+ * Class initializer of the PHB3 root port: PCI identification, capability
+ * offsets and the realize hook chained onto the parent class.
+ */
+static void pnv_phb3_root_port_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
+    PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
+
+    /* Generic device setup; the port is only created by the PHB */
+    dc->desc = "IBM PHB3 PCIE Root Port";
+    dc->user_creatable = false;
+    device_class_set_parent_realize(dc, pnv_phb3_root_port_realize,
+                                    &rpc->parent_realize);
+
+    /* PCI identification */
+    pdc->vendor_id = PCI_VENDOR_ID_IBM;
+    pdc->device_id = 0x03dc;
+    pdc->revision = 0;
+
+    /* Config space offsets of the PCIe and AER capabilities */
+    rpc->exp_offset = 0x48;
+    rpc->aer_offset = 0x100;
+}
+
+/* QOM type description of the PHB3 root port */
+static const TypeInfo pnv_phb3_root_port_info = {
+    .parent = TYPE_PCIE_ROOT_PORT,
+    .name = TYPE_PNV_PHB3_ROOT_PORT,
+    .class_init = pnv_phb3_root_port_class_init,
+    .instance_size = sizeof(PnvPHB3RootPort),
+};
+
+/* Register the QOM types defined in this file */
+static void pnv_phb3_register_types(void)
+{
+    static const TypeInfo *const infos[] = {
+        &pnv_phb3_root_bus_info,
+        &pnv_phb3_root_port_info,
+        &pnv_phb3_type_info,
+        &pnv_phb3_iommu_memory_region_info,
+    };
+    size_t n;
+
+    for (n = 0; n < sizeof(infos) / sizeof(infos[0]); n++) {
+        type_register_static(infos[n]);
+    }
+}
+
+type_init(pnv_phb3_register_types)
diff --git a/hw/pci-host/pnv_phb3_msi.c b/hw/pci-host/pnv_phb3_msi.c
new file mode 100644
index 0000000..ecfc1b2
--- /dev/null
+++ b/hw/pci-host/pnv_phb3_msi.c
@@ -0,0 +1,349 @@
+/*
+ * QEMU PowerPC PowerNV (POWER8) PHB3 model
+ *
+ * Copyright (c) 2014-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "hw/pci-host/pnv_phb3_regs.h"
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/ppc/pnv.h"
+#include "hw/pci/msi.h"
+#include "monitor/monitor.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+#include "sysemu/reset.h"
+
+/*
+ * Compute the guest address of the IVE of MSI source @srcno.
+ *
+ * The IVT is located by the PHB_IVT_BAR register; entries are 128 bytes
+ * apart when PHB_CTRL_IVE_128_BYTES is set in PHB_CONTROL, 16 otherwise.
+ *
+ * Returns 0 when the IVT BAR is disabled or @srcno is beyond the table
+ * length (both cases are logged as guest errors).
+ */
+static uint64_t phb3_msi_ive_addr(PnvPHB3 *phb, int srcno)
+{
+    uint64_t bar = phb->regs[PHB_IVT_BAR >> 3];
+    uint64_t ctrl = phb->regs[PHB_CONTROL >> 3];
+    int stride;
+
+    if (!(bar & PHB_IVT_BAR_ENABLE)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Failed access to disable IVT BAR !");
+        return 0;
+    }
+
+    if (srcno >= (bar & PHB_IVT_LENGTH_MASK)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "MSI out of bounds (%d vs 0x%"PRIx64")",
+                      srcno, (uint64_t) (bar & PHB_IVT_LENGTH_MASK));
+        return 0;
+    }
+
+    /* Distance between two consecutive entries */
+    stride = (ctrl & PHB_CTRL_IVE_128_BYTES) ? 128 : 16;
+
+    return (bar & PHB_IVT_BASE_ADDRESS_MASK) + stride * srcno;
+}
+
+/*
+ * Read the first 64 bits of the IVE of source @srcno from guest memory.
+ *
+ * On success the value, converted from big-endian, is stored in @out_ive
+ * and true is returned. Returns false, leaving @out_ive untouched, when
+ * the entry address cannot be computed or the read fails.
+ */
+static bool phb3_msi_read_ive(PnvPHB3 *phb, int srcno, uint64_t *out_ive)
+{
+    uint64_t raw;
+    uint64_t where = phb3_msi_ive_addr(phb, srcno);
+
+    if (where == 0) {
+        return false;
+    }
+
+    if (dma_memory_read(&address_space_memory, where, &raw, sizeof(raw))) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Failed to read IVE at 0x%" PRIx64,
+                      where);
+        return false;
+    }
+
+    *out_ive = be64_to_cpu(raw);
+    return true;
+}
+
+/*
+ * Set the P bit of the IVE of source @srcno: one byte, made of P (bit 0)
+ * and @gen shifted left by one, is written at offset 4 of the entry.
+ * Nothing is written when the entry address cannot be computed.
+ */
+static void phb3_msi_set_p(Phb3MsiState *msi, int srcno, uint8_t gen)
+{
+    uint8_t byte = 0x01 | (gen << 1);
+    uint64_t where = phb3_msi_ive_addr(msi->phb, srcno);
+
+    if (where == 0) {
+        return;
+    }
+
+    if (dma_memory_write(&address_space_memory, where + 4, &byte, 1)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "Failed to write IVE (set P) at 0x%" PRIx64, where);
+    }
+}
+
+/*
+ * Set the Q bit of the IVE of source @srcno: a byte of value 1 is written
+ * at offset 5 of the entry. Nothing is written when the entry address
+ * cannot be computed.
+ */
+static void phb3_msi_set_q(Phb3MsiState *msi, int srcno)
+{
+    uint8_t byte = 0x01;
+    uint64_t where = phb3_msi_ive_addr(msi->phb, srcno);
+
+    if (where == 0) {
+        return;
+    }
+
+    if (dma_memory_write(&address_space_memory, where + 5, &byte, 1)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "Failed to write IVE (set Q) at 0x%" PRIx64, where);
+    }
+}
+
+/*
+ * Try to deliver MSI source @srcno according to the P/Q state of its IVE.
+ *
+ * With @force the P/Q bits are treated as clear. State 00 delivers the
+ * interrupt (or only sets Q when the priority is 0xff, i.e. masked),
+ * state 10 records the event by setting Q and any state with Q already
+ * set drops it.
+ */
+static void phb3_msi_try_send(Phb3MsiState *msi, int srcno, bool force)
+{
+    ICSState *ics = ICS(msi);
+    uint64_t ive;
+    uint64_t target, priority, state, generation;
+
+    if (!phb3_msi_read_ive(msi->phb, srcno, &ive)) {
+        return;
+    }
+
+    /*
+     * The low order 2 bits of the server field are the link pointer
+     * (Type II interrupts): shift them out to get a valid IRQ server.
+     */
+    target = GETFIELD(IODA2_IVT_SERVER, ive) >> 2;
+    priority = GETFIELD(IODA2_IVT_PRIORITY, ive);
+    generation = GETFIELD(IODA2_IVT_GEN, ive);
+
+    /* state is P:Q, P being the high bit */
+    state = force ? 0 : (GETFIELD(IODA2_IVT_Q, ive) |
+                         (GETFIELD(IODA2_IVT_P, ive) << 1));
+
+    if (state == 0) {
+        if (priority == 0xff) {
+            /* Masked: just remember the event in Q */
+            phb3_msi_set_q(msi, srcno);
+        } else {
+            /* Enabled: mark it presented and send */
+            phb3_msi_set_p(msi, srcno, generation);
+            icp_irq(ics, target, srcno + ics->offset, priority);
+        }
+    } else if (state == 2) {
+        /* Already pending: queue one more in Q */
+        phb3_msi_set_q(msi, srcno);
+    }
+    /* Any state with Q set: the event is dropped */
+}
+
+/* qemu_irq handler of the MSI sources: a non-zero level triggers a send */
+static void phb3_msi_set_irq(void *opaque, int srcno, int val)
+{
+    Phb3MsiState *msi = PHB3_MSI(opaque);
+
+    if (val == 0) {
+        return;
+    }
+    phb3_msi_try_send(msi, srcno, false);
+}
+
+
+/*
+ * Trigger the MSI designated by an (address, data) pair.
+ *
+ * @msi:    MSI source state
+ * @addr:   address written; bits 4..19 give the source number
+ * @data:   data written; its low 5 bits are OR-ed into the source number
+ * @dev_pe: PE# of the sender, or a negative value to skip the PE check
+ *
+ * The request is dropped (and logged) when the source is out of range or
+ * when the IVE is assigned to a PE other than @dev_pe.
+ */
+void pnv_phb3_msi_send(Phb3MsiState *msi, uint64_t addr, uint16_t data,
+                       int32_t dev_pe)
+{
+    ICSState *ics = ICS(msi);
+    const uint32_t srcno = ((addr >> 4) & 0xffff) | (data & 0x1f);
+
+    if (srcno >= ics->nr_irqs) {
+        qemu_log_mask(LOG_GUEST_ERROR, "MSI %d out of bounds", srcno);
+        return;
+    }
+
+    /* Only check ownership when the sender has a PE */
+    if (dev_pe >= 0) {
+        uint64_t ive;
+        uint16_t owner;
+
+        if (!phb3_msi_read_ive(msi->phb, srcno, &ive)) {
+            return;
+        }
+        owner = GETFIELD(IODA2_IVT_PE, ive);
+        if (owner != dev_pe) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "MSI %d send by PE#%d but assigned to PE#%d",
+                          srcno, dev_pe, owner);
+            return;
+        }
+    }
+
+    qemu_irq_pulse(msi->qirqs[srcno]);
+}
+
+/*
+ * Handle a write of @val to the FFI request register: send the MSI that
+ * @val designates, without any PE check, then release the FFI lock.
+ */
+void pnv_phb3_msi_ffi(Phb3MsiState *msi, uint64_t val)
+{
+    PnvPHB3 *phb = msi->phb;
+
+    pnv_phb3_msi_send(msi, val, 0, -1);
+    phb->regs[PHB_FFI_LOCK >> 3] = 0;
+}
+
+/*
+ * ICS reject hook: remember that interrupt @nr was rejected so that
+ * phb3_msi_resend() can present it again.
+ *
+ * One bit per source is kept in msi->rba[], 64 sources per word, and
+ * msi->rba_sum has one bit per word that contains a set bit.
+ */
+static void phb3_msi_reject(ICSState *ics, uint32_t nr)
+{
+    Phb3MsiState *msi = PHB3_MSI(ics);
+    unsigned int srcno = nr - ics->offset;
+    unsigned int idx = srcno >> 6;
+    /* Must be 64 bits wide: a narrower type loses sources 32..63 */
+    uint64_t bit = 1ull << (srcno & 0x3f);
+
+    assert(srcno < PHB3_MAX_MSI);
+
+    msi->rba[idx] |= bit;
+    msi->rba_sum |= (1u << idx);
+}
+
+/*
+ * ICS resend hook: present again every interrupt recorded as rejected.
+ *
+ * msi->rba_sum has one bit per 64-bit word of msi->rba[] holding rejected
+ * sources. Each recorded source is cleared and re-sent with the P/Q state
+ * of its IVE ignored.
+ */
+static void phb3_msi_resend(ICSState *ics)
+{
+    Phb3MsiState *msi = PHB3_MSI(ics);
+    unsigned int i, j;
+
+    if (msi->rba_sum == 0) {
+        return;
+    }
+
+    for (i = 0; i < 32; i++) {
+        if ((msi->rba_sum & (1u << i)) == 0) {
+            continue;
+        }
+        msi->rba_sum &= ~(1u << i);
+        for (j = 0; j < 64; j++) {
+            if ((msi->rba[i] & (1ull << j)) == 0) {
+                continue;
+            }
+            /*
+             * The mask has to be 64 bits wide: a 32-bit one is undefined
+             * for j >= 32 and, once widened, would also clear the upper
+             * half of the word and lose the rejects recorded there.
+             */
+            msi->rba[i] &= ~(1ull << j);
+            phb3_msi_try_send(msi, i * 64 + j, true);
+        }
+    }
+}
+
+/* Device reset: run the parent class reset, then forget all rejects */
+static void phb3_msi_reset(DeviceState *dev)
+{
+    Phb3MsiState *msi = PHB3_MSI(dev);
+    ICSStateClass *icsc = ICS_GET_CLASS(dev);
+
+    icsc->parent_reset(dev);
+
+    msi->rba_sum = 0;
+    memset(msi->rba, 0, sizeof(msi->rba));
+}
+
+/* Adapter giving phb3_msi_reset() the void * signature of reset handlers */
+static void phb3_msi_reset_handler(void *dev)
+{
+    DeviceState *d = dev;
+
+    phb3_msi_reset(d);
+}
+
+/*
+ * Update the interrupt range served by the MSI sources.
+ *
+ * @base:  first interrupt number (stored as the ICS offset)
+ * @count: number of interrupts, clamped to PHB3_MAX_MSI
+ */
+void pnv_phb3_msi_update_config(Phb3MsiState *msi, uint32_t base,
+                                uint32_t count)
+{
+    ICSState *ics = ICS(msi);
+
+    ics->nr_irqs = (count > PHB3_MAX_MSI) ? PHB3_MAX_MSI : count;
+    ics->offset = base;
+}
+
+/*
+ * Realize the MSI sources: the "phb" link must be set beforehand. After
+ * the parent class realize succeeds, one qemu_irq per source is allocated
+ * and a reset handler is registered.
+ */
+static void phb3_msi_realize(DeviceState *dev, Error **errp)
+{
+    Phb3MsiState *msi = PHB3_MSI(dev);
+    ICSState *ics = ICS(msi);
+    ICSStateClass *icsc = ICS_GET_CLASS(ics);
+    Error *err = NULL;
+
+    assert(msi->phb);
+
+    icsc->parent_realize(dev, &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    msi->qirqs = qemu_allocate_irqs(phb3_msi_set_irq, msi, ics->nr_irqs);
+    qemu_register_reset(phb3_msi_reset_handler, dev);
+}
+
+/*
+ * Instance initializer: add the "phb" link property pointing back to the
+ * owning PHB3 and start with an interrupt offset of 0.
+ */
+static void phb3_msi_instance_init(Object *obj)
+{
+    Phb3MsiState *msi = PHB3_MSI(obj);
+    ICSState *ics = ICS(obj);
+    Object **link = (Object **)&msi->phb;
+
+    object_property_add_link(obj, "phb", TYPE_PNV_PHB3, link,
+                             object_property_allow_set_link,
+                             OBJ_PROP_LINK_STRONG,
+                             &error_abort);
+
+    /* Placeholder, overridden later */
+    ics->offset = 0;
+}
+
+/*
+ * Class initializer: install the reject/resend hooks and chain realize
+ * and reset onto the parent ICS class.
+ */
+static void phb3_msi_class_init(ObjectClass *klass, void *data)
+{
+    ICSStateClass *isc = ICS_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    isc->reject = phb3_msi_reject;
+    isc->resend = phb3_msi_resend;
+
+    device_class_set_parent_realize(dc, phb3_msi_realize,
+                                    &isc->parent_realize);
+    device_class_set_parent_reset(dc, phb3_msi_reset,
+                                  &isc->parent_reset);
+}
+
+/* QOM type description of the PHB3 MSI sources (an ICS subclass) */
+static const TypeInfo phb3_msi_info = {
+    .name = TYPE_PHB3_MSI,
+    .parent = TYPE_ICS,
+    .instance_size = sizeof(Phb3MsiState),
+    .instance_init = phb3_msi_instance_init,
+    .class_size = sizeof(ICSStateClass),
+    .class_init = phb3_msi_class_init,
+};
+
+/* Register the QOM type of the PHB3 MSI sources */
+static void pnv_phb3_msi_register_types(void)
+{
+    const TypeInfo *info = &phb3_msi_info;
+
+    type_register_static(info);
+}
+
+type_init(pnv_phb3_msi_register_types);
+
+/*
+ * Print the state of the MSI sources on the monitor: a header with the
+ * interrupt range, then one line per source whose priority is not 0xff.
+ * The listing stops at the first IVE that cannot be read.
+ */
+void pnv_phb3_msi_pic_print_info(Phb3MsiState *msi, Monitor *mon)
+{
+    ICSState *ics = ICS(msi);
+    int i;
+
+    monitor_printf(mon, "ICS %4x..%4x %p\n",
+                   ics->offset, ics->offset + ics->nr_irqs - 1, ics);
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        uint64_t ive;
+        uint64_t prio;
+
+        if (!phb3_msi_read_ive(msi->phb, i, &ive)) {
+            return;
+        }
+
+        /* Priority 0xff: the source is not listed */
+        prio = GETFIELD(IODA2_IVT_PRIORITY, ive);
+        if (prio != 0xff) {
+            monitor_printf(mon, " %4x %c%c server=%04x prio=%02x gen=%d\n",
+                           ics->offset + i,
+                           GETFIELD(IODA2_IVT_P, ive) ? 'P' : '-',
+                           GETFIELD(IODA2_IVT_Q, ive) ? 'Q' : '-',
+                           (uint32_t) GETFIELD(IODA2_IVT_SERVER, ive) >> 2,
+                           (uint32_t) prio,
+                           (uint32_t) GETFIELD(IODA2_IVT_GEN, ive));
+        }
+    }
+}
diff --git a/hw/pci-host/pnv_phb3_pbcq.c b/hw/pci-host/pnv_phb3_pbcq.c
new file mode 100644
index 0000000..f232228
--- /dev/null
+++ b/hw/pci-host/pnv_phb3_pbcq.c
@@ -0,0 +1,358 @@
+/*
+ * QEMU PowerPC PowerNV (POWER8) PHB3 model
+ *
+ * Copyright (c) 2014-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "qemu/log.h"
+#include "target/ppc/cpu.h"
+#include "hw/ppc/fdt.h"
+#include "hw/pci-host/pnv_phb3_regs.h"
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_bus.h"
+
+#include <libfdt.h>
+
+#define phb3_pbcq_error(pbcq, fmt, ...) \
+ qemu_log_mask(LOG_GUEST_ERROR, "phb3_pbcq[%d:%d]: " fmt "\n", \
+ (pbcq)->phb->chip_id, (pbcq)->phb->phb_id, ## __VA_ARGS__)
+
+/* XSCOM read of a PBCQ nest register: return the stored value */
+static uint64_t pnv_pbcq_nest_xscom_read(void *opaque, hwaddr addr,
+                                         unsigned size)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+    const uint32_t reg = addr >> 3;   /* 8 bytes per register */
+
+    return pbcq->nest_regs[reg];
+}
+
+/* XSCOM read of a PBCQ PCI register: return the stored value */
+static uint64_t pnv_pbcq_pci_xscom_read(void *opaque, hwaddr addr,
+                                        unsigned size)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+    const uint32_t reg = addr >> 3;   /* 8 bytes per register */
+
+    return pbcq->pci_regs[reg];
+}
+
+/*
+ * XSCOM read of a PBCQ SPCI register. Reading the ASB data register
+ * performs a 64-bit PHB register read at the offset held in the ASB
+ * address register; every other register returns its stored value.
+ */
+static uint64_t pnv_pbcq_spci_xscom_read(void *opaque, hwaddr addr,
+                                         unsigned size)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+    const uint32_t reg = addr >> 3;   /* 8 bytes per register */
+
+    if (reg != PBCQ_SPCI_ASB_DATA) {
+        return pbcq->spci_regs[reg];
+    }
+
+    return pnv_phb3_reg_read(pbcq->phb,
+                             pbcq->spci_regs[PBCQ_SPCI_ASB_ADDR], 8);
+}
+
+/*
+ * Bring the mappings of the three PBCQ BARs (MMIO0, MMIO1 and PHB
+ * registers) in system memory in line with the BAR enable register.
+ *
+ * Done in two passes, each followed by an update of the PHB regions:
+ * first the BARs that are mapped but no longer enabled are removed, then
+ * the BARs that are enabled but not yet mapped are created and mapped.
+ */
+static void pnv_pbcq_update_map(PnvPBCQState *pbcq)
+{
+    MemoryRegion *sysmem = get_system_memory();
+    const uint64_t enabled = pbcq->nest_regs[PBCQ_NEST_BAR_EN];
+    uint64_t base, mask, len;
+
+    /*
+     * NOTE: This will really not work well if those are remapped
+     * after the PHB has created its sub regions. We could do better
+     * if we had a way to resize regions but we don't really care
+     * that much in practice as the stuff below really only happens
+     * once early during boot
+     */
+
+    /* Pass 1: remove the mappings whose enable bit is clear */
+    if (!(enabled & PBCQ_NEST_BAR_EN_MMIO0) &&
+        memory_region_is_mapped(&pbcq->mmbar0)) {
+        memory_region_del_subregion(sysmem, &pbcq->mmbar0);
+    }
+    if (!(enabled & PBCQ_NEST_BAR_EN_MMIO1) &&
+        memory_region_is_mapped(&pbcq->mmbar1)) {
+        memory_region_del_subregion(sysmem, &pbcq->mmbar1);
+    }
+    if (!(enabled & PBCQ_NEST_BAR_EN_PHB) &&
+        memory_region_is_mapped(&pbcq->phbbar)) {
+        memory_region_del_subregion(sysmem, &pbcq->phbbar);
+    }
+
+    /* Let the PHB see the unmaps */
+    pnv_phb3_update_regions(pbcq->phb);
+
+    /* Pass 2: create the mappings whose enable bit is set */
+    if ((enabled & PBCQ_NEST_BAR_EN_MMIO0) &&
+        !memory_region_is_mapped(&pbcq->mmbar0)) {
+        base = pbcq->nest_regs[PBCQ_NEST_MMIO_BAR0] >> 14;
+        mask = pbcq->nest_regs[PBCQ_NEST_MMIO_MASK0];
+        len = ((~mask) >> 14) + 1;
+        memory_region_init(&pbcq->mmbar0, OBJECT(pbcq), "pbcq-mmio0", len);
+        memory_region_add_subregion(sysmem, base, &pbcq->mmbar0);
+        pbcq->mmio0_base = base;
+        pbcq->mmio0_size = len;
+    }
+    if ((enabled & PBCQ_NEST_BAR_EN_MMIO1) &&
+        !memory_region_is_mapped(&pbcq->mmbar1)) {
+        base = pbcq->nest_regs[PBCQ_NEST_MMIO_BAR1] >> 14;
+        mask = pbcq->nest_regs[PBCQ_NEST_MMIO_MASK1];
+        len = ((~mask) >> 14) + 1;
+        memory_region_init(&pbcq->mmbar1, OBJECT(pbcq), "pbcq-mmio1", len);
+        memory_region_add_subregion(sysmem, base, &pbcq->mmbar1);
+        pbcq->mmio1_base = base;
+        pbcq->mmio1_size = len;
+    }
+    if ((enabled & PBCQ_NEST_BAR_EN_PHB) &&
+        !memory_region_is_mapped(&pbcq->phbbar)) {
+        base = pbcq->nest_regs[PBCQ_NEST_PHB_BAR] >> 14;
+        len = 0x1000;
+        memory_region_init(&pbcq->phbbar, OBJECT(pbcq), "pbcq-phb", len);
+        memory_region_add_subregion(sysmem, base, &pbcq->phbbar);
+    }
+
+    /* Let the PHB see the new maps */
+    pnv_phb3_update_regions(pbcq->phb);
+}
+
+/*
+ * XSCOM write of a PBCQ nest register.
+ *
+ * BAR and mask registers are stored after masking; writing one while its
+ * BAR is enabled is logged but still stored. Writing the BAR enable
+ * register updates the memory map, and it as well as the IRSN and LSI
+ * source id registers trigger a remap of the PHB interrupts. Writes to
+ * any other register are logged as guest errors.
+ */
+static void pnv_pbcq_nest_xscom_write(void *opaque, hwaddr addr,
+                                      uint64_t val, unsigned size)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+    const uint32_t idx = addr >> 3;   /* 8 bytes per register */
+    const uint64_t bar_en = pbcq->nest_regs[PBCQ_NEST_BAR_EN];
+
+    switch (idx) {
+    case PBCQ_NEST_MMIO_BAR0:
+    case PBCQ_NEST_MMIO_BAR1:
+    case PBCQ_NEST_MMIO_MASK0:
+    case PBCQ_NEST_MMIO_MASK1:
+        if (bar_en & (PBCQ_NEST_BAR_EN_MMIO0 | PBCQ_NEST_BAR_EN_MMIO1)) {
+            phb3_pbcq_error(pbcq, "Changing enabled BAR unsupported");
+        }
+        pbcq->nest_regs[idx] = val & 0xffffffffc0000000ull;
+        break;
+
+    case PBCQ_NEST_PHB_BAR:
+        if (bar_en & PBCQ_NEST_BAR_EN_PHB) {
+            phb3_pbcq_error(pbcq, "Changing enabled BAR unsupported");
+        }
+        pbcq->nest_regs[idx] = val & 0xfffffffffc000000ull;
+        break;
+
+    case PBCQ_NEST_BAR_EN:
+        /* Store first: the map update reads the register back */
+        pbcq->nest_regs[idx] = val & 0xf800000000000000ull;
+        pnv_pbcq_update_map(pbcq);
+        pnv_phb3_remap_irqs(pbcq->phb);
+        break;
+
+    case PBCQ_NEST_IRSN_COMPARE:
+    case PBCQ_NEST_IRSN_MASK:
+        pbcq->nest_regs[idx] = val & PBCQ_NEST_IRSN_COMP;
+        pnv_phb3_remap_irqs(pbcq->phb);
+        break;
+
+    case PBCQ_NEST_LSI_SRC_ID:
+        pbcq->nest_regs[idx] = val & PBCQ_NEST_LSI_SRC;
+        pnv_phb3_remap_irqs(pbcq->phb);
+        break;
+
+    default:
+        phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__,
+                        addr, val);
+    }
+}
+
+/*
+ * XSCOM write of a PBCQ PCI register.
+ *
+ * Only PBCQ_PCI_BAR2 is implemented: the masked value is stored and the
+ * memory map is updated. Writes to any other register are logged as
+ * guest errors.
+ */
+static void pnv_pbcq_pci_xscom_write(void *opaque, hwaddr addr,
+                                     uint64_t val, unsigned size)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    case PBCQ_PCI_BAR2:
+        pbcq->pci_regs[reg] = val & 0xfffffffffc000000ull;
+        pnv_pbcq_update_map(pbcq);
+        /* A handled write must not fall into the error report below */
+        break;
+    default:
+        phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__,
+                        addr, val);
+    }
+}
+
+/*
+ * XSCOM write of a PBCQ SPCI register.
+ *
+ * The ASB address register keeps the low 12 bits of the value, the ASB
+ * status register clears the bits that are written as 1, and the ASB data
+ * register performs a 64-bit PHB register write at the offset held in the
+ * ASB address register. The AIB CAPP enable and CAPP SEC timer registers
+ * are silently ignored; anything else is logged as a guest error.
+ */
+static void pnv_pbcq_spci_xscom_write(void *opaque, hwaddr addr,
+                                      uint64_t val, unsigned size)
+{
+    PnvPBCQState *pbcq = PNV_PBCQ(opaque);
+    const uint32_t idx = addr >> 3;   /* 8 bytes per register */
+
+    switch (idx) {
+    case PBCQ_SPCI_ASB_DATA:
+        pnv_phb3_reg_write(pbcq->phb, pbcq->spci_regs[PBCQ_SPCI_ASB_ADDR],
+                           val, 8);
+        break;
+    case PBCQ_SPCI_ASB_ADDR:
+        pbcq->spci_regs[idx] = val & 0xfff;
+        break;
+    case PBCQ_SPCI_ASB_STATUS:
+        pbcq->spci_regs[idx] &= ~val;
+        break;
+    case PBCQ_SPCI_AIB_CAPP_EN:
+    case PBCQ_SPCI_CAPP_SEC_TMR:
+        /* Accepted and ignored */
+        break;
+    default:
+        phb3_pbcq_error(pbcq, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__,
+                        addr, val);
+    }
+}
+
+/* XSCOM callbacks of the PBCQ nest registers: big-endian, 8 bytes only */
+static const MemoryRegionOps pnv_pbcq_nest_xscom_ops = {
+    .endianness = DEVICE_BIG_ENDIAN,
+    .read = pnv_pbcq_nest_xscom_read,
+    .write = pnv_pbcq_nest_xscom_write,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+/* XSCOM callbacks of the PBCQ PCI registers: big-endian, 8 bytes only */
+static const MemoryRegionOps pnv_pbcq_pci_xscom_ops = {
+    .endianness = DEVICE_BIG_ENDIAN,
+    .read = pnv_pbcq_pci_xscom_read,
+    .write = pnv_pbcq_pci_xscom_write,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+/* SPCI bank XSCOM accessors: 8-byte accesses only, big endian */
+static const MemoryRegionOps pnv_pbcq_spci_xscom_ops = {
+ .endianness = DEVICE_BIG_ENDIAN,
+ .write = pnv_pbcq_spci_xscom_write,
+ .read = pnv_pbcq_spci_xscom_read,
+ /* Accepted and implemented access widths are both fixed at 8 bytes */
+ .valid = { .min_access_size = 8, .max_access_size = 8 },
+ .impl = { .min_access_size = 8, .max_access_size = 8 },
+};
+
+static void pnv_pbcq_default_bars(PnvPBCQState *pbcq)
+{ /* Preload the nest MMIO/PHB BARs, the MMIO masks and PCI BAR2 with defaults */
+ uint64_t mm0, mm1, reg;
+ PnvPHB3 *phb = pbcq->phb;
+
+ mm0 = 0x3d00000000000ull + 0x4000000000ull * phb->chip_id + /* fixed base + per-chip stride */
+ 0x1000000000ull * phb->phb_id; /* + per-PHB stride */
+ mm1 = 0x3ff8000000000ull + 0x0200000000ull * phb->chip_id +
+ 0x0080000000ull * phb->phb_id;
+ reg = 0x3fffe40000000ull + 0x0000400000ull * phb->chip_id +
+ 0x0000100000ull * phb->phb_id;
+
+ pbcq->nest_regs[PBCQ_NEST_MMIO_BAR0] = mm0 << 14; /* registers hold the value shifted left by 14 */
+ pbcq->nest_regs[PBCQ_NEST_MMIO_BAR1] = mm1 << 14;
+ pbcq->nest_regs[PBCQ_NEST_PHB_BAR] = reg << 14;
+ pbcq->nest_regs[PBCQ_NEST_MMIO_MASK0] = 0x3fff000000000ull << 14;
+ pbcq->nest_regs[PBCQ_NEST_MMIO_MASK1] = 0x3ffff80000000ull << 14;
+ pbcq->pci_regs[PBCQ_PCI_BAR2] = reg << 14; /* same value as the nest PHB BAR */
+} /* PBCQ_NEST_BAR_EN is not written here */
+
+static void pnv_pbcq_realize(DeviceState *dev, Error **errp)
+{ /* Realize: seed default BARs, then create the nest, PCI and SPCI XSCOM regions */
+ PnvPBCQState *pbcq = PNV_PBCQ(dev);
+ PnvPHB3 *phb;
+ char name[32]; /* scratch buffer, reused for each region name */
+
+ assert(pbcq->phb); /* the phb pointer must already be set */
+ phb = pbcq->phb;
+
+ /* TODO: Fix OPAL to do that: establish default BAR values */
+ pnv_pbcq_default_bars(pbcq);
+
+ /* Initialize the XSCOM region for the PBCQ registers */
+ snprintf(name, sizeof(name), "xscom-pbcq-nest-%d.%d",
+ phb->chip_id, phb->phb_id);
+ pnv_xscom_region_init(&pbcq->xscom_nest_regs, OBJECT(dev),
+ &pnv_pbcq_nest_xscom_ops, pbcq, name,
+ PNV_XSCOM_PBCQ_NEST_SIZE);
+ snprintf(name, sizeof(name), "xscom-pbcq-pci-%d.%d",
+ phb->chip_id, phb->phb_id);
+ pnv_xscom_region_init(&pbcq->xscom_pci_regs, OBJECT(dev),
+ &pnv_pbcq_pci_xscom_ops, pbcq, name,
+ PNV_XSCOM_PBCQ_PCI_SIZE);
+ snprintf(name, sizeof(name), "xscom-pbcq-spci-%d.%d",
+ phb->chip_id, phb->phb_id);
+ pnv_xscom_region_init(&pbcq->xscom_spci_regs, OBJECT(dev),
+ &pnv_pbcq_spci_xscom_ops, pbcq, name,
+ PNV_XSCOM_PBCQ_SPCI_SIZE);
+} /* errp is never written by this function */
+
+static int pnv_pbcq_dt_xscom(PnvXScomInterface *dev, void *fdt,
+ int xscom_offset)
+{ /* Add a pbcq@<addr> subnode under xscom_offset in fdt and fill its properties */
+ const char compat[] = "ibm,power8-pbcq";
+ PnvPHB3 *phb = PNV_PBCQ(dev)->phb;
+ char *name;
+ int offset;
+ uint32_t lpc_pcba = PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id; /* nest bank base of this PHB, despite the lpc_ name */
+ uint32_t reg[] = { /* (base, size) pairs for the nest, PCI and SPCI banks, stored big endian */
+ cpu_to_be32(lpc_pcba),
+ cpu_to_be32(PNV_XSCOM_PBCQ_NEST_SIZE),
+ cpu_to_be32(PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id),
+ cpu_to_be32(PNV_XSCOM_PBCQ_PCI_SIZE),
+ cpu_to_be32(PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id),
+ cpu_to_be32(PNV_XSCOM_PBCQ_SPCI_SIZE)
+ };
+
+ name = g_strdup_printf("pbcq@%x", lpc_pcba); /* unit address is the nest bank base */
+ offset = fdt_add_subnode(fdt, xscom_offset, name);
+ _FDT(offset); /* NOTE(review): _FDT is defined elsewhere; presumably it stops on a libfdt error */
+ g_free(name);
+
+ _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
+
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,phb-index", phb->phb_id)));
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", phb->chip_id)));
+ _FDT((fdt_setprop(fdt, offset, "compatible", compat,
+ sizeof(compat)))); /* sizeof includes the terminating NUL */
+ return 0; /* the only return value produced here */
+}
+
+static void phb3_pbcq_instance_init(Object *obj)
+{ /* Instance init: add a phb link property whose target is stored in pbcq->phb */
+ PnvPBCQState *pbcq = PNV_PBCQ(obj);
+
+ object_property_add_link(obj, "phb", TYPE_PNV_PHB3, /* link target type is TYPE_PNV_PHB3 */
+ (Object **)&pbcq->phb,
+ object_property_allow_set_link,
+ OBJ_PROP_LINK_STRONG,
+ &error_abort);
+}
+
+static void pnv_pbcq_class_init(ObjectClass *klass, void *data)
+{ /* Class init: install the device-tree hook and the realize handler; data is unused */
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass);
+
+ xdc->dt_xscom = pnv_pbcq_dt_xscom;
+
+ dc->realize = pnv_pbcq_realize;
+ dc->user_creatable = false; /* marks the device as not user creatable */
+}
+
+static const TypeInfo pnv_pbcq_type_info = { /* QOM description of the PBCQ device type */
+ .parent = TYPE_DEVICE,
+ .name = TYPE_PNV_PBCQ,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_PNV_XSCOM_INTERFACE },
+ { } /* empty entry ends the list */
+ },
+ .class_init = pnv_pbcq_class_init,
+ .instance_init = phb3_pbcq_instance_init,
+ .instance_size = sizeof(PnvPBCQState),
+};
+
+static void pnv_pbcq_register_types(void)
+{ /* Registers pnv_pbcq_type_info as a static type */
+ type_register_static(&pnv_pbcq_type_info);
+}
+
+type_init(pnv_pbcq_register_types) /* presumably arranges for the function above to run at type-init time */
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
new file mode 100644
index 0000000..23cf093
--- /dev/null
+++ b/hw/pci-host/pnv_phb4.c
@@ -0,0 +1,1439 @@
+/*
+ * QEMU PowerPC PowerNV (POWER9) PHB4 model
+ *
+ * Copyright (c) 2018-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/visitor.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "monitor/monitor.h"
+#include "target/ppc/cpu.h"
+#include "hw/pci-host/pnv_phb4_regs.h"
+#include "hw/pci-host/pnv_phb4.h"
+#include "hw/pci/pcie_host.h"
+#include "hw/pci/pcie_port.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+
+#define phb_error(phb, fmt, ...) \
+ qemu_log_mask(LOG_GUEST_ERROR, "phb4[%d:%d]: " fmt "\n", \
+ (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__) /* guest-error log prefixed with chip and PHB ids; the newline is appended here, so fmt should not end with one */
+
+/*
+ * QEMU version of the GETFIELD/SETFIELD macros
+ *
+ * These are common with the PnvXive model.
+ */
+static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
+{ /* Returns the bits of word selected by mask, shifted right by ctz64(mask) */
+ return (word & mask) >> ctz64(mask); /* assumes ctz64 yields the index of the lowest set bit of mask - TODO confirm */
+}
+
+static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
+ uint64_t value)
+{ /* Returns word with the mask bits replaced by value shifted left by ctz64(mask) */
+ return (word & ~mask) | ((value << ctz64(mask)) & mask); /* value bits falling outside mask are discarded */
+}
+
+static PCIDevice *pnv_phb4_find_cfg_dev(PnvPHB4 *phb)
+{ /* Decode PHB_CONFIG_ADDRESS into a PCIDevice; NULL when no lookup applies */
+ PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+ uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
+ uint8_t bus, devfn;
+
+ if (!(addr >> 63)) { /* top bit clear: no lookup is attempted */
+ return NULL;
+ }
+ bus = (addr >> 52) & 0xff; /* bits 59:52 */
+ devfn = (addr >> 44) & 0xff; /* bits 51:44 */
+
+ /* We don't access the root complex this way */
+ if (bus == 0 && devfn == 0) {
+ return NULL;
+ }
+ return pci_find_device(pci->bus, bus, devfn); /* result of the bus lookup is returned as is */
+}
+
+/*
+ * The CONFIG_DATA register expects little endian accesses, but as the
+ * region is big endian, we have to swap the value.
+ */
+static void pnv_phb4_config_write(PnvPHB4 *phb, unsigned off,
+ unsigned size, uint64_t val)
+{ /* Write val (size bytes) to the device selected by PHB_CONFIG_ADDRESS */
+ uint32_t cfg_addr, limit;
+ PCIDevice *pdev;
+
+ pdev = pnv_phb4_find_cfg_dev(phb);
+ if (!pdev) {
+ return; /* no target device: the write is dropped */
+ }
+ cfg_addr = ((phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc) | off; /* dword-aligned offset from the register, OR-ed with off */
+ limit = pci_config_size(pdev);
+ if (limit <= cfg_addr) {
+ /*
+ * conventional pci device can be behind pcie-to-pci bridge.
+ * 256 <= addr < 4K has no effects.
+ */
+ return;
+ }
+ switch (size) {
+ case 1:
+ break;
+ case 2:
+ val = bswap16(val);
+ break;
+ case 4:
+ val = bswap32(val);
+ break;
+ default: /* e.g. an 8-byte access, which the register window accepts: log and drop, do not abort */
+ phb_error(phb, "config_write invalid size %u", size);
+ return;
+ }
+ pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
+}
+
+static uint64_t pnv_phb4_config_read(PnvPHB4 *phb, unsigned off,
+ unsigned size)
+{ /* Read size bytes from the device selected by PHB_CONFIG_ADDRESS; all ones on any failure */
+ uint32_t cfg_addr, limit;
+ PCIDevice *pdev;
+ uint64_t val;
+
+ pdev = pnv_phb4_find_cfg_dev(phb);
+ if (!pdev) {
+ return ~0ull;
+ }
+ cfg_addr = ((phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc) | off; /* dword-aligned offset from the register, OR-ed with off */
+ limit = pci_config_size(pdev);
+ if (limit <= cfg_addr) {
+ /* conventional pci device behind a pcie-to-pci bridge: 256 <= addr < 4K */
+ return ~0ull;
+ }
+ if (size != 1 && size != 2 && size != 4) { /* e.g. an 8-byte access: reject before touching the device */
+ phb_error(phb, "config_read invalid size %u", size);
+ return ~0ull;
+ }
+ val = pci_host_config_read_common(pdev, cfg_addr, limit, size);
+ switch (size) {
+ case 1:
+ return val;
+ case 2:
+ return bswap16(val); /* device data is little endian, the region is big endian */
+ case 4:
+ return bswap32(val);
+ default:
+ g_assert_not_reached(); /* excluded by the size check above */
+ }
+}
+
+/*
+ * Root complex register accesses are memory mapped.
+ */
+static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off,
+ unsigned size, uint64_t val)
+{ /* Write the 32-bit val at offset off of the device at bus 0, devfn 0 */
+ PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+ PCIDevice *pdev;
+
+ if (size != 4) { /* only 4-byte accesses are supported */
+ phb_error(phb, "rc_config_write invalid size %u", size); /* phb_error() adds the newline */
+ return;
+ }
+
+ pdev = pci_find_device(pci->bus, 0, 0);
+ assert(pdev);
+
+ pci_host_config_write_common(pdev, off, PHB_RC_CONFIG_SIZE,
+ bswap32(val), 4); /* value is byte swapped before reaching the device */
+}
+
+static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off,
+ unsigned size)
+{ /* Read 32 bits at offset off of the device at bus 0, devfn 0; all ones on bad size */
+ PCIHostState *pci = PCI_HOST_BRIDGE(phb);
+ PCIDevice *pdev;
+ uint64_t val;
+
+ if (size != 4) { /* only 4-byte accesses are supported */
+ phb_error(phb, "rc_config_read invalid size %u", size); /* phb_error() adds the newline */
+ return ~0ull;
+ }
+
+ pdev = pci_find_device(pci->bus, 0, 0);
+ assert(pdev);
+
+ val = pci_host_config_read_common(pdev, off, PHB_RC_CONFIG_SIZE, 4);
+ return bswap32(val); /* value is byte swapped on the way back */
+}
+
+static void pnv_phb4_check_mbt(PnvPHB4 *phb, uint32_t index)
+{ /* Rebuild the MMIO alias for MBT entry pair index: unmap, then remap if enabled and in bounds */
+ uint64_t base, start, size, mbe0, mbe1;
+ MemoryRegion *parent;
+ char name[64];
+
+ /* Unmap first */
+ if (memory_region_is_mapped(&phb->mr_mmio[index])) {
+ /* Should we destroy it in RCU friendly way... ? */
+ memory_region_del_subregion(phb->mr_mmio[index].container,
+ &phb->mr_mmio[index]);
+ }
+
+ /* Get table entry */
+ mbe0 = phb->ioda_MBT[(index << 1)]; /* each window uses two consecutive table words */
+ mbe1 = phb->ioda_MBT[(index << 1) + 1];
+
+ if (!(mbe0 & IODA3_MBT0_ENABLE)) {
+ return; /* disabled entry: the window stays unmapped */
+ }
+
+ /* Grab geometry from registers */
+ base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbe0) << 12;
+ size = GETFIELD(IODA3_MBT1_MASK, mbe1) << 12;
+ size |= 0xff00000000000000ull; /* force the top byte set before negating the mask */
+ size = ~size + 1; /* two's complement of the mask gives the window size */
+
+ /* Calculate PCI side start address based on M32/M64 window type */
+ if (mbe0 & IODA3_MBT0_TYPE_M32) {
+ start = phb->regs[PHB_M32_START_ADDR >> 3];
+ if ((start + size) > 0x100000000ull) {
+ phb_error(phb, "M32 set beyond 4GB boundary !");
+ size = 0x100000000 - start; /* clamp so the window ends at 4GB */
+ }
+ } else {
+ start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
+ }
+
+ /* TODO: Figure out how to implement/decode AOMASK */
+
+ /* Check if it matches an enabled MMIO region in the PEC stack */
+ if (memory_region_is_mapped(&phb->stack->mmbar0) &&
+ base >= phb->stack->mmio0_base &&
+ (base + size) <= (phb->stack->mmio0_base + phb->stack->mmio0_size)) {
+ parent = &phb->stack->mmbar0;
+ base -= phb->stack->mmio0_base; /* base becomes an offset inside the parent */
+ } else if (memory_region_is_mapped(&phb->stack->mmbar1) &&
+ base >= phb->stack->mmio1_base &&
+ (base + size) <= (phb->stack->mmio1_base + phb->stack->mmio1_size)) {
+ parent = &phb->stack->mmbar1;
+ base -= phb->stack->mmio1_base;
+ } else {
+ phb_error(phb, "PHB MBAR %d out of parent bounds", index);
+ return;
+ }
+
+ /* Create alias (better name ?) */
+ snprintf(name, sizeof(name), "phb4-mbar%d", index);
+ memory_region_init_alias(&phb->mr_mmio[index], OBJECT(phb), name,
+ &phb->pci_mmio, start, size); /* alias of pci_mmio at [start, start + size) */
+ memory_region_add_subregion(parent, base, &phb->mr_mmio[index]);
+}
+
+static void pnv_phb4_check_all_mbt(PnvPHB4 *phb)
+{ /* Re-evaluate every MMIO window of this PHB */
+ uint64_t i;
+ uint32_t num_windows = phb->big_phb ? PNV_PHB4_MAX_MMIO_WINDOWS :
+ PNV_PHB4_MIN_MMIO_WINDOWS; /* window count depends on big_phb */
+
+ for (i = 0; i < num_windows; i++) {
+ pnv_phb4_check_mbt(phb, i);
+ }
+}
+
+static uint64_t *pnv_phb4_ioda_access(PnvPHB4 *phb,
+ unsigned *out_table, unsigned *out_idx)
+{ /* Decode PHB_IODA_ADDR: returns the addressed slot, or NULL when the table has no backing array */
+ uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
+ unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
+ unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
+ unsigned int mask; /* index wrap mask for the selected table */
+ uint64_t *tptr = NULL;
+
+ if (out_table) { /* stored before the switch so callers never read an unset value, even for an invalid table */
+ *out_table = table;
+ }
+ switch (table) {
+ case IODA3_TBL_LIST:
+ tptr = phb->ioda_LIST;
+ mask = 7;
+ break;
+ case IODA3_TBL_MIST:
+ tptr = phb->ioda_MIST;
+ mask = phb->big_phb ? PNV_PHB4_MAX_MIST : (PNV_PHB4_MAX_MIST >> 1);
+ mask -= 1;
+ break;
+ case IODA3_TBL_RCAM: /* no backing array: tptr stays NULL */
+ mask = phb->big_phb ? 127 : 63;
+ break;
+ case IODA3_TBL_MRT: /* no backing array: tptr stays NULL */
+ mask = phb->big_phb ? 15 : 7;
+ break;
+ case IODA3_TBL_PESTA:
+ case IODA3_TBL_PESTB: /* callers handle these through ioda_PEST_AB using *out_idx */
+ mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
+ mask -= 1;
+ break;
+ case IODA3_TBL_TVT:
+ tptr = phb->ioda_TVT;
+ mask = phb->big_phb ? PNV_PHB4_MAX_TVEs : (PNV_PHB4_MAX_TVEs >> 1);
+ mask -= 1;
+ break;
+ case IODA3_TBL_TCR:
+ case IODA3_TBL_TDR: /* no backing array: tptr stays NULL */
+ mask = phb->big_phb ? 1023 : 511;
+ break;
+ case IODA3_TBL_MBT:
+ tptr = phb->ioda_MBT;
+ mask = phb->big_phb ? PNV_PHB4_MAX_MBEs : (PNV_PHB4_MAX_MBEs >> 1);
+ mask -= 1;
+ break;
+ case IODA3_TBL_MDT:
+ tptr = phb->ioda_MDT;
+ mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
+ mask -= 1;
+ break;
+ case IODA3_TBL_PEEV:
+ tptr = phb->ioda_PEEV;
+ mask = phb->big_phb ? PNV_PHB4_MAX_PEEVs : (PNV_PHB4_MAX_PEEVs >> 1);
+ mask -= 1;
+ break;
+ default:
+ phb_error(phb, "invalid IODA table %d", table);
+ return NULL; /* *out_idx is left untouched and the address register is not advanced */
+ }
+ index &= mask;
+ if (out_idx) {
+ *out_idx = index;
+ }
+ if (tptr) {
+ tptr += index;
+ }
+ if (adreg & PHB_IODA_AD_AUTOINC) { /* advance the address field, wrapping at the table size */
+ index = (index + 1) & mask;
+ adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
+ }
+
+ phb->regs[PHB_IODA_ADDR >> 3] = adreg;
+ return tptr;
+}
+
+static uint64_t pnv_phb4_ioda_read(PnvPHB4 *phb)
+{ /* Read the IODA entry currently addressed by PHB_IODA_ADDR */
+ unsigned table, idx; /* NOTE(review): read below even when the helper returns NULL; confirm it always stores them */
+ uint64_t *tptr;
+
+ tptr = pnv_phb4_ioda_access(phb, &table, &idx);
+ if (!tptr) {
+ /* Special PESTA case */
+ if (table == IODA3_TBL_PESTA) {
+ return ((uint64_t)(phb->ioda_PEST_AB[idx] & 1)) << 63; /* bit 0 of the state is reported in bit 63 */
+ } else if (table == IODA3_TBL_PESTB) {
+ return ((uint64_t)(phb->ioda_PEST_AB[idx] & 2)) << 62; /* bit 1 of the state is reported in bit 63 */
+ }
+ /* Return 0 on unsupported tables, not ff's */
+ return 0;
+ }
+ return *tptr;
+}
+
+static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val)
+{ /* Write val to the IODA entry currently addressed by PHB_IODA_ADDR, with per-table side effects */
+ unsigned table, idx;
+ uint64_t *tptr;
+
+ tptr = pnv_phb4_ioda_access(phb, &table, &idx);
+ if (!tptr) {
+ /* Special PESTA case */
+ if (table == IODA3_TBL_PESTA) {
+ phb->ioda_PEST_AB[idx] &= ~1;
+ phb->ioda_PEST_AB[idx] |= (val >> 63) & 1; /* bit 63 of val becomes state bit 0 */
+ } else if (table == IODA3_TBL_PESTB) {
+ phb->ioda_PEST_AB[idx] &= ~2;
+ phb->ioda_PEST_AB[idx] |= (val >> 62) & 2; /* bit 63 of val becomes state bit 1 */
+ }
+ return; /* writes to other unbacked tables are dropped */
+ }
+
+ /* Handle side effects */
+ switch (table) {
+ case IODA3_TBL_LIST:
+ break; /* write is ignored for this table */
+ case IODA3_TBL_MIST: {
+ /* Special mask for MIST partial write */
+ uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
+ uint32_t mmask = GETFIELD(PHB_IODA_AD_MIST_PWV, adreg);
+ uint64_t v = *tptr; /* start from the current entry, replace selected 16-bit lanes */
+ if (mmask == 0) {
+ mmask = 0xf; /* no lane selected means all four lanes */
+ }
+ if (mmask & 8) {
+ v &= 0x0000ffffffffffffull;
+ v |= 0xcfff000000000000ull & val;
+ }
+ if (mmask & 4) {
+ v &= 0xffff0000ffffffffull;
+ v |= 0x0000cfff00000000ull & val;
+ }
+ if (mmask & 2) {
+ v &= 0xffffffff0000ffffull;
+ v |= 0x00000000cfff0000ull & val;
+ }
+ if (mmask & 1) {
+ v &= 0xffffffffffff0000ull;
+ v |= 0x000000000000cfffull & val;
+ }
+ *tptr = v; /* store the merged value, not the raw val, so unselected lanes survive */
+ break;
+ }
+ case IODA3_TBL_MBT:
+ *tptr = val;
+
+ /* Copy across the valid bit to the other half */
+ phb->ioda_MBT[idx ^ 1] &= 0x7fffffffffffffffull;
+ phb->ioda_MBT[idx ^ 1] |= 0x8000000000000000ull & val;
+
+ /* Update mappings */
+ pnv_phb4_check_mbt(phb, idx >> 1); /* two table words per window */
+ break;
+ default:
+ *tptr = val;
+ }
+}
+
+static void pnv_phb4_rtc_invalidate(PnvPHB4 *phb, uint64_t val)
+{ /* Forget the cached PE number of every DMA space; val is not examined */
+ PnvPhb4DMASpace *ds;
+
+ /* Always invalidate all for now ... */
+ QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+ ds->pe_num = PHB_INVALID_PE;
+ }
+}
+
+static void pnv_phb4_update_msi_regions(PnvPhb4DMASpace *ds)
+{ /* Map or unmap the 32-bit and 64-bit MSI regions of ds to match PHB_PHB4_CONFIG */
+ uint64_t cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
+
+ if (cfg & PHB_PHB4C_32BIT_MSI_EN) {
+ if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
+ memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
+ 0xffff0000, &ds->msi32_mr); /* 32-bit MSI region sits at 0xffff0000 in the DMA region */
+ }
+ } else {
+ if (memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
+ memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
+ &ds->msi32_mr);
+ }
+ }
+
+ if (cfg & PHB_PHB4C_64BIT_MSI_EN) {
+ if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
+ memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
+ (1ull << 60), &ds->msi64_mr); /* 64-bit MSI region sits at 1 << 60 */
+ }
+ } else {
+ if (memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
+ memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
+ &ds->msi64_mr);
+ }
+ }
+}
+
+static void pnv_phb4_update_all_msi_regions(PnvPHB4 *phb)
+{ /* Apply pnv_phb4_update_msi_regions() to every DMA space of this PHB */
+ PnvPhb4DMASpace *ds;
+
+ QLIST_FOREACH(ds, &phb->dma_spaces, list) {
+ pnv_phb4_update_msi_regions(ds);
+ }
+}
+
+static void pnv_phb4_update_xsrc(PnvPHB4 *phb)
+{ /* Refresh the XIVE source ESB shift, flags and LSI map from PHB_CTRLR and PHB_LSI_SOURCE_ID */
+ int shift, flags, i, lsi_base;
+ XiveSource *xsrc = &phb->xsrc;
+
+ /* The XIVE source characteristics can be set at run time */
+ if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_PGSZ_64K) {
+ shift = XIVE_ESB_64K;
+ } else {
+ shift = XIVE_ESB_4K;
+ }
+ if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_STORE_EOI) {
+ flags = XIVE_SRC_STORE_EOI;
+ } else {
+ flags = 0;
+ }
+
+ phb->xsrc.esb_shift = shift;
+ phb->xsrc.esb_flags = flags;
+
+ lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
+ lsi_base <<= 3; /* the field is multiplied by 8 to get the first LSI number */
+
+ /* TODO: handle reset values of PHB_LSI_SRC_ID */
+ if (!lsi_base) {
+ return; /* a zero base leaves the current LSI map untouched */
+ }
+
+ /* TODO: need a xive_source_irq_reset_lsi() */
+ bitmap_zero(xsrc->lsi_map, xsrc->nr_irqs);
+
+ for (i = 0; i < xsrc->nr_irqs; i++) {
+ bool msi = (i < lsi_base || i >= (lsi_base + 8)); /* the 8 sources starting at lsi_base are LSIs */
+ if (!msi) {
+ xive_source_irq_set_lsi(xsrc, i);
+ }
+ }
+}
+
+static void pnv_phb4_reg_write(void *opaque, hwaddr off, uint64_t val,
+ unsigned size)
+{ /* Register write: config windows first, then mask, cache in regs[], then side effects */
+ PnvPHB4 *phb = PNV_PHB4(opaque);
+ bool changed;
+
+ /* Special case outbound configuration data */
+ if ((off & 0xfffc) == PHB_CONFIG_DATA) {
+ pnv_phb4_config_write(phb, off & 0x3, size, val); /* low two offset bits select the byte */
+ return;
+ }
+
+ /* Special case RC configuration space */
+ if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
+ pnv_phb4_rc_config_write(phb, off & 0x7ff, size, val);
+ return;
+ }
+
+ /* Other registers are 64-bit only */
+ if (size != 8 || off & 0x7) {
+ phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
+ off, size);
+ return;
+ }
+
+ /* Handle masking */
+ switch (off) {
+ case PHB_LSI_SOURCE_ID:
+ val &= PHB_LSI_SRC_ID;
+ break;
+ case PHB_M64_UPPER_BITS:
+ val &= 0xff00000000000000ull; /* only the top byte is kept */
+ break;
+ /* TCE Kill */
+ case PHB_TCE_KILL:
+ /* Clear top 3 bits which HW does to indicate successful queuing */
+ val &= ~(PHB_TCE_KILL_ALL | PHB_TCE_KILL_PE | PHB_TCE_KILL_ONE);
+ break;
+ case PHB_Q_DMA_R:
+ /*
+ * This is enough logic to make SW happy but we aren't
+ * actually quiescing the DMAs
+ */
+ if (val & PHB_Q_DMA_R_AUTORESET) {
+ val = 0;
+ } else {
+ val &= PHB_Q_DMA_R_QUIESCE_DMA;
+ }
+ break;
+ /* LEM stuff */
+ case PHB_LEM_FIR_AND_MASK: /* AND/OR aliases update the target register and return without caching val */
+ phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
+ return;
+ case PHB_LEM_FIR_OR_MASK:
+ phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
+ return;
+ case PHB_LEM_ERROR_AND_MASK:
+ phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
+ return;
+ case PHB_LEM_ERROR_OR_MASK:
+ phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
+ return;
+ case PHB_LEM_WOF:
+ val = 0; /* any write stores zero */
+ break;
+ /* TODO: More regs ..., maybe create a table with masks... */
+
+ /* Read only registers */
+ case PHB_CPU_LOADSTORE_STATUS:
+ case PHB_ETU_ERR_SUMMARY:
+ case PHB_PHB4_GEN_CAP:
+ case PHB_PHB4_TCE_CAP:
+ case PHB_PHB4_IRQ_CAP:
+ case PHB_PHB4_EEH_CAP:
+ return; /* writes are dropped silently */
+ }
+
+ /* Record whether it changed */
+ changed = phb->regs[off >> 3] != val;
+
+ /* Store in register cache first */
+ phb->regs[off >> 3] = val;
+
+ /* Handle side effects */
+ switch (off) {
+ case PHB_PHB4_CONFIG:
+ if (changed) {
+ pnv_phb4_update_all_msi_regions(phb);
+ }
+ break;
+ case PHB_M32_START_ADDR:
+ case PHB_M64_UPPER_BITS:
+ if (changed) {
+ pnv_phb4_check_all_mbt(phb);
+ }
+ break;
+
+ /* IODA table accesses */
+ case PHB_IODA_DATA0:
+ pnv_phb4_ioda_write(phb, val);
+ break;
+
+ /* RTC invalidation */
+ case PHB_RTC_INVALIDATE:
+ pnv_phb4_rtc_invalidate(phb, val);
+ break;
+
+ /* PHB Control (Affects XIVE source) */
+ case PHB_CTRLR:
+ case PHB_LSI_SOURCE_ID:
+ pnv_phb4_update_xsrc(phb); /* runs on every write, changed or not */
+ break;
+
+ /* Silent simple writes */
+ case PHB_ASN_CMPM:
+ case PHB_CONFIG_ADDRESS:
+ case PHB_IODA_ADDR:
+ case PHB_TCE_KILL:
+ case PHB_TCE_SPEC_CTL:
+ case PHB_PEST_BAR:
+ case PHB_PELTV_BAR:
+ case PHB_RTT_BAR:
+ case PHB_LEM_FIR_ACCUM:
+ case PHB_LEM_ERROR_MASK:
+ case PHB_LEM_ACTION0:
+ case PHB_LEM_ACTION1:
+ case PHB_TCE_TAG_ENABLE:
+ case PHB_INT_NOTIFY_ADDR:
+ case PHB_INT_NOTIFY_INDEX:
+ case PHB_DMARD_SYNC:
+ break;
+
+ /* Noise on anything else */
+ default:
+ qemu_log_mask(LOG_UNIMP, "phb4: reg_write 0x%"PRIx64"=%"PRIx64"\n",
+ off, val); /* the value has already been stored above */
+ }
+}
+
+static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr off, unsigned size)
+{ /* Register read: config windows first, then computed registers, else the cached value */
+ PnvPHB4 *phb = PNV_PHB4(opaque);
+ uint64_t val;
+
+ if ((off & 0xfffc) == PHB_CONFIG_DATA) { /* outbound configuration data window */
+ return pnv_phb4_config_read(phb, off & 0x3, size);
+ }
+
+ /* Special case RC configuration space */
+ if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
+ return pnv_phb4_rc_config_read(phb, off & 0x7ff, size);
+ }
+
+ /* Other registers are 64-bit only */
+ if (size != 8 || off & 0x7) {
+ phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
+ off, size);
+ return ~0ull;
+ }
+
+ /* Default read from cache */
+ val = phb->regs[off >> 3];
+
+ switch (off) {
+ case PHB_VERSION:
+ return phb->version;
+
+ /* Read-only */
+ case PHB_PHB4_GEN_CAP:
+ return 0xe4b8000000000000ull;
+ case PHB_PHB4_TCE_CAP: /* capability values depend on big_phb */
+ return phb->big_phb ? 0x4008440000000400ull : 0x2008440000000200ull;
+ case PHB_PHB4_IRQ_CAP:
+ return phb->big_phb ? 0x0800000000001000ull : 0x0800000000000800ull;
+ case PHB_PHB4_EEH_CAP:
+ return phb->big_phb ? 0x2000000000000000ull : 0x1000000000000000ull;
+
+ /* IODA table accesses */
+ case PHB_IODA_DATA0:
+ return pnv_phb4_ioda_read(phb);
+
+ /* Link training always appears trained */
+ case PHB_PCIE_DLP_TRAIN_CTL:
+ /* TODO: Do something sensible with speed ? */
+ return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT;
+
+ /* DMA read sync: make it look like it's complete */
+ case PHB_DMARD_SYNC:
+ return PHB_DMARD_SYNC_COMPLETE;
+
+ /* Silent simple reads */
+ case PHB_LSI_SOURCE_ID:
+ case PHB_CPU_LOADSTORE_STATUS:
+ case PHB_ASN_CMPM:
+ case PHB_PHB4_CONFIG:
+ case PHB_M32_START_ADDR:
+ case PHB_CONFIG_ADDRESS:
+ case PHB_IODA_ADDR:
+ case PHB_RTC_INVALIDATE:
+ case PHB_TCE_KILL:
+ case PHB_TCE_SPEC_CTL:
+ case PHB_PEST_BAR:
+ case PHB_PELTV_BAR:
+ case PHB_RTT_BAR:
+ case PHB_M64_UPPER_BITS:
+ case PHB_CTRLR:
+ case PHB_LEM_FIR_ACCUM:
+ case PHB_LEM_ERROR_MASK:
+ case PHB_LEM_ACTION0:
+ case PHB_LEM_ACTION1:
+ case PHB_TCE_TAG_ENABLE:
+ case PHB_INT_NOTIFY_ADDR:
+ case PHB_INT_NOTIFY_INDEX:
+ case PHB_Q_DMA_R:
+ case PHB_ETU_ERR_SUMMARY:
+ break;
+
+ /* Noise on anything else */
+ default:
+ qemu_log_mask(LOG_UNIMP, "phb4: reg_read 0x%"PRIx64"=%"PRIx64"\n",
+ off, val);
+ }
+ return val; /* cached value for silent and unimplemented registers alike */
+}
+
+/* PHB4 register window accessors: 1- to 8-byte accesses, big endian */
+static const MemoryRegionOps pnv_phb4_reg_ops = {
+ .endianness = DEVICE_BIG_ENDIAN,
+ .write = pnv_phb4_reg_write,
+ .read = pnv_phb4_reg_read,
+ /* Same 1..8 byte range for what is accepted and what is implemented */
+ .valid = { .min_access_size = 1, .max_access_size = 8 },
+ .impl = { .min_access_size = 1, .max_access_size = 8 },
+};
+
+static uint64_t pnv_phb4_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{ /* XSCOM read: indirect register access plus direct LEM and PMON aliases */
+ PnvPHB4 *phb = PNV_PHB4(opaque);
+ uint32_t reg = addr >> 3; /* register index is the address divided by 8 */
+ uint64_t val;
+ hwaddr offset;
+
+ switch (reg) {
+ case PHB_SCOM_HV_IND_ADDR:
+ return phb->scom_hv_ind_addr_reg;
+
+ case PHB_SCOM_HV_IND_DATA:
+ if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
+ phb_error(phb, "Invalid indirect address");
+ return ~0ull;
+ }
+ size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8; /* access width comes from the address register */
+ offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
+ val = pnv_phb4_reg_read(phb, offset, size);
+ if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
+ offset += size; /* step to the next register after the access */
+ offset &= 0x3fff;
+ phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
+ phb->scom_hv_ind_addr_reg,
+ offset);
+ }
+ return val;
+ case PHB_SCOM_ETU_LEM_FIR:
+ case PHB_SCOM_ETU_LEM_FIR_AND:
+ case PHB_SCOM_ETU_LEM_FIR_OR:
+ case PHB_SCOM_ETU_LEM_FIR_MSK:
+ case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
+ case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
+ case PHB_SCOM_ETU_LEM_ACT0:
+ case PHB_SCOM_ETU_LEM_ACT1:
+ case PHB_SCOM_ETU_LEM_WOF:
+ offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM; /* linear mapping onto the LEM registers */
+ return pnv_phb4_reg_read(phb, offset, size);
+ case PHB_SCOM_ETU_PMON_CONFIG:
+ case PHB_SCOM_ETU_PMON_CTR0:
+ case PHB_SCOM_ETU_PMON_CTR1:
+ case PHB_SCOM_ETU_PMON_CTR2:
+ case PHB_SCOM_ETU_PMON_CTR3:
+ offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG; /* linear mapping onto the perfmon registers */
+ return pnv_phb4_reg_read(phb, offset, size);
+
+ default:
+ qemu_log_mask(LOG_UNIMP, "phb4: xscom_read 0x%"HWADDR_PRIx"\n", addr);
+ return ~0ull;
+ }
+}
+
+static void pnv_phb4_xscom_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{ /* XSCOM write: indirect register access plus direct LEM and PMON aliases */
+ PnvPHB4 *phb = PNV_PHB4(opaque);
+ uint32_t reg = addr >> 3; /* register index is the address divided by 8 */
+ hwaddr offset;
+
+ switch (reg) {
+ case PHB_SCOM_HV_IND_ADDR:
+ phb->scom_hv_ind_addr_reg = val & 0xe000000000001fff; /* keeps the top three bits and the low 13 bits */
+ break;
+ case PHB_SCOM_HV_IND_DATA:
+ if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
+ phb_error(phb, "Invalid indirect address");
+ break;
+ }
+ size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8; /* access width comes from the address register */
+ offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
+ pnv_phb4_reg_write(phb, offset, val, size);
+ if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
+ offset += size; /* step to the next register after the access */
+ offset &= 0x3fff;
+ phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
+ phb->scom_hv_ind_addr_reg,
+ offset);
+ }
+ break;
+ case PHB_SCOM_ETU_LEM_FIR:
+ case PHB_SCOM_ETU_LEM_FIR_AND:
+ case PHB_SCOM_ETU_LEM_FIR_OR:
+ case PHB_SCOM_ETU_LEM_FIR_MSK:
+ case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
+ case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
+ case PHB_SCOM_ETU_LEM_ACT0:
+ case PHB_SCOM_ETU_LEM_ACT1:
+ case PHB_SCOM_ETU_LEM_WOF:
+ offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM; /* linear mapping onto the LEM registers */
+ pnv_phb4_reg_write(phb, offset, val, size);
+ break;
+ case PHB_SCOM_ETU_PMON_CONFIG:
+ case PHB_SCOM_ETU_PMON_CTR0:
+ case PHB_SCOM_ETU_PMON_CTR1:
+ case PHB_SCOM_ETU_PMON_CTR2:
+ case PHB_SCOM_ETU_PMON_CTR3:
+ offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG; /* linear mapping onto the perfmon registers */
+ pnv_phb4_reg_write(phb, offset, val, size);
+ break;
+ default:
+ qemu_log_mask(LOG_UNIMP, "phb4: xscom_write 0x%"HWADDR_PRIx
+ "=%"PRIx64"\n", addr, val);
+ }
+}
+
+/* PHB4 XSCOM accessors: 8-byte accesses only, big endian */
+const MemoryRegionOps pnv_phb4_xscom_ops = {
+ .endianness = DEVICE_BIG_ENDIAN,
+ .write = pnv_phb4_xscom_write,
+ .read = pnv_phb4_xscom_read,
+ /* Accepted and implemented access widths are both fixed at 8 bytes */
+ .valid = { .min_access_size = 8, .max_access_size = 8 },
+ .impl = { .min_access_size = 8, .max_access_size = 8 },
+};
+
+static int pnv_phb4_map_irq(PCIDevice *pci_dev, int irq_num)
+{ /* Maps irq_num to its low two bits; pci_dev is not consulted */
+ /* Check that out properly ... */
+ return irq_num & 3;
+}
+
+static void pnv_phb4_set_irq(void *opaque, int irq_num, int level)
+{ /* Drive LSI number irq_num (0..3) of the PHB to level */
+ PnvPHB4 *phb = PNV_PHB4(opaque);
+ uint32_t lsi_base;
+
+ if (irq_num > 3) { /* LSI only: report and drop instead of raising an unrelated source */
+ phb_error(phb, "IRQ %x is not an LSI", irq_num);
+ return;
+ }
+ lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
+ lsi_base <<= 3; /* the field is multiplied by 8 to get the first LSI number */
+ qemu_set_irq(phb->qirqs[lsi_base + irq_num], level);
+}
+
+static bool pnv_phb4_resolve_pe(PnvPhb4DMASpace *ds)
+{ /* Ensure ds->pe_num is set, reading the RTT from memory if needed; false on failure */
+ uint64_t rtt, addr;
+ uint16_t rte;
+ int bus_num;
+ int num_PEs;
+
+ /* Already resolved ? */
+ if (ds->pe_num != PHB_INVALID_PE) {
+ return true;
+ }
+
+ /* We need to lookup the RTT */
+ rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
+ if (!(rtt & PHB_RTT_BAR_ENABLE)) {
+ phb_error(ds->phb, "DMA with RTT BAR disabled !");
+ /* Set error bits ? fence ? ... */
+ return false;
+ }
+
+ /* Read RTE */
+ bus_num = pci_bus_num(ds->bus);
+ addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
+ addr += 2 * ((bus_num << 8) | ds->devfn); /* 2-byte entries indexed by (bus << 8) | devfn */
+ if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) {
+ phb_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
+ /* Set error bits ? fence ? ... */
+ return false;
+ }
+ rte = be16_to_cpu(rte); /* entry is stored big endian */
+
+ /* Out-of-range PE#: report it, then wrap it into range and carry on */
+ num_PEs = ds->phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
+ if (rte >= num_PEs) {
+ phb_error(ds->phb, "RTE for RID 0x%x invalid (%04x)", ds->devfn, rte);
+ rte &= num_PEs - 1;
+ }
+ ds->pe_num = rte; /* cached until an RTC invalidation resets it */
+ return true;
+}
+
+static void pnv_phb4_translate_tve(PnvPhb4DMASpace *ds, hwaddr addr,
+ bool is_write, uint64_t tve,
+ IOMMUTLBEntry *tlb)
+{ /* Translate addr through TVE tve and fill tlb; tlb is left untouched on early error returns */
+ uint64_t tta = GETFIELD(IODA3_TVT_TABLE_ADDR, tve);
+ int32_t lev = GETFIELD(IODA3_TVT_NUM_LEVELS, tve);
+ uint32_t tts = GETFIELD(IODA3_TVT_TCE_TABLE_SIZE, tve);
+ uint32_t tps = GETFIELD(IODA3_TVT_IO_PSIZE, tve);
+
+ /* Invalid levels */
+ if (lev > 4) {
+ phb_error(ds->phb, "Invalid #levels in TVE %d", lev);
+ return;
+ }
+
+ /* Invalid entry */
+ if (tts == 0) {
+ phb_error(ds->phb, "Access to invalid TVE");
+ return;
+ }
+
+ /* IO Page Size of 0 means untranslated, else use TCEs */
+ if (tps == 0) {
+ /* TODO: Handle boundaries */
+
+ /* Use 4k pages like q35 ... for now */
+ tlb->iova = addr & 0xfffffffffffff000ull;
+ tlb->translated_addr = addr & 0x0003fffffffff000ull;
+ tlb->addr_mask = 0xfffull;
+ tlb->perm = IOMMU_RW;
+ } else {
+ uint32_t tce_shift, tbl_shift, sh;
+ uint64_t base, taddr, tce, tce_mask;
+
+ /* Address bits per bottom level TCE entry */
+ tce_shift = tps + 11;
+
+ /* Address bits per table level */
+ tbl_shift = tts + 8;
+
+ /* Top level table base address */
+ base = tta << 12;
+
+ /* Total shift to first level */
+ sh = tbl_shift * lev + tce_shift;
+
+ /* TODO: Limit to support IO page sizes */
+
+ /* TODO: Multi-level untested */
+ while ((lev--) >= 0) { /* runs once per level, lev + 1 iterations in total */
+ /* Grab the TCE address */
+ taddr = base | (((addr >> sh) & ((1ull << tbl_shift) - 1)) << 3); /* 64-bit one: tbl_shift may exceed 31 */
+ if (dma_memory_read(&address_space_memory, taddr, &tce,
+ sizeof(tce))) {
+ phb_error(ds->phb, "Failed to read TCE at 0x%"PRIx64, taddr);
+ return;
+ }
+ tce = be64_to_cpu(tce); /* entries are stored big endian */
+
+ /* Check permission for indirect TCE */
+ if ((lev >= 0) && !(tce & 3)) {
+ phb_error(ds->phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
+ phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
+ is_write ? 'W' : 'R', tve);
+ phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
+ tta, lev, tts, tps);
+ return;
+ }
+ sh -= tbl_shift;
+ base = tce & ~0xfffull; /* next level table, or the final page frame */
+ }
+
+ /* We exit the loop with TCE being the final TCE */
+ tce_mask = ~((1ull << tce_shift) - 1);
+ tlb->iova = addr & tce_mask;
+ tlb->translated_addr = tce & tce_mask;
+ tlb->addr_mask = ~tce_mask;
+ tlb->perm = tce & 3; /* low two TCE bits are used as the permission */
+ if ((is_write && !(tce & 2)) || ((!is_write) && !(tce & 1))) { /* fault is only logged, tlb stays filled */
+ phb_error(ds->phb, "TCE access fault at 0x%"PRIx64, taddr);
+ phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
+ is_write ? 'W' : 'R', tve);
+ phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
+ tta, lev, tts, tps);
+ }
+ }
+}
+
+static IOMMUTLBEntry pnv_phb4_translate_iommu(IOMMUMemoryRegion *iommu,
+ hwaddr addr,
+ IOMMUAccessFlags flag,
+ int iommu_idx)
+{ /* IOMMU translate callback: resolve the PE, then translate addr through its TVE */
+ PnvPhb4DMASpace *ds = container_of(iommu, PnvPhb4DMASpace, dma_mr);
+ int tve_sel;
+ uint64_t tve, cfg;
+ IOMMUTLBEntry ret = { /* default result: no permission, returned as is on every failure path */
+ .target_as = &address_space_memory,
+ .iova = addr,
+ .translated_addr = 0,
+ .addr_mask = ~(hwaddr)0,
+ .perm = IOMMU_NONE,
+ };
+
+ /* Resolve PE# */
+ if (!pnv_phb4_resolve_pe(ds)) {
+ phb_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+ ds->bus, pci_bus_num(ds->bus), ds->devfn);
+ return ret;
+ }
+
+ /* Check top bits */
+ switch (addr >> 60) {
+ case 00:
+ /* DMA or 32-bit MSI ? */
+ cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
+ if ((cfg & PHB_PHB4C_32BIT_MSI_EN) &&
+ ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
+ phb_error(ds->phb, "xlate on 32-bit MSI region");
+ return ret;
+ }
+ /* Choose TVE XXX Use PHB4 Control Register */
+ tve_sel = (addr >> 59) & 1; /* address bit 59 picks one of the two TVEs of the PE */
+ tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
+ pnv_phb4_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret); /* a write access is one with IOMMU_WO set */
+ break;
+ case 01:
+ phb_error(ds->phb, "xlate on 64-bit MSI region");
+ break;
+ default:
+ phb_error(ds->phb, "xlate on unsupported address 0x%"PRIx64, addr);
+ }
+ return ret; /* iommu_idx is not used */
+}
+
+#define TYPE_PNV_PHB4_IOMMU_MEMORY_REGION "pnv-phb4-iommu-memory-region" /* type name used for the PHB4 IOMMU memory region */
+#define PNV_PHB4_IOMMU_MEMORY_REGION(obj) \
+ OBJECT_CHECK(IOMMUMemoryRegion, (obj), TYPE_PNV_PHB4_IOMMU_MEMORY_REGION) /* cast of obj to IOMMUMemoryRegion via OBJECT_CHECK */
+
+static void pnv_phb4_iommu_memory_region_class_init(ObjectClass *klass,
+ void *data)
+{ /* Class init: install pnv_phb4_translate_iommu as the translate callback; data is unused */
+ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
+
+ imrc->translate = pnv_phb4_translate_iommu;
+}
+
+static const TypeInfo pnv_phb4_iommu_memory_region_info = { /* type description of the PHB4 IOMMU region */
+ .name = TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
+ .class_init = pnv_phb4_iommu_memory_region_class_init,
+ .parent = TYPE_IOMMU_MEMORY_REGION,
+};
+
+/*
+ * MSI/MSIX memory region implementation.
+ * The handler handles both MSI and MSIX.
+ */
+static void pnv_phb4_msi_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{ /* MSI write: derive the source number from addr and data, then pulse that interrupt line */
+ PnvPhb4DMASpace *ds = opaque;
+ PnvPHB4 *phb = ds->phb;
+
+ uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f); /* address bits 19:4 OR-ed with the low 5 data bits */
+
+ /* Resolve PE# */
+ if (!pnv_phb4_resolve_pe(ds)) {
+ phb_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
+ ds->bus, pci_bus_num(ds->bus), ds->devfn);
+ return;
+ }
+
+ /* TODO: Check it doesn't collide with LSIs */
+ if (src >= phb->xsrc.nr_irqs) {
+ phb_error(phb, "MSI %d out of bounds", src);
+ return;
+ }
+
+ /* TODO: check PE/MSI assignment */
+
+ qemu_irq_pulse(phb->qirqs[src]);
+}
+
+/*
+ * MSI region read handler. The original note states that the result of
+ * such a read is undefined by the PCI spec; the access is reported via
+ * phb_error() and -1 (all ones) is returned.
+ */
+static uint64_t pnv_phb4_msi_read(void *opaque, hwaddr addr, unsigned size)
+{
+ PnvPhb4DMASpace *ds = opaque;
+
+ phb_error(ds->phb, "Invalid MSI read @ 0x%" HWADDR_PRIx, addr);
+ return -1;
+}
+
+/* Little-endian access callbacks backing the MSI memory regions. */
+static const MemoryRegionOps pnv_phb4_msi_ops = {
+    .write      = pnv_phb4_msi_write,
+    .read       = pnv_phb4_msi_read,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/*
+ * Search the PHB's list of DMA spaces for the one created for
+ * (bus, devfn). Returns NULL when there is none.
+ */
+static PnvPhb4DMASpace *pnv_phb4_dma_find(PnvPHB4 *phb, PCIBus *bus, int devfn)
+{
+    PnvPhb4DMASpace *cur;
+
+    QLIST_FOREACH(cur, &phb->dma_spaces, list) {
+        if (cur->bus != bus || cur->devfn != devfn) {
+            continue;
+        }
+        return cur;
+    }
+    return NULL;
+}
+
+/*
+ * IOMMU hook installed with pci_setup_iommu(): returns the DMA address
+ * space for (bus, devfn), creating it the first time it is requested.
+ *
+ * A new space gets an IOMMU memory region, an address space built on
+ * it, and the 32-bit and 64-bit MSI I/O regions, and is then linked
+ * into phb->dma_spaces.
+ */
+static AddressSpace *pnv_phb4_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+    PnvPHB4 *phb = opaque;
+    PnvPhb4DMASpace *ds = pnv_phb4_dma_find(phb, bus, devfn);
+    char name[32];
+
+    if (ds != NULL) {
+        /* Already known: reuse it */
+        return &ds->dma_as;
+    }
+
+    ds = g_malloc0(sizeof(PnvPhb4DMASpace));
+    ds->phb = phb;
+    ds->bus = bus;
+    ds->devfn = devfn;
+    /* The PE# is resolved later, on first use */
+    ds->pe_num = PHB_INVALID_PE;
+
+    snprintf(name, sizeof(name), "phb4-%d.%d-iommu", phb->chip_id,
+             phb->phb_id);
+    memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
+                             TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
+                             OBJECT(phb), name, UINT64_MAX);
+    address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr), name);
+
+    memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb4_msi_ops,
+                          ds, "msi32", 0x10000);
+    memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb4_msi_ops,
+                          ds, "msi64", 0x100000);
+    pnv_phb4_update_msi_regions(ds);
+
+    QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
+
+    return &ds->dma_as;
+}
+
+/*
+ * Instance initializer: empties the DMA space list and creates the two
+ * child objects (XIVE source and root port), then pins the root port
+ * to devfn 0.0 as a single-function device.
+ */
+static void pnv_phb4_instance_init(Object *obj)
+{
+    PnvPHB4 *phb = PNV_PHB4(obj);
+    DeviceState *root_port;
+
+    QLIST_INIT(&phb->dma_spaces);
+
+    /* XIVE interrupt source object */
+    object_initialize_child(obj, "source", &phb->xsrc, sizeof(XiveSource),
+                            TYPE_XIVE_SOURCE, &error_abort, NULL);
+
+    /* Root Port */
+    object_initialize_child(obj, "root", &phb->root, sizeof(phb->root),
+                            TYPE_PNV_PHB4_ROOT_PORT, &error_abort, NULL);
+
+    root_port = DEVICE(&phb->root);
+    qdev_prop_set_int32(root_port, "addr", PCI_DEVFN(0, 0));
+    qdev_prop_set_bit(root_port, "multifunction", false);
+}
+
+/*
+ * Realize a PHB4.
+ *
+ * In order: creates the controller register region and the PCI IO and
+ * MMIO container regions, registers the root bus and its IOMMU hook,
+ * realizes the single root port, configures and realizes the XIVE
+ * source, and finally allocates one qemu_irq per source interrupt.
+ * The "stack" link must have been set beforehand (asserted).
+ * A failure to realize the XIVE source is propagated through @errp.
+ */
+static void pnv_phb4_realize(DeviceState *dev, Error **errp)
+{
+ PnvPHB4 *phb = PNV_PHB4(dev);
+ PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+ XiveSource *xsrc = &phb->xsrc;
+ Error *local_err = NULL;
+ int nr_irqs;
+ char name[32];
+
+ assert(phb->stack);
+
+ /* Set the "big_phb" flag */
+ phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3;
+
+ /* Controller Registers */
+ snprintf(name, sizeof(name), "phb4-%d.%d-regs", phb->chip_id,
+ phb->phb_id);
+ memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb4_reg_ops, phb,
+ name, 0x2000);
+
+ /*
+ * PHB4 doesn't support IO space. However, qemu gets very upset if
+ * we don't have an IO region to anchor IO BARs onto so we just
+ * initialize one which we never hook up to anything
+ */
+
+ snprintf(name, sizeof(name), "phb4-%d.%d-pci-io", phb->chip_id,
+ phb->phb_id);
+ memory_region_init(&phb->pci_io, OBJECT(phb), name, 0x10000);
+
+ snprintf(name, sizeof(name), "phb4-%d.%d-pci-mmio", phb->chip_id,
+ phb->phb_id);
+ memory_region_init(&phb->pci_mmio, OBJECT(phb), name,
+ PCI_MMIO_TOTAL_SIZE);
+
+ pci->bus = pci_register_root_bus(dev, "root-bus",
+ pnv_phb4_set_irq, pnv_phb4_map_irq, phb,
+ &phb->pci_mmio, &phb->pci_io,
+ 0, 4, TYPE_PNV_PHB4_ROOT_BUS);
+ pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
+
+ /* Add a single Root port */
+ qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id);
+ qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id);
+ qdev_set_parent_bus(DEVICE(&phb->root), BUS(pci->bus));
+ qdev_init_nofail(DEVICE(&phb->root));
+
+ /* Setup XIVE Source */
+ /* A "big" PHB gets the full interrupt count, the others half of it */
+ if (phb->big_phb) {
+ nr_irqs = PNV_PHB4_MAX_INTs;
+ } else {
+ nr_irqs = PNV_PHB4_MAX_INTs >> 1;
+ }
+ object_property_set_int(OBJECT(xsrc), nr_irqs, "nr-irqs", &error_fatal);
+ object_property_set_link(OBJECT(xsrc), OBJECT(phb), "xive", &error_fatal);
+ object_property_set_bool(OBJECT(xsrc), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ pnv_phb4_update_xsrc(phb);
+
+ /* One qemu_irq per XIVE source interrupt, used by the MSI write path */
+ phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs);
+}
+
+/*
+ * Device reset: rewrite the root port's PCI vendor and device ids, the
+ * latter taken from the PHB's "device-id" property value.
+ */
+static void pnv_phb4_reset(DeviceState *dev)
+{
+    PnvPHB4 *phb = PNV_PHB4(dev);
+    uint8_t *root_cfg = PCI_DEVICE(&phb->root)->config;
+
+    pci_config_set_vendor_id(root_cfg, PCI_VENDOR_ID_IBM);
+    pci_config_set_device_id(root_cfg, phb->device_id);
+}
+
+/*
+ * Format the root bus path as "00<chip-id>:<phb-id>" (two hex digits
+ * each) into the PHB's own buffer and return that buffer.
+ */
+static const char *pnv_phb4_root_bus_path(PCIHostState *host_bridge,
+                                          PCIBus *rootbus)
+{
+    PnvPHB4 *phb = PNV_PHB4(host_bridge);
+    const size_t path_size = sizeof(phb->bus_path);
+
+    snprintf(phb->bus_path, path_size, "00%02x:%02x",
+             phb->chip_id, phb->phb_id);
+
+    return phb->bus_path;
+}
+
+/*
+ * XiveNotifier callback: forward a source event to the notify port.
+ *
+ * The trigger word is XIVE_TRIGGER_PQ OR-ed with the value of the
+ * PHB_INT_NOTIFY_INDEX register and the source number. It is stored as
+ * a 64-bit big-endian value at the address held in the
+ * PHB_INT_NOTIFY_ADDR register; a failed store is reported.
+ */
+static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno)
+{
+    PnvPHB4 *phb = PNV_PHB4(xf);
+    uint64_t notif_port = phb->regs[PHB_INT_NOTIFY_ADDR >> 3];
+    uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
+    uint64_t data = XIVE_TRIGGER_PQ | offset | srcno;
+    MemTxResult result;
+
+    address_space_stq_be(&address_space_memory, notif_port, data,
+                         MEMTXATTRS_UNSPECIFIED, &result);
+    if (result != MEMTX_OK) {
+        /*
+         * No trailing newline here, like every other phb_error() call in
+         * this file.
+         */
+        phb_error(phb, "trigger failed @%"HWADDR_PRIx, notif_port);
+    }
+}
+
+/*
+ * qdev properties of the PHB4 device. The integer properties default
+ * to 0; "stack" is a link to the owning PEC stack object.
+ */
+static Property pnv_phb4_properties[] = {
+ DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0),
+ DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0),
+ DEFINE_PROP_UINT64("version", PnvPHB4, version, 0),
+ DEFINE_PROP_UINT16("device-id", PnvPHB4, device_id, 0),
+ DEFINE_PROP_LINK("stack", PnvPHB4, stack, TYPE_PNV_PHB4_PEC_STACK,
+ PnvPhb4PecStack *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initializer: wires the realize/reset handlers, properties,
+ * root bus path callback and XIVE notify callback of the PHB4 type.
+ * The device is a bridge and cannot be created by the user.
+ */
+static void pnv_phb4_class_init(ObjectClass *klass, void *data)
+{
+    PCIHostBridgeClass *host_class = PCI_HOST_BRIDGE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    XiveNotifierClass *notifier_class = XIVE_NOTIFIER_CLASS(klass);
+
+    /* PCI host bridge side */
+    host_class->root_bus_path = pnv_phb4_root_bus_path;
+
+    /* Generic device side */
+    dev_class->realize = pnv_phb4_realize;
+    device_class_set_props(dev_class, pnv_phb4_properties);
+    set_bit(DEVICE_CATEGORY_BRIDGE, dev_class->categories);
+    dev_class->user_creatable = false;
+    dev_class->reset = pnv_phb4_reset;
+
+    /* XIVE notifier side */
+    notifier_class->notify = pnv_phb4_xive_notify;
+}
+
+/* QOM type description of the PHB4; it implements the XIVE notifier. */
+static const TypeInfo pnv_phb4_type_info = {
+    .name          = TYPE_PNV_PHB4,
+    .parent        = TYPE_PCIE_HOST_BRIDGE,
+    .instance_size = sizeof(PnvPHB4),
+    .instance_init = pnv_phb4_instance_init,
+    .class_init    = pnv_phb4_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_XIVE_NOTIFIER },
+        { },
+    },
+};
+
+/*
+ * PHB4 has only a single root complex, so the root bus accepts at most
+ * one device.
+ */
+static void pnv_phb4_root_bus_class_init(ObjectClass *klass, void *data)
+{
+    BUS_CLASS(klass)->max_dev = 1;
+}
+
+/* QOM type description of the PHB4 root bus (a PCIe bus). */
+static const TypeInfo pnv_phb4_root_bus_info = {
+    .name       = TYPE_PNV_PHB4_ROOT_BUS,
+    .parent     = TYPE_PCIE_BUS,
+    .class_init = pnv_phb4_root_bus_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { INTERFACE_PCIE_DEVICE },
+        { },
+    },
+};
+
+/*
+ * Root port reset: run the parent class reset, then program the IO,
+ * memory and prefetchable memory base/limit registers of the bridge
+ * config space with the fixed values below.
+ */
+static void pnv_phb4_root_port_reset(DeviceState *dev)
+{
+    PCIERootPortClass *port_class = PCIE_ROOT_PORT_GET_CLASS(dev);
+    uint8_t *cfg = PCI_DEVICE(dev)->config;
+    const uint8_t io_mask = PCI_IO_RANGE_MASK & 0xff;
+
+    port_class->parent_reset(dev);
+
+    /* IO window */
+    pci_byte_test_and_set_mask(cfg + PCI_IO_BASE, io_mask);
+    pci_byte_test_and_clear_mask(cfg + PCI_IO_LIMIT, io_mask);
+
+    /* Non-prefetchable memory window */
+    pci_set_word(cfg + PCI_MEMORY_BASE, 0);
+    pci_set_word(cfg + PCI_MEMORY_LIMIT, 0xfff0);
+
+    /* Prefetchable memory window */
+    pci_set_word(cfg + PCI_PREF_MEMORY_BASE, 0x1);
+    pci_set_word(cfg + PCI_PREF_MEMORY_LIMIT, 0xfff1);
+    pci_set_long(cfg + PCI_PREF_BASE_UPPER32, 0x1); /* Hack */
+    pci_set_long(cfg + PCI_PREF_LIMIT_UPPER32, 0xffffffff);
+}
+
+/*
+ * Root port realize: delegates to the parent class realize and hands
+ * any error it reports back to the caller through @errp.
+ */
+static void pnv_phb4_root_port_realize(DeviceState *dev, Error **errp)
+{
+    PCIERootPortClass *port_class = PCIE_ROOT_PORT_GET_CLASS(dev);
+    Error *err = NULL;
+
+    port_class->parent_realize(dev, &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+    }
+}
+
+/*
+ * Class initializer of the PHB4 root port: chains realize and reset to
+ * the parent class, sets the PCI ids and the capability offsets.
+ */
+static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    PCIERootPortClass *port_class = PCIE_ROOT_PORT_CLASS(klass);
+
+    dev_class->desc = "IBM PHB4 PCIE Root Port";
+    dev_class->user_creatable = false;
+
+    /* Keep the parent handlers reachable from our overrides */
+    device_class_set_parent_realize(dev_class, pnv_phb4_root_port_realize,
+                                    &port_class->parent_realize);
+    device_class_set_parent_reset(dev_class, pnv_phb4_root_port_reset,
+                                  &port_class->parent_reset);
+
+    /* PCI identification */
+    pci_class->vendor_id = PCI_VENDOR_ID_IBM;
+    pci_class->device_id = PNV_PHB4_DEVICE_ID;
+    pci_class->revision = 0;
+
+    /* Offsets used by the root port code for the express and AER caps */
+    port_class->exp_offset = 0x48;
+    port_class->aer_offset = 0x100;
+
+    dev_class->reset = &pnv_phb4_root_port_reset;
+}
+
+/* QOM type description of the PHB4 root port. */
+static const TypeInfo pnv_phb4_root_port_info = {
+    .name          = TYPE_PNV_PHB4_ROOT_PORT,
+    .parent        = TYPE_PCIE_ROOT_PORT,
+    .class_init    = pnv_phb4_root_port_class_init,
+    .instance_size = sizeof(PnvPHB4RootPort),
+};
+
+/* Register every QOM type defined in this file, in a fixed order. */
+static void pnv_phb4_register_types(void)
+{
+    static const TypeInfo *const phb4_types[] = {
+        &pnv_phb4_root_bus_info,
+        &pnv_phb4_root_port_info,
+        &pnv_phb4_type_info,
+        &pnv_phb4_iommu_memory_region_info,
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(phb4_types) / sizeof(phb4_types[0]); i++) {
+        type_register_static(phb4_types[i]);
+    }
+}
+
+type_init(pnv_phb4_register_types);
+
+/*
+ * Re-sync the PHB sub-regions with the state of the stack BARs.
+ *
+ * The register and ESB regions are first removed from the PHB and INT
+ * BAR containers if currently mapped, then added back at offset 0 of
+ * each container that is itself mapped. Ends by re-checking the MBT.
+ */
+void pnv_phb4_update_regions(PnvPhb4PecStack *stack)
+{
+    PnvPHB4 *phb = &stack->phb;
+    MemoryRegion *regs_mr = &phb->mr_regs;
+    MemoryRegion *esb_mr = &phb->xsrc.esb_mmio;
+
+    /* Unmap first always */
+    if (memory_region_is_mapped(regs_mr)) {
+        memory_region_del_subregion(&stack->phbbar, regs_mr);
+    }
+    if (memory_region_is_mapped(esb_mr)) {
+        memory_region_del_subregion(&stack->intbar, esb_mr);
+    }
+
+    /* Map registers if enabled */
+    if (memory_region_is_mapped(&stack->phbbar)) {
+        memory_region_add_subregion(&stack->phbbar, 0, regs_mr);
+    }
+
+    /* Map ESB if enabled */
+    if (memory_region_is_mapped(&stack->intbar)) {
+        memory_region_add_subregion(&stack->intbar, 0, esb_mr);
+    }
+
+    /* Check/update m32 */
+    pnv_phb4_check_all_mbt(phb);
+}
+
+/*
+ * Monitor helper: print the range of source numbers of this PHB
+ * (starting at the PHB_INT_NOTIFY_INDEX register value), followed by
+ * the state of its XIVE source.
+ */
+void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon)
+{
+    XiveSource *xsrc = &phb->xsrc;
+    uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
+
+    monitor_printf(mon, "PHB4[%x:%x] Source %08x .. %08x\n",
+                   phb->chip_id, phb->phb_id,
+                   offset, offset + xsrc->nr_irqs - 1);
+    xive_source_pic_print_info(xsrc, 0, mon);
+}
diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
new file mode 100644
index 0000000..68e1db3
--- /dev/null
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -0,0 +1,595 @@
+/*
+ * QEMU PowerPC PowerNV (POWER9) PHB4 model
+ *
+ * Copyright (c) 2018-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "qemu/log.h"
+#include "target/ppc/cpu.h"
+#include "hw/ppc/fdt.h"
+#include "hw/pci-host/pnv_phb4_regs.h"
+#include "hw/pci-host/pnv_phb4.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/ppc/pnv.h"
+#include "hw/qdev-properties.h"
+
+#include <libfdt.h>
+
+#define phb_pec_error(pec, fmt, ...) \
+ qemu_log_mask(LOG_GUEST_ERROR, "phb4_pec[%d:%d]: " fmt "\n", \
+ (pec)->chip_id, (pec)->index, ## __VA_ARGS__)
+
+
+/*
+ * XSCOM read of a PEC nest register: returns the stored value of the
+ * register indexed by addr / 8.
+ */
+static uint64_t pnv_pec_nest_xscom_read(void *opaque, hwaddr addr,
+                                        unsigned size)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque);
+    const uint32_t idx = addr >> 3;
+
+    /* TODO: add list of allowed registers and error out if not */
+    return pec->nest_regs[idx];
+}
+
+/*
+ * XSCOM write of a PEC nest register.
+ *
+ * The registers listed below simply latch the written value; a write
+ * to any other register is reported as a guest error and dropped.
+ */
+static void pnv_pec_nest_xscom_write(void *opaque, hwaddr addr,
+                                     uint64_t val, unsigned size)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque);
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    case PEC_NEST_PBCQ_HW_CONFIG:
+    case PEC_NEST_DROP_PRIO_CTRL:
+    case PEC_NEST_PBCQ_ERR_INJECT:
+    case PEC_NEST_PCI_NEST_CLK_TRACE_CTL:
+    case PEC_NEST_PBCQ_PMON_CTRL:
+    case PEC_NEST_PBCQ_PBUS_ADDR_EXT:
+    case PEC_NEST_PBCQ_PRED_VEC_TIMEOUT:
+    case PEC_NEST_CAPP_CTRL:
+    case PEC_NEST_PBCQ_READ_STK_OVR:
+    case PEC_NEST_PBCQ_WRITE_STK_OVR:
+    case PEC_NEST_PBCQ_STORE_STK_OVR:
+    case PEC_NEST_PBCQ_RETRY_BKOFF_CTRL:
+        pec->nest_regs[reg] = val;
+        break;
+    default:
+        /* phb_pec_error() appends the newline itself */
+        phb_pec_error(pec, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__,
+                      addr, val);
+    }
+}
+
+/* PEC nest XSCOM region: big-endian, 8-byte accesses only. */
+static const MemoryRegionOps pnv_pec_nest_xscom_ops = {
+    .read = pnv_pec_nest_xscom_read,
+    .write = pnv_pec_nest_xscom_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+/*
+ * XSCOM read of a PEC PCI register: returns the stored value of the
+ * register indexed by addr / 8.
+ */
+static uint64_t pnv_pec_pci_xscom_read(void *opaque, hwaddr addr,
+                                       unsigned size)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque);
+    const uint32_t idx = addr >> 3;
+
+    /* TODO: add list of allowed registers and error out if not */
+    return pec->pci_regs[idx];
+}
+
+/*
+ * XSCOM write of a PEC PCI register.
+ *
+ * Only the two registers listed below latch the written value; a write
+ * to any other register is reported as a guest error and dropped.
+ */
+static void pnv_pec_pci_xscom_write(void *opaque, hwaddr addr,
+                                    uint64_t val, unsigned size)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(opaque);
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    case PEC_PCI_PBAIB_HW_CONFIG:
+    case PEC_PCI_PBAIB_READ_STK_OVR:
+        pec->pci_regs[reg] = val;
+        break;
+    default:
+        /* phb_pec_error() appends the newline itself */
+        phb_pec_error(pec, "%s @0x%"HWADDR_PRIx"=%"PRIx64, __func__,
+                      addr, val);
+    }
+}
+
+/* PEC PCI XSCOM region: big-endian, 8-byte accesses only. */
+static const MemoryRegionOps pnv_pec_pci_xscom_ops = {
+    .read = pnv_pec_pci_xscom_read,
+    .write = pnv_pec_pci_xscom_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+/*
+ * XSCOM read of a PEC stack nest register: returns the stored value of
+ * the register indexed by addr / 8.
+ */
+static uint64_t pnv_pec_stk_nest_xscom_read(void *opaque, hwaddr addr,
+                                            unsigned size)
+{
+    PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque);
+    const uint32_t idx = addr >> 3;
+
+    /* TODO: add list of allowed registers and error out if not */
+    return stack->nest_regs[idx];
+}
+
+/*
+ * Bring the four stack BAR regions (MMIO0, MMIO1, PHB registers, INT)
+ * in line with the PEC_NEST_STK_BAR_EN register.
+ *
+ * Regions that are mapped but no longer enabled are removed from system
+ * memory; regions that are enabled but not mapped are (re)initialized
+ * from the BAR registers and added. pnv_phb4_update_regions() is called
+ * after the unmap pass and again after the map pass.
+ */
+static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack)
+{
+ PnvPhb4PecState *pec = stack->pec;
+ MemoryRegion *sysmem = pec->system_memory;
+ uint64_t bar_en = stack->nest_regs[PEC_NEST_STK_BAR_EN];
+ uint64_t bar, mask, size;
+ char name[64];
+
+ /*
+ * NOTE: This will really not work well if those are remapped
+ * after the PHB has created its sub regions. We could do better
+ * if we had a way to resize regions but we don't really care
+ * that much in practice as the stuff below really only happens
+ * once early during boot
+ */
+
+ /* Handle unmaps */
+ if (memory_region_is_mapped(&stack->mmbar0) &&
+ !(bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) {
+ memory_region_del_subregion(sysmem, &stack->mmbar0);
+ }
+ if (memory_region_is_mapped(&stack->mmbar1) &&
+ !(bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
+ memory_region_del_subregion(sysmem, &stack->mmbar1);
+ }
+ if (memory_region_is_mapped(&stack->phbbar) &&
+ !(bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
+ memory_region_del_subregion(sysmem, &stack->phbbar);
+ }
+ if (memory_region_is_mapped(&stack->intbar) &&
+ !(bar_en & PEC_NEST_STK_BAR_EN_INT)) {
+ memory_region_del_subregion(sysmem, &stack->intbar);
+ }
+
+ /* Update PHB */
+ pnv_phb4_update_regions(stack);
+
+ /* Handle maps */
+ /* MMIO BARs: base is the register value >> 8, size comes from the mask */
+ if (!memory_region_is_mapped(&stack->mmbar0) &&
+ (bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) {
+ bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0] >> 8;
+ mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0_MASK];
+ size = ((~mask) >> 8) + 1;
+ snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio0",
+ pec->chip_id, pec->index, stack->stack_no);
+ memory_region_init(&stack->mmbar0, OBJECT(stack), name, size);
+ memory_region_add_subregion(sysmem, bar, &stack->mmbar0);
+ stack->mmio0_base = bar;
+ stack->mmio0_size = size;
+ }
+ if (!memory_region_is_mapped(&stack->mmbar1) &&
+ (bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
+ bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1] >> 8;
+ mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1_MASK];
+ size = ((~mask) >> 8) + 1;
+ snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio1",
+ pec->chip_id, pec->index, stack->stack_no);
+ memory_region_init(&stack->mmbar1, OBJECT(stack), name, size);
+ memory_region_add_subregion(sysmem, bar, &stack->mmbar1);
+ stack->mmio1_base = bar;
+ stack->mmio1_size = size;
+ }
+ /* PHB register and INT BARs have a fixed size */
+ if (!memory_region_is_mapped(&stack->phbbar) &&
+ (bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
+ bar = stack->nest_regs[PEC_NEST_STK_PHB_REGS_BAR] >> 8;
+ size = PNV_PHB4_NUM_REGS << 3;
+ snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-phb",
+ pec->chip_id, pec->index, stack->stack_no);
+ memory_region_init(&stack->phbbar, OBJECT(stack), name, size);
+ memory_region_add_subregion(sysmem, bar, &stack->phbbar);
+ }
+ if (!memory_region_is_mapped(&stack->intbar) &&
+ (bar_en & PEC_NEST_STK_BAR_EN_INT)) {
+ bar = stack->nest_regs[PEC_NEST_STK_INT_BAR] >> 8;
+ size = PNV_PHB4_MAX_INTs << 16;
+ snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-int",
+ stack->pec->chip_id, stack->pec->index, stack->stack_no);
+ memory_region_init(&stack->intbar, OBJECT(stack), name, size);
+ memory_region_add_subregion(sysmem, bar, &stack->intbar);
+ }
+
+ /* Update PHB */
+ pnv_phb4_update_regions(stack);
+}
+
+/*
+ * XSCOM write of a PEC stack nest register.
+ *
+ * FIR and FIR mask each have a plain write, an AND ("clear") and an OR
+ * ("set") alias that all act on the same backing register. BAR
+ * registers keep only their implemented bits, and changing one while
+ * the matching enable bit is set is reported (the value is stored
+ * anyway). Writing PEC_NEST_STK_BAR_EN re-evaluates the BAR mappings.
+ * Unknown registers are logged as unimplemented.
+ */
+static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr,
+                                         uint64_t val, unsigned size)
+{
+    PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque);
+    PnvPhb4PecState *pec = stack->pec;
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    case PEC_NEST_STK_PCI_NEST_FIR:
+        stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] = val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_CLR:
+        stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] &= val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_SET:
+        stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] |= val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_MSK:
+        stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] = val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_MSKC:
+        stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] &= val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_MSKS:
+        stack->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] |= val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_ACT0:
+    case PEC_NEST_STK_PCI_NEST_FIR_ACT1:
+        stack->nest_regs[reg] = val;
+        break;
+    case PEC_NEST_STK_PCI_NEST_FIR_WOF:
+        /* Any write clears the register */
+        stack->nest_regs[reg] = 0;
+        break;
+    case PEC_NEST_STK_ERR_REPORT_0:
+    case PEC_NEST_STK_ERR_REPORT_1:
+    case PEC_NEST_STK_PBCQ_GNRL_STATUS:
+        /* Flag error ? */
+        break;
+    case PEC_NEST_STK_PBCQ_MODE:
+        stack->nest_regs[reg] = val & 0xff00000000000000ull;
+        break;
+    case PEC_NEST_STK_MMIO_BAR0:
+    case PEC_NEST_STK_MMIO_BAR0_MASK:
+    case PEC_NEST_STK_MMIO_BAR1:
+    case PEC_NEST_STK_MMIO_BAR1_MASK:
+        /* phb_pec_error() appends the newline itself */
+        if (stack->nest_regs[PEC_NEST_STK_BAR_EN] &
+            (PEC_NEST_STK_BAR_EN_MMIO0 |
+             PEC_NEST_STK_BAR_EN_MMIO1)) {
+            phb_pec_error(pec, "Changing enabled BAR unsupported");
+        }
+        stack->nest_regs[reg] = val & 0xffffffffff000000ull;
+        break;
+    case PEC_NEST_STK_PHB_REGS_BAR:
+        if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) {
+            phb_pec_error(pec, "Changing enabled BAR unsupported");
+        }
+        stack->nest_regs[reg] = val & 0xffffffffffc00000ull;
+        break;
+    case PEC_NEST_STK_INT_BAR:
+        if (stack->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) {
+            phb_pec_error(pec, "Changing enabled BAR unsupported");
+        }
+        stack->nest_regs[reg] = val & 0xfffffff000000000ull;
+        break;
+    case PEC_NEST_STK_BAR_EN:
+        stack->nest_regs[reg] = val & 0xf000000000000000ull;
+        pnv_pec_stk_update_map(stack);
+        break;
+    case PEC_NEST_STK_DATA_FRZ_TYPE:
+    case PEC_NEST_STK_PBCQ_TUN_BAR:
+        /* Not used for now */
+        stack->nest_regs[reg] = val;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "phb4_pec: nest_xscom_write 0x%"HWADDR_PRIx
+                      "=%"PRIx64"\n", addr, val);
+    }
+}
+
+/* PEC stack nest XSCOM region: big-endian, 8-byte accesses only. */
+static const MemoryRegionOps pnv_pec_stk_nest_xscom_ops = {
+    .read = pnv_pec_stk_nest_xscom_read,
+    .write = pnv_pec_stk_nest_xscom_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+/*
+ * XSCOM read of a PEC stack PCI register: returns the stored value of
+ * the register indexed by addr / 8.
+ */
+static uint64_t pnv_pec_stk_pci_xscom_read(void *opaque, hwaddr addr,
+                                           unsigned size)
+{
+    PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque);
+    const uint32_t idx = addr >> 3;
+
+    /* TODO: add list of allowed registers and error out if not */
+    return stack->pci_regs[idx];
+}
+
+/*
+ * XSCOM write of a PEC stack PCI register.
+ *
+ * All values are kept in stack->pci_regs[], the array the matching
+ * read handler returns data from. FIR and FIR mask each have a plain
+ * write, an AND ("clear") and an OR ("set") alias that act on the same
+ * backing register. Unknown registers are logged as unimplemented.
+ */
+static void pnv_pec_stk_pci_xscom_write(void *opaque, hwaddr addr,
+                                        uint64_t val, unsigned size)
+{
+    PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(opaque);
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    case PEC_PCI_STK_PCI_FIR:
+        stack->pci_regs[reg] = val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_CLR:
+        stack->pci_regs[PEC_PCI_STK_PCI_FIR] &= val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_SET:
+        stack->pci_regs[PEC_PCI_STK_PCI_FIR] |= val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_MSK:
+        stack->pci_regs[reg] = val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_MSKC:
+        stack->pci_regs[PEC_PCI_STK_PCI_FIR_MSK] &= val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_MSKS:
+        stack->pci_regs[PEC_PCI_STK_PCI_FIR_MSK] |= val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_ACT0:
+    case PEC_PCI_STK_PCI_FIR_ACT1:
+        stack->pci_regs[reg] = val;
+        break;
+    case PEC_PCI_STK_PCI_FIR_WOF:
+        /* Any write clears the register */
+        stack->pci_regs[reg] = 0;
+        break;
+    case PEC_PCI_STK_ETU_RESET:
+        stack->pci_regs[reg] = val & 0x8000000000000000ull;
+        /* TODO: Implement reset */
+        break;
+    case PEC_PCI_STK_PBAIB_ERR_REPORT:
+        break;
+    case PEC_PCI_STK_PBAIB_TX_CMD_CRED:
+    case PEC_PCI_STK_PBAIB_TX_DAT_CRED:
+        stack->pci_regs[reg] = val;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "phb4_pec_stk: pci_xscom_write 0x%"HWADDR_PRIx
+                      "=%"PRIx64"\n", addr, val);
+    }
+}
+
+/* PEC stack PCI XSCOM region: big-endian, 8-byte accesses only. */
+static const MemoryRegionOps pnv_pec_stk_pci_xscom_ops = {
+    .read = pnv_pec_stk_pci_xscom_read,
+    .write = pnv_pec_stk_pci_xscom_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+/*
+ * Instance initializer: create all PHB4_PEC_MAX_STACKS stack child
+ * objects up front, whatever "num-stacks" ends up being.
+ */
+static void pnv_pec_instance_init(Object *obj)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(obj);
+    int n;
+
+    for (n = 0; n < PHB4_PEC_MAX_STACKS; n++) {
+        PnvPhb4PecStack *stack = &pec->stacks[n];
+
+        object_initialize_child(obj, "stack[*]", stack, sizeof(*stack),
+                                TYPE_PNV_PHB4_PEC_STACK, &error_abort, NULL);
+    }
+}
+
+/*
+ * Realize a PEC: configure and realize its first "num-stacks" stacks,
+ * then create the nest and PCI XSCOM register regions.
+ *
+ * The "system-memory" link must have been set beforehand (asserted).
+ * If a stack fails to realize, the error is propagated through @errp
+ * and the XSCOM regions are not created.
+ */
+static void pnv_pec_realize(DeviceState *dev, Error **errp)
+{
+    PnvPhb4PecState *pec = PNV_PHB4_PEC(dev);
+    Error *local_err = NULL;
+    char name[64];
+    int i;
+
+    assert(pec->system_memory);
+
+    /* Create stacks */
+    for (i = 0; i < pec->num_stacks; i++) {
+        PnvPhb4PecStack *stack = &pec->stacks[i];
+        Object *stk_obj = OBJECT(stack);
+
+        object_property_set_int(stk_obj, i, "stack-no", &error_abort);
+        object_property_set_link(stk_obj, OBJECT(pec), "pec", &error_abort);
+        /*
+         * Collect the error locally: the check below tests local_err, so
+         * passing errp here would let a failure go unnoticed.
+         */
+        object_property_set_bool(stk_obj, true, "realized", &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+    /* Initialize the XSCOM regions for the PEC registers */
+    snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest", pec->chip_id,
+             pec->index);
+    pnv_xscom_region_init(&pec->nest_regs_mr, OBJECT(dev),
+                          &pnv_pec_nest_xscom_ops, pec, name,
+                          PHB4_PEC_NEST_REGS_COUNT);
+
+    snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci", pec->chip_id,
+             pec->index);
+    pnv_xscom_region_init(&pec->pci_regs_mr, OBJECT(dev),
+                          &pnv_pec_pci_xscom_ops, pec, name,
+                          PHB4_PEC_PCI_REGS_COUNT);
+}
+
+/*
+ * Device tree hook of the XSCOM interface: adds a "pbcq@<nest base>"
+ * node under @xscom_offset describing this PEC, with one "stack@<n>"
+ * child node per configured stack. Always returns 0; libfdt failures
+ * are handled by the _FDT() wrapper.
+ */
+static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt,
+ int xscom_offset)
+{
+ PnvPhb4PecState *pec = PNV_PHB4_PEC(dev);
+ PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(dev);
+ uint32_t nbase = pecc->xscom_nest_base(pec);
+ uint32_t pbase = pecc->xscom_pci_base(pec);
+ int offset, i;
+ char *name;
+ /* "reg" holds two (base, size) pairs: nest first, then PCI */
+ uint32_t reg[] = {
+ cpu_to_be32(nbase),
+ cpu_to_be32(pecc->xscom_nest_size),
+ cpu_to_be32(pbase),
+ cpu_to_be32(pecc->xscom_pci_size),
+ };
+
+ name = g_strdup_printf("pbcq@%x", nbase);
+ offset = fdt_add_subnode(fdt, xscom_offset, name);
+ _FDT(offset);
+ g_free(name);
+
+ _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
+
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,pec-index", pec->index)));
+ _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 1)));
+ _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0)));
+ _FDT((fdt_setprop(fdt, offset, "compatible", pecc->compat,
+ pecc->compat_size)));
+
+ /* One child node per stack, carrying the stack number and PHB index */
+ for (i = 0; i < pec->num_stacks; i++) {
+ PnvPhb4PecStack *stack = &pec->stacks[i];
+ PnvPHB4 *phb = &stack->phb;
+ int stk_offset;
+
+ name = g_strdup_printf("stack@%x", i);
+ stk_offset = fdt_add_subnode(fdt, offset, name);
+ _FDT(stk_offset);
+ g_free(name);
+ _FDT((fdt_setprop(fdt, stk_offset, "compatible", pecc->stk_compat,
+ pecc->stk_compat_size)));
+ _FDT((fdt_setprop_cell(fdt, stk_offset, "reg", i)));
+ _FDT((fdt_setprop_cell(fdt, stk_offset, "ibm,phb-index", phb->phb_id)));
+ }
+
+ return 0;
+}
+
+/*
+ * qdev properties of the PEC device. The integer properties default to
+ * 0; "system-memory" is a link to the memory region the stack BARs are
+ * mapped into.
+ */
+static Property pnv_pec_properties[] = {
+ DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0),
+ DEFINE_PROP_UINT32("num-stacks", PnvPhb4PecState, num_stacks, 0),
+ DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0),
+ DEFINE_PROP_LINK("system-memory", PnvPhb4PecState, system_memory,
+ TYPE_MEMORY_REGION, MemoryRegion *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/* XSCOM base of the PCI registers: 0x1000000 apart for each PEC index. */
+static uint32_t pnv_pec_xscom_pci_base(PnvPhb4PecState *pec)
+{
+    uint32_t pec_offset = 0x1000000 * pec->index;
+
+    return PNV9_XSCOM_PEC_PCI_BASE + pec_offset;
+}
+
+/* XSCOM base of the nest registers: 0x400 apart for each PEC index. */
+static uint32_t pnv_pec_xscom_nest_base(PnvPhb4PecState *pec)
+{
+    uint32_t pec_offset = 0x400 * pec->index;
+
+    return PNV9_XSCOM_PEC_NEST_BASE + pec_offset;
+}
+
+/*
+ * Class initializer of the PEC: installs the device and XSCOM device
+ * tree callbacks and fills in the POWER9 XSCOM layout and "compatible"
+ * strings. The *_size fields include the terminating NUL.
+ */
+static void pnv_pec_class_init(ObjectClass *klass, void *data)
+{
+    static const char compat[] = "ibm,power9-pbcq";
+    static const char stk_compat[] = "ibm,power9-phb-stack";
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    PnvXScomInterfaceClass *xscom_class = PNV_XSCOM_INTERFACE_CLASS(klass);
+    PnvPhb4PecClass *pec_class = PNV_PHB4_PEC_CLASS(klass);
+
+    xscom_class->dt_xscom = pnv_pec_dt_xscom;
+
+    dev_class->realize = pnv_pec_realize;
+    device_class_set_props(dev_class, pnv_pec_properties);
+    dev_class->user_creatable = false;
+
+    /* XSCOM layout */
+    pec_class->xscom_nest_base = pnv_pec_xscom_nest_base;
+    pec_class->xscom_nest_size = PNV9_XSCOM_PEC_NEST_SIZE;
+    pec_class->xscom_pci_base = pnv_pec_xscom_pci_base;
+    pec_class->xscom_pci_size = PNV9_XSCOM_PEC_PCI_SIZE;
+
+    /* Device tree "compatible" strings */
+    pec_class->compat = compat;
+    pec_class->compat_size = sizeof(compat);
+    pec_class->stk_compat = stk_compat;
+    pec_class->stk_compat_size = sizeof(stk_compat);
+}
+
+/* QOM type description of the PEC; it implements the XSCOM interface. */
+static const TypeInfo pnv_pec_type_info = {
+    .name          = TYPE_PNV_PHB4_PEC,
+    .parent        = TYPE_DEVICE,
+    .instance_init = pnv_pec_instance_init,
+    .instance_size = sizeof(PnvPhb4PecState),
+    .class_size    = sizeof(PnvPhb4PecClass),
+    .class_init    = pnv_pec_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_PNV_XSCOM_INTERFACE },
+        { },
+    },
+};
+
+/* Instance initializer: each stack embeds one PHB4 child object. */
+static void pnv_pec_stk_instance_init(Object *obj)
+{
+    PnvPHB4 *phb = &PNV_PHB4_PEC_STACK(obj)->phb;
+
+    object_initialize_child(obj, "phb", phb, sizeof(*phb),
+                            TYPE_PNV_PHB4, &error_abort, NULL);
+}
+
+/*
+ * Realize a PEC stack: create its three XSCOM regions (nest registers,
+ * PCI registers and the PHB pass-through window). The "pec" link must
+ * have been set beforehand (asserted). The embedded PHB is not
+ * realized here.
+ */
+static void pnv_pec_stk_realize(DeviceState *dev, Error **errp)
+{
+    PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(dev);
+    PnvPhb4PecState *pec = stack->pec;
+    PnvPHB4 *phb = &stack->phb;
+    char region_name[64];
+
+    assert(pec);
+
+    /* Initialize the XSCOM regions for the stack registers */
+    snprintf(region_name, sizeof(region_name),
+             "xscom-pec-%d.%d-nest-stack-%d",
+             pec->chip_id, pec->index, stack->stack_no);
+    pnv_xscom_region_init(&stack->nest_regs_mr, OBJECT(stack),
+                          &pnv_pec_stk_nest_xscom_ops, stack, region_name,
+                          PHB4_PEC_NEST_STK_REGS_COUNT);
+
+    snprintf(region_name, sizeof(region_name),
+             "xscom-pec-%d.%d-pci-stack-%d",
+             pec->chip_id, pec->index, stack->stack_no);
+    pnv_xscom_region_init(&stack->pci_regs_mr, OBJECT(stack),
+                          &pnv_pec_stk_pci_xscom_ops, stack, region_name,
+                          PHB4_PEC_PCI_STK_REGS_COUNT);
+
+    /* PHB pass-through */
+    snprintf(region_name, sizeof(region_name),
+             "xscom-pec-%d.%d-pci-stack-%d-phb",
+             pec->chip_id, pec->index, stack->stack_no);
+    pnv_xscom_region_init(&stack->phb_regs_mr, OBJECT(phb),
+                          &pnv_phb4_xscom_ops, phb, region_name, 0x40);
+
+    /*
+     * Let the machine/chip realize the PHB object to customize more
+     * easily some fields
+     */
+}
+
+/*
+ * qdev properties of a PEC stack: its number within the PEC (default 0)
+ * and a link back to the owning PEC.
+ */
+static Property pnv_pec_stk_properties[] = {
+ DEFINE_PROP_UINT32("stack-no", PnvPhb4PecStack, stack_no, 0),
+ DEFINE_PROP_LINK("pec", PnvPhb4PecStack, pec, TYPE_PNV_PHB4_PEC,
+ PnvPhb4PecState *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initializer of the PEC stack: properties and realize handler;
+ * the device cannot be created by the user.
+ */
+static void pnv_pec_stk_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    device_class_set_props(dev_class, pnv_pec_stk_properties);
+    dev_class->realize = pnv_pec_stk_realize;
+    dev_class->user_creatable = false;
+
+    /* TODO: reset regs ? */
+}
+
+/* QOM type description of a PEC stack. */
+static const TypeInfo pnv_pec_stk_type_info = {
+    .name          = TYPE_PNV_PHB4_PEC_STACK,
+    .parent        = TYPE_DEVICE,
+    .instance_init = pnv_pec_stk_instance_init,
+    .instance_size = sizeof(PnvPhb4PecStack),
+    .class_init    = pnv_pec_stk_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_PNV_XSCOM_INTERFACE },
+        { },
+    },
+};
+
+/* Register the PEC and PEC stack QOM types, in that order. */
+static void pnv_pec_register_types(void)
+{
+    static const TypeInfo *const pec_types[] = {
+        &pnv_pec_type_info,
+        &pnv_pec_stk_type_info,
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(pec_types) / sizeof(pec_types[0]); i++) {
+        type_register_static(pec_types[i]);
+    }
+}
+
+type_init(pnv_pec_register_types);
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index e27efe9..354828b 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -135,6 +135,8 @@
default y
depends on PSERIES
select XIVE
+ select PCI
+ select PCIE_PORT
config XIVE_KVM
bool
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index e61994c..139c857 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -40,6 +40,7 @@
#include "hw/intc/intc.h"
#include "hw/ipmi/ipmi.h"
#include "target/ppc/mmu-hash64.h"
+#include "hw/pci/msi.h"
#include "hw/ppc/xics.h"
#include "hw/qdev-properties.h"
@@ -615,16 +616,29 @@
static void pnv_chip_power8_pic_print_info(PnvChip *chip, Monitor *mon)
{
Pnv8Chip *chip8 = PNV8_CHIP(chip);
+ int i;
ics_pic_print_info(&chip8->psi.ics, mon);
+ for (i = 0; i < chip->num_phbs; i++) {
+ pnv_phb3_msi_pic_print_info(&chip8->phbs[i].msis, mon);
+ ics_pic_print_info(&chip8->phbs[i].lsis, mon);
+ }
}
static void pnv_chip_power9_pic_print_info(PnvChip *chip, Monitor *mon)
{
Pnv9Chip *chip9 = PNV9_CHIP(chip);
+ int i, j;
pnv_xive_pic_print_info(&chip9->xive, mon);
pnv_psi_pic_print_info(&chip9->psi, mon);
+
+ for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) {
+ PnvPhb4PecState *pec = &chip9->pecs[i];
+ for (j = 0; j < pec->num_stacks; j++) {
+ pnv_phb4_pic_print_info(&pec->stacks[j].phb, mon);
+ }
+ }
}
static uint64_t pnv_chip_power8_xscom_core_base(PnvChip *chip,
@@ -716,7 +730,7 @@
exit(1);
}
- fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE);
+ fw_size = load_image_targphys(fw_filename, pnv->fw_load_addr, FW_MAX_SIZE);
if (fw_size < 0) {
error_report("Could not load OPAL firmware '%s'", fw_filename);
exit(1);
@@ -748,6 +762,9 @@
}
}
+ /* MSIs are supported on this platform */
+ msi_nonbroken = true;
+
/*
* Check compatibility of the specified CPU with the machine
* default.
@@ -1014,7 +1031,10 @@
static void pnv_chip_power8_instance_init(Object *obj)
{
+ PnvChip *chip = PNV_CHIP(obj);
Pnv8Chip *chip8 = PNV8_CHIP(obj);
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
+ int i;
object_property_add_link(obj, "xics", TYPE_XICS_FABRIC,
(Object **)&chip8->xics,
@@ -1033,6 +1053,17 @@
object_initialize_child(obj, "homer", &chip8->homer, sizeof(chip8->homer),
TYPE_PNV8_HOMER, &error_abort, NULL);
+
+ for (i = 0; i < pcc->num_phbs; i++) {
+ object_initialize_child(obj, "phb[*]", &chip8->phbs[i],
+ sizeof(chip8->phbs[i]), TYPE_PNV_PHB3,
+ &error_abort, NULL);
+ }
+
+ /*
+ * Number of PHBs is the chip default
+ */
+ chip->num_phbs = pcc->num_phbs;
}
static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp)
@@ -1071,6 +1102,7 @@
Pnv8Chip *chip8 = PNV8_CHIP(dev);
Pnv8Psi *psi8 = &chip8->psi;
Error *local_err = NULL;
+ int i;
assert(chip8->xics);
@@ -1151,6 +1183,33 @@
/* Homer mmio region */
memory_region_add_subregion(get_system_memory(), PNV_HOMER_BASE(chip),
&chip8->homer.regs);
+
+ /* PHB3 controllers */
+ for (i = 0; i < chip->num_phbs; i++) {
+ PnvPHB3 *phb = &chip8->phbs[i];
+ PnvPBCQState *pbcq = &phb->pbcq;
+
+ object_property_set_int(OBJECT(phb), i, "index", &error_fatal);
+ object_property_set_int(OBJECT(phb), chip->chip_id, "chip-id",
+ &error_fatal);
+ object_property_set_bool(OBJECT(phb), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ qdev_set_parent_bus(DEVICE(phb), sysbus_get_default());
+
+ /* Populate the XSCOM address space. */
+ pnv_xscom_add_subregion(chip,
+ PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id,
+ &pbcq->xscom_nest_regs);
+ pnv_xscom_add_subregion(chip,
+ PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id,
+ &pbcq->xscom_pci_regs);
+ pnv_xscom_add_subregion(chip,
+ PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id,
+ &pbcq->xscom_spci_regs);
+ }
}
static uint32_t pnv_chip_power8_xscom_pcba(PnvChip *chip, uint64_t addr)
@@ -1166,6 +1225,7 @@
k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */
k->cores_mask = POWER8E_CORE_MASK;
+ k->num_phbs = 3;
k->core_pir = pnv_chip_core_pir_p8;
k->intc_create = pnv_chip_power8_intc_create;
k->intc_reset = pnv_chip_power8_intc_reset;
@@ -1189,6 +1249,7 @@
k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */
k->cores_mask = POWER8_CORE_MASK;
+ k->num_phbs = 3;
k->core_pir = pnv_chip_core_pir_p8;
k->intc_create = pnv_chip_power8_intc_create;
k->intc_reset = pnv_chip_power8_intc_reset;
@@ -1212,6 +1273,7 @@
k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */
k->cores_mask = POWER8_CORE_MASK;
+ k->num_phbs = 3;
k->core_pir = pnv_chip_core_pir_p8;
k->intc_create = pnv_chip_power8_intc_create;
k->intc_reset = pnv_chip_power8_intc_reset;
@@ -1230,7 +1292,10 @@
static void pnv_chip_power9_instance_init(Object *obj)
{
+ PnvChip *chip = PNV_CHIP(obj);
Pnv9Chip *chip9 = PNV9_CHIP(obj);
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
+ int i;
object_initialize_child(obj, "xive", &chip9->xive, sizeof(chip9->xive),
TYPE_PNV_XIVE, &error_abort, NULL);
@@ -1248,6 +1313,17 @@
object_initialize_child(obj, "homer", &chip9->homer, sizeof(chip9->homer),
TYPE_PNV9_HOMER, &error_abort, NULL);
+
+ for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) {
+ object_initialize_child(obj, "pec[*]", &chip9->pecs[i],
+ sizeof(chip9->pecs[i]), TYPE_PNV_PHB4_PEC,
+ &error_abort, NULL);
+ }
+
+ /*
+ * Number of PHBs is the chip default
+ */
+ chip->num_phbs = pcc->num_phbs;
}
static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp)
@@ -1276,6 +1352,78 @@
}
}
+static void pnv_chip_power9_phb_realize(PnvChip *chip, Error **errp)
+{
+ Pnv9Chip *chip9 = PNV9_CHIP(chip);
+ Error *local_err = NULL;
+ int i, j;
+ int phb_id = 0;
+
+ for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) {
+ PnvPhb4PecState *pec = &chip9->pecs[i];
+ PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
+ uint32_t pec_nest_base;
+ uint32_t pec_pci_base;
+
+ object_property_set_int(OBJECT(pec), i, "index", &error_fatal);
+ /*
+ * PEC0 -> 1 stack
+ * PEC1 -> 2 stacks
+ * PEC2 -> 3 stacks
+ */
+ object_property_set_int(OBJECT(pec), i + 1, "num-stacks",
+ &error_fatal);
+ object_property_set_int(OBJECT(pec), chip->chip_id, "chip-id",
+ &error_fatal);
+ object_property_set_link(OBJECT(pec), OBJECT(get_system_memory()),
+ "system-memory", &error_abort);
+ object_property_set_bool(OBJECT(pec), true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ pec_nest_base = pecc->xscom_nest_base(pec);
+ pec_pci_base = pecc->xscom_pci_base(pec);
+
+ pnv_xscom_add_subregion(chip, pec_nest_base, &pec->nest_regs_mr);
+ pnv_xscom_add_subregion(chip, pec_pci_base, &pec->pci_regs_mr);
+
+ for (j = 0; j < pec->num_stacks && phb_id < chip->num_phbs;
+ j++, phb_id++) {
+ PnvPhb4PecStack *stack = &pec->stacks[j];
+ Object *obj = OBJECT(&stack->phb);
+
+ object_property_set_int(obj, phb_id, "index", &error_fatal);
+ object_property_set_int(obj, chip->chip_id, "chip-id",
+ &error_fatal);
+ object_property_set_int(obj, PNV_PHB4_VERSION, "version",
+ &error_fatal);
+ object_property_set_int(obj, PNV_PHB4_DEVICE_ID, "device-id",
+ &error_fatal);
+ object_property_set_link(obj, OBJECT(stack), "stack", &error_abort);
+ object_property_set_bool(obj, true, "realized", &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ qdev_set_parent_bus(DEVICE(obj), sysbus_get_default());
+
+ /* Populate the XSCOM address space. */
+ pnv_xscom_add_subregion(chip,
+ pec_nest_base + 0x40 * (stack->stack_no + 1),
+ &stack->nest_regs_mr);
+ pnv_xscom_add_subregion(chip,
+ pec_pci_base + 0x40 * (stack->stack_no + 1),
+ &stack->pci_regs_mr);
+ pnv_xscom_add_subregion(chip,
+ pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 +
+ 0x40 * stack->stack_no,
+ &stack->phb_regs_mr);
+ }
+ }
+}
+
static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
{
PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev);
@@ -1378,6 +1526,13 @@
/* Homer mmio region */
memory_region_add_subregion(get_system_memory(), PNV9_HOMER_BASE(chip),
&chip9->homer.regs);
+
+ /* PHBs */
+ pnv_chip_power9_phb_realize(chip, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
}
static uint32_t pnv_chip_power9_xscom_pcba(PnvChip *chip, uint64_t addr)
@@ -1404,6 +1559,7 @@
k->xscom_core_base = pnv_chip_power9_xscom_core_base;
k->xscom_pcba = pnv_chip_power9_xscom_pcba;
dc->desc = "PowerNV Chip POWER9";
+ k->num_phbs = 6;
device_class_set_parent_realize(dc, pnv_chip_power9_realize,
&k->parent_realize);
@@ -1533,6 +1689,7 @@
PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
const char *typename = pnv_chip_core_typename(chip);
int i, core_hwid;
+ PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
if (!object_class_by_name(typename)) {
error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename);
@@ -1571,6 +1728,8 @@
object_property_set_int(OBJECT(pnv_core),
pcc->core_pir(chip, core_hwid),
"pir", &error_fatal);
+ object_property_set_int(OBJECT(pnv_core), pnv->fw_load_addr,
+ "hrmor", &error_fatal);
object_property_set_link(OBJECT(pnv_core), OBJECT(chip), "chip",
&error_abort);
object_property_set_bool(OBJECT(pnv_core), true, "realized",
@@ -1605,6 +1764,7 @@
DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1),
DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0),
DEFINE_PROP_UINT32("nr-threads", PnvChip, nr_threads, 1),
+ DEFINE_PROP_UINT32("num-phbs", PnvChip, num_phbs, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -1638,14 +1798,23 @@
static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
{
PnvMachineState *pnv = PNV_MACHINE(xi);
- int i;
+ int i, j;
for (i = 0; i < pnv->num_chips; i++) {
+ PnvChip *chip = pnv->chips[i];
Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]);
if (ics_valid_irq(&chip8->psi.ics, irq)) {
return &chip8->psi.ics;
}
+ for (j = 0; j < chip->num_phbs; j++) {
+ if (ics_valid_irq(&chip8->phbs[j].lsis, irq)) {
+ return &chip8->phbs[j].lsis;
+ }
+ if (ics_valid_irq(ICS(&chip8->phbs[j].msis), irq)) {
+ return ICS(&chip8->phbs[j].msis);
+ }
+ }
}
return NULL;
}
@@ -1653,11 +1822,17 @@
static void pnv_ics_resend(XICSFabric *xi)
{
PnvMachineState *pnv = PNV_MACHINE(xi);
- int i;
+ int i, j;
for (i = 0; i < pnv->num_chips; i++) {
+ PnvChip *chip = pnv->chips[i];
Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]);
+
ics_resend(&chip8->psi.ics);
+ for (j = 0; j < chip->num_phbs; j++) {
+ ics_resend(&chip8->phbs[j].lsis);
+ ics_resend(ICS(&chip8->phbs[j].msis));
+ }
}
}
@@ -1767,6 +1942,22 @@
pmc->dt_power_mgt = pnv_dt_power_mgt;
}
+static bool pnv_machine_get_hb(Object *obj, Error **errp)
+{
+ PnvMachineState *pnv = PNV_MACHINE(obj);
+
+ return !!pnv->fw_load_addr;
+}
+
+static void pnv_machine_set_hb(Object *obj, bool value, Error **errp)
+{
+ PnvMachineState *pnv = PNV_MACHINE(obj);
+
+ if (value) {
+ pnv->fw_load_addr = 0x8000000;
+ }
+}
+
static void pnv_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -1786,6 +1977,13 @@
*/
mc->default_ram_size = INITRD_LOAD_ADDR + INITRD_MAX_SIZE;
ispc->print_info = pnv_pic_print_info;
+
+ object_class_property_add_bool(oc, "hb-mode",
+ pnv_machine_get_hb, pnv_machine_set_hb,
+ &error_abort);
+ object_class_property_set_description(oc, "hb-mode",
+ "Use a hostboot like boot loader",
+ NULL);
}
#define DEFINE_PNV8_CHIP_TYPE(type, class_initfn) \
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 8ca5fbd..2345620 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -40,11 +40,11 @@
return cpu_type;
}
-static void pnv_core_cpu_reset(PowerPCCPU *cpu, PnvChip *chip)
+static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu)
{
CPUState *cs = CPU(cpu);
CPUPPCState *env = &cpu->env;
- PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip);
cpu_reset(cs);
@@ -56,7 +56,9 @@
env->nip = 0x10;
env->msr |= MSR_HVB; /* Hypervisor mode */
- pcc->intc_reset(chip, cpu);
+ env->spr[SPR_HRMOR] = pc->hrmor;
+
+ pcc->intc_reset(pc->chip, cpu);
}
/*
@@ -162,14 +164,14 @@
.endianness = DEVICE_BIG_ENDIAN,
};
-static void pnv_core_cpu_realize(PowerPCCPU *cpu, PnvChip *chip, Error **errp)
+static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp)
{
CPUPPCState *env = &cpu->env;
int core_pir;
int thread_index = 0; /* TODO: TCG supports only one thread */
ppc_spr_t *pir = &env->spr_cb[SPR_PIR];
Error *local_err = NULL;
- PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip);
object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
if (local_err) {
@@ -177,13 +179,13 @@
return;
}
- pcc->intc_create(chip, cpu, &local_err);
+ pcc->intc_create(pc->chip, cpu, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
- core_pir = object_property_get_uint(OBJECT(cpu), "core-pir", &error_abort);
+ core_pir = object_property_get_uint(OBJECT(pc), "pir", &error_abort);
/*
* The PIR of a thread is the core PIR + the thread index. We will
@@ -203,7 +205,7 @@
int i;
for (i = 0; i < cc->nr_threads; i++) {
- pnv_core_cpu_reset(pc->threads[i], pc->chip);
+ pnv_core_cpu_reset(pc, pc->threads[i]);
}
}
@@ -231,8 +233,6 @@
snprintf(name, sizeof(name), "thread[%d]", i);
object_property_add_child(OBJECT(pc), name, obj, &error_abort);
- object_property_add_alias(obj, "core-pir", OBJECT(pc),
- "pir", &error_abort);
cpu->machine_data = g_new0(PnvCPUState, 1);
@@ -240,7 +240,7 @@
}
for (j = 0; j < cc->nr_threads; j++) {
- pnv_core_cpu_realize(pc->threads[j], pc->chip, &local_err);
+ pnv_core_cpu_realize(pc, pc->threads[j], &local_err);
if (local_err) {
goto err;
}
@@ -263,12 +263,12 @@
error_propagate(errp, local_err);
}
-static void pnv_core_cpu_unrealize(PowerPCCPU *cpu, PnvChip *chip)
+static void pnv_core_cpu_unrealize(PnvCore *pc, PowerPCCPU *cpu)
{
PnvCPUState *pnv_cpu = pnv_cpu_state(cpu);
- PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip);
- pcc->intc_destroy(chip, cpu);
+ pcc->intc_destroy(pc->chip, cpu);
cpu_remove_sync(CPU(cpu));
cpu->machine_data = NULL;
g_free(pnv_cpu);
@@ -284,13 +284,14 @@
qemu_unregister_reset(pnv_core_reset, pc);
for (i = 0; i < cc->nr_threads; i++) {
- pnv_core_cpu_unrealize(pc->threads[i], pc->chip);
+ pnv_core_cpu_unrealize(pc, pc->threads[i]);
}
g_free(pc->threads);
}
static Property pnv_core_properties[] = {
DEFINE_PROP_UINT32("pir", PnvCore, pir, 0),
+ DEFINE_PROP_UINT64("hrmor", PnvCore, hrmor, 0),
DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *),
DEFINE_PROP_END_OF_LIST(),
};
@@ -324,6 +325,7 @@
dc->realize = pnv_core_realize;
dc->unrealize = pnv_core_unrealize;
device_class_set_props(dc, pnv_core_properties);
+ dc->user_creatable = false;
}
#define DEFINE_PNV_CORE_TYPE(family, cpu_model) \
@@ -422,6 +424,7 @@
dc->realize = pnv_quad_realize;
device_class_set_props(dc, pnv_quad_properties);
+ dc->user_creatable = false;
}
static const TypeInfo pnv_quad_info = {
diff --git a/hw/ppc/pnv_homer.c b/hw/ppc/pnv_homer.c
index 93ae42f..9a26262 100644
--- a/hw/ppc/pnv_homer.c
+++ b/hw/ppc/pnv_homer.c
@@ -360,6 +360,7 @@
dc->realize = pnv_homer_realize;
dc->desc = "PowerNV HOMER Memory";
device_class_set_props(dc, pnv_homer_properties);
+ dc->user_creatable = false;
}
static const TypeInfo pnv_homer_type_info = {
diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
index 22b2055..5989d72 100644
--- a/hw/ppc/pnv_lpc.c
+++ b/hw/ppc/pnv_lpc.c
@@ -762,6 +762,7 @@
dc->realize = pnv_lpc_realize;
dc->desc = "PowerNV LPC Controller";
device_class_set_props(dc, pnv_lpc_properties);
+ dc->user_creatable = false;
}
static const TypeInfo pnv_lpc_info = {
@@ -825,6 +826,7 @@
qemu_irq *irqs;
qemu_irq_handler handler;
PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
+ bool hostboot_mode = !!pnv->fw_load_addr;
/* let isa_bus_new() create its own bridge on SysBus otherwise
* devices speficied on the command line won't find the bus and
@@ -859,7 +861,9 @@
* Start disabled. The HIOMAP protocol will activate the mapping
* with HIOMAP_C_CREATE_WRITE_WINDOW
*/
- memory_region_set_enabled(&pnv->pnor->mmio, false);
+ if (!hostboot_mode) {
+ memory_region_set_enabled(&pnv->pnor->mmio, false);
+ }
return isa_bus;
}
diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c
index 2173fac..5a716c2 100644
--- a/hw/ppc/pnv_occ.c
+++ b/hw/ppc/pnv_occ.c
@@ -280,6 +280,7 @@
dc->realize = pnv_occ_realize;
dc->desc = "PowerNV OCC Controller";
device_class_set_props(dc, pnv_occ_properties);
+ dc->user_creatable = false;
}
static const TypeInfo pnv_occ_type_info = {
diff --git a/hw/ppc/pnv_pnor.c b/hw/ppc/pnv_pnor.c
index f761d8d..c365ee5 100644
--- a/hw/ppc/pnv_pnor.c
+++ b/hw/ppc/pnv_pnor.c
@@ -11,6 +11,7 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
+#include "qemu/units.h"
#include "sysemu/block-backend.h"
#include "sysemu/blockdev.h"
#include "hw/loader.h"
@@ -46,7 +47,8 @@
ret = blk_pwrite(s->blk, offset, s->storage + offset,
offset_end - offset, 0);
if (ret < 0) {
- error_report("Could not update PNOR: %s", strerror(-ret));
+ error_report("Could not update PNOR offset=0x%" PRIx32" : %s", offset,
+ strerror(-ret));
}
}
@@ -111,7 +113,7 @@
}
static Property pnv_pnor_properties[] = {
- DEFINE_PROP_INT64("size", PnvPnor, size, 128 << 20),
+ DEFINE_PROP_INT64("size", PnvPnor, size, 128 * MiB),
DEFINE_PROP_DRIVE("drive", PnvPnor, blk),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 4c5fa29..4a11fb1 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -1490,24 +1490,6 @@
}
/*****************************************************************************/
-/* Debug port */
-void PPC_debug_write (void *opaque, uint32_t addr, uint32_t val)
-{
- addr &= 0xF;
- switch (addr) {
- case 0:
- printf("%c", val);
- break;
- case 1:
- printf("\n");
- fflush(stdout);
- break;
- case 2:
- printf("Set loglevel to %04" PRIx32 "\n", val);
- qemu_set_log(val | 0x100);
- break;
- }
-}
int ppc_cpu_pir(PowerPCCPU *cpu)
{
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 862345c..111cc80 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -42,7 +42,7 @@
#include "hw/loader.h"
#include "hw/rtc/mc146818rtc.h"
#include "hw/isa/pc87312.h"
-#include "hw/net/ne2000-isa.h"
+#include "hw/qdev-properties.h"
#include "sysemu/arch_init.h"
#include "sysemu/kvm.h"
#include "sysemu/qtest.h"
@@ -60,178 +60,9 @@
#define CFG_ADDR 0xf0000510
-#define BIOS_SIZE (1 * MiB)
-#define BIOS_FILENAME "ppc_rom.bin"
#define KERNEL_LOAD_ADDR 0x01000000
#define INITRD_LOAD_ADDR 0x01800000
-/* Constants for devices init */
-static const int ide_iobase[2] = { 0x1f0, 0x170 };
-static const int ide_iobase2[2] = { 0x3f6, 0x376 };
-static const int ide_irq[2] = { 13, 13 };
-
-#define NE2000_NB_MAX 6
-
-static uint32_t ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, 0x360, 0x280, 0x380 };
-static int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
-
-/* ISA IO ports bridge */
-#define PPC_IO_BASE 0x80000000
-
-/* Fake super-io ports for PREP platform (Intel 82378ZB) */
-typedef struct sysctrl_t {
- qemu_irq reset_irq;
- Nvram *nvram;
- uint8_t state;
- uint8_t syscontrol;
- int contiguous_map;
- qemu_irq contiguous_map_irq;
- int endian;
-} sysctrl_t;
-
-enum {
- STATE_HARDFILE = 0x01,
-};
-
-static sysctrl_t *sysctrl;
-
-static void PREP_io_800_writeb (void *opaque, uint32_t addr, uint32_t val)
-{
- sysctrl_t *sysctrl = opaque;
-
- trace_prep_io_800_writeb(addr - PPC_IO_BASE, val);
- switch (addr) {
- case 0x0092:
- /* Special port 92 */
- /* Check soft reset asked */
- if (val & 0x01) {
- qemu_irq_raise(sysctrl->reset_irq);
- } else {
- qemu_irq_lower(sysctrl->reset_irq);
- }
- /* Check LE mode */
- if (val & 0x02) {
- sysctrl->endian = 1;
- } else {
- sysctrl->endian = 0;
- }
- break;
- case 0x0800:
- /* Motorola CPU configuration register : read-only */
- break;
- case 0x0802:
- /* Motorola base module feature register : read-only */
- break;
- case 0x0803:
- /* Motorola base module status register : read-only */
- break;
- case 0x0808:
- /* Hardfile light register */
- if (val & 1)
- sysctrl->state |= STATE_HARDFILE;
- else
- sysctrl->state &= ~STATE_HARDFILE;
- break;
- case 0x0810:
- /* Password protect 1 register */
- if (sysctrl->nvram != NULL) {
- NvramClass *k = NVRAM_GET_CLASS(sysctrl->nvram);
- (k->toggle_lock)(sysctrl->nvram, 1);
- }
- break;
- case 0x0812:
- /* Password protect 2 register */
- if (sysctrl->nvram != NULL) {
- NvramClass *k = NVRAM_GET_CLASS(sysctrl->nvram);
- (k->toggle_lock)(sysctrl->nvram, 2);
- }
- break;
- case 0x0814:
- /* L2 invalidate register */
- // tlb_flush(first_cpu, 1);
- break;
- case 0x081C:
- /* system control register */
- sysctrl->syscontrol = val & 0x0F;
- break;
- case 0x0850:
- /* I/O map type register */
- sysctrl->contiguous_map = val & 0x01;
- qemu_set_irq(sysctrl->contiguous_map_irq, sysctrl->contiguous_map);
- break;
- default:
- printf("ERROR: unaffected IO port write: %04" PRIx32
- " => %02" PRIx32"\n", addr, val);
- break;
- }
-}
-
-static uint32_t PREP_io_800_readb (void *opaque, uint32_t addr)
-{
- sysctrl_t *sysctrl = opaque;
- uint32_t retval = 0xFF;
-
- switch (addr) {
- case 0x0092:
- /* Special port 92 */
- retval = sysctrl->endian << 1;
- break;
- case 0x0800:
- /* Motorola CPU configuration register */
- retval = 0xEF; /* MPC750 */
- break;
- case 0x0802:
- /* Motorola Base module feature register */
- retval = 0xAD; /* No ESCC, PMC slot neither ethernet */
- break;
- case 0x0803:
- /* Motorola base module status register */
- retval = 0xE0; /* Standard MPC750 */
- break;
- case 0x080C:
- /* Equipment present register:
- * no L2 cache
- * no upgrade processor
- * no cards in PCI slots
- * SCSI fuse is bad
- */
- retval = 0x3C;
- break;
- case 0x0810:
- /* Motorola base module extended feature register */
- retval = 0x39; /* No USB, CF and PCI bridge. NVRAM present */
- break;
- case 0x0814:
- /* L2 invalidate: don't care */
- break;
- case 0x0818:
- /* Keylock */
- retval = 0x00;
- break;
- case 0x081C:
- /* system control register
- * 7 - 6 / 1 - 0: L2 cache enable
- */
- retval = sysctrl->syscontrol;
- break;
- case 0x0823:
- /* */
- retval = 0x03; /* no L2 cache */
- break;
- case 0x0850:
- /* I/O map type register */
- retval = sysctrl->contiguous_map;
- break;
- default:
- printf("ERROR: unaffected IO port: %04" PRIx32 " read\n", addr);
- break;
- }
- trace_prep_io_800_readb(addr - PPC_IO_BASE, retval);
-
- return retval;
-}
-
-
#define NVRAM_SIZE 0x2000
static void fw_cfg_boot_set(void *opaque, const char *boot_device,
@@ -247,17 +78,6 @@
cpu_reset(CPU(cpu));
}
-static const MemoryRegionPortio prep_portio_list[] = {
- /* System control ports */
- { 0x0092, 1, 1, .read = PREP_io_800_readb, .write = PREP_io_800_writeb, },
- { 0x0800, 0x52, 1,
- .read = PREP_io_800_readb, .write = PREP_io_800_writeb, },
- /* Special port to get debug messages from Open-Firmware */
- { 0x0F00, 4, 1, .write = PPC_debug_write, },
- PORTIO_END_OF_LIST(),
-};
-
-static PortioList prep_port_list;
/*****************************************************************************/
/* NVRAM helpers */
@@ -397,207 +217,6 @@
return 0;
}
-/* PowerPC PREP hardware initialisation */
-static void ppc_prep_init(MachineState *machine)
-{
- ram_addr_t ram_size = machine->ram_size;
- const char *kernel_filename = machine->kernel_filename;
- const char *kernel_cmdline = machine->kernel_cmdline;
- const char *initrd_filename = machine->initrd_filename;
- const char *boot_device = machine->boot_order;
- MemoryRegion *sysmem = get_system_memory();
- PowerPCCPU *cpu = NULL;
- CPUPPCState *env = NULL;
- Nvram *m48t59;
-#if 0
- MemoryRegion *xcsr = g_new(MemoryRegion, 1);
-#endif
- int linux_boot, i, nb_nics1;
- MemoryRegion *ram = g_new(MemoryRegion, 1);
- uint32_t kernel_base, initrd_base;
- long kernel_size, initrd_size;
- DeviceState *dev;
- PCIHostState *pcihost;
- PCIBus *pci_bus;
- PCIDevice *pci;
- ISABus *isa_bus;
- ISADevice *isa;
- int ppc_boot_device;
- DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
-
- sysctrl = g_malloc0(sizeof(sysctrl_t));
-
- linux_boot = (kernel_filename != NULL);
-
- /* init CPUs */
- for (i = 0; i < machine->smp.cpus; i++) {
- cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
- env = &cpu->env;
-
- if (env->flags & POWERPC_FLAG_RTC_CLK) {
- /* POWER / PowerPC 601 RTC clock frequency is 7.8125 MHz */
- cpu_ppc_tb_init(env, 7812500UL);
- } else {
- /* Set time-base frequency to 100 Mhz */
- cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL);
- }
- qemu_register_reset(ppc_prep_reset, cpu);
- }
-
- /* allocate RAM */
- memory_region_allocate_system_memory(ram, NULL, "ppc_prep.ram", ram_size);
- memory_region_add_subregion(sysmem, 0, ram);
-
- if (linux_boot) {
- kernel_base = KERNEL_LOAD_ADDR;
- /* now we can load the kernel */
- kernel_size = load_image_targphys(kernel_filename, kernel_base,
- ram_size - kernel_base);
- if (kernel_size < 0) {
- error_report("could not load kernel '%s'", kernel_filename);
- exit(1);
- }
- /* load initrd */
- if (initrd_filename) {
- initrd_base = INITRD_LOAD_ADDR;
- initrd_size = load_image_targphys(initrd_filename, initrd_base,
- ram_size - initrd_base);
- if (initrd_size < 0) {
- error_report("could not load initial ram disk '%s'",
- initrd_filename);
- exit(1);
- }
- } else {
- initrd_base = 0;
- initrd_size = 0;
- }
- ppc_boot_device = 'm';
- } else {
- kernel_base = 0;
- kernel_size = 0;
- initrd_base = 0;
- initrd_size = 0;
- ppc_boot_device = '\0';
- /* For now, OHW cannot boot from the network. */
- for (i = 0; boot_device[i] != '\0'; i++) {
- if (boot_device[i] >= 'a' && boot_device[i] <= 'f') {
- ppc_boot_device = boot_device[i];
- break;
- }
- }
- if (ppc_boot_device == '\0') {
- error_report("No valid boot device for Mac99 machine");
- exit(1);
- }
- }
-
- if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
- error_report("Only 6xx bus is supported on PREP machine");
- exit(1);
- }
-
- dev = qdev_create(NULL, "raven-pcihost");
- if (bios_name == NULL) {
- bios_name = BIOS_FILENAME;
- }
- qdev_prop_set_string(dev, "bios-name", bios_name);
- qdev_prop_set_uint32(dev, "elf-machine", PPC_ELF_MACHINE);
- qdev_prop_set_bit(dev, "is-legacy-prep", true);
- pcihost = PCI_HOST_BRIDGE(dev);
- object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev), NULL);
- qdev_init_nofail(dev);
- pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0");
- if (pci_bus == NULL) {
- error_report("Couldn't create PCI host controller");
- exit(1);
- }
- sysctrl->contiguous_map_irq = qdev_get_gpio_in(dev, 0);
-
- /* PCI -> ISA bridge */
- pci = pci_create_simple(pci_bus, PCI_DEVFN(1, 0), "i82378");
- cpu = POWERPC_CPU(first_cpu);
- qdev_connect_gpio_out(&pci->qdev, 0,
- cpu->env.irq_inputs[PPC6xx_INPUT_INT]);
- sysbus_connect_irq(&pcihost->busdev, 0, qdev_get_gpio_in(&pci->qdev, 9));
- sysbus_connect_irq(&pcihost->busdev, 1, qdev_get_gpio_in(&pci->qdev, 11));
- sysbus_connect_irq(&pcihost->busdev, 2, qdev_get_gpio_in(&pci->qdev, 9));
- sysbus_connect_irq(&pcihost->busdev, 3, qdev_get_gpio_in(&pci->qdev, 11));
- isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci), "isa.0"));
-
- /* Super I/O (parallel + serial ports) */
- isa = isa_create(isa_bus, TYPE_PC87312_SUPERIO);
- dev = DEVICE(isa);
- qdev_prop_set_uint8(dev, "config", 13); /* fdc, ser0, ser1, par0 */
- qdev_init_nofail(dev);
-
- /* init basic PC hardware */
- pci_vga_init(pci_bus);
-
- nb_nics1 = nb_nics;
- if (nb_nics1 > NE2000_NB_MAX)
- nb_nics1 = NE2000_NB_MAX;
- for(i = 0; i < nb_nics1; i++) {
- if (nd_table[i].model == NULL) {
- nd_table[i].model = g_strdup("ne2k_isa");
- }
- if (strcmp(nd_table[i].model, "ne2k_isa") == 0) {
- isa_ne2000_init(isa_bus, ne2000_io[i], ne2000_irq[i],
- &nd_table[i]);
- } else {
- pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL);
- }
- }
-
- ide_drive_get(hd, ARRAY_SIZE(hd));
- for(i = 0; i < MAX_IDE_BUS; i++) {
- isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i], ide_irq[i],
- hd[2 * i],
- hd[2 * i + 1]);
- }
-
- cpu = POWERPC_CPU(first_cpu);
- sysctrl->reset_irq = cpu->env.irq_inputs[PPC6xx_INPUT_HRESET];
-
- portio_list_init(&prep_port_list, NULL, prep_portio_list, sysctrl, "prep");
- portio_list_add(&prep_port_list, isa_address_space_io(isa), 0x0);
-
- /*
- * PowerPC control and status register group: unimplemented,
- * would be at address 0xFEFF0000.
- */
-
- if (machine_usb(machine)) {
- pci_create_simple(pci_bus, -1, "pci-ohci");
- }
-
- m48t59 = m48t59_init_isa(isa_bus, 0x0074, NVRAM_SIZE, 2000, 59);
- if (m48t59 == NULL)
- return;
- sysctrl->nvram = m48t59;
-
- /* Initialise NVRAM */
- PPC_NVRAM_set_params(m48t59, NVRAM_SIZE, "PREP", ram_size,
- ppc_boot_device,
- kernel_base, kernel_size,
- kernel_cmdline,
- initrd_base, initrd_size,
- /* XXX: need an option to load a NVRAM image */
- 0,
- graphic_width, graphic_height, graphic_depth);
-}
-
-static void prep_machine_init(MachineClass *mc)
-{
- mc->deprecation_reason = "use 40p machine type instead";
- mc->desc = "PowerPC PREP platform";
- mc->init = ppc_prep_init;
- mc->block_default_type = IF_IDE;
- mc->max_cpus = MAX_CPUS;
- mc->default_boot_order = "cad";
- mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("602");
- mc->default_display = "std";
-}
-
static int prep_set_cmos_checksum(DeviceState *dev, void *opaque)
{
uint16_t checksum = *(uint16_t *)opaque;
@@ -821,4 +440,3 @@
}
DEFINE_MACHINE("40p", ibm_40p_machine_init)
-DEFINE_MACHINE("prep", prep_machine_init)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a0076e5..c9b2e0a 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -46,6 +46,7 @@
#include "migration/qemu-file-types.h"
#include "migration/global_state.h"
#include "migration/register.h"
+#include "migration/blocker.h"
#include "mmu-hash64.h"
#include "mmu-book3s-v3.h"
#include "cpu-models.h"
@@ -1677,6 +1678,14 @@
first_ppc_cpu->env.gpr[5] = 0;
spapr->cas_reboot = false;
+
+ spapr->mc_status = -1;
+ spapr->guest_machine_check_addr = -1;
+
+ /* Signal all vCPUs waiting on this condition */
+ qemu_cond_broadcast(&spapr->mc_delivery_cond);
+
+ migrate_del_blocker(spapr->fwnmi_migration_blocker);
}
static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -1959,6 +1968,42 @@
},
};
+static bool spapr_fwnmi_needed(void *opaque)
+{
+ SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+ return spapr->guest_machine_check_addr != -1;
+}
+
+static int spapr_fwnmi_pre_save(void *opaque)
+{
+    SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+    /*
+     * mc_status != -1 means a machine check is still being handled:
+     * warn about it, but never fail the save (always returns 0).
+     */
+    if (spapr->mc_status != -1) {
+        warn_report("A machine check is being handled during migration. The "
+                    "handler may run and log hardware error on the destination");
+    }
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_spapr_machine_check = {
+ .name = "spapr_machine_check",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = spapr_fwnmi_needed,
+ .pre_save = spapr_fwnmi_pre_save,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
+ VMSTATE_INT32(mc_status, SpaprMachineState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
static const VMStateDescription vmstate_spapr = {
.name = "spapr",
.version_id = 3,
@@ -1992,6 +2037,8 @@
&vmstate_spapr_dtb,
&vmstate_spapr_cap_large_decr,
&vmstate_spapr_cap_ccf_assist,
+ &vmstate_spapr_cap_fwnmi,
+ &vmstate_spapr_machine_check,
NULL
}
};
@@ -2807,6 +2854,13 @@
spapr_create_lmb_dr_connectors(spapr);
}
+    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
+        /* Pre-build the live migration blocker message (note the space between the literals) */
+        error_setg(&spapr->fwnmi_migration_blocker,
+            "A machine check is being handled during migration. The handler "
+            "may run and log hardware error on the destination");
+    }
+
/* Set up RTAS event infrastructure */
spapr_events_init(spapr);
@@ -2970,6 +3024,8 @@
kvmppc_spapr_enable_inkernel_multitce();
}
+
+ qemu_cond_init(&spapr->mc_delivery_cond);
}
static int spapr_kvm_type(MachineState *machine, const char *vm_type)
@@ -4397,7 +4453,8 @@
smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
- smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
+ smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
spapr_caps_add_properties(smc, &error_abort);
smc->irq = &spapr_irq_dual;
smc->dr_phb_enabled = true;
@@ -4465,8 +4522,12 @@
*/
static void spapr_machine_4_2_class_options(MachineClass *mc)
{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
spapr_machine_5_0_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+ smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
}
DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 481dfd2..8b27d3a 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -485,17 +485,48 @@
uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist();
if (tcg_enabled() && val) {
- /* TODO - for now only allow broken for TCG */
- error_setg(errp,
-"Requested count cache flush assist capability level not supported by tcg,"
- " try appending -machine cap-ccf-assist=off");
+ /* TCG doesn't implement anything here, but allow with a warning */
+ warn_report("TCG doesn't support requested feature, cap-ccf-assist=on");
} else if (kvm_enabled() && (val > kvm_val)) {
+ uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch();
+
+ if (kvm_ibs == SPAPR_CAP_FIXED_CCD) {
+ /*
+ * If we don't have CCF assist on the host, the assist
+ * instruction is a harmless no-op. It won't correctly
+ * implement the cache count flush *but* if we have
+ * count-cache-disabled in the host, that flush is
+ * unnecessary. So, specifically allow this case. This
+ * allows us to have better performance on POWER9 DD2.3,
+ * while still working on POWER9 DD2.2 and POWER8 host
+ * cpus.
+ */
+ return;
+ }
error_setg(errp,
"Requested count cache flush assist capability level not supported by kvm,"
" try appending -machine cap-ccf-assist=off");
}
}
+static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
+ Error **errp)
+{
+ if (!val) {
+ return; /* Capability is off: nothing to enable */
+ }
+
+ if (tcg_enabled()) {
+ warn_report("Firmware Assisted Non-Maskable Interrupts(FWNMI) not "
+ "supported in TCG");
+ } else if (kvm_enabled()) {
+ if (kvmppc_set_fwnmi() < 0) {
+ error_setg(errp, "Firmware Assisted Non-Maskable Interrupts(FWNMI) "
+ "not supported by KVM");
+ }
+ }
+}
+
SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
[SPAPR_CAP_HTM] = {
.name = "htm",
@@ -595,6 +626,15 @@
.type = "bool",
.apply = cap_ccf_assist_apply,
},
+ [SPAPR_CAP_FWNMI_MCE] = {
+ .name = "fwnmi-mce",
+ .description = "Handle fwnmi machine check exceptions",
+ .index = SPAPR_CAP_FWNMI_MCE,
+ .get = spapr_cap_get_bool,
+ .set = spapr_cap_set_bool,
+ .type = "bool",
+ .apply = cap_fwnmi_mce_apply,
+ },
};
static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -734,6 +774,7 @@
SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
void spapr_caps_init(SpaprMachineState *spapr)
{
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index e355e00..884e455 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -40,8 +40,10 @@
#include "hw/ppc/spapr_drc.h"
#include "qemu/help_option.h"
#include "qemu/bcd.h"
+#include "qemu/main-loop.h"
#include "hw/ppc/spapr_ovec.h"
#include <libfdt.h>
+#include "migration/blocker.h"
#define RTAS_LOG_VERSION_MASK 0xff000000
#define RTAS_LOG_VERSION_6 0x06000000
@@ -213,6 +215,104 @@
struct rtas_event_log_v6_hp hp;
} QEMU_PACKED;
+struct rtas_event_log_v6_mc {
+#define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */
+ struct rtas_event_log_v6_section_header hdr;
+ uint32_t fru_id;
+ uint32_t proc_id;
+ uint8_t error_type;
+#define RTAS_LOG_V6_MC_TYPE_UE 0
+#define RTAS_LOG_V6_MC_TYPE_SLB 1
+#define RTAS_LOG_V6_MC_TYPE_ERAT 2
+#define RTAS_LOG_V6_MC_TYPE_TLB 4
+#define RTAS_LOG_V6_MC_TYPE_D_CACHE 5
+#define RTAS_LOG_V6_MC_TYPE_I_CACHE 7
+ uint8_t sub_err_type;
+#define RTAS_LOG_V6_MC_UE_INDETERMINATE 0
+#define RTAS_LOG_V6_MC_UE_IFETCH 1
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
+#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define RTAS_LOG_V6_MC_SLB_PARITY 0
+#define RTAS_LOG_V6_MC_SLB_MULTIHIT 1
+#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
+#define RTAS_LOG_V6_MC_ERAT_PARITY 1
+#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
+#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3
+#define RTAS_LOG_V6_MC_TLB_PARITY 1
+#define RTAS_LOG_V6_MC_TLB_MULTIHIT 2
+#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint64_t logical_address;
+} QEMU_PACKED;
+
+struct mc_extended_log {
+ struct rtas_event_log_v6 v6hdr;
+ struct rtas_event_log_v6_mc mc;
+} QEMU_PACKED;
+
+struct MC_ierror_table {
+ unsigned long srr1_mask;
+ unsigned long srr1_value;
+ bool nip_valid; /* nip is a valid indicator of faulting address */
+ uint8_t error_type;
+ uint8_t error_subtype;
+ unsigned int initiator;
+ unsigned int severity;
+};
+
+static const struct MC_ierror_table mc_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+struct MC_derror_table {
+ unsigned long dsisr_value;
+ bool dar_valid; /* dar is a valid indicator of faulting address */
+ uint8_t error_type;
+ uint8_t error_subtype;
+ unsigned int initiator;
+ unsigned int severity;
+};
+
+static const struct MC_derror_table mc_derror_table[] = {
+{ 0x00008000, false,
+ RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00004000, true,
+ RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000800, true,
+ RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000400, true,
+ RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000080, true,
+ RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* Before PARITY */
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000100, true,
+ RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
+
typedef enum EventClass {
EVENT_CLASS_INTERNAL_ERRORS = 0,
EVENT_CLASS_EPOW = 1,
@@ -622,6 +722,175 @@
RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
}
+static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
+ struct mc_extended_log *ext_elog)
+{
+ int i;
+ CPUPPCState *env = &cpu->env;
+ uint32_t summary;
+ uint64_t dsisr = env->spr[SPR_DSISR];
+
+ summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT;
+ if (recovered) {
+ summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED;
+ } else {
+ summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED;
+ }
+
+ if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) {
+ for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) {
+ if (!(dsisr & mc_derror_table[i].dsisr_value)) {
+ continue;
+ }
+
+ ext_elog->mc.error_type = mc_derror_table[i].error_type;
+ ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
+ if (mc_derror_table[i].dar_valid) {
+ ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
+ }
+
+ summary |= mc_derror_table[i].initiator
+ | mc_derror_table[i].severity;
+
+ return summary;
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) {
+ if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) !=
+ mc_ierror_table[i].srr1_value) {
+ continue;
+ }
+
+ ext_elog->mc.error_type = mc_ierror_table[i].error_type;
+ ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
+ if (mc_ierror_table[i].nip_valid) {
+ ext_elog->mc.effective_address = cpu_to_be64(env->nip);
+ }
+
+ summary |= mc_ierror_table[i].initiator
+ | mc_ierror_table[i].severity;
+
+ return summary;
+ }
+ }
+
+ summary |= RTAS_LOG_INITIATOR_CPU;
+ return summary;
+}
+
+static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ CPUState *cs = CPU(cpu);
+ uint64_t rtas_addr;
+ CPUPPCState *env = &cpu->env;
+ PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+ target_ulong msr = 0;
+ struct rtas_error_log log;
+ struct mc_extended_log *ext_elog;
+ uint32_t summary;
+
+ /*
+ * Properly set bits in MSR before we invoke the handler.
+ * SRR0/1, DAR and DSISR are properly set by KVM
+ */
+ if (!(*pcc->interrupts_big_endian)(cpu)) {
+ msr |= (1ULL << MSR_LE);
+ }
+
+ if (env->msr & (1ULL << MSR_SF)) {
+ msr |= (1ULL << MSR_SF);
+ }
+
+ msr |= (1ULL << MSR_ME);
+
+ ext_elog = g_malloc0(sizeof(*ext_elog));
+ summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog);
+
+ log.summary = cpu_to_be32(summary);
+ log.extended_length = cpu_to_be32(sizeof(*ext_elog));
+
+ spapr_init_v6hdr(&ext_elog->v6hdr);
+ ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC);
+ ext_elog->mc.hdr.section_length =
+ cpu_to_be16(sizeof(struct rtas_event_log_v6_mc));
+ ext_elog->mc.hdr.section_version = 1;
+
+ /* get rtas addr from fdt */
+ rtas_addr = spapr_get_rtas_addr();
+ if (!rtas_addr) {
+ /* Unable to fetch rtas_addr. Hence reset the guest */
+ ppc_cpu_do_system_reset(cs);
+ g_free(ext_elog);
+ return;
+ }
+
+ stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET,
+ env->gpr[3]);
+ cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
+ sizeof(env->gpr[3]), &log, sizeof(log));
+ cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
+ sizeof(env->gpr[3]) + sizeof(log), ext_elog,
+ sizeof(*ext_elog));
+
+ env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET;
+ env->msr = msr;
+ env->nip = spapr->guest_machine_check_addr;
+
+ g_free(ext_elog);
+}
+
+void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ CPUState *cs = CPU(cpu);
+ int ret;
+ Error *local_err = NULL;
+
+ if (spapr->guest_machine_check_addr == -1) {
+ /*
+ * This implies that we have hit a machine check either when the
+ * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
+ * called) or between system reset and "ibm,nmi-register".
+ * Fall back to the old machine check behavior in such cases.
+ */
+ cs->exception_index = POWERPC_EXCP_MCHECK;
+ ppc_cpu_do_interrupt(cs);
+ return;
+ }
+
+ while (spapr->mc_status != -1) {
+ /*
+ * Check whether the same CPU got machine check error
+ * while still handling the mc error (i.e., before
+ * that CPU called "ibm,nmi-interlock")
+ */
+ if (spapr->mc_status == cpu->vcpu_id) {
+ qemu_system_guest_panicked(NULL);
+ return;
+ }
+ qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
+ /* Meanwhile if the system is reset, then just return */
+ if (spapr->guest_machine_check_addr == -1) {
+ return;
+ }
+ }
+
+ ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
+ if (ret == -EBUSY) {
+ /*
+ * We don't want to abort, so we let the migration continue.
+ * In a rare case, the machine check handler will run on the target.
+ * Though this is not preferable, it is better than aborting
+ * the migration or killing the VM.
+ */
+ warn_report("Received a fwnmi while migration was in progress");
+ }
+
+ spapr->mc_status = cpu->vcpu_id;
+ spapr_mce_dispatch_elog(cpu, recovered);
+}
+
static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index f1799b1..b8bb66b 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1676,6 +1676,18 @@
Error *local_err = NULL;
bool raw_mode_supported = false;
bool guest_xive;
+ CPUState *cs;
+
+ /* CAS is supposed to be called early when only the boot vCPU is active. */
+ CPU_FOREACH(cs) {
+ if (cs == CPU(cpu)) {
+ continue;
+ }
+ if (!cs->halted) {
+ warn_report("guest has multiple active vCPUs at CAS, which is not allowed");
+ return H_MULTI_THREADS_ACTIVE;
+ }
+ }
cas_pvr = cas_check_pvr(spapr, cpu, &addr, &raw_mode_supported, &local_err);
if (local_err) {
@@ -1703,7 +1715,15 @@
ov_table = addr;
ov1_guest = spapr_ovec_parse_vector(ov_table, 1);
+ if (!ov1_guest) {
+ warn_report("guest didn't provide option vector 1");
+ return H_PARAMETER;
+ }
ov5_guest = spapr_ovec_parse_vector(ov_table, 5);
+ if (!ov5_guest) {
+ warn_report("guest didn't provide option vector 5");
+ return H_PARAMETER;
+ }
if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) {
error_report("guest requested hash and radix MMU, which is invalid.");
exit(EXIT_FAILURE);
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 85135e0..883fe28 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -50,6 +50,7 @@
#include "hw/ppc/fdt.h"
#include "target/ppc/mmu-hash64.h"
#include "target/ppc/mmu-book3s-v3.h"
+#include "migration/blocker.h"
static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
uint32_t token, uint32_t nargs,
@@ -399,6 +400,62 @@
rtas_st(rets, 1, 100);
}
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ hwaddr rtas_addr;
+
+ if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+ rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+ return;
+ }
+
+ rtas_addr = spapr_get_rtas_addr();
+ if (!rtas_addr) {
+ rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+ return;
+ }
+
+ spapr->guest_machine_check_addr = rtas_ld(args, 1);
+ rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+ rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+ return;
+ }
+
+ if (spapr->guest_machine_check_addr == -1) {
+ /* NMI register not called */
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ if (spapr->mc_status != cpu->vcpu_id) {
+ /* The vCPU that hit the NMI should invoke "ibm,nmi-interlock" */
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ /*
+ * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
+ * hence unset mc_status.
+ */
+ spapr->mc_status = -1;
+ qemu_cond_signal(&spapr->mc_delivery_cond);
+ rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+ migrate_del_blocker(spapr->fwnmi_migration_blocker);
+}
+
static struct rtas_call {
const char *name;
spapr_rtas_fn fn;
@@ -476,6 +533,32 @@
}
}
+hwaddr spapr_get_rtas_addr(void)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ int rtas_node;
+ const fdt32_t *rtas_data;
+ void *fdt = spapr->fdt_blob;
+
+ /* fetch rtas addr from fdt */
+ rtas_node = fdt_path_offset(fdt, "/rtas");
+ if (rtas_node < 0) {
+ return 0;
+ }
+
+ rtas_data = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL);
+ if (!rtas_data) {
+ return 0;
+ }
+
+ /*
+ * We assume that the OS called RTAS instantiate-rtas, but some other
+ * OS might call RTAS instantiate-rtas-64 instead. This is fine as of now
+ * as SLOF only supports the 32-bit variant.
+ */
+ return (hwaddr)fdt32_to_cpu(*rtas_data);
+}
+
static void core_rtas_register_types(void)
{
spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
@@ -501,6 +584,10 @@
rtas_set_power_level);
spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
rtas_get_power_level);
+ spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
+ rtas_ibm_nmi_register);
+ spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+ rtas_ibm_nmi_interlock);
}
type_init(core_rtas_register_types)
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index f14944e..0b085ea 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -87,6 +87,7 @@
SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
int vdevice_off, node_off, ret;
char *dt_name;
+ const char *dt_compatible;
vdevice_off = fdt_path_offset(fdt, "/vdevice");
if (vdevice_off < 0) {
@@ -113,9 +114,15 @@
}
}
- if (pc->dt_compatible) {
+ if (pc->get_dt_compatible) {
+ dt_compatible = pc->get_dt_compatible(dev);
+ } else {
+ dt_compatible = pc->dt_compatible;
+ }
+
+ if (dt_compatible) {
ret = fdt_setprop_string(fdt, node_off, "compatible",
- pc->dt_compatible);
+ dt_compatible);
if (ret < 0) {
return ret;
}
diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
index 7526947..91dd00e 100644
--- a/hw/ppc/virtex_ml507.c
+++ b/hw/ppc/virtex_ml507.c
@@ -89,10 +89,7 @@
tlb->PID = 0;
}
-static PowerPCCPU *ppc440_init_xilinx(ram_addr_t *ram_size,
- int do_init,
- const char *cpu_type,
- uint32_t sysclk)
+static PowerPCCPU *ppc440_init_xilinx(const char *cpu_type, uint32_t sysclk)
{
PowerPCCPU *cpu;
CPUPPCState *env;
@@ -213,7 +210,7 @@
int i;
/* init CPUs */
- cpu = ppc440_init_xilinx(&ram_size, 1, machine->cpu_type, 400000000);
+ cpu = ppc440_init_xilinx(machine->cpu_type, 400000000);
env = &cpu->env;
if (env->mmu_model != POWERPC_MMU_BOOKE) {
diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig
index 4c8ee87..9e67d99 100644
--- a/hw/tpm/Kconfig
+++ b/hw/tpm/Kconfig
@@ -22,3 +22,9 @@
bool
default y
depends on TPMDEV
+
+config TPM_SPAPR
+ bool
+ default y
+ depends on TPM && PSERIES
+ select TPMDEV
diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs
index de0b85d..85eb99a 100644
--- a/hw/tpm/Makefile.objs
+++ b/hw/tpm/Makefile.objs
@@ -4,3 +4,4 @@
common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o
common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o
common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o
+obj-$(CONFIG_TPM_SPAPR) += tpm_spapr.o
diff --git a/hw/tpm/tpm_spapr.c b/hw/tpm/tpm_spapr.c
new file mode 100644
index 0000000..ce65eb2
--- /dev/null
+++ b/hw/tpm/tpm_spapr.c
@@ -0,0 +1,429 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * PAPR Virtual TPM
+ *
+ * Copyright (c) 2015, 2017, 2019 IBM Corporation.
+ *
+ * Authors:
+ * Stefan Berger <stefanb@linux.vnet.ibm.com>
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+
+#include "sysemu/tpm_backend.h"
+#include "tpm_int.h"
+#include "tpm_util.h"
+
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_vio.h"
+#include "trace.h"
+
+#define DEBUG_SPAPR 0
+
+#define VIO_SPAPR_VTPM(obj) \
+ OBJECT_CHECK(SpaprTpmState, (obj), TYPE_TPM_SPAPR)
+
+typedef struct TpmCrq {
+ uint8_t valid; /* 0x80: cmd; 0xc0: init crq */
+ /* 0x81-0x83: CRQ message response */
+ uint8_t msg; /* see below */
+ uint16_t len; /* len of TPM request; len of TPM response */
+ uint32_t data; /* rtce_dma_handle when sending TPM request */
+ uint64_t reserved;
+} TpmCrq;
+
+#define SPAPR_VTPM_VALID_INIT_CRQ_COMMAND 0xC0
+#define SPAPR_VTPM_VALID_COMMAND 0x80
+#define SPAPR_VTPM_MSG_RESULT 0x80
+
+/* msg types for valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND */
+#define SPAPR_VTPM_INIT_CRQ_RESULT 0x1
+#define SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT 0x2
+
+/* msg types for valid = SPAPR_VTPM_VALID_COMMAND */
+#define SPAPR_VTPM_GET_VERSION 0x1
+#define SPAPR_VTPM_TPM_COMMAND 0x2
+#define SPAPR_VTPM_GET_RTCE_BUFFER_SIZE 0x3
+#define SPAPR_VTPM_PREPARE_TO_SUSPEND 0x4
+
+/* response error messages */
+#define SPAPR_VTPM_VTPM_ERROR 0xff
+
+/* error codes */
+#define SPAPR_VTPM_ERR_COPY_IN_FAILED 0x3
+#define SPAPR_VTPM_ERR_COPY_OUT_FAILED 0x4
+
+#define TPM_SPAPR_BUFFER_MAX 4096
+
+typedef struct {
+ SpaprVioDevice vdev;
+
+ TpmCrq crq; /* track single TPM command */
+
+ uint8_t state;
+#define SPAPR_VTPM_STATE_NONE 0
+#define SPAPR_VTPM_STATE_EXECUTION 1
+#define SPAPR_VTPM_STATE_COMPLETION 2
+
+ unsigned char *buffer;
+
+ uint32_t numbytes; /* number of bytes to deliver on resume */
+
+ TPMBackendCmd cmd;
+
+ TPMBackend *be_driver;
+ TPMVersion be_tpm_version;
+
+ size_t be_buffer_size;
+} SpaprTpmState;
+
+/*
+ * Send a request to the TPM.
+ */
+static void tpm_spapr_tpm_send(SpaprTpmState *s)
+{
+ if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) {
+ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM");
+ }
+
+ s->state = SPAPR_VTPM_STATE_EXECUTION;
+ s->cmd = (TPMBackendCmd) {
+ .locty = 0,
+ .in = s->buffer,
+ .in_len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size),
+ .out = s->buffer,
+ .out_len = s->be_buffer_size,
+ };
+
+ tpm_backend_deliver_request(s->be_driver, &s->cmd);
+}
+
+static int tpm_spapr_process_cmd(SpaprTpmState *s, uint64_t dataptr)
+{
+ long rc;
+
+ /* a max. of be_buffer_size bytes can be transported */
+ rc = spapr_vio_dma_read(&s->vdev, dataptr,
+ s->buffer, s->be_buffer_size);
+ if (rc) {
+ error_report("tpm_spapr_got_payload: DMA read failure");
+ }
+ /* let vTPM handle any malformed request */
+ tpm_spapr_tpm_send(s);
+
+ return rc;
+}
+
+static inline int spapr_tpm_send_crq(struct SpaprVioDevice *dev, TpmCrq *crq)
+{
+ return spapr_vio_send_crq(dev, (uint8_t *)crq);
+}
+
+static int tpm_spapr_do_crq(struct SpaprVioDevice *dev, uint8_t *crq_data)
+{
+ SpaprTpmState *s = VIO_SPAPR_VTPM(dev);
+ TpmCrq local_crq;
+ TpmCrq *crq = &s->crq; /* requests only */
+ int rc;
+ uint8_t valid = crq_data[0];
+ uint8_t msg = crq_data[1];
+
+ trace_tpm_spapr_do_crq(valid, msg);
+
+ switch (valid) {
+ case SPAPR_VTPM_VALID_INIT_CRQ_COMMAND: /* Init command/response */
+
+ /* Respond to initialization request */
+ switch (msg) {
+ case SPAPR_VTPM_INIT_CRQ_RESULT:
+ trace_tpm_spapr_do_crq_crq_result();
+ memset(&local_crq, 0, sizeof(local_crq));
+ local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND;
+ local_crq.msg = SPAPR_VTPM_INIT_CRQ_RESULT;
+ spapr_tpm_send_crq(dev, &local_crq);
+ break;
+
+ case SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT:
+ trace_tpm_spapr_do_crq_crq_complete_result();
+ memset(&local_crq, 0, sizeof(local_crq));
+ local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND;
+ local_crq.msg = SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT;
+ spapr_tpm_send_crq(dev, &local_crq);
+ break;
+ }
+
+ break;
+ case SPAPR_VTPM_VALID_COMMAND: /* Payloads */
+ switch (msg) {
+ case SPAPR_VTPM_TPM_COMMAND:
+ trace_tpm_spapr_do_crq_tpm_command();
+ if (s->state == SPAPR_VTPM_STATE_EXECUTION) {
+ return H_BUSY;
+ }
+ memcpy(crq, crq_data, sizeof(*crq));
+
+ rc = tpm_spapr_process_cmd(s, be32_to_cpu(crq->data));
+
+ if (rc == H_SUCCESS) {
+ crq->valid = be16_to_cpu(0);
+ } else {
+ local_crq.valid = SPAPR_VTPM_MSG_RESULT;
+ local_crq.msg = SPAPR_VTPM_VTPM_ERROR;
+ local_crq.len = cpu_to_be16(0);
+ local_crq.data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_IN_FAILED);
+ spapr_tpm_send_crq(dev, &local_crq);
+ }
+ break;
+
+ case SPAPR_VTPM_GET_RTCE_BUFFER_SIZE:
+ trace_tpm_spapr_do_crq_tpm_get_rtce_buffer_size(s->be_buffer_size);
+ local_crq.valid = SPAPR_VTPM_VALID_COMMAND;
+ local_crq.msg = SPAPR_VTPM_GET_RTCE_BUFFER_SIZE |
+ SPAPR_VTPM_MSG_RESULT;
+ local_crq.len = cpu_to_be16(s->be_buffer_size);
+ spapr_tpm_send_crq(dev, &local_crq);
+ break;
+
+ case SPAPR_VTPM_GET_VERSION:
+ local_crq.valid = SPAPR_VTPM_VALID_COMMAND;
+ local_crq.msg = SPAPR_VTPM_GET_VERSION | SPAPR_VTPM_MSG_RESULT;
+ local_crq.len = cpu_to_be16(0);
+ switch (s->be_tpm_version) {
+ case TPM_VERSION_1_2:
+ local_crq.data = cpu_to_be32(1);
+ break;
+ case TPM_VERSION_2_0:
+ local_crq.data = cpu_to_be32(2);
+ break;
+ default:
+ g_assert_not_reached();
+ break;
+ }
+ trace_tpm_spapr_do_crq_get_version(be32_to_cpu(local_crq.data));
+ spapr_tpm_send_crq(dev, &local_crq);
+ break;
+
+ case SPAPR_VTPM_PREPARE_TO_SUSPEND:
+ trace_tpm_spapr_do_crq_prepare_to_suspend();
+ local_crq.valid = SPAPR_VTPM_VALID_COMMAND;
+ local_crq.msg = SPAPR_VTPM_PREPARE_TO_SUSPEND |
+ SPAPR_VTPM_MSG_RESULT;
+ spapr_tpm_send_crq(dev, &local_crq);
+ break;
+
+ default:
+ trace_tpm_spapr_do_crq_unknown_msg_type(crq->msg);
+ }
+ break;
+ default:
+ trace_tpm_spapr_do_crq_unknown_crq(valid, msg);
+ };
+
+ return H_SUCCESS;
+}
+
+static void tpm_spapr_request_completed(TPMIf *ti, int ret)
+{
+ SpaprTpmState *s = VIO_SPAPR_VTPM(ti);
+ TpmCrq *crq = &s->crq;
+ uint32_t len;
+ int rc;
+
+ s->state = SPAPR_VTPM_STATE_COMPLETION;
+
+ /* a max. of be_buffer_size bytes can be transported */
+ len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size);
+
+ if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
+ trace_tpm_spapr_caught_response(len);
+ /* defer delivery of response until .post_load */
+ s->numbytes = len;
+ return;
+ }
+
+ rc = spapr_vio_dma_write(&s->vdev, be32_to_cpu(crq->data),
+ s->buffer, len);
+
+ if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) {
+ tpm_util_show_buffer(s->buffer, len, "From TPM");
+ }
+
+ crq->valid = SPAPR_VTPM_MSG_RESULT;
+ if (rc == H_SUCCESS) {
+ crq->msg = SPAPR_VTPM_TPM_COMMAND | SPAPR_VTPM_MSG_RESULT;
+ crq->len = cpu_to_be16(len);
+ } else {
+ error_report("%s: DMA write failure", __func__);
+ crq->msg = SPAPR_VTPM_VTPM_ERROR;
+ crq->len = cpu_to_be16(0);
+ crq->data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_OUT_FAILED);
+ }
+
+ rc = spapr_tpm_send_crq(&s->vdev, crq);
+ if (rc) {
+ error_report("%s: Error sending response", __func__);
+ }
+}
+
+static int tpm_spapr_do_startup_tpm(SpaprTpmState *s, size_t buffersize)
+{
+ return tpm_backend_startup_tpm(s->be_driver, buffersize);
+}
+
+static const char *tpm_spapr_get_dt_compatible(SpaprVioDevice *dev)
+{
+ SpaprTpmState *s = VIO_SPAPR_VTPM(dev);
+
+ switch (s->be_tpm_version) {
+ case TPM_VERSION_1_2:
+ return "IBM,vtpm";
+ case TPM_VERSION_2_0:
+ return "IBM,vtpm20";
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void tpm_spapr_reset(SpaprVioDevice *dev)
+{
+ SpaprTpmState *s = VIO_SPAPR_VTPM(dev);
+
+ s->state = SPAPR_VTPM_STATE_NONE;
+ s->numbytes = 0;
+
+ s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver);
+
+ s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->be_driver),
+ TPM_SPAPR_BUFFER_MAX);
+
+ tpm_backend_reset(s->be_driver);
+ tpm_spapr_do_startup_tpm(s, s->be_buffer_size);
+}
+
+static enum TPMVersion tpm_spapr_get_version(TPMIf *ti)
+{
+ SpaprTpmState *s = VIO_SPAPR_VTPM(ti);
+
+ if (tpm_backend_had_startup_error(s->be_driver)) {
+ return TPM_VERSION_UNSPEC;
+ }
+
+ return tpm_backend_get_tpm_version(s->be_driver);
+}
+
+/* persistent state handling */
+
+static int tpm_spapr_pre_save(void *opaque)
+{
+ SpaprTpmState *s = opaque;
+
+ tpm_backend_finish_sync(s->be_driver);
+ /*
+ * we cannot deliver the results to the VM since DMA would touch VM memory
+ */
+
+ return 0;
+}
+
+static int tpm_spapr_post_load(void *opaque, int version_id)
+{
+ SpaprTpmState *s = opaque;
+
+ if (s->numbytes) {
+ trace_tpm_spapr_post_load();
+ /* deliver the results to the VM via DMA */
+ tpm_spapr_request_completed(TPM_IF(s), 0);
+ s->numbytes = 0;
+ }
+
+ return 0;
+}
+
+static const VMStateDescription vmstate_spapr_vtpm = {
+ .name = "tpm-spapr",
+ .pre_save = tpm_spapr_pre_save,
+ .post_load = tpm_spapr_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_SPAPR_VIO(vdev, SpaprTpmState),
+
+ VMSTATE_UINT8(state, SpaprTpmState),
+ VMSTATE_UINT32(numbytes, SpaprTpmState),
+ VMSTATE_VBUFFER_UINT32(buffer, SpaprTpmState, 0, NULL, numbytes),
+ /* remember DMA address */
+ VMSTATE_UINT32(crq.data, SpaprTpmState),
+ VMSTATE_END_OF_LIST(),
+ }
+};
+
+static Property tpm_spapr_properties[] = {
+ DEFINE_SPAPR_PROPERTIES(SpaprTpmState, vdev),
+ DEFINE_PROP_TPMBE("tpmdev", SpaprTpmState, be_driver),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void tpm_spapr_realizefn(SpaprVioDevice *dev, Error **errp)
+{
+ SpaprTpmState *s = VIO_SPAPR_VTPM(dev);
+
+ if (!tpm_find()) {
+ error_setg(errp, "at most one TPM device is permitted");
+ return;
+ }
+
+ dev->crq.SendFunc = tpm_spapr_do_crq;
+
+ if (!s->be_driver) {
+ error_setg(errp, "'tpmdev' property is required");
+ return;
+ }
+ s->buffer = g_malloc(TPM_SPAPR_BUFFER_MAX);
+}
+
+static void tpm_spapr_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ SpaprVioDeviceClass *k = VIO_SPAPR_DEVICE_CLASS(klass);
+ TPMIfClass *tc = TPM_IF_CLASS(klass);
+
+ k->realize = tpm_spapr_realizefn;
+ k->reset = tpm_spapr_reset;
+ k->dt_name = "vtpm";
+ k->dt_type = "IBM,vtpm";
+ k->get_dt_compatible = tpm_spapr_get_dt_compatible;
+ k->signal_mask = 0x00000001;
+ set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+ device_class_set_props(dc, tpm_spapr_properties);
+ k->rtce_window_size = 0x10000000;
+ dc->vmsd = &vmstate_spapr_vtpm;
+
+ tc->model = TPM_MODEL_TPM_SPAPR;
+ tc->get_version = tpm_spapr_get_version;
+ tc->request_completed = tpm_spapr_request_completed;
+}
+
+static const TypeInfo tpm_spapr_info = {
+ .name = TYPE_TPM_SPAPR,
+ .parent = TYPE_VIO_SPAPR_DEVICE,
+ .instance_size = sizeof(SpaprTpmState),
+ .class_init = tpm_spapr_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_TPM_IF },
+ { }
+ }
+};
+
+static void tpm_spapr_register_types(void)
+{
+ type_register_static(&tpm_spapr_info);
+}
+
+type_init(tpm_spapr_register_types)
diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c
index 5362df2..31facb8 100644
--- a/hw/tpm/tpm_tis.c
+++ b/hw/tpm/tpm_tis.c
@@ -107,30 +107,6 @@
return (uint8_t)((addr >> TPM_TIS_LOCALITY_SHIFT) & 0x7);
}
-static void tpm_tis_show_buffer(const unsigned char *buffer,
- size_t buffer_size, const char *string)
-{
- size_t len, i;
- char *line_buffer, *p;
-
- len = MIN(tpm_cmd_get_size(buffer), buffer_size);
-
- /*
- * allocate enough room for 3 chars per buffer entry plus a
- * newline after every 16 chars and a final null terminator.
- */
- line_buffer = g_malloc(len * 3 + (len / 16) + 1);
-
- for (i = 0, p = line_buffer; i < len; i++) {
- if (i && !(i % 16)) {
- p += sprintf(p, "\n");
- }
- p += sprintf(p, "%.2X ", buffer[i]);
- }
- trace_tpm_tis_show_buffer(string, len, line_buffer);
-
- g_free(line_buffer);
-}
/*
* Set the given flags in the STS register by clearing the register but
@@ -156,8 +132,8 @@
*/
static void tpm_tis_tpm_send(TPMState *s, uint8_t locty)
{
- if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) {
- tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "To TPM");
+ if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) {
+ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM");
}
/*
@@ -325,8 +301,8 @@
s->loc[locty].state = TPM_TIS_STATE_COMPLETION;
s->rw_offset = 0;
- if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) {
- tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "From TPM");
+ if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) {
+ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "From TPM");
}
if (TPM_TIS_IS_VALID_LOCTY(s->next_locty)) {
diff --git a/hw/tpm/tpm_util.c b/hw/tpm/tpm_util.c
index 62b091f..c0a0f3d 100644
--- a/hw/tpm/tpm_util.c
+++ b/hw/tpm/tpm_util.c
@@ -350,3 +350,28 @@
tsb->buffer = NULL;
tsb->size = 0;
}
+
+void tpm_util_show_buffer(const unsigned char *buffer,
+ size_t buffer_size, const char *string)
+{
+ size_t len, i;
+ char *line_buffer, *p;
+
+ len = MIN(tpm_cmd_get_size(buffer), buffer_size);
+
+ /*
+ * allocate enough room for 3 chars per buffer entry plus a
+ * newline after every 16 chars and a final null terminator.
+ */
+ line_buffer = g_malloc(len * 3 + (len / 16) + 1);
+
+ for (i = 0, p = line_buffer; i < len; i++) {
+ if (i && !(i % 16)) {
+ p += sprintf(p, "\n");
+ }
+ p += sprintf(p, "%.2X ", buffer[i]);
+ }
+ trace_tpm_util_show_buffer(string, len, line_buffer);
+
+ g_free(line_buffer);
+}
diff --git a/hw/tpm/tpm_util.h b/hw/tpm/tpm_util.h
index f397ac2..7889081 100644
--- a/hw/tpm/tpm_util.h
+++ b/hw/tpm/tpm_util.h
@@ -79,4 +79,7 @@
void tpm_sized_buffer_reset(TPMSizedBuffer *tsb);
+void tpm_util_show_buffer(const unsigned char *buffer,
+ size_t buffer_size, const char *string);
+
#endif /* TPM_TPM_UTIL_H */
diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events
index 89804bc..439e514 100644
--- a/hw/tpm/trace-events
+++ b/hw/tpm/trace-events
@@ -14,6 +14,7 @@
tpm_util_get_buffer_size_hdr_len2(uint32_t len, size_t expected) "tpm2_resp->hdr.len = %u, expected = %zu"
tpm_util_get_buffer_size_len2(uint32_t len, size_t expected) "tpm2_resp->len = %u, expected = %zu"
tpm_util_get_buffer_size(size_t len) "buffersize of device: %zu"
+tpm_util_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s"
# tpm_emulator.c
tpm_emulator_set_locality(uint8_t locty) "setting locality to %d"
@@ -36,7 +37,6 @@
tpm_emulator_inst_init(void) ""
# tpm_tis.c
-tpm_tis_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\nbuf: %s"
tpm_tis_raise_irq(uint32_t irqmask) "Raising IRQ for flag 0x%08x"
tpm_tis_new_active_locality(uint8_t locty) "Active locality is now %d"
tpm_tis_abort(uint8_t locty) "New active locality is %d"
@@ -55,3 +55,17 @@
# tpm_ppi.c
tpm_ppi_memset(uint8_t *ptr, size_t size) "memset: %p %zu"
+
+# hw/tpm/tpm_spapr.c
+tpm_spapr_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s"
+tpm_spapr_do_crq(uint8_t raw1, uint8_t raw2) "1st 2 bytes in CRQ: 0x%02x 0x%02x"
+tpm_spapr_do_crq_crq_result(void) "SPAPR_VTPM_INIT_CRQ_RESULT"
+tpm_spapr_do_crq_crq_complete_result(void) "SPAPR_VTPM_INIT_CRQ_COMP_RESULT"
+tpm_spapr_do_crq_tpm_command(void) "got TPM command payload"
+tpm_spapr_do_crq_tpm_get_rtce_buffer_size(size_t buffersize) "response: buffer size is %zu"
+tpm_spapr_do_crq_get_version(uint32_t version) "response: version %u"
+tpm_spapr_do_crq_prepare_to_suspend(void) "response: preparing to suspend"
+tpm_spapr_do_crq_unknown_msg_type(uint8_t type) "Unknown message type 0x%02x"
+tpm_spapr_do_crq_unknown_crq(uint8_t raw1, uint8_t raw2) "unknown CRQ 0x%02x 0x%02x ..."
+tpm_spapr_post_load(void) "Delivering TPM response after resume"
+tpm_spapr_caught_response(uint32_t v) "Caught response to deliver after resume: %u bytes"
diff --git a/include/hw/pci-host/pnv_phb3.h b/include/hw/pci-host/pnv_phb3.h
new file mode 100644
index 0000000..75b7878
--- /dev/null
+++ b/include/hw/pci-host/pnv_phb3.h
@@ -0,0 +1,164 @@
+/*
+ * QEMU PowerPC PowerNV (POWER8) PHB3 model
+ *
+ * Copyright (c) 2014-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PCI_HOST_PNV_PHB3_H
+#define PCI_HOST_PNV_PHB3_H
+
+#include "hw/pci/pcie_host.h"
+#include "hw/pci/pcie_port.h"
+#include "hw/ppc/xics.h"
+
+typedef struct PnvPHB3 PnvPHB3;
+
+/*
+ * PHB3 XICS Source for MSIs
+ */
+#define TYPE_PHB3_MSI "phb3-msi"
+#define PHB3_MSI(obj) OBJECT_CHECK(Phb3MsiState, (obj), TYPE_PHB3_MSI)
+
+#define PHB3_MAX_MSI 2048
+
+typedef struct Phb3MsiState {
+ ICSState ics;
+ qemu_irq *qirqs;
+
+ PnvPHB3 *phb;
+ uint64_t rba[PHB3_MAX_MSI / 64];
+ uint32_t rba_sum;
+} Phb3MsiState;
+
+void pnv_phb3_msi_update_config(Phb3MsiState *msis, uint32_t base,
+ uint32_t count);
+void pnv_phb3_msi_send(Phb3MsiState *msis, uint64_t addr, uint16_t data,
+ int32_t dev_pe);
+void pnv_phb3_msi_ffi(Phb3MsiState *msis, uint64_t val);
+void pnv_phb3_msi_pic_print_info(Phb3MsiState *msis, Monitor *mon);
+
+
+/*
+ * We have one such address space wrapper per possible device under
+ * the PHB since they need to be assigned statically at qemu device
+ * creation time. The relationship to a PE is done later dynamically.
+ * This means we can potentially create a lot of these guys. Q35
+ * stores them as some kind of radix tree but we never really need to
+ * do fast lookups so instead we simply keep a QLIST of them for now,
+ * we can add the radix if needed later on.
+ *
+ * We do cache the PE number to speed things up a bit though.
+ */
+typedef struct PnvPhb3DMASpace {
+ PCIBus *bus;
+ uint8_t devfn;
+ int pe_num; /* Cached PE number */
+#define PHB_INVALID_PE (-1)
+ PnvPHB3 *phb;
+ AddressSpace dma_as;
+ IOMMUMemoryRegion dma_mr;
+ MemoryRegion msi32_mr;
+ MemoryRegion msi64_mr;
+ QLIST_ENTRY(PnvPhb3DMASpace) list;
+} PnvPhb3DMASpace;
+
+/*
+ * PHB3 Power Bus Common Queue
+ */
+#define TYPE_PNV_PBCQ "pnv-pbcq"
+#define PNV_PBCQ(obj) OBJECT_CHECK(PnvPBCQState, (obj), TYPE_PNV_PBCQ)
+
+typedef struct PnvPBCQState {
+ DeviceState parent;
+
+ uint32_t nest_xbase;
+ uint32_t spci_xbase;
+ uint32_t pci_xbase;
+#define PBCQ_NEST_REGS_COUNT 0x46
+#define PBCQ_PCI_REGS_COUNT 0x15
+#define PBCQ_SPCI_REGS_COUNT 0x5
+
+ uint64_t nest_regs[PBCQ_NEST_REGS_COUNT];
+ uint64_t spci_regs[PBCQ_SPCI_REGS_COUNT];
+ uint64_t pci_regs[PBCQ_PCI_REGS_COUNT];
+ MemoryRegion mmbar0;
+ MemoryRegion mmbar1;
+ MemoryRegion phbbar;
+ uint64_t mmio0_base;
+ uint64_t mmio0_size;
+ uint64_t mmio1_base;
+ uint64_t mmio1_size;
+ PnvPHB3 *phb;
+
+ MemoryRegion xscom_nest_regs;
+ MemoryRegion xscom_pci_regs;
+ MemoryRegion xscom_spci_regs;
+} PnvPBCQState;
+
+/*
+ * PHB3 PCIe Root port
+ */
+#define TYPE_PNV_PHB3_ROOT_BUS "pnv-phb3-root-bus"
+
+#define TYPE_PNV_PHB3_ROOT_PORT "pnv-phb3-root-port"
+
+typedef struct PnvPHB3RootPort {
+ PCIESlot parent_obj;
+} PnvPHB3RootPort;
+
+/*
+ * PHB3 PCIe Host Bridge for PowerNV machines (POWER8)
+ */
+#define TYPE_PNV_PHB3 "pnv-phb3"
+#define PNV_PHB3(obj) OBJECT_CHECK(PnvPHB3, (obj), TYPE_PNV_PHB3)
+
+#define PNV_PHB3_NUM_M64 16
+#define PNV_PHB3_NUM_REGS (0x1000 >> 3)
+#define PNV_PHB3_NUM_LSI 8
+#define PNV_PHB3_NUM_PE 256
+
+#define PCI_MMIO_TOTAL_SIZE (0x1ull << 60)
+
+struct PnvPHB3 {
+ PCIExpressHost parent_obj;
+
+ uint32_t chip_id;
+ uint32_t phb_id;
+ char bus_path[8];
+
+ uint64_t regs[PNV_PHB3_NUM_REGS];
+ MemoryRegion mr_regs;
+
+ MemoryRegion mr_m32;
+ MemoryRegion mr_m64[PNV_PHB3_NUM_M64];
+ MemoryRegion pci_mmio;
+ MemoryRegion pci_io;
+
+ uint64_t ioda_LIST[8];
+ uint64_t ioda_LXIVT[8];
+ uint64_t ioda_TVT[512];
+ uint64_t ioda_M64BT[16];
+ uint64_t ioda_MDT[256];
+ uint64_t ioda_PEEV[4];
+
+ uint32_t total_irq;
+ ICSState lsis;
+ qemu_irq *qirqs;
+ Phb3MsiState msis;
+
+ PnvPBCQState pbcq;
+
+ PnvPHB3RootPort root;
+
+ QLIST_HEAD(, PnvPhb3DMASpace) dma_spaces;
+};
+
+uint64_t pnv_phb3_reg_read(void *opaque, hwaddr off, unsigned size);
+void pnv_phb3_reg_write(void *opaque, hwaddr off, uint64_t val, unsigned size);
+void pnv_phb3_update_regions(PnvPHB3 *phb);
+void pnv_phb3_remap_irqs(PnvPHB3 *phb);
+
+#endif /* PCI_HOST_PNV_PHB3_H */
diff --git a/include/hw/pci-host/pnv_phb3_regs.h b/include/hw/pci-host/pnv_phb3_regs.h
new file mode 100644
index 0000000..a174ef1
--- /dev/null
+++ b/include/hw/pci-host/pnv_phb3_regs.h
@@ -0,0 +1,450 @@
+/*
+ * QEMU PowerPC PowerNV (POWER8) PHB3 model
+ *
+ * Copyright (c) 2013-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PCI_HOST_PNV_PHB3_REGS_H
+#define PCI_HOST_PNV_PHB3_REGS_H
+
+#include "qemu/host-utils.h"
+
+/*
+ * QEMU version of the GETFIELD/SETFIELD macros
+ *
+ * These are common with the PnvXive model.
+ */
+static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
+{
+ return (word & mask) >> ctz64(mask);
+}
+
+static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
+ uint64_t value)
+{
+ return (word & ~mask) | ((value << ctz64(mask)) & mask);
+}
+
+/*
+ * PBCQ XSCOM registers
+ */
+
+#define PBCQ_NEST_IRSN_COMPARE 0x1a
+#define PBCQ_NEST_IRSN_COMP PPC_BITMASK(0, 18)
+#define PBCQ_NEST_IRSN_MASK 0x1b
+#define PBCQ_NEST_LSI_SRC_ID 0x1f
+#define PBCQ_NEST_LSI_SRC PPC_BITMASK(0, 7)
+#define PBCQ_NEST_REGS_COUNT 0x46
+#define PBCQ_NEST_MMIO_BAR0 0x40
+#define PBCQ_NEST_MMIO_BAR1 0x41
+#define PBCQ_NEST_PHB_BAR 0x42
+#define PBCQ_NEST_MMIO_MASK0 0x43
+#define PBCQ_NEST_MMIO_MASK1 0x44
+#define PBCQ_NEST_BAR_EN 0x45
+#define PBCQ_NEST_BAR_EN_MMIO0 PPC_BIT(0)
+#define PBCQ_NEST_BAR_EN_MMIO1 PPC_BIT(1)
+#define PBCQ_NEST_BAR_EN_PHB PPC_BIT(2)
+#define PBCQ_NEST_BAR_EN_IRSN_RX PPC_BIT(3)
+#define PBCQ_NEST_BAR_EN_IRSN_TX PPC_BIT(4)
+
+#define PBCQ_PCI_REGS_COUNT 0x15
+#define PBCQ_PCI_BAR2 0x0b
+
+#define PBCQ_SPCI_REGS_COUNT 0x5
+#define PBCQ_SPCI_ASB_ADDR 0x0
+#define PBCQ_SPCI_ASB_STATUS 0x1
+#define PBCQ_SPCI_ASB_DATA 0x2
+#define PBCQ_SPCI_AIB_CAPP_EN 0x3
+#define PBCQ_SPCI_CAPP_SEC_TMR 0x4
+
+/*
+ * PHB MMIO registers
+ */
+
+/* PHB Fundamental register set A */
+#define PHB_LSI_SOURCE_ID 0x100
+#define PHB_LSI_SRC_ID PPC_BITMASK(5, 12)
+#define PHB_DMA_CHAN_STATUS 0x110
+#define PHB_DMA_CHAN_ANY_ERR PPC_BIT(27)
+#define PHB_DMA_CHAN_ANY_ERR1 PPC_BIT(28)
+#define PHB_DMA_CHAN_ANY_FREEZE PPC_BIT(29)
+#define PHB_CPU_LOADSTORE_STATUS 0x120
+#define PHB_CPU_LS_ANY_ERR PPC_BIT(27)
+#define PHB_CPU_LS_ANY_ERR1 PPC_BIT(28)
+#define PHB_CPU_LS_ANY_FREEZE PPC_BIT(29)
+#define PHB_DMA_MSI_NODE_ID 0x128
+#define PHB_DMAMSI_NID_FIXED PPC_BIT(0)
+#define PHB_DMAMSI_NID PPC_BITMASK(24, 31)
+#define PHB_CONFIG_DATA 0x130
+#define PHB_LOCK0 0x138
+#define PHB_CONFIG_ADDRESS 0x140
+#define PHB_CA_ENABLE PPC_BIT(0)
+#define PHB_CA_BUS PPC_BITMASK(4, 11)
+#define PHB_CA_DEV PPC_BITMASK(12, 16)
+#define PHB_CA_FUNC PPC_BITMASK(17, 19)
+#define PHB_CA_REG PPC_BITMASK(20, 31)
+#define PHB_CA_PE PPC_BITMASK(40, 47)
+#define PHB_LOCK1 0x148
+#define PHB_IVT_BAR 0x150
+#define PHB_IVT_BAR_ENABLE PPC_BIT(0)
+#define PHB_IVT_BASE_ADDRESS_MASK PPC_BITMASK(14, 48)
+#define PHB_IVT_LENGTH_MASK PPC_BITMASK(52, 63)
+#define PHB_RBA_BAR 0x158
+#define PHB_RBA_BAR_ENABLE PPC_BIT(0)
+#define PHB_RBA_BASE_ADDRESS PPC_BITMASK(14, 55)
+#define PHB_PHB3_CONFIG 0x160
+#define PHB_PHB3C_64B_TCE_EN PPC_BIT(2)
+#define PHB_PHB3C_32BIT_MSI_EN PPC_BIT(8)
+#define PHB_PHB3C_64BIT_MSI_EN PPC_BIT(14)
+#define PHB_PHB3C_M32_EN PPC_BIT(16)
+#define PHB_RTT_BAR 0x168
+#define PHB_RTT_BAR_ENABLE PPC_BIT(0)
+#define PHB_RTT_BASE_ADDRESS_MASK PPC_BITMASK(14, 46)
+#define PHB_PELTV_BAR 0x188
+#define PHB_PELTV_BAR_ENABLE PPC_BIT(0)
+#define PHB_PELTV_BASE_ADDRESS PPC_BITMASK(14, 50)
+#define PHB_M32_BASE_ADDR 0x190
+#define PHB_M32_BASE_MASK 0x198
+#define PHB_M32_START_ADDR 0x1a0
+#define PHB_PEST_BAR 0x1a8
+#define PHB_PEST_BAR_ENABLE PPC_BIT(0)
+#define PHB_PEST_BASE_ADDRESS PPC_BITMASK(14, 51)
+#define PHB_M64_UPPER_BITS 0x1f0
+#define PHB_INTREP_TIMER 0x1f8
+#define PHB_DMARD_SYNC 0x200
+#define PHB_DMARD_SYNC_START PPC_BIT(0)
+#define PHB_DMARD_SYNC_COMPLETE PPC_BIT(1)
+#define PHB_RTC_INVALIDATE 0x208
+#define PHB_RTC_INVALIDATE_ALL PPC_BIT(0)
+#define PHB_RTC_INVALIDATE_RID PPC_BITMASK(16, 31)
+#define PHB_TCE_KILL 0x210
+#define PHB_TCE_KILL_ALL PPC_BIT(0)
+#define PHB_TCE_SPEC_CTL 0x218
+#define PHB_IODA_ADDR 0x220
+#define PHB_IODA_AD_AUTOINC PPC_BIT(0)
+#define PHB_IODA_AD_TSEL PPC_BITMASK(11, 15)
+#define PHB_IODA_AD_TADR PPC_BITMASK(55, 63)
+#define PHB_IODA_DATA0 0x228
+#define PHB_FFI_REQUEST 0x238
+#define PHB_FFI_LOCK_CLEAR PPC_BIT(3)
+#define PHB_FFI_REQUEST_ISN PPC_BITMASK(49, 59)
+#define PHB_FFI_LOCK 0x240
+#define PHB_FFI_LOCK_STATE PPC_BIT(0)
+#define PHB_XIVE_UPDATE 0x248 /* Broken in DD1 */
+#define PHB_PHB3_GEN_CAP 0x250
+#define PHB_PHB3_TCE_CAP 0x258
+#define PHB_PHB3_IRQ_CAP 0x260
+#define PHB_PHB3_EEH_CAP 0x268
+#define PHB_IVC_INVALIDATE 0x2a0
+#define PHB_IVC_INVALIDATE_ALL PPC_BIT(0)
+#define PHB_IVC_INVALIDATE_SID PPC_BITMASK(16, 31)
+#define PHB_IVC_UPDATE 0x2a8
+#define PHB_IVC_UPDATE_ENABLE_P PPC_BIT(0)
+#define PHB_IVC_UPDATE_ENABLE_Q PPC_BIT(1)
+#define PHB_IVC_UPDATE_ENABLE_SERVER PPC_BIT(2)
+#define PHB_IVC_UPDATE_ENABLE_PRI PPC_BIT(3)
+#define PHB_IVC_UPDATE_ENABLE_GEN PPC_BIT(4)
+#define PHB_IVC_UPDATE_ENABLE_CON PPC_BIT(5)
+#define PHB_IVC_UPDATE_GEN_MATCH PPC_BITMASK(6, 7)
+#define PHB_IVC_UPDATE_SERVER PPC_BITMASK(8, 23)
+#define PHB_IVC_UPDATE_PRI PPC_BITMASK(24, 31)
+#define PHB_IVC_UPDATE_GEN PPC_BITMASK(32, 33)
+#define PHB_IVC_UPDATE_P PPC_BITMASK(34, 34)
+#define PHB_IVC_UPDATE_Q PPC_BITMASK(35, 35)
+#define PHB_IVC_UPDATE_SID PPC_BITMASK(48, 63)
+#define PHB_PAPR_ERR_INJ_CTL 0x2b0
+#define PHB_PAPR_ERR_INJ_CTL_INB PPC_BIT(0)
+#define PHB_PAPR_ERR_INJ_CTL_OUTB PPC_BIT(1)
+#define PHB_PAPR_ERR_INJ_CTL_STICKY PPC_BIT(2)
+#define PHB_PAPR_ERR_INJ_CTL_CFG PPC_BIT(3)
+#define PHB_PAPR_ERR_INJ_CTL_RD PPC_BIT(4)
+#define PHB_PAPR_ERR_INJ_CTL_WR PPC_BIT(5)
+#define PHB_PAPR_ERR_INJ_CTL_FREEZE PPC_BIT(6)
+#define PHB_PAPR_ERR_INJ_ADDR 0x2b8
+#define PHB_PAPR_ERR_INJ_ADDR_MMIO PPC_BITMASK(16, 63)
+#define PHB_PAPR_ERR_INJ_MASK 0x2c0
+#define PHB_PAPR_ERR_INJ_MASK_CFG PPC_BITMASK(4, 11)
+#define PHB_PAPR_ERR_INJ_MASK_MMIO PPC_BITMASK(16, 63)
+#define PHB_ETU_ERR_SUMMARY 0x2c8
+
+/* UTL registers */
+#define UTL_SYS_BUS_CONTROL 0x400
+#define UTL_STATUS 0x408
+#define UTL_SYS_BUS_AGENT_STATUS 0x410
+#define UTL_SYS_BUS_AGENT_ERR_SEVERITY 0x418
+#define UTL_SYS_BUS_AGENT_IRQ_EN 0x420
+#define UTL_SYS_BUS_BURST_SZ_CONF 0x440
+#define UTL_REVISION_ID 0x448
+#define UTL_BCLK_DOMAIN_DBG1 0x460
+#define UTL_BCLK_DOMAIN_DBG2 0x468
+#define UTL_BCLK_DOMAIN_DBG3 0x470
+#define UTL_BCLK_DOMAIN_DBG4 0x478
+#define UTL_BCLK_DOMAIN_DBG5 0x480
+#define UTL_BCLK_DOMAIN_DBG6 0x488
+#define UTL_OUT_POST_HDR_BUF_ALLOC 0x4c0
+#define UTL_OUT_POST_DAT_BUF_ALLOC 0x4d0
+#define UTL_IN_POST_HDR_BUF_ALLOC 0x4e0
+#define UTL_IN_POST_DAT_BUF_ALLOC 0x4f0
+#define UTL_OUT_NP_BUF_ALLOC 0x500
+#define UTL_IN_NP_BUF_ALLOC 0x510
+#define UTL_PCIE_TAGS_ALLOC 0x520
+#define UTL_GBIF_READ_TAGS_ALLOC 0x530
+#define UTL_PCIE_PORT_CONTROL 0x540
+#define UTL_PCIE_PORT_STATUS 0x548
+#define UTL_PCIE_PORT_ERROR_SEV 0x550
+#define UTL_PCIE_PORT_IRQ_EN 0x558
+#define UTL_RC_STATUS 0x560
+#define UTL_RC_ERR_SEVERITY 0x568
+#define UTL_RC_IRQ_EN 0x570
+#define UTL_EP_STATUS 0x578
+#define UTL_EP_ERR_SEVERITY 0x580
+#define UTL_EP_ERR_IRQ_EN 0x588
+#define UTL_PCI_PM_CTRL1 0x590
+#define UTL_PCI_PM_CTRL2 0x598
+#define UTL_GP_CTL1 0x5a0
+#define UTL_GP_CTL2 0x5a8
+#define UTL_PCLK_DOMAIN_DBG1 0x5b0
+#define UTL_PCLK_DOMAIN_DBG2 0x5b8
+#define UTL_PCLK_DOMAIN_DBG3 0x5c0
+#define UTL_PCLK_DOMAIN_DBG4 0x5c8
+
+/* PCI-E Stack registers */
+#define PHB_PCIE_SYSTEM_CONFIG 0x600
+#define PHB_PCIE_BUS_NUMBER 0x608
+#define PHB_PCIE_SYSTEM_TEST 0x618
+#define PHB_PCIE_LINK_MANAGEMENT 0x630
+#define PHB_PCIE_LM_LINK_ACTIVE PPC_BIT(8)
+#define PHB_PCIE_DLP_TRAIN_CTL 0x640
+#define PHB_PCIE_DLP_TCTX_DISABLE PPC_BIT(1)
+#define PHB_PCIE_DLP_TCRX_DISABLED PPC_BIT(16)
+#define PHB_PCIE_DLP_INBAND_PRESENCE PPC_BIT(19)
+#define PHB_PCIE_DLP_TC_DL_LINKUP PPC_BIT(21)
+#define PHB_PCIE_DLP_TC_DL_PGRESET PPC_BIT(22)
+#define PHB_PCIE_DLP_TC_DL_LINKACT PPC_BIT(23)
+#define PHB_PCIE_SLOP_LOOPBACK_STATUS 0x648
+#define PHB_PCIE_SYS_LINK_INIT 0x668
+#define PHB_PCIE_UTL_CONFIG 0x670
+#define PHB_PCIE_DLP_CONTROL 0x678
+#define PHB_PCIE_UTL_ERRLOG1 0x680
+#define PHB_PCIE_UTL_ERRLOG2 0x688
+#define PHB_PCIE_UTL_ERRLOG3 0x690
+#define PHB_PCIE_UTL_ERRLOG4 0x698
+#define PHB_PCIE_DLP_ERRLOG1 0x6a0
+#define PHB_PCIE_DLP_ERRLOG2 0x6a8
+#define PHB_PCIE_DLP_ERR_STATUS 0x6b0
+#define PHB_PCIE_DLP_ERR_COUNTERS 0x6b8
+#define PHB_PCIE_UTL_ERR_INJECT 0x6c0
+#define PHB_PCIE_TLDLP_ERR_INJECT 0x6c8
+#define PHB_PCIE_LANE_EQ_CNTL0 0x6d0
+#define PHB_PCIE_LANE_EQ_CNTL1 0x6d8
+#define PHB_PCIE_LANE_EQ_CNTL2 0x6e0
+#define PHB_PCIE_LANE_EQ_CNTL3 0x6e8
+#define PHB_PCIE_STRAPPING 0x700
+
+/* Fundamental register set B */
+#define PHB_VERSION 0x800
+#define PHB_RESET 0x808
+#define PHB_CONTROL 0x810
+#define PHB_CTRL_IVE_128_BYTES PPC_BIT(24)
+#define PHB_AIB_RX_CRED_INIT_TIMER 0x818
+#define PHB_AIB_RX_CMD_CRED 0x820
+#define PHB_AIB_RX_DATA_CRED 0x828
+#define PHB_AIB_TX_CMD_CRED 0x830
+#define PHB_AIB_TX_DATA_CRED 0x838
+#define PHB_AIB_TX_CHAN_MAPPING 0x840
+#define PHB_AIB_TAG_ENABLE 0x858
+#define PHB_AIB_FENCE_CTRL 0x860
+#define PHB_TCE_TAG_ENABLE 0x868
+#define PHB_TCE_WATERMARK 0x870
+#define PHB_TIMEOUT_CTRL1 0x878
+#define PHB_TIMEOUT_CTRL2 0x880
+#define PHB_Q_DMA_R 0x888
+#define PHB_Q_DMA_R_QUIESCE_DMA PPC_BIT(0)
+#define PHB_Q_DMA_R_AUTORESET PPC_BIT(1)
+#define PHB_Q_DMA_R_DMA_RESP_STATUS PPC_BIT(4)
+#define PHB_Q_DMA_R_MMIO_RESP_STATUS PPC_BIT(5)
+#define PHB_Q_DMA_R_TCE_RESP_STATUS PPC_BIT(6)
+#define PHB_AIB_TAG_STATUS 0x900
+#define PHB_TCE_TAG_STATUS 0x908
+
+/* FIR & Error registers */
+#define PHB_LEM_FIR_ACCUM 0xc00
+#define PHB_LEM_FIR_AND_MASK 0xc08
+#define PHB_LEM_FIR_OR_MASK 0xc10
+#define PHB_LEM_ERROR_MASK 0xc18
+#define PHB_LEM_ERROR_AND_MASK 0xc20
+#define PHB_LEM_ERROR_OR_MASK 0xc28
+#define PHB_LEM_ACTION0 0xc30
+#define PHB_LEM_ACTION1 0xc38
+#define PHB_LEM_WOF 0xc40
+#define PHB_ERR_STATUS 0xc80
+#define PHB_ERR1_STATUS 0xc88
+#define PHB_ERR_INJECT 0xc90
+#define PHB_ERR_LEM_ENABLE 0xc98
+#define PHB_ERR_IRQ_ENABLE 0xca0
+#define PHB_ERR_FREEZE_ENABLE 0xca8
+#define PHB_ERR_AIB_FENCE_ENABLE 0xcb0
+#define PHB_ERR_LOG_0 0xcc0
+#define PHB_ERR_LOG_1 0xcc8
+#define PHB_ERR_STATUS_MASK 0xcd0
+#define PHB_ERR1_STATUS_MASK 0xcd8
+
+#define PHB_OUT_ERR_STATUS 0xd00
+#define PHB_OUT_ERR1_STATUS 0xd08
+#define PHB_OUT_ERR_INJECT 0xd10
+#define PHB_OUT_ERR_LEM_ENABLE 0xd18
+#define PHB_OUT_ERR_IRQ_ENABLE 0xd20
+#define PHB_OUT_ERR_FREEZE_ENABLE 0xd28
+#define PHB_OUT_ERR_AIB_FENCE_ENABLE 0xd30
+#define PHB_OUT_ERR_LOG_0 0xd40
+#define PHB_OUT_ERR_LOG_1 0xd48
+#define PHB_OUT_ERR_STATUS_MASK 0xd50
+#define PHB_OUT_ERR1_STATUS_MASK 0xd58
+
+#define PHB_INA_ERR_STATUS 0xd80
+#define PHB_INA_ERR1_STATUS 0xd88
+#define PHB_INA_ERR_INJECT 0xd90
+#define PHB_INA_ERR_LEM_ENABLE 0xd98
+#define PHB_INA_ERR_IRQ_ENABLE 0xda0
+#define PHB_INA_ERR_FREEZE_ENABLE 0xda8
+#define PHB_INA_ERR_AIB_FENCE_ENABLE 0xdb0
+#define PHB_INA_ERR_LOG_0 0xdc0
+#define PHB_INA_ERR_LOG_1 0xdc8
+#define PHB_INA_ERR_STATUS_MASK 0xdd0
+#define PHB_INA_ERR1_STATUS_MASK 0xdd8
+
+#define PHB_INB_ERR_STATUS 0xe00
+#define PHB_INB_ERR1_STATUS 0xe08
+#define PHB_INB_ERR_INJECT 0xe10
+#define PHB_INB_ERR_LEM_ENABLE 0xe18
+#define PHB_INB_ERR_IRQ_ENABLE 0xe20
+#define PHB_INB_ERR_FREEZE_ENABLE 0xe28
+#define PHB_INB_ERR_AIB_FENCE_ENABLE 0xe30
+#define PHB_INB_ERR_LOG_0 0xe40
+#define PHB_INB_ERR_LOG_1 0xe48
+#define PHB_INB_ERR_STATUS_MASK 0xe50
+#define PHB_INB_ERR1_STATUS_MASK 0xe58
+
+/* Performance monitor & Debug registers */
+#define PHB_TRACE_CONTROL 0xf80
+#define PHB_PERFMON_CONFIG 0xf88
+#define PHB_PERFMON_CTR0 0xf90
+#define PHB_PERFMON_CTR1 0xf98
+#define PHB_PERFMON_CTR2 0xfa0
+#define PHB_PERFMON_CTR3 0xfa8
+#define PHB_HOTPLUG_OVERRIDE 0xfb0
+#define PHB_HPOVR_FORCE_RESAMPLE PPC_BIT(9)
+#define PHB_HPOVR_PRESENCE_A PPC_BIT(10)
+#define PHB_HPOVR_PRESENCE_B PPC_BIT(11)
+#define PHB_HPOVR_LINK_ACTIVE PPC_BIT(12)
+#define PHB_HPOVR_LINK_BIFURCATED PPC_BIT(13)
+#define PHB_HPOVR_LINK_LANE_SWAPPED PPC_BIT(14)
+
+/*
+ * IODA2 on-chip tables
+ */
+
+#define IODA2_TBL_LIST 1
+#define IODA2_TBL_LXIVT 2
+#define IODA2_TBL_IVC_CAM 3
+#define IODA2_TBL_RBA 4
+#define IODA2_TBL_RCAM 5
+#define IODA2_TBL_MRT 6
+#define IODA2_TBL_PESTA 7
+#define IODA2_TBL_PESTB 8
+#define IODA2_TBL_TVT 9
+#define IODA2_TBL_TCAM 10
+#define IODA2_TBL_TDR 11
+#define IODA2_TBL_M64BT 16
+#define IODA2_TBL_M32DT 17
+#define IODA2_TBL_PEEV 20
+
+/* LXIVT */
+#define IODA2_LXIVT_SERVER PPC_BITMASK(8, 23)
+#define IODA2_LXIVT_PRIORITY PPC_BITMASK(24, 31)
+#define IODA2_LXIVT_NODE_ID PPC_BITMASK(56, 63)
+
+/* IVT */
+#define IODA2_IVT_SERVER PPC_BITMASK(0, 23)
+#define IODA2_IVT_PRIORITY PPC_BITMASK(24, 31)
+#define IODA2_IVT_GEN PPC_BITMASK(37, 38)
+#define IODA2_IVT_P PPC_BITMASK(39, 39)
+#define IODA2_IVT_Q PPC_BITMASK(47, 47)
+#define IODA2_IVT_PE PPC_BITMASK(48, 63)
+
+/* TVT */
+#define IODA2_TVT_TABLE_ADDR PPC_BITMASK(0, 47)
+#define IODA2_TVT_NUM_LEVELS PPC_BITMASK(48, 50)
+#define IODA2_TVE_1_LEVEL 0
+#define IODA2_TVE_2_LEVELS 1
+#define IODA2_TVE_3_LEVELS 2
+#define IODA2_TVE_4_LEVELS 3
+#define IODA2_TVE_5_LEVELS 4
+#define IODA2_TVT_TCE_TABLE_SIZE PPC_BITMASK(51, 55)
+#define IODA2_TVT_IO_PSIZE PPC_BITMASK(59, 63)
+
+/* PESTA */
+#define IODA2_PESTA_MMIO_FROZEN PPC_BIT(0)
+
+/* PESTB */
+#define IODA2_PESTB_DMA_STOPPED PPC_BIT(0)
+
+/* M32DT */
+#define IODA2_M32DT_PE PPC_BITMASK(8, 15)
+
+/* M64BT */
+#define IODA2_M64BT_ENABLE PPC_BIT(0)
+#define IODA2_M64BT_SINGLE_PE PPC_BIT(1)
+#define IODA2_M64BT_BASE PPC_BITMASK(2, 31)
+#define IODA2_M64BT_MASK PPC_BITMASK(34, 63)
+#define IODA2_M64BT_SINGLE_BASE PPC_BITMASK(2, 26)
+#define IODA2_M64BT_PE_HI PPC_BITMASK(27, 31)
+#define IODA2_M64BT_SINGLE_MASK PPC_BITMASK(34, 58)
+#define IODA2_M64BT_PE_LOW PPC_BITMASK(59, 63)
+
+/*
+ * IODA2 in-memory tables
+ */
+
+/*
+ * PEST
+ *
+ * 2x8 bytes entries, PEST0 and PEST1
+ */
+
+#define IODA2_PEST0_MMIO_CAUSE PPC_BIT(2)
+#define IODA2_PEST0_CFG_READ PPC_BIT(3)
+#define IODA2_PEST0_CFG_WRITE PPC_BIT(4)
+#define IODA2_PEST0_TTYPE PPC_BITMASK(5, 7)
+#define PEST_TTYPE_DMA_WRITE 0
+#define PEST_TTYPE_MSI 1
+#define PEST_TTYPE_DMA_READ 2
+#define PEST_TTYPE_DMA_READ_RESP 3
+#define PEST_TTYPE_MMIO_LOAD 4
+#define PEST_TTYPE_MMIO_STORE 5
+#define PEST_TTYPE_OTHER 7
+#define IODA2_PEST0_CA_RETURN PPC_BIT(8)
+#define IODA2_PEST0_UTL_RTOS_TIMEOUT PPC_BIT(8) /* Same bit as CA return */
+#define IODA2_PEST0_UR_RETURN PPC_BIT(9)
+#define IODA2_PEST0_UTL_NONFATAL PPC_BIT(10)
+#define IODA2_PEST0_UTL_FATAL PPC_BIT(11)
+#define IODA2_PEST0_PARITY_UE PPC_BIT(13)
+#define IODA2_PEST0_UTL_CORRECTABLE PPC_BIT(14)
+#define IODA2_PEST0_UTL_INTERRUPT PPC_BIT(15)
+#define IODA2_PEST0_MMIO_XLATE PPC_BIT(16)
+#define IODA2_PEST0_IODA2_ERROR PPC_BIT(16) /* Same bit as MMIO xlate */
+#define IODA2_PEST0_TCE_PAGE_FAULT PPC_BIT(18)
+#define IODA2_PEST0_TCE_ACCESS_FAULT PPC_BIT(19)
+#define IODA2_PEST0_DMA_RESP_TIMEOUT PPC_BIT(20)
+#define IODA2_PEST0_AIB_SIZE_INVALID PPC_BIT(21)
+#define IODA2_PEST0_LEM_BIT PPC_BITMASK(26, 31)
+#define IODA2_PEST0_RID PPC_BITMASK(32, 47)
+#define IODA2_PEST0_MSI_DATA PPC_BITMASK(48, 63)
+
+#define IODA2_PEST1_FAIL_ADDR PPC_BITMASK(3, 63)
+
+
+#endif /* PCI_HOST_PNV_PHB3_REGS_H */
diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h
new file mode 100644
index 0000000..c882bfd
--- /dev/null
+++ b/include/hw/pci-host/pnv_phb4.h
@@ -0,0 +1,230 @@
+/*
+ * QEMU PowerPC PowerNV (POWER9) PHB4 model
+ *
+ * Copyright (c) 2018-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PCI_HOST_PNV_PHB4_H
+#define PCI_HOST_PNV_PHB4_H
+
+#include "hw/pci/pcie_host.h"
+#include "hw/pci/pcie_port.h"
+#include "hw/ppc/xive.h"
+
+typedef struct PnvPhb4PecState PnvPhb4PecState;
+typedef struct PnvPhb4PecStack PnvPhb4PecStack;
+typedef struct PnvPHB4 PnvPHB4;
+typedef struct PnvChip PnvChip;
+
+/*
+ * We have one such address space wrapper per possible device under
+ * the PHB since they need to be assigned statically at qemu device
+ * creation time. The relationship to a PE is done later
+ * dynamically. This means we can potentially create a lot of these
+ * guys. Q35 stores them as some kind of radix tree but we never
+ * really need to do fast lookups so instead we simply keep a QLIST of
+ * them for now, we can add the radix if needed later on.
+ *
+ * We do cache the PE number to speed things up a bit though.
+ */
+typedef struct PnvPhb4DMASpace {
+ PCIBus *bus;
+ uint8_t devfn;
+ int pe_num; /* Cached PE number */
+#define PHB_INVALID_PE (-1)
+ PnvPHB4 *phb;
+ AddressSpace dma_as;
+ IOMMUMemoryRegion dma_mr;
+ MemoryRegion msi32_mr;
+ MemoryRegion msi64_mr;
+ QLIST_ENTRY(PnvPhb4DMASpace) list;
+} PnvPhb4DMASpace;
+
+/*
+ * PHB4 PCIe Root port
+ */
+#define TYPE_PNV_PHB4_ROOT_BUS "pnv-phb4-root-bus"
+#define TYPE_PNV_PHB4_ROOT_PORT "pnv-phb4-root-port"
+
+typedef struct PnvPHB4RootPort {
+ PCIESlot parent_obj;
+} PnvPHB4RootPort;
+
+/*
+ * PHB4 PCIe Host Bridge for PowerNV machines (POWER9)
+ */
+#define TYPE_PNV_PHB4 "pnv-phb4"
+#define PNV_PHB4(obj) OBJECT_CHECK(PnvPHB4, (obj), TYPE_PNV_PHB4)
+
+#define PNV_PHB4_MAX_LSIs 8
+#define PNV_PHB4_MAX_INTs 4096
+#define PNV_PHB4_MAX_MIST (PNV_PHB4_MAX_INTs >> 2)
+#define PNV_PHB4_MAX_MMIO_WINDOWS 32
+#define PNV_PHB4_MIN_MMIO_WINDOWS 16
+#define PNV_PHB4_NUM_REGS (0x3000 >> 3)
+#define PNV_PHB4_MAX_PEs 512
+#define PNV_PHB4_MAX_TVEs (PNV_PHB4_MAX_PEs * 2)
+#define PNV_PHB4_MAX_PEEVs (PNV_PHB4_MAX_PEs / 64)
+#define PNV_PHB4_MAX_MBEs (PNV_PHB4_MAX_MMIO_WINDOWS * 2)
+
+#define PNV_PHB4_VERSION 0x000000a400000002ull
+#define PNV_PHB4_DEVICE_ID 0x04c1
+
+#define PCI_MMIO_TOTAL_SIZE (0x1ull << 60)
+
+struct PnvPHB4 {
+ PCIExpressHost parent_obj;
+
+ PnvPHB4RootPort root;
+
+ uint32_t chip_id;
+ uint32_t phb_id;
+
+ uint64_t version;
+ uint16_t device_id;
+
+ char bus_path[8];
+
+ /* Main register images */
+ uint64_t regs[PNV_PHB4_NUM_REGS];
+ MemoryRegion mr_regs;
+
+ /* Extra SCOM-only register */
+ uint64_t scom_hv_ind_addr_reg;
+
+ /*
+ * Geometry of the PHB. There are two types, small and big PHBs, a
+ * number of resources (number of PEs, windows etc...) are doubled
+ * for a big PHB
+ */
+ bool big_phb;
+
+ /* Memory regions for MMIO space */
+ MemoryRegion mr_mmio[PNV_PHB4_MAX_MMIO_WINDOWS];
+
+ /* PCI side space */
+ MemoryRegion pci_mmio;
+ MemoryRegion pci_io;
+
+ /* On-chip IODA tables */
+ uint64_t ioda_LIST[PNV_PHB4_MAX_LSIs];
+ uint64_t ioda_MIST[PNV_PHB4_MAX_MIST];
+ uint64_t ioda_TVT[PNV_PHB4_MAX_TVEs];
+ uint64_t ioda_MBT[PNV_PHB4_MAX_MBEs];
+ uint64_t ioda_MDT[PNV_PHB4_MAX_PEs];
+ uint64_t ioda_PEEV[PNV_PHB4_MAX_PEEVs];
+
+ /*
+ * The internal PESTA/B is 2 bits per PE split into two tables, we
+ * store them in a single array here to avoid wasting space.
+ */
+ uint8_t ioda_PEST_AB[PNV_PHB4_MAX_PEs];
+
+ /* P9 Interrupt generation */
+ XiveSource xsrc;
+ qemu_irq *qirqs;
+
+ PnvPhb4PecStack *stack;
+
+ QLIST_HEAD(, PnvPhb4DMASpace) dma_spaces;
+};
+
+void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon);
+void pnv_phb4_update_regions(PnvPhb4PecStack *stack);
+extern const MemoryRegionOps pnv_phb4_xscom_ops;
+
+/*
+ * PHB4 PEC (PCI Express Controller)
+ */
+#define TYPE_PNV_PHB4_PEC "pnv-phb4-pec"
+#define PNV_PHB4_PEC(obj) \
+ OBJECT_CHECK(PnvPhb4PecState, (obj), TYPE_PNV_PHB4_PEC)
+
+#define TYPE_PNV_PHB4_PEC_STACK "pnv-phb4-pec-stack"
+#define PNV_PHB4_PEC_STACK(obj) \
+ OBJECT_CHECK(PnvPhb4PecStack, (obj), TYPE_PNV_PHB4_PEC_STACK)
+
+/* Per-stack data */
+struct PnvPhb4PecStack {
+ DeviceState parent;
+
+ /* My own stack number */
+ uint32_t stack_no;
+
+ /* Nest registers */
+#define PHB4_PEC_NEST_STK_REGS_COUNT 0x17
+ uint64_t nest_regs[PHB4_PEC_NEST_STK_REGS_COUNT];
+ MemoryRegion nest_regs_mr;
+
+ /* PCI registers (excluding pass-through) */
+#define PHB4_PEC_PCI_STK_REGS_COUNT 0xf
+ uint64_t pci_regs[PHB4_PEC_PCI_STK_REGS_COUNT];
+ MemoryRegion pci_regs_mr;
+
+ /* PHB pass-through XSCOM */
+ MemoryRegion phb_regs_mr;
+
+ /* Memory windows from PowerBus to PHB */
+ MemoryRegion mmbar0;
+ MemoryRegion mmbar1;
+ MemoryRegion phbbar;
+ MemoryRegion intbar;
+ uint64_t mmio0_base;
+ uint64_t mmio0_size;
+ uint64_t mmio1_base;
+ uint64_t mmio1_size;
+
+ /* The owner PEC */
+ PnvPhb4PecState *pec;
+
+ /* The actual PHB */
+ PnvPHB4 phb;
+};
+
+struct PnvPhb4PecState {
+ DeviceState parent;
+
+ /* PEC number in chip */
+ uint32_t index;
+ uint32_t chip_id;
+
+ MemoryRegion *system_memory;
+
+ /* Nest registers, excluding per-stack */
+#define PHB4_PEC_NEST_REGS_COUNT 0xf
+ uint64_t nest_regs[PHB4_PEC_NEST_REGS_COUNT];
+ MemoryRegion nest_regs_mr;
+
+ /* PCI registers, excluding per-stack */
+#define PHB4_PEC_PCI_REGS_COUNT 0x2
+ uint64_t pci_regs[PHB4_PEC_PCI_REGS_COUNT];
+ MemoryRegion pci_regs_mr;
+
+ /* Stacks */
+ #define PHB4_PEC_MAX_STACKS 3
+ uint32_t num_stacks;
+ PnvPhb4PecStack stacks[PHB4_PEC_MAX_STACKS];
+};
+
+#define PNV_PHB4_PEC_CLASS(klass) \
+ OBJECT_CLASS_CHECK(PnvPhb4PecClass, (klass), TYPE_PNV_PHB4_PEC)
+#define PNV_PHB4_PEC_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(PnvPhb4PecClass, (obj), TYPE_PNV_PHB4_PEC)
+
+typedef struct PnvPhb4PecClass {
+ DeviceClass parent_class;
+
+ uint32_t (*xscom_nest_base)(PnvPhb4PecState *pec);
+ uint32_t xscom_nest_size;
+ uint32_t (*xscom_pci_base)(PnvPhb4PecState *pec);
+ uint32_t xscom_pci_size;
+ const char *compat;
+ int compat_size;
+ const char *stk_compat;
+ int stk_compat_size;
+} PnvPhb4PecClass;
+
+#endif /* PCI_HOST_PNV_PHB4_H */
diff --git a/include/hw/pci-host/pnv_phb4_regs.h b/include/hw/pci-host/pnv_phb4_regs.h
new file mode 100644
index 0000000..55df2c3
--- /dev/null
+++ b/include/hw/pci-host/pnv_phb4_regs.h
@@ -0,0 +1,553 @@
+/*
+ * QEMU PowerPC PowerNV (POWER9) PHB4 model
+ *
+ * Copyright (c) 2013-2020, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PCI_HOST_PNV_PHB4_REGS_H
+#define PCI_HOST_PNV_PHB4_REGS_H
+
+/*
+ * PEC XSCOM registers
+ *
+ * There are 3 PECs in P9. Each PEC can have several PHBs. Each PEC has some
+ * "global" registers and some "per-stack" (per-PHB) registers. Those are
+ * organized in two XSCOM ranges, the "Nest" range and the "PCI" range, each
+ * range contains both some "PEC" registers and some "per-stack" registers.
+ *
+ * Finally the PCI range also contains an additional range per stack that
+ * passes through to some of the PHB's own registers.
+ *
+ * PEC0 can contain 1 PHB (PHB0)
+ * PEC1 can contain 2 PHBs (PHB1 and PHB2)
+ * PEC2 can contain 3 PHBs (PHB3, PHB4 and PHB5)
+ */
+
+/*
+ * This is the "stack" offset, it's the offset from a given range base
+ * to the first "per-stack" registers and also the stride between
+ * stacks, thus for PEC2, the global registers are at offset 0, the
+ * PHB3 registers at offset 0x40, the PHB4 at offset 0x80 etc....
+ *
+ * It is *also* the offset to the pass-through SCOM region but in this case
+ * it is 0 based, ie PHB3 is at 0x100, PHB4 is at 0x140 etc..
+ */
+#define PEC_STACK_OFFSET 0x40
+
+/* XSCOM Nest global registers */
+#define PEC_NEST_PBCQ_HW_CONFIG 0x00
+#define PEC_NEST_DROP_PRIO_CTRL 0x01
+#define PEC_NEST_PBCQ_ERR_INJECT 0x02
+#define PEC_NEST_PCI_NEST_CLK_TRACE_CTL 0x03
+#define PEC_NEST_PBCQ_PMON_CTRL 0x04
+#define PEC_NEST_PBCQ_PBUS_ADDR_EXT 0x05
+#define PEC_NEST_PBCQ_PRED_VEC_TIMEOUT 0x06
+#define PEC_NEST_CAPP_CTRL 0x07
+#define PEC_NEST_PBCQ_READ_STK_OVR 0x08
+#define PEC_NEST_PBCQ_WRITE_STK_OVR 0x09
+#define PEC_NEST_PBCQ_STORE_STK_OVR 0x0a
+#define PEC_NEST_PBCQ_RETRY_BKOFF_CTRL 0x0b
+
+/* XSCOM Nest per-stack registers */
+#define PEC_NEST_STK_PCI_NEST_FIR 0x00
+#define PEC_NEST_STK_PCI_NEST_FIR_CLR 0x01
+#define PEC_NEST_STK_PCI_NEST_FIR_SET 0x02
+#define PEC_NEST_STK_PCI_NEST_FIR_MSK 0x03
+#define PEC_NEST_STK_PCI_NEST_FIR_MSKC 0x04
+#define PEC_NEST_STK_PCI_NEST_FIR_MSKS 0x05
+#define PEC_NEST_STK_PCI_NEST_FIR_ACT0 0x06
+#define PEC_NEST_STK_PCI_NEST_FIR_ACT1 0x07
+#define PEC_NEST_STK_PCI_NEST_FIR_WOF 0x08
+#define PEC_NEST_STK_ERR_REPORT_0 0x0a
+#define PEC_NEST_STK_ERR_REPORT_1 0x0b
+#define PEC_NEST_STK_PBCQ_GNRL_STATUS 0x0c
+#define PEC_NEST_STK_PBCQ_MODE 0x0d
+#define PEC_NEST_STK_MMIO_BAR0 0x0e
+#define PEC_NEST_STK_MMIO_BAR0_MASK 0x0f
+#define PEC_NEST_STK_MMIO_BAR1 0x10
+#define PEC_NEST_STK_MMIO_BAR1_MASK 0x11
+#define PEC_NEST_STK_PHB_REGS_BAR 0x12
+#define PEC_NEST_STK_INT_BAR 0x13
+#define PEC_NEST_STK_BAR_EN 0x14
+#define PEC_NEST_STK_BAR_EN_MMIO0 PPC_BIT(0)
+#define PEC_NEST_STK_BAR_EN_MMIO1 PPC_BIT(1)
+#define PEC_NEST_STK_BAR_EN_PHB PPC_BIT(2)
+#define PEC_NEST_STK_BAR_EN_INT PPC_BIT(3)
+#define PEC_NEST_STK_DATA_FRZ_TYPE 0x15
+#define PEC_NEST_STK_PBCQ_TUN_BAR 0x16
+
+/* XSCOM PCI global registers */
+#define PEC_PCI_PBAIB_HW_CONFIG 0x00
+#define PEC_PCI_PBAIB_READ_STK_OVR 0x02
+
+/* XSCOM PCI per-stack registers */
+#define PEC_PCI_STK_PCI_FIR 0x00
+#define PEC_PCI_STK_PCI_FIR_CLR 0x01
+#define PEC_PCI_STK_PCI_FIR_SET 0x02
+#define PEC_PCI_STK_PCI_FIR_MSK 0x03
+#define PEC_PCI_STK_PCI_FIR_MSKC 0x04
+#define PEC_PCI_STK_PCI_FIR_MSKS 0x05
+#define PEC_PCI_STK_PCI_FIR_ACT0 0x06
+#define PEC_PCI_STK_PCI_FIR_ACT1 0x07
+#define PEC_PCI_STK_PCI_FIR_WOF 0x08
+#define PEC_PCI_STK_ETU_RESET 0x0a
+#define PEC_PCI_STK_PBAIB_ERR_REPORT 0x0b
+#define PEC_PCI_STK_PBAIB_TX_CMD_CRED 0x0d
+#define PEC_PCI_STK_PBAIB_TX_DAT_CRED 0x0e
+
+/*
+ * PHB "SCOM" registers. This is accessed via the above window
+ * and provides a backdoor to the PHB when the AIB bus is not
+ * functional. Some of these directly map some of the PHB MMIO
+ * registers, some are specific and allow indirect access to a
+ * wider range of PHB registers
+ */
+#define PHB_SCOM_HV_IND_ADDR 0x00
+#define PHB_SCOM_HV_IND_ADDR_VALID PPC_BIT(0)
+#define PHB_SCOM_HV_IND_ADDR_4B PPC_BIT(1)
+#define PHB_SCOM_HV_IND_ADDR_AUTOINC PPC_BIT(2)
+#define PHB_SCOM_HV_IND_ADDR_ADDR PPC_BITMASK(51, 63)
+#define PHB_SCOM_HV_IND_DATA 0x01
+#define PHB_SCOM_ETU_LEM_FIR 0x08
+#define PHB_SCOM_ETU_LEM_FIR_AND 0x09
+#define PHB_SCOM_ETU_LEM_FIR_OR 0x0a
+#define PHB_SCOM_ETU_LEM_FIR_MSK 0x0b
+#define PHB_SCOM_ETU_LEM_ERR_MSK_AND 0x0c
+#define PHB_SCOM_ETU_LEM_ERR_MSK_OR 0x0d
+#define PHB_SCOM_ETU_LEM_ACT0 0x0e
+#define PHB_SCOM_ETU_LEM_ACT1 0x0f
+#define PHB_SCOM_ETU_LEM_WOF 0x10
+#define PHB_SCOM_ETU_PMON_CONFIG 0x17
+#define PHB_SCOM_ETU_PMON_CTR0 0x18
+#define PHB_SCOM_ETU_PMON_CTR1 0x19
+#define PHB_SCOM_ETU_PMON_CTR2 0x1a
+#define PHB_SCOM_ETU_PMON_CTR3 0x1b
+
+
+/*
+ * PHB MMIO registers
+ */
+
+/* PHB Fundamental register set A */
+#define PHB_LSI_SOURCE_ID 0x100
+#define PHB_LSI_SRC_ID PPC_BITMASK(4, 12)
+#define PHB_DMA_CHAN_STATUS 0x110
+#define PHB_DMA_CHAN_ANY_ERR PPC_BIT(27)
+#define PHB_DMA_CHAN_ANY_ERR1 PPC_BIT(28)
+#define PHB_DMA_CHAN_ANY_FREEZE PPC_BIT(29)
+#define PHB_CPU_LOADSTORE_STATUS 0x120
+#define PHB_CPU_LS_ANY_ERR PPC_BIT(27)
+#define PHB_CPU_LS_ANY_ERR1 PPC_BIT(28)
+#define PHB_CPU_LS_ANY_FREEZE PPC_BIT(29)
+#define PHB_CONFIG_DATA 0x130
+#define PHB_LOCK0 0x138
+#define PHB_CONFIG_ADDRESS 0x140
+#define PHB_CA_ENABLE PPC_BIT(0)
+#define PHB_CA_STATUS PPC_BITMASK(1, 3)
+#define PHB_CA_STATUS_GOOD 0
+#define PHB_CA_STATUS_UR 1
+#define PHB_CA_STATUS_CRS 2
+#define PHB_CA_STATUS_CA 4
+#define PHB_CA_BUS PPC_BITMASK(4, 11)
+#define PHB_CA_DEV PPC_BITMASK(12, 16)
+#define PHB_CA_FUNC PPC_BITMASK(17, 19)
+#define PHB_CA_BDFN PPC_BITMASK(4, 19) /* bus,dev,func */
+#define PHB_CA_REG PPC_BITMASK(20, 31)
+#define PHB_CA_PE PPC_BITMASK(39, 47)
+#define PHB_LOCK1 0x148
+#define PHB_PHB4_CONFIG 0x160
+#define PHB_PHB4C_32BIT_MSI_EN PPC_BIT(8)
+#define PHB_PHB4C_64BIT_MSI_EN PPC_BIT(14)
+#define PHB_RTT_BAR 0x168
+#define PHB_RTT_BAR_ENABLE PPC_BIT(0)
+#define PHB_RTT_BASE_ADDRESS_MASK PPC_BITMASK(8, 46)
+#define PHB_PELTV_BAR 0x188
+#define PHB_PELTV_BAR_ENABLE PPC_BIT(0)
+#define PHB_PELTV_BASE_ADDRESS PPC_BITMASK(8, 50)
+#define PHB_M32_START_ADDR 0x1a0
+#define PHB_PEST_BAR 0x1a8
+#define PHB_PEST_BAR_ENABLE PPC_BIT(0)
+#define PHB_PEST_BASE_ADDRESS PPC_BITMASK(8, 51)
+#define PHB_ASN_CMPM 0x1C0
+#define PHB_ASN_CMPM_ENABLE PPC_BIT(63)
+#define PHB_CAPI_CMPM 0x1C8
+#define PHB_CAPI_CMPM_ENABLE PPC_BIT(63)
+#define PHB_M64_AOMASK 0x1d0
+#define PHB_M64_UPPER_BITS 0x1f0
+#define PHB_NXLATE_PREFIX 0x1f8
+#define PHB_DMARD_SYNC 0x200
+#define PHB_DMARD_SYNC_START PPC_BIT(0)
+#define PHB_DMARD_SYNC_COMPLETE PPC_BIT(1)
+#define PHB_RTC_INVALIDATE 0x208
+#define PHB_RTC_INVALIDATE_ALL PPC_BIT(0)
+#define PHB_RTC_INVALIDATE_RID PPC_BITMASK(16, 31)
+#define PHB_TCE_KILL 0x210
+#define PHB_TCE_KILL_ALL PPC_BIT(0)
+#define PHB_TCE_KILL_PE PPC_BIT(1)
+#define PHB_TCE_KILL_ONE PPC_BIT(2)
+#define PHB_TCE_KILL_PSEL PPC_BIT(3)
+#define PHB_TCE_KILL_64K 0x1000 /* Address override */
+#define PHB_TCE_KILL_2M 0x2000 /* Address override */
+#define PHB_TCE_KILL_1G 0x3000 /* Address override */
+#define PHB_TCE_KILL_PENUM PPC_BITMASK(55, 63)
+#define PHB_TCE_SPEC_CTL 0x218
+#define PHB_IODA_ADDR 0x220
+#define PHB_IODA_AD_AUTOINC PPC_BIT(0)
+#define PHB_IODA_AD_TSEL PPC_BITMASK(11, 15)
+#define PHB_IODA_AD_MIST_PWV PPC_BITMASK(28, 31)
+#define PHB_IODA_AD_TADR PPC_BITMASK(54, 63)
+#define PHB_IODA_DATA0 0x228
+#define PHB_PHB4_GEN_CAP 0x250
+#define PHB_PHB4_TCE_CAP 0x258
+#define PHB_PHB4_IRQ_CAP 0x260
+#define PHB_PHB4_EEH_CAP 0x268
+#define PHB_PAPR_ERR_INJ_CTL 0x2b0
+#define PHB_PAPR_ERR_INJ_CTL_INB PPC_BIT(0)
+#define PHB_PAPR_ERR_INJ_CTL_OUTB PPC_BIT(1)
+#define PHB_PAPR_ERR_INJ_CTL_STICKY PPC_BIT(2)
+#define PHB_PAPR_ERR_INJ_CTL_CFG PPC_BIT(3)
+#define PHB_PAPR_ERR_INJ_CTL_RD PPC_BIT(4)
+#define PHB_PAPR_ERR_INJ_CTL_WR PPC_BIT(5)
+#define PHB_PAPR_ERR_INJ_CTL_FREEZE PPC_BIT(6)
+#define PHB_PAPR_ERR_INJ_ADDR 0x2b8
+#define PHB_PAPR_ERR_INJ_ADDR_MMIO PPC_BITMASK(16, 63)
+#define PHB_PAPR_ERR_INJ_MASK 0x2c0
+#define PHB_PAPR_ERR_INJ_MASK_CFG PPC_BITMASK(4, 11)
+#define PHB_PAPR_ERR_INJ_MASK_CFG_ALL PPC_BITMASK(4, 19)
+#define PHB_PAPR_ERR_INJ_MASK_MMIO PPC_BITMASK(16, 63)
+#define PHB_ETU_ERR_SUMMARY 0x2c8
+#define PHB_INT_NOTIFY_ADDR 0x300
+#define PHB_INT_NOTIFY_INDEX 0x308
+
+/* Fundamental register set B */
+#define PHB_VERSION 0x800
+#define PHB_CTRLR 0x810
+#define PHB_CTRLR_IRQ_PGSZ_64K PPC_BIT(11)
+#define PHB_CTRLR_IRQ_STORE_EOI PPC_BIT(12)
+#define PHB_CTRLR_MMIO_RD_STRICT PPC_BIT(13)
+#define PHB_CTRLR_MMIO_EEH_DISABLE PPC_BIT(14)
+#define PHB_CTRLR_CFG_EEH_BLOCK PPC_BIT(15)
+#define PHB_CTRLR_FENCE_LNKILL_DIS PPC_BIT(16)
+#define PHB_CTRLR_TVT_ADDR_SEL PPC_BITMASK(17, 19)
+#define TVT_DD1_1_PER_PE 0
+#define TVT_DD1_2_PER_PE 1
+#define TVT_DD1_4_PER_PE 2
+#define TVT_DD1_8_PER_PE 3
+#define TVT_DD1_16_PER_PE 4
+#define TVT_2_PER_PE 0
+#define TVT_4_PER_PE 1
+#define TVT_8_PER_PE 2
+#define TVT_16_PER_PE 3
+#define PHB_CTRLR_DMA_RD_SPACING PPC_BITMASK(28, 31)
+#define PHB_AIB_FENCE_CTRL 0x860
+#define PHB_TCE_TAG_ENABLE 0x868
+#define PHB_TCE_WATERMARK 0x870
+#define PHB_TIMEOUT_CTRL1 0x878
+#define PHB_TIMEOUT_CTRL2 0x880
+#define PHB_Q_DMA_R 0x888
+#define PHB_Q_DMA_R_QUIESCE_DMA PPC_BIT(0)
+#define PHB_Q_DMA_R_AUTORESET PPC_BIT(1)
+#define PHB_Q_DMA_R_DMA_RESP_STATUS PPC_BIT(4)
+#define PHB_Q_DMA_R_MMIO_RESP_STATUS PPC_BIT(5)
+#define PHB_Q_DMA_R_TCE_RESP_STATUS PPC_BIT(6)
+#define PHB_Q_DMA_R_TCE_KILL_STATUS PPC_BIT(7)
+#define PHB_TCE_TAG_STATUS 0x908
+
+/* FIR & Error registers */
+#define PHB_LEM_FIR_ACCUM 0xc00
+#define PHB_LEM_FIR_AND_MASK 0xc08
+#define PHB_LEM_FIR_OR_MASK 0xc10
+#define PHB_LEM_ERROR_MASK 0xc18
+#define PHB_LEM_ERROR_AND_MASK 0xc20
+#define PHB_LEM_ERROR_OR_MASK 0xc28
+#define PHB_LEM_ACTION0 0xc30
+#define PHB_LEM_ACTION1 0xc38
+#define PHB_LEM_WOF 0xc40
+#define PHB_ERR_STATUS 0xc80
+#define PHB_ERR1_STATUS 0xc88
+#define PHB_ERR_INJECT 0xc90
+#define PHB_ERR_LEM_ENABLE 0xc98
+#define PHB_ERR_IRQ_ENABLE 0xca0
+#define PHB_ERR_FREEZE_ENABLE 0xca8
+#define PHB_ERR_AIB_FENCE_ENABLE 0xcb0
+#define PHB_ERR_LOG_0 0xcc0
+#define PHB_ERR_LOG_1 0xcc8
+#define PHB_ERR_STATUS_MASK 0xcd0
+#define PHB_ERR1_STATUS_MASK 0xcd8
+
+#define PHB_TXE_ERR_STATUS 0xd00
+#define PHB_TXE_ERR1_STATUS 0xd08
+#define PHB_TXE_ERR_INJECT 0xd10
+#define PHB_TXE_ERR_LEM_ENABLE 0xd18
+#define PHB_TXE_ERR_IRQ_ENABLE 0xd20
+#define PHB_TXE_ERR_FREEZE_ENABLE 0xd28
+#define PHB_TXE_ERR_AIB_FENCE_ENABLE 0xd30
+#define PHB_TXE_ERR_LOG_0 0xd40
+#define PHB_TXE_ERR_LOG_1 0xd48
+#define PHB_TXE_ERR_STATUS_MASK 0xd50
+#define PHB_TXE_ERR1_STATUS_MASK 0xd58
+
+#define PHB_RXE_ARB_ERR_STATUS 0xd80
+#define PHB_RXE_ARB_ERR1_STATUS 0xd88
+#define PHB_RXE_ARB_ERR_INJECT 0xd90
+#define PHB_RXE_ARB_ERR_LEM_ENABLE 0xd98
+#define PHB_RXE_ARB_ERR_IRQ_ENABLE 0xda0
+#define PHB_RXE_ARB_ERR_FREEZE_ENABLE 0xda8
+#define PHB_RXE_ARB_ERR_AIB_FENCE_ENABLE 0xdb0
+#define PHB_RXE_ARB_ERR_LOG_0 0xdc0
+#define PHB_RXE_ARB_ERR_LOG_1 0xdc8
+#define PHB_RXE_ARB_ERR_STATUS_MASK 0xdd0
+#define PHB_RXE_ARB_ERR1_STATUS_MASK 0xdd8
+
+#define PHB_RXE_MRG_ERR_STATUS 0xe00
+#define PHB_RXE_MRG_ERR1_STATUS 0xe08
+#define PHB_RXE_MRG_ERR_INJECT 0xe10
+#define PHB_RXE_MRG_ERR_LEM_ENABLE 0xe18
+#define PHB_RXE_MRG_ERR_IRQ_ENABLE 0xe20
+#define PHB_RXE_MRG_ERR_FREEZE_ENABLE 0xe28
+#define PHB_RXE_MRG_ERR_AIB_FENCE_ENABLE 0xe30
+#define PHB_RXE_MRG_ERR_LOG_0 0xe40
+#define PHB_RXE_MRG_ERR_LOG_1 0xe48
+#define PHB_RXE_MRG_ERR_STATUS_MASK 0xe50
+#define PHB_RXE_MRG_ERR1_STATUS_MASK 0xe58
+
+#define PHB_RXE_TCE_ERR_STATUS 0xe80
+#define PHB_RXE_TCE_ERR1_STATUS 0xe88
+#define PHB_RXE_TCE_ERR_INJECT 0xe90
+#define PHB_RXE_TCE_ERR_LEM_ENABLE 0xe98
+#define PHB_RXE_TCE_ERR_IRQ_ENABLE 0xea0
+#define PHB_RXE_TCE_ERR_FREEZE_ENABLE 0xea8
+#define PHB_RXE_TCE_ERR_AIB_FENCE_ENABLE 0xeb0
+#define PHB_RXE_TCE_ERR_LOG_0 0xec0
+#define PHB_RXE_TCE_ERR_LOG_1 0xec8
+#define PHB_RXE_TCE_ERR_STATUS_MASK 0xed0
+#define PHB_RXE_TCE_ERR1_STATUS_MASK 0xed8
+
+/* Performance monitor & Debug registers */
+#define PHB_TRACE_CONTROL 0xf80
+#define PHB_PERFMON_CONFIG 0xf88
+#define PHB_PERFMON_CTR0 0xf90
+#define PHB_PERFMON_CTR1 0xf98
+#define PHB_PERFMON_CTR2 0xfa0
+#define PHB_PERFMON_CTR3 0xfa8
+
+/* Root complex config space memory mapped */
+#define PHB_RC_CONFIG_BASE 0x1000
+#define PHB_RC_CONFIG_SIZE 0x800
+
+/* PHB4 REGB registers */
+
+/* PBL core */
+#define PHB_PBL_CONTROL 0x1800
+#define PHB_PBL_TIMEOUT_CTRL 0x1810
+#define PHB_PBL_NPTAG_ENABLE 0x1820
+#define PHB_PBL_NBW_CMP_MASK 0x1830
+#define PHB_PBL_NBW_MASK_ENABLE PPC_BIT(63)
+#define PHB_PBL_SYS_LINK_INIT 0x1838
+#define PHB_PBL_BUF_STATUS 0x1840
+#define PHB_PBL_ERR_STATUS 0x1900
+#define PHB_PBL_ERR1_STATUS 0x1908
+#define PHB_PBL_ERR_INJECT 0x1910
+#define PHB_PBL_ERR_INF_ENABLE 0x1920
+#define PHB_PBL_ERR_ERC_ENABLE 0x1928
+#define PHB_PBL_ERR_FAT_ENABLE 0x1930
+#define PHB_PBL_ERR_LOG_0 0x1940
+#define PHB_PBL_ERR_LOG_1 0x1948
+#define PHB_PBL_ERR_STATUS_MASK 0x1950
+#define PHB_PBL_ERR1_STATUS_MASK 0x1958
+
+/* PCI-E stack */
+#define PHB_PCIE_SCR 0x1A00
+#define PHB_PCIE_SCR_SLOT_CAP PPC_BIT(15)
+#define PHB_PCIE_SCR_MAXLINKSPEED PPC_BITMASK(32, 35)
+
+
+#define PHB_PCIE_CRESET 0x1A10
+#define PHB_PCIE_CRESET_CFG_CORE PPC_BIT(0)
+#define PHB_PCIE_CRESET_TLDLP PPC_BIT(1)
+#define PHB_PCIE_CRESET_PBL PPC_BIT(2)
+#define PHB_PCIE_CRESET_PERST_N PPC_BIT(3)
+#define PHB_PCIE_CRESET_PIPE_N PPC_BIT(4)
+
+
+#define PHB_PCIE_HOTPLUG_STATUS 0x1A20
+#define PHB_PCIE_HPSTAT_PRESENCE PPC_BIT(10)
+
+#define PHB_PCIE_DLP_TRAIN_CTL 0x1A40
+#define PHB_PCIE_DLP_LINK_WIDTH PPC_BITMASK(30, 35)
+#define PHB_PCIE_DLP_LINK_SPEED PPC_BITMASK(36, 39)
+#define PHB_PCIE_DLP_LTSSM_TRC PPC_BITMASK(24, 27)
+#define PHB_PCIE_DLP_LTSSM_RESET 0
+#define PHB_PCIE_DLP_LTSSM_DETECT 1
+#define PHB_PCIE_DLP_LTSSM_POLLING 2
+#define PHB_PCIE_DLP_LTSSM_CONFIG 3
+#define PHB_PCIE_DLP_LTSSM_L0 4
+#define PHB_PCIE_DLP_LTSSM_REC 5
+#define PHB_PCIE_DLP_LTSSM_L1 6
+#define PHB_PCIE_DLP_LTSSM_L2 7
+#define PHB_PCIE_DLP_LTSSM_HOTRESET 8
+#define PHB_PCIE_DLP_LTSSM_DISABLED 9
+#define PHB_PCIE_DLP_LTSSM_LOOPBACK 10
+#define PHB_PCIE_DLP_TL_LINKACT PPC_BIT(23)
+#define PHB_PCIE_DLP_DL_PGRESET PPC_BIT(22)
+#define PHB_PCIE_DLP_TRAINING PPC_BIT(20)
+#define PHB_PCIE_DLP_INBAND_PRESENCE PPC_BIT(19)
+
+#define PHB_PCIE_DLP_CTL 0x1A78
+#define PHB_PCIE_DLP_CTL_BYPASS_PH2 PPC_BIT(4)
+#define PHB_PCIE_DLP_CTL_BYPASS_PH3 PPC_BIT(5)
+
+#define PHB_PCIE_DLP_TRWCTL 0x1A80
+#define PHB_PCIE_DLP_TRWCTL_EN PPC_BIT(0)
+
+#define PHB_PCIE_DLP_ERRLOG1 0x1AA0
+#define PHB_PCIE_DLP_ERRLOG2 0x1AA8
+#define PHB_PCIE_DLP_ERR_STATUS 0x1AB0
+#define PHB_PCIE_DLP_ERR_COUNTERS 0x1AB8
+
+#define PHB_PCIE_LANE_EQ_CNTL0 0x1AD0
+#define PHB_PCIE_LANE_EQ_CNTL1 0x1AD8
+#define PHB_PCIE_LANE_EQ_CNTL2 0x1AE0
+#define PHB_PCIE_LANE_EQ_CNTL3 0x1AE8
+#define PHB_PCIE_LANE_EQ_CNTL20 0x1AF0
+#define PHB_PCIE_LANE_EQ_CNTL21 0x1AF8
+#define PHB_PCIE_LANE_EQ_CNTL22 0x1B00 /* DD1 only */
+#define PHB_PCIE_LANE_EQ_CNTL23 0x1B08 /* DD1 only */
+#define PHB_PCIE_TRACE_CTRL 0x1B20
+#define PHB_PCIE_MISC_STRAP 0x1B30
+
+/* Error */
+#define PHB_REGB_ERR_STATUS 0x1C00
+#define PHB_REGB_ERR1_STATUS 0x1C08
+#define PHB_REGB_ERR_INJECT 0x1C10
+#define PHB_REGB_ERR_INF_ENABLE 0x1C20
+#define PHB_REGB_ERR_ERC_ENABLE 0x1C28
+#define PHB_REGB_ERR_FAT_ENABLE 0x1C30
+#define PHB_REGB_ERR_LOG_0 0x1C40
+#define PHB_REGB_ERR_LOG_1 0x1C48
+#define PHB_REGB_ERR_STATUS_MASK 0x1C50
+#define PHB_REGB_ERR1_STATUS_MASK 0x1C58
+
+/*
+ * IODA3 on-chip tables
+ */
+
+#define IODA3_TBL_LIST 1
+#define IODA3_TBL_MIST 2
+#define IODA3_TBL_RCAM 5
+#define IODA3_TBL_MRT 6
+#define IODA3_TBL_PESTA 7
+#define IODA3_TBL_PESTB 8
+#define IODA3_TBL_TVT 9
+#define IODA3_TBL_TCR 10
+#define IODA3_TBL_TDR 11
+#define IODA3_TBL_MBT 16
+#define IODA3_TBL_MDT 17
+#define IODA3_TBL_PEEV 20
+
+/* LIST */
+#define IODA3_LIST_P PPC_BIT(6)
+#define IODA3_LIST_Q PPC_BIT(7)
+#define IODA3_LIST_STATE PPC_BIT(14)
+
+/* MIST */
+#define IODA3_MIST_P3 PPC_BIT(48 + 0)
+#define IODA3_MIST_Q3 PPC_BIT(48 + 1)
+#define IODA3_MIST_PE3 PPC_BITMASK(48 + 4, 48 + 15)
+
+/* TVT */
+#define IODA3_TVT_TABLE_ADDR PPC_BITMASK(0, 47)
+#define IODA3_TVT_NUM_LEVELS PPC_BITMASK(48, 50)
+#define IODA3_TVE_1_LEVEL 0
+#define IODA3_TVE_2_LEVELS 1
+#define IODA3_TVE_3_LEVELS 2
+#define IODA3_TVE_4_LEVELS 3
+#define IODA3_TVE_5_LEVELS 4
+#define IODA3_TVT_TCE_TABLE_SIZE PPC_BITMASK(51, 55)
+#define IODA3_TVT_NON_TRANSLATE_50 PPC_BIT(56)
+#define IODA3_TVT_IO_PSIZE PPC_BITMASK(59, 63)
+
+/* PESTA */
+#define IODA3_PESTA_MMIO_FROZEN PPC_BIT(0)
+#define IODA3_PESTA_TRANS_TYPE PPC_BITMASK(5, 7)
+#define IODA3_PESTA_TRANS_TYPE_MMIOLOAD 0x4
+#define IODA3_PESTA_CA_CMPLT_TMT PPC_BIT(8)
+#define IODA3_PESTA_UR PPC_BIT(9)
+
+/* PESTB */
+#define IODA3_PESTB_DMA_STOPPED PPC_BIT(0)
+
+/* MDT */
+/* FIXME: check the layout of the PE A, B, C and D fields with Eric */
+#define IODA3_MDT_PE_A PPC_BITMASK(0, 15)
+#define IODA3_MDT_PE_B PPC_BITMASK(16, 31)
+#define IODA3_MDT_PE_C PPC_BITMASK(32, 47)
+#define IODA3_MDT_PE_D PPC_BITMASK(48, 63)
+
+/* MBT */
+#define IODA3_MBT0_ENABLE PPC_BIT(0)
+#define IODA3_MBT0_TYPE PPC_BIT(1)
+#define IODA3_MBT0_TYPE_M32 IODA3_MBT0_TYPE
+#define IODA3_MBT0_TYPE_M64 0
+#define IODA3_MBT0_MODE PPC_BITMASK(2, 3)
+#define IODA3_MBT0_MODE_PE_SEG 0
+#define IODA3_MBT0_MODE_MDT 1
+#define IODA3_MBT0_MODE_SINGLE_PE 2
+#define IODA3_MBT0_SEG_DIV PPC_BITMASK(4, 5)
+#define IODA3_MBT0_SEG_DIV_MAX 0
+#define IODA3_MBT0_SEG_DIV_128 1
+#define IODA3_MBT0_SEG_DIV_64 2
+#define IODA3_MBT0_SEG_DIV_8 3
+#define IODA3_MBT0_MDT_COLUMN PPC_BITMASK(4, 5)
+#define IODA3_MBT0_BASE_ADDR PPC_BITMASK(8, 51)
+
+#define IODA3_MBT1_ENABLE PPC_BIT(0)
+#define IODA3_MBT1_MASK PPC_BITMASK(8, 51)
+#define IODA3_MBT1_SEG_BASE PPC_BITMASK(55, 63)
+#define IODA3_MBT1_SINGLE_PE_NUM PPC_BITMASK(55, 63)
+
+/*
+ * IODA3 in-memory tables
+ */
+
+/*
+ * PEST
+ *
+ * 2x8 bytes entries, PEST0 and PEST1
+ */
+
+#define IODA3_PEST0_MMIO_CAUSE PPC_BIT(2)
+#define IODA3_PEST0_CFG_READ PPC_BIT(3)
+#define IODA3_PEST0_CFG_WRITE PPC_BIT(4)
+#define IODA3_PEST0_TTYPE PPC_BITMASK(5, 7)
+#define PEST_TTYPE_DMA_WRITE 0
+#define PEST_TTYPE_MSI 1
+#define PEST_TTYPE_DMA_READ 2
+#define PEST_TTYPE_DMA_READ_RESP 3
+#define PEST_TTYPE_MMIO_LOAD 4
+#define PEST_TTYPE_MMIO_STORE 5
+#define PEST_TTYPE_OTHER 7
+#define IODA3_PEST0_CA_RETURN PPC_BIT(8)
+#define IODA3_PEST0_UR_RETURN PPC_BIT(9)
+#define IODA3_PEST0_PCIE_NONFATAL PPC_BIT(10)
+#define IODA3_PEST0_PCIE_FATAL PPC_BIT(11)
+#define IODA3_PEST0_PARITY_UE PPC_BIT(13)
+#define IODA3_PEST0_PCIE_CORRECTABLE PPC_BIT(14)
+#define IODA3_PEST0_PCIE_INTERRUPT PPC_BIT(15)
+#define IODA3_PEST0_MMIO_XLATE PPC_BIT(16)
+#define IODA3_PEST0_IODA3_ERROR PPC_BIT(16) /* Same bit as MMIO xlate */
+#define IODA3_PEST0_TCE_PAGE_FAULT PPC_BIT(18)
+#define IODA3_PEST0_TCE_ACCESS_FAULT PPC_BIT(19)
+#define IODA3_PEST0_DMA_RESP_TIMEOUT PPC_BIT(20)
+#define IODA3_PEST0_AIB_SIZE_INVALID PPC_BIT(21)
+#define IODA3_PEST0_LEM_BIT PPC_BITMASK(26, 31)
+#define IODA3_PEST0_RID PPC_BITMASK(32, 47)
+#define IODA3_PEST0_MSI_DATA PPC_BITMASK(48, 63)
+
+#define IODA3_PEST1_FAIL_ADDR PPC_BITMASK(3, 63)
+
+
+#endif /* PCI_HOST_PNV_PHB4_REGS_H */
diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
index 7515430..4b3d254 100644
--- a/include/hw/pci/pcie_port.h
+++ b/include/hw/pci/pcie_port.h
@@ -72,6 +72,7 @@
typedef struct PCIERootPortClass {
PCIDeviceClass parent_class;
DeviceRealize parent_realize;
+ DeviceReset parent_reset;
uint8_t (*aer_vector)(const PCIDevice *dev);
int (*interrupts_init)(PCIDevice *dev, Error **errp);
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index d65dd32..fb4d0c0 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -30,6 +30,8 @@
#include "hw/ppc/pnv_homer.h"
#include "hw/ppc/pnv_xive.h"
#include "hw/ppc/pnv_core.h"
+#include "hw/pci-host/pnv_phb3.h"
+#include "hw/pci-host/pnv_phb4.h"
#define TYPE_PNV_CHIP "pnv-chip"
#define PNV_CHIP(obj) OBJECT_CHECK(PnvChip, (obj), TYPE_PNV_CHIP)
@@ -52,6 +54,8 @@
uint64_t cores_mask;
PnvCore **cores;
+ uint32_t num_phbs;
+
MemoryRegion xscom_mmio;
MemoryRegion xscom;
AddressSpace xscom_as;
@@ -74,6 +78,9 @@
PnvOCC occ;
PnvHomer homer;
+#define PNV8_CHIP_PHB3_MAX 4
+ PnvPHB3 phbs[PNV8_CHIP_PHB3_MAX];
+
XICSFabric *xics;
} Pnv8Chip;
@@ -93,6 +100,9 @@
uint32_t nr_quads;
PnvQuad *quads;
+
+#define PNV9_CHIP_MAX_PEC 3
+ PnvPhb4PecState pecs[PNV9_CHIP_MAX_PEC];
} Pnv9Chip;
/*
@@ -120,6 +130,7 @@
/*< public >*/
uint64_t chip_cfam_id;
uint64_t cores_mask;
+ uint32_t num_phbs;
DeviceRealize parent_realize;
@@ -217,6 +228,8 @@
Notifier powerdown_notifier;
PnvPnor *pnor;
+
+ hwaddr fw_load_addr;
};
#define PNV_FDT_ADDR 0x01000000
diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index 55eee95..113550e 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -40,6 +40,7 @@
/*< public >*/
PowerPCCPU **threads;
uint32_t pir;
+ uint64_t hrmor;
PnvChip *chip;
MemoryRegion xscom_regs;
diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
index f74c81a..09156a5 100644
--- a/include/hw/ppc/pnv_xscom.h
+++ b/include/hw/ppc/pnv_xscom.h
@@ -71,6 +71,15 @@
#define PNV_XSCOM_PBA_BASE 0x2013f00
#define PNV_XSCOM_PBA_SIZE 0x40
+#define PNV_XSCOM_PBCQ_NEST_BASE 0x2012000
+#define PNV_XSCOM_PBCQ_NEST_SIZE 0x46
+
+#define PNV_XSCOM_PBCQ_PCI_BASE 0x9012000
+#define PNV_XSCOM_PBCQ_PCI_SIZE 0x15
+
+#define PNV_XSCOM_PBCQ_SPCI_BASE 0x9013c00
+#define PNV_XSCOM_PBCQ_SPCI_SIZE 0x5
+
/*
* Layout of the XSCOM PCB addresses (POWER 9)
*/
@@ -94,6 +103,17 @@
#define PNV9_XSCOM_XIVE_BASE 0x5013000
#define PNV9_XSCOM_XIVE_SIZE 0x300
+#define PNV9_XSCOM_PEC_NEST_BASE 0x4010c00
+#define PNV9_XSCOM_PEC_NEST_SIZE 0x100
+
+#define PNV9_XSCOM_PEC_PCI_BASE 0xd010800
+#define PNV9_XSCOM_PEC_PCI_SIZE 0x200
+
+/* XSCOM PCI "pass-through" window to PHB SCOM */
+#define PNV9_XSCOM_PEC_PCI_STK0 0x100
+#define PNV9_XSCOM_PEC_PCI_STK1 0x140
+#define PNV9_XSCOM_PEC_PCI_STK2 0x180
+
/*
* Layout of the XSCOM PCB addresses (POWER 10)
*/
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
index 4ea5436..93e614c 100644
--- a/include/hw/ppc/ppc.h
+++ b/include/hw/ppc/ppc.h
@@ -68,7 +68,6 @@
void ppc40x_core_reset(PowerPCCPU *cpu);
void ppc40x_chip_reset(PowerPCCPU *cpu);
void ppc40x_system_reset(PowerPCCPU *cpu);
-void PPC_debug_write (void *opaque, uint32_t addr, uint32_t val);
#if defined(CONFIG_USER_ONLY)
static inline void ppc40x_irq_init(PowerPCCPU *cpu) {}
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 61f005c..a1fba95 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -79,8 +79,10 @@
#define SPAPR_CAP_LARGE_DECREMENTER 0x08
/* Count Cache Flush Assist HW Instruction */
#define SPAPR_CAP_CCF_ASSIST 0x09
+/* FWNMI machine check handling */
+#define SPAPR_CAP_FWNMI_MCE 0x0A
/* Num Caps */
-#define SPAPR_CAP_NUM (SPAPR_CAP_CCF_ASSIST + 1)
+#define SPAPR_CAP_NUM (SPAPR_CAP_FWNMI_MCE + 1)
/*
* Capability Values
@@ -189,6 +191,15 @@
* occurs during the unplug process. */
QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
+ /* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
+ target_ulong guest_machine_check_addr;
+ /*
+ * mc_status is set to -1 if mc is not in progress, else is set to the CPU
+ * handling the mc.
+ */
+ int mc_status;
+ QemuCond mc_delivery_cond;
+
/*< public >*/
char *kvm_type;
char *host_model;
@@ -207,6 +218,8 @@
unsigned gpu_numa_id;
SpaprTpmProxy *tpm_proxy;
+
+ Error *fwnmi_migration_blocker;
};
#define H_SUCCESS 0
@@ -645,8 +658,10 @@
#define RTAS_IBM_REMOVE_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x28)
#define RTAS_IBM_RESET_PE_DMA_WINDOW (RTAS_TOKEN_BASE + 0x29)
#define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A)
+#define RTAS_IBM_NMI_REGISTER (RTAS_TOKEN_BASE + 0x2B)
+#define RTAS_IBM_NMI_INTERLOCK (RTAS_TOKEN_BASE + 0x2C)
-#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x2B)
+#define RTAS_TOKEN_MAX (RTAS_TOKEN_BASE + 0x2D)
/* RTAS ibm,get-system-parameter token values */
#define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS 20
@@ -716,6 +731,9 @@
#define RTAS_ERROR_LOG_MAX 2048
+/* Offset from rtas-base where error log is placed */
+#define RTAS_ERROR_LOG_OFFSET 0x30
+
#define RTAS_EVENT_SCAN_RATE 1
/* This helper should be used to encode interrupt specifiers when the related
@@ -802,6 +820,7 @@
int spapr_max_server_number(SpaprMachineState *spapr);
void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
uint64_t pte0, uint64_t pte1);
+void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
/* DRC callbacks. */
void spapr_core_release(DeviceState *dev);
@@ -869,6 +888,7 @@
extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
extern const VMStateDescription vmstate_spapr_cap_large_decr;
extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
+extern const VMStateDescription vmstate_spapr_cap_fwnmi;
static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
{
@@ -891,4 +911,5 @@
#define SPAPR_OV5_XIVE_BOTH 0x80 /* Only to advertise on the platform */
void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
+hwaddr spapr_get_rtas_addr(void);
#endif /* HW_SPAPR_H */
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index ce6d9b0..bed7df6 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -58,6 +58,7 @@
void (*realize)(SpaprVioDevice *dev, Error **errp);
void (*reset)(SpaprVioDevice *dev);
int (*devnode)(SpaprVioDevice *dev, void *fdt, int node_off);
+ const char *(*get_dt_compatible)(SpaprVioDevice *dev);
} SpaprVioDeviceClass;
struct SpaprVioDevice {
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 48a75aa..9ed58ec 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -101,6 +101,10 @@
DeviceClass parent_class;
DeviceRealize parent_realize;
+ DeviceReset parent_reset;
+
+ void (*reject)(ICSState *s, uint32_t irq);
+ void (*resend)(ICSState *s);
};
struct ICSState {
@@ -161,6 +165,7 @@
uint32_t icp_accept(ICPState *ss);
uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr);
void icp_eoi(ICPState *icp, uint32_t xirr);
+void icp_irq(ICSState *ics, int server, int nr, uint8_t priority);
void icp_reset(ICPState *icp);
void ics_write_xive(ICSState *ics, int nr, int server,
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index f6ba78e..a6d20b0 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -295,6 +295,14 @@
*/
void qemu_mutex_unlock_iothread(void);
+/*
+ * qemu_cond_wait_iothread: Wait on condition for the main loop mutex
+ *
+ * This function atomically releases the main loop mutex and causes
+ * the calling thread to block on the condition.
+ */
+void qemu_cond_wait_iothread(QemuCond *cond);
+
/* internal interfaces */
void qemu_fd_register(int fd);
diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h
index 5b541a7..15979a3 100644
--- a/include/sysemu/tpm.h
+++ b/include/sysemu/tpm.h
@@ -45,11 +45,14 @@
#define TYPE_TPM_TIS "tpm-tis"
#define TYPE_TPM_CRB "tpm-crb"
+#define TYPE_TPM_SPAPR "tpm-spapr"
#define TPM_IS_TIS(chr) \
object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS)
#define TPM_IS_CRB(chr) \
object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB)
+#define TPM_IS_SPAPR(chr) \
+ object_dynamic_cast(OBJECT(chr), TYPE_TPM_SPAPR)
/* returns NULL unless there is exactly one TPM device */
static inline TPMIf *tpm_find(void)
diff --git a/pc-bios/README b/pc-bios/README
index 269d99a..d6d33d2 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -4,9 +4,6 @@
- The VGA BIOS and the Cirrus VGA BIOS come from the LGPL VGA bios
project (http://www.nongnu.org/vgabios/).
-- The PowerPC Open Hack'Ware Open Firmware Compatible BIOS is
- available at https://repo.or.cz/openhackware.git.
-
- OpenBIOS (http://www.openbios.org/) is a free (GPL v2) portable
firmware implementation. The goal is to implement a 100% IEEE
1275-1994 (referred to as Open Firmware) compliant firmware.
diff --git a/pc-bios/ppc_rom.bin b/pc-bios/ppc_rom.bin
deleted file mode 100644
index 174a247..0000000
--- a/pc-bios/ppc_rom.bin
+++ /dev/null
Binary files differ
diff --git a/qapi/tpm.json b/qapi/tpm.json
index b30323b..63878aa 100644
--- a/qapi/tpm.json
+++ b/qapi/tpm.json
@@ -12,11 +12,11 @@
#
# @tpm-tis: TPM TIS model
# @tpm-crb: TPM CRB model (since 2.12)
+# @tpm-spapr: TPM SPAPR model (since 5.0)
#
# Since: 1.5
##
-{ 'enum': 'TpmModel', 'data': [ 'tpm-tis', 'tpm-crb' ] }
-
+{ 'enum': 'TpmModel', 'data': [ 'tpm-tis', 'tpm-crb', 'tpm-spapr' ] }
##
# @query-tpm-models:
#
@@ -29,7 +29,7 @@
# Example:
#
# -> { "execute": "query-tpm-models" }
-# <- { "return": [ "tpm-tis", "tpm-crb" ] }
+# <- { "return": [ "tpm-tis", "tpm-crb", "tpm-spapr" ] }
#
##
{ 'command': 'query-tpm-models', 'returns': ['TpmModel'] }
diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi
index 3d2a8ff..ea3e10b 100644
--- a/qemu-deprecated.texi
+++ b/qemu-deprecated.texi
@@ -270,12 +270,6 @@
These machine types are very old and likely can not be used for live migration
from old QEMU versions anymore. A newer machine type should be used instead.
-@subsection prep (PowerPC) (since 3.1)
-
-This machine type uses an unmaintained firmware, broken in lots of ways,
-and unable to start post-2004 operating systems. 40p machine type should be
-used instead.
-
@subsection spike_v1.9.1 and spike_v1.10 (since 4.1)
The version specific Spike machines have been deprecated in favour of the
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 2328e7e..b79f1c3 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -1729,7 +1729,7 @@
@section PowerPC System emulator
@cindex system emulation (PowerPC)
-Use the executable @file{qemu-system-ppc} to simulate a complete PREP
+Use the executable @file{qemu-system-ppc} to simulate a complete 40P (PREP)
or PowerMac PowerPC system.
QEMU emulates the following PowerMac peripherals:
@@ -1749,7 +1749,7 @@
VIA-CUDA with ADB keyboard and mouse.
@end itemize
-QEMU emulates the following PREP peripherals:
+QEMU emulates the following 40P (PREP) peripherals:
@itemize @minus
@item
@@ -1761,7 +1761,7 @@
@item
Floppy disk
@item
-NE2000 network adapters
+PCnet network adapters
@item
Serial port
@item
@@ -1770,12 +1770,9 @@
PC compatible keyboard and mouse.
@end itemize
-QEMU uses the Open Hack'Ware Open Firmware Compatible BIOS available at
-@url{http://perso.magic.fr/l_indien/OpenHackWare/index.htm}.
-
Since version 0.9.1, QEMU uses OpenBIOS @url{https://www.openbios.org/}
-for the g3beige and mac99 PowerMac machines. OpenBIOS is a free (GPL
-v2) portable firmware implementation. The goal is to implement a 100%
+for the g3beige and mac99 PowerMac and the 40p machines. OpenBIOS is a free
+(GPL v2) portable firmware implementation. The goal is to implement a 100%
IEEE 1275-1994 (referred to as Open Firmware) compliant firmware.
@c man begin OPTIONS
@@ -1798,8 +1795,6 @@
-prom-env 'boot-args=conf=hd:2,\yaboot.conf'
@end example
-These variables are not used by Open Hack'Ware.
-
@end table
@c man end
diff --git a/roms/openhackware b/roms/openhackware
deleted file mode 160000
index c559da7..0000000
--- a/roms/openhackware
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit c559da7c8eec5e45ef1f67978827af6f0b9546f5
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 8ebeaba..3a1eb76 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -180,7 +180,7 @@
POWERPC_EXCP_TRAP = 0x40,
};
-#define PPC_INPUT(env) (env->bus_model)
+#define PPC_INPUT(env) ((env)->bus_model)
/*****************************************************************************/
typedef struct opc_handler_t opc_handler_t;
@@ -397,6 +397,10 @@
#define PSSCR_ESL PPC_BIT(42) /* Enable State Loss */
#define PSSCR_EC PPC_BIT(43) /* Exit Criterion */
+/* HFSCR bits */
+#define HFSCR_MSGP PPC_BIT(53) /* Privileged Message Send Facilities */
+#define HFSCR_IC_MSGP 0xA
+
#define msr_sf ((env->msr >> MSR_SF) & 1)
#define msr_isf ((env->msr >> MSR_ISF) & 1)
#define msr_shv ((env->msr >> MSR_SHV) & 1)
@@ -1329,6 +1333,8 @@
#endif
void store_fpscr(CPUPPCState *env, uint64_t arg, uint32_t mask);
+void helper_hfscr_facility_check(CPUPPCState *env, uint32_t bit,
+ const char *caller, uint32_t cause);
static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
{
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 5752ed4..027f54c 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -473,6 +473,15 @@
env->spr[SPR_FSCR] |= ((target_ulong)env->error_code << 56);
#endif
break;
+ case POWERPC_EXCP_HV_FU: /* Hypervisor Facility Unavailable Exception */
+#ifdef TARGET_PPC64
+ env->spr[SPR_HFSCR] |= ((target_ulong)env->error_code << FSCR_IC_POS);
+ srr0 = SPR_HSRR0;
+ srr1 = SPR_HSRR1;
+ new_msr |= (target_ulong)MSR_HVB;
+ new_msr |= env->msr & ((target_ulong)1 << MSR_RI);
+#endif
+ break;
case POWERPC_EXCP_PIT: /* Programmable interval timer interrupt */
LOG_EXCP("PIT exception\n");
break;
@@ -900,7 +909,11 @@
}
if (env->pending_interrupts & (1 << PPC_INTERRUPT_DOORBELL)) {
env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DOORBELL);
- powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DOORI);
+ if (is_book3s_arch2x(env)) {
+ powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_SDOOR);
+ } else {
+ powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DOORI);
+ }
return;
}
if (env->pending_interrupts & (1 << PPC_INTERRUPT_HDOORBELL)) {
@@ -1221,39 +1234,30 @@
}
/* Server Processor Control */
-static int book3s_dbell2irq(target_ulong rb)
-{
- int msg = rb & DBELL_TYPE_MASK;
+static bool dbell_type_server(target_ulong rb)
+{
/*
* A Directed Hypervisor Doorbell message is sent only if the
* message type is 5. All other types are reserved and the
* instruction is a no-op
*/
- return msg == DBELL_TYPE_DBELL_SERVER ? PPC_INTERRUPT_HDOORBELL : -1;
+ return (rb & DBELL_TYPE_MASK) == DBELL_TYPE_DBELL_SERVER;
}
void helper_book3s_msgclr(CPUPPCState *env, target_ulong rb)
{
- int irq = book3s_dbell2irq(rb);
-
- if (irq < 0) {
+ if (!dbell_type_server(rb)) {
return;
}
- env->pending_interrupts &= ~(1 << irq);
+ env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDOORBELL);
}
-void helper_book3s_msgsnd(target_ulong rb)
+static void book3s_msgsnd_common(int pir, int irq)
{
- int irq = book3s_dbell2irq(rb);
- int pir = rb & DBELL_PROCIDTAG_MASK;
CPUState *cs;
- if (irq < 0) {
- return;
- }
-
qemu_mutex_lock_iothread();
CPU_FOREACH(cs) {
PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -1267,6 +1271,49 @@
}
qemu_mutex_unlock_iothread();
}
+
+/* msgsnd: pass rb's PROCIDTAG to book3s_msgsnd_common() as an HDOORBELL. */
+void helper_book3s_msgsnd(target_ulong rb)
+{
+    int target_pir;
+
+    if (dbell_type_server(rb)) {
+        target_pir = rb & DBELL_PROCIDTAG_MASK;
+        book3s_msgsnd_common(target_pir, PPC_INTERRUPT_HDOORBELL);
+    }
+}
+
+#if defined(TARGET_PPC64)
+/* msgclrp: clear this CPU's pending doorbell (server message type only). */
+void helper_book3s_msgclrp(CPUPPCState *env, target_ulong rb)
+{
+    /* Raises an HV facility-unavailable exception if HFSCR denies access. */
+    helper_hfscr_facility_check(env, HFSCR_MSGP, "msgclrp", HFSCR_IC_MSGP);
+
+    if (dbell_type_server(rb)) {
+        env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DOORBELL);
+    }
+}
+
+/*
+ * msgsndp: send a doorbell message to a thread of the same
+ * multi-threaded processor, subject to the HFSCR facility check.
+ */
+void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb)
+{
+    helper_hfscr_facility_check(env, HFSCR_MSGP, "msgsndp", HFSCR_IC_MSGP);
+
+    if (dbell_type_server(rb)) {
+        /*
+         * TODO: TCG supports only one thread.  The target is therefore
+         * taken from the default_value recorded for SPR_PIR, not from rb.
+         */
+        int self_pir = env->spr_cb[SPR_PIR].default_value;
+
+        book3s_msgsnd_common(self_pir, PPC_INTERRUPT_DOORBELL);
+    }
+}
+#endif
#endif
void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index cd0dfe3..cfb4c07 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -657,6 +657,10 @@
DEF_HELPER_FLAGS_1(load_purr, TCG_CALL_NO_RWG, tl, env)
DEF_HELPER_FLAGS_2(store_purr, TCG_CALL_NO_RWG, void, env, tl)
DEF_HELPER_2(store_ptcr, void, env, tl)
+DEF_HELPER_FLAGS_1(load_dpdes, TCG_CALL_NO_RWG, tl, env)
+DEF_HELPER_FLAGS_2(store_dpdes, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_2(book3s_msgsndp, void, env, tl)
+DEF_HELPER_2(book3s_msgclrp, void, env, tl)
#endif
DEF_HELPER_2(store_sdr1, void, env, tl)
DEF_HELPER_2(store_pidr, void, env, tl)
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 06fd0cc..7f44b1a 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -53,6 +53,9 @@
#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
+#define DEBUG_RETURN_GUEST 0 /* result of the kvm debug handlers; NOTE(review): presumably "resume the guest" -- confirm */
+#define DEBUG_RETURN_GDB 1 /* result of the kvm debug handlers; NOTE(review): presumably "hand the event to gdb" -- confirm */
+
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
KVM_CAP_LAST_INFO
};
@@ -1564,7 +1567,7 @@
static int kvm_handle_hw_breakpoint(CPUState *cs,
struct kvm_debug_exit_arch *arch_info)
{
- int handle = 0;
+ int handle = DEBUG_RETURN_GUEST;
int n;
int flag = 0;
@@ -1572,13 +1575,13 @@
if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
if (n >= 0) {
- handle = 1;
+ handle = DEBUG_RETURN_GDB;
}
} else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
KVMPPC_DEBUG_WATCH_WRITE)) {
n = find_hw_watchpoint(arch_info->address, &flag);
if (n >= 0) {
- handle = 1;
+ handle = DEBUG_RETURN_GDB;
cs->watchpoint_hit = &hw_watchpoint;
hw_watchpoint.vaddr = hw_debug_points[n].addr;
hw_watchpoint.flags = flag;
@@ -1590,12 +1593,12 @@
static int kvm_handle_singlestep(void)
{
- return 1;
+ return DEBUG_RETURN_GDB;
}
static int kvm_handle_sw_breakpoint(void)
{
- return 1;
+ return DEBUG_RETURN_GDB;
}
static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
@@ -1647,7 +1650,7 @@
env->error_code = POWERPC_EXCP_INVAL;
ppc_cpu_do_interrupt(cs);
- return 0;
+ return DEBUG_RETURN_GUEST;
}
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
@@ -1702,6 +1705,13 @@
ret = 0;
break;
+#if defined(TARGET_PPC64)
+ case KVM_EXIT_NMI:
+ trace_kvm_handle_nmi_exception();
+ ret = kvm_handle_nmi(cpu, run);
+ break;
+#endif
+
default:
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
ret = -1;
@@ -2054,6 +2064,14 @@
}
}
+/* Enable KVM_CAP_PPC_FWNMI via first_cpu; returns kvm_vcpu_enable_cap()'s result. */
+int kvmppc_set_fwnmi(void)
+{
+    PowerPCCPU *first = POWERPC_CPU(first_cpu);
+
+    return kvm_vcpu_enable_cap(CPU(first), KVM_CAP_PPC_FWNMI, 0);
+}
+
int kvmppc_smt_threads(void)
{
return cap_ppc_smt ? cap_ppc_smt : 1;
@@ -2789,6 +2807,19 @@
return data & 0xffff;
}
+#if defined(TARGET_PPC64)
+/*
+ * Handle a KVM_EXIT_NMI exit: synchronize the vCPU state, then report the
+ * machine check to the spapr code.  Always returns 0.
+ */
+int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run)
+{
+    /*
+     * The NMI disposition is a multi-bit field, not a flag: "not recovered"
+     * also has the low bit set, so the whole field must be compared.
+     */
+    uint16_t disp = run->flags & KVM_RUN_PPC_NMI_DISP_MASK;
+
+    cpu_synchronize_state(CPU(cpu));
+
+    spapr_mce_req_event(cpu, disp == KVM_RUN_PPC_NMI_DISP_FULLY_RECOV);
+
+    return 0;
+}
+#endif
+
int kvmppc_enable_hwrng(void)
{
if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index b713097..9e4f235 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -27,6 +27,7 @@
void kvmppc_set_papr(PowerPCCPU *cpu);
int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
+int kvmppc_set_fwnmi(void);
int kvmppc_smt_threads(void);
void kvmppc_error_append_smt_possible_hint(Error *const *errp);
int kvmppc_set_smt_threads(int smt);
@@ -83,6 +84,8 @@
void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online);
void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset);
+int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run);
+
#else
static inline uint32_t kvmppc_get_tbfreq(void)
@@ -160,6 +163,11 @@
{
}
+static inline int kvmppc_set_fwnmi(void)
+{
+ return -1; /* stub variant: unconditionally reports failure (NOTE(review): presumably the build without KVM -- confirm) */
+}
+
static inline int kvmppc_smt_threads(void)
{
return 1;
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index e8e2a8a..98f5895 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -56,51 +56,138 @@
}
}
+static void *probe_contiguous(CPUPPCState *env, target_ulong addr, uint32_t nb,
+ MMUAccessType access_type, int mmu_idx,
+ uintptr_t raddr)
+{ /* Probe nb bytes at addr; return one host pointer usable for the whole range, or NULL (callers then fall back to their per-access slow path). */
+ void *host1, *host2;
+ uint32_t nb_pg1, nb_pg2;
+
+ nb_pg1 = -(addr | TARGET_PAGE_MASK); /* bytes from addr up to the page boundary, assuming TARGET_PAGE_MASK clears only the in-page offset bits */
+ if (likely(nb <= nb_pg1)) {
+ /* The entire operation is on a single page: the probe result is returned as is. */
+ return probe_access(env, addr, nb, access_type, mmu_idx, raddr);
+ }
+
+ /* The operation spans two pages: probe both parts before returning anything. */
+ nb_pg2 = nb - nb_pg1; /* bytes that land on the second page */
+ host1 = probe_access(env, addr, nb_pg1, access_type, mmu_idx, raddr);
+ addr = addr_add(env, addr, nb_pg1); /* advance to the start of the second part */
+ host2 = probe_access(env, addr, nb_pg2, access_type, mmu_idx, raddr);
+
+ /* If the two host pages are contiguous, the range can be used through host1 alone. */
+ if (host2 == host1 + nb_pg1) {
+ return host1;
+ }
+ return NULL; /* host mappings are not adjacent */
+}
+
void helper_lmw(CPUPPCState *env, target_ulong addr, uint32_t reg)
{
- for (; reg < 32; reg++) {
- if (needs_byteswap(env)) {
- env->gpr[reg] = bswap32(cpu_ldl_data_ra(env, addr, GETPC()));
- } else {
- env->gpr[reg] = cpu_ldl_data_ra(env, addr, GETPC());
+ uintptr_t raddr = GETPC();
+ int mmu_idx = cpu_mmu_index(env, false);
+ void *host = probe_contiguous(env, addr, (32 - reg) * 4,
+ MMU_DATA_LOAD, mmu_idx, raddr);
+
+ if (likely(host)) {
+ /* Fast path -- the entire operation is in RAM at host. */
+ for (; reg < 32; reg++) {
+ env->gpr[reg] = (uint32_t)ldl_be_p(host);
+ host += 4;
}
- addr = addr_add(env, addr, 4);
+ } else {
+ /* Slow path -- at least some of the operation requires i/o. */
+ for (; reg < 32; reg++) {
+ env->gpr[reg] = cpu_ldl_mmuidx_ra(env, addr, mmu_idx, raddr);
+ addr = addr_add(env, addr, 4);
+ }
}
}
void helper_stmw(CPUPPCState *env, target_ulong addr, uint32_t reg)
{
- for (; reg < 32; reg++) {
- if (needs_byteswap(env)) {
- cpu_stl_data_ra(env, addr, bswap32((uint32_t)env->gpr[reg]),
- GETPC());
- } else {
- cpu_stl_data_ra(env, addr, (uint32_t)env->gpr[reg], GETPC());
+ uintptr_t raddr = GETPC();
+ int mmu_idx = cpu_mmu_index(env, false);
+ void *host = probe_contiguous(env, addr, (32 - reg) * 4,
+ MMU_DATA_STORE, mmu_idx, raddr);
+
+ if (likely(host)) {
+ /* Fast path -- the entire operation is in RAM at host. */
+ for (; reg < 32; reg++) {
+ stl_be_p(host, env->gpr[reg]);
+ host += 4;
}
- addr = addr_add(env, addr, 4);
+ } else {
+ /* Slow path -- at least some of the operation requires i/o. */
+ for (; reg < 32; reg++) {
+ cpu_stl_mmuidx_ra(env, addr, env->gpr[reg], mmu_idx, raddr);
+ addr = addr_add(env, addr, 4);
+ }
}
}
static void do_lsw(CPUPPCState *env, target_ulong addr, uint32_t nb,
uint32_t reg, uintptr_t raddr)
{
- int sh;
+ int mmu_idx;
+ void *host;
+ uint32_t val;
- for (; nb > 3; nb -= 4) {
- env->gpr[reg] = cpu_ldl_data_ra(env, addr, raddr);
- reg = (reg + 1) % 32;
- addr = addr_add(env, addr, 4);
+ if (unlikely(nb == 0)) {
+ return;
}
- if (unlikely(nb > 0)) {
- env->gpr[reg] = 0;
- for (sh = 24; nb > 0; nb--, sh -= 8) {
- env->gpr[reg] |= cpu_ldub_data_ra(env, addr, raddr) << sh;
- addr = addr_add(env, addr, 1);
+
+ mmu_idx = cpu_mmu_index(env, false);
+ host = probe_contiguous(env, addr, nb, MMU_DATA_LOAD, mmu_idx, raddr);
+
+ if (likely(host)) {
+ /* Fast path -- the entire operation is in RAM at host. */
+ for (; nb > 3; nb -= 4) {
+ env->gpr[reg] = (uint32_t)ldl_be_p(host);
+ reg = (reg + 1) % 32;
+ host += 4;
+ }
+ switch (nb) {
+ default:
+ return;
+ case 1:
+ val = ldub_p(host) << 24;
+ break;
+ case 2:
+ val = lduw_be_p(host) << 16;
+ break;
+ case 3:
+ val = (lduw_be_p(host) << 16) | (ldub_p(host + 2) << 8);
+ break;
+ }
+ } else {
+ /* Slow path -- at least some of the operation requires i/o. */
+ for (; nb > 3; nb -= 4) {
+ env->gpr[reg] = cpu_ldl_mmuidx_ra(env, addr, mmu_idx, raddr);
+ reg = (reg + 1) % 32;
+ addr = addr_add(env, addr, 4);
+ }
+ switch (nb) {
+ default:
+ return;
+ case 1:
+ val = cpu_ldub_mmuidx_ra(env, addr, mmu_idx, raddr) << 24;
+ break;
+ case 2:
+ val = cpu_lduw_mmuidx_ra(env, addr, mmu_idx, raddr) << 16;
+ break;
+ case 3:
+ val = cpu_lduw_mmuidx_ra(env, addr, mmu_idx, raddr) << 16;
+ addr = addr_add(env, addr, 2);
+ val |= cpu_ldub_mmuidx_ra(env, addr, mmu_idx, raddr) << 8;
+ break;
}
}
+ env->gpr[reg] = val;
}
-void helper_lsw(CPUPPCState *env, target_ulong addr, uint32_t nb, uint32_t reg)
+void helper_lsw(CPUPPCState *env, target_ulong addr,
+ uint32_t nb, uint32_t reg)
{
do_lsw(env, addr, nb, reg, GETPC());
}
@@ -130,17 +217,57 @@
void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb,
uint32_t reg)
{
- int sh;
+ uintptr_t raddr = GETPC();
+ int mmu_idx;
+ void *host;
+ uint32_t val;
- for (; nb > 3; nb -= 4) {
- cpu_stl_data_ra(env, addr, env->gpr[reg], GETPC());
- reg = (reg + 1) % 32;
- addr = addr_add(env, addr, 4);
+ if (unlikely(nb == 0)) {
+ return;
}
- if (unlikely(nb > 0)) {
- for (sh = 24; nb > 0; nb--, sh -= 8) {
- cpu_stb_data_ra(env, addr, (env->gpr[reg] >> sh) & 0xFF, GETPC());
- addr = addr_add(env, addr, 1);
+
+ mmu_idx = cpu_mmu_index(env, false);
+ host = probe_contiguous(env, addr, nb, MMU_DATA_STORE, mmu_idx, raddr);
+
+ if (likely(host)) {
+ /* Fast path -- the entire operation is in RAM at host. */
+ for (; nb > 3; nb -= 4) {
+ stl_be_p(host, env->gpr[reg]);
+ reg = (reg + 1) % 32;
+ host += 4;
+ }
+ val = env->gpr[reg];
+ switch (nb) {
+ case 1:
+ stb_p(host, val >> 24);
+ break;
+ case 2:
+ stw_be_p(host, val >> 16);
+ break;
+ case 3:
+ stw_be_p(host, val >> 16);
+ stb_p(host + 2, val >> 8);
+ break;
+ }
+ } else {
+ for (; nb > 3; nb -= 4) {
+ cpu_stl_mmuidx_ra(env, addr, env->gpr[reg], mmu_idx, raddr);
+ reg = (reg + 1) % 32;
+ addr = addr_add(env, addr, 4);
+ }
+ val = env->gpr[reg];
+ switch (nb) {
+ case 1:
+ cpu_stb_mmuidx_ra(env, addr, val >> 24, mmu_idx, raddr);
+ break;
+ case 2:
+ cpu_stw_mmuidx_ra(env, addr, val >> 16, mmu_idx, raddr);
+ break;
+ case 3:
+ cpu_stw_mmuidx_ra(env, addr, val >> 16, mmu_idx, raddr);
+ addr = addr_add(env, addr, 2);
+ cpu_stb_mmuidx_ra(env, addr, val >> 8, mmu_idx, raddr);
+ break;
}
}
}
@@ -166,12 +293,12 @@
addr &= mask;
/* Check reservation */
- if ((env->reserve_addr & mask) == (addr & mask)) {
+ if ((env->reserve_addr & mask) == addr) {
env->reserve_addr = (target_ulong)-1ULL;
}
/* Try fast path translate */
- haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, mmu_idx);
+ haddr = probe_write(env, addr, dcbz_size, mmu_idx, retaddr);
if (haddr) {
memset(haddr, 0, dcbz_size);
} else {
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index 2318f3a..55b68d1 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -41,6 +41,18 @@
}
#ifdef TARGET_PPC64
+/*
+ * Log the unavailable facility, clear the cause field of HFSCR and raise
+ * POWERPC_EXCP_HV_FU with @cause as the error code.
+ */
+static void raise_hv_fu_exception(CPUPPCState *env, uint32_t bit,
+                                  const char *caller, uint32_t cause,
+                                  uintptr_t raddr)
+{
+    const target_ulong ic_field = (target_ulong)FSCR_IC_MASK << FSCR_IC_POS;
+
+    qemu_log_mask(CPU_LOG_INT, "HV Facility %d is unavailable (%s)\n",
+                  bit, caller);
+    env->spr[SPR_HFSCR] &= ~ic_field;
+    raise_exception_err_ra(env, POWERPC_EXCP_HV_FU, cause, raddr);
+}
+
static void raise_fu_exception(CPUPPCState *env, uint32_t bit,
uint32_t sprn, uint32_t cause,
uintptr_t raddr)
@@ -55,6 +67,17 @@
}
#endif
+/*
+ * Raise a Hypervisor Facility Unavailable exception when the CPU has an HV
+ * mode, is not currently in it, and HFSCR does not enable the facility.
+ *
+ * @bit:    bit number (shift count, LSB = 0) of the facility in HFSCR;
+ *          it is NOT a mask
+ * @caller: name used in the log message
+ * @cause:  interrupt cause code handed to the exception
+ *
+ * NOTE(review): GETPC() is evaluated here although this function is itself
+ * called from other helpers -- confirm the resulting return address is the
+ * one the unwinder expects.
+ */
+void helper_hfscr_facility_check(CPUPPCState *env, uint32_t bit,
+                                 const char *caller, uint32_t cause)
+{
+#ifdef TARGET_PPC64
+    /* 1ULL: HFSCR is 64 bits wide, 'unsigned long' may be only 32. */
+    if ((env->msr_mask & MSR_HVB) && !msr_hv &&
+        !(env->spr[SPR_HFSCR] & (1ULL << bit))) {
+        raise_hv_fu_exception(env, bit, caller, cause, GETPC());
+    }
+#endif
+}
+
void helper_fscr_facility_check(CPUPPCState *env, uint32_t bit,
uint32_t sprn, uint32_t cause)
{
@@ -105,6 +128,46 @@
env->spr[SPR_PCR] = value & pcc->pcr_mask;
}
+
+/*
+ * Read DPDES.  The register is shared: each bit reflects the state of
+ * the doorbell interrupt of one thread of the same core.  The read is
+ * subject to the HFSCR facility check.
+ */
+target_ulong helper_load_dpdes(CPUPPCState *env)
+{
+    helper_hfscr_facility_check(env, HFSCR_MSGP, "load DPDES", HFSCR_IC_MSGP);
+
+    /* TODO: TCG supports only one thread, so only bit 0 is ever reported */
+    return (env->pending_interrupts & (1 << PPC_INTERRUPT_DOORBELL)) ? 1 : 0;
+}
+
+/*
+ * Write DPDES.  Only the values 0 and 1 are accepted: 1 marks the doorbell
+ * pending and signals the CPU, 0 withdraws the pending doorbell.  Anything
+ * else is logged as a guest error and ignored.  The write is subject to the
+ * HFSCR facility check.
+ */
+void helper_store_dpdes(CPUPPCState *env, target_ulong val)
+{
+    CPUState *cs = CPU(env_archcpu(env));
+
+    helper_hfscr_facility_check(env, HFSCR_MSGP, "store DPDES", HFSCR_IC_MSGP);
+
+    /* TODO: TCG supports only one thread */
+    switch (val) {
+    case 0:
+        env->pending_interrupts &= ~(1 << PPC_INTERRUPT_DOORBELL);
+        break;
+    case 1:
+        env->pending_interrupts |= 1 << PPC_INTERRUPT_DOORBELL;
+        cpu_interrupt(cs, CPU_INTERRUPT_HARD);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "Invalid DPDES register value "
+                      TARGET_FMT_lx"\n", val);
+        break;
+    }
+}
#endif /* defined(TARGET_PPC64) */
void helper_store_pidr(CPUPPCState *env, target_ulong val)
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 066e324..224e646 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -235,6 +235,12 @@
/* In real mode top 4 effective addr bits (mostly) ignored */
raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL;
+ /* In HV mode, add HRMOR if top EA bit is clear */
+ if (msr_hv || !env->has_hv_mode) {
+ if (!(eaddr >> 63)) {
+ raddr |= env->spr[SPR_HRMOR];
+ }
+ }
tlb_set_page(cs, eaddr & TARGET_PAGE_MASK, raddr & TARGET_PAGE_MASK,
PAGE_READ | PAGE_WRITE | PAGE_EXEC, mmu_idx,
TARGET_PAGE_SIZE);
diff --git a/target/ppc/trace-events b/target/ppc/trace-events
index 3dc6740..6d15aa9 100644
--- a/target/ppc/trace-events
+++ b/target/ppc/trace-events
@@ -28,3 +28,4 @@
kvm_handle_epr(void) "handle epr"
kvm_handle_watchdog_expiry(void) "handle watchdog expiry"
kvm_handle_debug_exception(void) "handle debug exception"
+kvm_handle_nmi_exception(void) "handle NMI exception"
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 9dcf8dc..36fa273 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6645,6 +6645,28 @@
#endif /* defined(CONFIG_USER_ONLY) */
}
+#if defined(TARGET_PPC64)
+static void gen_msgclrp(DisasContext *ctx) /* translator hook for the msgclrp instruction */
+{
+#if defined(CONFIG_USER_ONLY)
+ GEN_PRIV; /* NOTE(review): presumably rejects the instruction as privileged -- confirm */
+#else
+ CHK_SV; /* NOTE(review): presumably a supervisor-privilege check -- confirm */
+ gen_helper_book3s_msgclrp(cpu_env, cpu_gpr[rB(ctx->opcode)]); /* helper receives the GPR selected by the opcode's rB field */
+#endif /* defined(CONFIG_USER_ONLY) */
+}
+
+static void gen_msgsndp(DisasContext *ctx) /* translator hook for the msgsndp instruction */
+{
+#if defined(CONFIG_USER_ONLY)
+ GEN_PRIV; /* NOTE(review): presumably rejects the instruction as privileged -- confirm */
+#else
+ CHK_SV; /* NOTE(review): presumably a supervisor-privilege check -- confirm */
+ gen_helper_book3s_msgsndp(cpu_env, cpu_gpr[rB(ctx->opcode)]); /* helper receives the GPR selected by the opcode's rB field */
+#endif /* defined(CONFIG_USER_ONLY) */
+}
+#endif
+
static void gen_msgsync(DisasContext *ctx)
{
#if defined(CONFIG_USER_ONLY)
@@ -7187,6 +7209,10 @@
GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x00000000, PPC_NONE,
PPC2_ISA300),
GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER2_E(msgsndp, "msgsndp", 0x1F, 0x0E, 0x04, 0x03ff0001,
+ PPC_NONE, PPC2_ISA207S),
+GEN_HANDLER2_E(msgclrp, "msgclrp", 0x1F, 0x0E, 0x05, 0x03ff0001,
+ PPC_NONE, PPC2_ISA207S),
#endif
#undef GEN_INT_ARITH_ADD
diff --git a/target/ppc/translate_init.inc.c b/target/ppc/translate_init.inc.c
index 2d3efad..53995f6 100644
--- a/target/ppc/translate_init.inc.c
+++ b/target/ppc/translate_init.inc.c
@@ -464,6 +464,17 @@
{
gen_helper_store_pcr(cpu_env, cpu_gpr[gprn]);
}
+
+/* DPDES: SPR read callback, emits a call to the load_dpdes helper */
+static void spr_read_dpdes(DisasContext *ctx, int gprn, int sprn)
+{
+ gen_helper_load_dpdes(cpu_gpr[gprn], cpu_env); /* helper result lands in GPR gprn; sprn is unused */
+}
+
+static void spr_write_dpdes(DisasContext *ctx, int sprn, int gprn)
+{
+ gen_helper_store_dpdes(cpu_env, cpu_gpr[gprn]); /* DPDES SPR write callback: GPR gprn is passed to the store_dpdes helper; sprn is unused */
+}
#endif
#endif
@@ -8238,10 +8249,11 @@
{
#if !defined(CONFIG_USER_ONLY)
/* Directed Privileged Door-bell Exception State, used for IPI */
- spr_register(env, SPR_DPDES, "DPDES",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, SPR_NOACCESS,
- 0x00000000);
+ spr_register_kvm_hv(env, SPR_DPDES, "DPDES",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_dpdes, SPR_NOACCESS,
+ &spr_read_dpdes, &spr_write_dpdes,
+ KVM_REG_PPC_DPDES, 0x00000000);
#endif
}
diff --git a/tests/qtest/boot-order-test.c b/tests/qtest/boot-order-test.c
index a725bce..4a6218a 100644
--- a/tests/qtest/boot-order-test.c
+++ b/tests/qtest/boot-order-test.c
@@ -108,30 +108,6 @@
test_boot_orders(NULL, read_boot_order_pc, test_cases_pc);
}
-static uint8_t read_m48t59(QTestState *qts, uint64_t addr, uint16_t reg)
-{
- qtest_writeb(qts, addr, reg & 0xff);
- qtest_writeb(qts, addr + 1, reg >> 8);
- return qtest_readb(qts, addr + 3);
-}
-
-static uint64_t read_boot_order_prep(QTestState *qts)
-{
- return read_m48t59(qts, 0x80000000 + 0x74, 0x34);
-}
-
-static const boot_order_test test_cases_prep[] = {
- { "", 'c', 'c' },
- { "-boot c", 'c', 'c' },
- { "-boot d", 'd', 'd' },
- {}
-};
-
-static void test_prep_boot_order(void)
-{
- test_boot_orders("prep", read_boot_order_prep, test_cases_prep);
-}
-
static uint64_t read_boot_order_pmac(QTestState *qts)
{
QFWCFG *fw_cfg = mm_fw_cfg_init(qts, 0xf0000510);
@@ -190,7 +166,6 @@
if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
qtest_add_func("boot-order/pc", test_pc_boot_order);
} else if (strcmp(arch, "ppc") == 0 || strcmp(arch, "ppc64") == 0) {
- qtest_add_func("boot-order/prep", test_prep_boot_order);
qtest_add_func("boot-order/pmac_oldworld",
test_pmac_oldworld_boot_order);
qtest_add_func("boot-order/pmac_newworld",
diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c
index 8e8c5b0..85a3614 100644
--- a/tests/qtest/boot-serial-test.c
+++ b/tests/qtest/boot-serial-test.c
@@ -15,6 +15,7 @@
#include "qemu/osdep.h"
#include "libqtest.h"
+#include "libqos/libqos-spapr.h"
static const uint8_t kernel_mcf5208[] = {
0x41, 0xf9, 0xfc, 0x06, 0x00, 0x00, /* lea 0xfc060000,%a0 */
@@ -112,7 +113,7 @@
{ "ppc64", "40p", "-m 192", "Memory: 192M" },
{ "ppc64", "mac99", "", "PowerPC,970FX" },
{ "ppc64", "pseries",
- "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken",
+ "-machine " PSERIES_DEFAULT_CAPABILITIES,
"Open Firmware" },
{ "ppc64", "powernv8", "", "OPAL" },
{ "ppc64", "powernv9", "", "OPAL" },
diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c
index 67635e3..833a050 100644
--- a/tests/qtest/cdrom-test.c
+++ b/tests/qtest/cdrom-test.c
@@ -189,7 +189,7 @@
add_s390x_tests();
} else if (g_str_equal(arch, "ppc64")) {
const char *ppcmachines[] = {
- "pseries", "mac99", "g3beige", "40p", "prep", NULL
+ "pseries", "mac99", "g3beige", "40p", NULL
};
add_cdrom_param_tests(ppcmachines);
} else if (g_str_equal(arch, "sparc")) {
diff --git a/tests/qtest/endianness-test.c b/tests/qtest/endianness-test.c
index 5852795..2798802 100644
--- a/tests/qtest/endianness-test.c
+++ b/tests/qtest/endianness-test.c
@@ -35,7 +35,7 @@
{ "mips64", "malta", 0x10000000, .bswap = true },
{ "mips64el", "fulong2e", 0x1fd00000 },
{ "ppc", "g3beige", 0xfe000000, .bswap = true, .superio = "i82378" },
- { "ppc", "prep", 0x80000000, .bswap = true },
+ { "ppc", "40p", 0x80000000, .bswap = true },
{ "ppc", "bamboo", 0xe8000000, .bswap = true, .superio = "i82378" },
{ "ppc64", "mac99", 0xf2000000, .bswap = true, .superio = "i82378" },
{ "ppc64", "pseries", (1ULL << 45), .bswap = true, .superio = "i82378" },
diff --git a/tests/qtest/libqos/libqos-spapr.h b/tests/qtest/libqos/libqos-spapr.h
index dcb5c43..d9c4c22 100644
--- a/tests/qtest/libqos/libqos-spapr.h
+++ b/tests/qtest/libqos/libqos-spapr.h
@@ -7,4 +7,12 @@
QOSState *qtest_spapr_boot(const char *cmdline_fmt, ...);
void qtest_spapr_shutdown(QOSState *qs);
+/* List of capabilities needed to silence warnings with TCG */
+#define PSERIES_DEFAULT_CAPABILITIES \
+ "cap-cfpc=broken," \
+ "cap-sbbc=broken," \
+ "cap-ibs=broken," \
+ "cap-ccf-assist=off," \
+ "cap-fwnmi-mce=off"
+
#endif
diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c
index 9be52c7..60e6ec3 100644
--- a/tests/qtest/prom-env-test.c
+++ b/tests/qtest/prom-env-test.c
@@ -21,6 +21,7 @@
#include "qemu/osdep.h"
#include "libqtest.h"
+#include "libqos/libqos-spapr.h"
#define MAGIC 0xcafec0de
#define ADDRESS 0x4000
@@ -54,7 +55,7 @@
*/
if (strcmp(machine, "pseries") == 0) {
extra_args = "-nodefaults"
- " -machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken";
+ " -machine " PSERIES_DEFAULT_CAPABILITIES;
}
qts = qtest_initf("-M %s -accel tcg %s -prom-env 'use-nvramrc?=true' "
diff --git a/tests/qtest/pxe-test.c b/tests/qtest/pxe-test.c
index f68d0aa..1161a77 100644
--- a/tests/qtest/pxe-test.c
+++ b/tests/qtest/pxe-test.c
@@ -17,6 +17,7 @@
#include "qemu-common.h"
#include "libqtest.h"
#include "boot-sector.h"
+#include "libqos/libqos-spapr.h"
#define NETNAME "net0"
@@ -46,15 +47,15 @@
static testdef_t ppc64_tests[] = {
{ "pseries", "spapr-vlan",
- "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken,vsmt=8" },
+ "-machine vsmt=8," PSERIES_DEFAULT_CAPABILITIES },
{ "pseries", "virtio-net-pci",
- "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken,vsmt=8" },
+ "-machine vsmt=8," PSERIES_DEFAULT_CAPABILITIES },
{ NULL },
};
static testdef_t ppc64_tests_slow[] = {
{ "pseries", "e1000",
- "-machine cap-cfpc=broken,cap-sbbc=broken,cap-ibs=broken,vsmt=8" },
+ "-machine vsmt=8," PSERIES_DEFAULT_CAPABILITIES },
{ NULL },
};