Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
* scsi-disk: Don't silently truncate serial number
* backends/hostmem: Report error on unavailable qemu_madvise() features or unaligned memory sizes
* target/i386: fixes and documentation for INHIBIT_IRQ/TF/RF and debugging
* i386/hvf: Adds support for INVTSC cpuid bit
* i386/hvf: Fixes for dirty memory tracking
* i386/hvf: Use hv_vcpu_interrupt() and hv_vcpu_run_until()
* hvf: Cleanups
* stubs: fixes for --disable-system build
* i386/kvm: support for FRED
* i386/kvm: fix MCE handling on AMD hosts
# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmZkF2oUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroPNlQf+N9y6Eh0nMEEQ69twtV8ytglTY+uX
# FsogvnsXHNMVubOWmmeItM6kFXTAkR9cmFaL8dqI1Gs03xEQdQXbF1KejJZOAZVl
# RQMOW8Fg2Afr+0lwqCXHvhsmZ4hr5yUkRndyucA/E9AO2uGrtgwsWGDBGaHJOZIA
# lAsEMOZgKjXHZnefXjhMrvpk/QNovjEV6f1RHX3oKZjKSI5/G4IqGSmwNYToot8p
# 2fgs4Qti4+1gNyM2oBLq7cCMjMS61tSxOMH4uqVoIisjyckPlAFRvc+DXtKsUAAs
# 9AgM++pNgpB0IXv67czRUNdRoK7OI8I0ULhI4qHXi6Yg2QYAHqpQ6WL4Lg==
# =RP7U
# -----END PGP SIGNATURE-----
# gpg: Signature made Sat 08 Jun 2024 01:33:46 AM PDT
# gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg: issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (42 commits)
python: mkvenv: remove ensure command
Revert "python: use vendored tomli"
i386: Add support for overflow recovery
i386: Add support for SUCCOR feature
i386: Fix MCE support for AMD hosts
docs: i386: pc: Avoid mentioning limit of maximum vCPUs
target/i386: Add get/set/migrate support for FRED MSRs
target/i386: enumerate VMX nested-exception support
vmxcap: add support for VMX FRED controls
target/i386: mark CR4.FRED not reserved
target/i386: add support for FRED in CPUID enumeration
hvf: Makes assert_hvf_ok report failed expression
i386/hvf: Updates API usage to use modern vCPU run function
i386/hvf: In kick_vcpu use hv_vcpu_interrupt to force exit
i386/hvf: Fixes dirty memory tracking by page granularity RX->RWX change
hvf: Consistent types for vCPU handles
i386/hvf: Fixes some compilation warnings
i386/hvf: Adds support for INVTSC cpuid bit
stubs/meson: Fix qemuutil build when --disable-system
scsi-disk: Don't silently truncate serial number
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index 6f1e27e..b2a37a2 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -400,7 +400,7 @@
r = hv_vcpu_create(&cpu->accel->fd,
(hv_vcpu_exit_t **)&cpu->accel->exit, NULL);
#else
- r = hv_vcpu_create((hv_vcpuid_t *)&cpu->accel->fd, HV_VCPU_DEFAULT);
+ r = hv_vcpu_create(&cpu->accel->fd, HV_VCPU_DEFAULT);
#endif
cpu->accel->dirty = true;
assert_hvf_ok(r);
diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c
index db05b81..c008dc2 100644
--- a/accel/hvf/hvf-all.c
+++ b/accel/hvf/hvf-all.c
@@ -13,40 +13,33 @@
#include "sysemu/hvf.h"
#include "sysemu/hvf_int.h"
-void assert_hvf_ok(hv_return_t ret)
+const char *hvf_return_string(hv_return_t ret)
+{
+ switch (ret) {
+ case HV_SUCCESS: return "HV_SUCCESS";
+ case HV_ERROR: return "HV_ERROR";
+ case HV_BUSY: return "HV_BUSY";
+ case HV_BAD_ARGUMENT: return "HV_BAD_ARGUMENT";
+ case HV_NO_RESOURCES: return "HV_NO_RESOURCES";
+ case HV_NO_DEVICE: return "HV_NO_DEVICE";
+ case HV_UNSUPPORTED: return "HV_UNSUPPORTED";
+#if defined(MAC_OS_VERSION_11_0) && \
+ MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
+ case HV_DENIED: return "HV_DENIED";
+#endif
+ default: return "[unknown hv_return value]";
+ }
+}
+
+void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line,
+ const char *exp)
{
if (ret == HV_SUCCESS) {
return;
}
- switch (ret) {
- case HV_ERROR:
- error_report("Error: HV_ERROR");
- break;
- case HV_BUSY:
- error_report("Error: HV_BUSY");
- break;
- case HV_BAD_ARGUMENT:
- error_report("Error: HV_BAD_ARGUMENT");
- break;
- case HV_NO_RESOURCES:
- error_report("Error: HV_NO_RESOURCES");
- break;
- case HV_NO_DEVICE:
- error_report("Error: HV_NO_DEVICE");
- break;
- case HV_UNSUPPORTED:
- error_report("Error: HV_UNSUPPORTED");
- break;
-#if defined(MAC_OS_VERSION_11_0) && \
- MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
- case HV_DENIED:
- error_report("Error: HV_DENIED");
- break;
-#endif
- default:
- error_report("Unknown Error");
- }
+ error_report("Error: %s = %s (0x%x, at %s:%u)",
+ exp, hvf_return_string(ret), ret, file, line);
abort();
}
diff --git a/backends/hostmem-epc.c b/backends/hostmem-epc.c
index 735e2e1..f58fcf0 100644
--- a/backends/hostmem-epc.c
+++ b/backends/hostmem-epc.c
@@ -36,6 +36,7 @@
return false;
}
+ backend->aligned = true;
name = object_get_canonical_path(OBJECT(backend));
ram_flags = (backend->share ? RAM_SHARED : 0) | RAM_PROTECTED;
return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 3c69db7..7e5072e 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -80,6 +80,7 @@
g_assert_not_reached();
}
+ backend->aligned = true;
name = host_memory_backend_get_name(backend);
ram_flags = backend->share ? RAM_SHARED : 0;
ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
index 745ead0..6a3c89a 100644
--- a/backends/hostmem-memfd.c
+++ b/backends/hostmem-memfd.c
@@ -52,6 +52,7 @@
return false;
}
+ backend->aligned = true;
name = host_memory_backend_get_name(backend);
ram_flags = backend->share ? RAM_SHARED : 0;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
diff --git a/backends/hostmem.c b/backends/hostmem.c
index eb9682b..4e5576a 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -20,6 +20,7 @@
#include "qom/object_interfaces.h"
#include "qemu/mmap-alloc.h"
#include "qemu/madvise.h"
+#include "qemu/cutils.h"
#include "hw/qdev-core.h"
#ifdef CONFIG_NUMA
@@ -169,19 +170,24 @@
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
- if (!host_memory_backend_mr_inited(backend)) {
- backend->merge = value;
+ if (QEMU_MADV_MERGEABLE == QEMU_MADV_INVALID) {
+ if (value) {
+ error_setg(errp, "Memory merging is not supported on this host");
+ }
+ assert(!backend->merge);
return;
}
- if (value != backend->merge) {
+ if (!host_memory_backend_mr_inited(backend) &&
+ value != backend->merge) {
void *ptr = memory_region_get_ram_ptr(&backend->mr);
uint64_t sz = memory_region_size(&backend->mr);
qemu_madvise(ptr, sz,
value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
- backend->merge = value;
}
+
+ backend->merge = value;
}
static bool host_memory_backend_get_dump(Object *obj, Error **errp)
@@ -195,19 +201,24 @@
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
- if (!host_memory_backend_mr_inited(backend)) {
- backend->dump = value;
+ if (QEMU_MADV_DONTDUMP == QEMU_MADV_INVALID) {
+ if (!value) {
+ error_setg(errp, "Dumping guest memory cannot be disabled on this host");
+ }
+ assert(backend->dump);
return;
}
- if (value != backend->dump) {
+ if (host_memory_backend_mr_inited(backend) &&
+ value != backend->dump) {
void *ptr = memory_region_get_ram_ptr(&backend->mr);
uint64_t sz = memory_region_size(&backend->mr);
qemu_madvise(ptr, sz,
value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
- backend->dump = value;
}
+
+ backend->dump = value;
}
static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
@@ -325,6 +336,7 @@
HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
void *ptr;
uint64_t sz;
+ size_t pagesize;
bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED);
if (!bc->alloc) {
@@ -336,6 +348,14 @@
ptr = memory_region_get_ram_ptr(&backend->mr);
sz = memory_region_size(&backend->mr);
+ pagesize = qemu_ram_pagesize(backend->mr.ram_block);
+
+ if (backend->aligned && !QEMU_IS_ALIGNED(sz, pagesize)) {
+ g_autofree char *pagesize_str = size_to_str(pagesize);
+ error_setg(errp, "backend '%s' memory size must be multiple of %s",
+ object_get_typename(OBJECT(uc)), pagesize_str);
+ return;
+ }
if (backend->merge) {
qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
diff --git a/configure b/configure
index 4d01a42..5ad1674 100755
--- a/configure
+++ b/configure
@@ -955,10 +955,6 @@
# Finish preparing the virtual environment using vendored .whl files
-if $python -c 'import sys; sys.exit(sys.version_info >= (3,11))'; then
- $mkvenv ensure --dir "${source_path}/python/wheels" \
- 'tomli>=1.2.0' || exit 1
-fi
$mkvenv ensuregroup --dir "${source_path}/python/wheels" \
${source_path}/pythondeps.toml meson || exit 1
diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst
index 09caf2f..f4fd761 100644
--- a/docs/devel/build-system.rst
+++ b/docs/devel/build-system.rst
@@ -185,14 +185,13 @@
Python packages that are **mandatory** dependencies to build QEMU,
but are not available in all supported distros, are bundled with the
-QEMU sources. Currently this includes Meson (outdated in CentOS 8
-and derivatives, Ubuntu 20.04 and 22.04, and openSUSE Leap) and tomli
-(absent in Ubuntu 20.04).
+QEMU sources. The only one is currently Meson (outdated in Ubuntu
+22.04 and openSUSE Leap).
-If you need to update these, please do so by modifying and rerunning
-``python/scripts/vendor.py``. This script embeds the sha256 hash of
-package sources and checks it. The pypi.org web site provides an easy
-way to retrieve the sha256 hash of the sources.
+In order to include a new or updated wheel, modify and rerun the
+``python/scripts/vendor.py`` script. The script embeds the
+sha256 hash of package sources and checks it. The pypi.org web site
+provides an easy way to retrieve the sha256 hash of the sources.
Stage 2: Meson
diff --git a/docs/system/target-i386-desc.rst.inc b/docs/system/target-i386-desc.rst.inc
index 319e540..ae312b1 100644
--- a/docs/system/target-i386-desc.rst.inc
+++ b/docs/system/target-i386-desc.rst.inc
@@ -36,7 +36,8 @@
- PCI UHCI, OHCI, EHCI or XHCI USB controller and a virtual USB-1.1
hub.
-SMP is supported with up to 255 CPUs (and 4096 CPUs for PC Q35 machine).
+SMP is supported with a large number of virtual CPUs (upper limit is
+configuration dependent).
QEMU uses the PC BIOS from the Seabios project and the Plex86/Bochs LGPL
VGA BIOS.
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 77a356f..c93d249 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -17,6 +17,7 @@
#include "hw/loader.h"
#include "qapi/error.h"
#include "qapi/qapi-visit-machine.h"
+#include "qemu/madvise.h"
#include "qom/object_interfaces.h"
#include "sysemu/cpus.h"
#include "sysemu/sysemu.h"
@@ -427,6 +428,10 @@
{
MachineState *ms = MACHINE(obj);
+ if (!value && QEMU_MADV_DONTDUMP == QEMU_MADV_INVALID) {
+ error_setg(errp, "Dumping guest memory cannot be disabled on this host");
+ return;
+ }
ms->dump_guest_core = value;
}
@@ -441,6 +446,10 @@
{
MachineState *ms = MACHINE(obj);
+ if (value && QEMU_MADV_MERGEABLE == QEMU_MADV_INVALID) {
+ error_setg(errp, "Memory merging is not supported on this host");
+ return;
+ }
ms->mem_merge = value;
}
@@ -1129,7 +1138,7 @@
container_get(obj, "/peripheral-anon");
ms->dump_guest_core = true;
- ms->mem_merge = true;
+ ms->mem_merge = (QEMU_MADV_MERGEABLE != QEMU_MADV_INVALID);
ms->enable_graphics = true;
ms->kernel_cmdline = g_strdup("");
ms->ram_size = mc->default_ram_size;
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index f386a2f..0812d39 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -58,6 +58,9 @@
#define TYPE_SCSI_DISK_BASE "scsi-disk-base"
+#define MAX_SERIAL_LEN 36
+#define MAX_SERIAL_LEN_FOR_DEVID 20
+
OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE)
struct SCSIDiskClass {
@@ -648,8 +651,8 @@
}
l = strlen(s->serial);
- if (l > 36) {
- l = 36;
+ if (l > MAX_SERIAL_LEN) {
+ l = MAX_SERIAL_LEN;
}
trace_scsi_disk_emulate_vpd_page_80(req->cmd.xfer);
@@ -2501,9 +2504,20 @@
if (!s->vendor) {
s->vendor = g_strdup("QEMU");
}
+ if (s->serial && strlen(s->serial) > MAX_SERIAL_LEN) {
+ error_setg(errp, "The serial number can't be longer than %d characters",
+ MAX_SERIAL_LEN);
+ return;
+ }
if (!s->device_id) {
if (s->serial) {
- s->device_id = g_strdup_printf("%.20s", s->serial);
+ if (strlen(s->serial) > MAX_SERIAL_LEN_FOR_DEVID) {
+ error_setg(errp, "The serial number can't be longer than %d "
+ "characters when it is also used as the default for "
+ "device_id", MAX_SERIAL_LEN_FOR_DEVID);
+ return;
+ }
+ s->device_id = g_strdup(s->serial);
} else {
const char *str = blk_name(s->qdev.conf.blk);
if (str && *str) {
diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h
index 04b884b..de47ae5 100644
--- a/include/sysemu/hostmem.h
+++ b/include/sysemu/hostmem.h
@@ -74,7 +74,7 @@
uint64_t size;
bool merge, dump, use_canonical_path;
bool prealloc, is_mapped, share, reserve;
- bool guest_memfd;
+ bool guest_memfd, aligned;
uint32_t prealloc_threads;
ThreadContext *prealloc_context;
DECLARE_BITMAP(host_nodes, MAX_NODES + 1);
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index 4a327fd..5b28d17 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -13,8 +13,10 @@
#ifdef __aarch64__
#include <Hypervisor/Hypervisor.h>
+typedef hv_vcpu_t hvf_vcpuid;
#else
#include <Hypervisor/hv.h>
+typedef hv_vcpuid_t hvf_vcpuid;
#endif
/* hvf_slot flags */
@@ -50,7 +52,7 @@
extern HVFState *hvf_state;
struct AccelCPUState {
- uint64_t fd;
+ hvf_vcpuid fd;
void *exit;
bool vtimer_masked;
sigset_t unblock_ipi_mask;
@@ -58,7 +60,10 @@
bool dirty;
};
-void assert_hvf_ok(hv_return_t ret);
+void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line,
+ const char *exp);
+#define assert_hvf_ok(EX) assert_hvf_ok_impl((EX), __FILE__, __LINE__, #EX)
+const char *hvf_return_string(hv_return_t ret);
int hvf_arch_init(void);
int hvf_arch_init_vcpu(CPUState *cpu);
void hvf_arch_vcpu_destroy(CPUState *cpu);
diff --git a/meson.build b/meson.build
index d80203f..ec59eff 100644
--- a/meson.build
+++ b/meson.build
@@ -2556,10 +2556,16 @@
#else
int main(void) { struct file_handle fh; return open_by_handle_at(0, &fh, 0); }
#endif'''))
-config_host_data.set('CONFIG_POSIX_MADVISE', cc.links(gnu_source_prefix + '''
- #include <sys/mman.h>
- #include <stddef.h>
- int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }'''))
+
+# On Darwin posix_madvise() has the same return semantics as plain madvise(),
+# i.e. errno is set and -1 is returned. That's not really how POSIX defines the
+# function. On the flip side, it has madvise() which is preferred anyways.
+if host_os != 'darwin'
+ config_host_data.set('CONFIG_POSIX_MADVISE', cc.links(gnu_source_prefix + '''
+ #include <sys/mman.h>
+ #include <stddef.h>
+ int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }'''))
+endif
config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_W_TID', cc.links(gnu_source_prefix + '''
#include <pthread.h>
diff --git a/python/scripts/mkvenv.py b/python/scripts/mkvenv.py
index d0b9c21..f2526af 100644
--- a/python/scripts/mkvenv.py
+++ b/python/scripts/mkvenv.py
@@ -13,7 +13,6 @@
create create a venv
post_init
post-venv initialization
- ensure Ensure that the specified package is installed.
ensuregroup
Ensure that the specified package group is installed.
@@ -36,18 +35,6 @@
--------------------------------------------------
-usage: mkvenv ensure [-h] [--online] [--dir DIR] dep_spec...
-
-positional arguments:
- dep_spec PEP 508 Dependency specification, e.g. 'meson>=0.61.5'
-
-options:
- -h, --help show this help message and exit
- --online Install packages from PyPI, if necessary.
- --dir DIR Path to vendored packages where we may install from.
-
---------------------------------------------------
-
usage: mkvenv ensuregroup [-h] [--online] [--dir DIR] file group...
positional arguments:
@@ -726,57 +713,6 @@
return None
-def ensure(
- dep_specs: Sequence[str],
- online: bool = False,
- wheels_dir: Optional[Union[str, Path]] = None,
- prog: Optional[str] = None,
-) -> None:
- """
- Use pip to ensure we have the package specified by @dep_specs.
-
- If the package is already installed, do nothing. If online and
- wheels_dir are both provided, prefer packages found in wheels_dir
- first before connecting to PyPI.
-
- :param dep_specs:
- PEP 508 dependency specifications. e.g. ['meson>=0.61.5'].
- :param online: If True, fall back to PyPI.
- :param wheels_dir: If specified, search this path for packages.
- :param prog:
- If specified, use this program name for error diagnostics that will
- be presented to the user. e.g., 'sphinx-build' can be used as a
- bellwether for the presence of 'sphinx'.
- """
-
- if not HAVE_DISTLIB:
- raise Ouch("a usable distlib could not be found, please install it")
-
- # Convert the depspecs to a dictionary, as if they came
- # from a section in a pythondeps.toml file
- group: Dict[str, Dict[str, str]] = {}
- for spec in dep_specs:
- name = distlib.version.LegacyMatcher(spec).name
- group[name] = {}
-
- spec = spec.strip()
- pos = len(name)
- ver = spec[pos:].strip()
- if ver:
- group[name]["accepted"] = ver
-
- if prog:
- group[name]["canary"] = prog
- prog = None
-
- result = _do_ensure(group, online, wheels_dir)
- if result:
- # Well, that's not good.
- if result[1]:
- raise Ouch(result[0])
- raise SystemExit(f"\n{result[0]}\n\n")
-
-
def _parse_groups(file: str) -> Dict[str, Dict[str, Any]]:
if not HAVE_TOMLLIB:
if sys.version_info < (3, 11):
@@ -888,39 +824,6 @@
)
-def _add_ensure_subcommand(subparsers: Any) -> None:
- subparser = subparsers.add_parser(
- "ensure", help="Ensure that the specified package is installed."
- )
- subparser.add_argument(
- "--online",
- action="store_true",
- help="Install packages from PyPI, if necessary.",
- )
- subparser.add_argument(
- "--dir",
- type=str,
- action="store",
- help="Path to vendored packages where we may install from.",
- )
- subparser.add_argument(
- "--diagnose",
- type=str,
- action="store",
- help=(
- "Name of a shell utility to use for "
- "diagnostics if this command fails."
- ),
- )
- subparser.add_argument(
- "dep_specs",
- type=str,
- action="store",
- help="PEP 508 Dependency specification, e.g. 'meson>=0.61.5'",
- nargs="+",
- )
-
-
def main() -> int:
"""CLI interface to make_qemu_venv. See module docstring."""
if os.environ.get("DEBUG") or os.environ.get("GITLAB_CI"):
@@ -944,7 +847,6 @@
_add_create_subcommand(subparsers)
_add_post_init_subcommand(subparsers)
- _add_ensure_subcommand(subparsers)
_add_ensuregroup_subcommand(subparsers)
args = parser.parse_args()
@@ -957,13 +859,6 @@
)
if args.command == "post_init":
post_venv_setup()
- if args.command == "ensure":
- ensure(
- dep_specs=args.dep_specs,
- online=args.online,
- wheels_dir=args.dir,
- prog=args.diagnose,
- )
if args.command == "ensuregroup":
ensure_group(
file=args.file,
diff --git a/python/scripts/vendor.py b/python/scripts/vendor.py
index 1038b14..07aff97 100755
--- a/python/scripts/vendor.py
+++ b/python/scripts/vendor.py
@@ -43,9 +43,6 @@
packages = {
"meson==1.2.3":
"4533a43c34548edd1f63a276a42690fce15bde9409bcf20c4b8fa3d7e4d7cac1",
-
- "tomli==2.0.1":
- "939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc",
}
vendor_dir = Path(__file__, "..", "..", "wheels").resolve()
diff --git a/python/wheels/tomli-2.0.1-py3-none-any.whl b/python/wheels/tomli-2.0.1-py3-none-any.whl
deleted file mode 100644
index 29670b9..0000000
--- a/python/wheels/tomli-2.0.1-py3-none-any.whl
+++ /dev/null
Binary files differ
diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap
index 3fb4d5b..508be19 100755
--- a/scripts/kvm/vmxcap
+++ b/scripts/kvm/vmxcap
@@ -24,6 +24,7 @@
MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490
MSR_IA32_VMX_VMFUNC = 0x491
MSR_IA32_VMX_PROCBASED_CTLS3 = 0x492
+MSR_IA32_VMX_EXIT_CTLS2 = 0x493
class msr(object):
def __init__(self):
@@ -116,6 +117,7 @@
54: 'INS/OUTS instruction information',
55: 'IA32_VMX_TRUE_*_CTLS support',
56: 'Skip checks on event error code',
+ 58: 'VMX nested exception support',
},
msr = MSR_IA32_VMX_BASIC,
),
@@ -219,11 +221,21 @@
23: 'Clear IA32_BNDCFGS',
24: 'Conceal VM exits from PT',
25: 'Clear IA32_RTIT_CTL',
+ 31: 'Activate secondary VM-exit controls',
},
cap_msr = MSR_IA32_VMX_EXIT_CTLS,
true_cap_msr = MSR_IA32_VMX_TRUE_EXIT_CTLS,
),
+ Allowed1Control(
+ name = 'secondary VM-Exit controls',
+ bits = {
+ 0: 'Save IA32 FRED MSRs',
+ 1: 'Load IA32 FRED MSRs',
+ },
+ cap_msr = MSR_IA32_VMX_EXIT_CTLS2,
+ ),
+
Control(
name = 'VM-Entry controls',
bits = {
@@ -237,6 +249,7 @@
16: 'Load IA32_BNDCFGS',
17: 'Conceal VM entries from PT',
18: 'Load IA32_RTIT_CTL',
+ 23: 'Load IA32 FRED MSRs',
},
cap_msr = MSR_IA32_VMX_ENTRY_CTLS,
true_cap_msr = MSR_IA32_VMX_TRUE_ENTRY_CTLS,
diff --git a/stubs/meson.build b/stubs/meson.build
index 3b9d420..f15b48d 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -3,6 +3,7 @@
# below, so that it is clear who needs the stubbed functionality.
stub_ss.add(files('cpu-get-clock.c'))
+stub_ss.add(files('error-printf.c'))
stub_ss.add(files('fdset.c'))
stub_ss.add(files('iothread-lock.c'))
stub_ss.add(files('is-daemonized.c'))
@@ -45,17 +46,10 @@
stub_ss.add(files('qmp-quit.c'))
endif
-if have_ga
- stub_ss.add(files('error-printf.c'))
-endif
-
if have_block or have_user
stub_ss.add(files('qtest.c'))
stub_ss.add(files('vm-stop.c'))
stub_ss.add(files('vmstate.c'))
-
- # more symbols provided by the monitor
- stub_ss.add(files('error-printf.c'))
endif
if have_user
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 914bef4..7466217 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1114,7 +1114,7 @@
"avx-vnni", "avx512-bf16", NULL, "cmpccxadd",
NULL, NULL, "fzrm", "fsrs",
"fsrc", NULL, NULL, NULL,
- NULL, NULL, NULL, NULL,
+ NULL, "fred", "lkgs", "wrmsrns",
NULL, "amx-fp16", NULL, "avx-ifma",
NULL, NULL, "lam", NULL,
NULL, NULL, NULL, NULL,
@@ -1180,6 +1180,22 @@
.tcg_features = TCG_APM_FEATURES,
.unmigratable_flags = CPUID_APM_INVTSC,
},
+ [FEAT_8000_0007_EBX] = {
+ .type = CPUID_FEATURE_WORD,
+ .feat_names = {
+ "overflow-recov", "succor", NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ },
+ .cpuid = { .eax = 0x80000007, .reg = R_EBX, },
+ .tcg_features = 0,
+ .unmigratable_flags = 0,
+ },
[FEAT_8000_0008_EBX] = {
.type = CPUID_FEATURE_WORD,
.feat_names = {
@@ -1492,6 +1508,7 @@
[54] = "vmx-ins-outs",
[55] = "vmx-true-ctls",
[56] = "vmx-any-errcode",
+ [58] = "vmx-nested-exception",
},
.msr = {
.index = MSR_IA32_VMX_BASIC,
@@ -1701,6 +1718,18 @@
.from = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG },
.to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE },
},
+ {
+ .from = { FEAT_8000_0001_EDX, CPUID_EXT2_LM },
+ .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
+ },
+ {
+ .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_LKGS },
+ .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
+ },
+ {
+ .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_WRMSRNS },
+ .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
+ },
};
typedef struct X86RegisterInfo32 {
@@ -6874,7 +6903,7 @@
break;
case 0x80000007:
*eax = 0;
- *ebx = 0;
+ *ebx = env->features[FEAT_8000_0007_EBX];
*ecx = 0;
*edx = env->features[FEAT_8000_0007_EDX];
break;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c64ef0c..8fe28b6 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -261,6 +261,18 @@
#define CR4_PKS_MASK (1U << 24)
#define CR4_LAM_SUP_MASK (1U << 28)
+#ifdef TARGET_X86_64
+#define CR4_FRED_MASK (1ULL << 32)
+#else
+#define CR4_FRED_MASK 0
+#endif
+
+#ifdef TARGET_X86_64
+#define CR4_FRED_MASK (1ULL << 32)
+#else
+#define CR4_FRED_MASK 0
+#endif
+
#define CR4_RESERVED_MASK \
(~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \
| CR4_DE_MASK | CR4_PSE_MASK | CR4_PAE_MASK \
@@ -269,7 +281,7 @@
| CR4_LA57_MASK \
| CR4_FSGSBASE_MASK | CR4_PCIDE_MASK | CR4_OSXSAVE_MASK \
| CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK \
- | CR4_LAM_SUP_MASK))
+ | CR4_LAM_SUP_MASK | CR4_FRED_MASK))
#define DR6_BD (1 << 13)
#define DR6_BS (1 << 14)
@@ -365,6 +377,8 @@
#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+#define MCI_STATUS_DEFERRED (1ULL<<44) /* Deferred error */
+#define MCI_STATUS_POISON (1ULL<<43) /* Poisoned data consumed */
/* MISC register defines */
#define MCM_ADDR_SEGOFF 0 /* segment offset */
@@ -526,6 +540,17 @@
#define MSR_IA32_XFD 0x000001c4
#define MSR_IA32_XFD_ERR 0x000001c5
+/* FRED MSRs */
+#define MSR_IA32_FRED_RSP0 0x000001cc /* Stack level 0 regular stack pointer */
+#define MSR_IA32_FRED_RSP1 0x000001cd /* Stack level 1 regular stack pointer */
+#define MSR_IA32_FRED_RSP2 0x000001ce /* Stack level 2 regular stack pointer */
+#define MSR_IA32_FRED_RSP3 0x000001cf /* Stack level 3 regular stack pointer */
+#define MSR_IA32_FRED_STKLVLS 0x000001d0 /* FRED exception stack levels */
+#define MSR_IA32_FRED_SSP1 0x000001d1 /* Stack level 1 shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_SSP2 0x000001d2 /* Stack level 2 shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_SSP3 0x000001d3 /* Stack level 3 shadow stack pointer in ring 0 */
+#define MSR_IA32_FRED_CONFIG 0x000001d4 /* FRED Entrypoint and interrupt stack level */
+
#define MSR_IA32_BNDCFGS 0x00000d90
#define MSR_IA32_XSS 0x00000da0
#define MSR_IA32_UMWAIT_CONTROL 0xe1
@@ -605,6 +630,7 @@
FEAT_7_1_EAX, /* CPUID[EAX=7,ECX=1].EAX */
FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */
FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */
+ FEAT_8000_0007_EBX, /* CPUID[8000_0007].EBX */
FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */
FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */
FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */
@@ -941,6 +967,12 @@
#define CPUID_7_1_EDX_AMX_COMPLEX (1U << 8)
/* PREFETCHIT0/1 Instructions */
#define CPUID_7_1_EDX_PREFETCHITI (1U << 14)
+/* Flexible return and event delivery (FRED) */
+#define CPUID_7_1_EAX_FRED (1U << 17)
+/* Load into IA32_KERNEL_GS_BASE (LKGS) */
+#define CPUID_7_1_EAX_LKGS (1U << 18)
+/* Non-Serializing Write to Model Specific Register (WRMSRNS) */
+#define CPUID_7_1_EAX_WRMSRNS (1U << 19)
/* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */
#define CPUID_7_2_EDX_MCDT_NO (1U << 5)
@@ -951,6 +983,10 @@
/* Packets which contain IP payload have LIP values */
#define CPUID_14_0_ECX_LIP (1U << 31)
+/* RAS Features */
+#define CPUID_8000_0007_EBX_OVERFLOW_RECOV (1U << 0)
+#define CPUID_8000_0007_EBX_SUCCOR (1U << 1)
+
/* CLZERO instruction */
#define CPUID_8000_0008_EBX_CLZERO (1U << 0)
/* Always save/restore FP error pointers */
@@ -1053,6 +1089,7 @@
#define MSR_VMX_BASIC_INS_OUTS (1ULL << 54)
#define MSR_VMX_BASIC_TRUE_CTLS (1ULL << 55)
#define MSR_VMX_BASIC_ANY_ERRCODE (1ULL << 56)
+#define MSR_VMX_BASIC_NESTED_EXCEPTION (1ULL << 58)
#define MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK 0x1Full
#define MSR_VMX_MISC_STORE_LMA (1ULL << 5)
@@ -1704,6 +1741,17 @@
target_ulong cstar;
target_ulong fmask;
target_ulong kernelgsbase;
+
+ /* FRED MSRs */
+ uint64_t fred_rsp0;
+ uint64_t fred_rsp1;
+ uint64_t fred_rsp2;
+ uint64_t fred_rsp3;
+ uint64_t fred_stklvls;
+ uint64_t fred_ssp1;
+ uint64_t fred_ssp2;
+ uint64_t fred_ssp3;
+ uint64_t fred_config;
#endif
uint64_t tsc_adjust;
@@ -2607,6 +2655,9 @@
if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_LAM)) {
reserved_bits |= CR4_LAM_SUP_MASK;
}
+ if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED)) {
+ reserved_bits |= CR4_FRED_MASK;
+ }
return reserved_bits;
}
diff --git a/target/i386/helper.c b/target/i386/helper.c
index f9d1381..01a268a 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -91,6 +91,10 @@
int family = 0;
int model = 0;
+ if (IS_AMD_CPU(env)) {
+ return 0;
+ }
+
cpu_x86_version(env, &family, &model);
if ((family == 6 && model >= 14) || family > 6) {
return 1;
diff --git a/target/i386/helper.h b/target/i386/helper.h
index a52a1bf..2f46cff 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -53,9 +53,10 @@
DEF_HELPER_2(sysexit, void, env, int)
DEF_HELPER_2(syscall, void, env, int)
DEF_HELPER_2(sysret, void, env, int)
-DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int)
+DEF_HELPER_FLAGS_1(pause, TCG_CALL_NO_WG, noreturn, env)
DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int)
DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int)
+DEF_HELPER_FLAGS_1(icebp, TCG_CALL_NO_WG, noreturn, env)
DEF_HELPER_3(boundw, void, env, tl, int)
DEF_HELPER_3(boundl, void, env, tl, int)
@@ -89,7 +90,7 @@
DEF_HELPER_1(stgi, void, env)
DEF_HELPER_1(clgi, void, env)
DEF_HELPER_FLAGS_2(flush_page, TCG_CALL_NO_RWG, void, env, tl)
-DEF_HELPER_FLAGS_2(hlt, TCG_CALL_NO_WG, noreturn, env, int)
+DEF_HELPER_FLAGS_1(hlt, TCG_CALL_NO_WG, noreturn, env)
DEF_HELPER_FLAGS_2(monitor, TCG_CALL_NO_WG, void, env, tl)
DEF_HELPER_FLAGS_2(mwait, TCG_CALL_NO_WG, noreturn, env, int)
DEF_HELPER_1(rdmsr, void, env)
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index e493452..2d0eef6 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -49,6 +49,8 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qemu/memalign.h"
+#include "qapi/error.h"
+#include "migration/blocker.h"
#include "sysemu/hvf.h"
#include "sysemu/hvf_int.h"
@@ -74,6 +76,8 @@
#include "qemu/accel.h"
#include "target/i386/cpu.h"
+static Error *invtsc_mig_blocker;
+
void vmx_update_tpr(CPUState *cpu)
{
/* TODO: need integrate APIC handling */
@@ -131,9 +135,10 @@
if (write && slot) {
if (slot->flags & HVF_SLOT_LOG) {
+ uint64_t dirty_page_start = gpa & ~(TARGET_PAGE_SIZE - 1u);
memory_region_set_dirty(slot->region, gpa - slot->start, 1);
- hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
- HV_MEMORY_READ | HV_MEMORY_WRITE);
+ hv_vm_protect(dirty_page_start, TARGET_PAGE_SIZE,
+ HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC);
}
}
@@ -210,6 +215,7 @@
void hvf_kick_vcpu_thread(CPUState *cpu)
{
cpus_kick_thread(cpu);
+ hv_vcpu_interrupt(&cpu->accel->fd, 1);
}
int hvf_arch_init(void)
@@ -221,6 +227,8 @@
{
X86CPU *x86cpu = X86_CPU(cpu);
CPUX86State *env = &x86cpu->env;
+ Error *local_err = NULL;
+ int r;
uint64_t reqCap;
init_emu();
@@ -238,6 +246,18 @@
}
}
+ if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) &&
+ invtsc_mig_blocker == NULL) {
+ error_setg(&invtsc_mig_blocker,
+ "State blocked by non-migratable CPU device (invtsc flag)");
+ r = migrate_add_blocker(&invtsc_mig_blocker, &local_err);
+ if (r < 0) {
+ error_report_err(local_err);
+ return r;
+ }
+ }
+
+
if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
&hvf_state->hvf_caps->vmx_cap_pinbased)) {
abort();
@@ -407,6 +427,27 @@
}
}
+static hv_return_t hvf_vcpu_run(hv_vcpuid_t vcpu_id)
+{
+ /*
+ * hv_vcpu_run_until is available and recommended from macOS 10.15+,
+ * HV_DEADLINE_FOREVER from 11.0. Test for availability at runtime and fall
+ * back to hv_vcpu_run() only where necessary.
+ */
+#ifndef MAC_OS_VERSION_11_0
+ return hv_vcpu_run(vcpu_id);
+#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
+ return hv_vcpu_run_until(vcpu_id, HV_DEADLINE_FOREVER);
+#else /* MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 */
+ /* 11.0 SDK or newer, but could be < 11 at runtime */
+ if (__builtin_available(macOS 11.0, *)) {
+ return hv_vcpu_run_until(vcpu_id, HV_DEADLINE_FOREVER);
+ } else {
+ return hv_vcpu_run(vcpu_id);
+ }
+#endif
+}
+
int hvf_vcpu_exec(CPUState *cpu)
{
X86CPU *x86_cpu = X86_CPU(cpu);
@@ -435,7 +476,7 @@
return EXCP_HLT;
}
- hv_return_t r = hv_vcpu_run(cpu->accel->fd);
+ hv_return_t r = hvf_vcpu_run(cpu->accel->fd);
assert_hvf_ok(r);
/* handle VMEXIT */
diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h
index 0fffcfa..3954ef8 100644
--- a/target/i386/hvf/vmx.h
+++ b/target/i386/hvf/vmx.h
@@ -95,8 +95,7 @@
efer |= MSR_EFER_LMA;
wvmcs(vcpu, VMCS_GUEST_IA32_EFER, efer);
entry_ctls = rvmcs(vcpu, VMCS_ENTRY_CTLS);
- wvmcs(vcpu, VMCS_ENTRY_CTLS, rvmcs(vcpu, VMCS_ENTRY_CTLS) |
- VM_ENTRY_GUEST_LMA);
+ wvmcs(vcpu, VMCS_ENTRY_CTLS, entry_ctls | VM_ENTRY_GUEST_LMA);
uint64_t guest_tr_ar = rvmcs(vcpu, VMCS_GUEST_TR_ACCESS_RIGHTS);
if ((efer & MSR_EFER_LME) &&
diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c
index 9380b90..e56cd84 100644
--- a/target/i386/hvf/x86_cpuid.c
+++ b/target/i386/hvf/x86_cpuid.c
@@ -146,6 +146,10 @@
CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_OSVW | CPUID_EXT3_XOP |
CPUID_EXT3_FMA4 | CPUID_EXT3_TBM;
break;
+ case 0x80000007:
+ edx &= CPUID_APM_INVTSC;
+ eax = ebx = ecx = 0;
+ break;
default:
return 0;
}
diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c
index 3728d77..a4a28f1 100644
--- a/target/i386/hvf/x86_decode.c
+++ b/target/i386/hvf/x86_decode.c
@@ -2111,7 +2111,7 @@
return decode->len;
}
-void init_decoder()
+void init_decoder(void)
{
int i;
diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c
index 3a3f0a5..38c782b 100644
--- a/target/i386/hvf/x86_emu.c
+++ b/target/i386/hvf/x86_emu.c
@@ -1409,7 +1409,7 @@
static struct cmd_handler _cmd_handler[X86_DECODE_CMD_LAST];
-static void init_cmd_handler()
+static void init_cmd_handler(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(handlers); i++) {
@@ -1481,7 +1481,7 @@
return true;
}
-void init_emu()
+void init_emu(void)
{
init_cmd_handler();
}
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 0852ed0..912f5d5 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -532,6 +532,8 @@
*/
cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX);
ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES;
+ } else if (function == 0x80000007 && reg == R_EBX) {
+ ret |= CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR;
} else if (function == KVM_CPUID_FEATURES && reg == R_EAX) {
/* kvm_pv_unhalt is reported by GET_SUPPORTED_CPUID, but it can't
* be enabled without the in-kernel irqchip
@@ -638,17 +640,40 @@
{
CPUState *cs = CPU(cpu);
CPUX86State *env = &cpu->env;
- uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
- MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S;
- uint64_t mcg_status = MCG_STATUS_MCIP;
+ uint64_t status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_MISCV |
+ MCI_STATUS_ADDRV;
+ uint64_t mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV;
int flags = 0;
- if (code == BUS_MCEERR_AR) {
- status |= MCI_STATUS_AR | 0x134;
- mcg_status |= MCG_STATUS_RIPV | MCG_STATUS_EIPV;
+ if (!IS_AMD_CPU(env)) {
+ status |= MCI_STATUS_S | MCI_STATUS_UC;
+ if (code == BUS_MCEERR_AR) {
+ status |= MCI_STATUS_AR | 0x134;
+ mcg_status |= MCG_STATUS_EIPV;
+ } else {
+ status |= 0xc0;
+ }
} else {
- status |= 0xc0;
- mcg_status |= MCG_STATUS_RIPV;
+ if (code == BUS_MCEERR_AR) {
+ status |= MCI_STATUS_UC | MCI_STATUS_POISON;
+ mcg_status |= MCG_STATUS_EIPV;
+ } else {
+ /* Setting the POISON bit for deferred errors indicates to the
+ * guest kernel that the address provided by the MCE is valid
+ * and usable which will ensure that the guest kernel will send
+ * a SIGBUS_AO signal to the guest process. This allows for
+ * more desirable behavior in the case that the guest process
+ * with poisoned memory has set the MCE_KILL_EARLY prctl flag
+ * which indicates that the process would prefer to handle or
+ * shutdown due to the poisoned memory condition before the
+ * memory has been accessed.
+ *
+ * While the POISON bit would not be set in a deferred error
+ * sent from hardware, the bit is not meaningful for deferred
+ * errors and can be reused in this scenario.
+ */
+ status |= MCI_STATUS_DEFERRED | MCI_STATUS_POISON;
+ }
}
flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0;
@@ -3376,6 +3401,17 @@
kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, env->kernelgsbase);
kvm_msr_entry_add(cpu, MSR_FMASK, env->fmask);
kvm_msr_entry_add(cpu, MSR_LSTAR, env->lstar);
+ if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, env->fred_rsp0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, env->fred_rsp1);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, env->fred_rsp2);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, env->fred_rsp3);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, env->fred_stklvls);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, env->fred_ssp1);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, env->fred_ssp2);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, env->fred_ssp3);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, env->fred_config);
+ }
}
#endif
@@ -3848,6 +3884,17 @@
kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 0);
kvm_msr_entry_add(cpu, MSR_FMASK, 0);
kvm_msr_entry_add(cpu, MSR_LSTAR, 0);
+ if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) {
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, 0);
+ kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, 0);
+ }
}
#endif
kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0);
@@ -4069,6 +4116,33 @@
case MSR_LSTAR:
env->lstar = msrs[i].data;
break;
+ case MSR_IA32_FRED_RSP0:
+ env->fred_rsp0 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_RSP1:
+ env->fred_rsp1 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_RSP2:
+ env->fred_rsp2 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_RSP3:
+ env->fred_rsp3 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_STKLVLS:
+ env->fred_stklvls = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_SSP1:
+ env->fred_ssp1 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_SSP2:
+ env->fred_ssp2 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_SSP3:
+ env->fred_ssp3 = msrs[i].data;
+ break;
+ case MSR_IA32_FRED_CONFIG:
+ env->fred_config = msrs[i].data;
+ break;
#endif
case MSR_IA32_TSC:
env->tsc = msrs[i].data;
diff --git a/target/i386/machine.c b/target/i386/machine.c
index c3ae320..39f8294 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1544,6 +1544,33 @@
};
#ifdef TARGET_X86_64
+static bool intel_fred_msrs_needed(void *opaque)
+{
+ X86CPU *cpu = opaque;
+ CPUX86State *env = &cpu->env;
+
+ return !!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED);
+}
+
+static const VMStateDescription vmstate_msr_fred = {
+ .name = "cpu/fred",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = intel_fred_msrs_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(env.fred_rsp0, X86CPU),
+ VMSTATE_UINT64(env.fred_rsp1, X86CPU),
+ VMSTATE_UINT64(env.fred_rsp2, X86CPU),
+ VMSTATE_UINT64(env.fred_rsp3, X86CPU),
+ VMSTATE_UINT64(env.fred_stklvls, X86CPU),
+ VMSTATE_UINT64(env.fred_ssp1, X86CPU),
+ VMSTATE_UINT64(env.fred_ssp2, X86CPU),
+ VMSTATE_UINT64(env.fred_ssp3, X86CPU),
+ VMSTATE_UINT64(env.fred_config, X86CPU),
+ VMSTATE_END_OF_LIST()
+ }
+ };
+
static bool amx_xtile_needed(void *opaque)
{
X86CPU *cpu = opaque;
@@ -1747,6 +1774,7 @@
&vmstate_pdptrs,
&vmstate_msr_xfd,
#ifdef TARGET_X86_64
+ &vmstate_msr_fred,
&vmstate_amx_xtile,
#endif
&vmstate_arch_lbr,
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 0ff0866..c2d8da8 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1359,6 +1359,19 @@
}
}
+static void decode_90(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+ static X86OpEntry pause = X86_OP_ENTRY0(PAUSE, svm(PAUSE));
+ static X86OpEntry nop = X86_OP_ENTRY0(NOP);
+ static X86OpEntry xchg_ax = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v);
+
+ if (REX_B(s)) {
+ *entry = xchg_ax;
+ } else {
+ *entry = (s->prefix & PREFIX_REPZ) ? pause : nop;
+ }
+}
+
static const X86OpEntry opcodes_root[256] = {
[0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock),
[0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock),
@@ -1441,7 +1454,7 @@
[0x86] = X86_OP_ENTRY2(XCHG, E,b, G,b, xchg),
[0x87] = X86_OP_ENTRY2(XCHG, E,v, G,v, xchg),
- [0x90] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
+ [0x90] = X86_OP_GROUP0(90),
[0x91] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
[0x92] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
[0x93] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v),
@@ -1496,7 +1509,7 @@
[0xE7] = X86_OP_ENTRYrr(OUT, 0,v, I_unsigned,b), /* AX/EAX */
[0xF1] = X86_OP_ENTRY0(INT1, svm(ICEBP)),
- [0xF4] = X86_OP_ENTRY0(HLT, chk(cpl0)),
+ [0xF4] = X86_OP_ENTRY0(HLT, chk(cpl0) svm(HLT)),
[0xF5] = X86_OP_ENTRY0(CMC),
[0xF6] = X86_OP_GROUP1(group3, E,b),
[0xF7] = X86_OP_GROUP1(group3, E,v),
@@ -2539,7 +2552,7 @@
/*
* Checks that result in #GP or VMEXIT come second. Intercepts are
- * generally checked after non-memory exceptions (i.e. before all
+ * generally checked after non-memory exceptions (i.e. after all
* exceptions if there is no memory operand). Exceptions are
* vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
*
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index e990141..4be3d9a 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1638,8 +1638,8 @@
{
#ifdef CONFIG_SYSTEM_ONLY
gen_update_cc_op(s);
- gen_update_eip_cur(s);
- gen_helper_hlt(tcg_env, cur_insn_len_i32(s));
+ gen_update_eip_next(s);
+ gen_helper_hlt(tcg_env);
s->base.is_jmp = DISAS_NORETURN;
#endif
}
@@ -1858,7 +1858,10 @@
static void gen_INT1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
- gen_exception(s, EXCP01_DB);
+ gen_update_cc_op(s);
+ gen_update_eip_next(s);
+ gen_helper_icebp(tcg_env);
+ s->base.is_jmp = DISAS_NORETURN;
}
static void gen_INT3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
@@ -2347,6 +2350,14 @@
decode->op[1].offset, vec_len, vec_len);
}
+static void gen_PAUSE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_update_cc_op(s);
+ gen_update_eip_next(s);
+ gen_helper_pause(tcg_env);
+ s->base.is_jmp = DISAS_NORETURN;
+}
+
static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -2564,12 +2575,14 @@
static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
+ X86DecodedOp *op = &decode->op[0];
MemOp ot = gen_pop_T0(s);
- if (decode->op[0].has_ea) {
+
+ if (op->has_ea || op->unit == X86_OP_SEG) {
/* NOTE: order is important for MMU exceptions */
- gen_op_st_v(s, ot, s->T0, s->A0);
- decode->op[0].unit = X86_OP_SKIP;
+ gen_writeback(s, decode, 0, s->T0);
}
+
/* NOTE: writing back registers after update is important for pop %sp */
gen_pop_update(s, ot);
}
@@ -4011,18 +4024,6 @@
static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
- if (decode->b == 0x90 && !REX_B(s)) {
- if (s->prefix & PREFIX_REPZ) {
- gen_update_cc_op(s);
- gen_update_eip_cur(s);
- gen_helper_pause(tcg_env, cur_insn_len_i32(s));
- s->base.is_jmp = DISAS_NORETURN;
- }
- /* No writeback. */
- decode->op[0].unit = X86_OP_SKIP;
- return;
- }
-
if (s->prefix & PREFIX_LOCK) {
tcg_gen_atomic_xchg_tl(s->T0, s->A0, s->T1,
s->mem_index, decode->op[0].ot | MO_LE);
diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c
index 65e37ae..72387aa 100644
--- a/target/i386/tcg/excp_helper.c
+++ b/target/i386/tcg/excp_helper.c
@@ -140,6 +140,26 @@
raise_interrupt2(env, exception_index, 0, 0, 0, retaddr);
}
+G_NORETURN void helper_icebp(CPUX86State *env)
+{
+ CPUState *cs = env_cpu(env);
+
+ do_end_instruction(env);
+
+ /*
+ * INT1 aka ICEBP generates a trap-like #DB, but it is pretty special.
+ *
+ * "Although the ICEBP instruction dispatches through IDT vector 1,
+ * that event is not interceptable by means of the #DB exception
+ * intercept". Instead there is a separate fault-like ICEBP intercept.
+ */
+ cs->exception_index = EXCP01_DB;
+ env->error_code = 0;
+ env->exception_is_int = 0;
+ env->exception_next_eip = env->eip;
+ cpu_loop_exit(cs);
+}
+
G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr,
MMUAccessType access_type,
uintptr_t retaddr)
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
index 8595794..15d6c6f 100644
--- a/target/i386/tcg/helper-tcg.h
+++ b/target/i386/tcg/helper-tcg.h
@@ -91,7 +91,6 @@
/* misc_helper.c */
void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask);
-G_NORETURN void do_pause(CPUX86State *env);
/* sysemu/svm_helper.c */
#ifndef CONFIG_USER_ONLY
@@ -111,7 +110,17 @@
/* smm_helper.c */
void do_smm_enter(X86CPU *cpu);
-/* bpt_helper.c */
+/* sysemu/bpt_helper.c */
bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update);
+/*
+ * Do the tasks usually performed by gen_eob(). Callers of this function
+ * should also handle TF as appropriate.
+ */
+static inline void do_end_instruction(CPUX86State *env)
+{
+ /* needed if sti is just before */
+ env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+ env->eflags &= ~HF_RF_MASK;
+}
#endif /* I386_HELPER_TCG_H */
diff --git a/target/i386/tcg/misc_helper.c b/target/i386/tcg/misc_helper.c
index b0f0f7b..ed4cda8 100644
--- a/target/i386/tcg/misc_helper.c
+++ b/target/i386/tcg/misc_helper.c
@@ -88,23 +88,19 @@
raise_exception_err(env, EXCP06_ILLOP, 0);
}
-G_NORETURN void do_pause(CPUX86State *env)
+G_NORETURN void helper_pause(CPUX86State *env)
{
CPUState *cs = env_cpu(env);
+ /* Do gen_eob() tasks before going back to the main loop. */
+ do_end_instruction(env);
+ helper_rechecking_single_step(env);
+
/* Just let another CPU run. */
cs->exception_index = EXCP_INTERRUPT;
cpu_loop_exit(cs);
}
-G_NORETURN void helper_pause(CPUX86State *env, int next_eip_addend)
-{
- cpu_svm_check_intercept_param(env, SVM_EXIT_PAUSE, 0, GETPC());
- env->eip += next_eip_addend;
-
- do_pause(env);
-}
-
uint64_t helper_rdpkru(CPUX86State *env, uint32_t ecx)
{
if ((env->cr[4] & CR4_PKE_MASK) == 0) {
diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c
index 0301459..715db1f 100644
--- a/target/i386/tcg/seg_helper.c
+++ b/target/i386/tcg/seg_helper.c
@@ -526,6 +526,24 @@
}
}
+static int exception_is_fault(int intno)
+{
+ switch (intno) {
+ /*
+ * #DB can be both fault- and trap-like, but it never sets RF=1
+ * in the RFLAGS value pushed on the stack.
+ */
+ case EXCP01_DB:
+ case EXCP03_INT3:
+ case EXCP04_INTO:
+ case EXCP08_DBLE:
+ case EXCP12_MCHK:
+ return 0;
+ }
+ /* Everything else including reserved exception is a fault. */
+ return 1;
+}
+
int exception_has_error_code(int intno)
{
switch (intno) {
@@ -605,8 +623,9 @@
int type, dpl, selector, ss_dpl, cpl;
int has_error_code, new_stack, shift;
uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0;
- uint32_t old_eip, sp_mask;
+ uint32_t old_eip, sp_mask, eflags;
int vm86 = env->eflags & VM_MASK;
+ bool set_rf;
has_error_code = 0;
if (!is_int && !is_hw) {
@@ -614,8 +633,10 @@
}
if (is_int) {
old_eip = next_eip;
+ set_rf = false;
} else {
old_eip = env->eip;
+ set_rf = exception_is_fault(intno);
}
dt = &env->idt;
@@ -748,6 +769,15 @@
}
push_size <<= shift;
#endif
+ eflags = cpu_compute_eflags(env);
+ /*
+ * AMD states that code breakpoint #DBs clear RF=0, Intel leaves it
+ * as is. AMD behavior could be implemented in check_hw_breakpoints().
+ */
+ if (set_rf) {
+ eflags |= RF_MASK;
+ }
+
if (shift == 1) {
if (new_stack) {
if (vm86) {
@@ -759,7 +789,7 @@
PUSHL(ssp, esp, sp_mask, env->segs[R_SS].selector);
PUSHL(ssp, esp, sp_mask, env->regs[R_ESP]);
}
- PUSHL(ssp, esp, sp_mask, cpu_compute_eflags(env));
+ PUSHL(ssp, esp, sp_mask, eflags);
PUSHL(ssp, esp, sp_mask, env->segs[R_CS].selector);
PUSHL(ssp, esp, sp_mask, old_eip);
if (has_error_code) {
@@ -776,7 +806,7 @@
PUSHW(ssp, esp, sp_mask, env->segs[R_SS].selector);
PUSHW(ssp, esp, sp_mask, env->regs[R_ESP]);
}
- PUSHW(ssp, esp, sp_mask, cpu_compute_eflags(env));
+ PUSHW(ssp, esp, sp_mask, eflags);
PUSHW(ssp, esp, sp_mask, env->segs[R_CS].selector);
PUSHW(ssp, esp, sp_mask, old_eip);
if (has_error_code) {
@@ -868,8 +898,9 @@
target_ulong ptr;
int type, dpl, selector, cpl, ist;
int has_error_code, new_stack;
- uint32_t e1, e2, e3, ss;
+ uint32_t e1, e2, e3, ss, eflags;
target_ulong old_eip, esp, offset;
+ bool set_rf;
has_error_code = 0;
if (!is_int && !is_hw) {
@@ -877,8 +908,10 @@
}
if (is_int) {
old_eip = next_eip;
+ set_rf = false;
} else {
old_eip = env->eip;
+ set_rf = exception_is_fault(intno);
}
dt = &env->idt;
@@ -950,9 +983,15 @@
}
esp &= ~0xfLL; /* align stack */
+ /* See do_interrupt_protected. */
+ eflags = cpu_compute_eflags(env);
+ if (set_rf) {
+ eflags |= RF_MASK;
+ }
+
PUSHQ(esp, env->segs[R_SS].selector);
PUSHQ(esp, env->regs[R_ESP]);
- PUSHQ(esp, cpu_compute_eflags(env));
+ PUSHQ(esp, eflags);
PUSHQ(esp, env->segs[R_CS].selector);
PUSHQ(esp, old_eip);
if (has_error_code) {
diff --git a/target/i386/tcg/sysemu/bpt_helper.c b/target/i386/tcg/sysemu/bpt_helper.c
index 4d96a48..b29acf4 100644
--- a/target/i386/tcg/sysemu/bpt_helper.c
+++ b/target/i386/tcg/sysemu/bpt_helper.c
@@ -215,6 +215,12 @@
if (cs->watchpoint_hit->flags & BP_CPU) {
cs->watchpoint_hit = NULL;
if (check_hw_breakpoints(env, false)) {
+ /*
+ * FIXME: #DB should be delayed by one instruction if
+ * INHIBIT_IRQ is set (STI cannot trigger a watchpoint).
+ * The delayed #DB should also fuse with one generated
+ * by ICEBP (aka INT1).
+ */
raise_exception(env, EXCP01_DB);
} else {
cpu_loop_exit_noexc(cs);
@@ -238,6 +244,12 @@
}
}
+ if (env->dr[7] & DR7_GD) {
+ env->dr[7] &= ~DR7_GD;
+ env->dr[6] |= DR6_BD;
+ raise_exception_ra(env, EXCP01_DB, GETPC());
+ }
+
return env->dr[reg];
}
@@ -251,6 +263,12 @@
}
}
+ if (env->dr[7] & DR7_GD) {
+ env->dr[7] &= ~DR7_GD;
+ env->dr[6] |= DR6_BD;
+ raise_exception_ra(env, EXCP01_DB, GETPC());
+ }
+
if (reg < 4) {
if (hw_breakpoint_enabled(env->dr[7], reg)
&& hw_breakpoint_type(env->dr[7], reg) != DR7_TYPE_IO_RW) {
diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c
index edb7c3d..7fa0c5a 100644
--- a/target/i386/tcg/sysemu/misc_helper.c
+++ b/target/i386/tcg/sysemu/misc_helper.c
@@ -516,25 +516,16 @@
tlb_flush_page(env_cpu(env), addr);
}
-static G_NORETURN
-void do_hlt(CPUX86State *env)
+G_NORETURN void helper_hlt(CPUX86State *env)
{
CPUState *cs = env_cpu(env);
- env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */
+ do_end_instruction(env);
cs->halted = 1;
cs->exception_index = EXCP_HLT;
cpu_loop_exit(cs);
}
-G_NORETURN void helper_hlt(CPUX86State *env, int next_eip_addend)
-{
- cpu_svm_check_intercept_param(env, SVM_EXIT_HLT, 0, GETPC());
- env->eip += next_eip_addend;
-
- do_hlt(env);
-}
-
void helper_monitor(CPUX86State *env, target_ulong ptr)
{
if ((uint32_t)env->regs[R_ECX] != 0) {
@@ -556,8 +547,8 @@
/* XXX: not complete but not completely erroneous */
if (cs->cpu_index != 0 || CPU_NEXT(cs) != NULL) {
- do_pause(env);
+ helper_pause(env);
} else {
- do_hlt(env);
+ helper_hlt(env);
}
}
diff --git a/target/i386/tcg/sysemu/seg_helper.c b/target/i386/tcg/sysemu/seg_helper.c
index 9ba94de..05174a7 100644
--- a/target/i386/tcg/sysemu/seg_helper.c
+++ b/target/i386/tcg/sysemu/seg_helper.c
@@ -130,15 +130,26 @@
bool x86_cpu_exec_halt(CPUState *cpu)
{
- if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
- X86CPU *x86_cpu = X86_CPU(cpu);
+ X86CPU *x86_cpu = X86_CPU(cpu);
+ CPUX86State *env = &x86_cpu->env;
+ if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
bql_lock();
apic_poll_irq(x86_cpu->apic_state);
cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
bql_unlock();
}
- return cpu_has_work(cpu);
+
+ if (!cpu_has_work(cpu)) {
+ return false;
+ }
+
+ /* Complete HLT instruction. */
+ if (env->eflags & TF_MASK) {
+ env->dr[6] |= DR6_BS;
+ do_interrupt_all(x86_cpu, EXCP01_DB, 0, 0, env->eip, 0);
+ }
+ return true;
}
bool x86_need_replay_interrupt(int interrupt_request)
diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c
index 5d6de22..9db8ad6 100644
--- a/target/i386/tcg/sysemu/svm_helper.c
+++ b/target/i386/tcg/sysemu/svm_helper.c
@@ -163,6 +163,8 @@
uint64_t new_cr0;
uint64_t new_cr3;
uint64_t new_cr4;
+ uint64_t new_dr6;
+ uint64_t new_dr7;
if (aflag == 2) {
addr = env->regs[R_EAX];
@@ -252,6 +254,13 @@
control.intercept_exceptions
));
+ env->hflags &= ~HF_INHIBIT_IRQ_MASK;
+ if (x86_ldl_phys(cs, env->vm_vmcb +
+ offsetof(struct vmcb, control.int_state)) &
+ SVM_INTERRUPT_SHADOW_MASK) {
+ env->hflags |= HF_INHIBIT_IRQ_MASK;
+ }
+
nested_ctl = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb,
control.nested_ctl));
asid = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb,
@@ -361,20 +370,22 @@
env->vm_vmcb + offsetof(struct vmcb, save.rsp));
env->regs[R_EAX] = x86_ldq_phys(cs,
env->vm_vmcb + offsetof(struct vmcb, save.rax));
- env->dr[7] = x86_ldq_phys(cs,
- env->vm_vmcb + offsetof(struct vmcb, save.dr7));
- env->dr[6] = x86_ldq_phys(cs,
- env->vm_vmcb + offsetof(struct vmcb, save.dr6));
+
+ new_dr7 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr7));
+ new_dr6 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr6));
#ifdef TARGET_X86_64
- if (env->dr[6] & DR_RESERVED_MASK) {
+ if (new_dr7 & DR_RESERVED_MASK) {
cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
}
- if (env->dr[7] & DR_RESERVED_MASK) {
+ if (new_dr6 & DR_RESERVED_MASK) {
cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
}
#endif
+ cpu_x86_update_dr7(env, new_dr7);
+ env->dr[6] = new_dr6;
+
if (is_efer_invalid_state(env)) {
cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC());
}
@@ -811,8 +822,12 @@
env->hflags &= ~HF_GUEST_MASK;
env->intercept = 0;
env->intercept_exceptions = 0;
+
+ /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */
cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
env->int_ctl = 0;
+
+ /* Clears the TSC_OFFSET inside the processor. */
env->tsc_offset = 0;
env->gdt.base = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb,
@@ -832,6 +847,15 @@
cpu_x86_update_cr4(env, x86_ldq_phys(cs,
env->vm_hsave + offsetof(struct vmcb,
save.cr4)));
+
+ /*
+ * Resets the current ASID register to zero (host ASID; TLB flush).
+ *
+ * If the host is in PAE mode, the processor reloads the host's PDPEs
+ * from the page table indicated the host's CR3. FIXME: If the PDPEs
+ * contain illegal state, the processor causes a shutdown (QEMU does
+ * not implement PDPTRs).
+ */
cpu_x86_update_cr3(env, x86_ldq_phys(cs,
env->vm_hsave + offsetof(struct vmcb,
save.cr3)));
@@ -839,12 +863,14 @@
set properly */
cpu_load_efer(env, x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb,
save.efer)));
+
+ /* Completion of the VMRUN instruction clears the host EFLAGS.RF bit. */
env->eflags = 0;
cpu_load_eflags(env, x86_ldq_phys(cs,
env->vm_hsave + offsetof(struct vmcb,
save.rflags)),
~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK |
- VM_MASK));
+ RF_MASK | VM_MASK));
svm_load_seg_cache(env, MMU_PHYS_IDX,
env->vm_hsave + offsetof(struct vmcb, save.es), R_ES);
@@ -864,8 +890,11 @@
env->dr[6] = x86_ldq_phys(cs,
env->vm_hsave + offsetof(struct vmcb, save.dr6));
- env->dr[7] = x86_ldq_phys(cs,
- env->vm_hsave + offsetof(struct vmcb, save.dr7));
+
+ /* Disables all breakpoints in the host DR7 register. */
+ cpu_x86_update_dr7(env,
+ x86_ldq_phys(cs,
+ env->vm_hsave + offsetof(struct vmcb, save.dr7)) & ~0xff);
/* other setups */
x86_stl_phys(cs,
@@ -881,21 +910,17 @@
env->hflags2 &= ~HF2_GIF_MASK;
env->hflags2 &= ~HF2_VGIF_MASK;
- /* FIXME: Resets the current ASID register to zero (host ASID). */
- /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */
- /* Clears the TSC_OFFSET inside the processor. */
+ /* FIXME: Checks the reloaded host state for consistency. */
- /* If the host is in PAE mode, the processor reloads the host's PDPEs
- from the page table indicated the host's CR3. If the PDPEs contain
- illegal state, the processor causes a shutdown. */
-
- /* Disables all breakpoints in the host DR7 register. */
-
- /* Checks the reloaded host state for consistency. */
-
- /* If the host's rIP reloaded by #VMEXIT is outside the limit of the
- host's code segment or non-canonical (in the case of long mode), a
- #GP fault is delivered inside the host. */
+ /*
+ * EFLAGS.TF causes a #DB trap after the VMRUN completes on the host
+ * side (i.e., after the #VMEXIT from the guest). Since we're running
+ * in the main loop, call do_interrupt_all directly.
+ */
+ if ((env->eflags & TF_MASK) != 0) {
+ env->dr[6] |= DR6_BS;
+ do_interrupt_all(X86_CPU(cs), EXCP01_DB, 0, 0, env->eip, 0);
+ }
}
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 0486ab6..fcba9c1 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -549,6 +549,19 @@
}
}
+static void gen_update_eip_next(DisasContext *s)
+{
+ assert(s->pc_save != -1);
+ if (tb_cflags(s->base.tb) & CF_PCREL) {
+ tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save);
+ } else if (CODE64(s)) {
+ tcg_gen_movi_tl(cpu_eip, s->pc);
+ } else {
+ tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->pc - s->cs_base));
+ }
+ s->pc_save = s->pc;
+}
+
static void gen_update_eip_cur(DisasContext *s)
{
assert(s->pc_save != -1);
@@ -2125,7 +2138,7 @@
}
/* Copy the FrameTemp value to EBP. */
- gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
+ gen_op_mov_reg_v(s, d_ot, R_EBP, s->T1);
/* Compute the final value of ESP. */
tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
@@ -3732,6 +3745,11 @@
}
gen_update_cc_op(s);
gen_update_eip_cur(s);
+ /*
+ * Reloads INHIBIT_IRQ mask as well as TF and RF with guest state.
+ * The usual gen_eob() handling is performed on vmexit after
+ * host state is reloaded.
+ */
gen_helper_vmrun(tcg_env, tcg_constant_i32(s->aflag - 1),
cur_insn_len_i32(s));
tcg_gen_exit_tb(NULL, 0);
@@ -4630,6 +4648,14 @@
* If jmp_opt, we want to handle each string instruction individually.
* For icount also disable repz optimization so that each iteration
* is accounted separately.
+ *
+ * FIXME: this is messy; it makes REP string instructions a lot less
+ * efficient than they should be and it gets in the way of correct
+ * handling of RF (interrupts or traps arriving after any iteration
+ * of a repeated string instruction but the last should set RF to 1).
+ * Perhaps it would be more efficient if REP string instructions were
+ * always at the beginning of the TB, or even their own TB? That
+ * would even allow accounting up to 64k iterations at once for icount.
*/
dc->repz_opt = !dc->jmp_opt && !(cflags & CF_USE_ICOUNT);
@@ -4735,6 +4761,17 @@
switch (dc->base.is_jmp) {
case DISAS_NORETURN:
+ /*
+ * Most instructions should not use DISAS_NORETURN, as that suppresses
+ * the handling of hflags normally done by gen_eob(). We can
+ * get here:
+ * - for exception and interrupts
+ * - for jump optimization (which is disabled by INHIBIT_IRQ/RF/TF)
+ * - for VMRUN because RF/TF handling for the host is done after vmexit,
+ * and INHIBIT_IRQ is loaded from the VMCB
+ * - for HLT/PAUSE/MWAIT to exit the main loop with specific EXCP_* values;
+ * the helpers handle themselves the tasks normally done by gen_eob().
+ */
break;
case DISAS_TOO_MANY:
gen_update_cc_op(dc);
diff --git a/util/osdep.c b/util/osdep.c
index e996c47..5d23bbf 100644
--- a/util/osdep.c
+++ b/util/osdep.c
@@ -57,9 +57,14 @@
#if defined(CONFIG_MADVISE)
return madvise(addr, len, advice);
#elif defined(CONFIG_POSIX_MADVISE)
- return posix_madvise(addr, len, advice);
+ int rc = posix_madvise(addr, len, advice);
+ if (rc) {
+ errno = rc;
+ return -1;
+ }
+ return 0;
#else
- errno = EINVAL;
+ errno = ENOSYS;
return -1;
#endif
}