Merge tag 'pull-request-2024-02-06' of https://gitlab.com/thuth/qemu into staging
* Emulate CVB, CVBY, CVBG and CVDG s390x instructions
* Fix bug in lsi53c895a reentrancy counter
* Deprecate the "power5+" and "power7+" CPU names
* Fix problems in the FreeBSD VM tests
# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEEJ7iIR+7gJQEY8+q5LtnXdP5wLbUFAmXCCXURHHRodXRoQHJl
# ZGhhdC5jb20ACgkQLtnXdP5wLbXtEA/9HKWMHbWqDAdlrpmfW8lCFaBHgV0+Fqsy
# GlxJykni2BxIWNoR7J6SdAqbgx3E2/7i8IMIUwYXlNBjEs/UQ0ZcnI5k6OfUS24p
# qfbdH717SgsaB9R1vCBhmOGGWYBfe/RqPGIcni/eg+jSxB5cn2XvEv3+ZBckvDsh
# KFuuAa6vvuBVhyXLbkP8Z+LEe27ttIYi5v1dvJ1an4UbFESqxVb0knyuFYpZpY8Y
# h7dZ0hyCid7YT03zVmSADK7anO+epBdzUU3SsKXj2dB9nebSjmkav6lQQBKYHHUg
# THojcWKwFPNK0AojhBuBCqFYgkGGt/9kjwlUt7jfm1TcSemN65XLNYHThRekPuAJ
# Jcze8dcEerbj1xsNWYh4hPvB92laEiyVR5BYFfUkJ9m2IAamPQLHvOT7jzhC3Y9k
# 4wvVcf9QKVtKW0QO54SQjD4A/qQu/4777oH5w83nGuxjUthmHDqZmjDlIRe6lKJt
# gsA+mKn+w9HrtiXOSkoMhK8PAyvCoAef/N7kvHZoHmp6TtfQAjPs4/v2uZMpnd60
# z7Cw50giHpo9lmiZ1Ey2fQvw9orYhNoXAc4XfYGHuYdQFWpCGz1PB2Km8uTPTEUe
# as364ULBqWoFBCRuRndy2+z2e3zhK5THTPCAyHf48M6teMEPa4KTsTCk7MzmfVfx
# C8RsLcmrFPI=
# =eQNc
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 06 Feb 2024 10:27:01 GMT
# gpg: using RSA key 27B88847EEE0250118F3EAB92ED9D774FE702DB5
# gpg: issuer "thuth@redhat.com"
# gpg: Good signature from "Thomas Huth <th.huth@gmx.de>" [full]
# gpg: aka "Thomas Huth <thuth@redhat.com>" [full]
# gpg: aka "Thomas Huth <huth@tuxfamily.org>" [full]
# gpg: aka "Thomas Huth <th.huth@posteo.de>" [unknown]
# Primary key fingerprint: 27B8 8847 EEE0 2501 18F3 EAB9 2ED9 D774 FE70 2DB5
* tag 'pull-request-2024-02-06' of https://gitlab.com/thuth/qemu:
meson: Link with libinotify on FreeBSD
test-util-filemonitor: Adapt to the FreeBSD inotify rename semantics
tests/vm/freebsd: Reload the sshd configuration
tests/vm: Set UseDNS=no in the sshd configuration
target/s390x: Prefer fast cpu_env() over slower CPU QOM cast macro
tests/tcg/s390x: Test CONVERT TO BINARY
tests/tcg/s390x: Test CONVERT TO DECIMAL
target/s390x: Emulate CVB, CVBY and CVBG
target/s390x: Emulate CVDG
docs/about: Deprecate the old "power5+" and "power7+" CPU names
target/ppc/cpu-models: Rename power5+ and power7+ for new QOM naming rules
hw/scsi/lsi53c895a: add missing decrement of reentrancy counter
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/backends/hostmem.c b/backends/hostmem.c
index 987f6f5..81a72ce 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -20,6 +20,7 @@
#include "qom/object_interfaces.h"
#include "qemu/mmap-alloc.h"
#include "qemu/madvise.h"
+#include "hw/qdev-core.h"
#ifdef CONFIG_NUMA
#include <numaif.h>
@@ -237,7 +238,7 @@
uint64_t sz = memory_region_size(&backend->mr);
if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
- backend->prealloc_context, errp)) {
+ backend->prealloc_context, false, errp)) {
return;
}
backend->prealloc = true;
@@ -323,6 +324,7 @@
HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
void *ptr;
uint64_t sz;
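+ /*
+ * Backends created before machine initialization may preallocate
+ * asynchronously; qemu_finish_async_prealloc_mem() waits for completion.
+ */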
+ bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED);
if (!bc->alloc) {
return;
@@ -402,7 +404,8 @@
if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr),
ptr, sz,
backend->prealloc_threads,
- backend->prealloc_context, errp)) {
+ backend->prealloc_context,
+ async, errp)) {
return;
}
}
diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c
index 0238365..ade2833 100644
--- a/hw/hyperv/hv-balloon.c
+++ b/hw/hyperv/hv-balloon.c
@@ -1477,22 +1477,7 @@
balloon->mr = g_new0(MemoryRegion, 1);
memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON,
memory_region_size(hostmem_mr));
-
- /*
- * The VM can indicate an alignment up to 32 GiB. Memory device core can
- * usually only handle/guarantee 1 GiB alignment. The user will have to
- * specify a larger maxmem eventually.
- *
- * The memory device core will warn the user in case maxmem might have to be
- * increased and will fail plugging the device if there is not sufficient
- * space after alignment.
- *
- * TODO: we could do the alignment ourselves in a slightly bigger region.
- * But this feels better, although the warning might be annoying. Maybe
- * we can optimize that in the future (e.g., with such a device on the
- * cmdline place/size the device memory region differently.
- */
- balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr));
+ balloon->mr->align = memory_region_get_alignment(hostmem_mr);
}
static void hv_balloon_free_mr(HvBalloon *balloon)
@@ -1654,6 +1639,25 @@
return balloon->mr;
}
+static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md)
+{
+ /*
+ * The VM can indicate an alignment up to 32 GiB. Memory device core can
+ * usually only handle/guarantee 1 GiB alignment. The user will have to
+ * specify a larger maxmem eventually.
+ *
+ * The memory device core will warn the user in case maxmem might have to be
+ * increased and will fail plugging the device if there is not sufficient
+ * space after alignment.
+ *
+ * TODO: we could do the alignment ourselves in a slightly bigger region.
+ * But this feels better, although the warning might be annoying. Maybe
+ * we can optimize that in the future (e.g., with such a device on the
+ * cmdline, place/size the device memory region differently).
+ */
+ return 32 * GiB;
+}
+
static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md,
MemoryDeviceInfo *info)
{
@@ -1766,5 +1770,6 @@
mdc->get_memory_region = hv_balloon_md_get_memory_region;
mdc->decide_memslots = hv_balloon_decide_memslots;
mdc->get_memslots = hv_balloon_get_memslots;
+ mdc->get_min_alignment = hv_balloon_md_get_min_alignment;
mdc->fill_device_info = hv_balloon_md_fill_device_info;
}
diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index a1b1af2..e098585 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -374,6 +374,20 @@
goto out;
}
+ /*
+ * We always want the memory region size to be a multiple of the memory
+ * region alignment: for example, DIMMs with a size of 1G+1byte don't
+ * make any sense. Note that we don't check that the size is a multiple
+ * of any additional alignment requirements the memory device might
+ * have when it comes to the address in physical address space.
+ */
+ if (!QEMU_IS_ALIGNED(memory_region_size(mr),
+ memory_region_get_alignment(mr))) {
+ error_setg(errp, "backend memory size must be a multiple of 0x%"
+ PRIx64, memory_region_get_alignment(mr));
+ return;
+ }
+
if (legacy_align) {
align = *legacy_align;
} else {
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 99ab989..ffd119e 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -605,7 +605,7 @@
int fd = memory_region_get_fd(&vmem->memdev->mr);
Error *local_err = NULL;
- if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
+ if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
static bool warned;
/*
@@ -1248,7 +1248,7 @@
int fd = memory_region_get_fd(&vmem->memdev->mr);
Error *local_err = NULL;
- if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
+ if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
error_report_err(local_err);
return -ENOMEM;
}
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index d47536e..9228e96 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -1084,6 +1084,11 @@
PHASE_ACCEL_CREATED,
/*
+ * Late backend objects have been created and initialized.
+ */
+ PHASE_LATE_BACKENDS_CREATED,
+
+ /*
* machine_class->init has been called, thus creating any embedded
* devices and validating machine properties. Devices created at
* this time are considered to be cold-plugged.
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index c9692cc..7d359da 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -680,6 +680,8 @@
* @area: start address of the area to preallocate
* @sz: the size of the area to preallocate
* @max_threads: maximum number of threads to use
+ * @tc: thread context for placing prealloc threads, NULL if not in use
+ * @async: request asynchronous preallocation, requires @tc
* @errp: returns an error if this function fails
*
* Preallocate memory (populate/prefault page tables writable) for the virtual
@@ -687,10 +689,24 @@
* each page in the area was faulted in writable at least once, for example,
* after allocating file blocks for mapped files.
*
+ * When setting @async, allocation might be performed asynchronously.
+ * qemu_finish_async_prealloc_mem() must be called to finish any asynchronous
+ * preallocation.
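+ *
+ * A minimal caller sketch, with placeholder values for @fd, @area, @sz,
+ * @max_threads and @tc:
+ *
+ *   if (!qemu_prealloc_mem(fd, area, sz, max_threads, tc, true, errp)) {
+ *       return false;
+ *   }
+ *   ... create remaining backends ...
+ *   if (!qemu_finish_async_prealloc_mem(errp)) {
+ *       return false;
+ *   }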
+ *
* Return: true on success, else false setting @errp with error.
*/
bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
- ThreadContext *tc, Error **errp);
+ ThreadContext *tc, bool async, Error **errp);
+
+/**
+ * qemu_finish_async_prealloc_mem:
+ * @errp: returns an error if this function fails
+ *
+ * Finish all outstanding asynchronous memory preallocation.
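+ * Typically called once during startup, after all memory backends have
+ * been created (see the call site in system/vl.c).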
+ *
+ * Return: true on success, else false setting @errp with error.
+ */
+bool qemu_finish_async_prealloc_mem(Error **errp);
/**
* qemu_get_pid_name:
diff --git a/system/vl.c b/system/vl.c
index bb959cb..2a0bd08 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -2013,6 +2013,14 @@
object_option_foreach_add(object_create_late);
+ /*
+ * Wait for any outstanding memory prealloc from created memory
+ * backends to complete.
+ */
+ if (!qemu_finish_async_prealloc_mem(&error_fatal)) {
+ exit(1);
+ }
+
if (tpm_init() < 0) {
exit(1);
}
@@ -3699,6 +3707,7 @@
* over memory-backend-file objects).
*/
qemu_create_late_backends();
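+ /* Memory backends created from now on will preallocate synchronously. */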
+ phase_advance(PHASE_LATE_BACKENDS_CREATED);
/*
* Note: creates a QOM object, must run only after global and
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 7c29700..3c379f9 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -42,6 +42,7 @@
#include "qemu/cutils.h"
#include "qemu/units.h"
#include "qemu/thread-context.h"
+#include "qemu/main-loop.h"
#ifdef CONFIG_LINUX
#include <sys/syscall.h>
@@ -63,11 +64,15 @@
struct MemsetThread;
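+/* Contexts with outstanding asynchronous prealloc, protected by the BQL. */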
+static QLIST_HEAD(, MemsetContext) memset_contexts =
+ QLIST_HEAD_INITIALIZER(memset_contexts);
+
typedef struct MemsetContext {
bool all_threads_created;
bool any_thread_failed;
struct MemsetThread *threads;
int num_threads;
+ QLIST_ENTRY(MemsetContext) next;
} MemsetContext;
struct MemsetThread {
@@ -412,19 +417,44 @@
return ret;
}
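+/*
+ * Join all prealloc threads of @context and free it; returns 0 on success
+ * or the error of the last thread that failed.
+ */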
+static int wait_and_free_mem_prealloc_context(MemsetContext *context)
+{
+ int i, ret = 0, tmp;
+
+ for (i = 0; i < context->num_threads; i++) {
+ tmp = (uintptr_t)qemu_thread_join(&context->threads[i].pgthread);
+
+ if (tmp) {
+ ret = tmp;
+ }
+ }
+ g_free(context->threads);
+ g_free(context);
+ return ret;
+}
+
static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
- int max_threads, ThreadContext *tc,
+ int max_threads, ThreadContext *tc, bool async,
bool use_madv_populate_write)
{
static gsize initialized = 0;
- MemsetContext context = {
- .num_threads = get_memset_num_threads(hpagesize, numpages, max_threads),
- };
+ MemsetContext *context = g_malloc0(sizeof(MemsetContext));
size_t numpages_per_thread, leftover;
void *(*touch_fn)(void *);
- int ret = 0, i = 0;
+ int ret, i = 0;
char *addr = area;
+ /*
+ * Asynchronous preallocation is only allowed when using MADV_POPULATE_WRITE
+ * and a prealloc context for thread placement.
+ */
+ if (!use_madv_populate_write || !tc) {
+ async = false;
+ }
+
+ context->num_threads =
+ get_memset_num_threads(hpagesize, numpages, max_threads);
+
if (g_once_init_enter(&initialized)) {
qemu_mutex_init(&page_mutex);
qemu_cond_init(&page_cond);
@@ -432,8 +462,11 @@
}
if (use_madv_populate_write) {
- /* Avoid creating a single thread for MADV_POPULATE_WRITE */
- if (context.num_threads == 1) {
+ /*
+ * Avoid creating a single thread for MADV_POPULATE_WRITE when
+ * preallocating synchronously.
+ */
+ if (context->num_threads == 1 && !async) {
if (qemu_madvise(area, hpagesize * numpages,
QEMU_MADV_POPULATE_WRITE)) {
return -errno;
@@ -445,50 +478,86 @@
touch_fn = do_touch_pages;
}
- context.threads = g_new0(MemsetThread, context.num_threads);
- numpages_per_thread = numpages / context.num_threads;
- leftover = numpages % context.num_threads;
- for (i = 0; i < context.num_threads; i++) {
- context.threads[i].addr = addr;
- context.threads[i].numpages = numpages_per_thread + (i < leftover);
- context.threads[i].hpagesize = hpagesize;
- context.threads[i].context = &context;
+ context->threads = g_new0(MemsetThread, context->num_threads);
+ numpages_per_thread = numpages / context->num_threads;
+ leftover = numpages % context->num_threads;
+ for (i = 0; i < context->num_threads; i++) {
+ context->threads[i].addr = addr;
+ context->threads[i].numpages = numpages_per_thread + (i < leftover);
+ context->threads[i].hpagesize = hpagesize;
+ context->threads[i].context = context;
if (tc) {
- thread_context_create_thread(tc, &context.threads[i].pgthread,
+ thread_context_create_thread(tc, &context->threads[i].pgthread,
"touch_pages",
- touch_fn, &context.threads[i],
+ touch_fn, &context->threads[i],
QEMU_THREAD_JOINABLE);
} else {
- qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
- touch_fn, &context.threads[i],
+ qemu_thread_create(&context->threads[i].pgthread, "touch_pages",
+ touch_fn, &context->threads[i],
QEMU_THREAD_JOINABLE);
}
- addr += context.threads[i].numpages * hpagesize;
+ addr += context->threads[i].numpages * hpagesize;
+ }
+
+ if (async) {
+ /*
+ * Async requests currently require the BQL. Add the context to the list
+ * and kick preallocation off during qemu_finish_async_prealloc_mem().
+ */
+ assert(bql_locked());
+ QLIST_INSERT_HEAD(&memset_contexts, context, next);
+ return 0;
}
if (!use_madv_populate_write) {
- sigbus_memset_context = &context;
+ sigbus_memset_context = context;
}
qemu_mutex_lock(&page_mutex);
- context.all_threads_created = true;
+ context->all_threads_created = true;
qemu_cond_broadcast(&page_cond);
qemu_mutex_unlock(&page_mutex);
- for (i = 0; i < context.num_threads; i++) {
- int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread);
+ ret = wait_and_free_mem_prealloc_context(context);
+ if (!use_madv_populate_write) {
+ sigbus_memset_context = NULL;
+ }
+ return ret;
+}
+
+bool qemu_finish_async_prealloc_mem(Error **errp)
+{
+ int ret = 0, tmp;
+ MemsetContext *context, *next_context;
+
+ /* Waiting for preallocation requires the BQL. */
+ assert(bql_locked());
+ if (QLIST_EMPTY(&memset_contexts)) {
+ return true;
+ }
+
+ qemu_mutex_lock(&page_mutex);
+ QLIST_FOREACH(context, &memset_contexts, next) {
+ context->all_threads_created = true;
+ }
+ qemu_cond_broadcast(&page_cond);
+ qemu_mutex_unlock(&page_mutex);
+
+ QLIST_FOREACH_SAFE(context, &memset_contexts, next, next_context) {
+ QLIST_REMOVE(context, next);
+ tmp = wait_and_free_mem_prealloc_context(context);
if (tmp) {
ret = tmp;
}
}
- if (!use_madv_populate_write) {
- sigbus_memset_context = NULL;
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "qemu_prealloc_mem: preallocating memory failed");
+ return false;
}
- g_free(context.threads);
-
- return ret;
+ return true;
}
static bool madv_populate_write_possible(char *area, size_t pagesize)
@@ -498,7 +567,7 @@
}
bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
- ThreadContext *tc, Error **errp)
+ ThreadContext *tc, bool async, Error **errp)
{
static gsize initialized;
int ret;
@@ -540,7 +609,7 @@
}
/* touch pages simultaneously */
- ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc,
+ ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc, async,
use_madv_populate_write);
if (ret) {
error_setg_errno(errp, -ret,
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index c4a5f05..b623830 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -265,7 +265,7 @@
}
bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
- ThreadContext *tc, Error **errp)
+ ThreadContext *tc, bool async, Error **errp)
{
int i;
size_t pagesize = qemu_real_host_page_size();
@@ -278,6 +278,12 @@
return true;
}
+bool qemu_finish_async_prealloc_mem(Error **errp)
+{
+ /* async prealloc not supported, there is nothing to finish */
+ return true;
+}
+
char *qemu_get_pid_name(pid_t pid)
{
/* XXX Implement me */