s390: autodetect map private
By default qemu will use MAP_PRIVATE for guest pages. This will write-protect
pages and thus break on s390 systems that don't support this feature.
Therefore qemu has a hack to always use MAP_SHARED for s390. But MAP_SHARED
has other problems (no dirty page tracking, a lot more swap overhead, etc.).
Newer systems allow the distinction via KVM_CAP_S390_COW. With this feature
qemu can use the standard qemu alloc if available, otherwise it will use
the old s390 hack.
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Acked-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
diff --git a/exec.c b/exec.c
index dd4833d..c9fa17d 100644
--- a/exec.c
+++ b/exec.c
@@ -2536,26 +2536,14 @@
exit(1);
#endif
} else {
-#if defined(TARGET_S390X) && defined(CONFIG_KVM)
- /* S390 KVM requires the topmost vma of the RAM to be smaller than
- an system defined value, which is at least 256GB. Larger systems
- have larger values. We put the guest between the end of data
- segment (system break) and this value. We use 32GB as a base to
- have enough room for the system break to grow. */
- new_block->host = mmap((void*)0x800000000, size,
- PROT_EXEC|PROT_READ|PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
- if (new_block->host == MAP_FAILED) {
- fprintf(stderr, "Allocating RAM failed\n");
- abort();
- }
-#else
if (xen_enabled()) {
xen_ram_alloc(new_block->offset, size, mr);
+ } else if (kvm_enabled()) {
+ /* some s390/kvm configurations have special constraints */
+ new_block->host = kvm_vmalloc(size);
} else {
new_block->host = qemu_vmalloc(size);
}
-#endif
qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
}
}
diff --git a/kvm-all.c b/kvm-all.c
index f8e4328..1016ca4 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1655,6 +1655,19 @@
return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
}
+void *kvm_vmalloc(ram_addr_t size)
+{
+#ifdef TARGET_S390X
+ void *mem;
+ /* Ask the s390x arch hook first; a NULL result selects the default path. */
+ mem = kvm_arch_vmalloc(size);
+ if (mem) {
+ return mem;
+ }
+#endif /* TARGET_S390X */
+ return qemu_vmalloc(size); /* generic allocator for every other case */
+}
+
void kvm_setup_guest_memory(void *start, size_t size)
{
if (!kvm_has_sync_mmu()) {
diff --git a/kvm.h b/kvm.h
index 9c7b0ea..ddc7c53 100644
--- a/kvm.h
+++ b/kvm.h
@@ -70,6 +70,8 @@
int kvm_cpu_exec(CPUArchState *env);
#if !defined(CONFIG_USER_ONLY)
+void *kvm_vmalloc(ram_addr_t size);
+void *kvm_arch_vmalloc(ram_addr_t size);
void kvm_setup_guest_memory(void *start, size_t size);
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
diff --git a/oslib-posix.c b/oslib-posix.c
index 6b7ba64..dbeb627 100644
--- a/oslib-posix.c
+++ b/oslib-posix.c
@@ -41,6 +41,9 @@
therefore we need special code which handles running on Valgrind. */
# define QEMU_VMALLOC_ALIGN (512 * 4096)
# define CONFIG_VALGRIND
+#elif defined(__linux__) && defined(__s390x__)
+ /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
+# define QEMU_VMALLOC_ALIGN (256 * 4096)
#else
# define QEMU_VMALLOC_ALIGN getpagesize()
#endif
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index ec08dd0..47008c2 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -135,6 +135,41 @@
return 0;
}
+/*
+ * Legacy layout for s390:
+ * Older S390 KVM requires the topmost vma of the RAM to be
+ * smaller than a system-defined value, which is at least 256GB.
+ * Larger systems have larger values. We put the guest between
+ * the end of the data segment (system break) and this value. We
+ * use 32GB as a base to have enough room for the system break
+ * to grow. We also have to use MAP parameters that avoid
+ * read-only mapping of guest pages. Aborts if the mmap fails.
+ */
+static void *legacy_s390_alloc(ram_addr_t size)
+{
+ void *mem;
+ /* 0x800000000 is the 32GB base; MAP_FIXED requests exactly that address. */
+ mem = mmap((void *) 0x800000000ULL, size,
+ PROT_EXEC|PROT_READ|PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ if (mem == MAP_FAILED) {
+ fprintf(stderr, "Allocating RAM failed\n");
+ abort();
+ }
+ return mem;
+}
+
+void *kvm_arch_vmalloc(ram_addr_t size)
+{
+ /* The standard allocation is used only when both GMAP and COW are present. */
+ if (kvm_check_extension(kvm_state, KVM_CAP_S390_GMAP) &&
+ kvm_check_extension(kvm_state, KVM_CAP_S390_COW)) {
+ return NULL; /* kvm_vmalloc() then falls back to qemu_vmalloc() */
+ } else {
+ return legacy_s390_alloc(size); /* fixed-address MAP_SHARED mapping */
+ }
+}
+
int kvm_arch_insert_sw_breakpoint(CPUS390XState *env, struct kvm_sw_breakpoint *bp)
{
static const uint8_t diag_501[] = {0x83, 0x24, 0x05, 0x01};