pseries: fix kvmppc_set_fwnmi()
QEMU issues the ioctl(KVM_CAP_PPC_FWNMI) on the first vCPU.
If the first vCPU is currently running, the vCPU mutex is held
and the ioctl() cannot be done and waits until the mutex is released.
This never happens and the VM is stuck.
To avoid this deadlock, issue the ioctl on the same vCPU doing the
RTAS call.
The problem can be reproduced by booting a guest with several vCPUs
(the probability to have the problem is (n - 1) / n, n = # of CPUs),
and then by triggering a kernel crash with "echo c >/proc/sysrq-trigger".
On the reboot, the kernel hangs after:
...
[ 0.000000] -----------------------------------------------------
[ 0.000000] ppc64_pft_size = 0x0
[ 0.000000] phys_mem_size = 0x48000000
[ 0.000000] dcache_bsize = 0x80
[ 0.000000] icache_bsize = 0x80
[ 0.000000] cpu_features = 0x0001c06f8f4f91a7
[ 0.000000] possible = 0x0003fbffcf5fb1a7
[ 0.000000] always = 0x00000003800081a1
[ 0.000000] cpu_user_features = 0xdc0065c2 0xaee00000
[ 0.000000] mmu_features = 0x3c006041
[ 0.000000] firmware_features = 0x00000085455a445f
[ 0.000000] physical_start = 0x8000000
[ 0.000000] -----------------------------------------------------
[ 0.000000] numa: NODE_DATA [mem 0x47f33c80-0x47f3ffff]
Fixes: ec010c00665b ("ppc/spapr: KVM FWNMI should not be enabled until guest requests it")
Cc: npiggin@gmail.com
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20200724083533.281700-1-lvivier@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index bcac0d0..513c7a8 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -438,7 +438,7 @@
}
if (kvm_enabled()) {
- if (kvmppc_set_fwnmi() < 0) {
+ if (kvmppc_set_fwnmi(cpu) < 0) {
rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
return;
}
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 2692f76..d85ba8f 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2071,9 +2071,8 @@
return cap_fwnmi;
}
-int kvmppc_set_fwnmi(void)
+int kvmppc_set_fwnmi(PowerPCCPU *cpu)
{
- PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
CPUState *cs = CPU(cpu);
return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 701c0c2..72e05f1 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -28,7 +28,7 @@
int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
bool kvmppc_get_fwnmi(void);
-int kvmppc_set_fwnmi(void);
+int kvmppc_set_fwnmi(PowerPCCPU *cpu);
int kvmppc_smt_threads(void);
void kvmppc_error_append_smt_possible_hint(Error *const *errp);
int kvmppc_set_smt_threads(int smt);
@@ -169,7 +169,7 @@
return false;
}
-static inline int kvmppc_set_fwnmi(void)
+static inline int kvmppc_set_fwnmi(PowerPCCPU *cpu)
{
return -1;
}