KVM: do not use sigtimedwait to catch SIGBUS

Call kvm_on_sigbus_vcpu asynchronously from the VCPU thread.
Information for the SIGBUS can be stored in thread-local variables
and processed later in kvm_cpu_exec.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/cpus.c b/cpus.c
index 399e271..56b1338 100644
--- a/cpus.c
+++ b/cpus.c
@@ -926,8 +926,16 @@
         sigbus_reraise();
     }
 
-    if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
-        sigbus_reraise();
+    if (current_cpu) {
+        /* Called asynchronously in VCPU thread.  */
+        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
+            sigbus_reraise();
+        }
+    } else {
+        /* Called synchronously (via signalfd) in main thread.  */
+        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
+            sigbus_reraise();
+        }
     }
 }
 
@@ -958,8 +966,9 @@
     sigaction(SIG_IPI, &sigact, NULL);
 
     pthread_sigmask(SIG_BLOCK, NULL, &set);
-    sigdelset(&set, SIG_IPI);
     sigdelset(&set, SIGBUS);
+    pthread_sigmask(SIG_SETMASK, &set, NULL);
+    sigdelset(&set, SIG_IPI);
     r = kvm_set_signal_mask(cpu, &set);
     if (r) {
         fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
@@ -977,7 +986,6 @@
 
     sigemptyset(&waitset);
     sigaddset(&waitset, SIG_IPI);
-    sigaddset(&waitset, SIGBUS);
 
     do {
         r = sigtimedwait(&waitset, &siginfo, &ts);
@@ -986,25 +994,12 @@
             exit(1);
         }
 
-        switch (r) {
-        case SIGBUS:
-            if (siginfo.si_code != BUS_MCEERR_AO && siginfo.si_code != BUS_MCEERR_AR) {
-                sigbus_reraise();
-            }
-            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
-                sigbus_reraise();
-            }
-            break;
-        default:
-            break;
-        }
-
         r = sigpending(&chkset);
         if (r == -1) {
             perror("sigpending");
             exit(1);
         }
-    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
+    } while (sigismember(&chkset, SIG_IPI));
 }
 #else /* !CONFIG_LINUX */
 static void qemu_init_sigbus(void)
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 6ecb61c..a1b019d 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -357,7 +357,10 @@
 /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
 unsigned long kvm_arch_vcpu_id(CPUState *cpu);
 
-int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
+#ifdef TARGET_I386
+#define KVM_HAVE_MCE_INJECTION 1
+void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
+#endif
 
 void kvm_arch_init_irq_routing(KVMState *s);
 
diff --git a/kvm-all.c b/kvm-all.c
index a433ad3..0baa193 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1893,6 +1893,12 @@
     run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
 }
 
+#ifdef KVM_HAVE_MCE_INJECTION
+static __thread void *pending_sigbus_addr;
+static __thread int pending_sigbus_code;
+static __thread bool have_sigbus_pending;
+#endif
+
 int kvm_cpu_exec(CPUState *cpu)
 {
     struct kvm_run *run = cpu->kvm_run;
@@ -1930,6 +1936,16 @@
 
         attrs = kvm_arch_post_run(cpu, run);
 
+#ifdef KVM_HAVE_MCE_INJECTION
+        if (unlikely(have_sigbus_pending)) {
+            qemu_mutex_lock_iothread();
+            kvm_arch_on_sigbus_vcpu(cpu, pending_sigbus_code,
+                                    pending_sigbus_addr);
+            have_sigbus_pending = false;
+            qemu_mutex_unlock_iothread();
+        }
+#endif
+
         if (run_ret < 0) {
             if (run_ret == -EINTR || run_ret == -EAGAIN) {
                 DPRINTF("io window exit\n");
@@ -2392,13 +2408,27 @@
     return r;
 }
 
+/* Called asynchronously in VCPU thread.  */
 int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
 {
-    return kvm_arch_on_sigbus_vcpu(cpu, code, addr);
+#ifdef KVM_HAVE_MCE_INJECTION
+    if (have_sigbus_pending) {
+        return 1;
+    }
+    have_sigbus_pending = true;
+    pending_sigbus_addr = addr;
+    pending_sigbus_code = code;
+    atomic_set(&cpu->exit_request, 1);
+    return 0;
+#else
+    return 1;
+#endif
 }
 
+/* Called synchronously (via signalfd) in main thread.  */
 int kvm_on_sigbus(int code, void *addr)
 {
+#ifdef KVM_HAVE_MCE_INJECTION
     /* Action required MCE kills the process if SIGBUS is blocked.  Because
      * that's what happens in the I/O thread, where we handle MCE via signalfd,
      * we can only get action optional here.
@@ -2406,6 +2436,9 @@
     assert(code != BUS_MCEERR_AR);
     kvm_arch_on_sigbus_vcpu(first_cpu, code, addr);
     return 0;
+#else
+    return 1;
+#endif
 }
 
 int kvm_create_device(KVMState *s, uint64_t type, bool test)
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index e5218f6..4555468 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -560,11 +560,6 @@
     return 0;
 }
 
-int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr)
-{
-    return 1;
-}
-
 /* The #ifdef protections are until 32bit headers are imported and can
  * be removed once both 32 and 64 bit reach feature parity.
  */
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 2adf992..7698421 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -455,7 +455,7 @@
     exit(1);
 }
 
-int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
+void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
 {
     X86CPU *cpu = X86_CPU(c);
     CPUX86State *env = &cpu->env;
@@ -475,7 +475,7 @@
             kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
             kvm_hwpoison_page_add(ram_addr);
             kvm_mce_inject(cpu, paddr, code);
-            return 0;
+            return;
         }
 
         fprintf(stderr, "Hardware memory error for memory used by "
@@ -487,7 +487,6 @@
     }
 
     /* Hope we are lucky for AO MCE */
-    return 0;
 }
 
 static int kvm_inject_mce_oldstyle(X86CPU *cpu)
diff --git a/target/mips/kvm.c b/target/mips/kvm.c
index 3e686e7..0982e87 100644
--- a/target/mips/kvm.c
+++ b/target/mips/kvm.c
@@ -180,12 +180,6 @@
     return true;
 }
 
-int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr)
-{
-    DPRINTF("%s\n", __func__);
-    return 1;
-}
-
 void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 75598cd..03f5097 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2582,11 +2582,6 @@
     return true;
 }
 
-int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
-{
-    return 1;
-}
-
 void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index e7eea6d..ac47154 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2140,11 +2140,6 @@
     return true;
 }
 
-int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
-{
-    return 1;
-}
-
 void kvm_s390_io_interrupt(uint16_t subchannel_id,
                            uint16_t subchannel_nr, uint32_t io_int_parm,
                            uint32_t io_int_word)