kvm: First step to push iothread lock out of inner run loop

This opens the path to get rid of the iothread lock on vmexits in KVM
mode. On x86, the in-kernel irqchips has to be used because we otherwise
need to synchronize APIC and other per-cpu state accesses that could be
changed concurrently.

Regarding pre/post-run callbacks, s390x and ARM should be fine without
specific locking as the callbacks are empty. MIPS and POWER require
locking for the pre-run callback.

For the handle_exit callback, it is non-empty in x86, POWER and s390.
Some POWER cases could do without the locking, but it is left in
place for now.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1434646046-27150-7-git-send-email-pbonzini@redhat.com>
diff --git a/kvm-all.c b/kvm-all.c
index e98b08d..ca428ca 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1755,6 +1755,8 @@
         return EXCP_HLT;
     }
 
+    qemu_mutex_unlock_iothread();
+
     do {
         MemTxAttrs attrs;
 
@@ -1773,11 +1775,9 @@
              */
             qemu_cpu_kick_self();
         }
-        qemu_mutex_unlock_iothread();
 
         run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
 
-        qemu_mutex_lock_iothread();
         attrs = kvm_arch_post_run(cpu, run);
 
         if (run_ret < 0) {
@@ -1804,20 +1804,24 @@
         switch (run->exit_reason) {
         case KVM_EXIT_IO:
             DPRINTF("handle_io\n");
+            qemu_mutex_lock_iothread();
             kvm_handle_io(run->io.port, attrs,
                           (uint8_t *)run + run->io.data_offset,
                           run->io.direction,
                           run->io.size,
                           run->io.count);
+            qemu_mutex_unlock_iothread();
             ret = 0;
             break;
         case KVM_EXIT_MMIO:
             DPRINTF("handle_mmio\n");
+            qemu_mutex_lock_iothread();
             address_space_rw(&address_space_memory,
                              run->mmio.phys_addr, attrs,
                              run->mmio.data,
                              run->mmio.len,
                              run->mmio.is_write);
+            qemu_mutex_unlock_iothread();
             ret = 0;
             break;
         case KVM_EXIT_IRQ_WINDOW_OPEN:
@@ -1860,6 +1864,8 @@
         }
     } while (ret == 0);
 
+    qemu_mutex_lock_iothread();
+
     if (ret < 0) {
         cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_CODE);
         vm_stop(RUN_STATE_INTERNAL_ERROR);
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index daced5c..6426600 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -2191,7 +2191,10 @@
 
     /* Inject NMI */
     if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
+        qemu_mutex_lock_iothread();
         cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
+        qemu_mutex_unlock_iothread();
+
         DPRINTF("injected NMI\n");
         ret = kvm_vcpu_ioctl(cpu, KVM_NMI);
         if (ret < 0) {
@@ -2200,6 +2203,10 @@
         }
     }
 
+    if (!kvm_irqchip_in_kernel()) {
+        qemu_mutex_lock_iothread();
+    }
+
     /* Force the VCPU out of its inner loop to process any INIT requests
      * or (for userspace APIC, but it is cheap to combine the checks here)
      * pending TPR access reports.
@@ -2243,6 +2250,8 @@
 
         DPRINTF("setting tpr\n");
         run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state);
+
+        qemu_mutex_unlock_iothread();
     }
 }
 
@@ -2256,8 +2265,17 @@
     } else {
         env->eflags &= ~IF_MASK;
     }
+
+    /* We need to protect the apic state against concurrent accesses from
+     * different threads in case the userspace irqchip is used. */
+    if (!kvm_irqchip_in_kernel()) {
+        qemu_mutex_lock_iothread();
+    }
     cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8);
     cpu_set_apic_base(x86_cpu->apic_state, run->apic_base);
+    if (!kvm_irqchip_in_kernel()) {
+        qemu_mutex_unlock_iothread();
+    }
     return cpu_get_mem_attrs(env);
 }
 
@@ -2550,13 +2568,17 @@
     switch (run->exit_reason) {
     case KVM_EXIT_HLT:
         DPRINTF("handle_hlt\n");
+        qemu_mutex_lock_iothread();
         ret = kvm_handle_halt(cpu);
+        qemu_mutex_unlock_iothread();
         break;
     case KVM_EXIT_SET_TPR:
         ret = 0;
         break;
     case KVM_EXIT_TPR_ACCESS:
+        qemu_mutex_lock_iothread();
         ret = kvm_handle_tpr_access(cpu);
+        qemu_mutex_unlock_iothread();
         break;
     case KVM_EXIT_FAIL_ENTRY:
         code = run->fail_entry.hardware_entry_failure_reason;
@@ -2582,7 +2604,9 @@
         break;
     case KVM_EXIT_DEBUG:
         DPRINTF("kvm_exit_debug\n");
+        qemu_mutex_lock_iothread();
         ret = kvm_handle_debug(cpu, &run->debug.arch);
+        qemu_mutex_unlock_iothread();
         break;
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
diff --git a/target-mips/kvm.c b/target-mips/kvm.c
index 948619f..7d2293d 100644
--- a/target-mips/kvm.c
+++ b/target-mips/kvm.c
@@ -99,6 +99,8 @@
     int r;
     struct kvm_mips_interrupt intr;
 
+    qemu_mutex_lock_iothread();
+
     if ((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
             cpu_mips_io_interrupts_pending(cpu)) {
         intr.cpu = -1;
@@ -109,6 +111,8 @@
                          __func__, cs->cpu_index, intr.irq);
         }
     }
+
+    qemu_mutex_unlock_iothread();
 }
 
 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index afb4696..ddf469f 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1242,6 +1242,8 @@
     int r;
     unsigned irq;
 
+    qemu_mutex_lock_iothread();
+
     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
     if (!cap_interrupt_level &&
@@ -1269,6 +1271,8 @@
     /* We don't know if there are more interrupts pending after this. However,
      * the guest will return to userspace in the course of handling this one
      * anyways, so we will get a chance to deliver the rest. */
+
+    qemu_mutex_unlock_iothread();
 }
 
 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
@@ -1570,6 +1574,8 @@
     CPUPPCState *env = &cpu->env;
     int ret;
 
+    qemu_mutex_lock_iothread();
+
     switch (run->exit_reason) {
     case KVM_EXIT_DCR:
         if (run->dcr.is_write) {
@@ -1620,6 +1626,7 @@
         break;
     }
 
+    qemu_mutex_unlock_iothread();
     return ret;
 }
 
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 135111a..ae3a0af 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -2007,6 +2007,8 @@
     S390CPU *cpu = S390_CPU(cs);
     int ret = 0;
 
+    qemu_mutex_lock_iothread();
+
     switch (run->exit_reason) {
         case KVM_EXIT_S390_SIEIC:
             ret = handle_intercept(cpu);
@@ -2027,6 +2029,7 @@
             fprintf(stderr, "Unknown KVM exit: %d\n", run->exit_reason);
             break;
     }
+    qemu_mutex_unlock_iothread();
 
     if (ret == 0) {
         ret = EXCP_INTERRUPT;