QEMU: MCE: Add MCE simulation to qemu/tcg
- MCE features are initialized when VCPU is intialized according to CPUID.
- A monitor command "mce" is added to inject a MCE.
- A new interrupt mask: CPU_INTERRUPT_MCE is added to inject the MCE.
aliguori: fix build for linux-user
Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/cpu-all.h b/cpu-all.h
index 97a224d..fda15ce 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -770,6 +770,7 @@
#define CPU_INTERRUPT_NMI 0x200 /* NMI pending. */
#define CPU_INTERRUPT_INIT 0x400 /* INIT pending. */
#define CPU_INTERRUPT_SIPI 0x800 /* SIPI pending. */
+#define CPU_INTERRUPT_MCE 0x1000 /* (x86 only) MCE pending. */
void cpu_interrupt(CPUState *s, int mask);
void cpu_reset_interrupt(CPUState *env, int mask);
@@ -1071,4 +1072,7 @@
extern int64_t kqemu_ret_intr_count;
#endif
+void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+ uint64_t mcg_status, uint64_t addr, uint64_t misc);
+
#endif /* CPU_ALL_H */
diff --git a/cpu-exec.c b/cpu-exec.c
index db5cb57..38335f8 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -400,6 +400,10 @@
env->hflags2 |= HF2_NMI_MASK;
do_interrupt(EXCP02_NMI, 0, 0, 0, 1);
next_tb = 0;
+ } else if (interrupt_request & CPU_INTERRUPT_MCE) {
+ env->interrupt_request &= ~CPU_INTERRUPT_MCE;
+ do_interrupt(EXCP12_MCHK, 0, 0, 0, 0);
+ next_tb = 0;
} else if ((interrupt_request & CPU_INTERRUPT_HARD) &&
(((env->hflags2 & HF2_VINTR_MASK) &&
(env->hflags2 & HF2_HIF_MASK)) ||
diff --git a/monitor.c b/monitor.c
index bad79fe..301bde0 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1677,6 +1677,28 @@
}
}
+#if defined(TARGET_I386)
+static void do_inject_mce(Monitor *mon,
+ int cpu_index, int bank,
+ unsigned status_hi, unsigned status_lo,
+ unsigned mcg_status_hi, unsigned mcg_status_lo,
+ unsigned addr_hi, unsigned addr_lo,
+ unsigned misc_hi, unsigned misc_lo)
+{
+ CPUState *cenv;
+ uint64_t status = ((uint64_t)status_hi << 32) | status_lo;
+ uint64_t mcg_status = ((uint64_t)mcg_status_hi << 32) | mcg_status_lo;
+ uint64_t addr = ((uint64_t)addr_hi << 32) | addr_lo;
+ uint64_t misc = ((uint64_t)misc_hi << 32) | misc_lo;
+
+ for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu)
+ if (cenv->cpu_index == cpu_index && cenv->mcg_cap) {
+ cpu_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc);
+ break;
+ }
+}
+#endif
+
static const mon_cmd_t mon_cmds[] = {
#include "qemu-monitor.h"
{ NULL, NULL, },
@@ -2451,6 +2473,15 @@
void *arg3, void *arg4, void *arg5);
void (*handler_7)(Monitor *mon, void *arg0, void *arg1, void *arg2,
void *arg3, void *arg4, void *arg5, void *arg6);
+ void (*handler_8)(Monitor *mon, void *arg0, void *arg1, void *arg2,
+ void *arg3, void *arg4, void *arg5, void *arg6,
+ void *arg7);
+ void (*handler_9)(Monitor *mon, void *arg0, void *arg1, void *arg2,
+ void *arg3, void *arg4, void *arg5, void *arg6,
+ void *arg7, void *arg8);
+ void (*handler_10)(Monitor *mon, void *arg0, void *arg1, void *arg2,
+ void *arg3, void *arg4, void *arg5, void *arg6,
+ void *arg7, void *arg8, void *arg9);
#ifdef DEBUG
monitor_printf(mon, "command='%s'\n", cmdline);
@@ -2739,6 +2770,21 @@
handler_7(mon, args[0], args[1], args[2], args[3], args[4], args[5],
args[6]);
break;
+ case 8:
+ handler_8 = cmd->handler;
+ handler_8(mon, args[0], args[1], args[2], args[3], args[4], args[5],
+ args[6], args[7]);
+ break;
+ case 9:
+ handler_9 = cmd->handler;
+ handler_9(mon, args[0], args[1], args[2], args[3], args[4], args[5],
+ args[6], args[7], args[8]);
+ break;
+ case 10:
+ handler_10 = cmd->handler;
+ handler_10(mon, args[0], args[1], args[2], args[3], args[4], args[5],
+ args[6], args[7], args[8], args[9]);
+ break;
default:
monitor_printf(mon, "unsupported number of arguments: %d\n", nb_args);
goto fail;
diff --git a/qemu-monitor.hx b/qemu-monitor.hx
index dc10b75..62edbcd 100644
--- a/qemu-monitor.hx
+++ b/qemu-monitor.hx
@@ -615,6 +615,14 @@
policy back to @code{deny}.
ETEXI
+#if defined(TARGET_I386)
+ { "mce", "iillll", do_inject_mce, "cpu bank status mcgstatus addr misc", "inject a MCE on the given CPU"},
+#endif
+STEXI
+@item mce @var{cpu} @var{bank} @var{status} @var{mcgstatus} @var{addr} @var{misc}
+Inject an MCE on the given CPU (x86 only).
+ETEXI
+
STEXI
@end table
ETEXI
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 4a8608e..6f7478a 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -204,6 +204,7 @@
#define CR4_DE_MASK (1 << 3)
#define CR4_PSE_MASK (1 << 4)
#define CR4_PAE_MASK (1 << 5)
+#define CR4_MCE_MASK (1 << 6)
#define CR4_PGE_MASK (1 << 7)
#define CR4_PCE_MASK (1 << 8)
#define CR4_OSFXSR_SHIFT 9
@@ -250,6 +251,17 @@
#define PG_ERROR_RSVD_MASK 0x08
#define PG_ERROR_I_D_MASK 0x10
+#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */
+
+#define MCE_CAP_DEF MCG_CTL_P
+#define MCE_BANKS_DEF 10
+
+#define MCG_STATUS_MCIP (1UL<<2) /* machine check in progress */
+
+#define MCI_STATUS_VAL (1UL<<63) /* valid error */
+#define MCI_STATUS_OVER (1UL<<62) /* previous errors lost */
+#define MCI_STATUS_UC (1UL<<61) /* uncorrected error */
+
#define MSR_IA32_TSC 0x10
#define MSR_IA32_APICBASE 0x1b
#define MSR_IA32_APICBASE_BSP (1<<8)
@@ -290,6 +302,11 @@
#define MSR_MTRRdefType 0x2ff
+#define MSR_MC0_CTL 0x400
+#define MSR_MC0_STATUS 0x401
+#define MSR_MC0_ADDR 0x402
+#define MSR_MC0_MISC 0x403
+
#define MSR_EFER 0xc0000080
#define MSR_EFER_SCE (1 << 0)
@@ -678,6 +695,11 @@
/* in order to simplify APIC support, we leave this pointer to the
user */
struct APICState *apic_state;
+
+ uint64 mcg_cap;
+ uint64 mcg_status;
+ uint64 mcg_ctl;
+ uint64 *mce_banks;
} CPUX86State;
CPUX86State *cpu_x86_init(const char *cpu_model);
@@ -842,7 +864,7 @@
#define cpu_signal_handler cpu_x86_signal_handler
#define cpu_list x86_cpu_list
-#define CPU_SAVE_VERSION 9
+#define CPU_SAVE_VERSION 10
/* MMU modes definitions */
#define MMU_MODE0_SUFFIX _kernel
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 82e1ff1..ce5346c 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1496,8 +1496,77 @@
if (prev_debug_excp_handler)
prev_debug_excp_handler(env);
}
+
+/* This should come from sysemu.h - if we could include it here... */
+void qemu_system_reset_request(void);
+
+void cpu_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+ uint64_t mcg_status, uint64_t addr, uint64_t misc)
+{
+ uint64_t mcg_cap = cenv->mcg_cap;
+ unsigned bank_num = mcg_cap & 0xff;
+ uint64_t *banks = cenv->mce_banks;
+
+ if (bank >= bank_num || !(status & MCI_STATUS_VAL))
+ return;
+
+ /*
+ * if MSR_MCG_CTL is not all 1s, the uncorrected error
+ * reporting is disabled
+ */
+ if ((status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
+ cenv->mcg_ctl != ~(uint64_t)0)
+ return;
+ banks += 4 * bank;
+ /*
+ * if MSR_MCi_CTL is not all 1s, the uncorrected error
+ * reporting is disabled for the bank
+ */
+ if ((status & MCI_STATUS_UC) && banks[0] != ~(uint64_t)0)
+ return;
+ if (status & MCI_STATUS_UC) {
+ if ((cenv->mcg_status & MCG_STATUS_MCIP) ||
+ !(cenv->cr[4] & CR4_MCE_MASK)) {
+ fprintf(stderr, "injects mce exception while previous "
+ "one is in progress!\n");
+ qemu_log_mask(CPU_LOG_RESET, "Triple fault\n");
+ qemu_system_reset_request();
+ return;
+ }
+ if (banks[1] & MCI_STATUS_VAL)
+ status |= MCI_STATUS_OVER;
+ banks[2] = addr;
+ banks[3] = misc;
+ cenv->mcg_status = mcg_status;
+ banks[1] = status;
+ cpu_interrupt(cenv, CPU_INTERRUPT_MCE);
+ } else if (!(banks[1] & MCI_STATUS_VAL)
+ || !(banks[1] & MCI_STATUS_UC)) {
+ if (banks[1] & MCI_STATUS_VAL)
+ status |= MCI_STATUS_OVER;
+ banks[2] = addr;
+ banks[3] = misc;
+ banks[1] = status;
+ } else
+ banks[1] |= MCI_STATUS_OVER;
+}
#endif /* !CONFIG_USER_ONLY */
+static void mce_init(CPUX86State *cenv)
+{
+ unsigned int bank, bank_num;
+
+ if (((cenv->cpuid_version >> 8)&0xf) >= 6
+ && (cenv->cpuid_features&(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA)) {
+ cenv->mcg_cap = MCE_CAP_DEF | MCE_BANKS_DEF;
+ cenv->mcg_ctl = ~(uint64_t)0;
+ bank_num = cenv->mcg_cap & 0xff;
+ cenv->mce_banks = qemu_mallocz(bank_num * sizeof(uint64_t) * 4);
+ for (bank = 0; bank < bank_num; bank++)
+ cenv->mce_banks[bank*4] = ~(uint64_t)0;
+ }
+}
+
static void host_cpuid(uint32_t function, uint32_t count,
uint32_t *eax, uint32_t *ebx,
uint32_t *ecx, uint32_t *edx)
@@ -1735,6 +1804,7 @@
cpu_x86_close(env);
return NULL;
}
+ mce_init(env);
cpu_reset(env);
#ifdef CONFIG_KQEMU
kqemu_init(env);
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 2a72b01..8bf13cc 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -158,7 +158,20 @@
qemu_put_sbe32s(f, &pending_irq);
qemu_put_be32s(f, &env->mp_state);
qemu_put_be64s(f, &env->tsc);
-}
+
+ /* MCE */
+ qemu_put_be64s(f, &env->mcg_cap);
+ if (env->mcg_cap) {
+ qemu_put_be64s(f, &env->mcg_status);
+ qemu_put_be64s(f, &env->mcg_ctl);
+ for (i = 0; i < (env->mcg_cap & 0xff); i++) {
+ qemu_put_be64s(f, &env->mce_banks[4*i]);
+ qemu_put_be64s(f, &env->mce_banks[4*i + 1]);
+ qemu_put_be64s(f, &env->mce_banks[4*i + 2]);
+ qemu_put_be64s(f, &env->mce_banks[4*i + 3]);
+ }
+ }
+ }
#ifdef USE_X86LDOUBLE
/* XXX: add that in a FPU generic layer */
@@ -349,6 +362,20 @@
qemu_get_be64s(f, &env->tsc);
}
+ if (version_id >= 10) {
+ qemu_get_be64s(f, &env->mcg_cap);
+ if (env->mcg_cap) {
+ qemu_get_be64s(f, &env->mcg_status);
+ qemu_get_be64s(f, &env->mcg_ctl);
+ for (i = 0; i < (env->mcg_cap & 0xff); i++) {
+ qemu_get_be64s(f, &env->mce_banks[4*i]);
+ qemu_get_be64s(f, &env->mce_banks[4*i + 1]);
+ qemu_get_be64s(f, &env->mce_banks[4*i + 2]);
+ qemu_get_be64s(f, &env->mce_banks[4*i + 3]);
+ }
+ }
+ }
+
/* XXX: ensure compatiblity for halted bit ? */
/* XXX: compute redundant hflags bits */
env->hflags = hflags;
diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
index bd1769c..ed22c7a 100644
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -3133,7 +3133,23 @@
case MSR_MTRRdefType:
env->mtrr_deftype = val;
break;
+ case MSR_MCG_STATUS:
+ env->mcg_status = val;
+ break;
+ case MSR_MCG_CTL:
+ if ((env->mcg_cap & MCG_CTL_P)
+ && (val == 0 || val == ~(uint64_t)0))
+ env->mcg_ctl = val;
+ break;
default:
+ if ((uint32_t)ECX >= MSR_MC0_CTL
+ && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) {
+ uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+ if ((offset & 0x3) != 0
+ || (val == 0 || val == ~(uint64_t)0))
+ env->mce_banks[offset] = val;
+ break;
+ }
/* XXX: exception ? */
break;
}
@@ -3252,7 +3268,25 @@
/* XXX: exception ? */
val = 0;
break;
+ case MSR_MCG_CAP:
+ val = env->mcg_cap;
+ break;
+ case MSR_MCG_CTL:
+ if (env->mcg_cap & MCG_CTL_P)
+ val = env->mcg_ctl;
+ else
+ val = 0;
+ break;
+ case MSR_MCG_STATUS:
+ val = env->mcg_status;
+ break;
default:
+ if ((uint32_t)ECX >= MSR_MC0_CTL
+ && (uint32_t)ECX < MSR_MC0_CTL + (4 * env->mcg_cap & 0xff)) {
+ uint32_t offset = (uint32_t)ECX - MSR_MC0_CTL;
+ val = env->mce_banks[offset];
+ break;
+ }
/* XXX: exception ? */
val = 0;
break;