blob: 23ae639b23781e3e1ffec4e276e292c78604551f [file] [log] [blame]
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001/*
2 * QEMU Windows Hypervisor Platform accelerator (WHPX)
3 *
4 * Copyright Microsoft Corp. 2017
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11#include "qemu/osdep.h"
12#include "cpu.h"
13#include "exec/address-spaces.h"
Justin Terry (VM)812d49f2018-01-22 13:07:48 -080014#include "exec/ioport.h"
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -080015#include "exec/gdbstub.h"
Claudio Fontana940e43a2021-02-04 17:39:24 +010016#include "qemu/accel.h"
Justin Terry (VM)812d49f2018-01-22 13:07:48 -080017#include "sysemu/whpx.h"
Justin Terry (VM)812d49f2018-01-22 13:07:48 -080018#include "sysemu/cpus.h"
Markus Armbruster54d31232019-08-12 07:23:59 +020019#include "sysemu/runstate.h"
Justin Terry (VM)812d49f2018-01-22 13:07:48 -080020#include "qemu/main-loop.h"
Philippe Mathieu-Daudé754f2872019-09-20 13:33:27 +020021#include "hw/boards.h"
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +000022#include "hw/i386/ioapic.h"
23#include "hw/i386/apic_internal.h"
Justin Terry (VM)812d49f2018-01-22 13:07:48 -080024#include "qemu/error-report.h"
Justin Terry (VM)812d49f2018-01-22 13:07:48 -080025#include "qapi/error.h"
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +000026#include "qapi/qapi-types-common.h"
27#include "qapi/qapi-visit-common.h"
Justin Terry (VM)812d49f2018-01-22 13:07:48 -080028#include "migration/blocker.h"
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +000029#include <winerror.h>
Justin Terry (VM)812d49f2018-01-22 13:07:48 -080030
Paolo Bonzini9102c962020-12-19 04:06:36 -050031#include "whpx-internal.h"
Claudio Fontanab86f59c2021-02-04 17:39:25 +010032#include "whpx-accel-ops.h"
33
34#include <WinHvPlatform.h>
35#include <WinHvEmulation.h>
Justin Terry (VM)812d49f2018-01-22 13:07:48 -080036
Sunil Muthuswamy5c8e1e82020-07-30 22:11:26 +000037#define HYPERV_APIC_BUS_FREQUENCY (200000000ULL)
38
Justin Terry (VM)812d49f2018-01-22 13:07:48 -080039static const WHV_REGISTER_NAME whpx_register_names[] = {
40
41 /* X64 General purpose registers */
42 WHvX64RegisterRax,
43 WHvX64RegisterRcx,
44 WHvX64RegisterRdx,
45 WHvX64RegisterRbx,
46 WHvX64RegisterRsp,
47 WHvX64RegisterRbp,
48 WHvX64RegisterRsi,
49 WHvX64RegisterRdi,
50 WHvX64RegisterR8,
51 WHvX64RegisterR9,
52 WHvX64RegisterR10,
53 WHvX64RegisterR11,
54 WHvX64RegisterR12,
55 WHvX64RegisterR13,
56 WHvX64RegisterR14,
57 WHvX64RegisterR15,
58 WHvX64RegisterRip,
59 WHvX64RegisterRflags,
60
61 /* X64 Segment registers */
62 WHvX64RegisterEs,
63 WHvX64RegisterCs,
64 WHvX64RegisterSs,
65 WHvX64RegisterDs,
66 WHvX64RegisterFs,
67 WHvX64RegisterGs,
68 WHvX64RegisterLdtr,
69 WHvX64RegisterTr,
70
71 /* X64 Table registers */
72 WHvX64RegisterIdtr,
73 WHvX64RegisterGdtr,
74
75 /* X64 Control Registers */
76 WHvX64RegisterCr0,
77 WHvX64RegisterCr2,
78 WHvX64RegisterCr3,
79 WHvX64RegisterCr4,
80 WHvX64RegisterCr8,
81
82 /* X64 Debug Registers */
83 /*
84 * WHvX64RegisterDr0,
85 * WHvX64RegisterDr1,
86 * WHvX64RegisterDr2,
87 * WHvX64RegisterDr3,
88 * WHvX64RegisterDr6,
89 * WHvX64RegisterDr7,
90 */
91
92 /* X64 Floating Point and Vector Registers */
93 WHvX64RegisterXmm0,
94 WHvX64RegisterXmm1,
95 WHvX64RegisterXmm2,
96 WHvX64RegisterXmm3,
97 WHvX64RegisterXmm4,
98 WHvX64RegisterXmm5,
99 WHvX64RegisterXmm6,
100 WHvX64RegisterXmm7,
101 WHvX64RegisterXmm8,
102 WHvX64RegisterXmm9,
103 WHvX64RegisterXmm10,
104 WHvX64RegisterXmm11,
105 WHvX64RegisterXmm12,
106 WHvX64RegisterXmm13,
107 WHvX64RegisterXmm14,
108 WHvX64RegisterXmm15,
109 WHvX64RegisterFpMmx0,
110 WHvX64RegisterFpMmx1,
111 WHvX64RegisterFpMmx2,
112 WHvX64RegisterFpMmx3,
113 WHvX64RegisterFpMmx4,
114 WHvX64RegisterFpMmx5,
115 WHvX64RegisterFpMmx6,
116 WHvX64RegisterFpMmx7,
117 WHvX64RegisterFpControlStatus,
118 WHvX64RegisterXmmControlStatus,
119
120 /* X64 MSRs */
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800121 WHvX64RegisterEfer,
122#ifdef TARGET_X86_64
123 WHvX64RegisterKernelGsBase,
124#endif
125 WHvX64RegisterApicBase,
126 /* WHvX64RegisterPat, */
127 WHvX64RegisterSysenterCs,
128 WHvX64RegisterSysenterEip,
129 WHvX64RegisterSysenterEsp,
130 WHvX64RegisterStar,
131#ifdef TARGET_X86_64
132 WHvX64RegisterLstar,
133 WHvX64RegisterCstar,
134 WHvX64RegisterSfmask,
135#endif
136
137 /* Interrupt / Event Registers */
138 /*
139 * WHvRegisterPendingInterruption,
140 * WHvRegisterInterruptState,
141 * WHvRegisterPendingEvent0,
142 * WHvRegisterPendingEvent1
143 * WHvX64RegisterDeliverabilityNotifications,
144 */
145};
146
147struct whpx_register_set {
148 WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
149};
150
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -0800151/*
152 * The current implementation of instruction stepping sets the TF flag
153 * in RFLAGS, causing the CPU to raise an INT1 after each instruction.
154 * This corresponds to the WHvX64ExceptionTypeDebugTrapOrFault exception.
155 *
156 * This approach has a few limitations:
157 * 1. Stepping over a PUSHF/SAHF instruction will save the TF flag
158 * along with the other flags, possibly restoring it later. It would
159 * result in another INT1 when the flags are restored, triggering
160 * a stop in gdb that could be cleared by doing another step.
161 *
162 * Stepping over a POPF/LAHF instruction will let it overwrite the
163 * TF flags, ending the stepping mode.
164 *
165 * 2. Stepping over an instruction raising an exception (e.g. INT, DIV,
166 * or anything that could result in a page fault) will save the flags
167 * to the stack, clear the TF flag, and let the guest execute the
168 * handler. Normally, the guest will restore the original flags,
169 * that will continue single-stepping.
170 *
171 * 3. Debuggers running on the guest may wish to set TF to do instruction
172 * stepping. INT1 events generated by it would be intercepted by us,
173 * as long as the gdb is connected to QEMU.
174 *
175 * In practice this means that:
176 * 1. Stepping through flags-modifying instructions may cause gdb to
177 * continue or stop in unexpected places. This will be fully recoverable
178 * and will not crash the target.
179 *
180 * 2. Stepping over an instruction that triggers an exception will step
181 * over the exception handler, not into it.
182 *
183 * 3. Debugging the guest via gdb, while running debugger on the guest
184 * at the same time may lead to unexpected effects. Removing all
185 * breakpoints set via QEMU will prevent any further interference
186 * with the guest-level debuggers.
187 *
188 * The limitations can be addressed as shown below:
189 * 1. PUSHF/SAHF/POPF/LAHF/IRET instructions can be emulated instead of
190 * stepping through them. The exact semantics of the instructions is
191 * defined in the "Combined Volume Set of Intel 64 and IA-32
192 * Architectures Software Developer's Manuals", however it involves a
193 * fair amount of corner cases due to compatibility with real mode,
194 * virtual 8086 mode, and differences between 64-bit and 32-bit modes.
195 *
196 * 2. We could step into the guest's exception handlers using the following
197 * sequence:
198 * a. Temporarily enable catching of all exception types via
199 * whpx_set_exception_exit_bitmap().
200 * b. Once an exception is intercepted, read the IDT/GDT and locate
201 * the original handler.
202 * c. Patch the original handler, injecting an INT3 at the beginning.
203 * d. Update the exception exit bitmap to only catch the
204 * WHvX64ExceptionTypeBreakpointTrap exception.
205 * e. Let the affected CPU run in the exclusive mode.
206 * f. Restore the original handler and the exception exit bitmap.
207 * Note that handling all corner cases related to IDT/GDT is harder
208 * than it may seem. See x86_cpu_get_phys_page_attrs_debug() for a
209 * rough idea.
210 *
211 * 3. In order to properly support guest-level debugging in parallel with
212 * the QEMU-level debugging, we would need to be able to pass some INT1
213 * events to the guest. This could be done via the following methods:
214 * a. Using the WHvRegisterPendingEvent register. As of Windows 21H1,
215 * it seems to only work for interrupts and not software
216 * exceptions.
217 * b. Locating and patching the original handler by parsing IDT/GDT.
218 * This involves relatively complex logic outlined in the previous
219 * paragraph.
220 * c. Emulating the exception invocation (i.e. manually updating RIP,
221 * RFLAGS, and pushing the old values to stack). This is even more
222 * complicated than the previous option, since it involves checking
223 * CPL, gate attributes, and doing various adjustments depending
224 * on the current CPU mode, whether the CPL is changing, etc.
225 */
/* Single-stepping mode of a vCPU. */
typedef enum WhpxStepMode {
    /* Not stepping */
    WHPX_STEP_NONE = 0,
    /* Stepping with the other VCPUs halted */
    WHPX_STEP_EXCLUSIVE,
} WhpxStepMode;
231
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800232struct whpx_vcpu {
233 WHV_EMULATOR_HANDLE emulator;
234 bool window_registered;
235 bool interruptable;
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +0000236 bool ready_for_pic_interrupt;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800237 uint64_t tpr;
238 uint64_t apic_base;
Justin Terry (VM)4e286092018-03-14 07:52:43 -0700239 bool interruption_pending;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800240
241 /* Must be the last field as it may have a tail */
242 WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
243};
244
245static bool whpx_allowed;
Lucian Petrut327fccb2018-05-15 20:35:21 +0300246static bool whp_dispatch_initialized;
247static HMODULE hWinHvPlatform, hWinHvEmulation;
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +0000248static uint32_t max_vcpu_index;
Sunil Muthuswamyb6b3da92019-11-07 19:48:32 +0000249static WHV_PROCESSOR_XSAVE_FEATURES whpx_xsave_cap;
250
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800251struct whpx_state whpx_global;
Lucian Petrut327fccb2018-05-15 20:35:21 +0300252struct WHPDispatch whp_dispatch;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800253
Sunil Muthuswamyb6b3da92019-11-07 19:48:32 +0000254static bool whpx_has_xsave(void)
255{
256 return whpx_xsave_cap.XsaveSupport;
257}
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800258
259/*
260 * VP support
261 */
262
263static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu)
264{
265 return (struct whpx_vcpu *)cpu->hax_vcpu;
266}
267
268static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86,
269 int r86)
270{
271 WHV_X64_SEGMENT_REGISTER hs;
272 unsigned flags = qs->flags;
273
274 hs.Base = qs->base;
275 hs.Limit = qs->limit;
276 hs.Selector = qs->selector;
277
278 if (v86) {
279 hs.Attributes = 0;
280 hs.SegmentType = 3;
281 hs.Present = 1;
282 hs.DescriptorPrivilegeLevel = 3;
283 hs.NonSystemSegment = 1;
284
285 } else {
286 hs.Attributes = (flags >> DESC_TYPE_SHIFT);
287
288 if (r86) {
289 /* hs.Base &= 0xfffff; */
290 }
291 }
292
293 return hs;
294}
295
296static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
297{
298 SegmentCache qs;
299
300 qs.base = hs->Base;
301 qs.limit = hs->Limit;
302 qs.selector = hs->Selector;
303
304 qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT;
305
306 return qs;
307}
308
Sunil Muthuswamyb6b3da92019-11-07 19:48:32 +0000309/* X64 Extended Control Registers */
310static void whpx_set_xcrs(CPUState *cpu)
311{
312 CPUX86State *env = cpu->env_ptr;
313 HRESULT hr;
314 struct whpx_state *whpx = &whpx_global;
315 WHV_REGISTER_VALUE xcr0;
316 WHV_REGISTER_NAME xcr0_name = WHvX64RegisterXCr0;
317
318 if (!whpx_has_xsave()) {
319 return;
320 }
321
322 /* Only xcr0 is supported by the hypervisor currently */
323 xcr0.Reg64 = env->xcr0;
324 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
325 whpx->partition, cpu->cpu_index, &xcr0_name, 1, &xcr0);
326 if (FAILED(hr)) {
327 error_report("WHPX: Failed to set register xcr0, hr=%08lx", hr);
328 }
329}
330
Sunil Muthuswamy6785e762020-02-26 20:54:39 +0000331static int whpx_set_tsc(CPUState *cpu)
332{
Philippe Mathieu-Daudé95e862d2022-03-05 23:35:19 +0100333 CPUX86State *env = cpu->env_ptr;
Sunil Muthuswamy6785e762020-02-26 20:54:39 +0000334 WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
335 WHV_REGISTER_VALUE tsc_val;
336 HRESULT hr;
337 struct whpx_state *whpx = &whpx_global;
338
339 /*
340 * Suspend the partition prior to setting the TSC to reduce the variance
341 * in TSC across vCPUs. When the first vCPU runs post suspend, the
342 * partition is automatically resumed.
343 */
344 if (whp_dispatch.WHvSuspendPartitionTime) {
345
346 /*
347 * Unable to suspend partition while setting TSC is not a fatal
348 * error. It just increases the likelihood of TSC variance between
349 * vCPUs and some guest OS are able to handle that just fine.
350 */
351 hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
352 if (FAILED(hr)) {
353 warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
354 }
355 }
356
357 tsc_val.Reg64 = env->tsc;
358 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
359 whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
360 if (FAILED(hr)) {
361 error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
362 return -1;
363 }
364
365 return 0;
366}
367
/*
 * The CR8 register in the CPU is mapped to the TPR register of the APIC,
 * however, they use a slightly different encoding. Specifically:
 *
 *     APIC.TPR[bits 7:4] = CR8[bits 3:0]
 *
 * This mechanism is described in section 10.8.6.1 of Volume 3 of Intel 64
 * and IA-32 Architectures Software Developer's Manual.
 *
 * Returns the CR8 value that corresponds to the APIC TPR value @tpr.
 */
static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr)
{
    /* APIC.TPR[7:4] sits this many bits above CR8[3:0] */
    const unsigned int cr8_shift = 4;

    return tpr >> cr8_shift;
}
382
Sunil Muthuswamy6785e762020-02-26 20:54:39 +0000383static void whpx_set_registers(CPUState *cpu, int level)
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800384{
385 struct whpx_state *whpx = &whpx_global;
386 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
Philippe Mathieu-Daudé95e862d2022-03-05 23:35:19 +0100387 CPUX86State *env = cpu->env_ptr;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800388 X86CPU *x86_cpu = X86_CPU(cpu);
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300389 struct whpx_register_set vcxt;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800390 HRESULT hr;
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300391 int idx;
392 int idx_next;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800393 int i;
394 int v86, r86;
395
396 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
397
Sunil Muthuswamy6785e762020-02-26 20:54:39 +0000398 /*
399 * Following MSRs have side effects on the guest or are too heavy for
400 * runtime. Limit them to full state update.
401 */
402 if (level >= WHPX_SET_RESET_STATE) {
403 whpx_set_tsc(cpu);
404 }
405
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300406 memset(&vcxt, 0, sizeof(struct whpx_register_set));
407
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800408 v86 = (env->eflags & VM_MASK);
409 r86 = !(env->cr[0] & CR0_PE_MASK);
410
Ivan Shcherbakov5ad93fd2022-02-22 21:18:00 -0800411 vcpu->tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state));
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800412 vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);
413
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300414 idx = 0;
415
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800416 /* Indexes for first 16 registers match between HV and QEMU definitions */
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300417 idx_next = 16;
418 for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
419 vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800420 }
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300421 idx = idx_next;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800422
423 /* Same goes for RIP and RFLAGS */
424 assert(whpx_register_names[idx] == WHvX64RegisterRip);
425 vcxt.values[idx++].Reg64 = env->eip;
426
427 assert(whpx_register_names[idx] == WHvX64RegisterRflags);
428 vcxt.values[idx++].Reg64 = env->eflags;
429
430 /* Translate 6+4 segment registers. HV and QEMU order matches */
431 assert(idx == WHvX64RegisterEs);
432 for (i = 0; i < 6; i += 1, idx += 1) {
433 vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86);
434 }
435
436 assert(idx == WHvX64RegisterLdtr);
437 vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0);
438
439 assert(idx == WHvX64RegisterTr);
440 vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0);
441
442 assert(idx == WHvX64RegisterIdtr);
443 vcxt.values[idx].Table.Base = env->idt.base;
444 vcxt.values[idx].Table.Limit = env->idt.limit;
445 idx += 1;
446
447 assert(idx == WHvX64RegisterGdtr);
448 vcxt.values[idx].Table.Base = env->gdt.base;
449 vcxt.values[idx].Table.Limit = env->gdt.limit;
450 idx += 1;
451
452 /* CR0, 2, 3, 4, 8 */
453 assert(whpx_register_names[idx] == WHvX64RegisterCr0);
454 vcxt.values[idx++].Reg64 = env->cr[0];
455 assert(whpx_register_names[idx] == WHvX64RegisterCr2);
456 vcxt.values[idx++].Reg64 = env->cr[2];
457 assert(whpx_register_names[idx] == WHvX64RegisterCr3);
458 vcxt.values[idx++].Reg64 = env->cr[3];
459 assert(whpx_register_names[idx] == WHvX64RegisterCr4);
460 vcxt.values[idx++].Reg64 = env->cr[4];
461 assert(whpx_register_names[idx] == WHvX64RegisterCr8);
462 vcxt.values[idx++].Reg64 = vcpu->tpr;
463
464 /* 8 Debug Registers - Skipped */
465
Sunil Muthuswamyb6b3da92019-11-07 19:48:32 +0000466 /*
467 * Extended control registers needs to be handled separately depending
468 * on whether xsave is supported/enabled or not.
469 */
470 whpx_set_xcrs(cpu);
471
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800472 /* 16 XMM registers */
473 assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300474 idx_next = idx + 16;
475 for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800476 vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
477 vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
478 }
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300479 idx = idx_next;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800480
481 /* 8 FP registers */
482 assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
483 for (i = 0; i < 8; i += 1, idx += 1) {
484 vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0);
485 /* vcxt.values[idx].Fp.AsUINT128.High64 =
486 env->fpregs[i].mmx.MMX_Q(1);
487 */
488 }
489
490 /* FP control status register */
491 assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
492 vcxt.values[idx].FpControlStatus.FpControl = env->fpuc;
493 vcxt.values[idx].FpControlStatus.FpStatus =
494 (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
495 vcxt.values[idx].FpControlStatus.FpTag = 0;
496 for (i = 0; i < 8; ++i) {
497 vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i;
498 }
499 vcxt.values[idx].FpControlStatus.Reserved = 0;
500 vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop;
501 vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip;
502 idx += 1;
503
504 /* XMM control status register */
505 assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
506 vcxt.values[idx].XmmControlStatus.LastFpRdp = 0;
507 vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr;
508 vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff;
509 idx += 1;
510
511 /* MSRs */
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800512 assert(whpx_register_names[idx] == WHvX64RegisterEfer);
513 vcxt.values[idx++].Reg64 = env->efer;
514#ifdef TARGET_X86_64
515 assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
516 vcxt.values[idx++].Reg64 = env->kernelgsbase;
517#endif
518
519 assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
520 vcxt.values[idx++].Reg64 = vcpu->apic_base;
521
522 /* WHvX64RegisterPat - Skipped */
523
524 assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
525 vcxt.values[idx++].Reg64 = env->sysenter_cs;
526 assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
527 vcxt.values[idx++].Reg64 = env->sysenter_eip;
528 assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
529 vcxt.values[idx++].Reg64 = env->sysenter_esp;
530 assert(whpx_register_names[idx] == WHvX64RegisterStar);
531 vcxt.values[idx++].Reg64 = env->star;
532#ifdef TARGET_X86_64
533 assert(whpx_register_names[idx] == WHvX64RegisterLstar);
534 vcxt.values[idx++].Reg64 = env->lstar;
535 assert(whpx_register_names[idx] == WHvX64RegisterCstar);
536 vcxt.values[idx++].Reg64 = env->cstar;
537 assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
538 vcxt.values[idx++].Reg64 = env->fmask;
539#endif
540
541 /* Interrupt / Event Registers - Skipped */
542
543 assert(idx == RTL_NUMBER_OF(whpx_register_names));
544
Lucian Petrut327fccb2018-05-15 20:35:21 +0300545 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
546 whpx->partition, cpu->cpu_index,
547 whpx_register_names,
548 RTL_NUMBER_OF(whpx_register_names),
549 &vcxt.values[0]);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800550
551 if (FAILED(hr)) {
552 error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
553 hr);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800554 }
555
556 return;
557}
558
Sunil Muthuswamy6785e762020-02-26 20:54:39 +0000559static int whpx_get_tsc(CPUState *cpu)
560{
Philippe Mathieu-Daudé95e862d2022-03-05 23:35:19 +0100561 CPUX86State *env = cpu->env_ptr;
Sunil Muthuswamy6785e762020-02-26 20:54:39 +0000562 WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
563 WHV_REGISTER_VALUE tsc_val;
564 HRESULT hr;
565 struct whpx_state *whpx = &whpx_global;
566
567 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
568 whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
569 if (FAILED(hr)) {
570 error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
571 return -1;
572 }
573
574 env->tsc = tsc_val.Reg64;
575 return 0;
576}
577
Sunil Muthuswamyb6b3da92019-11-07 19:48:32 +0000578/* X64 Extended Control Registers */
579static void whpx_get_xcrs(CPUState *cpu)
580{
581 CPUX86State *env = cpu->env_ptr;
582 HRESULT hr;
583 struct whpx_state *whpx = &whpx_global;
584 WHV_REGISTER_VALUE xcr0;
585 WHV_REGISTER_NAME xcr0_name = WHvX64RegisterXCr0;
586
587 if (!whpx_has_xsave()) {
588 return;
589 }
590
591 /* Only xcr0 is supported by the hypervisor currently */
592 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
593 whpx->partition, cpu->cpu_index, &xcr0_name, 1, &xcr0);
594 if (FAILED(hr)) {
595 error_report("WHPX: Failed to get register xcr0, hr=%08lx", hr);
596 return;
597 }
598
599 env->xcr0 = xcr0.Reg64;
600}
601
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800602static void whpx_get_registers(CPUState *cpu)
603{
604 struct whpx_state *whpx = &whpx_global;
605 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
Philippe Mathieu-Daudé95e862d2022-03-05 23:35:19 +0100606 CPUX86State *env = cpu->env_ptr;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800607 X86CPU *x86_cpu = X86_CPU(cpu);
608 struct whpx_register_set vcxt;
609 uint64_t tpr, apic_base;
610 HRESULT hr;
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300611 int idx;
612 int idx_next;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800613 int i;
614
615 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
616
Sunil Muthuswamy6785e762020-02-26 20:54:39 +0000617 if (!env->tsc_valid) {
618 whpx_get_tsc(cpu);
619 env->tsc_valid = !runstate_is_running();
620 }
621
Lucian Petrut327fccb2018-05-15 20:35:21 +0300622 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
623 whpx->partition, cpu->cpu_index,
624 whpx_register_names,
625 RTL_NUMBER_OF(whpx_register_names),
626 &vcxt.values[0]);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800627 if (FAILED(hr)) {
628 error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
629 hr);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800630 }
631
Ivan Shcherbakov5ad93fd2022-02-22 21:18:00 -0800632 if (whpx_apic_in_platform()) {
633 /*
634 * Fetch the TPR value from the emulated APIC. It may get overwritten
635 * below with the value from CR8 returned by
636 * WHvGetVirtualProcessorRegisters().
637 */
638 whpx_apic_get(x86_cpu->apic_state);
639 vcpu->tpr = whpx_apic_tpr_to_cr8(
640 cpu_get_apic_tpr(x86_cpu->apic_state));
641 }
642
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300643 idx = 0;
644
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800645 /* Indexes for first 16 registers match between HV and QEMU definitions */
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300646 idx_next = 16;
647 for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800648 env->regs[idx] = vcxt.values[idx].Reg64;
649 }
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300650 idx = idx_next;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800651
652 /* Same goes for RIP and RFLAGS */
653 assert(whpx_register_names[idx] == WHvX64RegisterRip);
654 env->eip = vcxt.values[idx++].Reg64;
655 assert(whpx_register_names[idx] == WHvX64RegisterRflags);
656 env->eflags = vcxt.values[idx++].Reg64;
657
658 /* Translate 6+4 segment registers. HV and QEMU order matches */
659 assert(idx == WHvX64RegisterEs);
660 for (i = 0; i < 6; i += 1, idx += 1) {
661 env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment);
662 }
663
664 assert(idx == WHvX64RegisterLdtr);
665 env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment);
666 assert(idx == WHvX64RegisterTr);
667 env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment);
668 assert(idx == WHvX64RegisterIdtr);
669 env->idt.base = vcxt.values[idx].Table.Base;
670 env->idt.limit = vcxt.values[idx].Table.Limit;
671 idx += 1;
672 assert(idx == WHvX64RegisterGdtr);
673 env->gdt.base = vcxt.values[idx].Table.Base;
674 env->gdt.limit = vcxt.values[idx].Table.Limit;
675 idx += 1;
676
677 /* CR0, 2, 3, 4, 8 */
678 assert(whpx_register_names[idx] == WHvX64RegisterCr0);
679 env->cr[0] = vcxt.values[idx++].Reg64;
680 assert(whpx_register_names[idx] == WHvX64RegisterCr2);
681 env->cr[2] = vcxt.values[idx++].Reg64;
682 assert(whpx_register_names[idx] == WHvX64RegisterCr3);
683 env->cr[3] = vcxt.values[idx++].Reg64;
684 assert(whpx_register_names[idx] == WHvX64RegisterCr4);
685 env->cr[4] = vcxt.values[idx++].Reg64;
686 assert(whpx_register_names[idx] == WHvX64RegisterCr8);
687 tpr = vcxt.values[idx++].Reg64;
688 if (tpr != vcpu->tpr) {
689 vcpu->tpr = tpr;
690 cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
691 }
692
693 /* 8 Debug Registers - Skipped */
694
Sunil Muthuswamyb6b3da92019-11-07 19:48:32 +0000695 /*
696 * Extended control registers needs to be handled separately depending
697 * on whether xsave is supported/enabled or not.
698 */
699 whpx_get_xcrs(cpu);
700
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800701 /* 16 XMM registers */
702 assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300703 idx_next = idx + 16;
704 for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800705 env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
706 env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
707 }
Lucian Petrutc3942bf2018-05-15 20:35:22 +0300708 idx = idx_next;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800709
710 /* 8 FP registers */
711 assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
712 for (i = 0; i < 8; i += 1, idx += 1) {
713 env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64;
714 /* env->fpregs[i].mmx.MMX_Q(1) =
715 vcxt.values[idx].Fp.AsUINT128.High64;
716 */
717 }
718
719 /* FP control status register */
720 assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
721 env->fpuc = vcxt.values[idx].FpControlStatus.FpControl;
722 env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7;
723 env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800;
724 for (i = 0; i < 8; ++i) {
725 env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1);
726 }
727 env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp;
728 env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip;
729 idx += 1;
730
731 /* XMM control status register */
732 assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
733 env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl;
734 idx += 1;
735
736 /* MSRs */
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800737 assert(whpx_register_names[idx] == WHvX64RegisterEfer);
738 env->efer = vcxt.values[idx++].Reg64;
739#ifdef TARGET_X86_64
740 assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
741 env->kernelgsbase = vcxt.values[idx++].Reg64;
742#endif
743
744 assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
745 apic_base = vcxt.values[idx++].Reg64;
746 if (apic_base != vcpu->apic_base) {
747 vcpu->apic_base = apic_base;
748 cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base);
749 }
750
751 /* WHvX64RegisterPat - Skipped */
752
753 assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
Philippe Mathieu-Daudé7c98f0f2020-02-18 10:44:00 +0100754 env->sysenter_cs = vcxt.values[idx++].Reg64;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800755 assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
756 env->sysenter_eip = vcxt.values[idx++].Reg64;
757 assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
758 env->sysenter_esp = vcxt.values[idx++].Reg64;
759 assert(whpx_register_names[idx] == WHvX64RegisterStar);
760 env->star = vcxt.values[idx++].Reg64;
761#ifdef TARGET_X86_64
762 assert(whpx_register_names[idx] == WHvX64RegisterLstar);
763 env->lstar = vcxt.values[idx++].Reg64;
764 assert(whpx_register_names[idx] == WHvX64RegisterCstar);
765 env->cstar = vcxt.values[idx++].Reg64;
766 assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
767 env->fmask = vcxt.values[idx++].Reg64;
768#endif
769
770 /* Interrupt / Event Registers - Skipped */
771
772 assert(idx == RTL_NUMBER_OF(whpx_register_names));
773
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +0000774 if (whpx_apic_in_platform()) {
775 whpx_apic_get(x86_cpu->apic_state);
776 }
777
Ivan Shcherbakove5618902022-02-22 21:15:41 -0800778 x86_update_hflags(env);
779
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800780 return;
781}
782
783static HRESULT CALLBACK whpx_emu_ioport_callback(
784 void *ctx,
785 WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
786{
787 MemTxAttrs attrs = { 0 };
788 address_space_rw(&address_space_io, IoAccess->Port, attrs,
Philippe Mathieu-Daudéb7cbebf2020-02-19 20:28:22 +0100789 &IoAccess->Data, IoAccess->AccessSize,
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800790 IoAccess->Direction);
791 return S_OK;
792}
793
Justin Terry (VM) via Qemu-develf875f042018-02-26 09:13:33 -0800794static HRESULT CALLBACK whpx_emu_mmio_callback(
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800795 void *ctx,
796 WHV_EMULATOR_MEMORY_ACCESS_INFO *ma)
797{
798 cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize,
799 ma->Direction);
800 return S_OK;
801}
802
803static HRESULT CALLBACK whpx_emu_getreg_callback(
804 void *ctx,
805 const WHV_REGISTER_NAME *RegisterNames,
806 UINT32 RegisterCount,
807 WHV_REGISTER_VALUE *RegisterValues)
808{
809 HRESULT hr;
810 struct whpx_state *whpx = &whpx_global;
811 CPUState *cpu = (CPUState *)ctx;
812
Lucian Petrut327fccb2018-05-15 20:35:21 +0300813 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
814 whpx->partition, cpu->cpu_index,
815 RegisterNames, RegisterCount,
816 RegisterValues);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800817 if (FAILED(hr)) {
818 error_report("WHPX: Failed to get virtual processor registers,"
819 " hr=%08lx", hr);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800820 }
821
822 return hr;
823}
824
825static HRESULT CALLBACK whpx_emu_setreg_callback(
826 void *ctx,
827 const WHV_REGISTER_NAME *RegisterNames,
828 UINT32 RegisterCount,
829 const WHV_REGISTER_VALUE *RegisterValues)
830{
831 HRESULT hr;
832 struct whpx_state *whpx = &whpx_global;
833 CPUState *cpu = (CPUState *)ctx;
834
Lucian Petrut327fccb2018-05-15 20:35:21 +0300835 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
836 whpx->partition, cpu->cpu_index,
837 RegisterNames, RegisterCount,
838 RegisterValues);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800839 if (FAILED(hr)) {
840 error_report("WHPX: Failed to set virtual processor registers,"
841 " hr=%08lx", hr);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800842 }
843
844 /*
845 * The emulator just successfully wrote the register state. We clear the
846 * dirty state so we avoid the double write on resume of the VP.
847 */
848 cpu->vcpu_dirty = false;
849
850 return hr;
851}
852
853static HRESULT CALLBACK whpx_emu_translate_callback(
854 void *ctx,
855 WHV_GUEST_VIRTUAL_ADDRESS Gva,
856 WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
857 WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult,
858 WHV_GUEST_PHYSICAL_ADDRESS *Gpa)
859{
860 HRESULT hr;
861 struct whpx_state *whpx = &whpx_global;
862 CPUState *cpu = (CPUState *)ctx;
863 WHV_TRANSLATE_GVA_RESULT res;
864
Lucian Petrut327fccb2018-05-15 20:35:21 +0300865 hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
866 Gva, TranslateFlags, &res, Gpa);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800867 if (FAILED(hr)) {
868 error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800869 } else {
870 *TranslationResult = res.ResultCode;
871 }
872
873 return hr;
874}
875
876static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = {
Justin Terry (VM) via Qemu-devel914e2ab2018-02-26 09:13:30 -0800877 .Size = sizeof(WHV_EMULATOR_CALLBACKS),
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800878 .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback,
Justin Terry (VM) via Qemu-develf875f042018-02-26 09:13:33 -0800879 .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback,
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800880 .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback,
881 .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback,
882 .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback,
883};
884
885static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx)
886{
887 HRESULT hr;
888 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
889 WHV_EMULATOR_STATUS emu_status;
890
Lucian Petrut327fccb2018-05-15 20:35:21 +0300891 hr = whp_dispatch.WHvEmulatorTryMmioEmulation(
892 vcpu->emulator, cpu,
893 &vcpu->exit_ctx.VpContext, ctx,
894 &emu_status);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800895 if (FAILED(hr)) {
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800896 error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr);
897 return -1;
898 }
899
900 if (!emu_status.EmulationSuccessful) {
Lucian Petrut327fccb2018-05-15 20:35:21 +0300901 error_report("WHPX: Failed to emulate MMIO access with"
902 " EmulatorReturnStatus: %u", emu_status.AsUINT32);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800903 return -1;
904 }
905
906 return 0;
907}
908
909static int whpx_handle_portio(CPUState *cpu,
910 WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx)
911{
912 HRESULT hr;
913 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
914 WHV_EMULATOR_STATUS emu_status;
915
Lucian Petrut327fccb2018-05-15 20:35:21 +0300916 hr = whp_dispatch.WHvEmulatorTryIoEmulation(
917 vcpu->emulator, cpu,
918 &vcpu->exit_ctx.VpContext, ctx,
919 &emu_status);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800920 if (FAILED(hr)) {
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800921 error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr);
922 return -1;
923 }
924
925 if (!emu_status.EmulationSuccessful) {
Lucian Petrut327fccb2018-05-15 20:35:21 +0300926 error_report("WHPX: Failed to emulate PortIO access with"
927 " EmulatorReturnStatus: %u", emu_status.AsUINT32);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -0800928 return -1;
929 }
930
931 return 0;
932}
933
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -0800934/*
935 * Controls whether we should intercept various exceptions on the guest,
936 * namely breakpoint/single-step events.
937 *
938 * The 'exceptions' argument accepts a bitmask, e.g:
939 * (1 << WHvX64ExceptionTypeDebugTrapOrFault) | (...)
940 */
941static HRESULT whpx_set_exception_exit_bitmap(UINT64 exceptions)
942{
943 struct whpx_state *whpx = &whpx_global;
944 WHV_PARTITION_PROPERTY prop = { 0, };
945 HRESULT hr;
946
947 if (exceptions == whpx->exception_exit_bitmap) {
948 return S_OK;
949 }
950
951 prop.ExceptionExitBitmap = exceptions;
952
953 hr = whp_dispatch.WHvSetPartitionProperty(
954 whpx->partition,
955 WHvPartitionPropertyCodeExceptionExitBitmap,
956 &prop,
957 sizeof(WHV_PARTITION_PROPERTY));
958
959 if (SUCCEEDED(hr)) {
960 whpx->exception_exit_bitmap = exceptions;
961 }
962
963 return hr;
964}
965
966
967/*
968 * This function is called before/after stepping over a single instruction.
969 * It will update the CPU registers to arm/disarm the instruction stepping
970 * accordingly.
971 */
972static HRESULT whpx_vcpu_configure_single_stepping(CPUState *cpu,
973 bool set,
974 uint64_t *exit_context_rflags)
975{
976 WHV_REGISTER_NAME reg_name;
977 WHV_REGISTER_VALUE reg_value;
978 HRESULT hr;
979 struct whpx_state *whpx = &whpx_global;
980
981 /*
982 * If we are trying to step over a single instruction, we need to set the
983 * TF bit in rflags. Otherwise, clear it.
984 */
985 reg_name = WHvX64RegisterRflags;
986 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
987 whpx->partition,
988 cpu->cpu_index,
989 &reg_name,
990 1,
991 &reg_value);
992
993 if (FAILED(hr)) {
994 error_report("WHPX: Failed to get rflags, hr=%08lx", hr);
995 return hr;
996 }
997
998 if (exit_context_rflags) {
999 assert(*exit_context_rflags == reg_value.Reg64);
1000 }
1001
1002 if (set) {
1003 /* Raise WHvX64ExceptionTypeDebugTrapOrFault after each instruction */
1004 reg_value.Reg64 |= TF_MASK;
1005 } else {
1006 reg_value.Reg64 &= ~TF_MASK;
1007 }
1008
1009 if (exit_context_rflags) {
1010 *exit_context_rflags = reg_value.Reg64;
1011 }
1012
1013 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1014 whpx->partition,
1015 cpu->cpu_index,
1016 &reg_name,
1017 1,
1018 &reg_value);
1019
1020 if (FAILED(hr)) {
1021 error_report("WHPX: Failed to set rflags,"
1022 " hr=%08lx",
1023 hr);
1024 return hr;
1025 }
1026
1027 reg_name = WHvRegisterInterruptState;
1028 reg_value.Reg64 = 0;
1029
1030 /* Suspend delivery of hardware interrupts during single-stepping. */
1031 reg_value.InterruptState.InterruptShadow = set != 0;
1032
1033 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1034 whpx->partition,
1035 cpu->cpu_index,
1036 &reg_name,
1037 1,
1038 &reg_value);
1039
1040 if (FAILED(hr)) {
1041 error_report("WHPX: Failed to set InterruptState,"
1042 " hr=%08lx",
1043 hr);
1044 return hr;
1045 }
1046
1047 if (!set) {
1048 /*
1049 * We have just finished stepping over a single instruction,
1050 * and intercepted the INT1 generated by it.
1051 * We need to now hide the INT1 from the guest,
1052 * as it would not be expecting it.
1053 */
1054
1055 reg_name = WHvX64RegisterPendingDebugException;
1056 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
1057 whpx->partition,
1058 cpu->cpu_index,
1059 &reg_name,
1060 1,
1061 &reg_value);
1062
1063 if (FAILED(hr)) {
1064 error_report("WHPX: Failed to get pending debug exceptions,"
1065 "hr=%08lx", hr);
1066 return hr;
1067 }
1068
1069 if (reg_value.PendingDebugException.SingleStep) {
1070 reg_value.PendingDebugException.SingleStep = 0;
1071
1072 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1073 whpx->partition,
1074 cpu->cpu_index,
1075 &reg_name,
1076 1,
1077 &reg_value);
1078
1079 if (FAILED(hr)) {
1080 error_report("WHPX: Failed to clear pending debug exceptions,"
1081 "hr=%08lx", hr);
1082 return hr;
1083 }
1084 }
1085
1086 }
1087
1088 return S_OK;
1089}
1090
1091/* Tries to find a breakpoint at the specified address. */
1092static struct whpx_breakpoint *whpx_lookup_breakpoint_by_addr(uint64_t address)
1093{
1094 struct whpx_state *whpx = &whpx_global;
1095 int i;
1096
1097 if (whpx->breakpoints.breakpoints) {
1098 for (i = 0; i < whpx->breakpoints.breakpoints->used; i++) {
1099 if (address == whpx->breakpoints.breakpoints->data[i].address) {
1100 return &whpx->breakpoints.breakpoints->data[i];
1101 }
1102 }
1103 }
1104
1105 return NULL;
1106}
1107
/*
 * Opcode written into guest memory to implement a breakpoint: INT1 (0xF1).
 *
 * INT3 (0xCC) is deliberately not used. Linux executes int3 during startup
 * (see int3_selftest()) and when debugging user-mode applications, and the
 * WHPX API offers no easy way to hand an intercepted exception back to the
 * guest. Breakpoints therefore use INT1, and INT3 is always left for the
 * guest to handle.
 */
static const uint8_t whpx_breakpoint_instruction = 0xF1;
1115
1116/*
1117 * The WHPX QEMU backend implements breakpoints by writing the INT1
1118 * instruction into memory (ignoring the DRx registers). This raises a few
1119 * issues that need to be carefully handled:
1120 *
1121 * 1. Although unlikely, other parts of QEMU may set multiple breakpoints
1122 * at the same location, and later remove them in arbitrary order.
1123 * This should not cause memory corruption, and should only remove the
1124 * physical breakpoint instruction when the last QEMU breakpoint is gone.
1125 *
1126 * 2. Writing arbitrary virtual memory may fail if it's not mapped to a valid
1127 * physical location. Hence, physically adding/removing a breakpoint can
1128 * theoretically fail at any time. We need to keep track of it.
1129 *
1130 * The function below rebuilds a list of low-level breakpoints (one per
1131 * address, tracking the original instruction and any errors) from the list of
1132 * high-level breakpoints (set via cpu_breakpoint_insert()).
1133 *
1134 * In order to optimize performance, this function stores the list of
1135 * high-level breakpoints (a.k.a. CPU breakpoints) used to compute the
1136 * low-level ones, so that it won't be re-invoked until these breakpoints
1137 * change.
1138 *
1139 * Note that this function decides which breakpoints should be inserted into,
1140 * memory, but doesn't actually do it. The memory accessing is done in
1141 * whpx_apply_breakpoints().
1142 */
1143static void whpx_translate_cpu_breakpoints(
1144 struct whpx_breakpoints *breakpoints,
1145 CPUState *cpu,
1146 int cpu_breakpoint_count)
1147{
1148 CPUBreakpoint *bp;
1149 int cpu_bp_index = 0;
1150
1151 breakpoints->original_addresses =
1152 g_renew(vaddr, breakpoints->original_addresses, cpu_breakpoint_count);
1153
1154 breakpoints->original_address_count = cpu_breakpoint_count;
1155
1156 int max_breakpoints = cpu_breakpoint_count +
1157 (breakpoints->breakpoints ? breakpoints->breakpoints->used : 0);
1158
1159 struct whpx_breakpoint_collection *new_breakpoints =
1160 (struct whpx_breakpoint_collection *)g_malloc0(
1161 sizeof(struct whpx_breakpoint_collection) +
1162 max_breakpoints * sizeof(struct whpx_breakpoint));
1163
1164 new_breakpoints->allocated = max_breakpoints;
1165 new_breakpoints->used = 0;
1166
1167 /*
1168 * 1. Preserve all old breakpoints that could not be automatically
1169 * cleared when the CPU got stopped.
1170 */
1171 if (breakpoints->breakpoints) {
1172 int i;
1173 for (i = 0; i < breakpoints->breakpoints->used; i++) {
1174 if (breakpoints->breakpoints->data[i].state != WHPX_BP_CLEARED) {
1175 new_breakpoints->data[new_breakpoints->used++] =
1176 breakpoints->breakpoints->data[i];
1177 }
1178 }
1179 }
1180
1181 /* 2. Map all CPU breakpoints to WHPX breakpoints */
1182 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
1183 int i;
1184 bool found = false;
1185
1186 /* This will be used to detect changed CPU breakpoints later. */
1187 breakpoints->original_addresses[cpu_bp_index++] = bp->pc;
1188
1189 for (i = 0; i < new_breakpoints->used; i++) {
1190 /*
1191 * WARNING: This loop has O(N^2) complexity, where N is the
1192 * number of breakpoints. It should not be a bottleneck in
1193 * real-world scenarios, since it only needs to run once after
1194 * the breakpoints have been modified.
1195 * If this ever becomes a concern, it can be optimized by storing
1196 * high-level breakpoint objects in a tree or hash map.
1197 */
1198
1199 if (new_breakpoints->data[i].address == bp->pc) {
1200 /* There was already a breakpoint at this address. */
1201 if (new_breakpoints->data[i].state == WHPX_BP_CLEAR_PENDING) {
1202 new_breakpoints->data[i].state = WHPX_BP_SET;
1203 } else if (new_breakpoints->data[i].state == WHPX_BP_SET) {
1204 new_breakpoints->data[i].state = WHPX_BP_SET_PENDING;
1205 }
1206
1207 found = true;
1208 break;
1209 }
1210 }
1211
1212 if (!found && new_breakpoints->used < new_breakpoints->allocated) {
1213 /* No WHPX breakpoint at this address. Create one. */
1214 new_breakpoints->data[new_breakpoints->used].address = bp->pc;
1215 new_breakpoints->data[new_breakpoints->used].state =
1216 WHPX_BP_SET_PENDING;
1217 new_breakpoints->used++;
1218 }
1219 }
1220
1221 if (breakpoints->breakpoints) {
1222 /*
1223 * Free the previous breakpoint list. This can be optimized by keeping
1224 * it as shadow buffer for the next computation instead of freeing
1225 * it immediately.
1226 */
1227 g_free(breakpoints->breakpoints);
1228 }
1229
1230 breakpoints->breakpoints = new_breakpoints;
1231}
1232
1233/*
1234 * Physically inserts/removes the breakpoints by reading and writing the
1235 * physical memory, keeping a track of the failed attempts.
1236 *
1237 * Passing resuming=true will try to set all previously unset breakpoints.
1238 * Passing resuming=false will remove all inserted ones.
1239 */
1240static void whpx_apply_breakpoints(
1241 struct whpx_breakpoint_collection *breakpoints,
1242 CPUState *cpu,
1243 bool resuming)
1244{
1245 int i, rc;
1246 if (!breakpoints) {
1247 return;
1248 }
1249
1250 for (i = 0; i < breakpoints->used; i++) {
1251 /* Decide what to do right now based on the last known state. */
1252 WhpxBreakpointState state = breakpoints->data[i].state;
1253 switch (state) {
1254 case WHPX_BP_CLEARED:
1255 if (resuming) {
1256 state = WHPX_BP_SET_PENDING;
1257 }
1258 break;
1259 case WHPX_BP_SET_PENDING:
1260 if (!resuming) {
1261 state = WHPX_BP_CLEARED;
1262 }
1263 break;
1264 case WHPX_BP_SET:
1265 if (!resuming) {
1266 state = WHPX_BP_CLEAR_PENDING;
1267 }
1268 break;
1269 case WHPX_BP_CLEAR_PENDING:
1270 if (resuming) {
1271 state = WHPX_BP_SET;
1272 }
1273 break;
1274 }
1275
1276 if (state == WHPX_BP_SET_PENDING) {
1277 /* Remember the original instruction. */
1278 rc = cpu_memory_rw_debug(cpu,
1279 breakpoints->data[i].address,
1280 &breakpoints->data[i].original_instruction,
1281 1,
1282 false);
1283
1284 if (!rc) {
1285 /* Write the breakpoint instruction. */
1286 rc = cpu_memory_rw_debug(cpu,
1287 breakpoints->data[i].address,
1288 (void *)&whpx_breakpoint_instruction,
1289 1,
1290 true);
1291 }
1292
1293 if (!rc) {
1294 state = WHPX_BP_SET;
1295 }
1296
1297 }
1298
1299 if (state == WHPX_BP_CLEAR_PENDING) {
1300 /* Restore the original instruction. */
1301 rc = cpu_memory_rw_debug(cpu,
1302 breakpoints->data[i].address,
1303 &breakpoints->data[i].original_instruction,
1304 1,
1305 true);
1306
1307 if (!rc) {
1308 state = WHPX_BP_CLEARED;
1309 }
1310 }
1311
1312 breakpoints->data[i].state = state;
1313 }
1314}
1315
1316/*
1317 * This function is called when the a VCPU is about to start and no other
1318 * VCPUs have been started so far. Since the VCPU start order could be
1319 * arbitrary, it doesn't have to be VCPU#0.
1320 *
1321 * It is used to commit the breakpoints into memory, and configure WHPX
1322 * to intercept debug exceptions.
1323 *
1324 * Note that whpx_set_exception_exit_bitmap() cannot be called if one or
1325 * more VCPUs are already running, so this is the best place to do it.
1326 */
1327static int whpx_first_vcpu_starting(CPUState *cpu)
1328{
1329 struct whpx_state *whpx = &whpx_global;
1330 HRESULT hr;
1331
1332 g_assert(qemu_mutex_iothread_locked());
1333
1334 if (!QTAILQ_EMPTY(&cpu->breakpoints) ||
1335 (whpx->breakpoints.breakpoints &&
1336 whpx->breakpoints.breakpoints->used)) {
1337 CPUBreakpoint *bp;
1338 int i = 0;
1339 bool update_pending = false;
1340
1341 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
1342 if (i >= whpx->breakpoints.original_address_count ||
1343 bp->pc != whpx->breakpoints.original_addresses[i]) {
1344 update_pending = true;
1345 }
1346
1347 i++;
1348 }
1349
1350 if (i != whpx->breakpoints.original_address_count) {
1351 update_pending = true;
1352 }
1353
1354 if (update_pending) {
1355 /*
1356 * The CPU breakpoints have changed since the last call to
1357 * whpx_translate_cpu_breakpoints(). WHPX breakpoints must
1358 * now be recomputed.
1359 */
1360 whpx_translate_cpu_breakpoints(&whpx->breakpoints, cpu, i);
1361 }
1362
1363 /* Actually insert the breakpoints into the memory. */
1364 whpx_apply_breakpoints(whpx->breakpoints.breakpoints, cpu, true);
1365 }
1366
1367 uint64_t exception_mask;
1368 if (whpx->step_pending ||
1369 (whpx->breakpoints.breakpoints &&
1370 whpx->breakpoints.breakpoints->used)) {
1371 /*
1372 * We are either attempting to single-step one or more CPUs, or
1373 * have one or more breakpoints enabled. Both require intercepting
1374 * the WHvX64ExceptionTypeBreakpointTrap exception.
1375 */
1376
1377 exception_mask = 1UL << WHvX64ExceptionTypeDebugTrapOrFault;
1378 } else {
1379 /* Let the guest handle all exceptions. */
1380 exception_mask = 0;
1381 }
1382
1383 hr = whpx_set_exception_exit_bitmap(exception_mask);
1384 if (!SUCCEEDED(hr)) {
1385 error_report("WHPX: Failed to update exception exit mask,"
1386 "hr=%08lx.", hr);
1387 return 1;
1388 }
1389
1390 return 0;
1391}
1392
1393/*
1394 * This function is called when the last VCPU has finished running.
1395 * It is used to remove any previously set breakpoints from memory.
1396 */
1397static int whpx_last_vcpu_stopping(CPUState *cpu)
1398{
1399 whpx_apply_breakpoints(whpx_global.breakpoints.breakpoints, cpu, false);
1400 return 0;
1401}
1402
1403/* Returns the address of the next instruction that is about to be executed. */
1404static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid)
1405{
1406 if (cpu->vcpu_dirty) {
1407 /* The CPU registers have been modified by other parts of QEMU. */
1408 CPUArchState *env = (CPUArchState *)(cpu->env_ptr);
1409 return env->eip;
1410 } else if (exit_context_valid) {
1411 /*
1412 * The CPU registers have not been modified by neither other parts
1413 * of QEMU, nor this port by calling WHvSetVirtualProcessorRegisters().
1414 * This is the most common case.
1415 */
1416 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
1417 return vcpu->exit_ctx.VpContext.Rip;
1418 } else {
1419 /*
1420 * The CPU registers have been modified by a call to
1421 * WHvSetVirtualProcessorRegisters() and must be re-queried from
1422 * the target.
1423 */
1424 WHV_REGISTER_VALUE reg_value;
1425 WHV_REGISTER_NAME reg_name = WHvX64RegisterRip;
1426 HRESULT hr;
1427 struct whpx_state *whpx = &whpx_global;
1428
1429 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
1430 whpx->partition,
1431 cpu->cpu_index,
1432 &reg_name,
1433 1,
1434 &reg_value);
1435
1436 if (FAILED(hr)) {
1437 error_report("WHPX: Failed to get PC, hr=%08lx", hr);
1438 return 0;
1439 }
1440
1441 return reg_value.Reg64;
1442 }
1443}
1444
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001445static int whpx_handle_halt(CPUState *cpu)
1446{
Philippe Mathieu-Daudé95e862d2022-03-05 23:35:19 +01001447 CPUX86State *env = cpu->env_ptr;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001448 int ret = 0;
1449
1450 qemu_mutex_lock_iothread();
1451 if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
1452 (env->eflags & IF_MASK)) &&
1453 !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
1454 cpu->exception_index = EXCP_HLT;
1455 cpu->halted = true;
1456 ret = 1;
1457 }
1458 qemu_mutex_unlock_iothread();
1459
1460 return ret;
1461}
1462
1463static void whpx_vcpu_pre_run(CPUState *cpu)
1464{
1465 HRESULT hr;
1466 struct whpx_state *whpx = &whpx_global;
1467 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
Philippe Mathieu-Daudé95e862d2022-03-05 23:35:19 +01001468 CPUX86State *env = cpu->env_ptr;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001469 X86CPU *x86_cpu = X86_CPU(cpu);
1470 int irq;
Justin Terry (VM) via Qemu-devel2bf3e742018-02-26 09:13:34 -08001471 uint8_t tpr;
Lucian Petrutc3942bf2018-05-15 20:35:22 +03001472 WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001473 UINT32 reg_count = 0;
Lucian Petrutc3942bf2018-05-15 20:35:22 +03001474 WHV_REGISTER_VALUE reg_values[3];
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001475 WHV_REGISTER_NAME reg_names[3];
1476
Lucian Petrutc3942bf2018-05-15 20:35:22 +03001477 memset(&new_int, 0, sizeof(new_int));
1478 memset(reg_values, 0, sizeof(reg_values));
1479
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001480 qemu_mutex_lock_iothread();
1481
1482 /* Inject NMI */
Justin Terry (VM)4e286092018-03-14 07:52:43 -07001483 if (!vcpu->interruption_pending &&
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001484 cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
1485 if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
1486 cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
1487 vcpu->interruptable = false;
1488 new_int.InterruptionType = WHvX64PendingNmi;
1489 new_int.InterruptionPending = 1;
1490 new_int.InterruptionVector = 2;
1491 }
1492 if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001493 cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001494 }
1495 }
1496
1497 /*
1498 * Force the VCPU out of its inner loop to process any INIT requests or
1499 * commit pending TPR access.
1500 */
1501 if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
1502 if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
1503 !(env->hflags & HF_SMM_MASK)) {
1504 cpu->exit_request = 1;
1505 }
1506 if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
1507 cpu->exit_request = 1;
1508 }
1509 }
1510
1511 /* Get pending hard interruption or replay one that was overwritten */
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00001512 if (!whpx_apic_in_platform()) {
1513 if (!vcpu->interruption_pending &&
1514 vcpu->interruptable && (env->eflags & IF_MASK)) {
1515 assert(!new_int.InterruptionPending);
1516 if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
1517 cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
1518 irq = cpu_get_pic_interrupt(env);
1519 if (irq >= 0) {
1520 new_int.InterruptionType = WHvX64PendingInterrupt;
1521 new_int.InterruptionPending = 1;
1522 new_int.InterruptionVector = irq;
1523 }
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001524 }
1525 }
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001526
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00001527 /* Setup interrupt state if new one was prepared */
1528 if (new_int.InterruptionPending) {
1529 reg_values[reg_count].PendingInterruption = new_int;
1530 reg_names[reg_count] = WHvRegisterPendingInterruption;
1531 reg_count += 1;
1532 }
1533 } else if (vcpu->ready_for_pic_interrupt &&
1534 (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
1535 cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
1536 irq = cpu_get_pic_interrupt(env);
1537 if (irq >= 0) {
1538 reg_names[reg_count] = WHvRegisterPendingEvent;
1539 reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT)
1540 {
1541 .EventPending = 1,
1542 .EventType = WHvX64PendingEventExtInt,
1543 .Vector = irq,
1544 };
1545 reg_count += 1;
1546 }
1547 }
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001548
1549 /* Sync the TPR to the CR8 if was modified during the intercept */
Justin Terry (VM) via Qemu-devel2bf3e742018-02-26 09:13:34 -08001550 tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
1551 if (tpr != vcpu->tpr) {
1552 vcpu->tpr = tpr;
1553 reg_values[reg_count].Reg64 = tpr;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001554 cpu->exit_request = 1;
1555 reg_names[reg_count] = WHvX64RegisterCr8;
1556 reg_count += 1;
1557 }
1558
1559 /* Update the state of the interrupt delivery notification */
Justin Terry (VM) via Qemu-develeb1fe942018-02-26 09:13:36 -08001560 if (!vcpu->window_registered &&
1561 cpu->interrupt_request & CPU_INTERRUPT_HARD) {
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00001562 reg_values[reg_count].DeliverabilityNotifications =
1563 (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) {
1564 .InterruptNotification = 1
1565 };
Justin Terry (VM) via Qemu-develeb1fe942018-02-26 09:13:36 -08001566 vcpu->window_registered = 1;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001567 reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
1568 reg_count += 1;
1569 }
1570
1571 qemu_mutex_unlock_iothread();
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00001572 vcpu->ready_for_pic_interrupt = false;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001573
1574 if (reg_count) {
Lucian Petrut327fccb2018-05-15 20:35:21 +03001575 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1576 whpx->partition, cpu->cpu_index,
1577 reg_names, reg_count, reg_values);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001578 if (FAILED(hr)) {
1579 error_report("WHPX: Failed to set interrupt state registers,"
1580 " hr=%08lx", hr);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001581 }
1582 }
1583
1584 return;
1585}
1586
1587static void whpx_vcpu_post_run(CPUState *cpu)
1588{
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001589 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
Philippe Mathieu-Daudé95e862d2022-03-05 23:35:19 +01001590 CPUX86State *env = cpu->env_ptr;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001591 X86CPU *x86_cpu = X86_CPU(cpu);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001592
Justin Terry (VM)4e286092018-03-14 07:52:43 -07001593 env->eflags = vcpu->exit_ctx.VpContext.Rflags;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001594
Justin Terry (VM)4e286092018-03-14 07:52:43 -07001595 uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8;
1596 if (vcpu->tpr != tpr) {
1597 vcpu->tpr = tpr;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001598 qemu_mutex_lock_iothread();
1599 cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr);
1600 qemu_mutex_unlock_iothread();
1601 }
1602
Justin Terry (VM)4e286092018-03-14 07:52:43 -07001603 vcpu->interruption_pending =
1604 vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001605
Justin Terry (VM)4e286092018-03-14 07:52:43 -07001606 vcpu->interruptable =
1607 !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001608
1609 return;
1610}
1611
1612static void whpx_vcpu_process_async_events(CPUState *cpu)
1613{
Philippe Mathieu-Daudé95e862d2022-03-05 23:35:19 +01001614 CPUX86State *env = cpu->env_ptr;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001615 X86CPU *x86_cpu = X86_CPU(cpu);
1616 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
1617
1618 if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
1619 !(env->hflags & HF_SMM_MASK)) {
Sunil Muthuswamy4df28c92020-02-24 19:27:38 +00001620 whpx_cpu_synchronize_state(cpu);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001621 do_cpu_init(x86_cpu);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001622 vcpu->interruptable = true;
1623 }
1624
1625 if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
1626 cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
1627 apic_poll_irq(x86_cpu->apic_state);
1628 }
1629
1630 if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
1631 (env->eflags & IF_MASK)) ||
1632 (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
1633 cpu->halted = false;
1634 }
1635
1636 if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
Sunil Muthuswamy4df28c92020-02-24 19:27:38 +00001637 whpx_cpu_synchronize_state(cpu);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001638 do_cpu_sipi(x86_cpu);
1639 }
1640
1641 if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
1642 cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
Sunil Muthuswamy4df28c92020-02-24 19:27:38 +00001643 whpx_cpu_synchronize_state(cpu);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001644 apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
1645 env->tpr_access_type);
1646 }
1647
1648 return;
1649}
1650
1651static int whpx_vcpu_run(CPUState *cpu)
1652{
1653 HRESULT hr;
1654 struct whpx_state *whpx = &whpx_global;
1655 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08001656 struct whpx_breakpoint *stepped_over_bp = NULL;
1657 WhpxStepMode exclusive_step_mode = WHPX_STEP_NONE;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001658 int ret;
1659
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08001660 g_assert(qemu_mutex_iothread_locked());
1661
1662 if (whpx->running_cpus++ == 0) {
1663 /* Insert breakpoints into memory, update exception exit bitmap. */
1664 ret = whpx_first_vcpu_starting(cpu);
1665 if (ret != 0) {
1666 return ret;
1667 }
1668 }
1669
1670 if (whpx->breakpoints.breakpoints &&
1671 whpx->breakpoints.breakpoints->used > 0)
1672 {
1673 uint64_t pc = whpx_vcpu_get_pc(cpu, true);
1674 stepped_over_bp = whpx_lookup_breakpoint_by_addr(pc);
1675 if (stepped_over_bp && stepped_over_bp->state != WHPX_BP_SET) {
1676 stepped_over_bp = NULL;
1677 }
1678
1679 if (stepped_over_bp) {
1680 /*
1681 * We are trying to run the instruction overwritten by an active
1682 * breakpoint. We will temporarily disable the breakpoint, suspend
1683 * other CPUs, and step over the instruction.
1684 */
1685 exclusive_step_mode = WHPX_STEP_EXCLUSIVE;
1686 }
1687 }
1688
1689 if (exclusive_step_mode == WHPX_STEP_NONE) {
1690 whpx_vcpu_process_async_events(cpu);
1691 if (cpu->halted && !whpx_apic_in_platform()) {
1692 cpu->exception_index = EXCP_HLT;
1693 qatomic_set(&cpu->exit_request, false);
1694 return 0;
1695 }
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001696 }
1697
1698 qemu_mutex_unlock_iothread();
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08001699
1700 if (exclusive_step_mode != WHPX_STEP_NONE) {
1701 start_exclusive();
1702 g_assert(cpu == current_cpu);
1703 g_assert(!cpu->running);
1704 cpu->running = true;
1705
1706 hr = whpx_set_exception_exit_bitmap(
1707 1UL << WHvX64ExceptionTypeDebugTrapOrFault);
1708 if (!SUCCEEDED(hr)) {
1709 error_report("WHPX: Failed to update exception exit mask, "
1710 "hr=%08lx.", hr);
1711 return 1;
1712 }
1713
1714 if (stepped_over_bp) {
1715 /* Temporarily disable the triggered breakpoint. */
1716 cpu_memory_rw_debug(cpu,
1717 stepped_over_bp->address,
1718 &stepped_over_bp->original_instruction,
1719 1,
1720 true);
1721 }
1722 } else {
1723 cpu_exec_start(cpu);
1724 }
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001725
1726 do {
1727 if (cpu->vcpu_dirty) {
Sunil Muthuswamy6785e762020-02-26 20:54:39 +00001728 whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001729 cpu->vcpu_dirty = false;
1730 }
1731
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08001732 if (exclusive_step_mode == WHPX_STEP_NONE) {
1733 whpx_vcpu_pre_run(cpu);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001734
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08001735 if (qatomic_read(&cpu->exit_request)) {
1736 whpx_vcpu_kick(cpu);
1737 }
1738 }
1739
1740 if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
1741 whpx_vcpu_configure_single_stepping(cpu, true, NULL);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001742 }
1743
Lucian Petrut327fccb2018-05-15 20:35:21 +03001744 hr = whp_dispatch.WHvRunVirtualProcessor(
1745 whpx->partition, cpu->cpu_index,
1746 &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001747
1748 if (FAILED(hr)) {
1749 error_report("WHPX: Failed to exec a virtual processor,"
1750 " hr=%08lx", hr);
1751 ret = -1;
1752 break;
1753 }
1754
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08001755 if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) {
1756 whpx_vcpu_configure_single_stepping(cpu,
1757 false,
1758 &vcpu->exit_ctx.VpContext.Rflags);
1759 }
1760
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001761 whpx_vcpu_post_run(cpu);
1762
1763 switch (vcpu->exit_ctx.ExitReason) {
1764 case WHvRunVpExitReasonMemoryAccess:
1765 ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
1766 break;
1767
1768 case WHvRunVpExitReasonX64IoPortAccess:
1769 ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
1770 break;
1771
1772 case WHvRunVpExitReasonX64InterruptWindow:
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00001773 vcpu->ready_for_pic_interrupt = 1;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001774 vcpu->window_registered = 0;
Justin Terry (VM)e7ca5492018-06-05 22:15:28 +00001775 ret = 0;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001776 break;
1777
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00001778 case WHvRunVpExitReasonX64ApicEoi:
1779 assert(whpx_apic_in_platform());
1780 ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector);
1781 break;
1782
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001783 case WHvRunVpExitReasonX64Halt:
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08001784 /*
1785 * WARNING: as of build 19043.1526 (21H1), this exit reason is no
1786 * longer used.
1787 */
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001788 ret = whpx_handle_halt(cpu);
1789 break;
1790
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00001791 case WHvRunVpExitReasonX64ApicInitSipiTrap: {
1792 WHV_INTERRUPT_CONTROL ipi = {0};
1793 uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr;
1794 uint32_t delivery_mode =
1795 (icr & APIC_ICR_DELIV_MOD) >> APIC_ICR_DELIV_MOD_SHIFT;
1796 int dest_shorthand =
1797 (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT;
1798 bool broadcast = false;
1799 bool include_self = false;
1800 uint32_t i;
1801
1802 /* We only registered for INIT and SIPI exits. */
1803 if ((delivery_mode != APIC_DM_INIT) &&
1804 (delivery_mode != APIC_DM_SIPI)) {
1805 error_report(
1806 "WHPX: Unexpected APIC exit that is not a INIT or SIPI");
1807 break;
1808 }
1809
1810 if (delivery_mode == APIC_DM_INIT) {
1811 ipi.Type = WHvX64InterruptTypeInit;
1812 } else {
1813 ipi.Type = WHvX64InterruptTypeSipi;
1814 }
1815
1816 ipi.DestinationMode =
1817 ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ?
1818 WHvX64InterruptDestinationModeLogical :
1819 WHvX64InterruptDestinationModePhysical;
1820
1821 ipi.TriggerMode =
1822 ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ?
1823 WHvX64InterruptTriggerModeLevel :
1824 WHvX64InterruptTriggerModeEdge;
1825
1826 ipi.Vector = icr & APIC_VECTOR_MASK;
1827 switch (dest_shorthand) {
1828 /* no shorthand. Bits 56-63 contain the destination. */
1829 case 0:
1830 ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK;
1831 hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
1832 &ipi, sizeof(ipi));
1833 if (FAILED(hr)) {
1834 error_report("WHPX: Failed to request interrupt hr=%08lx",
1835 hr);
1836 }
1837
1838 break;
1839
1840 /* self */
1841 case 1:
1842 include_self = true;
1843 break;
1844
1845 /* broadcast, including self */
1846 case 2:
1847 broadcast = true;
1848 include_self = true;
1849 break;
1850
1851 /* broadcast, excluding self */
1852 case 3:
1853 broadcast = true;
1854 break;
1855 }
1856
1857 if (!broadcast && !include_self) {
1858 break;
1859 }
1860
1861 for (i = 0; i <= max_vcpu_index; i++) {
1862 if (i == cpu->cpu_index && !include_self) {
1863 continue;
1864 }
1865
1866 /*
1867 * Assuming that APIC Ids are identity mapped since
1868 * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers
1869 * are not handled yet and the hypervisor doesn't allow the
1870 * guest to modify the APIC ID.
1871 */
1872 ipi.Destination = i;
1873 hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
1874 &ipi, sizeof(ipi));
1875 if (FAILED(hr)) {
1876 error_report(
1877 "WHPX: Failed to request SIPI for %d, hr=%08lx",
1878 i, hr);
1879 }
1880 }
1881
1882 break;
1883 }
1884
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001885 case WHvRunVpExitReasonCanceled:
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08001886 if (exclusive_step_mode != WHPX_STEP_NONE) {
1887 /*
1888 * We are trying to step over a single instruction, and
1889 * likely got a request to stop from another thread.
1890 * Delay it until we are done stepping
1891 * over.
1892 */
1893 ret = 0;
1894 } else {
1895 cpu->exception_index = EXCP_INTERRUPT;
1896 ret = 1;
1897 }
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08001898 break;
Justin Terry (VM)e7ca5492018-06-05 22:15:28 +00001899 case WHvRunVpExitReasonX64MsrAccess: {
1900 WHV_REGISTER_VALUE reg_values[3] = {0};
1901 WHV_REGISTER_NAME reg_names[3];
1902 UINT32 reg_count;
1903
1904 reg_names[0] = WHvX64RegisterRip;
1905 reg_names[1] = WHvX64RegisterRax;
1906 reg_names[2] = WHvX64RegisterRdx;
1907
1908 reg_values[0].Reg64 =
1909 vcpu->exit_ctx.VpContext.Rip +
1910 vcpu->exit_ctx.VpContext.InstructionLength;
1911
1912 /*
1913 * For all unsupported MSR access we:
1914 * ignore writes
1915 * return 0 on read.
1916 */
1917 reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ?
1918 1 : 3;
1919
1920 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1921 whpx->partition,
1922 cpu->cpu_index,
1923 reg_names, reg_count,
1924 reg_values);
1925
1926 if (FAILED(hr)) {
1927 error_report("WHPX: Failed to set MsrAccess state "
1928 " registers, hr=%08lx", hr);
1929 }
1930 ret = 0;
1931 break;
1932 }
Justin Terry (VM)7becac82018-03-26 10:06:58 -07001933 case WHvRunVpExitReasonX64Cpuid: {
Lucian Petrutc3942bf2018-05-15 20:35:22 +03001934 WHV_REGISTER_VALUE reg_values[5];
Justin Terry (VM)7becac82018-03-26 10:06:58 -07001935 WHV_REGISTER_NAME reg_names[5];
1936 UINT32 reg_count = 5;
Sunil Muthuswamydadf3012020-02-27 21:01:04 +00001937 UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0;
1938 X86CPU *x86_cpu = X86_CPU(cpu);
1939 CPUX86State *env = &x86_cpu->env;
Justin Terry (VM)7becac82018-03-26 10:06:58 -07001940
Lucian Petrutc3942bf2018-05-15 20:35:22 +03001941 memset(reg_values, 0, sizeof(reg_values));
1942
Justin Terry (VM)7becac82018-03-26 10:06:58 -07001943 rip = vcpu->exit_ctx.VpContext.Rip +
1944 vcpu->exit_ctx.VpContext.InstructionLength;
Sunil Muthuswamydadf3012020-02-27 21:01:04 +00001945 cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax;
Justin Terry (VM)7becac82018-03-26 10:06:58 -07001946
Sunil Muthuswamydadf3012020-02-27 21:01:04 +00001947 /*
1948 * Ideally, these should be supplied to the hypervisor during VCPU
1949 * initialization and it should be able to satisfy this request.
1950 * But, currently, WHPX doesn't support setting CPUID values in the
1951 * hypervisor once the partition has been setup, which is too late
1952 * since VCPUs are realized later. For now, use the values from
1953 * QEMU to satisfy these requests, until WHPX adds support for
1954 * being able to set these values in the hypervisor at runtime.
1955 */
1956 cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx,
1957 (UINT32 *)&rcx, (UINT32 *)&rdx);
1958 switch (cpuid_fn) {
Sunil Muthuswamy5c8e1e82020-07-30 22:11:26 +00001959 case 0x40000000:
1960 /* Expose the vmware cpu frequency cpuid leaf */
1961 rax = 0x40000010;
1962 rbx = rcx = rdx = 0;
1963 break;
1964
1965 case 0x40000010:
1966 rax = env->tsc_khz;
1967 rbx = env->apic_bus_freq / 1000; /* Hz to KHz */
1968 rcx = rdx = 0;
1969 break;
1970
Justin Terry (VM)e1753a72018-06-05 22:15:27 +00001971 case 0x80000001:
Justin Terry (VM)e1753a72018-06-05 22:15:27 +00001972 /* Remove any support of OSVW */
Sunil Muthuswamydadf3012020-02-27 21:01:04 +00001973 rcx &= ~CPUID_EXT3_OSVW;
Justin Terry (VM)e1753a72018-06-05 22:15:27 +00001974 break;
Justin Terry (VM)7becac82018-03-26 10:06:58 -07001975 }
1976
1977 reg_names[0] = WHvX64RegisterRip;
1978 reg_names[1] = WHvX64RegisterRax;
1979 reg_names[2] = WHvX64RegisterRcx;
1980 reg_names[3] = WHvX64RegisterRdx;
1981 reg_names[4] = WHvX64RegisterRbx;
1982
1983 reg_values[0].Reg64 = rip;
1984 reg_values[1].Reg64 = rax;
1985 reg_values[2].Reg64 = rcx;
1986 reg_values[3].Reg64 = rdx;
1987 reg_values[4].Reg64 = rbx;
1988
Lucian Petrut327fccb2018-05-15 20:35:21 +03001989 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1990 whpx->partition, cpu->cpu_index,
1991 reg_names,
1992 reg_count,
1993 reg_values);
Justin Terry (VM)7becac82018-03-26 10:06:58 -07001994
1995 if (FAILED(hr)) {
1996 error_report("WHPX: Failed to set CpuidAccess state registers,"
1997 " hr=%08lx", hr);
1998 }
1999 ret = 0;
2000 break;
2001 }
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08002002 case WHvRunVpExitReasonException:
2003 whpx_get_registers(cpu);
2004
2005 if ((vcpu->exit_ctx.VpException.ExceptionType ==
2006 WHvX64ExceptionTypeDebugTrapOrFault) &&
2007 (vcpu->exit_ctx.VpException.InstructionByteCount >= 1) &&
2008 (vcpu->exit_ctx.VpException.InstructionBytes[0] ==
2009 whpx_breakpoint_instruction)) {
2010 /* Stopped at a software breakpoint. */
2011 cpu->exception_index = EXCP_DEBUG;
2012 } else if ((vcpu->exit_ctx.VpException.ExceptionType ==
2013 WHvX64ExceptionTypeDebugTrapOrFault) &&
2014 !cpu->singlestep_enabled) {
2015 /*
2016 * Just finished stepping over a breakpoint, but the
2017 * gdb does not expect us to do single-stepping.
2018 * Don't do anything special.
2019 */
2020 cpu->exception_index = EXCP_INTERRUPT;
2021 } else {
2022 /* Another exception or debug event. Report it to GDB. */
2023 cpu->exception_index = EXCP_DEBUG;
2024 }
2025
2026 ret = 1;
2027 break;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002028 case WHvRunVpExitReasonNone:
2029 case WHvRunVpExitReasonUnrecoverableException:
2030 case WHvRunVpExitReasonInvalidVpRegisterValue:
2031 case WHvRunVpExitReasonUnsupportedFeature:
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002032 default:
2033 error_report("WHPX: Unexpected VP exit code %d",
2034 vcpu->exit_ctx.ExitReason);
2035 whpx_get_registers(cpu);
2036 qemu_mutex_lock_iothread();
2037 qemu_system_guest_panicked(cpu_get_crash_info(cpu));
2038 qemu_mutex_unlock_iothread();
2039 break;
2040 }
2041
2042 } while (!ret);
2043
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08002044 if (stepped_over_bp) {
2045 /* Restore the breakpoint we stepped over */
2046 cpu_memory_rw_debug(cpu,
2047 stepped_over_bp->address,
2048 (void *)&whpx_breakpoint_instruction,
2049 1,
2050 true);
2051 }
2052
2053 if (exclusive_step_mode != WHPX_STEP_NONE) {
2054 g_assert(cpu_in_exclusive_context(cpu));
2055 cpu->running = false;
2056 end_exclusive();
2057
2058 exclusive_step_mode = WHPX_STEP_NONE;
2059 } else {
2060 cpu_exec_end(cpu);
2061 }
2062
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002063 qemu_mutex_lock_iothread();
2064 current_cpu = cpu;
2065
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08002066 if (--whpx->running_cpus == 0) {
2067 whpx_last_vcpu_stopping(cpu);
2068 }
2069
Stefan Hajnoczid73415a2020-09-23 11:56:46 +01002070 qatomic_set(&cpu->exit_request, false);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002071
2072 return ret < 0;
2073}
2074
2075static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
2076{
Sunil Muthuswamy4df28c92020-02-24 19:27:38 +00002077 if (!cpu->vcpu_dirty) {
2078 whpx_get_registers(cpu);
2079 cpu->vcpu_dirty = true;
2080 }
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002081}
2082
2083static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
2084 run_on_cpu_data arg)
2085{
Sunil Muthuswamy6785e762020-02-26 20:54:39 +00002086 whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002087 cpu->vcpu_dirty = false;
2088}
2089
2090static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
2091 run_on_cpu_data arg)
2092{
Sunil Muthuswamy6785e762020-02-26 20:54:39 +00002093 whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002094 cpu->vcpu_dirty = false;
2095}
2096
2097static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
2098 run_on_cpu_data arg)
2099{
2100 cpu->vcpu_dirty = true;
2101}
2102
2103/*
2104 * CPU support.
2105 */
2106
2107void whpx_cpu_synchronize_state(CPUState *cpu)
2108{
2109 if (!cpu->vcpu_dirty) {
2110 run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
2111 }
2112}
2113
2114void whpx_cpu_synchronize_post_reset(CPUState *cpu)
2115{
2116 run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
2117}
2118
2119void whpx_cpu_synchronize_post_init(CPUState *cpu)
2120{
2121 run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
2122}
2123
2124void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
2125{
2126 run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
2127}
2128
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08002129void whpx_cpu_synchronize_pre_resume(bool step_pending)
2130{
2131 whpx_global.step_pending = step_pending;
2132}
2133
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002134/*
2135 * Vcpu support.
2136 */
2137
2138static Error *whpx_migration_blocker;
2139
Philippe Mathieu-Daudé538f0492021-01-11 16:20:20 +01002140static void whpx_cpu_update_state(void *opaque, bool running, RunState state)
Sunil Muthuswamy6785e762020-02-26 20:54:39 +00002141{
2142 CPUX86State *env = opaque;
2143
2144 if (running) {
2145 env->tsc_valid = false;
2146 }
2147}
2148
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002149int whpx_init_vcpu(CPUState *cpu)
2150{
2151 HRESULT hr;
2152 struct whpx_state *whpx = &whpx_global;
Sunil Muthuswamy5c8e1e82020-07-30 22:11:26 +00002153 struct whpx_vcpu *vcpu = NULL;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002154 Error *local_error = NULL;
Philippe Mathieu-Daudé95e862d2022-03-05 23:35:19 +01002155 CPUX86State *env = cpu->env_ptr;
Sunil Muthuswamy5c8e1e82020-07-30 22:11:26 +00002156 X86CPU *x86_cpu = X86_CPU(cpu);
2157 UINT64 freq = 0;
2158 int ret;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002159
2160 /* Add migration blockers for all unsupported features of the
2161 * Windows Hypervisor Platform
2162 */
2163 if (whpx_migration_blocker == NULL) {
2164 error_setg(&whpx_migration_blocker,
2165 "State blocked due to non-migratable CPUID feature support,"
2166 "dirty memory tracking support, and XSAVE/XRSTOR support");
2167
Markus Armbruster436c8312021-07-20 14:54:01 +02002168 if (migrate_add_blocker(whpx_migration_blocker, &local_error) < 0) {
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002169 error_report_err(local_error);
Lucian Petrut327fccb2018-05-15 20:35:21 +03002170 error_free(whpx_migration_blocker);
Sunil Muthuswamy5c8e1e82020-07-30 22:11:26 +00002171 ret = -EINVAL;
2172 goto error;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002173 }
2174 }
2175
Markus Armbrusterb21e2382022-03-15 15:41:56 +01002176 vcpu = g_new0(struct whpx_vcpu, 1);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002177
2178 if (!vcpu) {
2179 error_report("WHPX: Failed to allocte VCPU context.");
Sunil Muthuswamy5c8e1e82020-07-30 22:11:26 +00002180 ret = -ENOMEM;
2181 goto error;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002182 }
2183
Lucian Petrut327fccb2018-05-15 20:35:21 +03002184 hr = whp_dispatch.WHvEmulatorCreateEmulator(
2185 &whpx_emu_callbacks,
2186 &vcpu->emulator);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002187 if (FAILED(hr)) {
2188 error_report("WHPX: Failed to setup instruction completion support,"
2189 " hr=%08lx", hr);
Sunil Muthuswamy5c8e1e82020-07-30 22:11:26 +00002190 ret = -EINVAL;
2191 goto error;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002192 }
2193
Lucian Petrut327fccb2018-05-15 20:35:21 +03002194 hr = whp_dispatch.WHvCreateVirtualProcessor(
2195 whpx->partition, cpu->cpu_index, 0);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002196 if (FAILED(hr)) {
2197 error_report("WHPX: Failed to create a virtual processor,"
2198 " hr=%08lx", hr);
Lucian Petrut327fccb2018-05-15 20:35:21 +03002199 whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
Sunil Muthuswamy5c8e1e82020-07-30 22:11:26 +00002200 ret = -EINVAL;
2201 goto error;
2202 }
2203
2204 /*
2205 * vcpu's TSC frequency is either specified by user, or use the value
2206 * provided by Hyper-V if the former is not present. In the latter case, we
2207 * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC
2208 * frequency can be migrated later via this field.
2209 */
2210 if (!env->tsc_khz) {
2211 hr = whp_dispatch.WHvGetCapability(
2212 WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq),
2213 NULL);
2214 if (hr != WHV_E_UNKNOWN_CAPABILITY) {
2215 if (FAILED(hr)) {
2216 printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr);
2217 } else {
2218 env->tsc_khz = freq / 1000; /* Hz to KHz */
2219 }
2220 }
2221 }
2222
2223 env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY;
2224 hr = whp_dispatch.WHvGetCapability(
2225 WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL);
2226 if (hr != WHV_E_UNKNOWN_CAPABILITY) {
2227 if (FAILED(hr)) {
2228 printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr);
2229 } else {
2230 env->apic_bus_freq = freq;
2231 }
2232 }
2233
2234 /*
2235 * If the vmware cpuid frequency leaf option is set, and we have a valid
2236 * tsc value, trap the corresponding cpuid's.
2237 */
2238 if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) {
2239 UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010};
2240
2241 hr = whp_dispatch.WHvSetPartitionProperty(
2242 whpx->partition,
2243 WHvPartitionPropertyCodeCpuidExitList,
2244 cpuidExitList,
2245 RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
2246
2247 if (FAILED(hr)) {
2248 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
2249 hr);
2250 ret = -EINVAL;
2251 goto error;
2252 }
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002253 }
2254
2255 vcpu->interruptable = true;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002256 cpu->vcpu_dirty = true;
2257 cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00002258 max_vcpu_index = max(max_vcpu_index, cpu->cpu_index);
Sunil Muthuswamy6785e762020-02-26 20:54:39 +00002259 qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002260
2261 return 0;
Sunil Muthuswamy5c8e1e82020-07-30 22:11:26 +00002262
2263error:
2264 g_free(vcpu);
2265
2266 return ret;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002267}
2268
2269int whpx_vcpu_exec(CPUState *cpu)
2270{
2271 int ret;
2272 int fatal;
2273
2274 for (;;) {
2275 if (cpu->exception_index >= EXCP_INTERRUPT) {
2276 ret = cpu->exception_index;
2277 cpu->exception_index = -1;
2278 break;
2279 }
2280
2281 fatal = whpx_vcpu_run(cpu);
2282
2283 if (fatal) {
2284 error_report("WHPX: Failed to exec a virtual processor");
2285 abort();
2286 }
2287 }
2288
2289 return ret;
2290}
2291
2292void whpx_destroy_vcpu(CPUState *cpu)
2293{
2294 struct whpx_state *whpx = &whpx_global;
2295 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
2296
Lucian Petrut327fccb2018-05-15 20:35:21 +03002297 whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
2298 whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002299 g_free(cpu->hax_vcpu);
2300 return;
2301}
2302
2303void whpx_vcpu_kick(CPUState *cpu)
2304{
2305 struct whpx_state *whpx = &whpx_global;
Lucian Petrut327fccb2018-05-15 20:35:21 +03002306 whp_dispatch.WHvCancelRunVirtualProcessor(
2307 whpx->partition, cpu->cpu_index, 0);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002308}
2309
2310/*
2311 * Memory support.
2312 */
2313
2314static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size,
2315 void *host_va, int add, int rom,
2316 const char *name)
2317{
2318 struct whpx_state *whpx = &whpx_global;
2319 HRESULT hr;
2320
2321 /*
2322 if (add) {
2323 printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n",
2324 (void*)start_pa, (void*)size, host_va,
2325 (rom ? "ROM" : "RAM"), name);
2326 } else {
2327 printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n",
2328 (void*)start_pa, (void*)size, host_va, name);
2329 }
2330 */
2331
2332 if (add) {
Lucian Petrut327fccb2018-05-15 20:35:21 +03002333 hr = whp_dispatch.WHvMapGpaRange(whpx->partition,
2334 host_va,
2335 start_pa,
2336 size,
2337 (WHvMapGpaRangeFlagRead |
2338 WHvMapGpaRangeFlagExecute |
2339 (rom ? 0 : WHvMapGpaRangeFlagWrite)));
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002340 } else {
Lucian Petrut327fccb2018-05-15 20:35:21 +03002341 hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition,
2342 start_pa,
2343 size);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002344 }
2345
2346 if (FAILED(hr)) {
2347 error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
2348 " Host:%p, hr=%08lx",
2349 (add ? "MAP" : "UNMAP"), name,
Lucian Petrutc3942bf2018-05-15 20:35:22 +03002350 (void *)(uintptr_t)start_pa, (void *)size, host_va, hr);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002351 }
2352}
2353
2354static void whpx_process_section(MemoryRegionSection *section, int add)
2355{
2356 MemoryRegion *mr = section->mr;
2357 hwaddr start_pa = section->offset_within_address_space;
2358 ram_addr_t size = int128_get64(section->size);
2359 unsigned int delta;
2360 uint64_t host_va;
2361
2362 if (!memory_region_is_ram(mr)) {
2363 return;
2364 }
2365
Marc-André Lureau8e3b0cb2022-03-23 19:57:22 +04002366 delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask());
2367 delta &= ~qemu_real_host_page_mask();
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002368 if (delta > size) {
2369 return;
2370 }
2371 start_pa += delta;
2372 size -= delta;
Marc-André Lureau8e3b0cb2022-03-23 19:57:22 +04002373 size &= qemu_real_host_page_mask();
2374 if (!size || (start_pa & ~qemu_real_host_page_mask())) {
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002375 return;
2376 }
2377
2378 host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
2379 + section->offset_within_region + delta;
2380
Lucian Petrutc3942bf2018-05-15 20:35:22 +03002381 whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
2382 memory_region_is_rom(mr), mr->name);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002383}
2384
2385static void whpx_region_add(MemoryListener *listener,
2386 MemoryRegionSection *section)
2387{
2388 memory_region_ref(section->mr);
2389 whpx_process_section(section, 1);
2390}
2391
2392static void whpx_region_del(MemoryListener *listener,
2393 MemoryRegionSection *section)
2394{
2395 whpx_process_section(section, 0);
2396 memory_region_unref(section->mr);
2397}
2398
2399static void whpx_transaction_begin(MemoryListener *listener)
2400{
2401}
2402
2403static void whpx_transaction_commit(MemoryListener *listener)
2404{
2405}
2406
2407static void whpx_log_sync(MemoryListener *listener,
2408 MemoryRegionSection *section)
2409{
2410 MemoryRegion *mr = section->mr;
2411
2412 if (!memory_region_is_ram(mr)) {
2413 return;
2414 }
2415
2416 memory_region_set_dirty(mr, 0, int128_get64(section->size));
2417}
2418
2419static MemoryListener whpx_memory_listener = {
Peter Xu142518b2021-08-16 21:35:52 -04002420 .name = "whpx",
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002421 .begin = whpx_transaction_begin,
2422 .commit = whpx_transaction_commit,
2423 .region_add = whpx_region_add,
2424 .region_del = whpx_region_del,
2425 .log_sync = whpx_log_sync,
2426 .priority = 10,
2427};
2428
2429static void whpx_memory_init(void)
2430{
2431 memory_listener_register(&whpx_memory_listener, &address_space_memory);
2432}
2433
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002434/*
Sunil Muthuswamyb9027102019-11-13 18:54:39 +00002435 * Load the functions from the given library, using the given handle. If a
2436 * handle is provided, it is used, otherwise the library is opened. The
2437 * handle will be updated on return with the opened one.
2438 */
2439static bool load_whp_dispatch_fns(HMODULE *handle,
2440 WHPFunctionList function_list)
2441{
2442 HMODULE hLib = *handle;
2443
2444 #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
2445 #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
Sunil Muthuswamy6785e762020-02-26 20:54:39 +00002446 #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
2447 whp_dispatch.function_name = \
2448 (function_name ## _t)GetProcAddress(hLib, #function_name); \
2449
Sunil Muthuswamyb9027102019-11-13 18:54:39 +00002450 #define WHP_LOAD_FIELD(return_type, function_name, signature) \
2451 whp_dispatch.function_name = \
2452 (function_name ## _t)GetProcAddress(hLib, #function_name); \
2453 if (!whp_dispatch.function_name) { \
2454 error_report("Could not load function %s", #function_name); \
2455 goto error; \
2456 } \
2457
2458 #define WHP_LOAD_LIB(lib_name, handle_lib) \
2459 if (!handle_lib) { \
2460 handle_lib = LoadLibrary(lib_name); \
2461 if (!handle_lib) { \
2462 error_report("Could not load library %s.", lib_name); \
2463 goto error; \
2464 } \
2465 } \
2466
2467 switch (function_list) {
2468 case WINHV_PLATFORM_FNS_DEFAULT:
2469 WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
2470 LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
2471 break;
2472
2473 case WINHV_EMULATION_FNS_DEFAULT:
2474 WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
2475 LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
2476 break;
Sunil Muthuswamy6785e762020-02-26 20:54:39 +00002477
2478 case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
2479 WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
2480 LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
2481 break;
Sunil Muthuswamyb9027102019-11-13 18:54:39 +00002482 }
2483
2484 *handle = hLib;
2485 return true;
2486
2487error:
2488 if (hLib) {
2489 FreeLibrary(hLib);
2490 }
2491
2492 return false;
2493}
2494
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00002495static void whpx_set_kernel_irqchip(Object *obj, Visitor *v,
2496 const char *name, void *opaque,
2497 Error **errp)
2498{
2499 struct whpx_state *whpx = &whpx_global;
2500 OnOffSplit mode;
2501
2502 if (!visit_type_OnOffSplit(v, name, &mode, errp)) {
2503 return;
2504 }
2505
2506 switch (mode) {
2507 case ON_OFF_SPLIT_ON:
2508 whpx->kernel_irqchip_allowed = true;
2509 whpx->kernel_irqchip_required = true;
2510 break;
2511
2512 case ON_OFF_SPLIT_OFF:
2513 whpx->kernel_irqchip_allowed = false;
2514 whpx->kernel_irqchip_required = false;
2515 break;
2516
2517 case ON_OFF_SPLIT_SPLIT:
2518 error_setg(errp, "WHPX: split irqchip currently not supported");
2519 error_append_hint(errp,
2520 "Try without kernel-irqchip or with kernel-irqchip=on|off");
2521 break;
2522
2523 default:
2524 /*
2525 * The value was checked in visit_type_OnOffSplit() above. If
2526 * we get here, then something is wrong in QEMU.
2527 */
2528 abort();
2529 }
2530}
2531
Sunil Muthuswamyb9027102019-11-13 18:54:39 +00002532/*
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002533 * Partition support
2534 */
2535
2536static int whpx_accel_init(MachineState *ms)
2537{
2538 struct whpx_state *whpx;
2539 int ret;
2540 HRESULT hr;
2541 WHV_CAPABILITY whpx_cap;
Justin Terry (VM)3907e632018-03-14 07:52:41 -07002542 UINT32 whpx_cap_size;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002543 WHV_PARTITION_PROPERTY prop;
Sunil Muthuswamy5c8e1e82020-07-30 22:11:26 +00002544 UINT32 cpuidExitList[] = {1, 0x80000001};
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00002545 WHV_CAPABILITY_FEATURES features = {0};
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002546
2547 whpx = &whpx_global;
2548
Lucian Petrut327fccb2018-05-15 20:35:21 +03002549 if (!init_whp_dispatch()) {
2550 ret = -ENOSYS;
2551 goto error;
2552 }
2553
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002554 whpx->mem_quota = ms->ram_size;
2555
Lucian Petrut327fccb2018-05-15 20:35:21 +03002556 hr = whp_dispatch.WHvGetCapability(
2557 WHvCapabilityCodeHypervisorPresent, &whpx_cap,
2558 sizeof(whpx_cap), &whpx_cap_size);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002559 if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
2560 error_report("WHPX: No accelerator found, hr=%08lx", hr);
2561 ret = -ENOSPC;
2562 goto error;
2563 }
2564
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00002565 hr = whp_dispatch.WHvGetCapability(
2566 WHvCapabilityCodeFeatures, &features, sizeof(features), NULL);
2567 if (FAILED(hr)) {
2568 error_report("WHPX: Failed to query capabilities, hr=%08lx", hr);
2569 ret = -EINVAL;
2570 goto error;
2571 }
2572
Lucian Petrut327fccb2018-05-15 20:35:21 +03002573 hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002574 if (FAILED(hr)) {
2575 error_report("WHPX: Failed to create partition, hr=%08lx", hr);
2576 ret = -EINVAL;
2577 goto error;
2578 }
2579
Sunil Muthuswamyb6b3da92019-11-07 19:48:32 +00002580 /*
2581 * Query the XSAVE capability of the partition. Any error here is not
2582 * considered fatal.
2583 */
2584 hr = whp_dispatch.WHvGetPartitionProperty(
2585 whpx->partition,
2586 WHvPartitionPropertyCodeProcessorXsaveFeatures,
2587 &whpx_xsave_cap,
2588 sizeof(whpx_xsave_cap),
2589 &whpx_cap_size);
2590
2591 /*
2592 * Windows version which don't support this property will return with the
2593 * specific error code.
2594 */
2595 if (FAILED(hr) && hr != WHV_E_UNKNOWN_PROPERTY) {
2596 error_report("WHPX: Failed to query XSAVE capability, hr=%08lx", hr);
2597 }
2598
2599 if (!whpx_has_xsave()) {
2600 printf("WHPX: Partition is not XSAVE capable\n");
2601 }
2602
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002603 memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
Stefan Weilf2b143a2019-07-12 15:26:11 +02002604 prop.ProcessorCount = ms->smp.cpus;
Lucian Petrut327fccb2018-05-15 20:35:21 +03002605 hr = whp_dispatch.WHvSetPartitionProperty(
2606 whpx->partition,
2607 WHvPartitionPropertyCodeProcessorCount,
2608 &prop,
2609 sizeof(WHV_PARTITION_PROPERTY));
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002610
2611 if (FAILED(hr)) {
2612 error_report("WHPX: Failed to set partition core count to %d,"
Stefan Weilf2b143a2019-07-12 15:26:11 +02002613 " hr=%08lx", ms->smp.cores, hr);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002614 ret = -EINVAL;
2615 goto error;
2616 }
2617
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00002618 /*
2619 * Error out if WHP doesn't support apic emulation and user is requiring
2620 * it.
2621 */
2622 if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation ||
2623 !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) {
2624 error_report("WHPX: kernel irqchip requested, but unavailable. "
2625 "Try without kernel-irqchip or with kernel-irqchip=off");
2626 ret = -EINVAL;
2627 goto error;
2628 }
2629
2630 if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation &&
2631 whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) {
2632 WHV_X64_LOCAL_APIC_EMULATION_MODE mode =
2633 WHvX64LocalApicEmulationModeXApic;
2634 printf("WHPX: setting APIC emulation mode in the hypervisor\n");
2635 hr = whp_dispatch.WHvSetPartitionProperty(
2636 whpx->partition,
2637 WHvPartitionPropertyCodeLocalApicEmulationMode,
2638 &mode,
2639 sizeof(mode));
2640 if (FAILED(hr)) {
2641 error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr);
2642 if (whpx->kernel_irqchip_required) {
2643 error_report("WHPX: kernel irqchip requested, but unavailable");
2644 ret = -EINVAL;
2645 goto error;
2646 }
2647 } else {
2648 whpx->apic_in_platform = true;
2649 }
2650 }
2651
2652 /* Register for MSR and CPUID exits */
Justin Terry (VM)7becac82018-03-26 10:06:58 -07002653 memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
Justin Terry (VM)e7ca5492018-06-05 22:15:28 +00002654 prop.ExtendedVmExits.X64MsrExit = 1;
Justin Terry (VM)7becac82018-03-26 10:06:58 -07002655 prop.ExtendedVmExits.X64CpuidExit = 1;
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08002656 prop.ExtendedVmExits.ExceptionExit = 1;
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00002657 if (whpx_apic_in_platform()) {
2658 prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1;
2659 }
Justin Terry (VM)7becac82018-03-26 10:06:58 -07002660
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00002661 hr = whp_dispatch.WHvSetPartitionProperty(
2662 whpx->partition,
2663 WHvPartitionPropertyCodeExtendedVmExits,
2664 &prop,
2665 sizeof(WHV_PARTITION_PROPERTY));
Justin Terry (VM)7becac82018-03-26 10:06:58 -07002666 if (FAILED(hr)) {
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00002667 error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr);
Justin Terry (VM)7becac82018-03-26 10:06:58 -07002668 ret = -EINVAL;
2669 goto error;
2670 }
2671
Lucian Petrut327fccb2018-05-15 20:35:21 +03002672 hr = whp_dispatch.WHvSetPartitionProperty(
2673 whpx->partition,
2674 WHvPartitionPropertyCodeCpuidExitList,
2675 cpuidExitList,
2676 RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
Justin Terry (VM)e1753a72018-06-05 22:15:27 +00002677
Justin Terry (VM)7becac82018-03-26 10:06:58 -07002678 if (FAILED(hr)) {
2679 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
2680 hr);
2681 ret = -EINVAL;
2682 goto error;
2683 }
2684
Ivan Shcherbakovd7482ff2022-03-02 17:28:33 -08002685 /*
2686 * We do not want to intercept any exceptions from the guest,
2687 * until we actually start debugging with gdb.
2688 */
2689 whpx->exception_exit_bitmap = -1;
2690 hr = whpx_set_exception_exit_bitmap(0);
2691
2692 if (FAILED(hr)) {
2693 error_report("WHPX: Failed to set exception exit bitmap, hr=%08lx", hr);
2694 ret = -EINVAL;
2695 goto error;
2696 }
2697
Lucian Petrut327fccb2018-05-15 20:35:21 +03002698 hr = whp_dispatch.WHvSetupPartition(whpx->partition);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002699 if (FAILED(hr)) {
2700 error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
2701 ret = -EINVAL;
2702 goto error;
2703 }
2704
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002705 whpx_memory_init();
2706
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002707 printf("Windows Hypervisor Platform accelerator is operational\n");
2708 return 0;
2709
Sunil Muthuswamy5c8e1e82020-07-30 22:11:26 +00002710error:
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002711
2712 if (NULL != whpx->partition) {
Lucian Petrut327fccb2018-05-15 20:35:21 +03002713 whp_dispatch.WHvDeletePartition(whpx->partition);
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002714 whpx->partition = NULL;
2715 }
2716
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002717 return ret;
2718}
2719
2720int whpx_enabled(void)
2721{
2722 return whpx_allowed;
2723}
2724
Paolo Bonzini84f4ef12020-12-19 04:06:37 -05002725bool whpx_apic_in_platform(void) {
2726 return whpx_global.apic_in_platform;
2727}
2728
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002729static void whpx_accel_class_init(ObjectClass *oc, void *data)
2730{
2731 AccelClass *ac = ACCEL_CLASS(oc);
2732 ac->name = "WHPX";
2733 ac->init_machine = whpx_accel_init;
2734 ac->allowed = &whpx_allowed;
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00002735
2736 object_class_property_add(oc, "kernel-irqchip", "on|off|split",
2737 NULL, whpx_set_kernel_irqchip,
2738 NULL, NULL);
2739 object_class_property_set_description(oc, "kernel-irqchip",
2740 "Configure WHPX in-kernel irqchip");
2741}
2742
2743static void whpx_accel_instance_init(Object *obj)
2744{
2745 struct whpx_state *whpx = &whpx_global;
2746
2747 memset(whpx, 0, sizeof(struct whpx_state));
2748 /* Turn on kernel-irqchip, by default */
2749 whpx->kernel_irqchip_allowed = true;
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002750}
2751
2752static const TypeInfo whpx_accel_type = {
2753 .name = ACCEL_CLASS_NAME("whpx"),
2754 .parent = TYPE_ACCEL,
Sunil Muthuswamyfaf20792020-10-28 02:23:19 +00002755 .instance_init = whpx_accel_instance_init,
Justin Terry (VM)812d49f2018-01-22 13:07:48 -08002756 .class_init = whpx_accel_class_init,
2757};
2758
2759static void whpx_type_init(void)
2760{
2761 type_register_static(&whpx_accel_type);
2762}
2763
Lucian Petrut327fccb2018-05-15 20:35:21 +03002764bool init_whp_dispatch(void)
2765{
Lucian Petrut327fccb2018-05-15 20:35:21 +03002766 if (whp_dispatch_initialized) {
2767 return true;
2768 }
2769
Sunil Muthuswamyb9027102019-11-13 18:54:39 +00002770 if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) {
Lucian Petrut327fccb2018-05-15 20:35:21 +03002771 goto error;
2772 }
Lucian Petrut327fccb2018-05-15 20:35:21 +03002773
Sunil Muthuswamyb9027102019-11-13 18:54:39 +00002774 if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) {
Lucian Petrut327fccb2018-05-15 20:35:21 +03002775 goto error;
2776 }
Lucian Petrut327fccb2018-05-15 20:35:21 +03002777
Sunil Muthuswamy6785e762020-02-26 20:54:39 +00002778 assert(load_whp_dispatch_fns(&hWinHvPlatform,
2779 WINHV_PLATFORM_FNS_SUPPLEMENTAL));
Lucian Petrut327fccb2018-05-15 20:35:21 +03002780 whp_dispatch_initialized = true;
Sunil Muthuswamyb9027102019-11-13 18:54:39 +00002781
Lucian Petrut327fccb2018-05-15 20:35:21 +03002782 return true;
Sunil Muthuswamyb9027102019-11-13 18:54:39 +00002783error:
Lucian Petrut327fccb2018-05-15 20:35:21 +03002784 if (hWinHvPlatform) {
2785 FreeLibrary(hWinHvPlatform);
2786 }
Sunil Muthuswamyb9027102019-11-13 18:54:39 +00002787
Lucian Petrut327fccb2018-05-15 20:35:21 +03002788 if (hWinHvEmulation) {
2789 FreeLibrary(hWinHvEmulation);
2790 }
Sunil Muthuswamyb9027102019-11-13 18:54:39 +00002791
Lucian Petrut327fccb2018-05-15 20:35:21 +03002792 return false;
2793}
2794
/* Hand whpx_type_init to type_init() so the WHPX accelerator type gets registered. */
type_init(whpx_type_init);