/*
 * QEMU HAX support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *  Glauber Costa <gcosta@redhat.com>
 *
 * Copyright (c) 2011 Intel Corporation
 *  Written by:
 *  Jiang Yunhong<yunhong.jiang@intel.com>
 *  Xin Xiaohui<xiaohui.xin@intel.com>
 *  Zhang Xiantao<xiantao.zhang@intel.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

/*
 * HAX common code for both Windows and Darwin
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"

#include "qemu/accel.h"
#include "sysemu/reset.h"
#include "sysemu/runstate.h"
#include "hw/boards.h"

#include "hax-accel-ops.h"

#define DEBUG_HAX 0

#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)

/* Current version */
const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/* Minimum HAX kernel version */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */

bool hax_allowed;

struct hax_state hax_global;

static void hax_vcpu_sync_state(CPUArchState *env, int modified);
static int hax_arch_get_registers(CPUArchState *env);

int valid_hax_tunnel_size(uint16_t size)
{
    return size >= sizeof(struct hax_tunnel);
}

hax_fd hax_vcpu_get_fd(CPUArchState *env)
{
    AccelCPUState *vcpu = env_cpu(env)->accel;
    if (!vcpu) {
        return HAX_INVALID_FD;
    }
    return vcpu->fd;
}

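/*
 * Query the HAXM driver's capability info.  Fail if the driver reports
 * itself non-working (VT-x or NX not enabled), if the host lacks
 * Unrestricted Guest (UG) support, or if the configured guest RAM
 * exceeds the driver's memory quota.
 */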
static int hax_get_capability(struct hax_state *hax)
{
    int ret;
    struct hax_capabilityinfo capinfo, *cap = &capinfo;

    ret = hax_capability(hax, cap);
    if (ret) {
        return ret;
    }

    if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
        if (cap->winfo & HAX_CAP_FAILREASON_VT) {
            DPRINTF("VTX feature is not enabled, HAX driver will not work.\n");
        } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
            DPRINTF("NX feature is not enabled, HAX driver will not work.\n");
        }
        return -ENXIO;
    }

    if (!(cap->winfo & HAX_CAP_UG)) {
        fprintf(stderr, "UG mode is not supported by the hardware.\n");
        return -ENOTSUP;
    }

    hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);

    if (cap->wstatus & HAX_CAP_MEMQUOTA) {
        if (cap->mem_quota < hax->mem_quota) {
            fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
            return -ENOSPC;
        }
    }
    return 0;
}

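/*
 * Check that this QEMU and the loaded HAXM module speak compatible API
 * versions; returns 1 when compatible, 0 otherwise.
 */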
static int hax_version_support(struct hax_state *hax)
{
    int ret;
    struct hax_module_version version;

    ret = hax_mod_version(hax, &version);
    if (ret < 0) {
        return 0;
    }

    if (hax_min_version > version.cur_version) {
        fprintf(stderr, "Incompatible HAX module version %d,",
                version.cur_version);
        fprintf(stderr, "requires minimum version %d\n", hax_min_version);
        return 0;
    }
    if (hax_cur_version < version.compat_version) {
        fprintf(stderr, "Incompatible QEMU HAX API version %x,",
                hax_cur_version);
        fprintf(stderr, "requires minimum HAX API version %x\n",
                version.compat_version);
        return 0;
    }

    return 1;
}

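/*
 * Create vcpu 'id' in the kernel module, open its device handle and set
 * up the shared communication tunnel.  Returns 0 on success (or if the
 * vcpu already exists) and -1 on failure.
 */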
int hax_vcpu_create(int id)
{
    AccelCPUState *vcpu = NULL;
    int ret;

    if (!hax_global.vm) {
        fprintf(stderr, "vcpu %x creation failed, vm is null\n", id);
        return -1;
    }

    if (hax_global.vm->vcpus[id]) {
        fprintf(stderr, "vcpu %x allocated already\n", id);
        return 0;
    }

    vcpu = g_new0(AccelCPUState, 1);

    ret = hax_host_create_vcpu(hax_global.vm->fd, id);
    if (ret) {
        fprintf(stderr, "Failed to create vcpu %x\n", id);
        goto error;
    }

    vcpu->vcpu_id = id;
    vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
    if (hax_invalid_fd(vcpu->fd)) {
        fprintf(stderr, "Failed to open the vcpu\n");
        ret = -ENODEV;
        goto error;
    }

    hax_global.vm->vcpus[id] = vcpu;

    ret = hax_host_setup_vcpu_channel(vcpu);
    if (ret) {
        fprintf(stderr, "Invalid hax tunnel size\n");
        ret = -EINVAL;
        goto error;
    }
    return 0;

  error:
    /* vcpu and tunnel will be closed automatically */
    if (vcpu && !hax_invalid_fd(vcpu->fd)) {
        hax_close_fd(vcpu->fd);
    }

    hax_global.vm->vcpus[id] = NULL;
    g_free(vcpu);
    return -1;
}

int hax_vcpu_destroy(CPUState *cpu)
{
    AccelCPUState *vcpu = cpu->accel;

    if (!hax_global.vm) {
        fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
        return -1;
    }

    if (!vcpu) {
        return 0;
    }

    /*
     * 1. The hax_tunnel is also destroyed when the vcpu is destroyed
     * 2. Closing the fd causes the hax module's vcpu to be cleaned up
     */
    hax_close_fd(vcpu->fd);
    hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
#ifdef _WIN32
    CloseHandle(vcpu->hThread);
#endif
    g_free(vcpu);
    cpu->accel = NULL;
    return 0;
}

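/*
 * Allocate the HAX vcpu backing this CPU, attach it to cpu->accel, mark
 * the register state dirty and register the per-vcpu reset handler.
 */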
int hax_init_vcpu(CPUState *cpu)
{
    int ret;

    ret = hax_vcpu_create(cpu->cpu_index);
    if (ret < 0) {
        fprintf(stderr, "Failed to create HAX vcpu\n");
        exit(-1);
    }

    cpu->accel = hax_global.vm->vcpus[cpu->cpu_index];
    cpu->vcpu_dirty = true;
    qemu_register_reset(hax_reset_vcpu_state, cpu->env_ptr);

    return ret;
}

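/*
 * Create the VM in the kernel module (or return the one already
 * created) and allocate the vcpu pointer array sized for max_cpus.
 */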
struct hax_vm *hax_vm_create(struct hax_state *hax, int max_cpus)
{
    struct hax_vm *vm;
    int vm_id = 0, ret, i;

    if (hax_invalid_fd(hax->fd)) {
        return NULL;
    }

    if (hax->vm) {
        return hax->vm;
    }

    if (max_cpus > HAX_MAX_VCPU) {
        fprintf(stderr, "The maximum number of VCPUs QEMU supports is %d\n",
                HAX_MAX_VCPU);
        return NULL;
    }

    vm = g_new0(struct hax_vm, 1);

    ret = hax_host_create_vm(hax, &vm_id);
    if (ret) {
        fprintf(stderr, "Failed to create vm %x\n", ret);
        goto error;
    }
    vm->id = vm_id;
    vm->fd = hax_host_open_vm(hax, vm_id);
    if (hax_invalid_fd(vm->fd)) {
        fprintf(stderr, "Failed to open vm %d\n", vm_id);
        goto error;
    }

    vm->numvcpus = max_cpus;
    vm->vcpus = g_new0(AccelCPUState *, vm->numvcpus);
    for (i = 0; i < vm->numvcpus; i++) {
        vm->vcpus[i] = NULL;
    }

    hax->vm = vm;
    return vm;

  error:
    g_free(vm);
    hax->vm = NULL;
    return NULL;
}

int hax_vm_destroy(struct hax_vm *vm)
{
    int i;

    for (i = 0; i < vm->numvcpus; i++) {
        if (vm->vcpus[i]) {
            fprintf(stderr, "VCPU should be cleaned before vm clean\n");
            return -1;
        }
    }
    hax_close_fd(vm->fd);
    vm->numvcpus = 0;
    g_free(vm->vcpus);
    g_free(vm);
    hax_global.vm = NULL;
    return 0;
}

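/*
 * One-time accelerator setup: open the HAXM device, check capability
 * and API version, create the VM, set up memory mapping support via
 * hax_memory_init() and report QEMU's API version to the driver.
 */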
static int hax_init(ram_addr_t ram_size, int max_cpus)
{
    struct hax_state *hax = NULL;
    struct hax_qemu_version qversion;
    int ret;

    hax = &hax_global;

    memset(hax, 0, sizeof(struct hax_state));
    hax->mem_quota = ram_size;

    hax->fd = hax_mod_open();
    if (hax_invalid_fd(hax->fd)) {
        hax->fd = 0;
        ret = -ENODEV;
        goto error;
    }

    ret = hax_get_capability(hax);

    if (ret) {
        if (ret != -ENOSPC) {
            ret = -EINVAL;
        }
        goto error;
    }

    if (!hax_version_support(hax)) {
        ret = -EINVAL;
        goto error;
    }

    hax->vm = hax_vm_create(hax, max_cpus);
    if (!hax->vm) {
        fprintf(stderr, "Failed to create HAX VM\n");
        ret = -EINVAL;
        goto error;
    }

    hax_memory_init();

    qversion.cur_version = hax_cur_version;
    qversion.min_version = hax_min_version;
    hax_notify_qemu_version(hax->vm->fd, &qversion);

    return ret;
  error:
    if (hax->vm) {
        hax_vm_destroy(hax->vm);
    }
    if (hax->fd) {
        hax_mod_close(hax);
    }

    return ret;
}

static int hax_accel_init(MachineState *ms)
{
    int ret = hax_init(ms->ram_size, (int)ms->smp.max_cpus);

    if (ret && (ret != -ENOSPC)) {
        fprintf(stderr, "No accelerator found.\n");
    } else {
        fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
                !ret ? "working" : "not working",
                !ret ? "fast virt" : "emulation");
        fprintf(stdout,
                "NOTE: HAX is deprecated and will be removed in a future release.\n"
                "      Use 'whpx' (on Windows) or 'hvf' (on macOS) instead.\n");
    }
    return ret;
}

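/*
 * Complete an MMIO access that exited to userspace: direction 0/1 moves
 * data between the tunnel buffer (hft->value) and guest physical
 * address hft->gpa, while direction 2 copies from hft->gpa to
 * hft->gpa2 (API v4, used for instructions such as MOVS).
 */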
static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
{
    if (hft->direction < 2) {
        cpu_physical_memory_rw(hft->gpa, &hft->value, hft->size,
                               hft->direction);
    } else {
        /*
         * HAX API v4 supports transferring data between two MMIO addresses,
         * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
         *  hft->direction == 2: gpa ==> gpa2
         */
        uint64_t value;
        cpu_physical_memory_read(hft->gpa, &value, hft->size);
        cpu_physical_memory_write(hft->gpa2, &value, hft->size);
    }

    return 0;
}

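/*
 * Emulate a port I/O exit.  The vcpu's I/O buffer holds 'count' items
 * of 'size' bytes each; 'df' (the direction flag) selects whether the
 * buffer is walked forwards or backwards, matching string I/O
 * (REP INS/OUTS) semantics.
 */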
static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
                         int direction, int size, int count, void *buffer)
{
    uint8_t *ptr;
    int i;
    MemTxAttrs attrs = { 0 };

    if (!df) {
        ptr = buffer;
    } else {
        ptr = buffer + size * count - size;
    }
    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, attrs,
                         ptr, size, direction == HAX_EXIT_IO_OUT);
        if (!df) {
            ptr += size;
        } else {
            ptr -= size;
        }
    }

    return 0;
}

static int hax_vcpu_interrupt(CPUArchState *env)
{
    CPUState *cpu = env_cpu(env);
    AccelCPUState *vcpu = cpu->accel;
    struct hax_tunnel *ht = vcpu->tunnel;

    /*
     * Try to inject an interrupt if the guest can accept it.
     * Unlike KVM, the HAX kernel checks the eflags, instead of QEMU.
     */
    if (ht->ready_for_interrupt_injection &&
        (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        int irq;

        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            hax_inject_interrupt(env, irq);
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        ht->request_interrupt_window = 1;
    } else {
        ht->request_interrupt_window = 0;
    }
    return 0;
}

void hax_raise_event(CPUState *cpu)
{
    AccelCPUState *vcpu = cpu->accel;

    if (!vcpu) {
        return;
    }
    vcpu->tunnel->user_event_pending = 1;
}

/*
 * Ask the hax kernel module to run the CPU for us until:
 * 1. The guest crashes or shuts down
 * 2. QEMU's emulation is needed, e.g. the guest executes an MMIO instruction
 * 3. The guest executes HLT
 * 4. QEMU has a signal/event pending
 * 5. An unknown VMX exit happens
 */
static int hax_vcpu_hax_exec(CPUArchState *env)
{
    int ret = 0;
    CPUState *cpu = env_cpu(env);
    X86CPU *x86_cpu = X86_CPU(cpu);
    AccelCPUState *vcpu = cpu->accel;
    struct hax_tunnel *ht = vcpu->tunnel;

    if (!hax_enabled()) {
        DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
        return 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* After a vcpu is halted (either because it is an AP and has just been
     * reset, or because it has executed the HLT instruction), it will not be
     * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
     * for events that may change the halted state of this vcpu:
     *  a) Maskable interrupt, when RFLAGS.IF is 1;
     *     Note: env->eflags may not reflect the current RFLAGS state, because
     *           it is not updated after each hax_vcpu_run(). We cannot afford
     *           to fail to recognize any unhalt-by-maskable-interrupt event
     *           (in which case the vcpu will halt forever), and yet we cannot
     *           afford the overhead of hax_vcpu_sync_state(). The current
     *           solution is to err on the side of caution and have the HLT
     *           handler (see case HAX_EXIT_HLT below) unconditionally set the
     *           IF_MASK bit in env->eflags, which, in effect, disables the
     *           RFLAGS.IF check.
     *  b) NMI;
     *  c) INIT signal;
     *  d) SIPI signal.
     */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
                cpu->cpu_index);
        do_cpu_init(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
                cpu->cpu_index);
        hax_vcpu_sync_state(env, 0);
        do_cpu_sipi(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->halted) {
        /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
         * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
         * That way, this vcpu thread will be trapped in qemu_wait_io_event(),
         * until the vcpu is unhalted.
         */
        cpu->exception_index = EXCP_HLT;
        return 0;
    }

    do {
        int hax_ret;

        if (cpu->exit_request) {
            ret = 1;
            break;
        }

        hax_vcpu_interrupt(env);

        qemu_mutex_unlock_iothread();
        cpu_exec_start(cpu);
        hax_ret = hax_vcpu_run(vcpu);
        cpu_exec_end(cpu);
        qemu_mutex_lock_iothread();

        /* Simply continue the vcpu_run if system call interrupted */
        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
            DPRINTF("io window interrupted\n");
            continue;
        }

        if (hax_ret < 0) {
            fprintf(stderr, "vcpu run failed for vcpu %x\n", vcpu->vcpu_id);
            abort();
        }
        switch (ht->_exit_status) {
        case HAX_EXIT_IO:
            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
                                ht->pio._direction,
                                ht->pio._size, ht->pio._count, vcpu->iobuf);
            break;
        case HAX_EXIT_FAST_MMIO:
            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
            break;
        /* Guest state changed, currently only for shutdown */
        case HAX_EXIT_STATECHANGE:
            fprintf(stdout, "VCPU shutdown request\n");
            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
            hax_vcpu_sync_state(env, 0);
            ret = 1;
            break;
        case HAX_EXIT_UNKNOWN_VMEXIT:
            fprintf(stderr, "Unknown VMX exit %x from guest\n",
                    ht->_exit_reason);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = -1;
            break;
        case HAX_EXIT_HLT:
            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                /* hlt instruction with interrupt disabled is shutdown */
                env->eflags |= IF_MASK;
                cpu->halted = 1;
                cpu->exception_index = EXCP_HLT;
                ret = 1;
            }
            break;
        /* these situations will continue to hax module */
        case HAX_EXIT_INTERRUPT:
        case HAX_EXIT_PAUSED:
            break;
        case HAX_EXIT_MMIO:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
            ret = -1;
            break;
        case HAX_EXIT_REAL:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
            ret = -1;
            break;
        default:
            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = 1;
            break;
        }
    } while (!ret);

    if (cpu->exit_request) {
        cpu->exit_request = 0;
        cpu->exception_index = EXCP_INTERRUPT;
    }
    return ret < 0;
}

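/*
 * State synchronization helpers invoked via run_on_cpu(): cpu->vcpu_dirty
 * marks that the QEMU-side CPUArchState holds the authoritative copy and
 * must be pushed back to the kernel vcpu before it runs again.
 */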
static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_arch_get_registers(env);
    cpu->vcpu_dirty = true;
}

void hax_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_vcpu_sync_state(env, 1);
    cpu->vcpu_dirty = false;
}

void hax_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_vcpu_sync_state(env, 1);
    cpu->vcpu_dirty = false;
}

void hax_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}

void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}

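/*
 * Outer vcpu execution loop: keep running the vcpu until
 * hax_vcpu_hax_exec() leaves an exception code (EXCP_HLT,
 * EXCP_INTERRUPT, ...) for the caller to handle.
 */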
int hax_smp_cpu_exec(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    int fatal;
    int ret;

    while (1) {
        if (cpu->exception_index >= EXCP_INTERRUPT) {
            ret = cpu->exception_index;
            cpu->exception_index = -1;
            break;
        }

        fatal = hax_vcpu_hax_exec(env);

        if (fatal) {
            fprintf(stderr, "Unsupported HAX vcpu return\n");
            abort();
        }
    }

    return ret;
}

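/*
 * Helpers converting segment registers between QEMU's SegmentCache and
 * the segment_desc_t layout used by the HAX kernel interface.
 */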
static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
{
    memset(lhs, 0, sizeof(struct segment_desc_t));
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->operand_size = 0;
    lhs->desc = 1;
    lhs->long_mode = 0;
    lhs->granularity = 0;
    lhs->available = 0;
}

static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->present * DESC_P_MASK)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->operand_size << DESC_B_SHIFT)
        | (rhs->desc * DESC_S_MASK)
        | (rhs->long_mode << DESC_L_SHIFT)
        | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
}

static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;

    memset(lhs, 0, sizeof(struct segment_desc_t));
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = rhs->selector & 3;
    lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
    lhs->desc = (flags & DESC_S_MASK) != 0;
    lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
    lhs->granularity = (flags & DESC_G_MASK) != 0;
    lhs->available = (flags & DESC_AVL_MASK) != 0;
}

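/*
 * Copy one general purpose register between the HAX vcpu_state_t and
 * QEMU's CPUArchState: 'set' != 0 copies QEMU -> HAX, otherwise
 * HAX -> QEMU.
 */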
static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
{
    target_ulong reg = *hax_reg;

    if (set) {
        *hax_reg = *qemu_reg;
    } else {
        *qemu_reg = reg;
    }
}

/* The sregs have already been synced with the HAX kernel before this call */
static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    get_seg(&env->segs[R_CS], &sregs->_cs);
    get_seg(&env->segs[R_DS], &sregs->_ds);
    get_seg(&env->segs[R_ES], &sregs->_es);
    get_seg(&env->segs[R_FS], &sregs->_fs);
    get_seg(&env->segs[R_GS], &sregs->_gs);
    get_seg(&env->segs[R_SS], &sregs->_ss);

    get_seg(&env->tr, &sregs->_tr);
    get_seg(&env->ldt, &sregs->_ldt);
    env->idt.limit = sregs->_idt.limit;
    env->idt.base = sregs->_idt.base;
    env->gdt.limit = sregs->_gdt.limit;
    env->gdt.base = sregs->_gdt.base;
    return 0;
}

static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
        set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs->_cs, &env->segs[R_CS]);
        set_seg(&sregs->_ds, &env->segs[R_DS]);
        set_seg(&sregs->_es, &env->segs[R_ES]);
        set_seg(&sregs->_fs, &env->segs[R_FS]);
        set_seg(&sregs->_gs, &env->segs[R_GS]);
        set_seg(&sregs->_ss, &env->segs[R_SS]);

        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs->_ss.selector = (sregs->_ss.selector & ~3) |
                                  (sregs->_cs.selector & 3);
            sregs->_ss.dpl = sregs->_ss.selector & 3;
        }
    }

    set_seg(&sregs->_tr, &env->tr);
    set_seg(&sregs->_ldt, &env->ldt);
    sregs->_idt.limit = env->idt.limit;
    sregs->_idt.base = env->idt.base;
    sregs->_gdt.limit = env->gdt.limit;
    sregs->_gdt.base = env->gdt.base;
    return 0;
}

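/*
 * Transfer the general purpose registers, control registers and segment
 * state in a single vcpu_state_t transaction: 'set' != 0 pushes QEMU
 * state into the kernel vcpu, otherwise the kernel state is pulled.
 */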
static int hax_sync_vcpu_register(CPUArchState *env, int set)
{
    struct vcpu_state_t regs;
    int ret;
    memset(&regs, 0, sizeof(struct vcpu_state_t));

    if (!set) {
        ret = hax_sync_vcpu_state(env, &regs, 0);
        if (ret < 0) {
            return -1;
        }
    }

    /* generic register */
    hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
    hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
    hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
    hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
    hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
    hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
    hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
    hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    hax_getput_reg(&regs._r8, &env->regs[8], set);
    hax_getput_reg(&regs._r9, &env->regs[9], set);
    hax_getput_reg(&regs._r10, &env->regs[10], set);
    hax_getput_reg(&regs._r11, &env->regs[11], set);
    hax_getput_reg(&regs._r12, &env->regs[12], set);
    hax_getput_reg(&regs._r13, &env->regs[13], set);
    hax_getput_reg(&regs._r14, &env->regs[14], set);
    hax_getput_reg(&regs._r15, &env->regs[15], set);
#endif
    hax_getput_reg(&regs._rflags, &env->eflags, set);
    hax_getput_reg(&regs._rip, &env->eip, set);

    if (set) {
        regs._cr0 = env->cr[0];
        regs._cr2 = env->cr[2];
        regs._cr3 = env->cr[3];
        regs._cr4 = env->cr[4];
        hax_set_segments(env, &regs);
    } else {
        env->cr[0] = regs._cr0;
        env->cr[2] = regs._cr2;
        env->cr[3] = regs._cr3;
        env->cr[4] = regs._cr4;
        hax_get_segments(env, &regs);
    }

    if (set) {
        ret = hax_sync_vcpu_state(env, &regs, 1);
        if (ret < 0) {
            return -1;
        }
    }
    return 0;
}

static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
                              uint64_t value)
{
    item->entry = index;
    item->value = value;
}

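/*
 * MSR synchronization covers the small fixed set the driver handles:
 * the SYSENTER MSRs, the TSC and, on 64-bit targets, EFER plus the
 * syscall-related MSRs.
 */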
static int hax_get_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int ret, i, n;

    n = 0;
    msrs[n++].entry = MSR_IA32_SYSENTER_CS;
    msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
    msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
    msrs[n++].entry = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    msrs[n++].entry = MSR_EFER;
    msrs[n++].entry = MSR_STAR;
    msrs[n++].entry = MSR_LSTAR;
    msrs[n++].entry = MSR_CSTAR;
    msrs[n++].entry = MSR_FMASK;
    msrs[n++].entry = MSR_KERNELGSBASE;
#endif
    md.nr_msr = n;
    ret = hax_sync_msr(env, &md, 0);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < md.done; i++) {
        switch (msrs[i].entry) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].value;
            break;
        case MSR_IA32_TSC:
            env->tsc = msrs[i].value;
            break;
#ifdef TARGET_X86_64
        case MSR_EFER:
            env->efer = msrs[i].value;
            break;
        case MSR_STAR:
            env->star = msrs[i].value;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].value;
            break;
        case MSR_CSTAR:
            env->cstar = msrs[i].value;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].value;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].value;
            break;
#endif
        }
    }

    return 0;
}

static int hax_set_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int n = 0;

    memset(&md, 0, sizeof(struct hax_msr_data));
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
    hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
    hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
    hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
    hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
#endif
    md.nr_msr = n;
    md.done = 0;

    return hax_sync_msr(env, &md, 1);
}

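/*
 * FPU/SSE state is exchanged with the kernel through an FXSAVE-style
 * fx_layout structure.
 */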
static int hax_get_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i, ret;

    ret = hax_sync_fpu(env, &fpu, 0);
    if (ret < 0) {
        return ret;
    }

    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((fpu.ftw >> i) & 1);
    }
    memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));

    for (i = 0; i < 8; i++) {
        env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
        env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
        if (CPU_NB_REGS > 8) {
            env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
            env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
        }
    }
    env->mxcsr = fpu.mxcsr;

    return 0;
}

static int hax_set_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i;

    memset(&fpu, 0, sizeof(fpu));
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;

    for (i = 0; i < 8; ++i) {
        fpu.ftw |= (!env->fptags[i]) << i;
    }

    memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
    for (i = 0; i < 8; i++) {
        stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
        stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
        if (CPU_NB_REGS > 8) {
            stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
            stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
        }
    }

    fpu.mxcsr = env->mxcsr;

    return hax_sync_fpu(env, &fpu, 1);
}

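/*
 * Get or set the complete architectural state (general registers, FPU
 * and MSRs) in one call.
 */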
static int hax_arch_get_registers(CPUArchState *env)
{
    int ret;

    ret = hax_sync_vcpu_register(env, 0);
    if (ret < 0) {
        return ret;
    }

    ret = hax_get_fpu(env);
    if (ret < 0) {
        return ret;
    }

    ret = hax_get_msrs(env);
    if (ret < 0) {
        return ret;
    }

    x86_update_hflags(env);
    return 0;
}

static int hax_arch_set_registers(CPUArchState *env)
{
    int ret;
    ret = hax_sync_vcpu_register(env, 1);

    if (ret < 0) {
        fprintf(stderr, "Failed to sync vcpu reg\n");
        return ret;
    }
    ret = hax_set_fpu(env);
    if (ret < 0) {
        fprintf(stderr, "FPU failed\n");
        return ret;
    }
    ret = hax_set_msrs(env);
    if (ret < 0) {
        fprintf(stderr, "MSR failed\n");
        return ret;
    }

    return 0;
}

static void hax_vcpu_sync_state(CPUArchState *env, int modified)
{
    if (hax_enabled()) {
        if (modified) {
            hax_arch_set_registers(env);
        } else {
            hax_arch_get_registers(env);
        }
    }
}

/*
 * Much simpler than KVM, at least in the first stage, because:
 * we don't need to consider device pass-through or the framebuffer,
 * and we may even be able to drop the BIOS entirely.
 */
int hax_sync_vcpus(void)
{
    if (hax_enabled()) {
        CPUState *cpu;

        cpu = first_cpu;
        if (!cpu) {
            return 0;
        }

        for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
            int ret;

            ret = hax_arch_set_registers(cpu->env_ptr);
            if (ret < 0) {
                return ret;
            }
        }
    }

    return 0;
}

void hax_reset_vcpu_state(void *opaque)
{
    CPUState *cpu;
    for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
        cpu->accel->tunnel->user_event_pending = 0;
        cpu->accel->tunnel->ready_for_interrupt_injection = 0;
    }
}

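/* QOM glue registering "hax" as an accelerator type. */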
static void hax_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HAX";
    ac->init_machine = hax_accel_init;
    ac->allowed = &hax_allowed;
}

static const TypeInfo hax_accel_type = {
    .name = ACCEL_CLASS_NAME("hax"),
    .parent = TYPE_ACCEL,
    .class_init = hax_accel_class_init,
};

static void hax_type_init(void)
{
    type_register_static(&hax_accel_type);
}

type_init(hax_type_init);