| /* |
| * Hyper-V guest/hypervisor interaction |
| * |
| * Copyright (c) 2015-2018 Virtuozzo International GmbH. |
| * |
| * This work is licensed under the terms of the GNU GPL, version 2 or later. |
| * See the COPYING file in the top-level directory. |
| */ |
| |
| #include "qemu/osdep.h" |
| #include "qemu/main-loop.h" |
| #include "qemu/module.h" |
| #include "qapi/error.h" |
| #include "exec/address-spaces.h" |
| #include "sysemu/kvm.h" |
| #include "qemu/bitops.h" |
| #include "qemu/error-report.h" |
| #include "qemu/queue.h" |
| #include "qemu/rcu.h" |
| #include "qemu/rcu_queue.h" |
| #include "hw/hyperv/hyperv.h" |
| |
| typedef struct SynICState { |
| DeviceState parent_obj; |
| |
| CPUState *cs; |
| |
| bool enabled; |
| hwaddr msg_page_addr; |
| hwaddr event_page_addr; |
| MemoryRegion msg_page_mr; |
| MemoryRegion event_page_mr; |
| struct hyperv_message_page *msg_page; |
| struct hyperv_event_flags_page *event_page; |
| } SynICState; |
| |
| #define TYPE_SYNIC "hyperv-synic" |
| #define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC) |
| |
| static SynICState *get_synic(CPUState *cs) |
| { |
| return SYNIC(object_resolve_path_component(OBJECT(cs), "synic")); |
| } |
| |
| static void synic_update(SynICState *synic, bool enable, |
| hwaddr msg_page_addr, hwaddr event_page_addr) |
| { |
| |
| synic->enabled = enable; |
| if (synic->msg_page_addr != msg_page_addr) { |
| if (synic->msg_page_addr) { |
| memory_region_del_subregion(get_system_memory(), |
| &synic->msg_page_mr); |
| } |
| if (msg_page_addr) { |
| memory_region_add_subregion(get_system_memory(), msg_page_addr, |
| &synic->msg_page_mr); |
| } |
| synic->msg_page_addr = msg_page_addr; |
| } |
| if (synic->event_page_addr != event_page_addr) { |
| if (synic->event_page_addr) { |
| memory_region_del_subregion(get_system_memory(), |
| &synic->event_page_mr); |
| } |
| if (event_page_addr) { |
| memory_region_add_subregion(get_system_memory(), event_page_addr, |
| &synic->event_page_mr); |
| } |
| synic->event_page_addr = event_page_addr; |
| } |
| } |
| |
| void hyperv_synic_update(CPUState *cs, bool enable, |
| hwaddr msg_page_addr, hwaddr event_page_addr) |
| { |
| SynICState *synic = get_synic(cs); |
| |
| if (!synic) { |
| return; |
| } |
| |
| synic_update(synic, enable, msg_page_addr, event_page_addr); |
| } |
| |
| static void synic_realize(DeviceState *dev, Error **errp) |
| { |
| Object *obj = OBJECT(dev); |
| SynICState *synic = SYNIC(dev); |
| char *msgp_name, *eventp_name; |
| uint32_t vp_index; |
| |
| /* memory region names have to be globally unique */ |
| vp_index = hyperv_vp_index(synic->cs); |
| msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index); |
| eventp_name = g_strdup_printf("synic-%u-event-page", vp_index); |
| |
| memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name, |
| sizeof(*synic->msg_page), &error_abort); |
| memory_region_init_ram(&synic->event_page_mr, obj, eventp_name, |
| sizeof(*synic->event_page), &error_abort); |
| synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr); |
| synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr); |
| |
| g_free(msgp_name); |
| g_free(eventp_name); |
| } |
| static void synic_reset(DeviceState *dev) |
| { |
| SynICState *synic = SYNIC(dev); |
| memset(synic->msg_page, 0, sizeof(*synic->msg_page)); |
| memset(synic->event_page, 0, sizeof(*synic->event_page)); |
| synic_update(synic, false, 0, 0); |
| } |
| |
| static void synic_class_init(ObjectClass *klass, void *data) |
| { |
| DeviceClass *dc = DEVICE_CLASS(klass); |
| |
| dc->realize = synic_realize; |
| dc->reset = synic_reset; |
| dc->user_creatable = false; |
| } |
| |
| void hyperv_synic_add(CPUState *cs) |
| { |
| Object *obj; |
| SynICState *synic; |
| |
| obj = object_new(TYPE_SYNIC); |
| synic = SYNIC(obj); |
| synic->cs = cs; |
| object_property_add_child(OBJECT(cs), "synic", obj, &error_abort); |
| object_unref(obj); |
| object_property_set_bool(obj, true, "realized", &error_abort); |
| } |
| |
| void hyperv_synic_reset(CPUState *cs) |
| { |
| SynICState *synic = get_synic(cs); |
| |
| if (synic) { |
| device_legacy_reset(DEVICE(synic)); |
| } |
| } |
| |
| static const TypeInfo synic_type_info = { |
| .name = TYPE_SYNIC, |
| .parent = TYPE_DEVICE, |
| .instance_size = sizeof(SynICState), |
| .class_init = synic_class_init, |
| }; |
| |
| static void synic_register_types(void) |
| { |
| type_register_static(&synic_type_info); |
| } |
| |
| type_init(synic_register_types) |
| |
| /* |
| * KVM has its own message producers (SynIC timers). To guarantee |
| * serialization with both KVM vcpu and the guest cpu, the messages are first |
| * staged in an intermediate area and then posted to the SynIC message page in |
| * the vcpu thread. |
| */ |
| typedef struct HvSintStagedMessage { |
| /* message content staged by hyperv_post_msg */ |
| struct hyperv_message msg; |
| /* callback + data (r/o) to complete the processing in a BH */ |
| HvSintMsgCb cb; |
| void *cb_data; |
| /* message posting status filled by cpu_post_msg */ |
| int status; |
| /* passing the buck: */ |
| enum { |
| /* initial state */ |
| HV_STAGED_MSG_FREE, |
| /* |
| * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE -> |
| * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu |
| */ |
| HV_STAGED_MSG_BUSY, |
| /* |
| * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot, |
| * notify the guest, records the status, marks the posting done (BUSY |
| * -> POSTED), and schedules sint_msg_bh BH |
| */ |
| HV_STAGED_MSG_POSTED, |
| /* |
| * sint_msg_bh (BH) verifies that the posting is done, runs the |
| * callback, and starts over (POSTED -> FREE) |
| */ |
| } state; |
| } HvSintStagedMessage; |
| |
| struct HvSintRoute { |
| uint32_t sint; |
| SynICState *synic; |
| int gsi; |
| EventNotifier sint_set_notifier; |
| EventNotifier sint_ack_notifier; |
| |
| HvSintStagedMessage *staged_msg; |
| |
| unsigned refcount; |
| }; |
| |
| static CPUState *hyperv_find_vcpu(uint32_t vp_index) |
| { |
| CPUState *cs = qemu_get_cpu(vp_index); |
| assert(hyperv_vp_index(cs) == vp_index); |
| return cs; |
| } |
| |
| /* |
| * BH to complete the processing of a staged message. |
| */ |
| static void sint_msg_bh(void *opaque) |
| { |
| HvSintRoute *sint_route = opaque; |
| HvSintStagedMessage *staged_msg = sint_route->staged_msg; |
| |
| if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) { |
| /* status nor ready yet (spurious ack from guest?), ignore */ |
| return; |
| } |
| |
| staged_msg->cb(staged_msg->cb_data, staged_msg->status); |
| staged_msg->status = 0; |
| |
| /* staged message processing finished, ready to start over */ |
| atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE); |
| /* drop the reference taken in hyperv_post_msg */ |
| hyperv_sint_route_unref(sint_route); |
| } |
| |
| /* |
| * Worker to transfer the message from the staging area into the SynIC message |
| * page in vcpu context. |
| */ |
| static void cpu_post_msg(CPUState *cs, run_on_cpu_data data) |
| { |
| HvSintRoute *sint_route = data.host_ptr; |
| HvSintStagedMessage *staged_msg = sint_route->staged_msg; |
| SynICState *synic = sint_route->synic; |
| struct hyperv_message *dst_msg; |
| bool wait_for_sint_ack = false; |
| |
| assert(staged_msg->state == HV_STAGED_MSG_BUSY); |
| |
| if (!synic->enabled || !synic->msg_page_addr) { |
| staged_msg->status = -ENXIO; |
| goto posted; |
| } |
| |
| dst_msg = &synic->msg_page->slot[sint_route->sint]; |
| |
| if (dst_msg->header.message_type != HV_MESSAGE_NONE) { |
| dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING; |
| staged_msg->status = -EAGAIN; |
| wait_for_sint_ack = true; |
| } else { |
| memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg)); |
| staged_msg->status = hyperv_sint_route_set_sint(sint_route); |
| } |
| |
| memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page)); |
| |
| posted: |
| atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED); |
| /* |
| * Notify the msg originator of the progress made; if the slot was busy we |
| * set msg_pending flag in it so it will be the guest who will do EOM and |
| * trigger the notification from KVM via sint_ack_notifier |
| */ |
| if (!wait_for_sint_ack) { |
| aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, |
| sint_route); |
| } |
| } |
| |
| /* |
| * Post a Hyper-V message to the staging area, for delivery to guest in the |
| * vcpu thread. |
| */ |
| int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg) |
| { |
| HvSintStagedMessage *staged_msg = sint_route->staged_msg; |
| |
| assert(staged_msg); |
| |
| /* grab the staging area */ |
| if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE, |
| HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) { |
| return -EAGAIN; |
| } |
| |
| memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg)); |
| |
| /* hold a reference on sint_route until the callback is finished */ |
| hyperv_sint_route_ref(sint_route); |
| |
| /* schedule message posting attempt in vcpu thread */ |
| async_run_on_cpu(sint_route->synic->cs, cpu_post_msg, |
| RUN_ON_CPU_HOST_PTR(sint_route)); |
| return 0; |
| } |
| |
| static void sint_ack_handler(EventNotifier *notifier) |
| { |
| HvSintRoute *sint_route = container_of(notifier, HvSintRoute, |
| sint_ack_notifier); |
| event_notifier_test_and_clear(notifier); |
| |
| /* |
| * the guest consumed the previous message so complete the current one with |
| * -EAGAIN and let the msg originator retry |
| */ |
| aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route); |
| } |
| |
| /* |
| * Set given event flag for a given sint on a given vcpu, and signal the sint. |
| */ |
| int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno) |
| { |
| int ret; |
| SynICState *synic = sint_route->synic; |
| unsigned long *flags, set_mask; |
| unsigned set_idx; |
| |
| if (eventno > HV_EVENT_FLAGS_COUNT) { |
| return -EINVAL; |
| } |
| if (!synic->enabled || !synic->event_page_addr) { |
| return -ENXIO; |
| } |
| |
| set_idx = BIT_WORD(eventno); |
| set_mask = BIT_MASK(eventno); |
| flags = synic->event_page->slot[sint_route->sint].flags; |
| |
| if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) { |
| memory_region_set_dirty(&synic->event_page_mr, 0, |
| sizeof(*synic->event_page)); |
| ret = hyperv_sint_route_set_sint(sint_route); |
| } else { |
| ret = 0; |
| } |
| return ret; |
| } |
| |
| HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, |
| HvSintMsgCb cb, void *cb_data) |
| { |
| HvSintRoute *sint_route; |
| EventNotifier *ack_notifier; |
| int r, gsi; |
| CPUState *cs; |
| SynICState *synic; |
| |
| cs = hyperv_find_vcpu(vp_index); |
| if (!cs) { |
| return NULL; |
| } |
| |
| synic = get_synic(cs); |
| if (!synic) { |
| return NULL; |
| } |
| |
| sint_route = g_new0(HvSintRoute, 1); |
| r = event_notifier_init(&sint_route->sint_set_notifier, false); |
| if (r) { |
| goto err; |
| } |
| |
| |
| ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL; |
| if (ack_notifier) { |
| sint_route->staged_msg = g_new0(HvSintStagedMessage, 1); |
| sint_route->staged_msg->cb = cb; |
| sint_route->staged_msg->cb_data = cb_data; |
| |
| r = event_notifier_init(ack_notifier, false); |
| if (r) { |
| goto err_sint_set_notifier; |
| } |
| |
| event_notifier_set_handler(ack_notifier, sint_ack_handler); |
| } |
| |
| gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); |
| if (gsi < 0) { |
| goto err_gsi; |
| } |
| |
| r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, |
| &sint_route->sint_set_notifier, |
| ack_notifier, gsi); |
| if (r) { |
| goto err_irqfd; |
| } |
| sint_route->gsi = gsi; |
| sint_route->synic = synic; |
| sint_route->sint = sint; |
| sint_route->refcount = 1; |
| |
| return sint_route; |
| |
| err_irqfd: |
| kvm_irqchip_release_virq(kvm_state, gsi); |
| err_gsi: |
| if (ack_notifier) { |
| event_notifier_set_handler(ack_notifier, NULL); |
| event_notifier_cleanup(ack_notifier); |
| g_free(sint_route->staged_msg); |
| } |
| err_sint_set_notifier: |
| event_notifier_cleanup(&sint_route->sint_set_notifier); |
| err: |
| g_free(sint_route); |
| |
| return NULL; |
| } |
| |
| void hyperv_sint_route_ref(HvSintRoute *sint_route) |
| { |
| sint_route->refcount++; |
| } |
| |
| void hyperv_sint_route_unref(HvSintRoute *sint_route) |
| { |
| if (!sint_route) { |
| return; |
| } |
| |
| assert(sint_route->refcount > 0); |
| |
| if (--sint_route->refcount) { |
| return; |
| } |
| |
| kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, |
| &sint_route->sint_set_notifier, |
| sint_route->gsi); |
| kvm_irqchip_release_virq(kvm_state, sint_route->gsi); |
| if (sint_route->staged_msg) { |
| event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); |
| event_notifier_cleanup(&sint_route->sint_ack_notifier); |
| g_free(sint_route->staged_msg); |
| } |
| event_notifier_cleanup(&sint_route->sint_set_notifier); |
| g_free(sint_route); |
| } |
| |
| int hyperv_sint_route_set_sint(HvSintRoute *sint_route) |
| { |
| return event_notifier_set(&sint_route->sint_set_notifier); |
| } |
| |
| typedef struct MsgHandler { |
| struct rcu_head rcu; |
| QLIST_ENTRY(MsgHandler) link; |
| uint32_t conn_id; |
| HvMsgHandler handler; |
| void *data; |
| } MsgHandler; |
| |
| typedef struct EventFlagHandler { |
| struct rcu_head rcu; |
| QLIST_ENTRY(EventFlagHandler) link; |
| uint32_t conn_id; |
| EventNotifier *notifier; |
| } EventFlagHandler; |
| |
| static QLIST_HEAD(, MsgHandler) msg_handlers; |
| static QLIST_HEAD(, EventFlagHandler) event_flag_handlers; |
| static QemuMutex handlers_mutex; |
| |
| static void __attribute__((constructor)) hv_init(void) |
| { |
| QLIST_INIT(&msg_handlers); |
| QLIST_INIT(&event_flag_handlers); |
| qemu_mutex_init(&handlers_mutex); |
| } |
| |
| int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data) |
| { |
| int ret; |
| MsgHandler *mh; |
| |
| qemu_mutex_lock(&handlers_mutex); |
| QLIST_FOREACH(mh, &msg_handlers, link) { |
| if (mh->conn_id == conn_id) { |
| if (handler) { |
| ret = -EEXIST; |
| } else { |
| QLIST_REMOVE_RCU(mh, link); |
| g_free_rcu(mh, rcu); |
| ret = 0; |
| } |
| goto unlock; |
| } |
| } |
| |
| if (handler) { |
| mh = g_new(MsgHandler, 1); |
| mh->conn_id = conn_id; |
| mh->handler = handler; |
| mh->data = data; |
| QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link); |
| ret = 0; |
| } else { |
| ret = -ENOENT; |
| } |
| unlock: |
| qemu_mutex_unlock(&handlers_mutex); |
| return ret; |
| } |
| |
| uint16_t hyperv_hcall_post_message(uint64_t param, bool fast) |
| { |
| uint16_t ret; |
| hwaddr len; |
| struct hyperv_post_message_input *msg; |
| MsgHandler *mh; |
| |
| if (fast) { |
| return HV_STATUS_INVALID_HYPERCALL_CODE; |
| } |
| if (param & (__alignof__(*msg) - 1)) { |
| return HV_STATUS_INVALID_ALIGNMENT; |
| } |
| |
| len = sizeof(*msg); |
| msg = cpu_physical_memory_map(param, &len, 0); |
| if (len < sizeof(*msg)) { |
| ret = HV_STATUS_INSUFFICIENT_MEMORY; |
| goto unmap; |
| } |
| if (msg->payload_size > sizeof(msg->payload)) { |
| ret = HV_STATUS_INVALID_HYPERCALL_INPUT; |
| goto unmap; |
| } |
| |
| ret = HV_STATUS_INVALID_CONNECTION_ID; |
| WITH_RCU_READ_LOCK_GUARD() { |
| QLIST_FOREACH_RCU(mh, &msg_handlers, link) { |
| if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) { |
| ret = mh->handler(msg, mh->data); |
| break; |
| } |
| } |
| } |
| |
| unmap: |
| cpu_physical_memory_unmap(msg, len, 0, 0); |
| return ret; |
| } |
| |
| static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) |
| { |
| int ret; |
| EventFlagHandler *handler; |
| |
| qemu_mutex_lock(&handlers_mutex); |
| QLIST_FOREACH(handler, &event_flag_handlers, link) { |
| if (handler->conn_id == conn_id) { |
| if (notifier) { |
| ret = -EEXIST; |
| } else { |
| QLIST_REMOVE_RCU(handler, link); |
| g_free_rcu(handler, rcu); |
| ret = 0; |
| } |
| goto unlock; |
| } |
| } |
| |
| if (notifier) { |
| handler = g_new(EventFlagHandler, 1); |
| handler->conn_id = conn_id; |
| handler->notifier = notifier; |
| QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link); |
| ret = 0; |
| } else { |
| ret = -ENOENT; |
| } |
| unlock: |
| qemu_mutex_unlock(&handlers_mutex); |
| return ret; |
| } |
| |
| static bool process_event_flags_userspace; |
| |
| int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) |
| { |
| if (!process_event_flags_userspace && |
| !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) { |
| process_event_flags_userspace = true; |
| |
| warn_report("Hyper-V event signaling is not supported by this kernel; " |
| "using slower userspace hypercall processing"); |
| } |
| |
| if (!process_event_flags_userspace) { |
| struct kvm_hyperv_eventfd hvevfd = { |
| .conn_id = conn_id, |
| .fd = notifier ? event_notifier_get_fd(notifier) : -1, |
| .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN, |
| }; |
| |
| return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd); |
| } |
| return set_event_flag_handler(conn_id, notifier); |
| } |
| |
| uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast) |
| { |
| EventFlagHandler *handler; |
| |
| if (unlikely(!fast)) { |
| hwaddr addr = param; |
| |
| if (addr & (__alignof__(addr) - 1)) { |
| return HV_STATUS_INVALID_ALIGNMENT; |
| } |
| |
| param = ldq_phys(&address_space_memory, addr); |
| } |
| |
| /* |
| * Per spec, bits 32-47 contain the extra "flag number". However, we |
| * have no use for it, and in all known usecases it is zero, so just |
| * report lookup failure if it isn't. |
| */ |
| if (param & 0xffff00000000ULL) { |
| return HV_STATUS_INVALID_PORT_ID; |
| } |
| /* remaining bits are reserved-zero */ |
| if (param & ~HV_CONNECTION_ID_MASK) { |
| return HV_STATUS_INVALID_HYPERCALL_INPUT; |
| } |
| |
| RCU_READ_LOCK_GUARD(); |
| QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) { |
| if (handler->conn_id == param) { |
| event_notifier_set(handler->notifier); |
| return 0; |
| } |
| } |
| return HV_STATUS_INVALID_CONNECTION_ID; |
| } |