/*
 * QEMU Xen emulation: Xenstore emulation
*
* Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Authors: David Woodhouse <dwmw2@infradead.org>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/main-loop.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "migration/vmstate.h"
#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "xen_overlay.h"
#include "xen_evtchn.h"
#include "xen_xenstore.h"
#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"
#include "hw/xen/interface/io/xs_wire.h"
#include "hw/xen/interface/event_channel.h"
#define TYPE_XEN_XENSTORE "xen-xenstore"
OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
#define XEN_PAGE_SHIFT 12
#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
#define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
#define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))
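
/*
 * Device state: the shared XenStore ring page, staging buffers big enough
 * for one in-flight request and one response (header plus
 * XENSTORE_PAYLOAD_MAX), progress offsets into each, and the backend
 * event-channel handle and port numbers used to signal the guest.
 */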
struct XenXenstoreState {
/*< private >*/
SysBusDevice busdev;
/*< public >*/
MemoryRegion xenstore_page;
struct xenstore_domain_interface *xs;
uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
uint32_t req_offset;
uint32_t rsp_offset;
bool rsp_pending;
bool fatal_error;
evtchn_port_t guest_port;
evtchn_port_t be_port;
struct xenevtchn_handle *eh;
};
struct XenXenstoreState *xen_xenstore_singleton;
static void xen_xenstore_event(void *opaque);
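
/*
 * Realize: allocate the XenStore ring page as ordinary RAM (it is only
 * mapped into the guest later, from xen_xenstore_reset(), once KVM is
 * ready), open a backend event-channel handle, and register its file
 * descriptor so xen_xenstore_event() runs whenever the guest kicks us.
 */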
static void xen_xenstore_realize(DeviceState *dev, Error **errp)
{
XenXenstoreState *s = XEN_XENSTORE(dev);
if (xen_mode != XEN_EMULATE) {
error_setg(errp, "Xen xenstore support is for Xen emulation");
return;
}
memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
XEN_PAGE_SIZE, &error_abort);
memory_region_set_enabled(&s->xenstore_page, true);
s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
memset(s->xs, 0, XEN_PAGE_SIZE);
/* We can't map it this early as KVM isn't ready */
xen_xenstore_singleton = s;
s->eh = xen_be_evtchn_open();
if (!s->eh) {
error_setg(errp, "Xenstore evtchn port init failed");
return;
}
aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
xen_xenstore_event, NULL, NULL, NULL, s);
}
static bool xen_xenstore_is_needed(void *opaque)
{
return xen_mode == XEN_EMULATE;
}
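
/*
 * Migration: the guest may have rebound the XenStore event channel, so
 * refresh our record of the guest-visible port just before saving.
 */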
static int xen_xenstore_pre_save(void *opaque)
{
XenXenstoreState *s = opaque;
if (s->eh) {
s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
}
return 0;
}
static int xen_xenstore_post_load(void *opaque, int ver)
{
XenXenstoreState *s = opaque;
/*
* As qemu/dom0, rebind to the guest's port. The Windows drivers may
* unbind the XenStore evtchn and rebind to it, having obtained the
* "remote" port through EVTCHNOP_status. In the case that migration
* occurs while it's unbound, the "remote" port needs to be the same
* as before so that the guest can find it, but should remain unbound.
*/
if (s->guest_port) {
int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
s->guest_port);
if (be_port < 0) {
return be_port;
}
s->be_port = be_port;
}
return 0;
}
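
/*
 * The migration stream carries any partially transferred request and
 * response, the offsets into them, and the guest's port number so the
 * backend can rebind to it on the destination.
 */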
static const VMStateDescription xen_xenstore_vmstate = {
.name = "xen_xenstore",
.version_id = 1,
.minimum_version_id = 1,
.needed = xen_xenstore_is_needed,
.pre_save = xen_xenstore_pre_save,
.post_load = xen_xenstore_post_load,
.fields = (VMStateField[]) {
VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
sizeof_field(XenXenstoreState, req_data)),
VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
sizeof_field(XenXenstoreState, rsp_data)),
VMSTATE_UINT32(req_offset, XenXenstoreState),
VMSTATE_UINT32(rsp_offset, XenXenstoreState),
VMSTATE_BOOL(rsp_pending, XenXenstoreState),
VMSTATE_UINT32(guest_port, XenXenstoreState),
VMSTATE_BOOL(fatal_error, XenXenstoreState),
VMSTATE_END_OF_LIST()
}
};
static void xen_xenstore_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = xen_xenstore_realize;
dc->vmsd = &xen_xenstore_vmstate;
}
static const TypeInfo xen_xenstore_info = {
.name = TYPE_XEN_XENSTORE,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(XenXenstoreState),
.class_init = xen_xenstore_class_init,
};
void xen_xenstore_create(void)
{
DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);
xen_xenstore_singleton = XEN_XENSTORE(dev);
/*
* Defer the init (xen_xenstore_reset()) until KVM is set up and the
* overlay page can be mapped.
*/
}
static void xen_xenstore_register_types(void)
{
type_register_static(&xen_xenstore_info);
}
type_init(xen_xenstore_register_types)
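
/*
 * Return the guest-visible XenStore event-channel port, or 0 if the
 * device has not been created yet.
 */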
uint16_t xen_xenstore_get_port(void)
{
XenXenstoreState *s = xen_xenstore_singleton;
if (!s) {
return 0;
}
return s->guest_port;
}
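
/* A request is complete once the header plus req->len bytes have arrived. */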
static bool req_pending(XenXenstoreState *s)
{
struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
}
static void reset_req(XenXenstoreState *s)
{
memset(s->req_data, 0, sizeof(s->req_data));
s->req_offset = 0;
}
static void reset_rsp(XenXenstoreState *s)
{
s->rsp_pending = false;
memset(s->rsp_data, 0, sizeof(s->rsp_data));
s->rsp_offset = 0;
}
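
/*
 * Stub request handler: for now every request is answered with an
 * XS_ERROR / "ENOSYS" response. A real XenStore implementation would
 * dispatch on req->type here.
 */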
static void process_req(XenXenstoreState *s)
{
struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
const char enosys[] = "ENOSYS";
assert(req_pending(s));
assert(!s->rsp_pending);
rsp->type = XS_ERROR;
rsp->req_id = req->req_id;
rsp->tx_id = req->tx_id;
rsp->len = sizeof(enosys);
memcpy((void *)&rsp[1], enosys, sizeof(enosys));
s->rsp_pending = true;
reset_req(s);
}
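
/*
 * Copy up to @len bytes of request data out of the shared ring into @ptr,
 * handling wrap-around at XENSTORE_RING_SIZE and advancing req_cons.
 * Returns the number of bytes actually copied; flags a fatal error if the
 * ring indexes are implausible.
 */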
static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
unsigned int len)
{
if (!len) {
return 0;
}
XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
unsigned int copied = 0;
/* Ensure the ring contents don't cross the req_prod access. */
smp_rmb();
while (len) {
unsigned int avail = prod - cons;
unsigned int offset = MASK_XENSTORE_IDX(cons);
unsigned int copylen = avail;
if (avail > XENSTORE_RING_SIZE) {
error_report("XenStore ring handling error");
s->fatal_error = true;
break;
} else if (avail == 0) {
break;
}
if (copylen > len) {
copylen = len;
}
if (copylen > XENSTORE_RING_SIZE - offset) {
copylen = XENSTORE_RING_SIZE - offset;
}
memcpy(ptr, &s->xs->req[offset], copylen);
copied += copylen;
ptr += copylen;
len -= copylen;
cons += copylen;
}
/*
* Not sure this ever mattered except on Alpha, but this barrier
* is to ensure that the update to req_cons is globally visible
* only after we have consumed all the data from the ring, and we
* don't end up seeing data written to the ring *after* the other
* end sees the update and writes more to the ring. Xen's own
* xenstored has the same barrier here (although with no comment
* at all, obviously, because it's Xen code).
*/
smp_mb();
qatomic_set(&s->xs->req_cons, cons);
return copied;
}
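
/*
 * Mirror image of copy_from_ring(): copy up to @len bytes of response data
 * from @ptr into the shared ring, as far as free space allows, handling
 * wrap-around and advancing rsp_prod. Returns the number of bytes copied.
 */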
static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
unsigned int len)
{
if (!len) {
return 0;
}
XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
unsigned int copied = 0;
/*
     * This matches the barrier in copy_from_ring() (or the guest's
     * equivalent) between reading the data from the ring and updating
     * rsp_cons. It protects against the pathological case (which,
     * again, probably never happened except on Alpha) where our
* subsequent writes to the ring could *cross* the read of
* rsp_cons and the guest could see the new data when it was
* intending to read the old.
*/
smp_mb();
while (len) {
unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
unsigned int offset = MASK_XENSTORE_IDX(prod);
unsigned int copylen = len;
if (avail > XENSTORE_RING_SIZE) {
error_report("XenStore ring handling error");
s->fatal_error = true;
break;
} else if (avail == 0) {
break;
}
if (copylen > avail) {
copylen = avail;
}
if (copylen > XENSTORE_RING_SIZE - offset) {
copylen = XENSTORE_RING_SIZE - offset;
}
memcpy(&s->xs->rsp[offset], ptr, copylen);
copied += copylen;
ptr += copylen;
len -= copylen;
prod += copylen;
}
/* Ensure the ring contents are seen before rsp_prod update. */
smp_wmb();
qatomic_set(&s->xs->rsp_prod, prod);
return copied;
}
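
/*
 * Pull the next request from the ring in two stages: the fixed-size header
 * first (to learn req->len), then the payload. Rejects requests whose
 * advertised length exceeds XENSTORE_PAYLOAD_MAX.
 */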
static unsigned int get_req(XenXenstoreState *s)
{
unsigned int copied = 0;
if (s->fatal_error) {
return 0;
}
assert(!req_pending(s));
if (s->req_offset < XENSTORE_HEADER_SIZE) {
void *ptr = s->req_data + s->req_offset;
unsigned int len = XENSTORE_HEADER_SIZE;
unsigned int copylen = copy_from_ring(s, ptr, len);
copied += copylen;
s->req_offset += copylen;
}
if (s->req_offset >= XENSTORE_HEADER_SIZE) {
struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
error_report("Illegal XenStore request");
s->fatal_error = true;
return 0;
}
void *ptr = s->req_data + s->req_offset;
unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
unsigned int copylen = copy_from_ring(s, ptr, len);
copied += copylen;
s->req_offset += copylen;
}
return copied;
}
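
/*
 * Push as much of the pending response into the ring as will fit; once the
 * whole response has been delivered, clear the pending state.
 */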
static unsigned int put_rsp(XenXenstoreState *s)
{
if (s->fatal_error) {
return 0;
}
assert(s->rsp_pending);
struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);
void *ptr = s->rsp_data + s->rsp_offset;
unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
unsigned int copylen = copy_to_ring(s, ptr, len);
s->rsp_offset += copylen;
/* Have we produced a complete response? */
if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
reset_rsp(s);
}
return copylen;
}
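
/*
 * Event-channel handler: keep draining the pending response, pulling in
 * new request bytes, and processing a completed request (when no response
 * is outstanding) until no further progress is made, then notify the
 * guest if we moved either ring index.
 */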
static void xen_xenstore_event(void *opaque)
{
XenXenstoreState *s = opaque;
evtchn_port_t port = xen_be_evtchn_pending(s->eh);
unsigned int copied_to, copied_from;
bool processed, notify = false;
if (port != s->be_port) {
return;
}
/* We know this is a no-op. */
xen_be_evtchn_unmask(s->eh, port);
do {
copied_to = copied_from = 0;
processed = false;
if (s->rsp_pending) {
copied_to = put_rsp(s);
}
if (!req_pending(s)) {
copied_from = get_req(s);
}
if (req_pending(s) && !s->rsp_pending) {
process_req(s);
processed = true;
}
notify |= copied_to || copied_from;
} while (copied_to || copied_from || processed);
if (notify) {
xen_be_evtchn_notify(s->eh, s->be_port);
}
}
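
/*
 * Allocate an unbound event channel in the guest's domain with QEMU as
 * the remote end; on success this becomes the guest-visible XenStore port.
 */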
static void alloc_guest_port(XenXenstoreState *s)
{
struct evtchn_alloc_unbound alloc = {
.dom = DOMID_SELF,
.remote_dom = DOMID_QEMU,
};
if (!xen_evtchn_alloc_unbound_op(&alloc)) {
s->guest_port = alloc.port;
}
}
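
/*
 * Called once KVM is up and the overlay machinery can map pages: reset the
 * ring state, map the XenStore page at its special GPA if it isn't mapped
 * already, then allocate the guest's port and bind the backend side of it.
 */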
int xen_xenstore_reset(void)
{
XenXenstoreState *s = xen_xenstore_singleton;
int err;
if (!s) {
return -ENOTSUP;
}
s->req_offset = s->rsp_offset = 0;
s->rsp_pending = false;
if (!memory_region_is_mapped(&s->xenstore_page)) {
uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
xen_overlay_do_map_page(&s->xenstore_page, gpa);
}
alloc_guest_port(s);
/*
* As qemu/dom0, bind to the guest's port. For incoming migration, this
* will be unbound as the guest's evtchn table is overwritten. We then
* rebind to the correct guest port in xen_xenstore_post_load().
*/
err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
if (err < 0) {
return err;
}
s->be_port = err;
return 0;
}