/*
 * QEMU Xen emulation: Grant table support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "exec/target_page.h"
#include "exec/address-spaces.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_gnttab.h"

#include "sysemu/kvm.h"
#include "sysemu/kvm_xen.h"

#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/grant_table.h"

#define TYPE_XEN_GNTTAB "xen-gnttab"
OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)

/* A v1 grant entry is 8 bytes, so each 4KiB frame holds 512 of them. */
#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))

static struct gnttab_backend_ops emu_gnttab_backend_ops;

struct XenGnttabState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    /* Protects the table contents and the map_track counts. */
    QemuMutex gnt_lock;

    uint32_t nr_frames;      /* Number of frames the guest has mapped */
    uint32_t max_frames;     /* Limit from kvm_xen_get_gnttab_max_frames() */

    union {
        grant_entry_v1_t *v1;
        /* Theoretically, v2 support could be added here. */
    } entries;

    /* Backing RAM for the whole table, plus one alias per frame for overlay */
    MemoryRegion gnt_frames;
    MemoryRegion *gnt_aliases;
    /* Guest physical address of each mapped frame, or INVALID_GPA */
    uint64_t *gnt_frame_gpas;

    /* Per-grant-ref count of active local mappings */
    uint8_t *map_track;
};

struct XenGnttabState *xen_gnttab_singleton;

static void xen_gnttab_realize(DeviceState *dev, Error **errp)
{
    XenGnttabState *s = XEN_GNTTAB(dev);
    int i;

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen grant table support is for Xen emulation");
        return;
    }
    s->max_frames = kvm_xen_get_gnttab_max_frames();
    memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
                           XEN_PAGE_SIZE * s->max_frames, &error_abort);
    memory_region_set_enabled(&s->gnt_frames, true);
    s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);

    /* Create individual page-sized aliases for overlays */
    s->gnt_aliases = g_new0(MemoryRegion, s->max_frames);
    s->gnt_frame_gpas = g_new(uint64_t, s->max_frames);
    for (i = 0; i < s->max_frames; i++) {
        memory_region_init_alias(&s->gnt_aliases[i], OBJECT(dev),
                                 NULL, &s->gnt_frames,
                                 i * XEN_PAGE_SIZE, XEN_PAGE_SIZE);
        s->gnt_frame_gpas[i] = INVALID_GPA;
    }

    s->nr_frames = 0;
    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
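    /* Pre-seed the reserved xenstore grant entry so the guest can use it. */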
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);

    qemu_mutex_init(&s->gnt_lock);

    xen_gnttab_singleton = s;

    s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);

    xen_gnttab_ops = &emu_gnttab_backend_ops;
}

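/*
 * The frame contents migrate with the RAM block itself; our vmstate
 * records only the GPAs at which the guest had mapped each frame, so
 * re-establish those overlay mappings after loading.
 */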
static int xen_gnttab_post_load(void *opaque, int version_id)
{
    XenGnttabState *s = XEN_GNTTAB(opaque);
    uint32_t i;

    for (i = 0; i < s->nr_frames; i++) {
        if (s->gnt_frame_gpas[i] != INVALID_GPA) {
            xen_overlay_do_map_page(&s->gnt_aliases[i], s->gnt_frame_gpas[i]);
        }
    }
    return 0;
}

static bool xen_gnttab_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

static const VMStateDescription xen_gnttab_vmstate = {
    .name = "xen_gnttab",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_gnttab_is_needed,
    .post_load = xen_gnttab_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(nr_frames, XenGnttabState),
        VMSTATE_VARRAY_UINT32(gnt_frame_gpas, XenGnttabState, nr_frames, 0,
                              vmstate_info_uint64, uint64_t),
        VMSTATE_END_OF_LIST()
    }
};

static void xen_gnttab_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = xen_gnttab_realize;
    dc->vmsd = &xen_gnttab_vmstate;
}

static const TypeInfo xen_gnttab_info = {
    .name = TYPE_XEN_GNTTAB,
    .parent = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenGnttabState),
    .class_init = xen_gnttab_class_init,
};

void xen_gnttab_create(void)
{
    xen_gnttab_singleton = XEN_GNTTAB(sysbus_create_simple(TYPE_XEN_GNTTAB,
                                                           -1, NULL));
}

static void xen_gnttab_register_types(void)
{
    type_register_static(&xen_gnttab_info);
}

type_init(xen_gnttab_register_types)

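/*
 * Reached when the guest maps a grant table frame into its physmap. As a
 * sketch of the guest side (illustrative, not code in this file), a guest
 * kernel would typically issue:
 *
 *   struct xen_add_to_physmap xatp = {
 *       .domid = DOMID_SELF,
 *       .space = XENMAPSPACE_grant_table,
 *       .idx   = idx,      <- frame index within the table
 *       .gpfn  = gfn,      <- guest frame at which to map it
 *   };
 *   HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
 */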
int xen_gnttab_map_page(uint64_t idx, uint64_t gfn)
{
    XenGnttabState *s = xen_gnttab_singleton;
    uint64_t gpa = gfn << XEN_PAGE_SHIFT;

    if (!s) {
        return -ENOTSUP;
    }

    if (idx >= s->max_frames) {
        return -EINVAL;
    }

    QEMU_IOTHREAD_LOCK_GUARD();
    QEMU_LOCK_GUARD(&s->gnt_lock);

    xen_overlay_do_map_page(&s->gnt_aliases[idx], gpa);

    s->gnt_frame_gpas[idx] = gpa;

    if (s->nr_frames <= idx) {
        s->nr_frames = idx + 1;
    }

    return 0;
}

int xen_gnttab_set_version_op(struct gnttab_set_version *set)
{
    int ret;

    switch (set->version) {
    case 1:
        ret = 0;
        break;

    case 2:
        /* Behave as before set_version was introduced. */
        ret = -ENOSYS;
        break;

    default:
        ret = -EINVAL;
    }

    set->version = 1;
    return ret;
}

int xen_gnttab_get_version_op(struct gnttab_get_version *get)
{
    if (get->dom != DOMID_SELF && get->dom != xen_domid) {
        return -ESRCH;
    }

    get->version = 1;
    return 0;
}

int xen_gnttab_query_size_op(struct gnttab_query_size *size)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    if (size->dom != DOMID_SELF && size->dom != xen_domid) {
        size->status = GNTST_bad_domain;
        return 0;
    }

    size->status = GNTST_okay;
    size->nr_frames = s->nr_frames;
    size->max_nr_frames = s->max_frames;
    return 0;
}

/* Track per-open refs, to allow close() to clean up. */
struct active_ref {
    MemoryRegionSection mrs;    /* RAM section covering the granted page */
    void *virtaddr;             /* Our local mapping of that page */
    uint32_t refcnt;            /* Outstanding map_refs() calls for this ref */
    int prot;                   /* Union of PROT_READ/PROT_WRITE granted so far */
};

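/*
 * Drop one mapping of grant @ref: mark the page dirty if it was mapped
 * writable, release the MemoryRegion reference, and clear the
 * GTF_reading/GTF_writing flags once the last mapping goes away.
 */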
static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
                      MemoryRegionSection *mrs, int prot)
{
    if (mrs && mrs->mr) {
        if (prot & PROT_WRITE) {
            memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
                                    XEN_PAGE_SIZE);
        }
        memory_region_unref(mrs->mr);
        mrs->mr = NULL;
    }
    assert(s->map_track[ref] != 0);

    if (--s->map_track[ref] == 0) {
        grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
        qatomic_and(&gnt_p->flags, (uint16_t)~(GTF_reading | GTF_writing));
    }
}

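/*
 * Take one reference on grant @ref: atomically set GTF_reading (and
 * GTF_writing if @prot includes PROT_WRITE) and return the guest
 * physical address of the granted frame, or INVALID_GPA on failure.
 */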
static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
{
    uint16_t mask = GTF_type_mask | GTF_sub_page;
    grant_entry_v1_t gnt, *gnt_p;
    int retries = 0;

    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
        s->map_track[ref] == UINT8_MAX) {
        return INVALID_GPA;
    }

    if (prot & PROT_WRITE) {
        mask |= GTF_readonly;
    }

    gnt_p = &s->entries.v1[ref];

    /*
     * The guest can legitimately be changing the GTF_readonly flag. Allow
     * that, but don't let a malicious guest cause a livelock.
     */
    for (retries = 0; retries < 5; retries++) {
        uint16_t new_flags;

        /* Read the entry before an atomic operation on its flags */
        gnt = *(volatile grant_entry_v1_t *)gnt_p;

        if ((gnt.flags & mask) != GTF_permit_access ||
            gnt.domid != DOMID_QEMU) {
            return INVALID_GPA;
        }

        new_flags = gnt.flags | GTF_reading;
        if (prot & PROT_WRITE) {
            new_flags |= GTF_writing;
        }

        if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
            return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
        }
    }

    return INVALID_GPA;
}

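/* Per-open state: the set of grant refs this handle currently has mapped. */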
struct xengntdev_handle {
    GHashTable *active_maps;
};

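/* Nothing to preallocate in emulation, so this is a no-op. */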
static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
                                        uint32_t nr_grants)
{
    return 0;
}

static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt,
                                    uint32_t count, uint32_t domid,
                                    uint32_t *refs, int prot)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        errno = ENOTSUP;
        return NULL;
    }

    if (domid != xen_domid) {
        errno = EINVAL;
        return NULL;
    }

    if (!count || count > 4096) {
        errno = EINVAL;
        return NULL;
    }

    /*
     * Making a contiguous mapping from potentially discontiguous grant
     * references would be... distinctly non-trivial. We don't support it.
     * Even changing the API to return an array of pointers, one per page,
     * wouldn't be simple to use in PV backends because some structures
     * actually cross page boundaries (e.g. 32-bit blkif_response ring
     * entries are 12 bytes).
     */
    if (count != 1) {
        errno = EINVAL;
        return NULL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (act) {
        if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) {
            if (gnt_ref(s, refs[0], prot) == INVALID_GPA) {
                errno = EINVAL;
                return NULL;
            }
            act->prot |= PROT_WRITE;
        }
        act->refcnt++;
    } else {
        uint64_t gpa = gnt_ref(s, refs[0], prot);
        if (gpa == INVALID_GPA) {
            errno = EINVAL;
            return NULL;
        }

        act = g_new0(struct active_ref, 1);
        act->prot = prot;
        act->refcnt = 1;
        act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);

        if (act->mrs.mr &&
            !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) &&
            memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) {
            act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block,
                                             act->mrs.offset_within_region);
        }
        if (!act->virtaddr) {
            gnt_unref(s, refs[0], &act->mrs, 0);
            g_free(act);
            errno = EINVAL;
            return NULL;
        }

        s->map_track[refs[0]]++;
        g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act);
    }

    return act->virtaddr;
}

static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
{
    XenGnttabState *s = user_data;
    grant_ref_t gref = GPOINTER_TO_INT(key);
    struct active_ref *act = value;

    gnt_unref(s, gref, &act->mrs, act->prot);
    g_free(act);
    return true;
}

static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
                               void *start_address, uint32_t *refs,
                               uint32_t count)
{
    XenGnttabState *s = xen_gnttab_singleton;
    struct active_ref *act;

    if (!s) {
        return -ENOTSUP;
    }

    if (count != 1) {
        return -EINVAL;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    if (!act) {
        return -ENOENT;
    }

    if (act->virtaddr != start_address) {
        return -EINVAL;
    }

    if (!--act->refcnt) {
        do_unmap(GINT_TO_POINTER(refs[0]), act, s);
        g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0]));
    }

    return 0;
}

/*
 * This looks a bit like the one for true Xen in xen-operations.c but
 * in emulation we don't support multi-page mappings. And under Xen we
 * *want* the multi-page mappings so we have fewer bounces through the
 * kernel and the hypervisor. So the code paths end up being similar,
 * but different.
 */
static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain,
                              uint32_t domid, XenGrantCopySegment *segs,
                              uint32_t nr_segs, Error **errp)
{
    int prot = to_domain ? PROT_WRITE : PROT_READ;
    unsigned int i;

    for (i = 0; i < nr_segs; i++) {
        XenGrantCopySegment *seg = &segs[i];
        void *page;
        uint32_t ref = to_domain ? seg->dest.foreign.ref :
                                   seg->source.foreign.ref;

        page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot);
        if (!page) {
            if (errp) {
                error_setg_errno(errp, errno,
                                 "xen_be_gnttab_map_refs failed");
            }
            return -errno;
        }

        if (to_domain) {
            memcpy(page + seg->dest.foreign.offset, seg->source.virt,
                   seg->len);
        } else {
            memcpy(seg->dest.virt, page + seg->source.foreign.offset,
                   seg->len);
        }

        if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) {
            if (errp) {
                error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed");
            }
            return -errno;
        }
    }

    return 0;
}
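
/*
 * Usage sketch (hypothetical caller, for illustration only): copy a
 * len-byte reply from a local buffer into the guest page granted by
 * gref:
 *
 *   XenGrantCopySegment seg = {
 *       .source.virt = buf,
 *       .dest.foreign.ref = gref,
 *       .dest.foreign.offset = 0,
 *       .len = len,
 *   };
 *   rc = xen_be_gnttab_copy(xgt, true, xen_domid, &seg, 1, &err);
 */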

static struct xengntdev_handle *xen_be_gnttab_open(void)
{
    struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);

    xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
    return xgt;
}

static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
    g_hash_table_destroy(xgt->active_maps);
    g_free(xgt);
    return 0;
}

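/* Dispatch table installed by xen_gnttab_realize() for emulated grant tables */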
static struct gnttab_backend_ops emu_gnttab_backend_ops = {
    .open = xen_be_gnttab_open,
    .close = xen_be_gnttab_close,
    .grant_copy = xen_be_gnttab_copy,
    .set_max_grants = xen_be_gnttab_set_max_grants,
    .map_refs = xen_be_gnttab_map_refs,
    .unmap = xen_be_gnttab_unmap,
};

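/*
 * Reset to the post-realize state: no frames mapped by the guest, all
 * entries cleared except the pre-seeded xenstore grant, and all mapping
 * counts zeroed.
 */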
int xen_gnttab_reset(void)
{
    XenGnttabState *s = xen_gnttab_singleton;

    if (!s) {
        return -ENOTSUP;
    }

    QEMU_LOCK_GUARD(&s->gnt_lock);

    s->nr_frames = 0;

    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);

    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);

    memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1);

    return 0;
}