target/i386/hax/hax-mem.c - qemu - Git at Google

 /*
  * HAX memory mapping operations
  *
  * Copyright (c) 2015-16 Intel Corporation
  * Copyright 2016 Google, Inc.
  *
  * This work is licensed under the terms of the GNU GPL, version 2.  See
  * the COPYING file in the top-level directory.
  */

 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/address-spaces.h"
 #include "qemu/error-report.h"

 #include "hax-accel-ops.h"
 #include "qemu/queue.h"

 /* Compile-time switch for the debug traces of this file (0 = off). */
 #define DEBUG_HAX_MEM 0

 /*
  * DPRINTF: printf-style debug trace written to stdout.
  *
  * The fprintf() sits behind a regular if () instead of an #if, so the format
  * string and its arguments are still compiled when DEBUG_HAX_MEM is 0.
  */
 #define DPRINTF(fmt, ...) \
     do { \
         if (DEBUG_HAX_MEM) { \
             fprintf(stdout, fmt, ## __VA_ARGS__); \
         } \
     } while (0)

 /**
  * HAXMapping: describes a pending guest physical memory mapping
  *
  * @start_pa: a guest physical address marking the start of the region; must be
  *            page-aligned
  * @size: the length of the region in bytes, counted from @start_pa (the region
  *        ends at @start_pa + @size); must be page-aligned
  * @host_va: the host virtual address of the start of the mapping
  * @flags: mapping parameters e.g. HAX_RAM_INFO_ROM or HAX_RAM_INFO_INVALID
  * @entry: additional fields for linking #HAXMapping instances together
  */
 typedef struct HAXMapping {
     uint64_t start_pa;
     uint32_t size;
     uint64_t host_va;
     int flags;
     QTAILQ_ENTRY(HAXMapping) entry;
 } HAXMapping;

 /*
  * A doubly-linked list (actually a tail queue) of the pending page mappings
  * for the ongoing memory transaction.
  *
  * It is used to reduce the number of page mapping updates done through the
  * kernel module. For example, it's effective when a driver is digging an MMIO
  * hole inside an existing memory mapping. It will get a deletion of the whole
  * region, then the addition of the 2 remaining RAM areas around the hole and
  * finally the memory transaction commit. During the commit, it will effectively
  * send to the kernel only the removal of the pages from the MMIO hole after
  * having computed locally the result of the deletion and additions.
  *
  * hax_transaction_begin() asserts that the list is empty, and
  * hax_transaction_commit() sends every entry to the kernel module and frees
  * it, leaving the list empty again.
  */
 static QTAILQ_HEAD(, HAXMapping) mappings =
     QTAILQ_HEAD_INITIALIZER(mappings);

 /**
  * hax_mapping_dump_list: dumps @mappings to stdout (for debugging)
  */
 static void hax_mapping_dump_list(void)
 {
     HAXMapping *entry;

     DPRINTF("%s updates:\n", __func__);
     QTAILQ_FOREACH(entry, &mappings, entry) {
         DPRINTF("\t%c 0x%016" PRIx64 "->0x%016" PRIx64 " VA 0x%016" PRIx64
                 "%s\n", entry->flags & HAX_RAM_INFO_INVALID ? '-' : '+',
                 entry->start_pa, entry->start_pa + entry->size, entry->host_va,
                 entry->flags & HAX_RAM_INFO_ROM ? " ROM" : "");
     }
 }

 /**
  * hax_insert_mapping_before: allocates a new #HAXMapping and links it into
  * @mappings
  *
  * @next: the entry in front of which the new one is inserted, or NULL to
  *        append the new entry at the tail of the list
  * @start_pa: value stored in the new entry's start_pa field
  * @size: value stored in the new entry's size field
  * @host_va: value stored in the new entry's host_va field
  * @flags: value stored in the new entry's flags field
  */
 static void hax_insert_mapping_before(HAXMapping *next, uint64_t start_pa,
                                       uint32_t size, uint64_t host_va,
                                       uint8_t flags)
 {
     HAXMapping *fresh = g_malloc0(sizeof(*fresh));

     fresh->start_pa = start_pa;
     fresh->size = size;
     fresh->host_va = host_va;
     fresh->flags = flags;

     if (next) {
         QTAILQ_INSERT_BEFORE(next, fresh, entry);
     } else {
         QTAILQ_INSERT_TAIL(&mappings, fresh, entry);
     }
 }

 /**
  * hax_mapping_is_opposite: tells whether a request cancels a pending entry
  *
  * @entry: the pending mapping to compare against
  * @host_va: host virtual address of the new request
  * @flags: flags of the new request
  *
  * Returns: true when both refer to the same host virtual address and their
  * flags differ by exactly HAX_RAM_INFO_INVALID, i.e. one is a removal and the
  * other an addition with otherwise identical flags (same read-only setting).
  */
 static bool hax_mapping_is_opposite(HAXMapping *entry, uint64_t host_va,
                                     uint8_t flags)
 {
     if (entry->host_va != host_va) {
         return false;
     }

     return (entry->flags ^ flags) == HAX_RAM_INFO_INVALID;
 }

 /**
  * hax_update_mapping: merges one add/remove request into @mappings
  *
  * @start_pa: guest physical address of the start of the requested range
  * @size: length of the requested range in bytes
  * @host_va: host virtual address corresponding to @start_pa
  * @flags: flags of the request (HAX_RAM_INFO_INVALID for a removal)
  *
  * The pending list is walked front to back and the request is consumed piece
  * by piece:
  *  - entries ending at or before the current @start_pa are skipped;
  *  - the part of the request lying in front of an entry becomes a new entry;
  *  - an entry that starts before the request is split at @start_pa;
  *  - where the request overlaps an entry, the overlap is dropped from the list
  *    if the request is the opposite of the entry (see
  *    hax_mapping_is_opposite()), otherwise it takes the request's host_va and
  *    flags.
  * Whatever is left of the request after the walk is appended at the tail.
  */
 static void hax_update_mapping(uint64_t start_pa, uint32_t size,
                                uint64_t host_va, uint8_t flags)
 {
     /*
      * End of the request, computed once: start_pa and size are adjusted
      * together below, so start_pa + size keeps this value throughout.
      */
     uint64_t end_pa = start_pa + size;
     HAXMapping *entry, *next;

     QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) {
         uint32_t chunk_sz;
         /* this entry lies entirely before the request: not affected */
         if (start_pa >= entry->start_pa + entry->size) {
             continue;
         }
         if (start_pa < entry->start_pa) {
             /*
              * The request starts in front of this entry: queue either the
              * whole request (it ends before the entry starts) or only the
              * part up to the start of the entry.
              */
             chunk_sz = end_pa <= entry->start_pa ? size
                                                  : entry->start_pa - start_pa;
             hax_insert_mapping_before(entry, start_pa, chunk_sz,
                                       host_va, flags);
             start_pa += chunk_sz;
             host_va += chunk_sz;
             size -= chunk_sz;
         } else if (start_pa > entry->start_pa) {
             /* split the existing chunk at start_pa */
             chunk_sz = start_pa - entry->start_pa;
             hax_insert_mapping_before(entry, entry->start_pa, chunk_sz,
                                       entry->host_va, entry->flags);
             entry->start_pa += chunk_sz;
             entry->host_va += chunk_sz;
             entry->size -= chunk_sz;
         }
         /*
          * Now start_pa == entry->start_pa, unless the request ended before
          * this entry; in that case size is 0 and the block below is skipped.
          */
         chunk_sz = MIN(size, entry->size);
         if (chunk_sz) {
             /* evaluated before entry->host_va is modified below */
             bool nop = hax_mapping_is_opposite(entry, host_va, flags);
             bool partial = chunk_sz < entry->size;
             if (partial) {
                 /* remove the beginning of the existing chunk */
                 entry->start_pa += chunk_sz;
                 entry->host_va += chunk_sz;
                 entry->size -= chunk_sz;
                 if (!nop) {
                     /* the overlapped part gets the request's properties */
                     hax_insert_mapping_before(entry, start_pa, chunk_sz,
                                               host_va, flags);
                 }
             } else { /* affects the full mapping entry */
                 if (nop) { /* no change to this mapping, remove it */
                     QTAILQ_REMOVE(&mappings, entry, entry);
                     g_free(entry);
                 } else { /* update mapping properties */
                     entry->host_va = host_va;
                     entry->flags = flags;
                 }
             }
             start_pa += chunk_sz;
             host_va += chunk_sz;
             size -= chunk_sz;
         }
         if (!size) { /* we are done */
             break;
         }
     }
     if (size) { /* add the leftover */
         hax_insert_mapping_before(NULL, start_pa, size, host_va, flags);
     }
 }

 /**
  * hax_process_section: queues the mapping updates for one memory section
  *
  * @section: the section being added or removed
  * @flags: 0 for an addition, HAX_RAM_INFO_INVALID for a removal;
  *         HAX_RAM_INFO_ROM is OR-ed in here when the region is a ROM
  *
  * Sections that are not RAM are ignored (with a warning for ROMD regions).
  * The range is trimmed to host page boundaries and then handed to
  * hax_update_mapping() in pieces whose size fits in 32 bits.
  */
 static void hax_process_section(MemoryRegionSection *section, uint8_t flags)
 {
     MemoryRegion *mr = section->mr;
     hwaddr gpa = section->offset_within_address_space;
     ram_addr_t remaining = int128_get64(section->size);
     unsigned int head_pad;
     uint64_t hva;
     uint32_t chunk_max;

     /* Only RAM and ROM regions are of interest */
     if (!memory_region_is_ram(mr)) {
         if (memory_region_is_romd(mr)) {
             /* ROMD is not supported by the HAXM kernel module yet */
             warn_report("Ignoring ROMD region 0x%016" PRIx64 "->0x%016" PRIx64,
                         gpa, gpa + remaining);
         }
         return;
     }

     /*
      * Page-align the start address and the length (cf. kvm_set_phys_mem()
      * in kvm-all.c): head_pad is what must be skipped at the front, and
      * the length is then truncated by the page mask.
      */
     head_pad = qemu_real_host_page_size - (gpa & ~qemu_real_host_page_mask);
     head_pad &= ~qemu_real_host_page_mask;
     if (head_pad > remaining) {
         return;
     }
     gpa += head_pad;
     remaining -= head_pad;
     remaining &= qemu_real_host_page_mask;
     if (!remaining) {
         return;
     }
     if (gpa & ~qemu_real_host_page_mask) {
         return;
     }

     hva = (uintptr_t)memory_region_get_ram_ptr(mr)
         + section->offset_within_region + head_pad;
     if (memory_region_is_rom(mr)) {
         flags |= HAX_RAM_INFO_ROM;
     }

     /*
      * Sizes are 32-bit in the kernel module interface:
      * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_set_ram
      *
      * A mapping whose size needs more than 32 bits therefore cannot be
      * passed in a single call; it is cut into page-aligned pieces of at
      * most chunk_max bytes, each queued through hax_update_mapping().
      */
     chunk_max = UINT32_MAX & qemu_real_host_page_mask;
     for (; remaining > chunk_max; remaining -= chunk_max) {
         hax_update_mapping(gpa, chunk_max, hva, flags);
         gpa += chunk_max;
         hva += chunk_max;
     }
     /* What is left is no larger than chunk_max */
     hax_update_mapping(gpa, (uint32_t)remaining, hva, flags);
 }

 /*
  * MemoryListener .region_add callback: takes a reference on the section's
  * memory region, then queues the section as a mapping to add (flags 0, i.e.
  * without HAX_RAM_INFO_INVALID).
  */
 static void hax_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
     memory_region_ref(section->mr);
     hax_process_section(section, 0);
 }

 /*
  * MemoryListener .region_del callback: queues the section as a mapping to
  * remove (HAX_RAM_INFO_INVALID), then drops the reference on the section's
  * memory region.
  */
 static void hax_region_del(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
     hax_process_section(section, HAX_RAM_INFO_INVALID);
     memory_region_unref(section->mr);
 }

 /*
  * MemoryListener .begin callback: checks that no pending mapping is left in
  * @mappings when a new transaction starts.
  */
 static void hax_transaction_begin(MemoryListener *listener)
 {
     g_assert(QTAILQ_EMPTY(&mappings));
 }

 /*
  * MemoryListener .commit callback: sends every pending mapping of @mappings
  * to the kernel module through hax_set_ram() and empties the list.
  *
  * A failing hax_set_ram() call is reported on stderr; the entry is removed
  * and freed regardless, and the remaining entries are still processed.
  */
 static void hax_transaction_commit(MemoryListener *listener)
 {
     HAXMapping *cur, *following;

     if (QTAILQ_EMPTY(&mappings)) {
         return;
     }

     if (DEBUG_HAX_MEM) {
         hax_mapping_dump_list();
     }

     QTAILQ_FOREACH_SAFE(cur, &mappings, entry, following) {
         if (cur->flags & HAX_RAM_INFO_INVALID) {
             /* an unmapping carries only the INVALID flag and a null VA */
             cur->flags = HAX_RAM_INFO_INVALID;
             cur->host_va = 0;
         }

         if (hax_set_ram(cur->start_pa, cur->size, cur->host_va, cur->flags)) {
             fprintf(stderr, "%s: Failed mapping @0x%016" PRIx64 "+0x%"
                     PRIx32 " flags %02x\n", __func__, cur->start_pa,
                     cur->size, cur->flags);
         }

         QTAILQ_REMOVE(&mappings, cur, entry);
         g_free(cur);
     }
 }

 /*
  * MemoryListener .log_sync callback.
  *
  * The dirty bitmap sync is currently faked: the range covered by the section
  * is always reported dirty. Sections that are not RAM are skipped.
  */
 static void hax_log_sync(MemoryListener *listener,
                          MemoryRegionSection *section)
 {
     MemoryRegion *mr = section->mr;

     if (!memory_region_is_ram(mr)) {
         /* Skip MMIO regions */
         return;
     }

     /*
      * memory_region_set_dirty() takes an offset relative to the start of
      * the region, so begin at the section's offset within @mr rather than
      * at 0; otherwise the wrong range is dirtied for a section that does
      * not start at the beginning of its region.
      */
     memory_region_set_dirty(mr, section->offset_within_region,
                             int128_get64(section->size));
 }

 /*
  * Memory listener wiring the callbacks of this file; hax_memory_init()
  * registers it on address_space_memory.
  */
 static MemoryListener hax_memory_listener = {
     .begin = hax_transaction_begin,
     .commit = hax_transaction_commit,
     .region_add = hax_region_add,
     .region_del = hax_region_del,
     .log_sync = hax_log_sync,
     .priority = 10,
 };

 /*
  * RAMBlockNotifier .ram_block_added callback.
  *
  * Every RAM block has to be registered with the HAXM kernel module, otherwise
  * hax_set_ram() fails for any mapping that points into the block:
  * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_alloc_ram
  *
  * The helper is named hax_populate_ram() because HAXM kernel modules older
  * than 6.2.0 preallocated all host physical pages of the block as part of
  * this registration.
  *
  * A registration failure is fatal: a message is printed on stderr and the
  * process aborts.
  */
 static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size)
 {
     uint64_t block_hva = (uint64_t)(uintptr_t)host;

     if (hax_populate_ram(block_hva, size) >= 0) {
         return;
     }

     fprintf(stderr, "HAX failed to populate RAM\n");
     abort();
 }

 /* RAM block notifier; hax_memory_init() adds it to the notifier list. */
 static struct RAMBlockNotifier hax_ram_notifier = {
     .ram_block_added = hax_ram_block_added,
 };

 /*
  * hax_memory_init: installs the HAX memory hooks of this file.
  *
  * Adds the RAM block notifier first, then registers the memory listener on
  * address_space_memory.
  */
 void hax_memory_init(void)
 {
     ram_block_notifier_add(&hax_ram_notifier);
     memory_listener_register(&hax_memory_listener, &address_space_memory);
 }
	/*
	* HAX memory mapping operations
	*
	* Copyright (c) 2015-16 Intel Corporation
	* Copyright 2016 Google, Inc.
	*
	* This work is licensed under the terms of the GNU GPL, version 2. See
	* the COPYING file in the top-level directory.
	*/

	#include "qemu/osdep.h"
	#include "cpu.h"
	#include "exec/address-spaces.h"
	#include "qemu/error-report.h"

	#include "hax-accel-ops.h"
	#include "qemu/queue.h"

	/* Compile-time switch for the debug traces of this file (0 = off). */
	#define DEBUG_HAX_MEM 0

	/*
	 * DPRINTF: printf-style debug trace written to stdout.
	 *
	 * The fprintf() sits behind a regular if () instead of an #if, so the
	 * format string and its arguments are still compiled when DEBUG_HAX_MEM
	 * is 0.
	 */
	#define DPRINTF(fmt, ...) \
	do { \
	if (DEBUG_HAX_MEM) { \
	fprintf(stdout, fmt, ## __VA_ARGS__); \
	} \
	} while (0)

	/**
	 * HAXMapping: describes a pending guest physical memory mapping
	 *
	 * @start_pa: a guest physical address marking the start of the region; must
	 *            be page-aligned
	 * @size: the length of the region in bytes, counted from @start_pa (the
	 *        region ends at @start_pa + @size); must be page-aligned
	 * @host_va: the host virtual address of the start of the mapping
	 * @flags: mapping parameters e.g. HAX_RAM_INFO_ROM or HAX_RAM_INFO_INVALID
	 * @entry: additional fields for linking #HAXMapping instances together
	 */
	typedef struct HAXMapping {
	uint64_t start_pa;
	uint32_t size;
	uint64_t host_va;
	int flags;
	QTAILQ_ENTRY(HAXMapping) entry;
	} HAXMapping;

	/*
	 * A doubly-linked list (actually a tail queue) of the pending page mappings
	 * for the ongoing memory transaction.
	 *
	 * It is used to reduce the number of page mapping updates done through the
	 * kernel module. For example, it's effective when a driver is digging an
	 * MMIO hole inside an existing memory mapping. It will get a deletion of
	 * the whole region, then the addition of the 2 remaining RAM areas around
	 * the hole and finally the memory transaction commit. During the commit, it
	 * will effectively send to the kernel only the removal of the pages from
	 * the MMIO hole after having computed locally the result of the deletion
	 * and additions.
	 *
	 * hax_transaction_begin() asserts that the list is empty, and
	 * hax_transaction_commit() sends every entry to the kernel module and frees
	 * it, leaving the list empty again.
	 */
	static QTAILQ_HEAD(, HAXMapping) mappings =
	QTAILQ_HEAD_INITIALIZER(mappings);

	/**
	* hax_mapping_dump_list: dumps @mappings to stdout (for debugging)
	*/
	static void hax_mapping_dump_list(void)
	{
	HAXMapping *entry;

	DPRINTF("%s updates:\n", __func__);
	QTAILQ_FOREACH(entry, &mappings, entry) {
	DPRINTF("\t%c 0x%016" PRIx64 "->0x%016" PRIx64 " VA 0x%016" PRIx64
	"%s\n", entry->flags & HAX_RAM_INFO_INVALID ? '-' : '+',
	entry->start_pa, entry->start_pa + entry->size, entry->host_va,
	entry->flags & HAX_RAM_INFO_ROM ? " ROM" : "");
	}
	}

	/**
	 * hax_insert_mapping_before: allocates a new #HAXMapping and links it into
	 * @mappings
	 *
	 * @next: the entry in front of which the new one is inserted, or NULL to
	 *        append the new entry at the tail of the list
	 * @start_pa: value stored in the new entry's start_pa field
	 * @size: value stored in the new entry's size field
	 * @host_va: value stored in the new entry's host_va field
	 * @flags: value stored in the new entry's flags field
	 */
	static void hax_insert_mapping_before(HAXMapping *next, uint64_t start_pa,
	                                      uint32_t size, uint64_t host_va,
	                                      uint8_t flags)
	{
	    HAXMapping *fresh = g_malloc0(sizeof(*fresh));

	    fresh->start_pa = start_pa;
	    fresh->size = size;
	    fresh->host_va = host_va;
	    fresh->flags = flags;

	    if (next) {
	        QTAILQ_INSERT_BEFORE(next, fresh, entry);
	    } else {
	        QTAILQ_INSERT_TAIL(&mappings, fresh, entry);
	    }
	}

	/**
	 * hax_mapping_is_opposite: tells whether a request cancels a pending entry
	 *
	 * @entry: the pending mapping to compare against
	 * @host_va: host virtual address of the new request
	 * @flags: flags of the new request
	 *
	 * Returns: true when both refer to the same host virtual address and their
	 * flags differ by exactly HAX_RAM_INFO_INVALID, i.e. one is a removal and
	 * the other an addition with otherwise identical flags (same read-only
	 * setting).
	 */
	static bool hax_mapping_is_opposite(HAXMapping *entry, uint64_t host_va,
	                                    uint8_t flags)
	{
	    if (entry->host_va != host_va) {
	        return false;
	    }

	    return (entry->flags ^ flags) == HAX_RAM_INFO_INVALID;
	}

	/**
	 * hax_update_mapping: merges one add/remove request into @mappings
	 *
	 * @start_pa: guest physical address of the start of the requested range
	 * @size: length of the requested range in bytes
	 * @host_va: host virtual address corresponding to @start_pa
	 * @flags: flags of the request (HAX_RAM_INFO_INVALID for a removal)
	 *
	 * The pending list is walked front to back and the request is consumed
	 * piece by piece:
	 *  - entries ending at or before the current @start_pa are skipped;
	 *  - the part of the request lying in front of an entry becomes a new
	 *    entry;
	 *  - an entry that starts before the request is split at @start_pa;
	 *  - where the request overlaps an entry, the overlap is dropped from the
	 *    list if the request is the opposite of the entry (see
	 *    hax_mapping_is_opposite()), otherwise it takes the request's host_va
	 *    and flags.
	 * Whatever is left of the request after the walk is appended at the tail.
	 */
	static void hax_update_mapping(uint64_t start_pa, uint32_t size,
	                               uint64_t host_va, uint8_t flags)
	{
	    /*
	     * End of the request, computed once: start_pa and size are adjusted
	     * together below, so start_pa + size keeps this value throughout.
	     */
	    uint64_t end_pa = start_pa + size;
	    /* both must be pointers: they iterate over the list nodes */
	    HAXMapping *entry, *next;

	    QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) {
	        uint32_t chunk_sz;
	        /* this entry lies entirely before the request: not affected */
	        if (start_pa >= entry->start_pa + entry->size) {
	            continue;
	        }
	        if (start_pa < entry->start_pa) {
	            /*
	             * The request starts in front of this entry: queue either the
	             * whole request (it ends before the entry starts) or only the
	             * part up to the start of the entry.
	             */
	            chunk_sz = end_pa <= entry->start_pa ? size
	                                                 : entry->start_pa - start_pa;
	            hax_insert_mapping_before(entry, start_pa, chunk_sz,
	                                      host_va, flags);
	            start_pa += chunk_sz;
	            host_va += chunk_sz;
	            size -= chunk_sz;
	        } else if (start_pa > entry->start_pa) {
	            /* split the existing chunk at start_pa */
	            chunk_sz = start_pa - entry->start_pa;
	            hax_insert_mapping_before(entry, entry->start_pa, chunk_sz,
	                                      entry->host_va, entry->flags);
	            entry->start_pa += chunk_sz;
	            entry->host_va += chunk_sz;
	            entry->size -= chunk_sz;
	        }
	        /*
	         * Now start_pa == entry->start_pa, unless the request ended before
	         * this entry; in that case size is 0 and the block below is skipped.
	         */
	        chunk_sz = MIN(size, entry->size);
	        if (chunk_sz) {
	            /* evaluated before entry->host_va is modified below */
	            bool nop = hax_mapping_is_opposite(entry, host_va, flags);
	            bool partial = chunk_sz < entry->size;
	            if (partial) {
	                /* remove the beginning of the existing chunk */
	                entry->start_pa += chunk_sz;
	                entry->host_va += chunk_sz;
	                entry->size -= chunk_sz;
	                if (!nop) {
	                    /* the overlapped part gets the request's properties */
	                    hax_insert_mapping_before(entry, start_pa, chunk_sz,
	                                              host_va, flags);
	                }
	            } else { /* affects the full mapping entry */
	                if (nop) { /* no change to this mapping, remove it */
	                    QTAILQ_REMOVE(&mappings, entry, entry);
	                    g_free(entry);
	                } else { /* update mapping properties */
	                    entry->host_va = host_va;
	                    entry->flags = flags;
	                }
	            }
	            start_pa += chunk_sz;
	            host_va += chunk_sz;
	            size -= chunk_sz;
	        }
	        if (!size) { /* we are done */
	            break;
	        }
	    }
	    if (size) { /* add the leftover */
	        hax_insert_mapping_before(NULL, start_pa, size, host_va, flags);
	    }
	}

	/**
	 * hax_process_section: queues the mapping updates for one memory section
	 *
	 * @section: the section being added or removed
	 * @flags: 0 for an addition, HAX_RAM_INFO_INVALID for a removal;
	 *         HAX_RAM_INFO_ROM is OR-ed in here when the region is a ROM
	 *
	 * Sections that are not RAM are ignored (with a warning for ROMD regions).
	 * The range is trimmed to host page boundaries and then handed to
	 * hax_update_mapping() in pieces whose size fits in 32 bits.
	 */
	static void hax_process_section(MemoryRegionSection *section, uint8_t flags)
	{
	    MemoryRegion *mr = section->mr;
	    hwaddr start_pa = section->offset_within_address_space;
	    ram_addr_t size = int128_get64(section->size);
	    unsigned int delta;
	    uint64_t host_va;
	    uint32_t max_mapping_size;

	    /* We only care about RAM and ROM regions */
	    if (!memory_region_is_ram(mr)) {
	        if (memory_region_is_romd(mr)) {
	            /* HAXM kernel module does not support ROMD yet */
	            warn_report("Ignoring ROMD region 0x%016" PRIx64 "->0x%016" PRIx64,
	                        start_pa, start_pa + size);
	        }
	        return;
	    }

	    /*
	     * Adjust start_pa and size so that they are page-aligned (cf.
	     * kvm_set_phys_mem() in kvm-all.c): delta is what must be skipped at
	     * the front, and size is then truncated by the page mask.
	     */
	    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
	    delta &= ~qemu_real_host_page_mask;
	    if (delta > size) {
	        return;
	    }
	    start_pa += delta;
	    size -= delta;
	    size &= qemu_real_host_page_mask;
	    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
	        return;
	    }

	    host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
	            + section->offset_within_region + delta;
	    if (memory_region_is_rom(section->mr)) {
	        flags |= HAX_RAM_INFO_ROM;
	    }

	    /*
	     * The kernel module interface uses 32-bit sizes:
	     * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_set_ram
	     *
	     * If the mapping size is longer than 32 bits, we can't process it in
	     * one call into the kernel. Instead, we split the mapping into smaller
	     * ones, and call hax_update_mapping() on each.
	     */
	    max_mapping_size = UINT32_MAX & qemu_real_host_page_mask;
	    while (size > max_mapping_size) {
	        hax_update_mapping(start_pa, max_mapping_size, host_va, flags);
	        start_pa += max_mapping_size;
	        size -= max_mapping_size;
	        host_va += max_mapping_size;
	    }
	    /* Now size <= max_mapping_size */
	    hax_update_mapping(start_pa, (uint32_t)size, host_va, flags);
	}

	/*
	 * MemoryListener .region_add callback: takes a reference on the section's
	 * memory region, then queues the section as a mapping to add (flags 0,
	 * i.e. without HAX_RAM_INFO_INVALID).
	 */
	static void hax_region_add(MemoryListener *listener,
	MemoryRegionSection *section)
	{
	memory_region_ref(section->mr);
	hax_process_section(section, 0);
	}

	/*
	 * MemoryListener .region_del callback: queues the section as a mapping to
	 * remove (HAX_RAM_INFO_INVALID), then drops the reference on the section's
	 * memory region.
	 */
	static void hax_region_del(MemoryListener *listener,
	MemoryRegionSection *section)
	{
	hax_process_section(section, HAX_RAM_INFO_INVALID);
	memory_region_unref(section->mr);
	}

	/*
	 * MemoryListener .begin callback: checks that no pending mapping is left in
	 * @mappings when a new transaction starts.
	 */
	static void hax_transaction_begin(MemoryListener *listener)
	{
	g_assert(QTAILQ_EMPTY(&mappings));
	}

	/*
	 * MemoryListener .commit callback: sends every pending mapping of @mappings
	 * to the kernel module through hax_set_ram() and empties the list.
	 *
	 * A failing hax_set_ram() call is reported on stderr; the entry is removed
	 * and freed regardless, and the remaining entries are still processed.
	 */
	static void hax_transaction_commit(MemoryListener *listener)
	{
	    if (!QTAILQ_EMPTY(&mappings)) {
	        /* both must be pointers: they iterate over the list nodes */
	        HAXMapping *entry, *next;

	        if (DEBUG_HAX_MEM) {
	            hax_mapping_dump_list();
	        }
	        QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) {
	            if (entry->flags & HAX_RAM_INFO_INVALID) {
	                /* for unmapping, put the values expected by the kernel */
	                entry->flags = HAX_RAM_INFO_INVALID;
	                entry->host_va = 0;
	            }
	            if (hax_set_ram(entry->start_pa, entry->size,
	                            entry->host_va, entry->flags)) {
	                fprintf(stderr, "%s: Failed mapping @0x%016" PRIx64 "+0x%"
	                        PRIx32 " flags %02x\n", __func__, entry->start_pa,
	                        entry->size, entry->flags);
	            }
	            QTAILQ_REMOVE(&mappings, entry, entry);
	            g_free(entry);
	        }
	    }
	}

	/*
	 * MemoryListener .log_sync callback.
	 *
	 * The dirty bitmap sync is currently faked: the range covered by the
	 * section is always reported dirty. Sections that are not RAM are skipped.
	 */
	static void hax_log_sync(MemoryListener *listener,
	                         MemoryRegionSection *section)
	{
	    MemoryRegion *mr = section->mr;

	    if (!memory_region_is_ram(mr)) {
	        /* Skip MMIO regions */
	        return;
	    }

	    /*
	     * memory_region_set_dirty() takes an offset relative to the start of
	     * the region, so begin at the section's offset within @mr rather than
	     * at 0; otherwise the wrong range is dirtied for a section that does
	     * not start at the beginning of its region.
	     */
	    memory_region_set_dirty(mr, section->offset_within_region,
	                            int128_get64(section->size));
	}

	/*
	 * Memory listener wiring the callbacks of this file; hax_memory_init()
	 * registers it on address_space_memory.
	 */
	static MemoryListener hax_memory_listener = {
	.begin = hax_transaction_begin,
	.commit = hax_transaction_commit,
	.region_add = hax_region_add,
	.region_del = hax_region_del,
	.log_sync = hax_log_sync,
	.priority = 10,
	};

	/*
	 * RAMBlockNotifier .ram_block_added callback.
	 *
	 * A registration failure is fatal: a message is printed on stderr and the
	 * process aborts.
	 */
	static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size)
	{
	    /*
	     * We must register each RAM block with the HAXM kernel module, or
	     * hax_set_ram() will fail for any mapping into the RAM block:
	     * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_alloc_ram
	     *
	     * Old versions of the HAXM kernel module (< 6.2.0) used to preallocate
	     * all host physical pages for the RAM block as part of this
	     * registration process, hence the name hax_populate_ram().
	     */
	    if (hax_populate_ram((uint64_t)(uintptr_t)host, size) < 0) {
	        fprintf(stderr, "HAX failed to populate RAM\n");
	        abort();
	    }
	}

	/* RAM block notifier; hax_memory_init() adds it to the notifier list. */
	static struct RAMBlockNotifier hax_ram_notifier = {
	.ram_block_added = hax_ram_block_added,
	};

	/*
	 * hax_memory_init: installs the HAX memory hooks of this file.
	 *
	 * Adds the RAM block notifier first, then registers the memory listener on
	 * address_space_memory.
	 */
	void hax_memory_init(void)
	{
	ram_block_notifier_add(&hax_ram_notifier);
	memory_listener_register(&hax_memory_listener, &address_space_memory);
	}