Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 1 | /* |
| 2 | * HAX memory mapping operations |
| 3 | * |
| 4 | * Copyright (c) 2015-16 Intel Corporation |
| 5 | * Copyright 2016 Google, Inc. |
| 6 | * |
| 7 | * This work is licensed under the terms of the GNU GPL, version 2. See |
| 8 | * the COPYING file in the top-level directory. |
| 9 | */ |
| 10 | |
| 11 | #include "qemu/osdep.h" |
| 12 | #include "cpu.h" |
| 13 | #include "exec/address-spaces.h" |
Alistair Francis | b62e39b | 2017-09-11 12:52:56 -0700 | [diff] [blame] | 14 | #include "qemu/error-report.h" |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 15 | |
Claudio Fontana | b2d61ea | 2020-08-11 18:47:21 +0200 | [diff] [blame] | 16 | #include "hax-cpus.h" |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 17 | #include "qemu/queue.h" |
| 18 | |
/* Set to a non-zero value to enable the DPRINTF() traces in this file. */
#define DEBUG_HAX_MEM 0

/*
 * printf-style trace helper writing to stdout. The call is always compiled,
 * so format/argument mismatches are still diagnosed, but it only executes
 * when DEBUG_HAX_MEM is non-zero.
 */
#define DPRINTF(fmt, ...)                    \
    do {                                     \
        if (DEBUG_HAX_MEM) {                 \
            printf(fmt, ## __VA_ARGS__);     \
        }                                    \
    } while (0)
| 27 | |
| 28 | /** |
| 29 | * HAXMapping: describes a pending guest physical memory mapping |
| 30 | * |
| 31 | * @start_pa: a guest physical address marking the start of the region; must be |
| 32 | * page-aligned |
| 33 | * @size: a guest physical address marking the end of the region; must be |
| 34 | * page-aligned |
| 35 | * @host_va: the host virtual address of the start of the mapping |
| 36 | * @flags: mapping parameters e.g. HAX_RAM_INFO_ROM or HAX_RAM_INFO_INVALID |
| 37 | * @entry: additional fields for linking #HAXMapping instances together |
| 38 | */ |
| 39 | typedef struct HAXMapping { |
| 40 | uint64_t start_pa; |
| 41 | uint32_t size; |
| 42 | uint64_t host_va; |
| 43 | int flags; |
| 44 | QTAILQ_ENTRY(HAXMapping) entry; |
| 45 | } HAXMapping; |
| 46 | |
| 47 | /* |
| 48 | * A doubly-linked list (actually a tail queue) of the pending page mappings |
| 49 | * for the ongoing memory transaction. |
| 50 | * |
| 51 | * It is used to optimize the number of page mapping updates done through the |
| 52 | * kernel module. For example, it's effective when a driver is digging an MMIO |
| 53 | * hole inside an existing memory mapping. It will get a deletion of the whole |
| 54 | * region, then the addition of the 2 remaining RAM areas around the hole and |
| 55 | * finally the memory transaction commit. During the commit, it will effectively |
| 56 | * send to the kernel only the removal of the pages from the MMIO hole after |
| 57 | * having computed locally the result of the deletion and additions. |
| 58 | */ |
Paolo Bonzini | b58deb3 | 2018-12-06 11:58:10 +0100 | [diff] [blame] | 59 | static QTAILQ_HEAD(, HAXMapping) mappings = |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 60 | QTAILQ_HEAD_INITIALIZER(mappings); |
| 61 | |
| 62 | /** |
| 63 | * hax_mapping_dump_list: dumps @mappings to stdout (for debugging) |
| 64 | */ |
| 65 | static void hax_mapping_dump_list(void) |
| 66 | { |
| 67 | HAXMapping *entry; |
| 68 | |
| 69 | DPRINTF("%s updates:\n", __func__); |
| 70 | QTAILQ_FOREACH(entry, &mappings, entry) { |
| 71 | DPRINTF("\t%c 0x%016" PRIx64 "->0x%016" PRIx64 " VA 0x%016" PRIx64 |
| 72 | "%s\n", entry->flags & HAX_RAM_INFO_INVALID ? '-' : '+', |
| 73 | entry->start_pa, entry->start_pa + entry->size, entry->host_va, |
| 74 | entry->flags & HAX_RAM_INFO_ROM ? " ROM" : ""); |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | static void hax_insert_mapping_before(HAXMapping *next, uint64_t start_pa, |
| 79 | uint32_t size, uint64_t host_va, |
| 80 | uint8_t flags) |
| 81 | { |
| 82 | HAXMapping *entry; |
| 83 | |
| 84 | entry = g_malloc0(sizeof(*entry)); |
| 85 | entry->start_pa = start_pa; |
| 86 | entry->size = size; |
| 87 | entry->host_va = host_va; |
| 88 | entry->flags = flags; |
| 89 | if (!next) { |
| 90 | QTAILQ_INSERT_TAIL(&mappings, entry, entry); |
| 91 | } else { |
| 92 | QTAILQ_INSERT_BEFORE(next, entry, entry); |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | static bool hax_mapping_is_opposite(HAXMapping *entry, uint64_t host_va, |
| 97 | uint8_t flags) |
| 98 | { |
| 99 | /* removed then added without change for the read-only flag */ |
| 100 | bool nop_flags = (entry->flags ^ flags) == HAX_RAM_INFO_INVALID; |
| 101 | |
| 102 | return (entry->host_va == host_va) && nop_flags; |
| 103 | } |
| 104 | |
| 105 | static void hax_update_mapping(uint64_t start_pa, uint32_t size, |
| 106 | uint64_t host_va, uint8_t flags) |
| 107 | { |
| 108 | uint64_t end_pa = start_pa + size; |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 109 | HAXMapping *entry, *next; |
| 110 | |
| 111 | QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) { |
Yu Ning | 8a3c3d9 | 2017-04-28 15:27:23 +0800 | [diff] [blame] | 112 | uint32_t chunk_sz; |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 113 | if (start_pa >= entry->start_pa + entry->size) { |
| 114 | continue; |
| 115 | } |
| 116 | if (start_pa < entry->start_pa) { |
| 117 | chunk_sz = end_pa <= entry->start_pa ? size |
| 118 | : entry->start_pa - start_pa; |
| 119 | hax_insert_mapping_before(entry, start_pa, chunk_sz, |
| 120 | host_va, flags); |
| 121 | start_pa += chunk_sz; |
| 122 | host_va += chunk_sz; |
| 123 | size -= chunk_sz; |
Yu Ning | 8a3c3d9 | 2017-04-28 15:27:23 +0800 | [diff] [blame] | 124 | } else if (start_pa > entry->start_pa) { |
| 125 | /* split the existing chunk at start_pa */ |
| 126 | chunk_sz = start_pa - entry->start_pa; |
| 127 | hax_insert_mapping_before(entry, entry->start_pa, chunk_sz, |
| 128 | entry->host_va, entry->flags); |
| 129 | entry->start_pa += chunk_sz; |
| 130 | entry->host_va += chunk_sz; |
| 131 | entry->size -= chunk_sz; |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 132 | } |
Yu Ning | 8a3c3d9 | 2017-04-28 15:27:23 +0800 | [diff] [blame] | 133 | /* now start_pa == entry->start_pa */ |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 134 | chunk_sz = MIN(size, entry->size); |
| 135 | if (chunk_sz) { |
| 136 | bool nop = hax_mapping_is_opposite(entry, host_va, flags); |
| 137 | bool partial = chunk_sz < entry->size; |
| 138 | if (partial) { |
| 139 | /* remove the beginning of the existing chunk */ |
| 140 | entry->start_pa += chunk_sz; |
| 141 | entry->host_va += chunk_sz; |
| 142 | entry->size -= chunk_sz; |
| 143 | if (!nop) { |
| 144 | hax_insert_mapping_before(entry, start_pa, chunk_sz, |
| 145 | host_va, flags); |
| 146 | } |
| 147 | } else { /* affects the full mapping entry */ |
| 148 | if (nop) { /* no change to this mapping, remove it */ |
| 149 | QTAILQ_REMOVE(&mappings, entry, entry); |
| 150 | g_free(entry); |
| 151 | } else { /* update mapping properties */ |
| 152 | entry->host_va = host_va; |
| 153 | entry->flags = flags; |
| 154 | } |
| 155 | } |
| 156 | start_pa += chunk_sz; |
| 157 | host_va += chunk_sz; |
| 158 | size -= chunk_sz; |
| 159 | } |
| 160 | if (!size) { /* we are done */ |
| 161 | break; |
| 162 | } |
| 163 | } |
| 164 | if (size) { /* add the leftover */ |
| 165 | hax_insert_mapping_before(NULL, start_pa, size, host_va, flags); |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | static void hax_process_section(MemoryRegionSection *section, uint8_t flags) |
| 170 | { |
| 171 | MemoryRegion *mr = section->mr; |
| 172 | hwaddr start_pa = section->offset_within_address_space; |
| 173 | ram_addr_t size = int128_get64(section->size); |
| 174 | unsigned int delta; |
| 175 | uint64_t host_va; |
Yu Ning | 7a5235c | 2018-01-12 18:22:35 +0800 | [diff] [blame] | 176 | uint32_t max_mapping_size; |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 177 | |
Yu Ning | 8a3c3d9 | 2017-04-28 15:27:23 +0800 | [diff] [blame] | 178 | /* We only care about RAM and ROM regions */ |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 179 | if (!memory_region_is_ram(mr)) { |
Yu Ning | 8a3c3d9 | 2017-04-28 15:27:23 +0800 | [diff] [blame] | 180 | if (memory_region_is_romd(mr)) { |
| 181 | /* HAXM kernel module does not support ROMD yet */ |
Alistair Francis | b62e39b | 2017-09-11 12:52:56 -0700 | [diff] [blame] | 182 | warn_report("Ignoring ROMD region 0x%016" PRIx64 "->0x%016" PRIx64, |
| 183 | start_pa, start_pa + size); |
Yu Ning | 8a3c3d9 | 2017-04-28 15:27:23 +0800 | [diff] [blame] | 184 | } |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 185 | return; |
| 186 | } |
| 187 | |
| 188 | /* Adjust start_pa and size so that they are page-aligned. (Cf |
| 189 | * kvm_set_phys_mem() in kvm-all.c). |
| 190 | */ |
| 191 | delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask); |
| 192 | delta &= ~qemu_real_host_page_mask; |
| 193 | if (delta > size) { |
| 194 | return; |
| 195 | } |
| 196 | start_pa += delta; |
| 197 | size -= delta; |
| 198 | size &= qemu_real_host_page_mask; |
| 199 | if (!size || (start_pa & ~qemu_real_host_page_mask)) { |
| 200 | return; |
| 201 | } |
| 202 | |
| 203 | host_va = (uintptr_t)memory_region_get_ram_ptr(mr) |
| 204 | + section->offset_within_region + delta; |
| 205 | if (memory_region_is_rom(section->mr)) { |
| 206 | flags |= HAX_RAM_INFO_ROM; |
| 207 | } |
| 208 | |
Yu Ning | 7a5235c | 2018-01-12 18:22:35 +0800 | [diff] [blame] | 209 | /* |
| 210 | * The kernel module interface uses 32-bit sizes: |
| 211 | * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_set_ram |
| 212 | * |
| 213 | * If the mapping size is longer than 32 bits, we can't process it in one |
| 214 | * call into the kernel. Instead, we split the mapping into smaller ones, |
| 215 | * and call hax_update_mapping() on each. |
| 216 | */ |
| 217 | max_mapping_size = UINT32_MAX & qemu_real_host_page_mask; |
| 218 | while (size > max_mapping_size) { |
| 219 | hax_update_mapping(start_pa, max_mapping_size, host_va, flags); |
| 220 | start_pa += max_mapping_size; |
| 221 | size -= max_mapping_size; |
| 222 | host_va += max_mapping_size; |
| 223 | } |
| 224 | /* Now size <= max_mapping_size */ |
| 225 | hax_update_mapping(start_pa, (uint32_t)size, host_va, flags); |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 226 | } |
| 227 | |
| 228 | static void hax_region_add(MemoryListener *listener, |
| 229 | MemoryRegionSection *section) |
| 230 | { |
| 231 | memory_region_ref(section->mr); |
| 232 | hax_process_section(section, 0); |
| 233 | } |
| 234 | |
| 235 | static void hax_region_del(MemoryListener *listener, |
| 236 | MemoryRegionSection *section) |
| 237 | { |
| 238 | hax_process_section(section, HAX_RAM_INFO_INVALID); |
| 239 | memory_region_unref(section->mr); |
| 240 | } |
| 241 | |
/*
 * MemoryListener .begin hook: a new memory transaction is starting.
 *
 * hax_transaction_commit() frees every pending entry it processes, so the
 * list is expected to be empty here; the assertion catches any leftover.
 */
static void hax_transaction_begin(MemoryListener *listener)
{
    g_assert(QTAILQ_EMPTY(&mappings));
}
| 246 | |
| 247 | static void hax_transaction_commit(MemoryListener *listener) |
| 248 | { |
| 249 | if (!QTAILQ_EMPTY(&mappings)) { |
| 250 | HAXMapping *entry, *next; |
| 251 | |
| 252 | if (DEBUG_HAX_MEM) { |
| 253 | hax_mapping_dump_list(); |
| 254 | } |
| 255 | QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) { |
| 256 | if (entry->flags & HAX_RAM_INFO_INVALID) { |
| 257 | /* for unmapping, put the values expected by the kernel */ |
| 258 | entry->flags = HAX_RAM_INFO_INVALID; |
| 259 | entry->host_va = 0; |
| 260 | } |
| 261 | if (hax_set_ram(entry->start_pa, entry->size, |
| 262 | entry->host_va, entry->flags)) { |
| 263 | fprintf(stderr, "%s: Failed mapping @0x%016" PRIx64 "+0x%" |
| 264 | PRIx32 " flags %02x\n", __func__, entry->start_pa, |
| 265 | entry->size, entry->flags); |
| 266 | } |
| 267 | QTAILQ_REMOVE(&mappings, entry, entry); |
| 268 | g_free(entry); |
| 269 | } |
| 270 | } |
| 271 | } |
| 272 | |
| 273 | /* currently we fake the dirty bitmap sync, always dirty */ |
| 274 | static void hax_log_sync(MemoryListener *listener, |
| 275 | MemoryRegionSection *section) |
| 276 | { |
| 277 | MemoryRegion *mr = section->mr; |
| 278 | |
| 279 | if (!memory_region_is_ram(mr)) { |
| 280 | /* Skip MMIO regions */ |
| 281 | return; |
| 282 | } |
| 283 | |
| 284 | memory_region_set_dirty(mr, 0, int128_get64(section->size)); |
| 285 | } |
| 286 | |
| 287 | static MemoryListener hax_memory_listener = { |
| 288 | .begin = hax_transaction_begin, |
| 289 | .commit = hax_transaction_commit, |
| 290 | .region_add = hax_region_add, |
| 291 | .region_del = hax_region_del, |
| 292 | .log_sync = hax_log_sync, |
| 293 | .priority = 10, |
| 294 | }; |
| 295 | |
| 296 | static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size) |
| 297 | { |
| 298 | /* |
Yu Ning | 7a5235c | 2018-01-12 18:22:35 +0800 | [diff] [blame] | 299 | * We must register each RAM block with the HAXM kernel module, or |
| 300 | * hax_set_ram() will fail for any mapping into the RAM block: |
| 301 | * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_alloc_ram |
| 302 | * |
| 303 | * Old versions of the HAXM kernel module (< 6.2.0) used to preallocate all |
| 304 | * host physical pages for the RAM block as part of this registration |
| 305 | * process, hence the name hax_populate_ram(). |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 306 | */ |
| 307 | if (hax_populate_ram((uint64_t)(uintptr_t)host, size) < 0) { |
Yu Ning | 7a5235c | 2018-01-12 18:22:35 +0800 | [diff] [blame] | 308 | fprintf(stderr, "HAX failed to populate RAM\n"); |
Vincent Palatin | 47c1c8c | 2017-01-10 11:59:56 +0100 | [diff] [blame] | 309 | abort(); |
| 310 | } |
| 311 | } |
| 312 | |
| 313 | static struct RAMBlockNotifier hax_ram_notifier = { |
| 314 | .ram_block_added = hax_ram_block_added, |
| 315 | }; |
| 316 | |
/*
 * hax_memory_init: hook the HAX accelerator into QEMU's memory subsystem by
 * installing the RAM block notifier and registering the memory listener on
 * the system memory address space.
 */
void hax_memory_init(void)
{
    /*
     * NOTE(review): the notifier is installed before the listener. Since
     * hax_set_ram() fails for mappings into RAM blocks that were not
     * registered first, this order looks deliberate - confirm before
     * reordering.
     */
    ram_block_notifier_add(&hax_ram_notifier);
    memory_listener_register(&hax_memory_listener, &address_space_memory);
}