Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 1 | /* |
| 2 | * QEMU Host Memory Backend |
| 3 | * |
| 4 | * Copyright (C) 2013-2014 Red Hat Inc |
| 5 | * |
| 6 | * Authors: |
| 7 | * Igor Mammedov <imammedo@redhat.com> |
| 8 | * |
| 9 | * This work is licensed under the terms of the GNU GPL, version 2 or later. |
| 10 | * See the COPYING file in the top-level directory. |
| 11 | */ |
Peter Maydell | 9c05833 | 2016-01-29 17:49:54 +0000 | [diff] [blame] | 12 | #include "qemu/osdep.h" |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 13 | #include "sysemu/hostmem.h" |
Eduardo Habkost | 6b26996 | 2015-07-16 17:29:12 -0300 | [diff] [blame] | 14 | #include "hw/boards.h" |
Markus Armbruster | da34e65 | 2016-03-14 09:01:28 +0100 | [diff] [blame] | 15 | #include "qapi/error.h" |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 16 | #include "qapi/visitor.h" |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 17 | #include "qapi-types.h" |
| 18 | #include "qapi-visit.h" |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 19 | #include "qemu/config-file.h" |
| 20 | #include "qom/object_interfaces.h" |
| 21 | |
#ifdef CONFIG_NUMA
#include <numaif.h>
/*
 * The backend's policy field is handed to mbind() unchanged, so each
 * HostMemPolicy enumerator has to carry the same numeric value as the
 * corresponding MPOL_* constant.  Break the build if they ever diverge.
 */
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
#endif
| 29 | |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 30 | static void |
Eric Blake | d7bce99 | 2016-01-29 06:48:55 -0700 | [diff] [blame] | 31 | host_memory_backend_get_size(Object *obj, Visitor *v, const char *name, |
| 32 | void *opaque, Error **errp) |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 33 | { |
| 34 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 35 | uint64_t value = backend->size; |
| 36 | |
Eric Blake | 51e72bc | 2016-01-29 06:48:54 -0700 | [diff] [blame] | 37 | visit_type_size(v, name, &value, errp); |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 38 | } |
| 39 | |
| 40 | static void |
Eric Blake | d7bce99 | 2016-01-29 06:48:55 -0700 | [diff] [blame] | 41 | host_memory_backend_set_size(Object *obj, Visitor *v, const char *name, |
| 42 | void *opaque, Error **errp) |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 43 | { |
| 44 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 45 | Error *local_err = NULL; |
| 46 | uint64_t value; |
| 47 | |
| 48 | if (memory_region_size(&backend->mr)) { |
| 49 | error_setg(&local_err, "cannot change property value"); |
| 50 | goto out; |
| 51 | } |
| 52 | |
Eric Blake | 51e72bc | 2016-01-29 06:48:54 -0700 | [diff] [blame] | 53 | visit_type_size(v, name, &value, &local_err); |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 54 | if (local_err) { |
| 55 | goto out; |
| 56 | } |
| 57 | if (!value) { |
| 58 | error_setg(&local_err, "Property '%s.%s' doesn't take value '%" |
| 59 | PRIu64 "'", object_get_typename(obj), name, value); |
| 60 | goto out; |
| 61 | } |
| 62 | backend->size = value; |
| 63 | out: |
| 64 | error_propagate(errp, local_err); |
| 65 | } |
| 66 | |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 67 | static void |
Eric Blake | d7bce99 | 2016-01-29 06:48:55 -0700 | [diff] [blame] | 68 | host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, |
| 69 | void *opaque, Error **errp) |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 70 | { |
| 71 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 72 | uint16List *host_nodes = NULL; |
| 73 | uint16List **node = &host_nodes; |
| 74 | unsigned long value; |
| 75 | |
| 76 | value = find_first_bit(backend->host_nodes, MAX_NODES); |
| 77 | if (value == MAX_NODES) { |
| 78 | return; |
| 79 | } |
| 80 | |
| 81 | *node = g_malloc0(sizeof(**node)); |
| 82 | (*node)->value = value; |
| 83 | node = &(*node)->next; |
| 84 | |
| 85 | do { |
| 86 | value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); |
| 87 | if (value == MAX_NODES) { |
| 88 | break; |
| 89 | } |
| 90 | |
| 91 | *node = g_malloc0(sizeof(**node)); |
| 92 | (*node)->value = value; |
| 93 | node = &(*node)->next; |
| 94 | } while (true); |
| 95 | |
Eric Blake | 51e72bc | 2016-01-29 06:48:54 -0700 | [diff] [blame] | 96 | visit_type_uint16List(v, name, &host_nodes, errp); |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 97 | } |
| 98 | |
| 99 | static void |
Eric Blake | d7bce99 | 2016-01-29 06:48:55 -0700 | [diff] [blame] | 100 | host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name, |
| 101 | void *opaque, Error **errp) |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 102 | { |
| 103 | #ifdef CONFIG_NUMA |
| 104 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 105 | uint16List *l = NULL; |
| 106 | |
Eric Blake | 51e72bc | 2016-01-29 06:48:54 -0700 | [diff] [blame] | 107 | visit_type_uint16List(v, name, &l, errp); |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 108 | |
| 109 | while (l) { |
| 110 | bitmap_set(backend->host_nodes, l->value, 1); |
| 111 | l = l->next; |
| 112 | } |
| 113 | #else |
| 114 | error_setg(errp, "NUMA node binding are not supported by this QEMU"); |
| 115 | #endif |
| 116 | } |
| 117 | |
Daniel P. Berrange | a3590da | 2015-05-27 16:07:56 +0100 | [diff] [blame] | 118 | static int |
| 119 | host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED) |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 120 | { |
| 121 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
Daniel P. Berrange | a3590da | 2015-05-27 16:07:56 +0100 | [diff] [blame] | 122 | return backend->policy; |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 123 | } |
| 124 | |
| 125 | static void |
Daniel P. Berrange | a3590da | 2015-05-27 16:07:56 +0100 | [diff] [blame] | 126 | host_memory_backend_set_policy(Object *obj, int policy, Error **errp) |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 127 | { |
| 128 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 129 | backend->policy = policy; |
| 130 | |
| 131 | #ifndef CONFIG_NUMA |
| 132 | if (policy != HOST_MEM_POLICY_DEFAULT) { |
| 133 | error_setg(errp, "NUMA policies are not supported by this QEMU"); |
| 134 | } |
| 135 | #endif |
| 136 | } |
| 137 | |
Paolo Bonzini | 605d0a9 | 2014-06-10 19:15:22 +0800 | [diff] [blame] | 138 | static bool host_memory_backend_get_merge(Object *obj, Error **errp) |
| 139 | { |
| 140 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 141 | |
| 142 | return backend->merge; |
| 143 | } |
| 144 | |
| 145 | static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) |
| 146 | { |
| 147 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 148 | |
| 149 | if (!memory_region_size(&backend->mr)) { |
| 150 | backend->merge = value; |
| 151 | return; |
| 152 | } |
| 153 | |
| 154 | if (value != backend->merge) { |
| 155 | void *ptr = memory_region_get_ram_ptr(&backend->mr); |
| 156 | uint64_t sz = memory_region_size(&backend->mr); |
| 157 | |
| 158 | qemu_madvise(ptr, sz, |
| 159 | value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); |
| 160 | backend->merge = value; |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | static bool host_memory_backend_get_dump(Object *obj, Error **errp) |
| 165 | { |
| 166 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 167 | |
| 168 | return backend->dump; |
| 169 | } |
| 170 | |
| 171 | static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) |
| 172 | { |
| 173 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 174 | |
| 175 | if (!memory_region_size(&backend->mr)) { |
| 176 | backend->dump = value; |
| 177 | return; |
| 178 | } |
| 179 | |
| 180 | if (value != backend->dump) { |
| 181 | void *ptr = memory_region_get_ram_ptr(&backend->mr); |
| 182 | uint64_t sz = memory_region_size(&backend->mr); |
| 183 | |
| 184 | qemu_madvise(ptr, sz, |
| 185 | value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); |
| 186 | backend->dump = value; |
| 187 | } |
| 188 | } |
| 189 | |
Paolo Bonzini | a35ba7b | 2014-06-10 19:15:23 +0800 | [diff] [blame] | 190 | static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) |
| 191 | { |
| 192 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 193 | |
| 194 | return backend->prealloc || backend->force_prealloc; |
| 195 | } |
| 196 | |
| 197 | static void host_memory_backend_set_prealloc(Object *obj, bool value, |
| 198 | Error **errp) |
| 199 | { |
| 200 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 201 | |
| 202 | if (backend->force_prealloc) { |
| 203 | if (value) { |
| 204 | error_setg(errp, |
| 205 | "remove -mem-prealloc to use the prealloc property"); |
| 206 | return; |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | if (!memory_region_size(&backend->mr)) { |
| 211 | backend->prealloc = value; |
| 212 | return; |
| 213 | } |
| 214 | |
| 215 | if (value && !backend->prealloc) { |
| 216 | int fd = memory_region_get_fd(&backend->mr); |
| 217 | void *ptr = memory_region_get_ram_ptr(&backend->mr); |
| 218 | uint64_t sz = memory_region_size(&backend->mr); |
| 219 | |
| 220 | os_mem_prealloc(fd, ptr, sz); |
| 221 | backend->prealloc = true; |
| 222 | } |
| 223 | } |
| 224 | |
Hu Tao | 58f4662 | 2014-06-10 19:15:18 +0800 | [diff] [blame] | 225 | static void host_memory_backend_init(Object *obj) |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 226 | { |
Paolo Bonzini | 605d0a9 | 2014-06-10 19:15:22 +0800 | [diff] [blame] | 227 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
Eduardo Habkost | 6b26996 | 2015-07-16 17:29:12 -0300 | [diff] [blame] | 228 | MachineState *machine = MACHINE(qdev_get_machine()); |
Paolo Bonzini | 605d0a9 | 2014-06-10 19:15:22 +0800 | [diff] [blame] | 229 | |
Eduardo Habkost | 6b26996 | 2015-07-16 17:29:12 -0300 | [diff] [blame] | 230 | backend->merge = machine_mem_merge(machine); |
| 231 | backend->dump = machine_dump_guest_core(machine); |
Paolo Bonzini | a35ba7b | 2014-06-10 19:15:23 +0800 | [diff] [blame] | 232 | backend->prealloc = mem_prealloc; |
Paolo Bonzini | 605d0a9 | 2014-06-10 19:15:22 +0800 | [diff] [blame] | 233 | |
| 234 | object_property_add_bool(obj, "merge", |
| 235 | host_memory_backend_get_merge, |
| 236 | host_memory_backend_set_merge, NULL); |
| 237 | object_property_add_bool(obj, "dump", |
| 238 | host_memory_backend_get_dump, |
| 239 | host_memory_backend_set_dump, NULL); |
Paolo Bonzini | a35ba7b | 2014-06-10 19:15:23 +0800 | [diff] [blame] | 240 | object_property_add_bool(obj, "prealloc", |
| 241 | host_memory_backend_get_prealloc, |
| 242 | host_memory_backend_set_prealloc, NULL); |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 243 | object_property_add(obj, "size", "int", |
Hu Tao | 58f4662 | 2014-06-10 19:15:18 +0800 | [diff] [blame] | 244 | host_memory_backend_get_size, |
| 245 | host_memory_backend_set_size, NULL, NULL, NULL); |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 246 | object_property_add(obj, "host-nodes", "int", |
| 247 | host_memory_backend_get_host_nodes, |
| 248 | host_memory_backend_set_host_nodes, NULL, NULL, NULL); |
Daniel P. Berrange | a3590da | 2015-05-27 16:07:56 +0100 | [diff] [blame] | 249 | object_property_add_enum(obj, "policy", "HostMemPolicy", |
| 250 | HostMemPolicy_lookup, |
| 251 | host_memory_backend_get_policy, |
| 252 | host_memory_backend_set_policy, NULL); |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 253 | } |
| 254 | |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 255 | MemoryRegion * |
| 256 | host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp) |
| 257 | { |
| 258 | return memory_region_size(&backend->mr) ? &backend->mr : NULL; |
| 259 | } |
| 260 | |
Hu Tao | bd9262d | 2014-06-10 19:15:19 +0800 | [diff] [blame] | 261 | static void |
| 262 | host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) |
| 263 | { |
| 264 | HostMemoryBackend *backend = MEMORY_BACKEND(uc); |
| 265 | HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); |
Paolo Bonzini | 605d0a9 | 2014-06-10 19:15:22 +0800 | [diff] [blame] | 266 | Error *local_err = NULL; |
| 267 | void *ptr; |
| 268 | uint64_t sz; |
Hu Tao | bd9262d | 2014-06-10 19:15:19 +0800 | [diff] [blame] | 269 | |
| 270 | if (bc->alloc) { |
Paolo Bonzini | 605d0a9 | 2014-06-10 19:15:22 +0800 | [diff] [blame] | 271 | bc->alloc(backend, &local_err); |
| 272 | if (local_err) { |
| 273 | error_propagate(errp, local_err); |
| 274 | return; |
| 275 | } |
| 276 | |
| 277 | ptr = memory_region_get_ram_ptr(&backend->mr); |
| 278 | sz = memory_region_size(&backend->mr); |
| 279 | |
| 280 | if (backend->merge) { |
| 281 | qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); |
| 282 | } |
| 283 | if (!backend->dump) { |
| 284 | qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); |
| 285 | } |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 286 | #ifdef CONFIG_NUMA |
| 287 | unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); |
| 288 | /* lastbit == MAX_NODES means maxnode = 0 */ |
| 289 | unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); |
| 290 | /* ensure policy won't be ignored in case memory is preallocated |
| 291 | * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so |
| 292 | * this doesn't catch hugepage case. */ |
Michael S. Tsirkin | 288d332 | 2014-08-13 13:50:24 +0200 | [diff] [blame] | 293 | unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 294 | |
| 295 | /* check for invalid host-nodes and policies and give more verbose |
| 296 | * error messages than mbind(). */ |
| 297 | if (maxnode && backend->policy == MPOL_DEFAULT) { |
| 298 | error_setg(errp, "host-nodes must be empty for policy default," |
| 299 | " or you should explicitly specify a policy other" |
| 300 | " than default"); |
| 301 | return; |
| 302 | } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { |
| 303 | error_setg(errp, "host-nodes must be set for policy %s", |
| 304 | HostMemPolicy_lookup[backend->policy]); |
| 305 | return; |
| 306 | } |
| 307 | |
| 308 | /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 |
| 309 | * as argument to mbind() due to an old Linux bug (feature?) which |
| 310 | * cuts off the last specified node. This means backend->host_nodes |
| 311 | * must have MAX_NODES+1 bits available. |
| 312 | */ |
| 313 | assert(sizeof(backend->host_nodes) >= |
| 314 | BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); |
| 315 | assert(maxnode <= MAX_NODES); |
| 316 | if (mbind(ptr, sz, backend->policy, |
| 317 | maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { |
Pavel Fedin | a3567ba | 2015-10-27 15:51:31 +0300 | [diff] [blame] | 318 | if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { |
| 319 | error_setg_errno(errp, errno, |
| 320 | "cannot bind memory to host NUMA nodes"); |
| 321 | return; |
| 322 | } |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 323 | } |
| 324 | #endif |
| 325 | /* Preallocate memory after the NUMA policy has been instantiated. |
| 326 | * This is necessary to guarantee memory is allocated with |
| 327 | * specified NUMA policy in place. |
| 328 | */ |
Paolo Bonzini | a35ba7b | 2014-06-10 19:15:23 +0800 | [diff] [blame] | 329 | if (backend->prealloc) { |
| 330 | os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz); |
| 331 | } |
Hu Tao | bd9262d | 2014-06-10 19:15:19 +0800 | [diff] [blame] | 332 | } |
| 333 | } |
| 334 | |
Lin Ma | 36bce5c | 2015-03-30 16:36:29 +0800 | [diff] [blame] | 335 | static bool |
| 336 | host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp) |
| 337 | { |
| 338 | MemoryRegion *mr; |
| 339 | |
| 340 | mr = host_memory_backend_get_memory(MEMORY_BACKEND(uc), errp); |
| 341 | if (memory_region_is_mapped(mr)) { |
| 342 | return false; |
| 343 | } else { |
| 344 | return true; |
| 345 | } |
| 346 | } |
| 347 | |
Hu Tao | bd9262d | 2014-06-10 19:15:19 +0800 | [diff] [blame] | 348 | static void |
| 349 | host_memory_backend_class_init(ObjectClass *oc, void *data) |
| 350 | { |
| 351 | UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); |
| 352 | |
| 353 | ucc->complete = host_memory_backend_memory_complete; |
Lin Ma | 36bce5c | 2015-03-30 16:36:29 +0800 | [diff] [blame] | 354 | ucc->can_be_deleted = host_memory_backend_can_be_deleted; |
Hu Tao | bd9262d | 2014-06-10 19:15:19 +0800 | [diff] [blame] | 355 | } |
| 356 | |
Hu Tao | 58f4662 | 2014-06-10 19:15:18 +0800 | [diff] [blame] | 357 | static const TypeInfo host_memory_backend_info = { |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 358 | .name = TYPE_MEMORY_BACKEND, |
| 359 | .parent = TYPE_OBJECT, |
| 360 | .abstract = true, |
| 361 | .class_size = sizeof(HostMemoryBackendClass), |
Hu Tao | bd9262d | 2014-06-10 19:15:19 +0800 | [diff] [blame] | 362 | .class_init = host_memory_backend_class_init, |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 363 | .instance_size = sizeof(HostMemoryBackend), |
Hu Tao | 58f4662 | 2014-06-10 19:15:18 +0800 | [diff] [blame] | 364 | .instance_init = host_memory_backend_init, |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 365 | .interfaces = (InterfaceInfo[]) { |
| 366 | { TYPE_USER_CREATABLE }, |
| 367 | { } |
| 368 | } |
| 369 | }; |
| 370 | |
| 371 | static void register_types(void) |
| 372 | { |
Hu Tao | 58f4662 | 2014-06-10 19:15:18 +0800 | [diff] [blame] | 373 | type_register_static(&host_memory_backend_info); |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 374 | } |
| 375 | |
| 376 | type_init(register_types); |