Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 1 | /* |
| 2 | * QEMU Host Memory Backend |
| 3 | * |
| 4 | * Copyright (C) 2013-2014 Red Hat Inc |
| 5 | * |
| 6 | * Authors: |
| 7 | * Igor Mammedov <imammedo@redhat.com> |
| 8 | * |
| 9 | * This work is licensed under the terms of the GNU GPL, version 2 or later. |
| 10 | * See the COPYING file in the top-level directory. |
| 11 | */ |
| 12 | #include "sysemu/hostmem.h" |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 13 | #include "qapi/visitor.h" |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 14 | #include "qapi-types.h" |
| 15 | #include "qapi-visit.h" |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 16 | #include "qapi/qmp/qerror.h" |
| 17 | #include "qemu/config-file.h" |
| 18 | #include "qom/object_interfaces.h" |
| 19 | |
#ifdef CONFIG_NUMA
#include <numaif.h>
/*
 * Compile-time checks that every HOST_MEM_POLICY_* constant has the same
 * numeric value as the matching MPOL_* constant from <numaif.h>.  This file
 * passes backend->policy straight to mbind() and compares it against
 * MPOL_DEFAULT, so the two sets of constants must be interchangeable.
 */
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
#endif
| 27 | |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 28 | static void |
Hu Tao | 58f4662 | 2014-06-10 19:15:18 +0800 | [diff] [blame] | 29 | host_memory_backend_get_size(Object *obj, Visitor *v, void *opaque, |
| 30 | const char *name, Error **errp) |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 31 | { |
| 32 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 33 | uint64_t value = backend->size; |
| 34 | |
| 35 | visit_type_size(v, &value, name, errp); |
| 36 | } |
| 37 | |
| 38 | static void |
Hu Tao | 58f4662 | 2014-06-10 19:15:18 +0800 | [diff] [blame] | 39 | host_memory_backend_set_size(Object *obj, Visitor *v, void *opaque, |
| 40 | const char *name, Error **errp) |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 41 | { |
| 42 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 43 | Error *local_err = NULL; |
| 44 | uint64_t value; |
| 45 | |
| 46 | if (memory_region_size(&backend->mr)) { |
| 47 | error_setg(&local_err, "cannot change property value"); |
| 48 | goto out; |
| 49 | } |
| 50 | |
| 51 | visit_type_size(v, &value, name, &local_err); |
| 52 | if (local_err) { |
| 53 | goto out; |
| 54 | } |
| 55 | if (!value) { |
| 56 | error_setg(&local_err, "Property '%s.%s' doesn't take value '%" |
| 57 | PRIu64 "'", object_get_typename(obj), name, value); |
| 58 | goto out; |
| 59 | } |
| 60 | backend->size = value; |
| 61 | out: |
| 62 | error_propagate(errp, local_err); |
| 63 | } |
| 64 | |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 65 | static void |
| 66 | host_memory_backend_get_host_nodes(Object *obj, Visitor *v, void *opaque, |
| 67 | const char *name, Error **errp) |
| 68 | { |
| 69 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 70 | uint16List *host_nodes = NULL; |
| 71 | uint16List **node = &host_nodes; |
| 72 | unsigned long value; |
| 73 | |
| 74 | value = find_first_bit(backend->host_nodes, MAX_NODES); |
| 75 | if (value == MAX_NODES) { |
| 76 | return; |
| 77 | } |
| 78 | |
| 79 | *node = g_malloc0(sizeof(**node)); |
| 80 | (*node)->value = value; |
| 81 | node = &(*node)->next; |
| 82 | |
| 83 | do { |
| 84 | value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); |
| 85 | if (value == MAX_NODES) { |
| 86 | break; |
| 87 | } |
| 88 | |
| 89 | *node = g_malloc0(sizeof(**node)); |
| 90 | (*node)->value = value; |
| 91 | node = &(*node)->next; |
| 92 | } while (true); |
| 93 | |
| 94 | visit_type_uint16List(v, &host_nodes, name, errp); |
| 95 | } |
| 96 | |
| 97 | static void |
| 98 | host_memory_backend_set_host_nodes(Object *obj, Visitor *v, void *opaque, |
| 99 | const char *name, Error **errp) |
| 100 | { |
| 101 | #ifdef CONFIG_NUMA |
| 102 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 103 | uint16List *l = NULL; |
| 104 | |
| 105 | visit_type_uint16List(v, &l, name, errp); |
| 106 | |
| 107 | while (l) { |
| 108 | bitmap_set(backend->host_nodes, l->value, 1); |
| 109 | l = l->next; |
| 110 | } |
| 111 | #else |
| 112 | error_setg(errp, "NUMA node binding are not supported by this QEMU"); |
| 113 | #endif |
| 114 | } |
| 115 | |
| 116 | static void |
| 117 | host_memory_backend_get_policy(Object *obj, Visitor *v, void *opaque, |
| 118 | const char *name, Error **errp) |
| 119 | { |
| 120 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 121 | int policy = backend->policy; |
| 122 | |
| 123 | visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp); |
| 124 | } |
| 125 | |
| 126 | static void |
| 127 | host_memory_backend_set_policy(Object *obj, Visitor *v, void *opaque, |
| 128 | const char *name, Error **errp) |
| 129 | { |
| 130 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 131 | int policy; |
| 132 | |
| 133 | visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp); |
| 134 | backend->policy = policy; |
| 135 | |
| 136 | #ifndef CONFIG_NUMA |
| 137 | if (policy != HOST_MEM_POLICY_DEFAULT) { |
| 138 | error_setg(errp, "NUMA policies are not supported by this QEMU"); |
| 139 | } |
| 140 | #endif |
| 141 | } |
| 142 | |
Paolo Bonzini | 605d0a9 | 2014-06-10 19:15:22 +0800 | [diff] [blame] | 143 | static bool host_memory_backend_get_merge(Object *obj, Error **errp) |
| 144 | { |
| 145 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 146 | |
| 147 | return backend->merge; |
| 148 | } |
| 149 | |
| 150 | static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) |
| 151 | { |
| 152 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 153 | |
| 154 | if (!memory_region_size(&backend->mr)) { |
| 155 | backend->merge = value; |
| 156 | return; |
| 157 | } |
| 158 | |
| 159 | if (value != backend->merge) { |
| 160 | void *ptr = memory_region_get_ram_ptr(&backend->mr); |
| 161 | uint64_t sz = memory_region_size(&backend->mr); |
| 162 | |
| 163 | qemu_madvise(ptr, sz, |
| 164 | value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); |
| 165 | backend->merge = value; |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | static bool host_memory_backend_get_dump(Object *obj, Error **errp) |
| 170 | { |
| 171 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 172 | |
| 173 | return backend->dump; |
| 174 | } |
| 175 | |
| 176 | static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) |
| 177 | { |
| 178 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 179 | |
| 180 | if (!memory_region_size(&backend->mr)) { |
| 181 | backend->dump = value; |
| 182 | return; |
| 183 | } |
| 184 | |
| 185 | if (value != backend->dump) { |
| 186 | void *ptr = memory_region_get_ram_ptr(&backend->mr); |
| 187 | uint64_t sz = memory_region_size(&backend->mr); |
| 188 | |
| 189 | qemu_madvise(ptr, sz, |
| 190 | value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); |
| 191 | backend->dump = value; |
| 192 | } |
| 193 | } |
| 194 | |
Paolo Bonzini | a35ba7b | 2014-06-10 19:15:23 +0800 | [diff] [blame] | 195 | static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) |
| 196 | { |
| 197 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 198 | |
| 199 | return backend->prealloc || backend->force_prealloc; |
| 200 | } |
| 201 | |
| 202 | static void host_memory_backend_set_prealloc(Object *obj, bool value, |
| 203 | Error **errp) |
| 204 | { |
| 205 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 206 | |
| 207 | if (backend->force_prealloc) { |
| 208 | if (value) { |
| 209 | error_setg(errp, |
| 210 | "remove -mem-prealloc to use the prealloc property"); |
| 211 | return; |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | if (!memory_region_size(&backend->mr)) { |
| 216 | backend->prealloc = value; |
| 217 | return; |
| 218 | } |
| 219 | |
| 220 | if (value && !backend->prealloc) { |
| 221 | int fd = memory_region_get_fd(&backend->mr); |
| 222 | void *ptr = memory_region_get_ram_ptr(&backend->mr); |
| 223 | uint64_t sz = memory_region_size(&backend->mr); |
| 224 | |
| 225 | os_mem_prealloc(fd, ptr, sz); |
| 226 | backend->prealloc = true; |
| 227 | } |
| 228 | } |
| 229 | |
Hu Tao | 58f4662 | 2014-06-10 19:15:18 +0800 | [diff] [blame] | 230 | static void host_memory_backend_init(Object *obj) |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 231 | { |
Paolo Bonzini | 605d0a9 | 2014-06-10 19:15:22 +0800 | [diff] [blame] | 232 | HostMemoryBackend *backend = MEMORY_BACKEND(obj); |
| 233 | |
| 234 | backend->merge = qemu_opt_get_bool(qemu_get_machine_opts(), |
| 235 | "mem-merge", true); |
| 236 | backend->dump = qemu_opt_get_bool(qemu_get_machine_opts(), |
| 237 | "dump-guest-core", true); |
Paolo Bonzini | a35ba7b | 2014-06-10 19:15:23 +0800 | [diff] [blame] | 238 | backend->prealloc = mem_prealloc; |
Paolo Bonzini | 605d0a9 | 2014-06-10 19:15:22 +0800 | [diff] [blame] | 239 | |
| 240 | object_property_add_bool(obj, "merge", |
| 241 | host_memory_backend_get_merge, |
| 242 | host_memory_backend_set_merge, NULL); |
| 243 | object_property_add_bool(obj, "dump", |
| 244 | host_memory_backend_get_dump, |
| 245 | host_memory_backend_set_dump, NULL); |
Paolo Bonzini | a35ba7b | 2014-06-10 19:15:23 +0800 | [diff] [blame] | 246 | object_property_add_bool(obj, "prealloc", |
| 247 | host_memory_backend_get_prealloc, |
| 248 | host_memory_backend_set_prealloc, NULL); |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 249 | object_property_add(obj, "size", "int", |
Hu Tao | 58f4662 | 2014-06-10 19:15:18 +0800 | [diff] [blame] | 250 | host_memory_backend_get_size, |
| 251 | host_memory_backend_set_size, NULL, NULL, NULL); |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 252 | object_property_add(obj, "host-nodes", "int", |
| 253 | host_memory_backend_get_host_nodes, |
| 254 | host_memory_backend_set_host_nodes, NULL, NULL, NULL); |
| 255 | object_property_add(obj, "policy", "str", |
| 256 | host_memory_backend_get_policy, |
| 257 | host_memory_backend_set_policy, NULL, NULL, NULL); |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 258 | } |
| 259 | |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 260 | MemoryRegion * |
| 261 | host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp) |
| 262 | { |
| 263 | return memory_region_size(&backend->mr) ? &backend->mr : NULL; |
| 264 | } |
| 265 | |
Hu Tao | bd9262d | 2014-06-10 19:15:19 +0800 | [diff] [blame] | 266 | static void |
| 267 | host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) |
| 268 | { |
| 269 | HostMemoryBackend *backend = MEMORY_BACKEND(uc); |
| 270 | HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); |
Paolo Bonzini | 605d0a9 | 2014-06-10 19:15:22 +0800 | [diff] [blame] | 271 | Error *local_err = NULL; |
| 272 | void *ptr; |
| 273 | uint64_t sz; |
Hu Tao | bd9262d | 2014-06-10 19:15:19 +0800 | [diff] [blame] | 274 | |
| 275 | if (bc->alloc) { |
Paolo Bonzini | 605d0a9 | 2014-06-10 19:15:22 +0800 | [diff] [blame] | 276 | bc->alloc(backend, &local_err); |
| 277 | if (local_err) { |
| 278 | error_propagate(errp, local_err); |
| 279 | return; |
| 280 | } |
| 281 | |
| 282 | ptr = memory_region_get_ram_ptr(&backend->mr); |
| 283 | sz = memory_region_size(&backend->mr); |
| 284 | |
| 285 | if (backend->merge) { |
| 286 | qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); |
| 287 | } |
| 288 | if (!backend->dump) { |
| 289 | qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); |
| 290 | } |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 291 | #ifdef CONFIG_NUMA |
| 292 | unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); |
| 293 | /* lastbit == MAX_NODES means maxnode = 0 */ |
| 294 | unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); |
| 295 | /* ensure policy won't be ignored in case memory is preallocated |
| 296 | * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so |
| 297 | * this doesn't catch hugepage case. */ |
Michael S. Tsirkin | 288d332 | 2014-08-13 13:50:24 +0200 | [diff] [blame] | 298 | unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; |
Hu Tao | 4cf1b76 | 2014-06-10 19:15:25 +0800 | [diff] [blame] | 299 | |
| 300 | /* check for invalid host-nodes and policies and give more verbose |
| 301 | * error messages than mbind(). */ |
| 302 | if (maxnode && backend->policy == MPOL_DEFAULT) { |
| 303 | error_setg(errp, "host-nodes must be empty for policy default," |
| 304 | " or you should explicitly specify a policy other" |
| 305 | " than default"); |
| 306 | return; |
| 307 | } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { |
| 308 | error_setg(errp, "host-nodes must be set for policy %s", |
| 309 | HostMemPolicy_lookup[backend->policy]); |
| 310 | return; |
| 311 | } |
| 312 | |
| 313 | /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 |
| 314 | * as argument to mbind() due to an old Linux bug (feature?) which |
| 315 | * cuts off the last specified node. This means backend->host_nodes |
| 316 | * must have MAX_NODES+1 bits available. |
| 317 | */ |
| 318 | assert(sizeof(backend->host_nodes) >= |
| 319 | BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); |
| 320 | assert(maxnode <= MAX_NODES); |
| 321 | if (mbind(ptr, sz, backend->policy, |
| 322 | maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) { |
| 323 | error_setg_errno(errp, errno, |
| 324 | "cannot bind memory to host NUMA nodes"); |
| 325 | return; |
| 326 | } |
| 327 | #endif |
| 328 | /* Preallocate memory after the NUMA policy has been instantiated. |
| 329 | * This is necessary to guarantee memory is allocated with |
| 330 | * specified NUMA policy in place. |
| 331 | */ |
Paolo Bonzini | a35ba7b | 2014-06-10 19:15:23 +0800 | [diff] [blame] | 332 | if (backend->prealloc) { |
| 333 | os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz); |
| 334 | } |
Hu Tao | bd9262d | 2014-06-10 19:15:19 +0800 | [diff] [blame] | 335 | } |
| 336 | } |
| 337 | |
| 338 | static void |
| 339 | host_memory_backend_class_init(ObjectClass *oc, void *data) |
| 340 | { |
| 341 | UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); |
| 342 | |
| 343 | ucc->complete = host_memory_backend_memory_complete; |
| 344 | } |
| 345 | |
Hu Tao | 58f4662 | 2014-06-10 19:15:18 +0800 | [diff] [blame] | 346 | static const TypeInfo host_memory_backend_info = { |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 347 | .name = TYPE_MEMORY_BACKEND, |
| 348 | .parent = TYPE_OBJECT, |
| 349 | .abstract = true, |
| 350 | .class_size = sizeof(HostMemoryBackendClass), |
Hu Tao | bd9262d | 2014-06-10 19:15:19 +0800 | [diff] [blame] | 351 | .class_init = host_memory_backend_class_init, |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 352 | .instance_size = sizeof(HostMemoryBackend), |
Hu Tao | 58f4662 | 2014-06-10 19:15:18 +0800 | [diff] [blame] | 353 | .instance_init = host_memory_backend_init, |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 354 | .interfaces = (InterfaceInfo[]) { |
| 355 | { TYPE_USER_CREATABLE }, |
| 356 | { } |
| 357 | } |
| 358 | }; |
| 359 | |
| 360 | static void register_types(void) |
| 361 | { |
Hu Tao | 58f4662 | 2014-06-10 19:15:18 +0800 | [diff] [blame] | 362 | type_register_static(&host_memory_backend_info); |
Igor Mammedov | 1f07048 | 2014-06-06 17:54:29 +0200 | [diff] [blame] | 363 | } |
| 364 | |
| 365 | type_init(register_types); |