blob: 2f55f5e94f2e0c9a3a4c5b4513bcd8151015a901 [file] [log] [blame]
/*
 * Support for RAM backed by mmaped host memory.
 *
 * Copyright (c) 2015 Red Hat, Inc.
 *
 * Authors:
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 */
Markus Armbrustera9c94272016-06-22 19:11:19 +020012
Peter Maydellaafd7582016-01-29 17:49:55 +000013#include "qemu/osdep.h"
Markus Armbrustera9c94272016-06-22 19:11:19 +020014#include "qemu/mmap-alloc.h"
Cao jin4a3ecf22016-11-02 21:44:46 +080015#include "qemu/host-utils.h"
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030016
Michael S. Tsirkin7197fb42015-12-02 21:14:12 +020017#define HUGETLBFS_MAGIC 0x958458f6
18
19#ifdef CONFIG_LINUX
20#include <sys/vfs.h>
21#endif
22
/*
 * Report the page size that applies to mappings of @fd.
 *
 * On Linux builds, a descriptor whose filesystem type is HUGETLBFS_MAGIC
 * yields that filesystem's block size.  In every other case -- @fd is -1,
 * fstatfs() fails, the filesystem is of another type, or the build is not
 * for Linux -- the result is getpagesize().
 */
size_t qemu_fd_getpagesize(int fd)
{
#ifdef CONFIG_LINUX
    if (fd != -1) {
        struct statfs st;
        int rc;

        /* Retry for as long as the call is merely interrupted by a signal. */
        for (;;) {
            rc = fstatfs(fd, &st);
            if (rc == 0 || errno != EINTR) {
                break;
            }
        }

        if (rc == 0 && st.f_type == HUGETLBFS_MAGIC) {
            return st.f_bsize;
        }
    }
#endif

    return getpagesize();
}
42
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030043void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
44{
45 /*
46 * Note: this always allocates at least one extra page of virtual address
47 * space, even if size is already aligned.
48 */
49 size_t total = size + align;
Michael S. Tsirkin7197fb42015-12-02 21:14:12 +020050#if defined(__powerpc64__) && defined(__linux__)
51 /* On ppc64 mappings in the same segment (aka slice) must share the same
52 * page size. Since we will be re-allocating part of this segment
Michael S. Tsirkin097a50d2015-12-03 10:35:31 +020053 * from the supplied fd, we should make sure to use the same page size, to
54 * this end we mmap the supplied fd. In this case, set MAP_NORESERVE to
55 * avoid allocating backing store memory.
56 * We do this unless we are using the system page size, in which case
57 * anonymous memory is OK.
Michael S. Tsirkin7197fb42015-12-02 21:14:12 +020058 */
59 int anonfd = fd == -1 || qemu_fd_getpagesize(fd) == getpagesize() ? -1 : fd;
60 int flags = anonfd == -1 ? MAP_ANONYMOUS : MAP_NORESERVE;
61 void *ptr = mmap(0, total, PROT_NONE, flags | MAP_PRIVATE, anonfd, 0);
62#else
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030063 void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
Michael S. Tsirkin7197fb42015-12-02 21:14:12 +020064#endif
Cao jin4a3ecf22016-11-02 21:44:46 +080065 size_t offset;
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030066 void *ptr1;
67
68 if (ptr == MAP_FAILED) {
Michael S. Tsirkin9d4ec932015-10-25 17:07:45 +020069 return MAP_FAILED;
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030070 }
71
Cao jin4a3ecf22016-11-02 21:44:46 +080072 assert(is_power_of_2(align));
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030073 /* Always align to host page size */
74 assert(align >= getpagesize());
75
Cao jin4a3ecf22016-11-02 21:44:46 +080076 offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030077 ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
78 MAP_FIXED |
79 (fd == -1 ? MAP_ANONYMOUS : 0) |
80 (shared ? MAP_SHARED : MAP_PRIVATE),
81 fd, 0);
82 if (ptr1 == MAP_FAILED) {
83 munmap(ptr, total);
Michael S. Tsirkin9d4ec932015-10-25 17:07:45 +020084 return MAP_FAILED;
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030085 }
86
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030087 if (offset > 0) {
Cao jin6e4c8902016-11-02 21:44:47 +080088 munmap(ptr, offset);
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030089 }
90
91 /*
92 * Leave a single PROT_NONE page allocated after the RAM block, to serve as
93 * a guard page guarding against potential buffer overflows.
94 */
Cao jin6e4c8902016-11-02 21:44:47 +080095 total -= offset;
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030096 if (total > size + getpagesize()) {
Cao jin6e4c8902016-11-02 21:44:47 +080097 munmap(ptr1 + size + getpagesize(), total - size - getpagesize());
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +030098 }
99
Cao jin6e4c8902016-11-02 21:44:47 +0800100 return ptr1;
Michael S. Tsirkin794e8f32015-09-24 14:41:17 +0300101}
102
/*
 * Unmap a RAM block of @size bytes starting at @ptr, together with the one
 * page that follows it.  A NULL @ptr is ignored.
 */
void qemu_ram_munmap(void *ptr, size_t size)
{
    if (!ptr) {
        return;
    }

    /* A single call covers the RAM block plus the guard page behind it. */
    munmap(ptr, size + getpagesize());
}