Laszlo Ersek | 3e16d14 | 2013-12-17 01:37:06 +0100 | [diff] [blame] | 1 | # This python script adds a new gdb command, "dump-guest-memory". It |
| 2 | # should be loaded with "source dump-guest-memory.py" at the (gdb) |
| 3 | # prompt. |
| 4 | # |
| 5 | # Copyright (C) 2013, Red Hat, Inc. |
| 6 | # |
| 7 | # Authors: |
| 8 | # Laszlo Ersek <lersek@redhat.com> |
| 9 | # |
| 10 | # This work is licensed under the terms of the GNU GPL, version 2 or later. See |
| 11 | # the COPYING file in the top-level directory. |
| 12 | # |
| 13 | # The leading docstring doesn't have idiomatic Python formatting. It is |
| 14 | # printed by gdb's "help" command (the first line is printed in the |
| 15 | # "help data" summary), and it should match how other help texts look in |
| 16 | # gdb. |
| 17 | |
| 18 | import struct |
| 19 | |
class DumpGuestMemory(gdb.Command):
    """Extract guest vmcore from qemu process coredump.

    The sole argument is FILE, identifying the target file to write the
    guest vmcore to.

    This GDB command reimplements the dump-guest-memory QMP command in
    python, using the representation of guest memory as captured in the qemu
    coredump. The qemu process that has been dumped must have had the
    command line option "-machine dump-guest-core=on".

    For simplicity, the "paging", "begin" and "end" parameters of the QMP
    command are not supported -- no attempt is made to get the guest's
    internal paging structures (ie. paging=false is hard-wired), and guest
    memory is always fully dumped.

    Only x86_64 guests are supported.

    The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
    not written to the vmcore. Preparing these would require context that is
    only present in the KVM host kernel module when the guest is alive. A
    fake ELF note is written instead, only to keep the ELF parser of "crash"
    happy.

    Dependent on how busted the qemu process was at the time of the
    coredump, this command might produce unpredictable results. If qemu
    deliberately called abort(), or it was dumped in response to a signal at
    a halfway fortunate point, then its coredump should be in reasonable
    shape and this command should mostly work."""

    TARGET_PAGE_SIZE = 0x1000
    TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000

    # Various ELF constants
    EM_X86_64   = 62        # AMD x86-64 target machine
    ELFDATA2LSB = 1         # little endian
    ELFCLASS64  = 2
    ELFMAG      = "\x7FELF"
    EV_CURRENT  = 1
    ET_CORE     = 4
    PT_LOAD     = 1
    PT_NOTE     = 4

    # Special value for e_phnum. This indicates that the real number of
    # program headers is too large to fit into e_phnum. Instead the real
    # value is in the field sh_info of section 0.
    PN_XNUM = 0xFFFF

    # Format strings for packing and header size calculation.
    ELF64_EHDR = ("4s" # e_ident/magic
                  "B"  # e_ident/class
                  "B"  # e_ident/data
                  "B"  # e_ident/version
                  "B"  # e_ident/osabi
                  "8s" # e_ident/pad
                  "H"  # e_type
                  "H"  # e_machine
                  "I"  # e_version
                  "Q"  # e_entry
                  "Q"  # e_phoff
                  "Q"  # e_shoff
                  "I"  # e_flags
                  "H"  # e_ehsize
                  "H"  # e_phentsize
                  "H"  # e_phnum
                  "H"  # e_shentsize
                  "H"  # e_shnum
                  "H"  # e_shstrndx
                 )
    ELF64_PHDR = ("I"  # p_type
                  "I"  # p_flags
                  "Q"  # p_offset
                  "Q"  # p_vaddr
                  "Q"  # p_paddr
                  "Q"  # p_filesz
                  "Q"  # p_memsz
                  "Q"  # p_align
                 )

    def __init__(self):
        super(DumpGuestMemory, self).__init__("dump-guest-memory",
                                              gdb.COMMAND_DATA,
                                              gdb.COMPLETE_FILENAME)
        self.uintptr_t = gdb.lookup_type("uintptr_t")
        # Pre-compile the little-endian packers once; they are reused for
        # every header written to the vmcore.
        self.elf64_ehdr_le = struct.Struct("<%s" % self.ELF64_EHDR)
        self.elf64_phdr_le = struct.Struct("<%s" % self.ELF64_PHDR)

    def int128_get64(self, val):
        """Return the low 64 bits of an Int128 gdb value; the value must
        fit in 64 bits (the high part must be zero)."""
        assert (val["hi"] == 0)
        return val["lo"]

    def qlist_foreach(self, head, field_str):
        """Generator yielding each element of the QLIST at head, following
        the linkage field named field_str."""
        var_p = head["lh_first"]
        while (var_p != 0):
            var = var_p.dereference()
            yield var
            var_p = var[field_str]["le_next"]

    def qemu_get_ram_block(self, ram_addr):
        """Return the RAMBlock gdb value that covers ram offset ram_addr.

        Raises gdb.GdbError if no block contains the offset.
        """
        ram_blocks = gdb.parse_and_eval("ram_list.blocks")
        for block in self.qlist_foreach(ram_blocks, "next"):
            if (ram_addr - block["offset"] < block["used_length"]):
                return block
        raise gdb.GdbError("Bad ram offset %x" % ram_addr)

    def qemu_get_ram_ptr(self, ram_addr):
        """Translate a ram offset to the corresponding host virtual
        address inside the dumped qemu process."""
        block = self.qemu_get_ram_block(ram_addr)
        return block["host"] + (ram_addr - block["offset"])

    def memory_region_get_ram_ptr(self, mr):
        """Return the host address backing MemoryRegion mr, resolving
        alias regions recursively."""
        if (mr["alias"] != 0):
            return (self.memory_region_get_ram_ptr(mr["alias"].dereference()) +
                    mr["alias_offset"])
        return self.qemu_get_ram_ptr(mr["ram_addr"] & self.TARGET_PAGE_MASK)

    def guest_phys_blocks_init(self):
        # Each entry is a dict with keys "target_start", "target_end" and
        # "host_addr".
        self.guest_phys_blocks = []

    def guest_phys_blocks_append(self):
        """Walk the flattened view of address_space_memory and collect the
        guest RAM ranges, merging ranges that are contiguous in both
        guest-physical and host-virtual memory. Progress is printed to the
        gdb console."""
        print("guest RAM blocks:")
        print ("target_start     target_end       host_addr        message "
               "count")
        print ("---------------- ---------------- ---------------- ------- "
               "-----")

        current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
        current_map = current_map_p.dereference()
        for cur in range(current_map["nr"]):
            flat_range = (current_map["ranges"] + cur).dereference()
            mr = flat_range["mr"].dereference()

            # we only care about RAM
            if (not mr["ram"]):
                continue

            section_size = self.int128_get64(flat_range["addr"]["size"])
            target_start = self.int128_get64(flat_range["addr"]["start"])
            target_end = target_start + section_size
            host_addr = (self.memory_region_get_ram_ptr(mr) +
                         flat_range["offset_in_region"])
            predecessor = None

            # find continuity in guest physical address space
            if (len(self.guest_phys_blocks) > 0):
                predecessor = self.guest_phys_blocks[-1]
                predecessor_size = (predecessor["target_end"] -
                                    predecessor["target_start"])

                # the memory API guarantees monotonically increasing
                # traversal
                assert (predecessor["target_end"] <= target_start)

                # we want continuity in both guest-physical and
                # host-virtual memory
                if (predecessor["target_end"] < target_start or
                    predecessor["host_addr"] + predecessor_size != host_addr):
                    predecessor = None

            if (predecessor is None):
                # isolated mapping, add it to the list
                self.guest_phys_blocks.append({"target_start": target_start,
                                               "target_end"  : target_end,
                                               "host_addr"   : host_addr})
                message = "added"
            else:
                # expand predecessor until @target_end; predecessor's
                # start doesn't change
                predecessor["target_end"] = target_end
                message = "joined"

            print ("%016x %016x %016x %-7s %5u" %
                   (target_start, target_end, host_addr.cast(self.uintptr_t),
                    message, len(self.guest_phys_blocks)))

    def cpu_get_dump_info(self):
        """Fill self.dump_info with hard-wired x86_64 little-endian ELF64
        architecture parameters."""
        # We can't synchronize the registers with KVM post-mortem, and
        # the bits in (first_x86_cpu->env.hflags) seem to be stale; they
        # may not reflect long mode for example. Hence just assume the
        # most common values. This also means that instruction pointer
        # etc. will be bogus in the dump, but at least the RAM contents
        # should be valid.
        self.dump_info = {"d_machine": self.EM_X86_64,
                          "d_endian" : self.ELFDATA2LSB,
                          "d_class"  : self.ELFCLASS64}

    def encode_elf64_ehdr_le(self):
        """Return the packed little-endian ELF64 file header; requires
        dump_init() to have set self.phdr_num."""
        return self.elf64_ehdr_le.pack(
                                 self.ELFMAG,                 # e_ident/magic
                                 self.dump_info["d_class"],   # e_ident/class
                                 self.dump_info["d_endian"],  # e_ident/data
                                 self.EV_CURRENT,             # e_ident/version
                                 0,                           # e_ident/osabi
                                 "",                          # e_ident/pad
                                 self.ET_CORE,                # e_type
                                 self.dump_info["d_machine"], # e_machine
                                 self.EV_CURRENT,             # e_version
                                 0,                           # e_entry
                                 self.elf64_ehdr_le.size,     # e_phoff
                                 0,                           # e_shoff
                                 0,                           # e_flags
                                 self.elf64_ehdr_le.size,     # e_ehsize
                                 self.elf64_phdr_le.size,     # e_phentsize
                                 self.phdr_num,               # e_phnum
                                 0,                           # e_shentsize
                                 0,                           # e_shnum
                                 0                            # e_shstrndx
                                )

    def encode_elf64_note_le(self):
        """Return the packed PT_NOTE program header; requires dump_init()
        to have set self.note and self.memory_offset."""
        return self.elf64_phdr_le.pack(self.PT_NOTE,         # p_type
                                       0,                    # p_flags
                                       (self.memory_offset -
                                        len(self.note)),     # p_offset
                                       0,                    # p_vaddr
                                       0,                    # p_paddr
                                       len(self.note),       # p_filesz
                                       len(self.note),       # p_memsz
                                       0                     # p_align
                                      )

    def encode_elf64_load_le(self, offset, start_hwaddr, range_size):
        """Return a packed PT_LOAD program header describing range_size
        bytes of guest RAM at guest-physical address start_hwaddr, stored
        at file offset offset."""
        return self.elf64_phdr_le.pack(self.PT_LOAD, # p_type
                                       0,            # p_flags
                                       offset,       # p_offset
                                       0,            # p_vaddr
                                       start_hwaddr, # p_paddr
                                       range_size,   # p_filesz
                                       range_size,   # p_memsz
                                       0             # p_align
                                      )

    def note_init(self, name, desc, type):
        """Build the single ELF note blob in self.note. name is passed
        without its terminating NUL; name and desc are NUL-padded to a
        multiple of four bytes as the ELF spec requires."""
        # name must include a trailing NUL
        #
        # Use floor division so the computed sizes stay integers under
        # python 3 as well (true division would yield floats and break
        # struct.pack).
        namesz = (len(name) + 1 + 3) // 4 * 4
        descsz = (len(desc) + 3) // 4 * 4
        fmt = ("<"   # little endian
               "I"   # n_namesz
               "I"   # n_descsz
               "I"   # n_type
               "%us" # name
               "%us" # desc
               % (namesz, descsz))
        self.note = struct.pack(fmt,
                                len(name) + 1, len(desc), type, name, desc)

    def dump_init(self):
        """Gather everything needed before writing: the guest RAM block
        list, the architecture info, the fake ELF note, the program header
        count, and the file offset where memory contents begin."""
        self.guest_phys_blocks_init()
        self.guest_phys_blocks_append()
        self.cpu_get_dump_info()
        # we have no way to retrieve the VCPU status from KVM
        # post-mortem
        self.note_init("NONE", "EMPTY", 0)

        # Account for PT_NOTE.
        self.phdr_num = 1

        # We should never reach PN_XNUM for paging=false dumps: there's
        # just a handful of discontiguous ranges after merging.
        self.phdr_num += len(self.guest_phys_blocks)
        assert (self.phdr_num < self.PN_XNUM)

        # Calculate the ELF file offset where the memory dump commences:
        #
        #   ELF header
        #   PT_NOTE
        #   PT_LOAD: 1
        #   PT_LOAD: 2
        #   ...
        #   PT_LOAD: len(self.guest_phys_blocks)
        #   ELF note
        #   memory dump
        self.memory_offset = (self.elf64_ehdr_le.size +
                              self.elf64_phdr_le.size * self.phdr_num +
                              len(self.note))

    def dump_begin(self, vmcore):
        """Write the ELF header, all program headers and the note to
        vmcore, leaving the file positioned at self.memory_offset."""
        vmcore.write(self.encode_elf64_ehdr_le())
        vmcore.write(self.encode_elf64_note_le())
        running = self.memory_offset
        for block in self.guest_phys_blocks:
            range_size = block["target_end"] - block["target_start"]
            vmcore.write(self.encode_elf64_load_le(running,
                                                   block["target_start"],
                                                   range_size))
            running += range_size
        vmcore.write(self.note)

    def dump_iterate(self, vmcore):
        """Copy every guest RAM range from the qemu coredump (the first
        gdb inferior) into vmcore, one target page at a time."""
        qemu_core = gdb.inferiors()[0]
        for block in self.guest_phys_blocks:
            cur = block["host_addr"]
            left = block["target_end"] - block["target_start"]
            print ("dumping range at %016x for length %016x" %
                   (cur.cast(self.uintptr_t), left))
            while (left > 0):
                chunk_size = min(self.TARGET_PAGE_SIZE, left)
                chunk = qemu_core.read_memory(cur, chunk_size)
                vmcore.write(chunk)
                cur += chunk_size
                left -= chunk_size

    def create_vmcore(self, filename):
        """Write the complete vmcore to filename. The context manager
        guarantees the file is closed even if the dump fails halfway."""
        with open(filename, "wb") as vmcore:
            self.dump_begin(vmcore)
            self.dump_iterate(vmcore)

    def invoke(self, args, from_tty):
        """gdb entry point; args is the raw argument string."""
        # Unwittingly pressing the Enter key after the command should
        # not dump the same multi-gig coredump to the same file.
        self.dont_repeat()

        argv = gdb.string_to_argv(args)
        if (len(argv) != 1):
            raise gdb.GdbError("usage: dump-guest-memory FILE")

        self.dump_init()
        self.create_vmcore(argv[0])
| 338 | |
# Instantiating the command object registers "dump-guest-memory" with gdb
# as a side effect of gdb.Command.__init__; the instance itself need not
# be kept.
DumpGuestMemory()