Richard Henderson | f343346 | 2020-09-12 10:47:33 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Interface to the capstone disassembler. |
| 3 | * SPDX-License-Identifier: GPL-2.0-or-later |
| 4 | */ |
| 5 | |
| 6 | #include "qemu/osdep.h" |
| 7 | #include "qemu/bswap.h" |
| 8 | #include "disas/dis-asm.h" |
| 9 | #include "disas/capstone.h" |
| 10 | |
| 11 | |
| 12 | /* |
| 13 | * Temporary storage for the capstone library. This will be alloced via |
| 14 | * malloc with a size private to the library; thus there's no reason not |
| 15 | * to share this across calls and across host vs target disassembly. |
| 16 | */ |
| 17 | static __thread cs_insn *cap_insn; |
| 18 | |
| 19 | /* |
Richard Henderson | c6d3da9 | 2020-09-11 18:19:41 -0700 | [diff] [blame] | 20 | * The capstone library always skips 2 bytes for S390X. |
| 21 | * This is less than ideal, since we can tell from the first two bits |
| 22 | * the size of the insn and thus stay in sync with the insn stream. |
| 23 | */ |
| 24 | static size_t CAPSTONE_API |
| 25 | cap_skipdata_s390x_cb(const uint8_t *code, size_t code_size, |
| 26 | size_t offset, void *user_data) |
| 27 | { |
| 28 | size_t ilen; |
| 29 | |
| 30 | /* See get_ilen() in target/s390x/internal.h. */ |
| 31 | switch (code[offset] >> 6) { |
| 32 | case 0: |
| 33 | ilen = 2; |
| 34 | break; |
| 35 | case 1: |
| 36 | case 2: |
| 37 | ilen = 4; |
| 38 | break; |
| 39 | default: |
| 40 | ilen = 6; |
| 41 | break; |
| 42 | } |
| 43 | |
| 44 | return ilen; |
| 45 | } |
| 46 | |
| 47 | static const cs_opt_skipdata cap_skipdata_s390x = { |
| 48 | .mnemonic = ".byte", |
| 49 | .callback = cap_skipdata_s390x_cb |
| 50 | }; |
| 51 | |
| 52 | /* |
Richard Henderson | f343346 | 2020-09-12 10:47:33 -0700 | [diff] [blame] | 53 | * Initialize the Capstone library. |
| 54 | * |
| 55 | * ??? It would be nice to cache this. We would need one handle for the |
| 56 | * host and one for the target. For most targets we can reset specific |
| 57 | * parameters via cs_option(CS_OPT_MODE, new_mode), but we cannot change |
| 58 | * CS_ARCH_* in this way. Thus we would need to be able to close and |
| 59 | * re-open the target handle with a different arch for the target in order |
| 60 | * to handle AArch64 vs AArch32 mode switching. |
| 61 | */ |
| 62 | static cs_err cap_disas_start(disassemble_info *info, csh *handle) |
| 63 | { |
| 64 | cs_mode cap_mode = info->cap_mode; |
| 65 | cs_err err; |
| 66 | |
| 67 | cap_mode += (info->endian == BFD_ENDIAN_BIG ? CS_MODE_BIG_ENDIAN |
| 68 | : CS_MODE_LITTLE_ENDIAN); |
| 69 | |
| 70 | err = cs_open(info->cap_arch, cap_mode, handle); |
| 71 | if (err != CS_ERR_OK) { |
| 72 | return err; |
| 73 | } |
| 74 | |
| 75 | /* "Disassemble" unknown insns as ".byte W,X,Y,Z". */ |
| 76 | cs_option(*handle, CS_OPT_SKIPDATA, CS_OPT_ON); |
| 77 | |
Richard Henderson | c6d3da9 | 2020-09-11 18:19:41 -0700 | [diff] [blame] | 78 | switch (info->cap_arch) { |
| 79 | case CS_ARCH_SYSZ: |
| 80 | cs_option(*handle, CS_OPT_SKIPDATA_SETUP, |
| 81 | (uintptr_t)&cap_skipdata_s390x); |
| 82 | break; |
| 83 | |
| 84 | case CS_ARCH_X86: |
Richard Henderson | f343346 | 2020-09-12 10:47:33 -0700 | [diff] [blame] | 85 | /* |
| 86 | * We don't care about errors (if for some reason the library |
| 87 | * is compiled without AT&T syntax); the user will just have |
| 88 | * to deal with the Intel syntax. |
| 89 | */ |
| 90 | cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); |
Richard Henderson | c6d3da9 | 2020-09-11 18:19:41 -0700 | [diff] [blame] | 91 | break; |
Richard Henderson | f343346 | 2020-09-12 10:47:33 -0700 | [diff] [blame] | 92 | } |
| 93 | |
| 94 | /* Allocate temp space for cs_disasm_iter. */ |
| 95 | if (cap_insn == NULL) { |
| 96 | cap_insn = cs_malloc(*handle); |
| 97 | if (cap_insn == NULL) { |
| 98 | cs_close(handle); |
| 99 | return CS_ERR_MEM; |
| 100 | } |
| 101 | } |
| 102 | return CS_ERR_OK; |
| 103 | } |
| 104 | |
| 105 | static void cap_dump_insn_units(disassemble_info *info, cs_insn *insn, |
| 106 | int i, int n) |
| 107 | { |
| 108 | fprintf_function print = info->fprintf_func; |
| 109 | FILE *stream = info->stream; |
| 110 | |
| 111 | switch (info->cap_insn_unit) { |
| 112 | case 4: |
| 113 | if (info->endian == BFD_ENDIAN_BIG) { |
| 114 | for (; i < n; i += 4) { |
| 115 | print(stream, " %08x", ldl_be_p(insn->bytes + i)); |
| 116 | |
| 117 | } |
| 118 | } else { |
| 119 | for (; i < n; i += 4) { |
| 120 | print(stream, " %08x", ldl_le_p(insn->bytes + i)); |
| 121 | } |
| 122 | } |
| 123 | break; |
| 124 | |
| 125 | case 2: |
| 126 | if (info->endian == BFD_ENDIAN_BIG) { |
| 127 | for (; i < n; i += 2) { |
| 128 | print(stream, " %04x", lduw_be_p(insn->bytes + i)); |
| 129 | } |
| 130 | } else { |
| 131 | for (; i < n; i += 2) { |
| 132 | print(stream, " %04x", lduw_le_p(insn->bytes + i)); |
| 133 | } |
| 134 | } |
| 135 | break; |
| 136 | |
| 137 | default: |
| 138 | for (; i < n; i++) { |
| 139 | print(stream, " %02x", insn->bytes[i]); |
| 140 | } |
| 141 | break; |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | static void cap_dump_insn(disassemble_info *info, cs_insn *insn) |
| 146 | { |
| 147 | fprintf_function print = info->fprintf_func; |
| 148 | FILE *stream = info->stream; |
| 149 | int i, n, split; |
| 150 | |
| 151 | print(stream, "0x%08" PRIx64 ": ", insn->address); |
| 152 | |
| 153 | n = insn->size; |
| 154 | split = info->cap_insn_split; |
| 155 | |
| 156 | /* Dump the first SPLIT bytes of the instruction. */ |
| 157 | cap_dump_insn_units(info, insn, 0, MIN(n, split)); |
| 158 | |
| 159 | /* Add padding up to SPLIT so that mnemonics line up. */ |
| 160 | if (n < split) { |
| 161 | int width = (split - n) / info->cap_insn_unit; |
| 162 | width *= (2 * info->cap_insn_unit + 1); |
| 163 | print(stream, "%*s", width, ""); |
| 164 | } |
| 165 | |
| 166 | /* Print the actual instruction. */ |
| 167 | print(stream, " %-8s %s\n", insn->mnemonic, insn->op_str); |
| 168 | |
| 169 | /* Dump any remaining part of the insn on subsequent lines. */ |
| 170 | for (i = split; i < n; i += split) { |
| 171 | print(stream, "0x%08" PRIx64 ": ", insn->address + i); |
| 172 | cap_dump_insn_units(info, insn, i, MIN(n, i + split)); |
| 173 | print(stream, "\n"); |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | /* Disassemble SIZE bytes at PC for the target. */ |
| 178 | bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size) |
| 179 | { |
| 180 | uint8_t cap_buf[1024]; |
| 181 | csh handle; |
| 182 | cs_insn *insn; |
| 183 | size_t csize = 0; |
| 184 | |
| 185 | if (cap_disas_start(info, &handle) != CS_ERR_OK) { |
| 186 | return false; |
| 187 | } |
| 188 | insn = cap_insn; |
| 189 | |
| 190 | while (1) { |
| 191 | size_t tsize = MIN(sizeof(cap_buf) - csize, size); |
| 192 | const uint8_t *cbuf = cap_buf; |
| 193 | |
| 194 | info->read_memory_func(pc + csize, cap_buf + csize, tsize, info); |
| 195 | csize += tsize; |
| 196 | size -= tsize; |
| 197 | |
| 198 | while (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) { |
| 199 | cap_dump_insn(info, insn); |
| 200 | } |
| 201 | |
| 202 | /* If the target memory is not consumed, go back for more... */ |
| 203 | if (size != 0) { |
| 204 | /* |
| 205 | * ... taking care to move any remaining fractional insn |
| 206 | * to the beginning of the buffer. |
| 207 | */ |
| 208 | if (csize != 0) { |
| 209 | memmove(cap_buf, cbuf, csize); |
| 210 | } |
| 211 | continue; |
| 212 | } |
| 213 | |
| 214 | /* |
| 215 | * Since the target memory is consumed, we should not have |
| 216 | * a remaining fractional insn. |
| 217 | */ |
| 218 | if (csize != 0) { |
| 219 | info->fprintf_func(info->stream, |
| 220 | "Disassembler disagrees with translator " |
| 221 | "over instruction decoding\n" |
| 222 | "Please report this to qemu-devel@nongnu.org\n"); |
| 223 | } |
| 224 | break; |
| 225 | } |
| 226 | |
| 227 | cs_close(&handle); |
| 228 | return true; |
| 229 | } |
| 230 | |
| 231 | /* Disassemble SIZE bytes at CODE for the host. */ |
Richard Henderson | f06176b | 2020-10-30 20:59:01 -0700 | [diff] [blame] | 232 | bool cap_disas_host(disassemble_info *info, const void *code, size_t size) |
Richard Henderson | f343346 | 2020-09-12 10:47:33 -0700 | [diff] [blame] | 233 | { |
| 234 | csh handle; |
| 235 | const uint8_t *cbuf; |
| 236 | cs_insn *insn; |
| 237 | uint64_t pc; |
| 238 | |
| 239 | if (cap_disas_start(info, &handle) != CS_ERR_OK) { |
| 240 | return false; |
| 241 | } |
| 242 | insn = cap_insn; |
| 243 | |
| 244 | cbuf = code; |
| 245 | pc = (uintptr_t)code; |
| 246 | |
| 247 | while (cs_disasm_iter(handle, &cbuf, &size, &pc, insn)) { |
| 248 | cap_dump_insn(info, insn); |
| 249 | } |
| 250 | if (size != 0) { |
| 251 | info->fprintf_func(info->stream, |
| 252 | "Disassembler disagrees with TCG over instruction encoding\n" |
| 253 | "Please report this to qemu-devel@nongnu.org\n"); |
| 254 | } |
| 255 | |
| 256 | cs_close(&handle); |
| 257 | return true; |
| 258 | } |
| 259 | |
| 260 | /* Disassemble COUNT insns at PC for the target. */ |
| 261 | bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count) |
| 262 | { |
| 263 | uint8_t cap_buf[32]; |
| 264 | csh handle; |
| 265 | cs_insn *insn; |
| 266 | size_t csize = 0; |
| 267 | |
| 268 | if (cap_disas_start(info, &handle) != CS_ERR_OK) { |
| 269 | return false; |
| 270 | } |
| 271 | insn = cap_insn; |
| 272 | |
| 273 | while (1) { |
| 274 | /* |
| 275 | * We want to read memory for one insn, but generically we do not |
| 276 | * know how much memory that is. We have a small buffer which is |
| 277 | * known to be sufficient for all supported targets. Try to not |
| 278 | * read beyond the page, Just In Case. For even more simplicity, |
| 279 | * ignore the actual target page size and use a 1k boundary. If |
| 280 | * that turns out to be insufficient, we'll come back around the |
| 281 | * loop and read more. |
| 282 | */ |
| 283 | uint64_t epc = QEMU_ALIGN_UP(pc + csize + 1, 1024); |
| 284 | size_t tsize = MIN(sizeof(cap_buf) - csize, epc - pc); |
| 285 | const uint8_t *cbuf = cap_buf; |
| 286 | |
| 287 | /* Make certain that we can make progress. */ |
| 288 | assert(tsize != 0); |
Peter Maydell | 437588d | 2020-11-02 16:52:16 +0000 | [diff] [blame] | 289 | info->read_memory_func(pc + csize, cap_buf + csize, tsize, info); |
Richard Henderson | f343346 | 2020-09-12 10:47:33 -0700 | [diff] [blame] | 290 | csize += tsize; |
| 291 | |
| 292 | if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) { |
| 293 | cap_dump_insn(info, insn); |
| 294 | if (--count <= 0) { |
| 295 | break; |
| 296 | } |
| 297 | } |
| 298 | memmove(cap_buf, cbuf, csize); |
| 299 | } |
| 300 | |
| 301 | cs_close(&handle); |
| 302 | return true; |
| 303 | } |
| 304 | |
| 305 | /* Disassemble a single instruction directly into plugin output */ |
| 306 | bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size) |
| 307 | { |
| 308 | uint8_t cap_buf[32]; |
| 309 | const uint8_t *cbuf = cap_buf; |
| 310 | csh handle; |
| 311 | |
| 312 | if (cap_disas_start(info, &handle) != CS_ERR_OK) { |
| 313 | return false; |
| 314 | } |
| 315 | |
| 316 | assert(size < sizeof(cap_buf)); |
| 317 | info->read_memory_func(pc, cap_buf, size, info); |
| 318 | |
| 319 | if (cs_disasm_iter(handle, &cbuf, &size, &pc, cap_insn)) { |
| 320 | info->fprintf_func(info->stream, "%s %s", |
| 321 | cap_insn->mnemonic, cap_insn->op_str); |
| 322 | } |
| 323 | |
| 324 | cs_close(&handle); |
| 325 | return true; |
| 326 | } |