/*
 * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
 *
 * How vectorised is this code?
 *
 * Attempt to measure the amount of vectorisation that has been done
 * on some code by counting classes of instruction.
 *
 * License: GNU GPL, version 2 or later.
 *   See the COPYING file in the top-level directory.
 */
12#include <inttypes.h>
13#include <assert.h>
14#include <stdlib.h>
15#include <inttypes.h>
16#include <string.h>
17#include <unistd.h>
18#include <stdio.h>
19#include <glib.h>
20
21#include <qemu-plugin.h>
22
/*
 * Exported symbol carrying the plugin API version (QEMU_PLUGIN_VERSION)
 * that this file was compiled against.
 */
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
24
/*
 * Number of elements in a true array object. Only valid on arrays, not on
 * pointers (e.g. array parameters), where sizeof yields the pointer size.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
26
/* How executions of instructions that match a class entry are accounted. */
typedef enum {
    COUNT_CLASS = 0,      /* all matches share the class' own counter */
    COUNT_INDIVIDUAL = 1, /* each distinct opcode gets its own counter */
    COUNT_NONE = 2        /* matches are not counted at all */
} CountType;
32
/* Upper bound on the number of individual instruction records reported */
static int limit = 50;
/* "inline" option: count with inline add operations instead of callbacks */
static bool do_inline;
/* "verbose" option: also report instruction classes that had no hits */
static bool verbose;

/* Held around lookups/insertions on @insns in find_counter() */
static GMutex lock;
/* InsnExecCount records for individually counted opcodes, keyed by opcode */
static GHashTable *insns;
39
/*
 * One row of an instruction class table together with its hit counter.
 * Rows are initialised positionally, so the field order must not change.
 */
typedef struct {
    const char *class;  /* name printed in the report */
    const char *opt;    /* short name compared against the "count" option */
    uint32_t mask;      /* opcode bits that take part in the match */
    uint32_t pattern;   /* (opcode & mask) must equal this to match */
    CountType what;     /* how matching instructions are counted */
    uint64_t count;     /* hits, used when what == COUNT_CLASS */
} InsnClassExecCount;
48
/* Record for one individually counted opcode; stored as a value in @insns. */
typedef struct {
    char *insn;                 /* disassembly string, released with g_free() */
    uint32_t opcode;            /* first 32 bits of the instruction */
    uint64_t count;             /* number of executions */
    InsnClassExecCount *class;  /* class table row the opcode matched */
} InsnExecCount;
55
56/*
57 * Matchers for classes of instructions, order is important.
58 *
59 * Your most precise match must be before looser matches. If no match
60 * is found in the table we can create an individual entry.
61 *
62 * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0
63 */
64static InsnClassExecCount aarch64_insn_classes[] = {
65 /* "Reserved"" */
66 { " UDEF", "udef", 0xffff0000, 0x00000000, COUNT_NONE},
67 { " SVE", "sve", 0x1e000000, 0x04000000, COUNT_CLASS},
68 { "Reserved", "res", 0x1e000000, 0x00000000, COUNT_CLASS},
69 /* Data Processing Immediate */
70 { " PCrel addr", "pcrel", 0x1f000000, 0x10000000, COUNT_CLASS},
zhouyangedd4a852021-02-13 13:03:09 +000071 { " Add/Sub (imm,tags)", "asit", 0x1f800000, 0x11800000, COUNT_CLASS},
Alex Bennéef79e8fa2019-05-21 10:15:53 +010072 { " Add/Sub (imm)", "asi", 0x1f000000, 0x11000000, COUNT_CLASS},
73 { " Logical (imm)", "logi", 0x1f800000, 0x12000000, COUNT_CLASS},
74 { " Move Wide (imm)", "movwi", 0x1f800000, 0x12800000, COUNT_CLASS},
75 { " Bitfield", "bitf", 0x1f800000, 0x13000000, COUNT_CLASS},
76 { " Extract", "extr", 0x1f800000, 0x13800000, COUNT_CLASS},
77 { "Data Proc Imm", "dpri", 0x1c000000, 0x10000000, COUNT_CLASS},
78 /* Branches */
79 { " Cond Branch (imm)", "cndb", 0xfe000000, 0x54000000, COUNT_CLASS},
80 { " Exception Gen", "excp", 0xff000000, 0xd4000000, COUNT_CLASS},
81 { " NOP", "nop", 0xffffffff, 0xd503201f, COUNT_NONE},
82 { " Hints", "hint", 0xfffff000, 0xd5032000, COUNT_CLASS},
83 { " Barriers", "barr", 0xfffff000, 0xd5033000, COUNT_CLASS},
84 { " PSTATE", "psta", 0xfff8f000, 0xd5004000, COUNT_CLASS},
85 { " System Insn", "sins", 0xffd80000, 0xd5080000, COUNT_CLASS},
86 { " System Reg", "sreg", 0xffd00000, 0xd5100000, COUNT_CLASS},
87 { " Branch (reg)", "breg", 0xfe000000, 0xd6000000, COUNT_CLASS},
88 { " Branch (imm)", "bimm", 0x7c000000, 0x14000000, COUNT_CLASS},
89 { " Cmp & Branch", "cmpb", 0x7e000000, 0x34000000, COUNT_CLASS},
90 { " Tst & Branch", "tstb", 0x7e000000, 0x36000000, COUNT_CLASS},
91 { "Branches", "branch", 0x1c000000, 0x14000000, COUNT_CLASS},
92 /* Loads and Stores */
93 { " AdvSimd ldstmult", "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS},
zhouyangedd4a852021-02-13 13:03:09 +000094 { " AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS},
Alex Bennéef79e8fa2019-05-21 10:15:53 +010095 { " AdvSimd ldst", "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS},
zhouyangedd4a852021-02-13 13:03:09 +000096 { " AdvSimd ldst++", "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS},
Alex Bennéef79e8fa2019-05-21 10:15:53 +010097 { " ldst excl", "ldstx", 0x3f000000, 0x08000000, COUNT_CLASS},
98 { " Prefetch", "prfm", 0xff000000, 0xd8000000, COUNT_CLASS},
99 { " Load Reg (lit)", "ldlit", 0x1b000000, 0x18000000, COUNT_CLASS},
zhouyangedd4a852021-02-13 13:03:09 +0000100 { " ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS},
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100101 { " ldst pair", "ldstp", 0x38000000, 0x28000000, COUNT_CLASS},
102 { " ldst reg", "ldstr", 0x3b200000, 0x38000000, COUNT_CLASS},
103 { " Atomic ldst", "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS},
zhouyangedd4a852021-02-13 13:03:09 +0000104 { " ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS},
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100105 { " ldst reg (pac)", "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS},
106 { " ldst reg (imm)", "ldsti", 0x3b000000, 0x39000000, COUNT_CLASS},
107 { "Loads & Stores", "ldst", 0x0a000000, 0x08000000, COUNT_CLASS},
108 /* Data Processing Register */
109 { "Data Proc Reg", "dprr", 0x0e000000, 0x0a000000, COUNT_CLASS},
110 /* Scalar FP */
111 { "Scalar FP ", "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS},
112 /* Unclassified */
113 { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_CLASS},
114};
115
116static InsnClassExecCount sparc32_insn_classes[] = {
117 { "Call", "call", 0xc0000000, 0x40000000, COUNT_CLASS},
118 { "Branch ICond", "bcc", 0xc1c00000, 0x00800000, COUNT_CLASS},
119 { "Branch Fcond", "fbcc", 0xc1c00000, 0x01800000, COUNT_CLASS},
120 { "SetHi", "sethi", 0xc1c00000, 0x01000000, COUNT_CLASS},
121 { "FPU ALU", "fpu", 0xc1f00000, 0x81a00000, COUNT_CLASS},
122 { "ALU", "alu", 0xc0000000, 0x80000000, COUNT_CLASS},
123 { "Load/Store", "ldst", 0xc0000000, 0xc0000000, COUNT_CLASS},
124 /* Unclassified */
125 { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
126};
127
128static InsnClassExecCount sparc64_insn_classes[] = {
129 { "SetHi & Branches", "op0", 0xc0000000, 0x00000000, COUNT_CLASS},
130 { "Call", "op1", 0xc0000000, 0x40000000, COUNT_CLASS},
131 { "Arith/Logical/Move", "op2", 0xc0000000, 0x80000000, COUNT_CLASS},
132 { "Arith/Logical/Move", "op3", 0xc0000000, 0xc0000000, COUNT_CLASS},
133 /* Unclassified */
134 { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
135};
136
137/* Default matcher for currently unclassified architectures */
138static InsnClassExecCount default_insn_classes[] = {
139 { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
140};
141
/* Associates a QEMU target name with the class table to use for it. */
typedef struct {
    const char *qemu_target;    /* target name, or NULL to match any target */
    InsnClassExecCount *table;  /* first row of the class table */
    int table_sz;               /* number of rows in @table */
} ClassSelector;
147
zhouyang24fa5d62021-02-13 13:03:10 +0000148static ClassSelector class_tables[] = {
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100149 { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) },
150 { "sparc", sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) },
151 { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) },
152 { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) },
153};
154
/* Class table selected for the current guest in qemu_plugin_install() */
static InsnClassExecCount *class_table;
/* Number of rows in @class_table */
static int class_table_sz;
157
158static gint cmp_exec_count(gconstpointer a, gconstpointer b)
159{
160 InsnExecCount *ea = (InsnExecCount *) a;
161 InsnExecCount *eb = (InsnExecCount *) b;
162 return ea->count > eb->count ? -1 : 1;
163}
164
Alex Bennéeec11c4a2020-02-25 12:47:06 +0000165static void free_record(gpointer data)
166{
167 InsnExecCount *rec = (InsnExecCount *) data;
168 g_free(rec->insn);
169 g_free(rec);
170}
171
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100172static void plugin_exit(qemu_plugin_id_t id, void *p)
173{
174 g_autoptr(GString) report = g_string_new("Instruction Classes:\n");
175 int i;
176 GList *counts;
177 InsnClassExecCount *class = NULL;
178
179 for (i = 0; i < class_table_sz; i++) {
180 class = &class_table[i];
181 switch (class->what) {
182 case COUNT_CLASS:
183 if (class->count || verbose) {
184 g_string_append_printf(report, "Class: %-24s\t(%ld hits)\n",
185 class->class,
186 class->count);
187 }
188 break;
189 case COUNT_INDIVIDUAL:
190 g_string_append_printf(report, "Class: %-24s\tcounted individually\n",
191 class->class);
192 break;
193 case COUNT_NONE:
194 g_string_append_printf(report, "Class: %-24s\tnot counted\n",
195 class->class);
196 break;
197 default:
198 break;
199 }
200 }
201
202 counts = g_hash_table_get_values(insns);
203 if (counts && g_list_next(counts)) {
zhouyangedd4a852021-02-13 13:03:09 +0000204 g_string_append_printf(report, "Individual Instructions:\n");
Alex Bennéeec11c4a2020-02-25 12:47:06 +0000205 counts = g_list_sort(counts, cmp_exec_count);
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100206
Alex Bennéeec11c4a2020-02-25 12:47:06 +0000207 for (i = 0; i < limit && g_list_next(counts);
208 i++, counts = g_list_next(counts)) {
209 InsnExecCount *rec = (InsnExecCount *) counts->data;
210 g_string_append_printf(report,
zhouyang7fe7ab12021-02-13 13:03:06 +0000211 "Instr: %-24s\t(%ld hits)\t(op=0x%08x/%s)\n",
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100212 rec->insn,
213 rec->count,
214 rec->opcode,
215 rec->class ?
216 rec->class->class : "un-categorised");
217 }
Alex Bennéeec11c4a2020-02-25 12:47:06 +0000218 g_list_free(counts);
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100219 }
220
Alex Bennéeec11c4a2020-02-25 12:47:06 +0000221 g_hash_table_destroy(insns);
222
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100223 qemu_plugin_outs(report->str);
224}
225
226static void plugin_init(void)
227{
Alex Bennéeec11c4a2020-02-25 12:47:06 +0000228 insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record);
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100229}
230
/*
 * Instruction execution callback: bump the 64-bit counter that was
 * registered as user data. @cpu_index is not used.
 */
static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
{
    uint64_t *counter = udata;

    *counter += 1;
}
236
zhouyang247b3c72021-02-13 13:03:07 +0000237static uint64_t *find_counter(struct qemu_plugin_insn *insn)
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100238{
239 int i;
240 uint64_t *cnt = NULL;
241 uint32_t opcode;
242 InsnClassExecCount *class = NULL;
243
244 /*
245 * We only match the first 32 bits of the instruction which is
246 * fine for most RISCs but a bit limiting for CISC architectures.
247 * They would probably benefit from a more tailored plugin.
248 * However we can fall back to individual instruction counting.
249 */
250 opcode = *((uint32_t *)qemu_plugin_insn_data(insn));
251
252 for (i = 0; !cnt && i < class_table_sz; i++) {
253 class = &class_table[i];
254 uint32_t masked_bits = opcode & class->mask;
255 if (masked_bits == class->pattern) {
256 break;
257 }
258 }
259
260 g_assert(class);
261
262 switch (class->what) {
263 case COUNT_NONE:
264 return NULL;
265 case COUNT_CLASS:
266 return &class->count;
267 case COUNT_INDIVIDUAL:
268 {
269 InsnExecCount *icount;
270
271 g_mutex_lock(&lock);
272 icount = (InsnExecCount *) g_hash_table_lookup(insns,
273 GUINT_TO_POINTER(opcode));
274
275 if (!icount) {
276 icount = g_new0(InsnExecCount, 1);
277 icount->opcode = opcode;
278 icount->insn = qemu_plugin_insn_disas(insn);
279 icount->class = class;
280
281 g_hash_table_insert(insns, GUINT_TO_POINTER(opcode),
282 (gpointer) icount);
283 }
284 g_mutex_unlock(&lock);
285
286 return &icount->count;
287 }
288 default:
289 g_assert_not_reached();
290 }
291
292 return NULL;
293}
294
295static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
296{
297 size_t n = qemu_plugin_tb_n_insns(tb);
298 size_t i;
299
300 for (i = 0; i < n; i++) {
301 uint64_t *cnt;
302 struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
303 cnt = find_counter(insn);
304
305 if (cnt) {
306 if (do_inline) {
307 qemu_plugin_register_vcpu_insn_exec_inline(
308 insn, QEMU_PLUGIN_INLINE_ADD_U64, cnt, 1);
309 } else {
310 qemu_plugin_register_vcpu_insn_exec_cb(
311 insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt);
312 }
313 }
314 }
315}
316
317QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
318 const qemu_info_t *info,
319 int argc, char **argv)
320{
321 int i;
322
323 /* Select a class table appropriate to the guest architecture */
324 for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
325 ClassSelector *entry = &class_tables[i];
326 if (!entry->qemu_target ||
327 strcmp(entry->qemu_target, info->target_name) == 0) {
328 class_table = entry->table;
329 class_table_sz = entry->table_sz;
330 break;
331 }
332 }
333
334 for (i = 0; i < argc; i++) {
335 char *p = argv[i];
Mahmoud Mandourd8525352021-07-30 15:58:11 +0200336 g_autofree char **tokens = g_strsplit(p, "=", -1);
337 if (g_strcmp0(tokens[0], "inline") == 0) {
338 if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) {
339 fprintf(stderr, "boolean argument parsing failed: %s\n", p);
340 return -1;
341 }
342 } else if (g_strcmp0(tokens[0], "verbose") == 0) {
343 if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) {
344 fprintf(stderr, "boolean argument parsing failed: %s\n", p);
345 return -1;
346 }
347 } else if (g_strcmp0(tokens[0], "count") == 0) {
348 char *value = tokens[1];
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100349 int j;
350 CountType type = COUNT_INDIVIDUAL;
Mahmoud Mandourd8525352021-07-30 15:58:11 +0200351 if (*value == '!') {
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100352 type = COUNT_NONE;
Mahmoud Mandourd8525352021-07-30 15:58:11 +0200353 value++;
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100354 }
355 for (j = 0; j < class_table_sz; j++) {
Mahmoud Mandourd8525352021-07-30 15:58:11 +0200356 if (strcmp(value, class_table[j].opt) == 0) {
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100357 class_table[j].what = type;
358 break;
359 }
360 }
Mahmoud Mandourd8525352021-07-30 15:58:11 +0200361 } else {
362 fprintf(stderr, "option parsing failed: %s\n", p);
363 return -1;
Alex Bennéef79e8fa2019-05-21 10:15:53 +0100364 }
365 }
366
367 plugin_init();
368
369 qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
370 qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
371 return 0;
372}