trace: add some tcg tracing support

This adds a couple of tcg-specific trace-events which are useful for
tracing execution through tcg-generated blocks. It's been tested with
lttng user space tracing but is generic enough for all systems. The tcg
events are:

  * translate_block - when a subject block is translated
  * exec_tb - when a translated block is entered
  * exec_tb_exit - when we exit the translated code
  * exec_tb_nocache - special case translations

Of course, we can only trace the entrance to the first block of a chain,
as each block will jump directly to the next when it can. See the -d
nochain patch to allow more complete tracing at the expense of
performance.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/cpu-exec.c b/cpu-exec.c
index cbc8067..c6aad74 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -18,6 +18,7 @@
  */
 #include "config.h"
 #include "cpu.h"
+#include "trace.h"
 #include "disas/disas.h"
 #include "tcg.h"
 #include "qemu/atomic.h"
@@ -168,6 +169,9 @@
 #endif /* DEBUG_DISAS */
 
     next_tb = tcg_qemu_tb_exec(env, tb_ptr);
+    trace_exec_tb_exit((void *) (next_tb & ~TB_EXIT_MASK),
+                       next_tb & TB_EXIT_MASK);
+
     if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) {
         /* We didn't start executing this TB (eg because the instruction
          * counter hit zero); we must restore the guest PC to the address
@@ -208,6 +212,7 @@
                      max_cycles);
     cpu->current_tb = tb;
     /* execute the generated code */
+    trace_exec_tb_nocache(tb, tb->pc);
     cpu_tb_exec(cpu, tb->tc_ptr);
     cpu->current_tb = NULL;
     tb_phys_invalidate(tb, -1);
@@ -749,6 +754,7 @@
                 cpu->current_tb = tb;
                 barrier();
                 if (likely(!cpu->exit_request)) {
+                    trace_exec_tb(tb, tb->pc);
                     tc_ptr = tb->tc_ptr;
                     /* execute the generated code */
                     next_tb = cpu_tb_exec(cpu, tc_ptr);