trace: emit name <-> ID mapping in simpletrace header

Currently simpletrace assumes that events are given IDs
starting from 0, based on the order in which they appear
in the trace-events file, with no gaps. When the
trace-events file is split up, this assumption becomes
problematic.

To deal with this, extend the simpletrace format so that
it outputs a table of event name <-> ID mappings. That
will allow QEMU to assign arbitrary IDs to events without
breaking simpletrace parsing.

The v3 simple trace format was

  FILE HEADER
  EVENT TRACE RECORD 0
  EVENT TRACE RECORD 1
  ...
  EVENT TRACE RECORD N

The v4 simple trace format is now

  FILE HEADER
  EVENT MAPPING RECORD 0
  EVENT MAPPING RECORD 1
  ...
  EVENT MAPPING RECORD M
  EVENT TRACE RECORD RECORD 0
  EVENT TRACE RECORD RECORD 1
  ...
  EVENT TRACE RECORD N

Although this shows all the mapping records being emitted
upfront, this is not required by the format. While the main
simpletrace backend will emit all mappings at startup,
the systemtap simpletrace.stp script will emit the mappings
at first use. eg

  FILE HEADER
  ...
  EVENT MAPPING RECORD 0
  EVENT TRACE RECORD RECORD 0
  EVENT TRACE RECORD RECORD 1
  EVENT MAPPING RECORD 1
  EVENT TRACE RECORD RECORD 2
  ...
  EVENT TRACE RECORD N

This is more space efficient given that most trace records
only include a subset of events.

In modifying the systemtap simpletrace code, a 'begin' probe
was added to emit the trace event header, so you no longer
need to add '--no-header' when running simpletrace.py for
systemtap generated trace files.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 1475588159-30598-12-git-send-email-berrange@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 3916c6d..f40467a 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -19,6 +19,9 @@
 header_magic    = 0xf2b177cb0aa429b4
 dropped_event_id = 0xfffffffffffffffe
 
+record_type_mapping = 0
+record_type_event = 1
+
 log_header_fmt = '=QQQ'
 rec_header_fmt = '=QQII'
 
@@ -30,14 +33,16 @@
         return None
     return struct.unpack(hfmt, hdr)
 
-def get_record(edict, rechdr, fobj):
-    """Deserialize a trace record from a file into a tuple (event_num, timestamp, pid, arg1, ..., arg6)."""
+def get_record(edict, idtoname, rechdr, fobj):
+    """Deserialize a trace record from a file into a tuple
+       (name, timestamp, pid, arg1, ..., arg6)."""
     if rechdr is None:
         return None
-    rec = (rechdr[0], rechdr[1], rechdr[3])
     if rechdr[0] != dropped_event_id:
         event_id = rechdr[0]
-        event = edict[event_id]
+        name = idtoname[event_id]
+        rec = (name, rechdr[1], rechdr[3])
+        event = edict[name]
         for type, name in event.args:
             if is_string(type):
                 l = fobj.read(4)
@@ -48,15 +53,22 @@
                 (value,) = struct.unpack('=Q', fobj.read(8))
                 rec = rec + (value,)
     else:
+        rec = ("dropped", rechdr[1], rechdr[3])
         (value,) = struct.unpack('=Q', fobj.read(8))
         rec = rec + (value,)
     return rec
 
+def get_mapping(fobj):
+    (event_id, ) = struct.unpack('=Q', fobj.read(8))
+    (len, ) = struct.unpack('=L', fobj.read(4))
+    name = fobj.read(len)
 
-def read_record(edict, fobj):
+    return (event_id, name)
+
+def read_record(edict, idtoname, fobj):
     """Deserialize a trace record from a file into a tuple (event_num, timestamp, pid, arg1, ..., arg6)."""
     rechdr = read_header(fobj, rec_header_fmt)
-    return get_record(edict, rechdr, fobj) # return tuple of record elements
+    return get_record(edict, idtoname, rechdr, fobj)
 
 def read_trace_header(fobj):
     """Read and verify trace file header"""
@@ -67,20 +79,30 @@
         raise ValueError('Not a valid trace file!')
 
     log_version = header[2]
-    if log_version not in [0, 2, 3]:
+    if log_version not in [0, 2, 3, 4]:
         raise ValueError('Unknown version of tracelog format!')
-    if log_version != 3:
+    if log_version != 4:
         raise ValueError('Log format %d not supported with this QEMU release!'
                          % log_version)
 
 def read_trace_records(edict, fobj):
     """Deserialize trace records from a file, yielding record tuples (event_num, timestamp, pid, arg1, ..., arg6)."""
+    idtoname = {
+        dropped_event_id: "dropped"
+    }
     while True:
-        rec = read_record(edict, fobj)
-        if rec is None:
+        t = fobj.read(8)
+        if len(t) == 0:
             break
 
-        yield rec
+        (rectype, ) = struct.unpack('=Q', t)
+        if rectype == record_type_mapping:
+            event_id, name = get_mapping(fobj)
+            idtoname[event_id] = name
+        else:
+            rec = read_record(edict, idtoname, fobj)
+
+            yield rec
 
 class Analyzer(object):
     """A trace file analyzer which processes trace records.
@@ -115,10 +137,10 @@
         read_trace_header(log)
 
     dropped_event = Event.build("Dropped_Event(uint64_t num_events_dropped)")
-    edict = {dropped_event_id: dropped_event}
+    edict = {"dropped": dropped_event}
 
-    for num, event in enumerate(events):
-        edict[num] = event
+    for event in events:
+        edict[event.name] = event
 
     def build_fn(analyzer, event):
         if isinstance(event, str):