Jan Kiszka | 626c427 | 2011-10-07 09:37:49 +0200 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # |
| 3 | # top-like utility for displaying kvm statistics |
| 4 | # |
| 5 | # Copyright 2006-2008 Qumranet Technologies |
| 6 | # Copyright 2008-2011 Red Hat, Inc. |
| 7 | # |
| 8 | # Authors: |
| 9 | # Avi Kivity <avi@redhat.com> |
| 10 | # |
| 11 | # This work is licensed under the terms of the GNU GPL, version 2. See |
| 12 | # the COPYING file in the top-level directory. |
| 13 | |
| 14 | import curses |
| 15 | import sys, os, time, optparse |
| 16 | |
class DebugfsProvider(object):
    """Statistics provider that reads the KVM counters exported in debugfs.

    Every directory entry below ``/sys/kernel/debug/kvm`` is treated as one
    field whose file content is a single integer.
    """

    def __init__(self):
        self.base = '/sys/kernel/debug/kvm'
        self._fields = os.listdir(self.base)

    def fields(self):
        """Return the names of the currently selected fields."""
        return self._fields

    def select(self, fields):
        """Make *fields* the set reported by fields() and read()."""
        self._fields = fields

    def read(self):
        """Return a dict mapping every selected field to its integer value."""
        def val(key):
            # Close each counter file explicitly instead of leaving the
            # handle to the garbage collector; open() also works where the
            # Python 2 only file() builtin does not exist.
            with open(self.base + '/' + key) as counter:
                return int(counter.read())
        return dict((key, val(key)) for key in self._fields)
| 29 | |
# Exit-reason number -> name, used when /proc/cpuinfo reports the 'vmx' flag.
# The numbering is sparse; codes that are not listed are simply not offered
# as drill-down fields.
vmx_exit_reasons = {
    0: 'EXCEPTION_NMI',
    1: 'EXTERNAL_INTERRUPT',
    2: 'TRIPLE_FAULT',
    7: 'PENDING_INTERRUPT',
    8: 'NMI_WINDOW',
    9: 'TASK_SWITCH',
    10: 'CPUID',
    12: 'HLT',
    14: 'INVLPG',
    15: 'RDPMC',
    16: 'RDTSC',
    # VMX instructions executed by the guest.
    18: 'VMCALL',
    19: 'VMCLEAR',
    20: 'VMLAUNCH',
    21: 'VMPTRLD',
    22: 'VMPTRST',
    23: 'VMREAD',
    24: 'VMRESUME',
    25: 'VMWRITE',
    26: 'VMOFF',
    27: 'VMON',
    # Register, I/O and MSR accesses.
    28: 'CR_ACCESS',
    29: 'DR_ACCESS',
    30: 'IO_INSTRUCTION',
    31: 'MSR_READ',
    32: 'MSR_WRITE',
    33: 'INVALID_STATE',
    36: 'MWAIT_INSTRUCTION',
    39: 'MONITOR_INSTRUCTION',
    40: 'PAUSE_INSTRUCTION',
    41: 'MCE_DURING_VMENTRY',
    43: 'TPR_BELOW_THRESHOLD',
    44: 'APIC_ACCESS',
    48: 'EPT_VIOLATION',
    49: 'EPT_MISCONFIG',
    54: 'WBINVD',
    55: 'XSETBV',
}
| 69 | |
# Exit-reason number -> name, used when /proc/cpuinfo reports the 'svm' flag.
svm_exit_reasons = {
    # Control register reads and writes.
    0x000: 'READ_CR0',
    0x003: 'READ_CR3',
    0x004: 'READ_CR4',
    0x008: 'READ_CR8',
    0x010: 'WRITE_CR0',
    0x013: 'WRITE_CR3',
    0x014: 'WRITE_CR4',
    0x018: 'WRITE_CR8',
    # Debug register reads and writes.
    0x020: 'READ_DR0',
    0x021: 'READ_DR1',
    0x022: 'READ_DR2',
    0x023: 'READ_DR3',
    0x024: 'READ_DR4',
    0x025: 'READ_DR5',
    0x026: 'READ_DR6',
    0x027: 'READ_DR7',
    0x030: 'WRITE_DR0',
    0x031: 'WRITE_DR1',
    0x032: 'WRITE_DR2',
    0x033: 'WRITE_DR3',
    0x034: 'WRITE_DR4',
    0x035: 'WRITE_DR5',
    0x036: 'WRITE_DR6',
    0x037: 'WRITE_DR7',
    0x040: 'EXCP_BASE',
    # Interrupts and descriptor-table accesses.
    0x060: 'INTR',
    0x061: 'NMI',
    0x062: 'SMI',
    0x063: 'INIT',
    0x064: 'VINTR',
    0x065: 'CR0_SEL_WRITE',
    0x066: 'IDTR_READ',
    0x067: 'GDTR_READ',
    0x068: 'LDTR_READ',
    0x069: 'TR_READ',
    0x06a: 'IDTR_WRITE',
    0x06b: 'GDTR_WRITE',
    0x06c: 'LDTR_WRITE',
    0x06d: 'TR_WRITE',
    # Intercepted instructions.
    0x06e: 'RDTSC',
    0x06f: 'RDPMC',
    0x070: 'PUSHF',
    0x071: 'POPF',
    0x072: 'CPUID',
    0x073: 'RSM',
    0x074: 'IRET',
    0x075: 'SWINT',
    0x076: 'INVD',
    0x077: 'PAUSE',
    0x078: 'HLT',
    0x079: 'INVLPG',
    0x07a: 'INVLPGA',
    0x07b: 'IOIO',
    0x07c: 'MSR',
    0x07d: 'TASK_SWITCH',
    0x07e: 'FERR_FREEZE',
    0x07f: 'SHUTDOWN',
    0x080: 'VMRUN',
    0x081: 'VMMCALL',
    0x082: 'VMLOAD',
    0x083: 'VMSAVE',
    0x084: 'STGI',
    0x085: 'CLGI',
    0x086: 'SKINIT',
    0x087: 'RDTSCP',
    0x088: 'ICEBP',
    0x089: 'WBINVD',
    0x08a: 'MONITOR',
    0x08b: 'MWAIT',
    0x08c: 'MWAIT_COND',
    0x400: 'NPF',
}
| 143 | |
# Exit-reason number -> name, used when /proc/cpuinfo reports the
# 'IBM/S390' vendor id.  The names follow the KVM_EXIT_* constants, whose
# values are consecutive *decimal* numbers 0..19; writing 10..19 as hex
# literals (0x010..0x019) would map them to 16..25 instead.
s390_exit_reasons = {
    0: 'UNKNOWN',
    1: 'EXCEPTION',
    2: 'IO',
    3: 'HYPERCALL',
    4: 'DEBUG',
    5: 'HLT',
    6: 'MMIO',
    7: 'IRQ_WINDOW_OPEN',
    8: 'SHUTDOWN',
    9: 'FAIL_ENTRY',
    10: 'INTR',
    11: 'SET_TPR',
    12: 'TPR_ACCESS',
    13: 'S390_SIEIC',
    14: 'S390_RESET',
    15: 'DCR',
    16: 'NMI',
    17: 'INTERNAL_ERROR',
    18: 'OSI',
    19: 'PAPR_HCALL',
}
| 166 | |
# Token found in /proc/cpuinfo -> exit-reason table to use on this host.
vendor_exit_reasons = {
    'vmx': vmx_exit_reasons,
    'svm': svm_exit_reasons,
    'IBM/S390': s390_exit_reasons,
}

# Start with an empty table so that a host matching none of the tokens
# above simply gets no per-exit-reason fields, instead of handing None to
# the code that post-processes `filters`.
exit_reasons = {}

with open('/proc/cpuinfo') as cpuinfo:
    for line in cpuinfo:
        if line.startswith('flags') or line.startswith('vendor_id'):
            for flag in line.split():
                if flag in vendor_exit_reasons:
                    exit_reasons = vendor_exit_reasons[flag]

# Tracepoint name -> (tracepoint field to filter on, value table).
filters = {
    'kvm_exit': ('exit_reason', exit_reasons)
}
| 184 | |
def invert(d):
    """Return a new dict that maps every value of *d* back to its key.

    None or an empty mapping yields an empty dict, so callers need not
    special-case a missing table.  If several keys share one value, the
    key seen last while iterating wins.
    """
    if not d:
        return {}
    # items() rather than iteritems(): the latter exists on Python 2 only.
    return dict((value, key) for key, value in d.items())
| 187 | |
# The tables are declared number -> name; turn each one around so that a
# reason name can be looked up to obtain the number for a filter expression.
for f in list(filters):
    subfield, names = filters[f]
    filters[f] = (subfield, invert(names))
| 190 | |
| 191 | import ctypes, struct, array |
| 192 | |
# Handle on the C library; its generic syscall() entry point is what
# _perf_event_open() below goes through.
libc = ctypes.cdll.LoadLibrary('libc.so.6')
syscall = libc.syscall
class perf_event_attr(ctypes.Structure):
    """ctypes layout of the attribute block handed to perf_event_open.

    Only the fields listed here are declared; with this layout the
    structure is 72 bytes long.
    """

    _fields_ = [
        ('type', ctypes.c_uint32),
        ('size', ctypes.c_uint32),
        ('config', ctypes.c_uint64),
        ('sample_freq', ctypes.c_uint64),
        ('sample_type', ctypes.c_uint64),
        ('read_format', ctypes.c_uint64),
        ('flags', ctypes.c_uint64),
        ('wakeup_events', ctypes.c_uint32),
        ('bp_type', ctypes.c_uint32),
        ('bp_addr', ctypes.c_uint64),
        ('bp_len', ctypes.c_uint64),
    ]
| 208 | def _perf_event_open(attr, pid, cpu, group_fd, flags): |
| 209 | return syscall(298, ctypes.pointer(attr), ctypes.c_int(pid), |
| 210 | ctypes.c_int(cpu), ctypes.c_int(group_fd), |
| 211 | ctypes.c_long(flags)) |
| 212 | |
# Values for perf_event_attr.type.
PERF_TYPE_HARDWARE = 0
PERF_TYPE_SOFTWARE = 1
PERF_TYPE_TRACEPOINT = 2
PERF_TYPE_HW_CACHE = 3
PERF_TYPE_RAW = 4
PERF_TYPE_BREAKPOINT = 5

# Bit flags for perf_event_attr.sample_type.
PERF_SAMPLE_IP = 1 << 0
PERF_SAMPLE_TID = 1 << 1
PERF_SAMPLE_TIME = 1 << 2
PERF_SAMPLE_ADDR = 1 << 3
PERF_SAMPLE_READ = 1 << 4
PERF_SAMPLE_CALLCHAIN = 1 << 5
PERF_SAMPLE_ID = 1 << 6
PERF_SAMPLE_CPU = 1 << 7
PERF_SAMPLE_PERIOD = 1 << 8
PERF_SAMPLE_STREAM_ID = 1 << 9
PERF_SAMPLE_RAW = 1 << 10

# Bit flags for perf_event_attr.read_format.
PERF_FORMAT_TOTAL_TIME_ENABLED = 1 << 0
PERF_FORMAT_TOTAL_TIME_RUNNING = 1 << 1
PERF_FORMAT_ID = 1 << 2
PERF_FORMAT_GROUP = 1 << 3

import re

# Root of the tracing tree; the per-tracepoint 'id' files are looked up
# below <sys_tracing>/events/.
sys_tracing = '/sys/kernel/debug/tracing'
| 240 | |
class Group(object):
    """The perf events opened for one CPU, read back with a single read().

    The first event added acts as the leader: its file descriptor is wrapped
    in a file object and every read() goes through that file.
    """

    def __init__(self, cpu):
        self.events = []
        self.group_leader = None
        self.cpu = cpu

    def add_event(self, name, event_set, tracepoint, filter = None):
        """Open a new Event for this group and remember it."""
        event = Event(group = self,
                      name = name, event_set = event_set,
                      tracepoint = tracepoint, filter = filter)
        self.events.append(event)
        if len(self.events) == 1:
            # First event of the group: read() will use its descriptor.
            self.file = os.fdopen(event.fd)

    def read(self):
        """Return a dict mapping each event name to its current count.

        The record read is one 8-byte header, which is skipped, followed by
        one 64-bit value per event, in the order the events were added.
        """
        count = len(self.events)
        record = self.file.read(8 * (count + 1))
        counters = struct.unpack('x' * 8 + 'q' * count, record)
        names = [event.name for event in self.events]
        return dict(zip(names, counters))
| 257 | |
class Event(object):
    """One perf tracepoint counter, optionally restricted by a filter.

    group      -- owning Group; supplies the CPU and, if it already holds
                  events, the descriptor of its first event as group leader
    name       -- name under which the counter is reported
    event_set  -- tracing subsystem directory below <sys_tracing>/events
    tracepoint -- tracepoint directory below event_set
    filter     -- optional filter expression passed to the kernel via ioctl

    Raises Exception('perf_event_open failed') when the system call
    returns -1.
    """

    def __init__(self, group, name, event_set, tracepoint, filter = None):
        self.name = name
        attr = perf_event_attr()
        attr.type = PERF_TYPE_TRACEPOINT
        attr.size = ctypes.sizeof(attr)
        id_path = os.path.join(sys_tracing, 'events', event_set,
                               tracepoint, 'id')
        # The numeric tracepoint id selects the event; close the file
        # explicitly rather than relying on garbage collection.
        with open(id_path) as id_file:
            attr.config = int(id_file.read())
        attr.sample_type = (PERF_SAMPLE_RAW
                            | PERF_SAMPLE_TIME
                            | PERF_SAMPLE_CPU)
        # NOTE(review): perf_event_attr as declared in this file has no
        # 'sample_period' field, so this only sets a plain Python attribute
        # and never reaches the kernel.  Kept as is to leave the attribute
        # block that is actually submitted unchanged.
        attr.sample_period = 1
        attr.read_format = PERF_FORMAT_GROUP
        group_leader = -1
        if group.events:
            group_leader = group.events[0].fd
        fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0)
        if fd == -1:
            raise Exception('perf_event_open failed')
        if filter:
            import fcntl
            try:
                # presumably PERF_EVENT_IOC_SET_FILTER -- confirm against
                # the kernel's perf_event.h
                fcntl.ioctl(fd, 0x40082406, filter)
            except Exception:
                # Nobody else knows this descriptor yet; do not leak it.
                os.close(fd)
                raise
        self.fd = fd

    def enable(self):
        """Issue the enable ioctl on this event's descriptor."""
        import fcntl
        fcntl.ioctl(self.fd, 0x00002400, 0)

    def disable(self):
        """Issue the disable ioctl on this event's descriptor."""
        import fcntl
        fcntl.ioctl(self.fd, 0x00002401, 0)
| 289 | |
class TracepointProvider(object):
    """Statistics provider that counts kvm tracepoints through perf events.

    One field exists per directory below <sys_tracing>/events/kvm.  For a
    tracepoint listed in `filters`, an additional 'tracepoint(name)' field
    is created for every name in its value table.
    """

    def __init__(self):
        path = os.path.join(sys_tracing, 'events', 'kvm')
        fields = [f
                  for f in os.listdir(path)
                  if os.path.isdir(os.path.join(path, f))]
        extra = []
        for f in fields:
            if f in filters:
                subfield, values = filters[f]
                for name in values:
                    extra.append(f + '(' + name + ')')
        fields += extra
        self._setup(fields)
        self.select(fields)

    def fields(self):
        """Return the names of all fields that were set up."""
        return self._fields

    def _setup(self, _fields):
        """Open one event group per CPU holding an event for every field."""
        self._fields = _fields
        cpu_dir = re.compile(r'cpu([0-9]+)')
        matches = [cpu_dir.match(x)
                   for x in os.listdir('/sys/devices/system/cpu')]
        self.cpus = [int(m.group(1)) for m in matches if m]
        import resource
        # Every (cpu, field) pair needs its own file descriptor.
        nfiles = len(self.cpus) * 1000
        resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles))

        # The field names are the same for every CPU, so split them into
        # tracepoint and filter expression only once.
        specs = []
        for name in _fields:
            tracepoint = name
            filter = None
            m = re.match(r'(.*)\((.*)\)', name)
            if m:
                tracepoint, sub = m.groups()
                filter = '%s==%d\0' % (filters[tracepoint][0],
                                       filters[tracepoint][1][sub])
            specs.append((name, tracepoint, filter))

        self.group_leaders = []
        for cpu in self.cpus:
            group = Group(cpu)
            for name, tracepoint, filter in specs:
                group.add_event(name, event_set = 'kvm',
                                tracepoint = tracepoint,
                                filter = filter)
            self.group_leaders.append(group)

    def select(self, fields):
        """Enable the events named in *fields* and disable all others."""
        wanted = set(fields)
        for group in self.group_leaders:
            for event in group.events:
                if event.name in wanted:
                    event.enable()
                else:
                    event.disable()

    def read(self):
        """Return the per-field counts summed over all CPUs.

        The result is a defaultdict(int), so a field that no group reported
        reads as 0.
        """
        from collections import defaultdict
        ret = defaultdict(int)
        for group in self.group_leaders:
            for name, val in group.read().items():
                ret[name] += val
        return ret
| 346 | |
class Stats(object):
    """Tracks totals and per-sample deltas for the fields of one provider.

    provider -- object offering fields(), select(fields) and read()
    fields   -- optional regular expression; only fields whose name matches
                it from the start (re.match) are tracked
    """

    def __init__(self, provider, fields = None):
        self.provider = provider
        self.fields_filter = fields
        self._update()

    def _update(self):
        """Recompute the tracked fields and tell the provider about them."""
        import re
        def wanted(key):
            if not self.fields_filter:
                return True
            return re.match(self.fields_filter, key) is not None
        # Ask this instance's own provider, not whatever module-level
        # variable happens to be called 'provider'.
        self.values = dict((key, None)
                           for key in self.provider.fields()
                           if wanted(key))
        self.provider.select(list(self.values))

    def set_fields_filter(self, fields_filter):
        """Replace the field filter; all tracked values start over."""
        self.fields_filter = fields_filter
        self._update()

    def get(self):
        """Sample the provider and return {field: (total, delta)}.

        delta is None for the first sample taken after the tracked fields
        were (re)computed.  Only tracked fields are reported, so fields
        excluded by the filter never reappear in the result.
        """
        new = self.provider.read()
        for key in self.values:
            oldval = self.values[key]
            newval = new[key]
            newdelta = None
            if oldval is not None:
                newdelta = newval - oldval[0]
            self.values[key] = (newval, newdelta)
        return self.values
| 375 | |
# Both providers read from below /sys/kernel/debug, so give a hint and
# stop early when it, or its kvm directory, is missing.
if not os.access('/sys/kernel/debug', os.F_OK):
    print('Please enable CONFIG_DEBUG_FS in your kernel')
    sys.exit(1)
if not os.access('/sys/kernel/debug/kvm', os.F_OK):
    print("Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')")
    print("and ensure the kvm modules are loaded")
    sys.exit(1)

# Column widths of the curses display: field name and total count.
label_width = 40
number_width = 10
| 386 | |
def tui(screen, stats):
    """Run the interactive curses display until the user quits.

    screen -- curses window to draw on
    stats  -- Stats instance to sample

    Keys: 'x' toggles the drill-down view, 'q' (or Ctrl-C) leaves.  The
    drill-down toggle only has an effect when stats was created without a
    field filter: it then switches between all fields and only those whose
    name contains no '('.
    """
    curses.use_default_colors()
    curses.noecho()
    drilldown = False
    fields_filter = stats.fields_filter

    def update_drilldown():
        if not fields_filter:
            if drilldown:
                stats.set_fields_filter(None)
            else:
                stats.set_fields_filter(r'^[^\(]*$')
    update_drilldown()

    def refresh(sleeptime):
        """Redraw the table; deltas are shown as a rate per *sleeptime*."""
        screen.erase()
        screen.addstr(0, 0, 'kvm statistics')
        row = 2
        s = stats.get()
        def sortkey(x):
            # Largest delta first, then largest total.
            if s[x][1]:
                return (-s[x][1], -s[x][0])
            else:
                return (0, -s[x][0])
        for key in sorted(s.keys(), key = sortkey):
            if row >= screen.getmaxyx()[0]:
                break
            values = s[key]
            if not values[0] and not values[1]:
                # Everything after this row in sort order is idle as well.
                break
            col = 1
            screen.addstr(row, col, key)
            col += label_width
            screen.addstr(row, col, '%10d' % (values[0],))
            col += number_width
            if values[1] is not None:
                screen.addstr(row, col, '%8d' % (values[1] / sleeptime,))
            row += 1
        screen.refresh()

    sleeptime = 0.25
    last_refresh = None
    while True:
        # Scale the deltas by the time that really passed since the previous
        # sample: the wait before a refresh is not the nominal sleeptime
        # when a key was pressed early or right after the short first wait.
        now = time.time()
        elapsed = sleeptime
        if last_refresh is not None and now > last_refresh:
            elapsed = now - last_refresh
        last_refresh = now
        refresh(elapsed)
        curses.halfdelay(int(sleeptime * 10))
        sleeptime = 3
        try:
            c = screen.getkey()
            if c == 'x':
                drilldown = not drilldown
                update_drilldown()
            if c == 'q':
                break
        except KeyboardInterrupt:
            break
        except curses.error:
            # No key arrived within the halfdelay period.
            continue
| 441 | |
def batch(stats):
    """Print every field's total and its change over a one second interval.

    Two samples are taken one second apart; only the second one is printed,
    one field per line in name order.
    """
    stats.get()
    time.sleep(1)
    sample = stats.get()
    for name in sorted(sample.keys()):
        total, delta = sample[name][0], sample[name][1]
        print('%-22s%10d%10d' % (name, total, delta))
| 449 | |
def log(stats):
    """Print one line of per-field deltas every second, without end.

    The set of columns is fixed from an initial sample.  A header line with
    the field names, cut to nine characters, is repeated every 20 lines.
    """
    keys = sorted(stats.get())

    def banner():
        print(' '.join('%10s' % k[0:9] for k in keys))

    def statline():
        s = stats.get()
        print(' '.join(' %9d' % s[k][1] for k in keys))

    banner_repeat = 20
    line = 0
    while True:
        time.sleep(1)
        if line % banner_repeat == 0:
            banner()
        statline()
        line += 1
| 469 | |
# Command line: -1/--once/--batch, -l/--log and -f/--fields REGEX.
options = optparse.OptionParser()
options.add_option('-1', '--once', '--batch',
                   dest = 'once',
                   action = 'store_true',
                   default = False,
                   help = 'run in batch mode for one second')
options.add_option('-l', '--log',
                   dest = 'log',
                   action = 'store_true',
                   default = False,
                   help = 'run in logging mode (like vmstat)')
options.add_option('-f', '--fields',
                   dest = 'fields',
                   action = 'store',
                   default = None,
                   help = 'fields to display (regex)')
# From here on `options` holds the parsed values, not the parser.
(options, args) = options.parse_args(sys.argv)
| 490 | |
# Prefer the tracepoint based provider and fall back to the debugfs
# counters when it cannot be set up.  Catch Exception rather than
# everything, so that Ctrl-C or sys.exit() during the setup is not
# mistaken for a missing feature.
try:
    provider = TracepointProvider()
except Exception:
    provider = DebugfsProvider()

stats = Stats(provider, fields = options.fields)

if options.log:
    log(stats)
elif not options.once:
    # wrapper() is available directly from the curses package imported at
    # the top of the file; a separate 'import curses.wrapper' is not needed
    # and fails on Python versions that no longer ship that submodule.
    curses.wrapper(tui, stats)
else:
    batch(stats)