| #!/usr/bin/python |
| # |
| # top-like utility for displaying kvm statistics |
| # |
| # Copyright 2006-2008 Qumranet Technologies |
| # Copyright 2008-2011 Red Hat, Inc. |
| # |
| # Authors: |
| # Avi Kivity <avi@redhat.com> |
| # |
| # This work is licensed under the terms of the GNU GPL, version 2. See |
| # the COPYING file in the top-level directory. |
| |
| import curses |
| import sys, os, time, optparse, ctypes |
| from ctypes import * |
| |
class DebugfsProvider(object):
    """Statistics provider that reads one integer per file from debugfs.

    Every directory entry found under ``self.base`` at construction time is
    treated as a counter name; the file of that name holds its value.
    """

    def __init__(self):
        self.base = '/sys/kernel/debug/kvm'
        self._fields = os.listdir(self.base)

    def fields(self):
        """Return the counter names currently selected."""
        return self._fields

    def select(self, fields):
        """Restrict subsequent read() calls to the given counter names."""
        self._fields = fields

    def read(self):
        """Return a dict mapping each selected counter name to its value.

        Raises IOError/OSError when a counter file cannot be opened and
        ValueError when its content is not an integer.
        """
        def val(key):
            # Close the descriptor deterministically; the previous
            # file(...).read() left that to the garbage collector.
            with open(self.base + '/' + key) as f:
                return int(f.read())
        return dict((key, val(key)) for key in self._fields)
| |
# Numeric exit reason -> symbolic name for hosts whose CPU advertises the
# 'vmx' flag (see x86_exit_reasons).  The inverse of this table is used to
# build one filtered kvm_exit counter per name; codes that are not listed
# here get no drill-down entry.
vmx_exit_reasons = {
    0: 'EXCEPTION_NMI',
    1: 'EXTERNAL_INTERRUPT',
    2: 'TRIPLE_FAULT',
    7: 'PENDING_INTERRUPT',
    8: 'NMI_WINDOW',
    9: 'TASK_SWITCH',
    10: 'CPUID',
    12: 'HLT',
    14: 'INVLPG',
    15: 'RDPMC',
    16: 'RDTSC',
    18: 'VMCALL',
    19: 'VMCLEAR',
    20: 'VMLAUNCH',
    21: 'VMPTRLD',
    22: 'VMPTRST',
    23: 'VMREAD',
    24: 'VMRESUME',
    25: 'VMWRITE',
    26: 'VMOFF',
    27: 'VMON',
    28: 'CR_ACCESS',
    29: 'DR_ACCESS',
    30: 'IO_INSTRUCTION',
    31: 'MSR_READ',
    32: 'MSR_WRITE',
    33: 'INVALID_STATE',
    36: 'MWAIT_INSTRUCTION',
    39: 'MONITOR_INSTRUCTION',
    40: 'PAUSE_INSTRUCTION',
    41: 'MCE_DURING_VMENTRY',
    43: 'TPR_BELOW_THRESHOLD',
    44: 'APIC_ACCESS',
    48: 'EPT_VIOLATION',
    49: 'EPT_MISCONFIG',
    54: 'WBINVD',
    55: 'XSETBV',
    56: 'APIC_WRITE',
    58: 'INVPCID',
}
| |
# Numeric exit reason -> symbolic name for hosts whose CPU advertises the
# 'svm' flag (see x86_exit_reasons).  Used the same way as the vmx table:
# its inverse supplies the values for the filtered kvm_exit counters.
svm_exit_reasons = {
    0x000: 'READ_CR0',
    0x003: 'READ_CR3',
    0x004: 'READ_CR4',
    0x008: 'READ_CR8',
    0x010: 'WRITE_CR0',
    0x013: 'WRITE_CR3',
    0x014: 'WRITE_CR4',
    0x018: 'WRITE_CR8',
    0x020: 'READ_DR0',
    0x021: 'READ_DR1',
    0x022: 'READ_DR2',
    0x023: 'READ_DR3',
    0x024: 'READ_DR4',
    0x025: 'READ_DR5',
    0x026: 'READ_DR6',
    0x027: 'READ_DR7',
    0x030: 'WRITE_DR0',
    0x031: 'WRITE_DR1',
    0x032: 'WRITE_DR2',
    0x033: 'WRITE_DR3',
    0x034: 'WRITE_DR4',
    0x035: 'WRITE_DR5',
    0x036: 'WRITE_DR6',
    0x037: 'WRITE_DR7',
    0x040: 'EXCP_BASE',
    0x060: 'INTR',
    0x061: 'NMI',
    0x062: 'SMI',
    0x063: 'INIT',
    0x064: 'VINTR',
    0x065: 'CR0_SEL_WRITE',
    0x066: 'IDTR_READ',
    0x067: 'GDTR_READ',
    0x068: 'LDTR_READ',
    0x069: 'TR_READ',
    0x06a: 'IDTR_WRITE',
    0x06b: 'GDTR_WRITE',
    0x06c: 'LDTR_WRITE',
    0x06d: 'TR_WRITE',
    0x06e: 'RDTSC',
    0x06f: 'RDPMC',
    0x070: 'PUSHF',
    0x071: 'POPF',
    0x072: 'CPUID',
    0x073: 'RSM',
    0x074: 'IRET',
    0x075: 'SWINT',
    0x076: 'INVD',
    0x077: 'PAUSE',
    0x078: 'HLT',
    0x079: 'INVLPG',
    0x07a: 'INVLPGA',
    0x07b: 'IOIO',
    0x07c: 'MSR',
    0x07d: 'TASK_SWITCH',
    0x07e: 'FERR_FREEZE',
    0x07f: 'SHUTDOWN',
    0x080: 'VMRUN',
    0x081: 'VMMCALL',
    0x082: 'VMLOAD',
    0x083: 'VMSAVE',
    0x084: 'STGI',
    0x085: 'CLGI',
    0x086: 'SKINIT',
    0x087: 'RDTSCP',
    0x088: 'ICEBP',
    0x089: 'WBINVD',
    0x08a: 'MONITOR',
    0x08b: 'MWAIT',
    0x08c: 'MWAIT_COND',
    0x08d: 'XSETBV',
    0x400: 'NPF',
}
| |
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Numeric reason -> symbolic name.  The inverse of this table supplies the
# values for the filtered kvm_userspace_exit counters on every platform.
userspace_exit_reasons = {
    0: 'UNKNOWN',
    1: 'EXCEPTION',
    2: 'IO',
    3: 'HYPERCALL',
    4: 'DEBUG',
    5: 'HLT',
    6: 'MMIO',
    7: 'IRQ_WINDOW_OPEN',
    8: 'SHUTDOWN',
    9: 'FAIL_ENTRY',
    10: 'INTR',
    11: 'SET_TPR',
    12: 'TPR_ACCESS',
    13: 'S390_SIEIC',
    14: 'S390_RESET',
    15: 'DCR',
    16: 'NMI',
    17: 'INTERNAL_ERROR',
    18: 'OSI',
    19: 'PAPR_HCALL',
    20: 'S390_UCONTROL',
    21: 'WATCHDOG',
    22: 'S390_TSCH',
    23: 'EPR',
    24: 'SYSTEM_EVENT',
}
| |
# CPU flag (as it appears on the 'flags' line of /proc/cpuinfo) -> exit
# reason table for that virtualization extension.
x86_exit_reasons = {
    'vmx': vmx_exit_reasons,
    'svm': svm_exit_reasons,
}

# Per-platform settings.  Both stay None until one of the *_init()
# functions assigns them: sc_perf_evt_open is the syscall number handed to
# syscall() by _perf_event_open(), exit_reasons the table used for the
# kvm_exit drill-down (only set on x86).
sc_perf_evt_open = None
exit_reasons = None

# Request codes passed to fcntl.ioctl() on perf event descriptors by the
# Event class.  ppc_init() replaces this table with its own encodings.
# NOTE(review): presumably the PERF_EVENT_IOC_* values with a 64-bit
# pointer size baked into SET_FILTER - confirm against the kernel headers.
ioctl_numbers = {
    'SET_FILTER' : 0x40082406,
    'ENABLE' : 0x00002400,
    'DISABLE' : 0x00002401,
    'RESET' : 0x00002403,
}
| |
def x86_init(flag):
    """Configure the module globals for an x86 host.

    flag is the CPU flag name used as key into x86_exit_reasons ('vmx' or
    'svm').  An unknown flag raises KeyError before any global is changed.
    """
    global sc_perf_evt_open, exit_reasons
    # Look the table up first so a bad flag leaves both globals untouched.
    reasons = x86_exit_reasons[flag]
    sc_perf_evt_open = 298
    exit_reasons = reasons
| |
def s390_init():
    """Configure the module globals for an s390 host.

    Only the syscall number used by _perf_event_open() is set.
    """
    global sc_perf_evt_open
    sc_perf_evt_open = 331
| |
def ppc_init():
    """Configure the module globals for a PowerPC host.

    Sets the syscall number used by _perf_event_open() and replaces
    ioctl_numbers with the PowerPC encodings of the requests issued by the
    Event class.
    """
    globals().update({
        'sc_perf_evt_open' : 319,
        'ioctl_numbers' : {
            # The size field of SET_FILTER carries the pointer width.
            'SET_FILTER' : 0x80002406 | (ctypes.sizeof(ctypes.c_char_p) << 16),
            'ENABLE' : 0x20002400,
            'DISABLE' : 0x20002401,
            # Event.reset() looks up 'RESET'; the table used to lack it, so
            # selecting fields raised KeyError on PowerPC.  Same encoding
            # as ENABLE/DISABLE with request number 3.
            'RESET' : 0x20002403,
        }
    })
| |
def aarch64_init():
    """Configure the module globals for an AArch64 host.

    Only the syscall number used by _perf_event_open() is set.
    """
    global sc_perf_evt_open
    sc_perf_evt_open = 241
| |
def detect_platform():
    """Detect the host architecture and run the matching *_init() function.

    PowerPC and AArch64 are recognised from the machine field of
    os.uname().  Otherwise /proc/cpuinfo is scanned: a 'flags' line naming a
    key of x86_exit_reasons selects x86, a 'vendor_id' line containing
    'IBM/S390' selects s390.  When nothing matches, no init function runs
    and the module-level defaults stay in place.
    """
    machine = os.uname()[4]
    if machine.startswith('ppc'):
        ppc_init()
        return
    if machine.startswith('aarch64'):
        aarch64_init()
        return

    # The context manager releases the descriptor on every exit path,
    # including the early returns below.
    with open('/proc/cpuinfo') as cpuinfo:
        for line in cpuinfo:
            if line.startswith('flags'):
                for flag in line.split():
                    if flag in x86_exit_reasons:
                        x86_init(flag)
                        return
            elif line.startswith('vendor_id'):
                for flag in line.split():
                    if flag == 'IBM/S390':
                        s390_init()
                        return
| |
# Runs once at import time: the platform globals must be set before the
# filter table is built and before any perf event is opened.
detect_platform()
| |
def invert(d):
    """Return a new dict that maps every value of d back to its key.

    Values must be hashable.  When several keys share a value only one of
    them survives (the one visited last).
    """
    # items() exists on Python 2 and 3 dicts alike; iteritems() is
    # Python 2 only.
    return dict((value, key) for key, value in d.items())
| |
# Tracepoints that support drill-down, keyed by tracepoint name.  Each
# value is (name of the tracepoint field to filter on,
# {symbolic name: numeric value}).
filters = {
    'kvm_userspace_exit': ('reason', invert(userspace_exit_reasons)),
}
# exit_reasons is only set on x86, so kvm_exit drill-down exists only there.
if exit_reasons:
    filters['kvm_exit'] = ('exit_reason', invert(exit_reasons))
| |
| import struct, array |
| |
# Bindings into the C library: 'syscall' is used by _perf_event_open() to
# issue the raw system call, 'get_errno' by Event to fetch the error code
# after a failure (it is declared to return a pointer to int, which Event
# dereferences with [0]).
libc = ctypes.CDLL('libc.so.6')
syscall = libc.syscall
get_errno = libc.__errno_location
get_errno.restype = POINTER(c_int)
| |
class perf_event_attr(ctypes.Structure):
    """ctypes layout of the attribute block passed to _perf_event_open().

    Event fills in type, size, config, sample_type and read_format; the
    remaining fields keep the zero value ctypes gives them.
    """
    _fields_ = [
        ('type', ctypes.c_uint32),
        ('size', ctypes.c_uint32),
        ('config', ctypes.c_uint64),
        ('sample_freq', ctypes.c_uint64),
        ('sample_type', ctypes.c_uint64),
        ('read_format', ctypes.c_uint64),
        ('flags', ctypes.c_uint64),
        ('wakeup_events', ctypes.c_uint32),
        ('bp_type', ctypes.c_uint32),
        ('bp_addr', ctypes.c_uint64),
        ('bp_len', ctypes.c_uint64),
    ]
def _perf_event_open(attr, pid, cpu, group_fd, flags):
    """Issue the perf_event_open system call through libc's syscall().

    attr is a perf_event_attr instance (passed by pointer); pid, cpu and
    group_fd are passed as C ints and flags as a C long.  The syscall
    number comes from the module global sc_perf_evt_open.  Returns the
    value returned by syscall(); Event treats -1 as failure.
    """
    c_args = (
        ctypes.pointer(attr),
        ctypes.c_int(pid),
        ctypes.c_int(cpu),
        ctypes.c_int(group_fd),
        ctypes.c_long(flags),
    )
    return syscall(sc_perf_evt_open, *c_args)
| |
# Values for perf_event_attr.type.  Only PERF_TYPE_TRACEPOINT is used by
# the Event class in this file.
PERF_TYPE_HARDWARE = 0
PERF_TYPE_SOFTWARE = 1
PERF_TYPE_TRACEPOINT = 2
PERF_TYPE_HW_CACHE = 3
PERF_TYPE_RAW = 4
PERF_TYPE_BREAKPOINT = 5

# Bit flags for perf_event_attr.sample_type.  Event ORs together RAW,
# TIME and CPU.
PERF_SAMPLE_IP = 1 << 0
PERF_SAMPLE_TID = 1 << 1
PERF_SAMPLE_TIME = 1 << 2
PERF_SAMPLE_ADDR = 1 << 3
PERF_SAMPLE_READ = 1 << 4
PERF_SAMPLE_CALLCHAIN = 1 << 5
PERF_SAMPLE_ID = 1 << 6
PERF_SAMPLE_CPU = 1 << 7
PERF_SAMPLE_PERIOD = 1 << 8
PERF_SAMPLE_STREAM_ID = 1 << 9
PERF_SAMPLE_RAW = 1 << 10

# Bit flags for perf_event_attr.read_format.  Event sets
# PERF_FORMAT_GROUP, which is what lets Group.read() fetch every counter
# of a group in one read.
PERF_FORMAT_TOTAL_TIME_ENABLED = 1 << 0
PERF_FORMAT_TOTAL_TIME_RUNNING = 1 << 1
PERF_FORMAT_ID = 1 << 2
PERF_FORMAT_GROUP = 1 << 3
| |
| import re |
| |
# Root of the tracing tree in debugfs.  Tracepoint ids are read from
# <sys_tracing>/events/<event_set>/<tracepoint>/id, and the kvm tracepoints
# are enumerated from <sys_tracing>/events/kvm.
sys_tracing = '/sys/kernel/debug/tracing'
| |
class Group(object):
    """A perf event group bound to one CPU.

    The first event added becomes the group leader; every later event is
    opened with the leader's descriptor as its group, and reading the
    leader returns the counters of the whole group.
    """

    def __init__(self, cpu):
        self.events = []
        self.group_leader = None
        self.cpu = cpu

    def add_event(self, name, event_set, tracepoint, filter = None):
        """Open an Event for the given tracepoint and append it to the group.

        name is the key under which read() reports the counter; filter is
        an optional tracepoint filter expression handed to Event.
        """
        self.events.append(Event(group = self,
                                 name = name, event_set = event_set,
                                 tracepoint = tracepoint, filter = filter))
        if len(self.events) == 1:
            # Unbuffered binary mode: each read() becomes one read of
            # exactly the requested size on the perf descriptor and returns
            # bytes on Python 2 and 3 alike.  The default (buffered, text
            # on Python 3) is not suitable for a binary counter record.
            self.file = os.fdopen(self.events[0].fd, 'rb', 0)

    def read(self):
        """Return a dict mapping each event name to its current counter.

        The record read from the leader is an 8-byte header, which is
        skipped, followed by one unsigned 64-bit counter per event in the
        order the events were added.  Raises struct.error when fewer bytes
        than expected are returned.
        """
        length = 8 * (1 + len(self.events))
        # 'Q': the perf ABI defines the counters as u64, so unpack them
        # unsigned instead of letting large values turn negative.
        fmt = 'xxxxxxxx' + 'Q' * len(self.events)
        return dict(zip([event.name for event in self.events],
                        struct.unpack(fmt, self.file.read(length))))
| |
class Event(object):
    """One perf tracepoint counter, opened on the CPU of its Group."""

    def __init__(self, group, name, event_set, tracepoint, filter = None):
        """Open the counter and optionally install a tracepoint filter.

        group      -- Group the event joins; its first event, if any, is
                      used as group leader.
        name       -- key under which the counter is reported.
        event_set  -- tracing subsystem directory, e.g. 'kvm'.
        tracepoint -- tracepoint name inside event_set.
        filter     -- optional filter expression passed to the SET_FILTER
                      ioctl.

        Raises Exception when perf_event_open fails; errors from reading
        the tracepoint id or from the ioctl propagate unchanged.
        """
        import fcntl
        self.name = name
        attr = perf_event_attr()
        attr.type = PERF_TYPE_TRACEPOINT
        attr.size = ctypes.sizeof(attr)
        id_path = os.path.join(sys_tracing, 'events', event_set,
                               tracepoint, 'id')
        # Close the id file right away instead of leaking one descriptor
        # per event until the garbage collector gets to it.
        with open(id_path) as id_file:
            attr.config = int(id_file.read())
        attr.sample_type = (PERF_SAMPLE_RAW
                            | PERF_SAMPLE_TIME
                            | PERF_SAMPLE_CPU)
        # NOTE(review): perf_event_attr names this slot 'sample_freq', so
        # this assignment only creates a plain Python attribute and never
        # reaches the kernel.  Left as is to keep the current behaviour.
        attr.sample_period = 1
        attr.read_format = PERF_FORMAT_GROUP
        group_leader = -1
        if group.events:
            group_leader = group.events[0].fd
        fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0)
        if fd == -1:
            err = get_errno()[0]
            raise Exception('perf_event_open failed, errno = ' + str(err))
        if filter:
            try:
                fcntl.ioctl(fd, ioctl_numbers['SET_FILTER'], filter)
            except Exception:
                # The descriptor is not stored anywhere yet; close it so a
                # rejected filter does not leak it.
                os.close(fd)
                raise
        self.fd = fd

    def enable(self):
        """Start counting."""
        import fcntl
        fcntl.ioctl(self.fd, ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Stop counting; the current value is kept."""
        import fcntl
        fcntl.ioctl(self.fd, ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Issue the RESET request on the counter."""
        import fcntl
        fcntl.ioctl(self.fd, ioctl_numbers['RESET'], 0)
| |
class TracepointProvider(object):
    """Statistics provider backed by perf counters on the kvm tracepoints.

    One Group is created per online CPU and each group holds one Event per
    field.  Besides the plain tracepoints, every tracepoint listed in
    'filters' gets one extra field per symbolic value, named
    'tracepoint(value)'.
    """

    def __init__(self):
        path = os.path.join(sys_tracing, 'events', 'kvm')
        fields = [f
                  for f in os.listdir(path)
                  if os.path.isdir(os.path.join(path, f))]
        extra = []
        for f in fields:
            if f in filters:
                subfield, values = filters[f]
                for name in values:
                    extra.append(f + '(' + name + ')')
        fields += extra
        self._setup(fields)
        self.select(fields)

    def fields(self):
        """Return every field name this provider opened counters for."""
        return self._fields

    def _online_cpus(self):
        """Return the numbers of all CPUs that are not marked offline.

        A cpuN directory without an 'online' file counts as online.
        """
        cpus = []
        pattern = r'cpu([0-9]+)'
        basedir = '/sys/devices/system/cpu'
        for entry in os.listdir(basedir):
            match = re.match(pattern, entry)
            if not match:
                continue
            path = os.path.join(basedir, entry, 'online')
            if os.path.exists(path):
                # Close the file explicitly rather than leaking one
                # descriptor per CPU.
                with open(path) as f:
                    if f.read().strip() != '1':
                        continue
            cpus.append(int(match.group(1)))
        return cpus

    def _setup(self, _fields):
        """Open one counter per field on every online CPU."""
        self._fields = _fields
        cpus = self._online_cpus()
        import resource
        # Every event needs its own descriptor on every CPU, so set the
        # open-file limit to a fixed allowance of 1000 per CPU first.
        nfiles = len(cpus) * 1000
        resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles))
        self.group_leaders = []
        for cpu in cpus:
            group = Group(cpu)
            for name in _fields:
                tracepoint = name
                event_filter = None
                # 'tracepoint(value)' fields count only the events whose
                # filter field equals the numeric code of 'value'.
                m = re.match(r'(.*)\((.*)\)', name)
                if m:
                    tracepoint, sub = m.groups()
                    event_filter = '%s==%d\0' % (filters[tracepoint][0],
                                                 filters[tracepoint][1][sub])
                group.add_event(name, event_set = 'kvm',
                                tracepoint = tracepoint,
                                filter = event_filter)
            self.group_leaders.append(group)

    def select(self, fields):
        """Reset and enable the events named in fields, disable the rest."""
        wanted = set(fields)
        for group in self.group_leaders:
            for event in group.events:
                if event.name in wanted:
                    event.reset()
                    event.enable()
                else:
                    event.disable()

    def read(self):
        """Return the per-field counters summed over all CPUs.

        The result is a defaultdict(int), so unknown names read as 0.
        """
        from collections import defaultdict
        ret = defaultdict(int)
        for group in self.group_leaders:
            for name, val in group.read().items():
                ret[name] += val
        return ret
| |
class Stats(object):
    """Collects counters from a list of providers and tracks their deltas.

    A provider is any object with fields(), select(fields) and read().
    self.values maps a field name to (value, delta since the previous
    get()); it holds None for a field that has not been read yet.
    """

    def __init__(self, providers, fields = None):
        """providers: list of provider objects; fields: optional regex."""
        self.providers = providers
        self.fields_filter = fields
        self._update()

    def _update(self):
        """Apply the fields filter to every provider and reset the values."""
        import re

        def wanted(key):
            if not self.fields_filter:
                return True
            return re.match(self.fields_filter, key) is not None

        self.values = dict()
        # Use the providers handed to the constructor; this used to read a
        # module-level 'providers' name and ignored the argument.
        for d in self.providers:
            provider_fields = [key for key in d.fields() if wanted(key)]
            for key in provider_fields:
                self.values[key] = None
            d.select(provider_fields)

    def set_fields_filter(self, fields_filter):
        """Replace the regex filter (None shows everything) and re-select."""
        self.fields_filter = fields_filter
        self._update()

    def get(self):
        """Read all providers and return self.values, updated in place.

        The delta of a field is None on its first read after a filter
        change, because no previous value is known then.
        """
        for d in self.providers:
            new = d.read()
            for key in d.fields():
                oldval = self.values.get(key, (0, 0))
                newval = new[key]
                newdelta = None
                if oldval is not None:
                    newdelta = newval - oldval[0]
                self.values[key] = (newval, newdelta)
        return self.values
| |
# Refuse to start unless debugfs and the kvm directory inside it exist.
# Each print is called with a single string so the output is the same
# whether print is a statement or a function.
if not os.access('/sys/kernel/debug', os.F_OK):
    print('Please enable CONFIG_DEBUG_FS in your kernel')
    sys.exit(1)
if not os.access('/sys/kernel/debug/kvm', os.F_OK):
    print("Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')")
    print("and ensure the kvm modules are loaded")
    sys.exit(1)
| |
# Column widths, in characters, used by tui() to position the value and
# the rate to the right of the field name.
label_width = 40
number_width = 10
| |
def tui(screen, stats):
    """Run the interactive curses display until the user quits.

    screen is the curses window to draw on and stats the Stats object to
    poll.  The first redraw happens after 0.25 s, later ones every 3 s or
    on a key press.  'x' toggles the drill-down fields, 'q' or Ctrl-C ends
    the loop.
    """
    curses.use_default_colors()
    curses.noecho()
    drilldown = False
    fields_filter = stats.fields_filter

    def update_drilldown():
        # A filter given by the caller is never overridden.
        if fields_filter:
            return
        if drilldown:
            stats.set_fields_filter(None)
        else:
            # Only names without '(' , i.e. hide 'tracepoint(value)'.
            stats.set_fields_filter(r'^[^\(]*$')

    def refresh(sleeptime):
        screen.erase()
        screen.addstr(0, 0, 'kvm statistics')
        s = stats.get()

        def sortkey(name):
            # Busiest fields first: by delta, then by absolute value.
            total, delta = s[name][0], s[name][1]
            if delta:
                return (-delta, -total)
            return (0, -total)

        row = 2
        for name in sorted(s.keys(), key = sortkey):
            if row >= screen.getmaxyx()[0]:
                break
            total, delta = s[name][0], s[name][1]
            # Sorted order puts the all-zero fields last; stop at the first.
            if not (total or delta):
                break
            value_col = 1 + label_width
            rate_col = value_col + number_width
            screen.addstr(row, 1, name)
            screen.addstr(row, value_col, '%10d' % (total,))
            if delta is not None:
                screen.addstr(row, rate_col, '%8d' % (delta / sleeptime,))
            row += 1
        screen.refresh()

    update_drilldown()
    sleeptime = 0.25
    while True:
        refresh(sleeptime)
        # getkey() gives up after this many tenths of a second.
        curses.halfdelay(int(sleeptime * 10))
        sleeptime = 3
        try:
            c = screen.getkey()
            if c == 'x':
                drilldown = not drilldown
                update_drilldown()
            elif c == 'q':
                break
        except KeyboardInterrupt:
            break
        except curses.error:
            # No key within the delay: just redraw.
            continue
| |
def batch(stats):
    """Print every field with its value and its change over one second.

    The statistics are read twice, one second apart, so that the second
    read carries a delta.  Output is one line per field, sorted by name.
    """
    stats.get()
    time.sleep(1)
    snapshot = stats.get()
    for name in sorted(snapshot.keys()):
        total, delta = snapshot[name][0], snapshot[name][1]
        print('%-22s%10d%10d' % (name, total, delta))
| |
def log(stats):
    """Print one line of per-second deltas forever, vmstat style.

    The set of fields is fixed by the first read.  A header with the field
    names, cut to nine characters each, is printed before the first line
    and again every 20 lines.  Never returns.
    """
    keys = sorted(stats.get().keys())

    def banner():
        # Columns are separated by a single space, one newline at the end.
        print(' '.join('%10s' % k[0:9] for k in keys))

    def statline():
        s = stats.get()
        print(' '.join(' %9d' % s[k][1] for k in keys))

    banner_repeat = 20
    line = 0
    while True:
        time.sleep(1)
        if line % banner_repeat == 0:
            banner()
        statline()
        line += 1
| |
# Command line definition.  'options' starts out as the parser and is
# rebound to the parsed values by the parse_args() call at the end.
options = optparse.OptionParser()
options.add_option(
    '-1', '--once', '--batch',
    action='store_true',
    default=False,
    dest='once',
    help='run in batch mode for one second',
)
options.add_option(
    '-l', '--log',
    action='store_true',
    default=False,
    dest='log',
    help='run in logging mode (like vmstat)',
)
options.add_option(
    '-t', '--tracepoints',
    action='store_true',
    default=False,
    dest='tracepoints',
    help='retrieve statistics from tracepoints',
)
options.add_option(
    '-d', '--debugfs',
    action='store_true',
    default=False,
    dest='debugfs',
    help='retrieve statistics from debugfs',
)
options.add_option(
    '-f', '--fields',
    action='store',
    default=None,
    dest='fields',
    help='fields to display (regex)',
)
# The whole of sys.argv is passed, so args[0] is the program name.
(options, args) = options.parse_args(sys.argv)
| |
# Build the list of statistics providers requested on the command line.
providers = []
if options.tracepoints:
    providers.append(TracepointProvider())
if options.debugfs:
    providers.append(DebugfsProvider())

if not providers:
    # No explicit choice: prefer tracepoints and fall back to debugfs when
    # they cannot be set up.  Catch Exception rather than everything, so
    # that Ctrl-C or sys.exit() during setup is not mistaken for a reason
    # to fall back.
    try:
        providers = [TracepointProvider()]
    except Exception:
        providers = [DebugfsProvider()]

stats = Stats(providers, fields = options.fields)

if options.log:
    log(stats)
elif not options.once:
    # curses.wrapper is already available through the top-level
    # 'import curses'; the former 'import curses.wrapper' was redundant and
    # fails on Python 3, where wrapper is a function and not a module.
    curses.wrapper(tui, stats)
else:
    batch(stats)