| #! /usr/bin/env python | |
| """ | |
| combinerefs path | |
| A helper for analyzing PYTHONDUMPREFS output. | |
| When the PYTHONDUMPREFS envar is set in a debug build, at Python shutdown | |
| time Py_Finalize() prints the list of all live objects twice: first it | |
| prints the repr() of each object while the interpreter is still fully intact. | |
| After cleaning up everything it can, it prints all remaining live objects | |
| again, but the second time just prints their addresses, refcounts, and type | |
| names (because the interpreter has been torn down, calling repr methods at | |
| this point can get into infinite loops or blow up). | |
| Save all this output into a file, then run this script passing the path to | |
| that file. The script finds both output chunks, combines them, then prints | |
| a line of output for each object still alive at the end: | |
| address refcnt typename repr | |
| address is the address of the object, in whatever format the platform C | |
| produces for a %p format code. | |
| refcnt is of the form | |
| "[" ref "]" | |
| when the object's refcount is the same in both PYTHONDUMPREFS output blocks, | |
| or | |
| "[" ref_before "->" ref_after "]" | |
| if the refcount changed. | |
| typename is object->ob_type->tp_name, extracted from the second PYTHONDUMPREFS | |
| output block. | |
| repr is repr(object), extracted from the first PYTHONDUMPREFS output block. | |
| CAUTION: If object is a container type, it may not actually contain all the | |
| objects shown in the repr: the repr was captured from the first output block, | |
| and some of the containees may have been released since then. For example, | |
| it's common for the line showing the dict of interned strings to display | |
| strings that no longer exist at the end of Py_Finalize; this can be recognized | |
| (albeit painfully) because such containees don't have a line of their own. | |
| The objects are listed in allocation order, with most-recently allocated | |
| printed first, and the first object allocated printed last. | |
| Simple examples: | |
| 00857060 [14] str '__len__' | |
| The str object '__len__' is alive at shutdown time, and both PYTHONDUMPREFS | |
| output blocks said there were 14 references to it. This is probably due to | |
| C modules that intern the string "__len__" and keep a reference to it in a | |
| file static. | |
| 00857038 [46->5] tuple () | |
| 46-5 = 41 references to the empty tuple were removed by the cleanup actions | |
| between the times PYTHONDUMPREFS produced output. | |
| 00858028 [1025->1456] str '<dummy key>' | |
| The string '<dummy key>', which is used in dictobject.c to overwrite a real | |
| key that gets deleted, grew several hundred references during cleanup. It | |
| suggests that stuff did get removed from dicts by cleanup, but that the dicts | |
| themselves are staying alive for some reason. """ | |
| import re | |
| import sys | |
# Generate lines from fileiter.  If whilematch is true, continue reading
# while the regexp object pat matches line.  If whilematch is false, lines
# are read so long as pat doesn't match them.  In any case, the first line
# that doesn't match pat (when whilematch is true), or that does match pat
# (when whilematch is false), is consumed and discarded, and fileiter will
# resume at the line following it.
def read(fileiter, pat, whilematch):
    """Yield lines from *fileiter* for as long as they agree with *pat*.

    If *whilematch* is true, lines are yielded while the compiled regexp
    *pat* matches them (anchored at the start of the line, via
    ``pat.match``).  If *whilematch* is false, lines are yielded while
    *pat* does not match them.

    The first line that ends the run is consumed from *fileiter* but is not
    yielded, so a later consumer of *fileiter* resumes at the line that
    follows it.

    *whilematch* is interpreted by truth value, so any truthy or falsy
    object is accepted, not only ``True``/``False``.
    """
    # Normalize once so that e.g. whilematch=2 or whilematch=None behave
    # like True/False instead of never comparing equal to a bool.
    want_match = bool(whilematch)
    for line in fileiter:
        if bool(pat.match(line)) != want_match:
            # Terminating line: already consumed from fileiter, and dropped.
            break
        yield line
def combine(fname):
    """Merge the two PYTHONDUMPREFS blocks found in *fname* and print them.

    The file is scanned for a "Remaining objects:" header; the lines after
    it, up to a "Remaining object addresses:" header, form the first block
    (address, refcount, repr).  The lines after that header form the second
    block (address, refcount, type name) and are read for as long as they
    have the ``address [refcount] text`` shape.

    For every object in the second block one line is printed:

        address [refcnt] typename repr

    where ``[refcnt]`` is ``[before->after]`` if the refcount differs
    between the two blocks.  Lines of the first block that cannot be parsed,
    and second-block objects that are absent from the first block, are
    reported with a ``???`` prefix.  A final summary line gives the number
    of objects parsed from each block.

    fname -- path of the file holding the saved PYTHONDUMPREFS output.
    """
    first_header = re.compile(r'^Remaining objects:$')
    second_header = re.compile(r'^Remaining object addresses:$')
    crack = re.compile(r'([a-zA-Z\d]+) \[(\d+)\] (.*)')

    # open() instead of the Python-2-only file() builtin; the with-block
    # closes the file even if parsing raises part-way through.
    with open(fname) as f:
        fi = iter(f)

        # Discard everything up to and including the first block's header.
        for line in read(fi, first_header, False):
            pass

        addr2rc = {}
        addr2guts = {}
        before = 0
        # First block: remember refcount and repr for each address.
        for line in read(fi, second_header, False):
            m = crack.match(line)
            if m:
                # Targets are assigned left to right, so addr is bound
                # before it is used as the dictionary key.
                addr, addr2rc[addr], addr2guts[addr] = m.groups()
                before += 1
            else:
                # rstrip() drops the line's own newline so the diagnostic
                # is not followed by a spurious blank line.
                print('??? skipped: %s' % line.rstrip())

        after = 0
        # Second block: stops at the first line that crack does not match.
        for line in read(fi, crack, True):
            after += 1
            m = crack.match(line)
            assert m  # read() only yields lines that crack matches
            addr, rc, guts = m.groups()  # guts is the type name here
            if addr not in addr2rc:
                print('??? new object created while tearing down: %s'
                      % line.rstrip())
                continue
            if rc == addr2rc[addr]:
                refs = '[%s]' % rc
            else:
                refs = '[%s->%s]' % (addr2rc[addr], rc)
            # Single-argument print(...) emits the same text whether print
            # is a statement (Python 2) or a function (Python 3).
            print('%s %s %s %s' % (addr, refs, guts, addr2guts[addr]))

    print("%d objects before, %d after" % (before, after))
if __name__ == '__main__':
    # The script needs the path of the saved PYTHONDUMPREFS output; without
    # it, exit with a usage message rather than an IndexError traceback.
    # Extra arguments are still ignored, as before.
    if len(sys.argv) < 2:
        sys.exit('usage: %s path' % sys.argv[0])
    combine(sys.argv[1])