| """Simple code to extract class & function docstrings from a module. | |
| This code is used as an example in the library reference manual in the | |
| section on using the parser module. Refer to the manual for a thorough | |
| discussion of the operation of this code. | |
| """ | |
| import os | |
| import parser | |
| import symbol | |
| import token | |
| import types | |
| from types import ListType, TupleType | |
def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo built from the tuple form of the file's parse
    tree and named after the file's base name with the extension removed.
    """
    # Close the file deterministically instead of leaving the anonymous
    # file object to be reclaimed whenever the interpreter gets to it.
    with open(fileName) as source_file:
        source = source_file.read()
    basename = os.path.basename(os.path.splitext(fileName)[0])
    ast = parser.suite(source)
    return ModuleInfo(ast.totuple(), basename)
class SuiteInfoBase:
    """Base class holding what was discovered about one suite of statements.

    An instance records the suite's docstring, its name, and one info
    object per class and per function defined directly inside the suite.
    """

    _docstring = ''
    _name = ''

    def __init__(self, tree = None):
        """Create the (initially empty) tables and scan `tree` if one is given.

        tree
            Parse tree of the suite in tuple form, or None / an empty
            value to create an object with no information.
        """
        self._class_info = {}
        self._function_info = {}
        if tree:
            self._extract_info(tree)

    def _extract_info(self, tree):
        """Fill in the docstring and the class/function tables from `tree`."""
        # extract docstring
        # A two-element tree has its only child tested against the
        # simple_stmt part of the pattern (presumably a body written on the
        # same line as its header -- confirm against the grammar).  Longer
        # trees are tested at index 3 (presumably the first statement after
        # the NEWLINE and INDENT tokens -- confirm against the grammar).
        if len(tree) == 2:
            found, bindings = match(DOCSTRING_STMT_PATTERN[1], tree[1])
        elif len(tree) > 3:
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[3])
        else:
            # Too short to have an element at index 3; previously this
            # raised IndexError instead of reporting "no docstring".
            found, bindings = 0, {}
        if found:
            # NOTE(review): eval() is applied to the text of a STRING token
            # taken from the parse tree; do not feed it anything that is
            # not a plain string literal.
            self._docstring = eval(bindings['docstring'])
        # discover inner definitions
        for node in tree[1:]:
            found, bindings = match(COMPOUND_STMT_PATTERN, node)
            if found:
                cstmt = bindings['compound']
                # In both kinds of definition the name is read from the
                # second field of the node at index 2.
                if cstmt[0] == symbol.funcdef:
                    name = cstmt[2][1]
                    self._function_info[name] = FunctionInfo(cstmt)
                elif cstmt[0] == symbol.classdef:
                    name = cstmt[2][1]
                    self._class_info[name] = ClassInfo(cstmt)

    def get_docstring(self):
        """Return the docstring found for this suite ('' if none was found)."""
        return self._docstring

    def get_name(self):
        """Return the name recorded for this suite."""
        return self._name

    def get_class_names(self):
        """Return the names of the classes defined directly in this suite."""
        return self._class_info.keys()

    def get_class_info(self, name):
        """Return the info object for class `name`; raises KeyError if unknown."""
        return self._class_info[name]

    def __getitem__(self, name):
        """Look `name` up among the classes first, then among the functions.

        Raises KeyError if `name` is in neither table.
        """
        try:
            return self._class_info[name]
        except KeyError:
            return self._function_info[name]
class SuiteFuncInfo:
    # Mixin supplying accessors for function names and function info.  It
    # reads the `_function_info` mapping, which the class it is mixed into
    # is expected to provide.

    def get_function_names(self):
        """Return the names of the functions recorded in `_function_info`."""
        functions = self._function_info
        return functions.keys()

    def get_function_info(self, name):
        """Return the info object recorded for function `name`."""
        functions = self._function_info
        return functions[name]
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about one function definition."""

    def __init__(self, tree = None):
        """Record the function's name and scan its body.

        tree
            The function-definition node in tuple form.  The name is read
            from tree[2][1] and the last element is scanned as the body.
            When omitted or empty, the object is created with no
            information (the default used to fail on the name lookup).
        """
        if tree:
            self._name = tree[2][1]
            suite = tree[-1]
        else:
            suite = None
        SuiteInfoBase.__init__(self, suite)
class ClassInfo(SuiteInfoBase):
    """Information about one class definition."""

    def __init__(self, tree = None):
        """Record the class's name and scan its body.

        tree
            The class-definition node in tuple form.  The name is read
            from tree[2][1] and the last element is scanned as the body.
            When omitted or empty, the object is created with no
            information (the default used to fail on the name lookup).
        """
        if tree:
            self._name = tree[2][1]
            suite = tree[-1]
        else:
            suite = None
        SuiteInfoBase.__init__(self, suite)

    def get_method_names(self):
        """Return the names of the functions defined directly in the class."""
        return self._function_info.keys()

    def get_method_info(self, name):
        """Return the info object for method `name`; raises KeyError if unknown."""
        return self._function_info[name]
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a whole module."""

    def __init__(self, tree = None, name = "<string>"):
        """Record the module name and scan the module's parse tree.

        tree
            Parse tree of the module in tuple form, or None / an empty
            value for an object with no information.
        name
            Name to report for the module.
        """
        self._name = name
        SuiteInfoBase.__init__(self, tree)
        if tree:
            # For a module only the first element after the node type is
            # accepted as the docstring, so the result of this test
            # replaces whatever the base class found at its own position.
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[1])
            if found:
                # Evaluate the token text so the value is the string
                # itself, not its quoted source form -- the same treatment
                # SuiteInfoBase gives class and function docstrings.
                # NOTE(review): eval() is applied to the text of a STRING
                # token taken from the parse tree.
                self._docstring = eval(bindings["docstring"])
            else:
                self._docstring = ''
def match(pattern, data, vars=None):
    """Match `data' to `pattern', with variable extraction.

    pattern
        Pattern to match against, possibly containing variables.

    data
        Data to be checked and against which variables are extracted.

    vars
        Dictionary of variables which have already been found.  If not
        provided, an empty dictionary is created.

    The `pattern' value may contain variables of the form ['varname'] which
    are allowed to match anything.  The value that is matched is returned as
    part of a dictionary which maps 'varname' to the matched value.  'varname'
    is not required to be a string object, but using strings makes patterns
    and the code which uses them more readable.

    A tuple pattern matches only data of the same length whose elements
    match pairwise; an empty tuple pattern matches empty data.  Data that
    has no length never matches a tuple pattern.  Any other pattern value
    matches data that compares equal to it.

    This function returns two values: a boolean indicating whether a match
    was found and a dictionary mapping variable names to their associated
    values.  Variables bound before a mismatch was detected remain in the
    dictionary.
    """
    if vars is None:
        vars = {}
    if isinstance(pattern, list):       # 'variables' are ['varname']
        vars[pattern[0]] = data
        return True, vars
    if not isinstance(pattern, tuple):
        return (pattern == data), vars
    try:
        if len(data) != len(pattern):
            return False, vars
    except TypeError:
        # `data' has no length (e.g. a bare number), so it cannot match a
        # tuple pattern.
        return False, vars
    # Start from "matched" so that an empty tuple pattern has a result.
    same = True
    # The lengths are equal here, so zip() pairs every element.
    for sub_pattern, sub_data in zip(pattern, data):
        same, vars = match(sub_pattern, sub_data, vars)
        if not same:
            break
    return same, vars
# This pattern identifies compound statements, allowing them to be readily
# differentiated from simple statements.  It matches a 'stmt' node whose
# single child is a 'compound_stmt' node with a single child, and binds
# that child to the variable 'compound'.
#
COMPOUND_STMT_PATTERN = (
    symbol.stmt,
    (symbol.compound_stmt, ['compound'])
    )
# This pattern will match a 'stmt' node which *might* represent a docstring;
# docstrings require that the statement which provides the docstring be the
# first statement in the class or function, which this pattern does not check.
#
# The statement must be a 'simple_stmt' made of one 'small_stmt' followed by
# a NEWLINE token with empty text; the 'small_stmt' must reduce, through the
# exact chain of single-child nodes spelled out below, to an 'atom' holding
# one STRING token.  The text of that token is bound to the variable
# 'docstring'.  Element [1] of the pattern is the 'simple_stmt' part alone.
# NOTE(review): the chain is hard-coded, so a grammar with a different set
# of intermediate expression nodes would not match -- confirm against the
# Python version in use.
#
DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt,
     (symbol.small_stmt,
      (symbol.expr_stmt,
       (symbol.testlist,
        (symbol.test,
         (symbol.and_test,
          (symbol.not_test,
           (symbol.comparison,
            (symbol.expr,
             (symbol.xor_expr,
              (symbol.and_expr,
               (symbol.shift_expr,
                (symbol.arith_expr,
                 (symbol.term,
                  (symbol.factor,
                   (symbol.power,
                    (symbol.atom,
                     (token.STRING, ['docstring'])
                     )))))))))))))))),
     (token.NEWLINE, '')
     ))