| # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. | |
| # Licensed to PSF under a Contributor Agreement. | |
| """This module defines the data structures used to represent a grammar. | |
| These are a bit arcane because they are derived from the data | |
| structures used by Python's 'pgen' parser generator. | |
| There's also a table here mapping operators to their names in the | |
| token module; the Python tokenize module reports all operators as the | |
| fallback token code OP, but the parser needs the actual token code. | |
| """ | |
| # Python imports | |
| import pickle | |
| # Local imports | |
| from . import token, tokenize | |
class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    symbol2label  -- a dict, created empty here and not filled in by
                     this class; presumably maps symbol names to arc
                     labels (TODO: confirm against the subclasses
                     that populate the tables).

    """

    def __init__(self):
        """Create a grammar with empty tables.

        Only the reserved label 0 ("EMPTY") and the default start
        symbol number (256, the lowest symbol number) are set up;
        everything else is expected to be filled in by a subclass or
        by load().
        """
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file.

        The instance's whole attribute dict is written to *filename*
        using pickle protocol 2.
        """
        # The with-block guarantees the file is closed even when
        # pickling raises part-way through.
        with open(filename, "wb") as f:
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file.

        The unpickled dict is merged into this instance's attributes,
        overwriting any tables of the same name.

        NOTE: unpickling can execute arbitrary code, so *filename*
        must come from a trusted source (normally a file previously
        written by dump()).
        """
        # Close the file before touching the instance, and also when
        # unpickling fails.
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """Return a shallow copy of the grammar.

        A new instance of the same class is created; each table is
        copied one level deep (dict.copy() / list slice), so the
        containers are independent but their elements are shared with
        the original.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas",
                          "keywords", "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        # Every print below takes exactly one parenthesised string so
        # that it produces the same text whether print is a statement
        # (Python 2) or a function (Python 3).
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start %s" % (self.start,))
# Operator text -> token number.  tokenize reports every operator with
# the generic OP code, so the specific code is looked up here instead.
# Each non-empty line of the table below is "<operator> <token name>";
# note that both "!=" and "<>" are mapped to NOTEQUAL.

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""

opmap = {}
for line in opmap_raw.splitlines():
    if not line:
        # The literal opens with a newline, so the first entry from
        # splitlines() is an empty string; nothing to parse there.
        continue
    # Whitespace-separated pair: the operator text, then the name of
    # the matching constant in the token module.
    op, name = line.split()
    opmap[op] = getattr(token, name)