| # SPDX-License-Identifier: Apache-2.0 |
| # Copyright 2014-2017 The Meson development team |
| |
| from __future__ import annotations |
| from dataclasses import dataclass, field |
| import re |
| import codecs |
| import os |
| import typing as T |
| |
| from .mesonlib import MesonException |
| from . import mlog |
| |
| if T.TYPE_CHECKING: |
| from typing_extensions import Literal |
| |
| from .ast import AstVisitor |
| |
| BaseNodeT = T.TypeVar('BaseNodeT', bound='BaseNode') |
| |
| # This is the regex for the supported escape sequences of a regular string |
| # literal, like 'abc\x00' |
| ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r''' |
| ( \\U[A-Fa-f0-9]{8} # 8-digit hex escapes |
| | \\u[A-Fa-f0-9]{4} # 4-digit hex escapes |
| | \\x[A-Fa-f0-9]{2} # 2-digit hex escapes |
| | \\[0-7]{1,3} # Octal escapes |
| | \\N\{[^}]+\} # Unicode characters by name |
| | \\[\\'abfnrtv] # Single-character escapes |
| )''', re.UNICODE | re.VERBOSE) |
| |
| def decode_match(match: T.Match[str]) -> str: |
| return codecs.decode(match.group(0).encode(), 'unicode_escape') |
| |
| class ParseException(MesonException): |
| |
| ast: T.Optional[CodeBlockNode] = None |
| |
| def __init__(self, text: str, line: str, lineno: int, colno: int) -> None: |
| # Format as error message, followed by the line with the error, followed by a caret to show the error column. |
| super().__init__(mlog.code_line(text, line, colno)) |
| self.lineno = lineno |
| self.colno = colno |
| |
| class BlockParseException(ParseException): |
| def __init__( |
| self, |
| text: str, |
| line: str, |
| lineno: int, |
| colno: int, |
| start_line: str, |
| start_lineno: int, |
| start_colno: int, |
| ) -> None: |
| # This can be formatted in two ways - one if the block start and end are on the same line, and a different way if they are on different lines. |
| |
| if lineno == start_lineno: |
| # If block start and end are on the same line, it is formatted as: |
| # Error message |
| # Followed by the line with the error |
| # Followed by a caret to show the block start |
| # Followed by underscores |
| # Followed by a caret to show the block end. |
| MesonException.__init__(self, "{}\n{}\n{}".format(text, line, '{}^{}^'.format(' ' * start_colno, '_' * (colno - start_colno - 1)))) |
| else: |
| # If block start and end are on different lines, it is formatted as: |
| # Error message |
| # Followed by the line with the error |
| # Followed by a caret to show the error column. |
| # Followed by a message saying where the block started. |
| # Followed by the line of the block start. |
| # Followed by a caret for the block start. |
| MesonException.__init__(self, "%s\n%s\n%s\nFor a block that started at %d,%d\n%s\n%s" % (text, line, '%s^' % (' ' * colno), start_lineno, start_colno, start_line, "%s^" % (' ' * start_colno))) |
| self.lineno = lineno |
| self.colno = colno |
| |
| TV_TokenTypes = T.TypeVar('TV_TokenTypes', int, str, bool) |
| |
| @dataclass(eq=False) |
| class Token(T.Generic[TV_TokenTypes]): |
| tid: str |
| filename: str |
| line_start: int |
| lineno: int |
| colno: int |
| bytespan: T.Tuple[int, int] |
| value: TV_TokenTypes |
| |
| def __eq__(self, other: object) -> bool: |
| if isinstance(other, str): |
| return self.tid == other |
| elif isinstance(other, Token): |
| return self.tid == other.tid |
| return NotImplemented |
| |
| class Lexer: |
| def __init__(self, code: str): |
| if code.startswith(codecs.BOM_UTF8.decode('utf-8')): |
| line, *_ = code.split('\n', maxsplit=1) |
| raise ParseException('Builder file must be encoded in UTF-8 (with no BOM)', line, lineno=0, colno=0) |
| |
| self.code = code |
| self.keywords = {'true', 'false', 'if', 'else', 'elif', |
| 'endif', 'and', 'or', 'not', 'foreach', 'endforeach', |
| 'in', 'continue', 'break'} |
| self.future_keywords = {'return'} |
| self.in_unit_test = 'MESON_RUNNING_IN_PROJECT_TESTS' in os.environ |
| if self.in_unit_test: |
| self.keywords.update({'testcase', 'endtestcase'}) |
| self.token_specification = [ |
| # Need to be sorted longest to shortest. |
| ('whitespace', re.compile(r'[ \t]+')), |
| ('multiline_fstring', re.compile(r"f'''(.|\n)*?'''", re.M)), |
| ('fstring', re.compile(r"f'([^'\\]|(\\.))*'")), |
| ('id', re.compile('[_a-zA-Z][_0-9a-zA-Z]*')), |
| ('number', re.compile(r'0[bB][01]+|0[oO][0-7]+|0[xX][0-9a-fA-F]+|0|[1-9]\d*')), |
| ('eol_cont', re.compile(r'\\[ \t]*(#.*)?\n')), |
| ('eol', re.compile(r'\n')), |
| ('multiline_string', re.compile(r"'''(.|\n)*?'''", re.M)), |
| ('comment', re.compile(r'#.*')), |
| ('lparen', re.compile(r'\(')), |
| ('rparen', re.compile(r'\)')), |
| ('lbracket', re.compile(r'\[')), |
| ('rbracket', re.compile(r'\]')), |
| ('lcurl', re.compile(r'\{')), |
| ('rcurl', re.compile(r'\}')), |
| ('dblquote', re.compile(r'"')), |
| ('string', re.compile(r"'([^'\\]|(\\.))*'")), |
| ('comma', re.compile(r',')), |
| ('plusassign', re.compile(r'\+=')), |
| ('dot', re.compile(r'\.')), |
| ('plus', re.compile(r'\+')), |
| ('dash', re.compile(r'-')), |
| ('star', re.compile(r'\*')), |
| ('percent', re.compile(r'%')), |
| ('fslash', re.compile(r'/')), |
| ('colon', re.compile(r':')), |
| ('equal', re.compile(r'==')), |
| ('nequal', re.compile(r'!=')), |
| ('assign', re.compile(r'=')), |
| ('le', re.compile(r'<=')), |
| ('lt', re.compile(r'<')), |
| ('ge', re.compile(r'>=')), |
| ('gt', re.compile(r'>')), |
| ('questionmark', re.compile(r'\?')), |
| ] |
| |
| def getline(self, line_start: int) -> str: |
| return self.code[line_start:self.code.find('\n', line_start)] |
| |
| def lex(self, filename: str) -> T.Generator[Token, None, None]: |
| line_start = 0 |
| lineno = 1 |
| loc = 0 |
| par_count = 0 |
| bracket_count = 0 |
| curl_count = 0 |
| col = 0 |
| while loc < len(self.code): |
| matched = False |
| value: str = '' |
| for (tid, reg) in self.token_specification: |
| mo = reg.match(self.code, loc) |
| if mo: |
| curline = lineno |
| curline_start = line_start |
| col = mo.start() - line_start |
| matched = True |
| span_start = loc |
| loc = mo.end() |
| span_end = loc |
| bytespan = (span_start, span_end) |
| value = mo.group() |
| if tid == 'lparen': |
| par_count += 1 |
| elif tid == 'rparen': |
| par_count -= 1 |
| elif tid == 'lbracket': |
| bracket_count += 1 |
| elif tid == 'rbracket': |
| bracket_count -= 1 |
| elif tid == 'lcurl': |
| curl_count += 1 |
| elif tid == 'rcurl': |
| curl_count -= 1 |
| elif tid == 'dblquote': |
| raise ParseException('Double quotes are not supported. Use single quotes.', self.getline(line_start), lineno, col) |
| elif tid in {'string', 'fstring'}: |
| if value.find("\n") != -1: |
| msg = ("Newline character in a string detected, use ''' (three single quotes) " |
| "for multiline strings instead.\n" |
| "This will become a hard error in a future Meson release.") |
| mlog.warning(mlog.code_line(msg, self.getline(line_start), col), location=BaseNode(lineno, col, filename)) |
| value = value[2 if tid == 'fstring' else 1:-1] |
| elif tid in {'multiline_string', 'multiline_fstring'}: |
| value = value[4 if tid == 'multiline_fstring' else 3:-3] |
| lines = value.split('\n') |
| if len(lines) > 1: |
| lineno += len(lines) - 1 |
| line_start = mo.end() - len(lines[-1]) |
| elif tid == 'eol_cont': |
| lineno += 1 |
| line_start = loc |
| tid = 'whitespace' |
| elif tid == 'eol': |
| lineno += 1 |
| line_start = loc |
| if par_count > 0 or bracket_count > 0 or curl_count > 0: |
| tid = 'whitespace' |
| elif tid == 'id': |
| if value in self.keywords: |
| tid = value |
| else: |
| if value in self.future_keywords: |
| mlog.warning(f"Identifier '{value}' will become a reserved keyword in a future release. Please rename it.", |
| location=BaseNode(lineno, col, filename)) |
| yield Token(tid, filename, curline_start, curline, col, bytespan, value) |
| break |
| if not matched: |
| raise ParseException('lexer', self.getline(line_start), lineno, col) |
| |
| @dataclass |
| class BaseNode: |
| lineno: int |
| colno: int |
| filename: str = field(hash=False) |
| end_lineno: int = field(hash=False) |
| end_colno: int = field(hash=False) |
| whitespaces: T.Optional[WhitespaceNode] = field(hash=False) |
| |
| def __init__(self, lineno: int, colno: int, filename: str, |
| end_lineno: T.Optional[int] = None, end_colno: T.Optional[int] = None) -> None: |
| self.lineno = lineno |
| self.colno = colno |
| self.filename = filename |
| self.end_lineno = end_lineno if end_lineno is not None else lineno |
| self.end_colno = end_colno if end_colno is not None else colno |
| self.whitespaces = None |
| |
| # Attributes for the visitors |
| self.level = 0 |
| self.ast_id = '' |
| self.condition_level = 0 |
| |
| def accept(self, visitor: 'AstVisitor') -> None: |
| fname = 'visit_{}'.format(type(self).__name__) |
| if hasattr(visitor, fname): |
| func = getattr(visitor, fname) |
| if callable(func): |
| func(self) |
| |
| def append_whitespaces(self, token: Token) -> None: |
| if self.whitespaces is None: |
| self.whitespaces = WhitespaceNode(token) |
| else: |
| self.whitespaces.append(token) |
| |
| |
| @dataclass(unsafe_hash=True) |
| class WhitespaceNode(BaseNode): |
| |
| value: str |
| |
| def __init__(self, token: Token[str]): |
| super().__init__(token.lineno, token.colno, token.filename) |
| self.value = '' |
| self.append(token) |
| |
| def append(self, token: Token[str]) -> None: |
| self.value += token.value |
| |
| @dataclass(unsafe_hash=True) |
| class ElementaryNode(T.Generic[TV_TokenTypes], BaseNode): |
| |
| value: TV_TokenTypes |
| bytespan: T.Tuple[int, int] = field(hash=False) |
| |
| def __init__(self, token: Token[TV_TokenTypes]): |
| super().__init__(token.lineno, token.colno, token.filename) |
| self.value = token.value |
| self.bytespan = token.bytespan |
| |
| class BooleanNode(ElementaryNode[bool]): |
| pass |
| |
| class IdNode(ElementaryNode[str]): |
| pass |
| |
| @dataclass(unsafe_hash=True) |
| class NumberNode(ElementaryNode[int]): |
| |
| raw_value: str = field(hash=False) |
| |
| def __init__(self, token: Token[str]): |
| BaseNode.__init__(self, token.lineno, token.colno, token.filename) |
| self.raw_value = token.value |
| self.value = int(token.value, base=0) |
| self.bytespan = token.bytespan |
| |
| @dataclass(unsafe_hash=True) |
| class StringNode(ElementaryNode[str]): |
| |
| raw_value: str = field(hash=False) |
| is_multiline: bool |
| is_fstring: bool |
| |
| def __init__(self, token: Token[str], escape: bool = True): |
| super().__init__(token) |
| |
| self.is_multiline = 'multiline' in token.tid |
| self.is_fstring = 'fstring' in token.tid |
| self.raw_value = token.value |
| |
| if escape and not self.is_multiline: |
| self.value = self.escape() |
| |
| def escape(self) -> str: |
| return ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, self.raw_value) |
| |
| class ContinueNode(ElementaryNode): |
| pass |
| |
| class BreakNode(ElementaryNode): |
| pass |
| |
| class SymbolNode(ElementaryNode[str]): |
| pass |
| |
| @dataclass(unsafe_hash=True) |
| class ArgumentNode(BaseNode): |
| |
| arguments: T.List[BaseNode] = field(hash=False) |
| commas: T.List[SymbolNode] = field(hash=False) |
| colons: T.List[SymbolNode] = field(hash=False) |
| kwargs: T.Dict[BaseNode, BaseNode] = field(hash=False) |
| |
| def __init__(self, token: Token[TV_TokenTypes]): |
| super().__init__(token.lineno, token.colno, token.filename) |
| self.arguments = [] |
| self.commas = [] |
| self.colons = [] |
| self.kwargs = {} |
| self.order_error = False |
| |
| # Attributes for the visitors |
| self.is_multiline = False |
| |
| def prepend(self, statement: BaseNode) -> None: |
| if self.num_kwargs() > 0: |
| self.order_error = True |
| if not isinstance(statement, EmptyNode): |
| self.arguments = [statement] + self.arguments |
| |
| def append(self, statement: BaseNode) -> None: |
| if self.num_kwargs() > 0: |
| self.order_error = True |
| if not isinstance(statement, EmptyNode): |
| self.arguments += [statement] |
| |
| def set_kwarg(self, name: IdNode, value: BaseNode) -> None: |
| if any((isinstance(x, IdNode) and name.value == x.value) for x in self.kwargs): |
| mlog.warning(f'Keyword argument "{name.value}" defined multiple times.', location=self) |
| mlog.warning('This will be an error in future Meson releases.') |
| self.kwargs[name] = value |
| |
| def set_kwarg_no_check(self, name: BaseNode, value: BaseNode) -> None: |
| self.kwargs[name] = value |
| |
| def num_args(self) -> int: |
| return len(self.arguments) |
| |
| def num_kwargs(self) -> int: |
| return len(self.kwargs) |
| |
| def incorrect_order(self) -> bool: |
| return self.order_error |
| |
| def __len__(self) -> int: |
| return self.num_args() # Fixme |
| |
| @dataclass(unsafe_hash=True) |
| class ArrayNode(BaseNode): |
| |
| lbracket: SymbolNode |
| args: ArgumentNode |
| rbracket: SymbolNode |
| |
| def __init__(self, lbracket: SymbolNode, args: ArgumentNode, rbracket: SymbolNode): |
| super().__init__(lbracket.lineno, lbracket.colno, args.filename, end_lineno=rbracket.lineno, end_colno=rbracket.colno+1) |
| self.lbracket = lbracket |
| self.args = args |
| self.rbracket = rbracket |
| |
| @dataclass(unsafe_hash=True) |
| class DictNode(BaseNode): |
| |
| lcurl: SymbolNode |
| args: ArgumentNode |
| rcurl: SymbolNode |
| |
| def __init__(self, lcurl: SymbolNode, args: ArgumentNode, rcurl: SymbolNode): |
| super().__init__(lcurl.lineno, lcurl.colno, args.filename, end_lineno=rcurl.lineno, end_colno=rcurl.colno+1) |
| self.lcurl = lcurl |
| self.args = args |
| self.rcurl = rcurl |
| |
| class EmptyNode(BaseNode): |
| pass |
| |
| @dataclass(unsafe_hash=True) |
| class BinaryOperatorNode(BaseNode): |
| |
| left: BaseNode |
| operator: SymbolNode |
| right: BaseNode |
| |
| def __init__(self, left: BaseNode, operator: SymbolNode, right: BaseNode): |
| super().__init__(left.lineno, left.colno, left.filename) |
| self.left = left |
| self.operator = operator |
| self.right = right |
| |
| class OrNode(BinaryOperatorNode): |
| pass |
| |
| class AndNode(BinaryOperatorNode): |
| pass |
| |
| @dataclass(unsafe_hash=True) |
| class ComparisonNode(BinaryOperatorNode): |
| |
| ctype: COMPARISONS |
| |
| def __init__(self, ctype: COMPARISONS, left: BaseNode, operator: SymbolNode, right: BaseNode): |
| super().__init__(left, operator, right) |
| self.ctype = ctype |
| |
| @dataclass(unsafe_hash=True) |
| class ArithmeticNode(BinaryOperatorNode): |
| |
| # TODO: use a Literal for operation |
| operation: str |
| |
| def __init__(self, operation: str, left: BaseNode, operator: SymbolNode, right: BaseNode): |
| super().__init__(left, operator, right) |
| self.operation = operation |
| |
| @dataclass(unsafe_hash=True) |
| class UnaryOperatorNode(BaseNode): |
| |
| operator: SymbolNode |
| value: BaseNode |
| |
| def __init__(self, token: Token[TV_TokenTypes], operator: SymbolNode, value: BaseNode): |
| super().__init__(token.lineno, token.colno, token.filename) |
| self.operator = operator |
| self.value = value |
| |
| class NotNode(UnaryOperatorNode): |
| pass |
| |
| class UMinusNode(UnaryOperatorNode): |
| pass |
| |
| @dataclass(unsafe_hash=True) |
| class CodeBlockNode(BaseNode): |
| |
| pre_whitespaces: T.Optional[WhitespaceNode] = field(hash=False) |
| lines: T.List[BaseNode] = field(hash=False) |
| |
| def __init__(self, token: Token[TV_TokenTypes]): |
| super().__init__(token.lineno, token.colno, token.filename) |
| self.pre_whitespaces = None |
| self.lines = [] |
| |
| def append_whitespaces(self, token: Token) -> None: |
| if self.lines: |
| self.lines[-1].append_whitespaces(token) |
| elif self.pre_whitespaces is None: |
| self.pre_whitespaces = WhitespaceNode(token) |
| else: |
| self.pre_whitespaces.append(token) |
| |
| @dataclass(unsafe_hash=True) |
| class IndexNode(BaseNode): |
| |
| iobject: BaseNode |
| lbracket: SymbolNode |
| index: BaseNode |
| rbracket: SymbolNode |
| |
| def __init__(self, iobject: BaseNode, lbracket: SymbolNode, index: BaseNode, rbracket: SymbolNode): |
| super().__init__(iobject.lineno, iobject.colno, iobject.filename) |
| self.iobject = iobject |
| self.lbracket = lbracket |
| self.index = index |
| self.rbracket = rbracket |
| |
| @dataclass(unsafe_hash=True) |
| class MethodNode(BaseNode): |
| |
| source_object: BaseNode |
| dot: SymbolNode |
| name: IdNode |
| lpar: SymbolNode |
| args: ArgumentNode |
| rpar: SymbolNode |
| |
| def __init__(self, source_object: BaseNode, dot: SymbolNode, name: IdNode, lpar: SymbolNode, args: ArgumentNode, rpar: SymbolNode): |
| super().__init__(name.lineno, name.colno, name.filename, end_lineno=rpar.lineno, end_colno=rpar.colno+1) |
| self.source_object = source_object |
| self.dot = dot |
| self.name = name |
| self.lpar = lpar |
| self.args = args |
| self.rpar = rpar |
| |
| @dataclass(unsafe_hash=True) |
| class FunctionNode(BaseNode): |
| |
| func_name: IdNode |
| lpar: SymbolNode |
| args: ArgumentNode |
| rpar: SymbolNode |
| |
| def __init__(self, func_name: IdNode, lpar: SymbolNode, args: ArgumentNode, rpar: SymbolNode): |
| super().__init__(func_name.lineno, func_name.colno, func_name.filename, end_lineno=rpar.end_lineno, end_colno=rpar.end_colno+1) |
| self.func_name = func_name |
| self.lpar = lpar |
| self.args = args |
| self.rpar = rpar |
| |
| @dataclass(unsafe_hash=True) |
| class AssignmentNode(BaseNode): |
| |
| var_name: IdNode |
| operator: SymbolNode |
| value: BaseNode |
| |
| def __init__(self, var_name: IdNode, operator: SymbolNode, value: BaseNode): |
| super().__init__(var_name.lineno, var_name.colno, var_name.filename) |
| self.var_name = var_name |
| self.operator = operator |
| self.value = value |
| |
| class PlusAssignmentNode(AssignmentNode): |
| pass |
| |
| @dataclass(unsafe_hash=True) |
| class ForeachClauseNode(BaseNode): |
| |
| foreach_: SymbolNode = field(hash=False) |
| varnames: T.List[IdNode] = field(hash=False) |
| commas: T.List[SymbolNode] = field(hash=False) |
| colon: SymbolNode = field(hash=False) |
| items: BaseNode |
| block: CodeBlockNode |
| endforeach: SymbolNode = field(hash=False) |
| |
| def __init__(self, foreach_: SymbolNode, varnames: T.List[IdNode], commas: T.List[SymbolNode], colon: SymbolNode, items: BaseNode, block: CodeBlockNode, endforeach: SymbolNode): |
| super().__init__(foreach_.lineno, foreach_.colno, foreach_.filename) |
| self.foreach_ = foreach_ |
| self.varnames = varnames |
| self.commas = commas |
| self.colon = colon |
| self.items = items |
| self.block = block |
| self.endforeach = endforeach |
| |
| |
| @dataclass(unsafe_hash=True) |
| class IfNode(BaseNode): |
| |
| if_: SymbolNode |
| condition: BaseNode |
| block: CodeBlockNode |
| |
| def __init__(self, linenode: BaseNode, if_node: SymbolNode, condition: BaseNode, block: CodeBlockNode): |
| super().__init__(linenode.lineno, linenode.colno, linenode.filename) |
| self.if_ = if_node |
| self.condition = condition |
| self.block = block |
| |
| @dataclass(unsafe_hash=True) |
| class ElseNode(BaseNode): |
| |
| else_: SymbolNode |
| block: CodeBlockNode |
| |
| def __init__(self, else_: SymbolNode, block: CodeBlockNode): |
| super().__init__(block.lineno, block.colno, block.filename) |
| self.else_ = else_ |
| self.block = block |
| |
| @dataclass(unsafe_hash=True) |
| class IfClauseNode(BaseNode): |
| |
| ifs: T.List[IfNode] = field(hash=False) |
| elseblock: T.Union[EmptyNode, ElseNode] |
| endif: SymbolNode |
| |
| def __init__(self, linenode: BaseNode): |
| super().__init__(linenode.lineno, linenode.colno, linenode.filename) |
| self.ifs = [] |
| self.elseblock = EmptyNode(linenode.lineno, linenode.colno, linenode.filename) |
| |
| @dataclass(unsafe_hash=True) |
| class TestCaseClauseNode(BaseNode): |
| |
| testcase: SymbolNode |
| condition: BaseNode |
| block: CodeBlockNode |
| endtestcase: SymbolNode |
| |
| def __init__(self, testcase: SymbolNode, condition: BaseNode, block: CodeBlockNode, endtestcase: SymbolNode): |
| super().__init__(condition.lineno, condition.colno, condition.filename) |
| self.testcase = testcase |
| self.condition = condition |
| self.block = block |
| self.endtestcase = endtestcase |
| |
| @dataclass(unsafe_hash=True) |
| class TernaryNode(BaseNode): |
| |
| condition: BaseNode |
| questionmark: SymbolNode |
| trueblock: BaseNode |
| colon: SymbolNode |
| falseblock: BaseNode |
| |
| def __init__(self, condition: BaseNode, questionmark: SymbolNode, trueblock: BaseNode, colon: SymbolNode, falseblock: BaseNode): |
| super().__init__(condition.lineno, condition.colno, condition.filename) |
| self.condition = condition |
| self.questionmark = questionmark |
| self.trueblock = trueblock |
| self.colon = colon |
| self.falseblock = falseblock |
| |
| |
| @dataclass(unsafe_hash=True) |
| class ParenthesizedNode(BaseNode): |
| |
| lpar: SymbolNode = field(hash=False) |
| inner: BaseNode |
| rpar: SymbolNode = field(hash=False) |
| |
| def __init__(self, lpar: SymbolNode, inner: BaseNode, rpar: SymbolNode): |
| super().__init__(lpar.lineno, lpar.colno, inner.filename, end_lineno=rpar.lineno, end_colno=rpar.colno+1) |
| self.lpar = lpar |
| self.inner = inner |
| self.rpar = rpar |
| |
| |
| if T.TYPE_CHECKING: |
| COMPARISONS = Literal['==', '!=', '<', '<=', '>=', '>', 'in', 'notin'] |
| |
| comparison_map: T.Mapping[str, COMPARISONS] = { |
| 'equal': '==', |
| 'nequal': '!=', |
| 'lt': '<', |
| 'le': '<=', |
| 'gt': '>', |
| 'ge': '>=', |
| 'in': 'in', |
| 'not in': 'notin', |
| } |
| |
| # Recursive descent parser for Meson's definition language. |
| # Very basic apart from the fact that we have many precedence |
| # levels so there are not enough words to describe them all. |
| # Enter numbering: |
| # |
| # 1 assignment |
| # 2 or |
| # 3 and |
| # 4 comparison |
| # 5 arithmetic |
| # 6 negation |
| # 7 funcall, method call |
| # 8 parentheses |
| # 9 plain token |
| |
| class Parser: |
| def __init__(self, code: str, filename: str): |
| self.lexer = Lexer(code) |
| self.stream = self.lexer.lex(filename) |
| self.current: Token = Token('eof', '', 0, 0, 0, (0, 0), None) |
| self.previous = self.current |
| self.current_ws: T.List[Token] = [] |
| |
| self.getsym() |
| self.in_ternary = False |
| |
| def create_node(self, node_type: T.Type[BaseNodeT], *args: T.Any, **kwargs: T.Any) -> BaseNodeT: |
| node = node_type(*args, **kwargs) |
| for ws_token in self.current_ws: |
| node.append_whitespaces(ws_token) |
| self.current_ws = [] |
| return node |
| |
| def getsym(self) -> None: |
| self.previous = self.current |
| try: |
| self.current = next(self.stream) |
| |
| while self.current.tid in {'eol', 'comment', 'whitespace'}: |
| self.current_ws.append(self.current) |
| if self.current.tid == 'eol': |
| break |
| self.current = next(self.stream) |
| |
| except StopIteration: |
| self.current = Token('eof', '', self.current.line_start, self.current.lineno, self.current.colno + self.current.bytespan[1] - self.current.bytespan[0], (0, 0), None) |
| |
| def getline(self) -> str: |
| return self.lexer.getline(self.current.line_start) |
| |
| def accept(self, s: str) -> bool: |
| if self.current.tid == s: |
| self.getsym() |
| return True |
| return False |
| |
| def accept_any(self, tids: T.Tuple[str, ...]) -> str: |
| tid = self.current.tid |
| if tid in tids: |
| self.getsym() |
| return tid |
| return '' |
| |
| def expect(self, s: str) -> bool: |
| if self.accept(s): |
| return True |
| raise ParseException(f'Expecting {s} got {self.current.tid}.', self.getline(), self.current.lineno, self.current.colno) |
| |
| def block_expect(self, s: str, block_start: Token) -> bool: |
| if self.accept(s): |
| return True |
| raise BlockParseException(f'Expecting {s} got {self.current.tid}.', self.getline(), self.current.lineno, self.current.colno, self.lexer.getline(block_start.line_start), block_start.lineno, block_start.colno) |
| |
| def parse(self) -> CodeBlockNode: |
| block = self.codeblock() |
| try: |
| self.expect('eof') |
| except ParseException as e: |
| e.ast = block |
| raise |
| return block |
| |
| def statement(self) -> BaseNode: |
| return self.e1() |
| |
| def e1(self) -> BaseNode: |
| left = self.e2() |
| if self.accept('plusassign'): |
| operator = self.create_node(SymbolNode, self.previous) |
| value = self.e1() |
| if not isinstance(left, IdNode): |
| raise ParseException('Plusassignment target must be an id.', self.getline(), left.lineno, left.colno) |
| assert isinstance(left.value, str) |
| return self.create_node(PlusAssignmentNode, left, operator, value) |
| elif self.accept('assign'): |
| operator = self.create_node(SymbolNode, self.previous) |
| value = self.e1() |
| if not isinstance(left, IdNode): |
| raise ParseException('Assignment target must be an id.', |
| self.getline(), left.lineno, left.colno) |
| assert isinstance(left.value, str) |
| return self.create_node(AssignmentNode, left, operator, value) |
| elif self.accept('questionmark'): |
| if self.in_ternary: |
| raise ParseException('Nested ternary operators are not allowed.', |
| self.getline(), left.lineno, left.colno) |
| |
| qm_node = self.create_node(SymbolNode, self.previous) |
| self.in_ternary = True |
| trueblock = self.e1() |
| self.expect('colon') |
| colon_node = self.create_node(SymbolNode, self.previous) |
| falseblock = self.e1() |
| self.in_ternary = False |
| return self.create_node(TernaryNode, left, qm_node, trueblock, colon_node, falseblock) |
| return left |
| |
| def e2(self) -> BaseNode: |
| left = self.e3() |
| while self.accept('or'): |
| operator = self.create_node(SymbolNode, self.previous) |
| if isinstance(left, EmptyNode): |
| raise ParseException('Invalid or clause.', |
| self.getline(), left.lineno, left.colno) |
| left = self.create_node(OrNode, left, operator, self.e3()) |
| return left |
| |
| def e3(self) -> BaseNode: |
| left = self.e4() |
| while self.accept('and'): |
| operator = self.create_node(SymbolNode, self.previous) |
| if isinstance(left, EmptyNode): |
| raise ParseException('Invalid and clause.', |
| self.getline(), left.lineno, left.colno) |
| left = self.create_node(AndNode, left, operator, self.e4()) |
| return left |
| |
| def e4(self) -> BaseNode: |
| left = self.e5() |
| for nodename, operator_type in comparison_map.items(): |
| if self.accept(nodename): |
| operator = self.create_node(SymbolNode, self.previous) |
| return self.create_node(ComparisonNode, operator_type, left, operator, self.e5()) |
| if self.accept('not'): |
| ws = self.current_ws.copy() |
| not_token = self.previous |
| if self.accept('in'): |
| in_token = self.previous |
| self.current_ws = self.current_ws[len(ws):] # remove whitespaces between not and in |
| temp_node = EmptyNode(in_token.lineno, in_token.colno, in_token.filename) |
| for w in ws: |
| temp_node.append_whitespaces(w) |
| |
| not_token.bytespan = (not_token.bytespan[0], in_token.bytespan[1]) |
| not_token.value += temp_node.whitespaces.value + in_token.value |
| operator = self.create_node(SymbolNode, not_token) |
| return self.create_node(ComparisonNode, 'notin', left, operator, self.e5()) |
| return left |
| |
| def e5(self) -> BaseNode: |
| return self.e5addsub() |
| |
| def e5addsub(self) -> BaseNode: |
| op_map = { |
| 'plus': 'add', |
| 'dash': 'sub', |
| } |
| left = self.e5muldiv() |
| while True: |
| op = self.accept_any(tuple(op_map.keys())) |
| if op: |
| operator = self.create_node(SymbolNode, self.previous) |
| left = self.create_node(ArithmeticNode, op_map[op], left, operator, self.e5muldiv()) |
| else: |
| break |
| return left |
| |
| def e5muldiv(self) -> BaseNode: |
| op_map = { |
| 'percent': 'mod', |
| 'star': 'mul', |
| 'fslash': 'div', |
| } |
| left = self.e6() |
| while True: |
| op = self.accept_any(tuple(op_map.keys())) |
| if op: |
| operator = self.create_node(SymbolNode, self.previous) |
| left = self.create_node(ArithmeticNode, op_map[op], left, operator, self.e6()) |
| else: |
| break |
| return left |
| |
| def e6(self) -> BaseNode: |
| if self.accept('not'): |
| operator = self.create_node(SymbolNode, self.previous) |
| return self.create_node(NotNode, self.current, operator, self.e7()) |
| if self.accept('dash'): |
| operator = self.create_node(SymbolNode, self.previous) |
| return self.create_node(UMinusNode, self.current, operator, self.e7()) |
| return self.e7() |
| |
| def e7(self) -> BaseNode: |
| left = self.e8() |
| block_start = self.current |
| if self.accept('lparen'): |
| lpar = self.create_node(SymbolNode, block_start) |
| args = self.args() |
| self.block_expect('rparen', block_start) |
| rpar = self.create_node(SymbolNode, self.previous) |
| if not isinstance(left, IdNode): |
| raise ParseException('Function call must be applied to plain id', |
| self.getline(), left.lineno, left.colno) |
| assert isinstance(left.value, str) |
| left = self.create_node(FunctionNode, left, lpar, args, rpar) |
| go_again = True |
| while go_again: |
| go_again = False |
| if self.accept('dot'): |
| go_again = True |
| left = self.method_call(left) |
| if self.accept('lbracket'): |
| go_again = True |
| left = self.index_call(left) |
| return left |
| |
| def e8(self) -> BaseNode: |
| block_start = self.current |
| if self.accept('lparen'): |
| lpar = self.create_node(SymbolNode, block_start) |
| e = self.statement() |
| self.block_expect('rparen', block_start) |
| rpar = self.create_node(SymbolNode, self.previous) |
| return ParenthesizedNode(lpar, e, rpar) |
| elif self.accept('lbracket'): |
| lbracket = self.create_node(SymbolNode, block_start) |
| args = self.args() |
| self.block_expect('rbracket', block_start) |
| rbracket = self.create_node(SymbolNode, self.previous) |
| return self.create_node(ArrayNode, lbracket, args, rbracket) |
| elif self.accept('lcurl'): |
| lcurl = self.create_node(SymbolNode, block_start) |
| key_values = self.key_values() |
| self.block_expect('rcurl', block_start) |
| rcurl = self.create_node(SymbolNode, self.previous) |
| return self.create_node(DictNode, lcurl, key_values, rcurl) |
| else: |
| return self.e9() |
| |
| def e9(self) -> BaseNode: |
| t = self.current |
| if self.accept('true'): |
| t.value = True |
| return self.create_node(BooleanNode, t) |
| if self.accept('false'): |
| t.value = False |
| return self.create_node(BooleanNode, t) |
| if self.accept('id'): |
| return self.create_node(IdNode, t) |
| if self.accept('number'): |
| return self.create_node(NumberNode, t) |
| if self.accept_any(('string', 'fstring', 'multiline_string', 'multiline_fstring')): |
| return self.create_node(StringNode, t) |
| return EmptyNode(self.current.lineno, self.current.colno, self.current.filename) |
| |
| def key_values(self) -> ArgumentNode: |
| s = self.statement() |
| a = self.create_node(ArgumentNode, self.current) |
| |
| while not isinstance(s, EmptyNode): |
| if self.accept('colon'): |
| a.colons.append(self.create_node(SymbolNode, self.previous)) |
| a.set_kwarg_no_check(s, self.statement()) |
| if not self.accept('comma'): |
| return a |
| a.commas.append(self.create_node(SymbolNode, self.previous)) |
| else: |
| raise ParseException('Only key:value pairs are valid in dict construction.', |
| self.getline(), s.lineno, s.colno) |
| s = self.statement() |
| return a |
| |
| def args(self) -> ArgumentNode: |
| s = self.statement() |
| a = self.create_node(ArgumentNode, self.current) |
| |
| while not isinstance(s, EmptyNode): |
| if self.accept('comma'): |
| a.commas.append(self.create_node(SymbolNode, self.previous)) |
| a.append(s) |
| elif self.accept('colon'): |
| a.colons.append(self.create_node(SymbolNode, self.previous)) |
| if not isinstance(s, IdNode): |
| raise ParseException('Dictionary key must be a plain identifier.', |
| self.getline(), s.lineno, s.colno) |
| a.set_kwarg(s, self.statement()) |
| if not self.accept('comma'): |
| return a |
| a.commas.append(self.create_node(SymbolNode, self.previous)) |
| else: |
| a.append(s) |
| return a |
| s = self.statement() |
| return a |
| |
| def method_call(self, source_object: BaseNode) -> MethodNode: |
| dot = self.create_node(SymbolNode, self.previous) |
| methodname = self.e9() |
| if not isinstance(methodname, IdNode): |
| if isinstance(source_object, NumberNode) and isinstance(methodname, NumberNode): |
| raise ParseException('meson does not support float numbers', |
| self.getline(), source_object.lineno, source_object.colno) |
| raise ParseException('Method name must be plain id', |
| self.getline(), self.current.lineno, self.current.colno) |
| assert isinstance(methodname.value, str) |
| self.expect('lparen') |
| lpar = self.create_node(SymbolNode, self.previous) |
| args = self.args() |
| rpar = self.create_node(SymbolNode, self.current) |
| self.expect('rparen') |
| method = self.create_node(MethodNode, source_object, dot, methodname, lpar, args, rpar) |
| if self.accept('dot'): |
| return self.method_call(method) |
| return method |
| |
| def index_call(self, source_object: BaseNode) -> IndexNode: |
| lbracket = self.create_node(SymbolNode, self.previous) |
| index_statement = self.statement() |
| self.expect('rbracket') |
| rbracket = self.create_node(SymbolNode, self.previous) |
| return self.create_node(IndexNode, source_object, lbracket, index_statement, rbracket) |
| |
| def foreachblock(self) -> ForeachClauseNode: |
| foreach_ = self.create_node(SymbolNode, self.previous) |
| self.expect('id') |
| assert isinstance(self.previous.value, str) |
| varnames = [self.create_node(IdNode, self.previous)] |
| commas = [] |
| |
| if self.accept('comma'): |
| commas.append(self.create_node(SymbolNode, self.previous)) |
| self.expect('id') |
| assert isinstance(self.previous.value, str) |
| varnames.append(self.create_node(IdNode, self.previous)) |
| |
| self.expect('colon') |
| colon = self.create_node(SymbolNode, self.previous) |
| items = self.statement() |
| block = self.codeblock() |
| endforeach = self.create_node(SymbolNode, self.current) |
| return self.create_node(ForeachClauseNode, foreach_, varnames, commas, colon, items, block, endforeach) |
| |
| def ifblock(self) -> IfClauseNode: |
| if_node = self.create_node(SymbolNode, self.previous) |
| condition = self.statement() |
| clause = self.create_node(IfClauseNode, condition) |
| self.expect('eol') |
| block = self.codeblock() |
| clause.ifs.append(self.create_node(IfNode, clause, if_node, condition, block)) |
| self.elseifblock(clause) |
| clause.elseblock = self.elseblock() |
| clause.endif = self.create_node(SymbolNode, self.current) |
| return clause |
| |
| def elseifblock(self, clause: IfClauseNode) -> None: |
| while self.accept('elif'): |
| elif_ = self.create_node(SymbolNode, self.previous) |
| s = self.statement() |
| self.expect('eol') |
| b = self.codeblock() |
| clause.ifs.append(self.create_node(IfNode, s, elif_, s, b)) |
| |
| def elseblock(self) -> T.Union[ElseNode, EmptyNode]: |
| if self.accept('else'): |
| else_ = self.create_node(SymbolNode, self.previous) |
| self.expect('eol') |
| block = self.codeblock() |
| return ElseNode(else_, block) |
| return EmptyNode(self.current.lineno, self.current.colno, self.current.filename) |
| |
| def testcaseblock(self) -> TestCaseClauseNode: |
| testcase = self.create_node(SymbolNode, self.previous) |
| condition = self.statement() |
| self.expect('eol') |
| block = self.codeblock() |
| endtestcase = SymbolNode(self.current) |
| return self.create_node(TestCaseClauseNode, testcase, condition, block, endtestcase) |
| |
| def line(self) -> BaseNode: |
| block_start = self.current |
| if self.current == 'eol': |
| return EmptyNode(self.current.lineno, self.current.colno, self.current.filename) |
| if self.accept('if'): |
| ifblock = self.ifblock() |
| self.block_expect('endif', block_start) |
| return ifblock |
| if self.accept('foreach'): |
| forblock = self.foreachblock() |
| self.block_expect('endforeach', block_start) |
| return forblock |
| if self.accept('continue'): |
| return self.create_node(ContinueNode, self.current) |
| if self.accept('break'): |
| return self.create_node(BreakNode, self.current) |
| if self.lexer.in_unit_test and self.accept('testcase'): |
| block = self.testcaseblock() |
| self.block_expect('endtestcase', block_start) |
| return block |
| return self.statement() |
| |
| def codeblock(self) -> CodeBlockNode: |
| block = self.create_node(CodeBlockNode, self.current) |
| cond = True |
| |
| try: |
| while cond: |
| for ws_token in self.current_ws: |
| block.append_whitespaces(ws_token) |
| self.current_ws = [] |
| |
| curline = self.line() |
| |
| if not isinstance(curline, EmptyNode): |
| block.lines.append(curline) |
| |
| cond = self.accept('eol') |
| |
| except ParseException as e: |
| e.ast = block |
| raise |
| |
| # Remaining whitespaces will not be catched since there are no more nodes |
| for ws_token in self.current_ws: |
| block.append_whitespaces(ws_token) |
| self.current_ws = [] |
| |
| return block |