GardenSnake.py revision 4479
14479Sbinkertn@umich.edu# GardenSnake - a parser generator demonstration program 24479Sbinkertn@umich.edu# 34479Sbinkertn@umich.edu# This implements a modified version of a subset of Python: 44479Sbinkertn@umich.edu# - only 'def', 'return' and 'if' statements 54479Sbinkertn@umich.edu# - 'if' only has 'then' clause (no elif nor else) 64479Sbinkertn@umich.edu# - single-quoted strings only, content in raw format 74479Sbinkertn@umich.edu# - numbers are decimal.Decimal instances (not integers or floats) 84479Sbinkertn@umich.edu# - no print statment; use the built-in 'print' function 94479Sbinkertn@umich.edu# - only < > == + - / * implemented (and unary + -) 104479Sbinkertn@umich.edu# - assignment and tuple assignment work 114479Sbinkertn@umich.edu# - no generators of any sort 124479Sbinkertn@umich.edu# - no ... well, no quite a lot 134479Sbinkertn@umich.edu 144479Sbinkertn@umich.edu# Why? I'm thinking about a new indentation-based configuration 154479Sbinkertn@umich.edu# language for a project and wanted to figure out how to do it. Once 164479Sbinkertn@umich.edu# I got that working I needed a way to test it out. My original AST 174479Sbinkertn@umich.edu# was dumb so I decided to target Python's AST and compile it into 184479Sbinkertn@umich.edu# Python code. Plus, it's pretty cool that it only took a day or so 194479Sbinkertn@umich.edu# from sitting down with Ply to having working code. 204479Sbinkertn@umich.edu 214479Sbinkertn@umich.edu# This uses David Beazley's Ply from http://www.dabeaz.com/ply/ 224479Sbinkertn@umich.edu 234479Sbinkertn@umich.edu# This work is hereby released into the Public Domain. To view a copy of 244479Sbinkertn@umich.edu# the public domain dedication, visit 254479Sbinkertn@umich.edu# http://creativecommons.org/licenses/publicdomain/ or send a letter to 264479Sbinkertn@umich.edu# Creative Commons, 543 Howard Street, 5th Floor, San Francisco, 274479Sbinkertn@umich.edu# California, 94105, USA. 284479Sbinkertn@umich.edu# 294479Sbinkertn@umich.edu# Portions of this work are derived from Python's Grammar definition 304479Sbinkertn@umich.edu# and may be covered under the Python copyright and license 314479Sbinkertn@umich.edu# 324479Sbinkertn@umich.edu# Andrew Dalke / Dalke Scientific Software, LLC 334479Sbinkertn@umich.edu# 30 August 2006 / Cape Town, South Africa 344479Sbinkertn@umich.edu 354479Sbinkertn@umich.edu# Changelog: 364479Sbinkertn@umich.edu# 30 August - added link to CC license; removed the "swapcase" encoding 374479Sbinkertn@umich.edu 384479Sbinkertn@umich.edu# Modifications for inclusion in PLY distribution 394479Sbinkertn@umich.eduimport sys 404479Sbinkertn@umich.edusys.path.insert(0,"../..") 414479Sbinkertn@umich.edufrom ply import * 424479Sbinkertn@umich.edu 434479Sbinkertn@umich.edu##### Lexer ###### 444479Sbinkertn@umich.edu#import lex 454479Sbinkertn@umich.eduimport decimal 464479Sbinkertn@umich.edu 474479Sbinkertn@umich.edutokens = ( 484479Sbinkertn@umich.edu 'DEF', 494479Sbinkertn@umich.edu 'IF', 504479Sbinkertn@umich.edu 'NAME', 514479Sbinkertn@umich.edu 'NUMBER', # Python decimals 524479Sbinkertn@umich.edu 'STRING', # single quoted strings only; syntax of raw strings 534479Sbinkertn@umich.edu 'LPAR', 544479Sbinkertn@umich.edu 'RPAR', 554479Sbinkertn@umich.edu 'COLON', 564479Sbinkertn@umich.edu 'EQ', 574479Sbinkertn@umich.edu 'ASSIGN', 584479Sbinkertn@umich.edu 'LT', 594479Sbinkertn@umich.edu 'GT', 604479Sbinkertn@umich.edu 'PLUS', 614479Sbinkertn@umich.edu 'MINUS', 624479Sbinkertn@umich.edu 'MULT', 634479Sbinkertn@umich.edu 'DIV', 644479Sbinkertn@umich.edu 'RETURN', 654479Sbinkertn@umich.edu 'WS', 664479Sbinkertn@umich.edu 'NEWLINE', 674479Sbinkertn@umich.edu 'COMMA', 684479Sbinkertn@umich.edu 'SEMICOLON', 694479Sbinkertn@umich.edu 'INDENT', 704479Sbinkertn@umich.edu 'DEDENT', 714479Sbinkertn@umich.edu 'ENDMARKER', 724479Sbinkertn@umich.edu ) 734479Sbinkertn@umich.edu 744479Sbinkertn@umich.edu#t_NUMBER = r'\d+' 754479Sbinkertn@umich.edu# taken from decmial.py but without the leading sign 764479Sbinkertn@umich.edudef t_NUMBER(t): 774479Sbinkertn@umich.edu r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?""" 784479Sbinkertn@umich.edu t.value = decimal.Decimal(t.value) 794479Sbinkertn@umich.edu return t 804479Sbinkertn@umich.edu 814479Sbinkertn@umich.edudef t_STRING(t): 824479Sbinkertn@umich.edu r"'([^\\']+|\\'|\\\\)*'" # I think this is right ... 834479Sbinkertn@umich.edu t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun 844479Sbinkertn@umich.edu return t 854479Sbinkertn@umich.edu 864479Sbinkertn@umich.edut_COLON = r':' 874479Sbinkertn@umich.edut_EQ = r'==' 884479Sbinkertn@umich.edut_ASSIGN = r'=' 894479Sbinkertn@umich.edut_LT = r'<' 904479Sbinkertn@umich.edut_GT = r'>' 914479Sbinkertn@umich.edut_PLUS = r'\+' 924479Sbinkertn@umich.edut_MINUS = r'-' 934479Sbinkertn@umich.edut_MULT = r'\*' 944479Sbinkertn@umich.edut_DIV = r'/' 954479Sbinkertn@umich.edut_COMMA = r',' 964479Sbinkertn@umich.edut_SEMICOLON = r';' 974479Sbinkertn@umich.edu 984479Sbinkertn@umich.edu# Ply nicely documented how to do this. 994479Sbinkertn@umich.edu 1004479Sbinkertn@umich.eduRESERVED = { 1014479Sbinkertn@umich.edu "def": "DEF", 1024479Sbinkertn@umich.edu "if": "IF", 1034479Sbinkertn@umich.edu "return": "RETURN", 1044479Sbinkertn@umich.edu } 1054479Sbinkertn@umich.edu 1064479Sbinkertn@umich.edudef t_NAME(t): 1074479Sbinkertn@umich.edu r'[a-zA-Z_][a-zA-Z0-9_]*' 1084479Sbinkertn@umich.edu t.type = RESERVED.get(t.value, "NAME") 1094479Sbinkertn@umich.edu return t 1104479Sbinkertn@umich.edu 1114479Sbinkertn@umich.edu# Putting this before t_WS let it consume lines with only comments in 1124479Sbinkertn@umich.edu# them so the latter code never sees the WS part. Not consuming the 1134479Sbinkertn@umich.edu# newline. Needed for "if 1: #comment" 1144479Sbinkertn@umich.edudef t_comment(t): 1154479Sbinkertn@umich.edu r"[ ]*\043[^\n]*" # \043 is '#' 1164479Sbinkertn@umich.edu pass 1174479Sbinkertn@umich.edu 1184479Sbinkertn@umich.edu 1194479Sbinkertn@umich.edu# Whitespace 1204479Sbinkertn@umich.edudef t_WS(t): 1214479Sbinkertn@umich.edu r' [ ]+ ' 1224479Sbinkertn@umich.edu if t.lexer.at_line_start and t.lexer.paren_count == 0: 1234479Sbinkertn@umich.edu return t 1244479Sbinkertn@umich.edu 1254479Sbinkertn@umich.edu# Don't generate newline tokens when inside of parenthesis, eg 1264479Sbinkertn@umich.edu# a = (1, 1274479Sbinkertn@umich.edu# 2, 3) 1284479Sbinkertn@umich.edudef t_newline(t): 1294479Sbinkertn@umich.edu r'\n+' 1304479Sbinkertn@umich.edu t.lexer.lineno += len(t.value) 1314479Sbinkertn@umich.edu t.type = "NEWLINE" 1324479Sbinkertn@umich.edu if t.lexer.paren_count == 0: 1334479Sbinkertn@umich.edu return t 1344479Sbinkertn@umich.edu 1354479Sbinkertn@umich.edudef t_LPAR(t): 1364479Sbinkertn@umich.edu r'\(' 1374479Sbinkertn@umich.edu t.lexer.paren_count += 1 1384479Sbinkertn@umich.edu return t 1394479Sbinkertn@umich.edu 1404479Sbinkertn@umich.edudef t_RPAR(t): 1414479Sbinkertn@umich.edu r'\)' 1424479Sbinkertn@umich.edu # check for underflow? should be the job of the parser 1434479Sbinkertn@umich.edu t.lexer.paren_count -= 1 1444479Sbinkertn@umich.edu return t 1454479Sbinkertn@umich.edu 1464479Sbinkertn@umich.edu 1474479Sbinkertn@umich.edudef t_error(t): 1484479Sbinkertn@umich.edu raise SyntaxError("Unknown symbol %r" % (t.value[0],)) 1494479Sbinkertn@umich.edu print "Skipping", repr(t.value[0]) 1504479Sbinkertn@umich.edu t.lexer.skip(1) 1514479Sbinkertn@umich.edu 1524479Sbinkertn@umich.edu## I implemented INDENT / DEDENT generation as a post-processing filter 1534479Sbinkertn@umich.edu 1544479Sbinkertn@umich.edu# The original lex token stream contains WS and NEWLINE characters. 1554479Sbinkertn@umich.edu# WS will only occur before any other tokens on a line. 1564479Sbinkertn@umich.edu 1574479Sbinkertn@umich.edu# I have three filters. One tags tokens by adding two attributes. 1584479Sbinkertn@umich.edu# "must_indent" is True if the token must be indented from the 1594479Sbinkertn@umich.edu# previous code. The other is "at_line_start" which is True for WS 1604479Sbinkertn@umich.edu# and the first non-WS/non-NEWLINE on a line. It flags the check so 1614479Sbinkertn@umich.edu# see if the new line has changed indication level. 1624479Sbinkertn@umich.edu 1634479Sbinkertn@umich.edu# Python's syntax has three INDENT states 1644479Sbinkertn@umich.edu# 0) no colon hence no need to indent 1654479Sbinkertn@umich.edu# 1) "if 1: go()" - simple statements have a COLON but no need for an indent 1664479Sbinkertn@umich.edu# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent 1674479Sbinkertn@umich.eduNO_INDENT = 0 1684479Sbinkertn@umich.eduMAY_INDENT = 1 1694479Sbinkertn@umich.eduMUST_INDENT = 2 1704479Sbinkertn@umich.edu 1714479Sbinkertn@umich.edu# only care about whitespace at the start of a line 1724479Sbinkertn@umich.edudef track_tokens_filter(lexer, tokens): 1734479Sbinkertn@umich.edu lexer.at_line_start = at_line_start = True 1744479Sbinkertn@umich.edu indent = NO_INDENT 1754479Sbinkertn@umich.edu saw_colon = False 1764479Sbinkertn@umich.edu for token in tokens: 1774479Sbinkertn@umich.edu token.at_line_start = at_line_start 1784479Sbinkertn@umich.edu 1794479Sbinkertn@umich.edu if token.type == "COLON": 1804479Sbinkertn@umich.edu at_line_start = False 1814479Sbinkertn@umich.edu indent = MAY_INDENT 1824479Sbinkertn@umich.edu token.must_indent = False 1834479Sbinkertn@umich.edu 1844479Sbinkertn@umich.edu elif token.type == "NEWLINE": 1854479Sbinkertn@umich.edu at_line_start = True 1864479Sbinkertn@umich.edu if indent == MAY_INDENT: 1874479Sbinkertn@umich.edu indent = MUST_INDENT 1884479Sbinkertn@umich.edu token.must_indent = False 1894479Sbinkertn@umich.edu 1904479Sbinkertn@umich.edu elif token.type == "WS": 1914479Sbinkertn@umich.edu assert token.at_line_start == True 1924479Sbinkertn@umich.edu at_line_start = True 1934479Sbinkertn@umich.edu token.must_indent = False 1944479Sbinkertn@umich.edu 1954479Sbinkertn@umich.edu else: 1964479Sbinkertn@umich.edu # A real token; only indent after COLON NEWLINE 1974479Sbinkertn@umich.edu if indent == MUST_INDENT: 1984479Sbinkertn@umich.edu token.must_indent = True 1994479Sbinkertn@umich.edu else: 2004479Sbinkertn@umich.edu token.must_indent = False 2014479Sbinkertn@umich.edu at_line_start = False 2024479Sbinkertn@umich.edu indent = NO_INDENT 2034479Sbinkertn@umich.edu 2044479Sbinkertn@umich.edu yield token 2054479Sbinkertn@umich.edu lexer.at_line_start = at_line_start 2064479Sbinkertn@umich.edu 2074479Sbinkertn@umich.edudef _new_token(type, lineno): 2084479Sbinkertn@umich.edu tok = lex.LexToken() 2094479Sbinkertn@umich.edu tok.type = type 2104479Sbinkertn@umich.edu tok.value = None 2114479Sbinkertn@umich.edu tok.lineno = lineno 2124479Sbinkertn@umich.edu return tok 2134479Sbinkertn@umich.edu 2144479Sbinkertn@umich.edu# Synthesize a DEDENT tag 2154479Sbinkertn@umich.edudef DEDENT(lineno): 2164479Sbinkertn@umich.edu return _new_token("DEDENT", lineno) 2174479Sbinkertn@umich.edu 2184479Sbinkertn@umich.edu# Synthesize an INDENT tag 2194479Sbinkertn@umich.edudef INDENT(lineno): 2204479Sbinkertn@umich.edu return _new_token("INDENT", lineno) 2214479Sbinkertn@umich.edu 2224479Sbinkertn@umich.edu 2234479Sbinkertn@umich.edu# Track the indentation level and emit the right INDENT / DEDENT events. 2244479Sbinkertn@umich.edudef indentation_filter(tokens): 2254479Sbinkertn@umich.edu # A stack of indentation levels; will never pop item 0 2264479Sbinkertn@umich.edu levels = [0] 2274479Sbinkertn@umich.edu token = None 2284479Sbinkertn@umich.edu depth = 0 2294479Sbinkertn@umich.edu prev_was_ws = False 2304479Sbinkertn@umich.edu for token in tokens: 2314479Sbinkertn@umich.edu## if 1: 2324479Sbinkertn@umich.edu## print "Process", token, 2334479Sbinkertn@umich.edu## if token.at_line_start: 2344479Sbinkertn@umich.edu## print "at_line_start", 2354479Sbinkertn@umich.edu## if token.must_indent: 2364479Sbinkertn@umich.edu## print "must_indent", 2374479Sbinkertn@umich.edu## print 2384479Sbinkertn@umich.edu 2394479Sbinkertn@umich.edu # WS only occurs at the start of the line 2404479Sbinkertn@umich.edu # There may be WS followed by NEWLINE so 2414479Sbinkertn@umich.edu # only track the depth here. Don't indent/dedent 2424479Sbinkertn@umich.edu # until there's something real. 2434479Sbinkertn@umich.edu if token.type == "WS": 2444479Sbinkertn@umich.edu assert depth == 0 2454479Sbinkertn@umich.edu depth = len(token.value) 2464479Sbinkertn@umich.edu prev_was_ws = True 2474479Sbinkertn@umich.edu # WS tokens are never passed to the parser 2484479Sbinkertn@umich.edu continue 2494479Sbinkertn@umich.edu 2504479Sbinkertn@umich.edu if token.type == "NEWLINE": 2514479Sbinkertn@umich.edu depth = 0 2524479Sbinkertn@umich.edu if prev_was_ws or token.at_line_start: 2534479Sbinkertn@umich.edu # ignore blank lines 2544479Sbinkertn@umich.edu continue 2554479Sbinkertn@umich.edu # pass the other cases on through 2564479Sbinkertn@umich.edu yield token 2574479Sbinkertn@umich.edu continue 2584479Sbinkertn@umich.edu 2594479Sbinkertn@umich.edu # then it must be a real token (not WS, not NEWLINE) 2604479Sbinkertn@umich.edu # which can affect the indentation level 2614479Sbinkertn@umich.edu 2624479Sbinkertn@umich.edu prev_was_ws = False 2634479Sbinkertn@umich.edu if token.must_indent: 2644479Sbinkertn@umich.edu # The current depth must be larger than the previous level 2654479Sbinkertn@umich.edu if not (depth > levels[-1]): 2664479Sbinkertn@umich.edu raise IndentationError("expected an indented block") 2674479Sbinkertn@umich.edu 2684479Sbinkertn@umich.edu levels.append(depth) 2694479Sbinkertn@umich.edu yield INDENT(token.lineno) 2704479Sbinkertn@umich.edu 2714479Sbinkertn@umich.edu elif token.at_line_start: 2724479Sbinkertn@umich.edu # Must be on the same level or one of the previous levels 2734479Sbinkertn@umich.edu if depth == levels[-1]: 2744479Sbinkertn@umich.edu # At the same level 2754479Sbinkertn@umich.edu pass 2764479Sbinkertn@umich.edu elif depth > levels[-1]: 2774479Sbinkertn@umich.edu raise IndentationError("indentation increase but not in new block") 2784479Sbinkertn@umich.edu else: 2794479Sbinkertn@umich.edu # Back up; but only if it matches a previous level 2804479Sbinkertn@umich.edu try: 2814479Sbinkertn@umich.edu i = levels.index(depth) 2824479Sbinkertn@umich.edu except ValueError: 2834479Sbinkertn@umich.edu raise IndentationError("inconsistent indentation") 2844479Sbinkertn@umich.edu for _ in range(i+1, len(levels)): 2854479Sbinkertn@umich.edu yield DEDENT(token.lineno) 2864479Sbinkertn@umich.edu levels.pop() 2874479Sbinkertn@umich.edu 2884479Sbinkertn@umich.edu yield token 2894479Sbinkertn@umich.edu 2904479Sbinkertn@umich.edu ### Finished processing ### 2914479Sbinkertn@umich.edu 2924479Sbinkertn@umich.edu # Must dedent any remaining levels 2934479Sbinkertn@umich.edu if len(levels) > 1: 2944479Sbinkertn@umich.edu assert token is not None 2954479Sbinkertn@umich.edu for _ in range(1, len(levels)): 2964479Sbinkertn@umich.edu yield DEDENT(token.lineno) 2974479Sbinkertn@umich.edu 2984479Sbinkertn@umich.edu 2994479Sbinkertn@umich.edu# The top-level filter adds an ENDMARKER, if requested. 3004479Sbinkertn@umich.edu# Python's grammar uses it. 3014479Sbinkertn@umich.edudef filter(lexer, add_endmarker = True): 3024479Sbinkertn@umich.edu token = None 3034479Sbinkertn@umich.edu tokens = iter(lexer.token, None) 3044479Sbinkertn@umich.edu tokens = track_tokens_filter(lexer, tokens) 3054479Sbinkertn@umich.edu for token in indentation_filter(tokens): 3064479Sbinkertn@umich.edu yield token 3074479Sbinkertn@umich.edu 3084479Sbinkertn@umich.edu if add_endmarker: 3094479Sbinkertn@umich.edu lineno = 1 3104479Sbinkertn@umich.edu if token is not None: 3114479Sbinkertn@umich.edu lineno = token.lineno 3124479Sbinkertn@umich.edu yield _new_token("ENDMARKER", lineno) 3134479Sbinkertn@umich.edu 3144479Sbinkertn@umich.edu# Combine Ply and my filters into a new lexer 3154479Sbinkertn@umich.edu 3164479Sbinkertn@umich.educlass IndentLexer(object): 3174479Sbinkertn@umich.edu def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0): 3184479Sbinkertn@umich.edu self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags) 3194479Sbinkertn@umich.edu self.token_stream = None 3204479Sbinkertn@umich.edu def input(self, s, add_endmarker=True): 3214479Sbinkertn@umich.edu self.lexer.paren_count = 0 3224479Sbinkertn@umich.edu self.lexer.input(s) 3234479Sbinkertn@umich.edu self.token_stream = filter(self.lexer, add_endmarker) 3244479Sbinkertn@umich.edu def token(self): 3254479Sbinkertn@umich.edu try: 3264479Sbinkertn@umich.edu return self.token_stream.next() 3274479Sbinkertn@umich.edu except StopIteration: 3284479Sbinkertn@umich.edu return None 3294479Sbinkertn@umich.edu 3304479Sbinkertn@umich.edu########## Parser (tokens -> AST) ###### 3314479Sbinkertn@umich.edu 3324479Sbinkertn@umich.edu# also part of Ply 3334479Sbinkertn@umich.edu#import yacc 3344479Sbinkertn@umich.edu 3354479Sbinkertn@umich.edu# I use the Python AST 3364479Sbinkertn@umich.edufrom compiler import ast 3374479Sbinkertn@umich.edu 3384479Sbinkertn@umich.edu# Helper function 3394479Sbinkertn@umich.edudef Assign(left, right): 3404479Sbinkertn@umich.edu names = [] 3414479Sbinkertn@umich.edu if isinstance(left, ast.Name): 3424479Sbinkertn@umich.edu # Single assignment on left 3434479Sbinkertn@umich.edu return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right) 3444479Sbinkertn@umich.edu elif isinstance(left, ast.Tuple): 3454479Sbinkertn@umich.edu # List of things - make sure they are Name nodes 3464479Sbinkertn@umich.edu names = [] 3474479Sbinkertn@umich.edu for child in left.getChildren(): 3484479Sbinkertn@umich.edu if not isinstance(child, ast.Name): 3494479Sbinkertn@umich.edu raise SyntaxError("that assignment not supported") 3504479Sbinkertn@umich.edu names.append(child.name) 3514479Sbinkertn@umich.edu ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names] 3524479Sbinkertn@umich.edu return ast.Assign([ast.AssTuple(ass_list)], right) 3534479Sbinkertn@umich.edu else: 3544479Sbinkertn@umich.edu raise SyntaxError("Can't do that yet") 3554479Sbinkertn@umich.edu 3564479Sbinkertn@umich.edu 3574479Sbinkertn@umich.edu# The grammar comments come from Python's Grammar/Grammar file 3584479Sbinkertn@umich.edu 3594479Sbinkertn@umich.edu## NB: compound_stmt in single_input is followed by extra NEWLINE! 3604479Sbinkertn@umich.edu# file_input: (NEWLINE | stmt)* ENDMARKER 3614479Sbinkertn@umich.edudef p_file_input_end(p): 3624479Sbinkertn@umich.edu """file_input_end : file_input ENDMARKER""" 3634479Sbinkertn@umich.edu p[0] = ast.Stmt(p[1]) 3644479Sbinkertn@umich.edudef p_file_input(p): 3654479Sbinkertn@umich.edu """file_input : file_input NEWLINE 3664479Sbinkertn@umich.edu | file_input stmt 3674479Sbinkertn@umich.edu | NEWLINE 3684479Sbinkertn@umich.edu | stmt""" 3694479Sbinkertn@umich.edu if isinstance(p[len(p)-1], basestring): 3704479Sbinkertn@umich.edu if len(p) == 3: 3714479Sbinkertn@umich.edu p[0] = p[1] 3724479Sbinkertn@umich.edu else: 3734479Sbinkertn@umich.edu p[0] = [] # p == 2 --> only a blank line 3744479Sbinkertn@umich.edu else: 3754479Sbinkertn@umich.edu if len(p) == 3: 3764479Sbinkertn@umich.edu p[0] = p[1] + p[2] 3774479Sbinkertn@umich.edu else: 3784479Sbinkertn@umich.edu p[0] = p[1] 3794479Sbinkertn@umich.edu 3804479Sbinkertn@umich.edu 3814479Sbinkertn@umich.edu# funcdef: [decorators] 'def' NAME parameters ':' suite 3824479Sbinkertn@umich.edu# ignoring decorators 3834479Sbinkertn@umich.edudef p_funcdef(p): 3844479Sbinkertn@umich.edu "funcdef : DEF NAME parameters COLON suite" 3854479Sbinkertn@umich.edu p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5]) 3864479Sbinkertn@umich.edu 3874479Sbinkertn@umich.edu# parameters: '(' [varargslist] ')' 3884479Sbinkertn@umich.edudef p_parameters(p): 3894479Sbinkertn@umich.edu """parameters : LPAR RPAR 3904479Sbinkertn@umich.edu | LPAR varargslist RPAR""" 3914479Sbinkertn@umich.edu if len(p) == 3: 3924479Sbinkertn@umich.edu p[0] = [] 3934479Sbinkertn@umich.edu else: 3944479Sbinkertn@umich.edu p[0] = p[2] 3954479Sbinkertn@umich.edu 3964479Sbinkertn@umich.edu 3974479Sbinkertn@umich.edu# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | 3984479Sbinkertn@umich.edu# highly simplified 3994479Sbinkertn@umich.edudef p_varargslist(p): 4004479Sbinkertn@umich.edu """varargslist : varargslist COMMA NAME 4014479Sbinkertn@umich.edu | NAME""" 4024479Sbinkertn@umich.edu if len(p) == 4: 4034479Sbinkertn@umich.edu p[0] = p[1] + p[3] 4044479Sbinkertn@umich.edu else: 4054479Sbinkertn@umich.edu p[0] = [p[1]] 4064479Sbinkertn@umich.edu 4074479Sbinkertn@umich.edu# stmt: simple_stmt | compound_stmt 4084479Sbinkertn@umich.edudef p_stmt_simple(p): 4094479Sbinkertn@umich.edu """stmt : simple_stmt""" 4104479Sbinkertn@umich.edu # simple_stmt is a list 4114479Sbinkertn@umich.edu p[0] = p[1] 4124479Sbinkertn@umich.edu 4134479Sbinkertn@umich.edudef p_stmt_compound(p): 4144479Sbinkertn@umich.edu """stmt : compound_stmt""" 4154479Sbinkertn@umich.edu p[0] = [p[1]] 4164479Sbinkertn@umich.edu 4174479Sbinkertn@umich.edu# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 4184479Sbinkertn@umich.edudef p_simple_stmt(p): 4194479Sbinkertn@umich.edu """simple_stmt : small_stmts NEWLINE 4204479Sbinkertn@umich.edu | small_stmts SEMICOLON NEWLINE""" 4214479Sbinkertn@umich.edu p[0] = p[1] 4224479Sbinkertn@umich.edu 4234479Sbinkertn@umich.edudef p_small_stmts(p): 4244479Sbinkertn@umich.edu """small_stmts : small_stmts SEMICOLON small_stmt 4254479Sbinkertn@umich.edu | small_stmt""" 4264479Sbinkertn@umich.edu if len(p) == 4: 4274479Sbinkertn@umich.edu p[0] = p[1] + [p[3]] 4284479Sbinkertn@umich.edu else: 4294479Sbinkertn@umich.edu p[0] = [p[1]] 4304479Sbinkertn@umich.edu 4314479Sbinkertn@umich.edu# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | 4324479Sbinkertn@umich.edu# import_stmt | global_stmt | exec_stmt | assert_stmt 4334479Sbinkertn@umich.edudef p_small_stmt(p): 4344479Sbinkertn@umich.edu """small_stmt : flow_stmt 4354479Sbinkertn@umich.edu | expr_stmt""" 4364479Sbinkertn@umich.edu p[0] = p[1] 4374479Sbinkertn@umich.edu 4384479Sbinkertn@umich.edu# expr_stmt: testlist (augassign (yield_expr|testlist) | 4394479Sbinkertn@umich.edu# ('=' (yield_expr|testlist))*) 4404479Sbinkertn@umich.edu# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | 4414479Sbinkertn@umich.edu# '<<=' | '>>=' | '**=' | '//=') 4424479Sbinkertn@umich.edudef p_expr_stmt(p): 4434479Sbinkertn@umich.edu """expr_stmt : testlist ASSIGN testlist 4444479Sbinkertn@umich.edu | testlist """ 4454479Sbinkertn@umich.edu if len(p) == 2: 4464479Sbinkertn@umich.edu # a list of expressions 4474479Sbinkertn@umich.edu p[0] = ast.Discard(p[1]) 4484479Sbinkertn@umich.edu else: 4494479Sbinkertn@umich.edu p[0] = Assign(p[1], p[3]) 4504479Sbinkertn@umich.edu 4514479Sbinkertn@umich.edudef p_flow_stmt(p): 4524479Sbinkertn@umich.edu "flow_stmt : return_stmt" 4534479Sbinkertn@umich.edu p[0] = p[1] 4544479Sbinkertn@umich.edu 4554479Sbinkertn@umich.edu# return_stmt: 'return' [testlist] 4564479Sbinkertn@umich.edudef p_return_stmt(p): 4574479Sbinkertn@umich.edu "return_stmt : RETURN testlist" 4584479Sbinkertn@umich.edu p[0] = ast.Return(p[2]) 4594479Sbinkertn@umich.edu 4604479Sbinkertn@umich.edu 4614479Sbinkertn@umich.edudef p_compound_stmt(p): 4624479Sbinkertn@umich.edu """compound_stmt : if_stmt 4634479Sbinkertn@umich.edu | funcdef""" 4644479Sbinkertn@umich.edu p[0] = p[1] 4654479Sbinkertn@umich.edu 4664479Sbinkertn@umich.edudef p_if_stmt(p): 4674479Sbinkertn@umich.edu 'if_stmt : IF test COLON suite' 4684479Sbinkertn@umich.edu p[0] = ast.If([(p[2], p[4])], None) 4694479Sbinkertn@umich.edu 4704479Sbinkertn@umich.edudef p_suite(p): 4714479Sbinkertn@umich.edu """suite : simple_stmt 4724479Sbinkertn@umich.edu | NEWLINE INDENT stmts DEDENT""" 4734479Sbinkertn@umich.edu if len(p) == 2: 4744479Sbinkertn@umich.edu p[0] = ast.Stmt(p[1]) 4754479Sbinkertn@umich.edu else: 4764479Sbinkertn@umich.edu p[0] = ast.Stmt(p[3]) 4774479Sbinkertn@umich.edu 4784479Sbinkertn@umich.edu 4794479Sbinkertn@umich.edudef p_stmts(p): 4804479Sbinkertn@umich.edu """stmts : stmts stmt 4814479Sbinkertn@umich.edu | stmt""" 4824479Sbinkertn@umich.edu if len(p) == 3: 4834479Sbinkertn@umich.edu p[0] = p[1] + p[2] 4844479Sbinkertn@umich.edu else: 4854479Sbinkertn@umich.edu p[0] = p[1] 4864479Sbinkertn@umich.edu 4874479Sbinkertn@umich.edu## No using Python's approach because Ply supports precedence 4884479Sbinkertn@umich.edu 4894479Sbinkertn@umich.edu# comparison: expr (comp_op expr)* 4904479Sbinkertn@umich.edu# arith_expr: term (('+'|'-') term)* 4914479Sbinkertn@umich.edu# term: factor (('*'|'/'|'%'|'//') factor)* 4924479Sbinkertn@umich.edu# factor: ('+'|'-'|'~') factor | power 4934479Sbinkertn@umich.edu# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 4944479Sbinkertn@umich.edu 4954479Sbinkertn@umich.edudef make_lt_compare((left, right)): 4964479Sbinkertn@umich.edu return ast.Compare(left, [('<', right),]) 4974479Sbinkertn@umich.edudef make_gt_compare((left, right)): 4984479Sbinkertn@umich.edu return ast.Compare(left, [('>', right),]) 4994479Sbinkertn@umich.edudef make_eq_compare((left, right)): 5004479Sbinkertn@umich.edu return ast.Compare(left, [('==', right),]) 5014479Sbinkertn@umich.edu 5024479Sbinkertn@umich.edu 5034479Sbinkertn@umich.edubinary_ops = { 5044479Sbinkertn@umich.edu "+": ast.Add, 5054479Sbinkertn@umich.edu "-": ast.Sub, 5064479Sbinkertn@umich.edu "*": ast.Mul, 5074479Sbinkertn@umich.edu "/": ast.Div, 5084479Sbinkertn@umich.edu "<": make_lt_compare, 5094479Sbinkertn@umich.edu ">": make_gt_compare, 5104479Sbinkertn@umich.edu "==": make_eq_compare, 5114479Sbinkertn@umich.edu} 5124479Sbinkertn@umich.eduunary_ops = { 5134479Sbinkertn@umich.edu "+": ast.UnaryAdd, 5144479Sbinkertn@umich.edu "-": ast.UnarySub, 5154479Sbinkertn@umich.edu } 5164479Sbinkertn@umich.eduprecedence = ( 5174479Sbinkertn@umich.edu ("left", "EQ", "GT", "LT"), 5184479Sbinkertn@umich.edu ("left", "PLUS", "MINUS"), 5194479Sbinkertn@umich.edu ("left", "MULT", "DIV"), 5204479Sbinkertn@umich.edu ) 5214479Sbinkertn@umich.edu 5224479Sbinkertn@umich.edudef p_comparison(p): 5234479Sbinkertn@umich.edu """comparison : comparison PLUS comparison 5244479Sbinkertn@umich.edu | comparison MINUS comparison 5254479Sbinkertn@umich.edu | comparison MULT comparison 5264479Sbinkertn@umich.edu | comparison DIV comparison 5274479Sbinkertn@umich.edu | comparison LT comparison 5284479Sbinkertn@umich.edu | comparison EQ comparison 5294479Sbinkertn@umich.edu | comparison GT comparison 5304479Sbinkertn@umich.edu | PLUS comparison 5314479Sbinkertn@umich.edu | MINUS comparison 5324479Sbinkertn@umich.edu | power""" 5334479Sbinkertn@umich.edu if len(p) == 4: 5344479Sbinkertn@umich.edu p[0] = binary_ops[p[2]]((p[1], p[3])) 5354479Sbinkertn@umich.edu elif len(p) == 3: 5364479Sbinkertn@umich.edu p[0] = unary_ops[p[1]](p[2]) 5374479Sbinkertn@umich.edu else: 5384479Sbinkertn@umich.edu p[0] = p[1] 5394479Sbinkertn@umich.edu 5404479Sbinkertn@umich.edu# power: atom trailer* ['**' factor] 5414479Sbinkertn@umich.edu# trailers enables function calls. I only allow one level of calls 5424479Sbinkertn@umich.edu# so this is 'trailer' 5434479Sbinkertn@umich.edudef p_power(p): 5444479Sbinkertn@umich.edu """power : atom 5454479Sbinkertn@umich.edu | atom trailer""" 5464479Sbinkertn@umich.edu if len(p) == 2: 5474479Sbinkertn@umich.edu p[0] = p[1] 5484479Sbinkertn@umich.edu else: 5494479Sbinkertn@umich.edu if p[2][0] == "CALL": 5504479Sbinkertn@umich.edu p[0] = ast.CallFunc(p[1], p[2][1], None, None) 5514479Sbinkertn@umich.edu else: 5524479Sbinkertn@umich.edu raise AssertionError("not implemented") 5534479Sbinkertn@umich.edu 5544479Sbinkertn@umich.edudef p_atom_name(p): 5554479Sbinkertn@umich.edu """atom : NAME""" 5564479Sbinkertn@umich.edu p[0] = ast.Name(p[1]) 5574479Sbinkertn@umich.edu 5584479Sbinkertn@umich.edudef p_atom_number(p): 5594479Sbinkertn@umich.edu """atom : NUMBER 5604479Sbinkertn@umich.edu | STRING""" 5614479Sbinkertn@umich.edu p[0] = ast.Const(p[1]) 5624479Sbinkertn@umich.edu 5634479Sbinkertn@umich.edudef p_atom_tuple(p): 5644479Sbinkertn@umich.edu """atom : LPAR testlist RPAR""" 5654479Sbinkertn@umich.edu p[0] = p[2] 5664479Sbinkertn@umich.edu 5674479Sbinkertn@umich.edu# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 5684479Sbinkertn@umich.edudef p_trailer(p): 5694479Sbinkertn@umich.edu "trailer : LPAR arglist RPAR" 5704479Sbinkertn@umich.edu p[0] = ("CALL", p[2]) 5714479Sbinkertn@umich.edu 5724479Sbinkertn@umich.edu# testlist: test (',' test)* [','] 5734479Sbinkertn@umich.edu# Contains shift/reduce error 5744479Sbinkertn@umich.edudef p_testlist(p): 5754479Sbinkertn@umich.edu """testlist : testlist_multi COMMA 5764479Sbinkertn@umich.edu | testlist_multi """ 5774479Sbinkertn@umich.edu if len(p) == 2: 5784479Sbinkertn@umich.edu p[0] = p[1] 5794479Sbinkertn@umich.edu else: 5804479Sbinkertn@umich.edu # May need to promote singleton to tuple 5814479Sbinkertn@umich.edu if isinstance(p[1], list): 5824479Sbinkertn@umich.edu p[0] = p[1] 5834479Sbinkertn@umich.edu else: 5844479Sbinkertn@umich.edu p[0] = [p[1]] 5854479Sbinkertn@umich.edu # Convert into a tuple? 5864479Sbinkertn@umich.edu if isinstance(p[0], list): 5874479Sbinkertn@umich.edu p[0] = ast.Tuple(p[0]) 5884479Sbinkertn@umich.edu 5894479Sbinkertn@umich.edudef p_testlist_multi(p): 5904479Sbinkertn@umich.edu """testlist_multi : testlist_multi COMMA test 5914479Sbinkertn@umich.edu | test""" 5924479Sbinkertn@umich.edu if len(p) == 2: 5934479Sbinkertn@umich.edu # singleton 5944479Sbinkertn@umich.edu p[0] = p[1] 5954479Sbinkertn@umich.edu else: 5964479Sbinkertn@umich.edu if isinstance(p[1], list): 5974479Sbinkertn@umich.edu p[0] = p[1] + [p[3]] 5984479Sbinkertn@umich.edu else: 5994479Sbinkertn@umich.edu # singleton -> tuple 6004479Sbinkertn@umich.edu p[0] = [p[1], p[3]] 6014479Sbinkertn@umich.edu 6024479Sbinkertn@umich.edu 6034479Sbinkertn@umich.edu# test: or_test ['if' or_test 'else' test] | lambdef 6044479Sbinkertn@umich.edu# as I don't support 'and', 'or', and 'not' this works down to 'comparison' 6054479Sbinkertn@umich.edudef p_test(p): 6064479Sbinkertn@umich.edu "test : comparison" 6074479Sbinkertn@umich.edu p[0] = p[1] 6084479Sbinkertn@umich.edu 6094479Sbinkertn@umich.edu 6104479Sbinkertn@umich.edu 6114479Sbinkertn@umich.edu# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) 6124479Sbinkertn@umich.edu# XXX INCOMPLETE: this doesn't allow the trailing comma 6134479Sbinkertn@umich.edudef p_arglist(p): 6144479Sbinkertn@umich.edu """arglist : arglist COMMA argument 6154479Sbinkertn@umich.edu | argument""" 6164479Sbinkertn@umich.edu if len(p) == 4: 6174479Sbinkertn@umich.edu p[0] = p[1] + [p[3]] 6184479Sbinkertn@umich.edu else: 6194479Sbinkertn@umich.edu p[0] = [p[1]] 6204479Sbinkertn@umich.edu 6214479Sbinkertn@umich.edu# argument: test [gen_for] | test '=' test # Really [keyword '='] test 6224479Sbinkertn@umich.edudef p_argument(p): 6234479Sbinkertn@umich.edu "argument : test" 6244479Sbinkertn@umich.edu p[0] = p[1] 6254479Sbinkertn@umich.edu 6264479Sbinkertn@umich.edudef p_error(p): 6274479Sbinkertn@umich.edu #print "Error!", repr(p) 6284479Sbinkertn@umich.edu raise SyntaxError(p) 6294479Sbinkertn@umich.edu 6304479Sbinkertn@umich.edu 6314479Sbinkertn@umich.educlass GardenSnakeParser(object): 6324479Sbinkertn@umich.edu def __init__(self, lexer = None): 6334479Sbinkertn@umich.edu if lexer is None: 6344479Sbinkertn@umich.edu lexer = IndentLexer() 6354479Sbinkertn@umich.edu self.lexer = lexer 6364479Sbinkertn@umich.edu self.parser = yacc.yacc(start="file_input_end") 6374479Sbinkertn@umich.edu 6384479Sbinkertn@umich.edu def parse(self, code): 6394479Sbinkertn@umich.edu self.lexer.input(code) 6404479Sbinkertn@umich.edu result = self.parser.parse(lexer = self.lexer) 6414479Sbinkertn@umich.edu return ast.Module(None, result) 6424479Sbinkertn@umich.edu 6434479Sbinkertn@umich.edu 6444479Sbinkertn@umich.edu###### Code generation ###### 6454479Sbinkertn@umich.edu 6464479Sbinkertn@umich.edufrom compiler import misc, syntax, pycodegen 6474479Sbinkertn@umich.edu 6484479Sbinkertn@umich.educlass GardenSnakeCompiler(object): 6494479Sbinkertn@umich.edu def __init__(self): 6504479Sbinkertn@umich.edu self.parser = GardenSnakeParser() 6514479Sbinkertn@umich.edu def compile(self, code, filename="<string>"): 6524479Sbinkertn@umich.edu tree = self.parser.parse(code) 6534479Sbinkertn@umich.edu #print tree 6544479Sbinkertn@umich.edu misc.set_filename(filename, tree) 6554479Sbinkertn@umich.edu syntax.check(tree) 6564479Sbinkertn@umich.edu gen = pycodegen.ModuleCodeGenerator(tree) 6574479Sbinkertn@umich.edu code = gen.getCode() 6584479Sbinkertn@umich.edu return code 6594479Sbinkertn@umich.edu 6604479Sbinkertn@umich.edu####### Test code ####### 6614479Sbinkertn@umich.edu 6624479Sbinkertn@umich.educompile = GardenSnakeCompiler().compile 6634479Sbinkertn@umich.edu 6644479Sbinkertn@umich.educode = r""" 6654479Sbinkertn@umich.edu 6664479Sbinkertn@umich.eduprint('LET\'S TRY THIS \\OUT') 6674479Sbinkertn@umich.edu 6684479Sbinkertn@umich.edu#Comment here 6694479Sbinkertn@umich.edudef x(a): 6704479Sbinkertn@umich.edu print('called with',a) 6714479Sbinkertn@umich.edu if a == 1: 6724479Sbinkertn@umich.edu return 2 6734479Sbinkertn@umich.edu if a*2 > 10: return 999 / 4 6744479Sbinkertn@umich.edu # Another comment here 6754479Sbinkertn@umich.edu 6764479Sbinkertn@umich.edu return a+2*3 6774479Sbinkertn@umich.edu 6784479Sbinkertn@umich.eduints = (1, 2, 6794479Sbinkertn@umich.edu 3, 4, 6804479Sbinkertn@umich.edu5) 6814479Sbinkertn@umich.eduprint('mutiline-expression', ints) 6824479Sbinkertn@umich.edu 6834479Sbinkertn@umich.edut = 4+1/3*2+6*(9-5+1) 6844479Sbinkertn@umich.eduprint('predence test; should be 34+2/3:', t, t==(34+2/3)) 6854479Sbinkertn@umich.edu 6864479Sbinkertn@umich.eduprint('numbers', 1,2,3,4,5) 6874479Sbinkertn@umich.eduif 1: 6884479Sbinkertn@umich.edu 8 6894479Sbinkertn@umich.edu a=9 6904479Sbinkertn@umich.edu print(x(a)) 6914479Sbinkertn@umich.edu 6924479Sbinkertn@umich.eduprint(x(1)) 6934479Sbinkertn@umich.eduprint(x(2)) 6944479Sbinkertn@umich.eduprint(x(8),'3') 6954479Sbinkertn@umich.eduprint('this is decimal', 1/5) 6964479Sbinkertn@umich.eduprint('BIG DECIMAL', 1.234567891234567e12345) 6974479Sbinkertn@umich.edu 6984479Sbinkertn@umich.edu""" 6994479Sbinkertn@umich.edu 7004479Sbinkertn@umich.edu# Set up the GardenSnake run-time environment 7014479Sbinkertn@umich.edudef print_(*args): 7024479Sbinkertn@umich.edu print "-->", " ".join(map(str,args)) 7034479Sbinkertn@umich.edu 7044479Sbinkertn@umich.eduglobals()["print"] = print_ 7054479Sbinkertn@umich.edu 7064479Sbinkertn@umich.educompiled_code = compile(code) 7074479Sbinkertn@umich.edu 7084479Sbinkertn@umich.eduexec compiled_code in globals() 7094479Sbinkertn@umich.eduprint "Done" 710