14479Sbinkertn@umich.edu# GardenSnake - a parser generator demonstration program
24479Sbinkertn@umich.edu#
34479Sbinkertn@umich.edu# This implements a modified version of a subset of Python:
44479Sbinkertn@umich.edu#  - only 'def', 'return' and 'if' statements
54479Sbinkertn@umich.edu#  - 'if' only has 'then' clause (no elif nor else)
64479Sbinkertn@umich.edu#  - single-quoted strings only, content in raw format
74479Sbinkertn@umich.edu#  - numbers are decimal.Decimal instances (not integers or floats)
84479Sbinkertn@umich.edu#  - no print statment; use the built-in 'print' function
94479Sbinkertn@umich.edu#  - only < > == + - / * implemented (and unary + -)
104479Sbinkertn@umich.edu#  - assignment and tuple assignment work
114479Sbinkertn@umich.edu#  - no generators of any sort
124479Sbinkertn@umich.edu#  - no ... well, no quite a lot
134479Sbinkertn@umich.edu
144479Sbinkertn@umich.edu# Why?  I'm thinking about a new indentation-based configuration
154479Sbinkertn@umich.edu# language for a project and wanted to figure out how to do it.  Once
164479Sbinkertn@umich.edu# I got that working I needed a way to test it out.  My original AST
174479Sbinkertn@umich.edu# was dumb so I decided to target Python's AST and compile it into
184479Sbinkertn@umich.edu# Python code.  Plus, it's pretty cool that it only took a day or so
194479Sbinkertn@umich.edu# from sitting down with Ply to having working code.
204479Sbinkertn@umich.edu
214479Sbinkertn@umich.edu# This uses David Beazley's Ply from http://www.dabeaz.com/ply/
224479Sbinkertn@umich.edu
234479Sbinkertn@umich.edu# This work is hereby released into the Public Domain. To view a copy of
244479Sbinkertn@umich.edu# the public domain dedication, visit
254479Sbinkertn@umich.edu# http://creativecommons.org/licenses/publicdomain/ or send a letter to
264479Sbinkertn@umich.edu# Creative Commons, 543 Howard Street, 5th Floor, San Francisco,
274479Sbinkertn@umich.edu# California, 94105, USA.
284479Sbinkertn@umich.edu#
294479Sbinkertn@umich.edu# Portions of this work are derived from Python's Grammar definition
304479Sbinkertn@umich.edu# and may be covered under the Python copyright and license
314479Sbinkertn@umich.edu#
324479Sbinkertn@umich.edu#          Andrew Dalke / Dalke Scientific Software, LLC
334479Sbinkertn@umich.edu#             30 August 2006 / Cape Town, South Africa
344479Sbinkertn@umich.edu
354479Sbinkertn@umich.edu# Changelog:
364479Sbinkertn@umich.edu#  30 August - added link to CC license; removed the "swapcase" encoding
374479Sbinkertn@umich.edu
384479Sbinkertn@umich.edu# Modifications for inclusion in PLY distribution
394479Sbinkertn@umich.eduimport sys
404479Sbinkertn@umich.edusys.path.insert(0,"../..")
414479Sbinkertn@umich.edufrom ply import *
424479Sbinkertn@umich.edu
434479Sbinkertn@umich.edu##### Lexer ######
444479Sbinkertn@umich.edu#import lex
454479Sbinkertn@umich.eduimport decimal
464479Sbinkertn@umich.edu
474479Sbinkertn@umich.edutokens = (
484479Sbinkertn@umich.edu    'DEF',
494479Sbinkertn@umich.edu    'IF',
504479Sbinkertn@umich.edu    'NAME',
514479Sbinkertn@umich.edu    'NUMBER',  # Python decimals
524479Sbinkertn@umich.edu    'STRING',  # single quoted strings only; syntax of raw strings
534479Sbinkertn@umich.edu    'LPAR',
544479Sbinkertn@umich.edu    'RPAR',
554479Sbinkertn@umich.edu    'COLON',
564479Sbinkertn@umich.edu    'EQ',
574479Sbinkertn@umich.edu    'ASSIGN',
584479Sbinkertn@umich.edu    'LT',
594479Sbinkertn@umich.edu    'GT',
604479Sbinkertn@umich.edu    'PLUS',
614479Sbinkertn@umich.edu    'MINUS',
624479Sbinkertn@umich.edu    'MULT',
634479Sbinkertn@umich.edu    'DIV',
644479Sbinkertn@umich.edu    'RETURN',
654479Sbinkertn@umich.edu    'WS',
664479Sbinkertn@umich.edu    'NEWLINE',
674479Sbinkertn@umich.edu    'COMMA',
684479Sbinkertn@umich.edu    'SEMICOLON',
694479Sbinkertn@umich.edu    'INDENT',
704479Sbinkertn@umich.edu    'DEDENT',
714479Sbinkertn@umich.edu    'ENDMARKER',
724479Sbinkertn@umich.edu    )
734479Sbinkertn@umich.edu
744479Sbinkertn@umich.edu#t_NUMBER = r'\d+'
754479Sbinkertn@umich.edu# taken from decmial.py but without the leading sign
764479Sbinkertn@umich.edudef t_NUMBER(t):
774479Sbinkertn@umich.edu    r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?"""
784479Sbinkertn@umich.edu    t.value = decimal.Decimal(t.value)
794479Sbinkertn@umich.edu    return t
804479Sbinkertn@umich.edu
814479Sbinkertn@umich.edudef t_STRING(t):
824479Sbinkertn@umich.edu    r"'([^\\']+|\\'|\\\\)*'"  # I think this is right ...
834479Sbinkertn@umich.edu    t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun
844479Sbinkertn@umich.edu    return t
854479Sbinkertn@umich.edu
864479Sbinkertn@umich.edut_COLON = r':'
874479Sbinkertn@umich.edut_EQ = r'=='
884479Sbinkertn@umich.edut_ASSIGN = r'='
894479Sbinkertn@umich.edut_LT = r'<'
904479Sbinkertn@umich.edut_GT = r'>'
914479Sbinkertn@umich.edut_PLUS = r'\+'
924479Sbinkertn@umich.edut_MINUS = r'-'
934479Sbinkertn@umich.edut_MULT = r'\*'
944479Sbinkertn@umich.edut_DIV = r'/'
954479Sbinkertn@umich.edut_COMMA = r','
964479Sbinkertn@umich.edut_SEMICOLON = r';'
974479Sbinkertn@umich.edu
984479Sbinkertn@umich.edu# Ply nicely documented how to do this.
994479Sbinkertn@umich.edu
1004479Sbinkertn@umich.eduRESERVED = {
1014479Sbinkertn@umich.edu  "def": "DEF",
1024479Sbinkertn@umich.edu  "if": "IF",
1034479Sbinkertn@umich.edu  "return": "RETURN",
1044479Sbinkertn@umich.edu  }
1054479Sbinkertn@umich.edu
1064479Sbinkertn@umich.edudef t_NAME(t):
1074479Sbinkertn@umich.edu    r'[a-zA-Z_][a-zA-Z0-9_]*'
1084479Sbinkertn@umich.edu    t.type = RESERVED.get(t.value, "NAME")
1094479Sbinkertn@umich.edu    return t
1104479Sbinkertn@umich.edu
1114479Sbinkertn@umich.edu# Putting this before t_WS let it consume lines with only comments in
1124479Sbinkertn@umich.edu# them so the latter code never sees the WS part.  Not consuming the
1134479Sbinkertn@umich.edu# newline.  Needed for "if 1: #comment"
1144479Sbinkertn@umich.edudef t_comment(t):
1154479Sbinkertn@umich.edu    r"[ ]*\043[^\n]*"  # \043 is '#'
1164479Sbinkertn@umich.edu    pass
1174479Sbinkertn@umich.edu
1184479Sbinkertn@umich.edu
1194479Sbinkertn@umich.edu# Whitespace
1204479Sbinkertn@umich.edudef t_WS(t):
1214479Sbinkertn@umich.edu    r' [ ]+ '
1224479Sbinkertn@umich.edu    if t.lexer.at_line_start and t.lexer.paren_count == 0:
1234479Sbinkertn@umich.edu        return t
1244479Sbinkertn@umich.edu
1254479Sbinkertn@umich.edu# Don't generate newline tokens when inside of parenthesis, eg
1264479Sbinkertn@umich.edu#   a = (1,
1274479Sbinkertn@umich.edu#        2, 3)
1284479Sbinkertn@umich.edudef t_newline(t):
1294479Sbinkertn@umich.edu    r'\n+'
1304479Sbinkertn@umich.edu    t.lexer.lineno += len(t.value)
1314479Sbinkertn@umich.edu    t.type = "NEWLINE"
1324479Sbinkertn@umich.edu    if t.lexer.paren_count == 0:
1334479Sbinkertn@umich.edu        return t
1344479Sbinkertn@umich.edu
1354479Sbinkertn@umich.edudef t_LPAR(t):
1364479Sbinkertn@umich.edu    r'\('
1374479Sbinkertn@umich.edu    t.lexer.paren_count += 1
1384479Sbinkertn@umich.edu    return t
1394479Sbinkertn@umich.edu
1404479Sbinkertn@umich.edudef t_RPAR(t):
1414479Sbinkertn@umich.edu    r'\)'
1424479Sbinkertn@umich.edu    # check for underflow?  should be the job of the parser
1434479Sbinkertn@umich.edu    t.lexer.paren_count -= 1
1444479Sbinkertn@umich.edu    return t
1454479Sbinkertn@umich.edu
1464479Sbinkertn@umich.edu
1474479Sbinkertn@umich.edudef t_error(t):
1484479Sbinkertn@umich.edu    raise SyntaxError("Unknown symbol %r" % (t.value[0],))
1494479Sbinkertn@umich.edu    print "Skipping", repr(t.value[0])
1504479Sbinkertn@umich.edu    t.lexer.skip(1)
1514479Sbinkertn@umich.edu
1524479Sbinkertn@umich.edu## I implemented INDENT / DEDENT generation as a post-processing filter
1534479Sbinkertn@umich.edu
1544479Sbinkertn@umich.edu# The original lex token stream contains WS and NEWLINE characters.
1554479Sbinkertn@umich.edu# WS will only occur before any other tokens on a line.
1564479Sbinkertn@umich.edu
1574479Sbinkertn@umich.edu# I have three filters.  One tags tokens by adding two attributes.
1584479Sbinkertn@umich.edu# "must_indent" is True if the token must be indented from the
1594479Sbinkertn@umich.edu# previous code.  The other is "at_line_start" which is True for WS
1604479Sbinkertn@umich.edu# and the first non-WS/non-NEWLINE on a line.  It flags the check so
1614479Sbinkertn@umich.edu# see if the new line has changed indication level.
1624479Sbinkertn@umich.edu
1634479Sbinkertn@umich.edu# Python's syntax has three INDENT states
1644479Sbinkertn@umich.edu#  0) no colon hence no need to indent
1654479Sbinkertn@umich.edu#  1) "if 1: go()" - simple statements have a COLON but no need for an indent
1664479Sbinkertn@umich.edu#  2) "if 1:\n  go()" - complex statements have a COLON NEWLINE and must indent
1674479Sbinkertn@umich.eduNO_INDENT = 0
1684479Sbinkertn@umich.eduMAY_INDENT = 1
1694479Sbinkertn@umich.eduMUST_INDENT = 2
1704479Sbinkertn@umich.edu
1714479Sbinkertn@umich.edu# only care about whitespace at the start of a line
1724479Sbinkertn@umich.edudef track_tokens_filter(lexer, tokens):
1734479Sbinkertn@umich.edu    lexer.at_line_start = at_line_start = True
1744479Sbinkertn@umich.edu    indent = NO_INDENT
1754479Sbinkertn@umich.edu    saw_colon = False
1764479Sbinkertn@umich.edu    for token in tokens:
1774479Sbinkertn@umich.edu        token.at_line_start = at_line_start
1784479Sbinkertn@umich.edu
1794479Sbinkertn@umich.edu        if token.type == "COLON":
1804479Sbinkertn@umich.edu            at_line_start = False
1814479Sbinkertn@umich.edu            indent = MAY_INDENT
1824479Sbinkertn@umich.edu            token.must_indent = False
1836498Snate@binkert.org
1844479Sbinkertn@umich.edu        elif token.type == "NEWLINE":
1854479Sbinkertn@umich.edu            at_line_start = True
1864479Sbinkertn@umich.edu            if indent == MAY_INDENT:
1874479Sbinkertn@umich.edu                indent = MUST_INDENT
1884479Sbinkertn@umich.edu            token.must_indent = False
1894479Sbinkertn@umich.edu
1904479Sbinkertn@umich.edu        elif token.type == "WS":
1914479Sbinkertn@umich.edu            assert token.at_line_start == True
1924479Sbinkertn@umich.edu            at_line_start = True
1934479Sbinkertn@umich.edu            token.must_indent = False
1944479Sbinkertn@umich.edu
1954479Sbinkertn@umich.edu        else:
1964479Sbinkertn@umich.edu            # A real token; only indent after COLON NEWLINE
1974479Sbinkertn@umich.edu            if indent == MUST_INDENT:
1984479Sbinkertn@umich.edu                token.must_indent = True
1994479Sbinkertn@umich.edu            else:
2004479Sbinkertn@umich.edu                token.must_indent = False
2014479Sbinkertn@umich.edu            at_line_start = False
2024479Sbinkertn@umich.edu            indent = NO_INDENT
2034479Sbinkertn@umich.edu
2044479Sbinkertn@umich.edu        yield token
2054479Sbinkertn@umich.edu        lexer.at_line_start = at_line_start
2064479Sbinkertn@umich.edu
2074479Sbinkertn@umich.edudef _new_token(type, lineno):
2084479Sbinkertn@umich.edu    tok = lex.LexToken()
2094479Sbinkertn@umich.edu    tok.type = type
2104479Sbinkertn@umich.edu    tok.value = None
2114479Sbinkertn@umich.edu    tok.lineno = lineno
2124479Sbinkertn@umich.edu    return tok
2134479Sbinkertn@umich.edu
2144479Sbinkertn@umich.edu# Synthesize a DEDENT tag
2154479Sbinkertn@umich.edudef DEDENT(lineno):
2164479Sbinkertn@umich.edu    return _new_token("DEDENT", lineno)
2174479Sbinkertn@umich.edu
2184479Sbinkertn@umich.edu# Synthesize an INDENT tag
2194479Sbinkertn@umich.edudef INDENT(lineno):
2204479Sbinkertn@umich.edu    return _new_token("INDENT", lineno)
2214479Sbinkertn@umich.edu
2224479Sbinkertn@umich.edu
2234479Sbinkertn@umich.edu# Track the indentation level and emit the right INDENT / DEDENT events.
2244479Sbinkertn@umich.edudef indentation_filter(tokens):
2254479Sbinkertn@umich.edu    # A stack of indentation levels; will never pop item 0
2264479Sbinkertn@umich.edu    levels = [0]
2274479Sbinkertn@umich.edu    token = None
2284479Sbinkertn@umich.edu    depth = 0
2294479Sbinkertn@umich.edu    prev_was_ws = False
2304479Sbinkertn@umich.edu    for token in tokens:
2314479Sbinkertn@umich.edu##        if 1:
2324479Sbinkertn@umich.edu##            print "Process", token,
2334479Sbinkertn@umich.edu##            if token.at_line_start:
2344479Sbinkertn@umich.edu##                print "at_line_start",
2354479Sbinkertn@umich.edu##            if token.must_indent:
2364479Sbinkertn@umich.edu##                print "must_indent",
2374479Sbinkertn@umich.edu##            print
2386498Snate@binkert.org
2394479Sbinkertn@umich.edu        # WS only occurs at the start of the line
2404479Sbinkertn@umich.edu        # There may be WS followed by NEWLINE so
2414479Sbinkertn@umich.edu        # only track the depth here.  Don't indent/dedent
2424479Sbinkertn@umich.edu        # until there's something real.
2434479Sbinkertn@umich.edu        if token.type == "WS":
2444479Sbinkertn@umich.edu            assert depth == 0
2454479Sbinkertn@umich.edu            depth = len(token.value)
2464479Sbinkertn@umich.edu            prev_was_ws = True
2474479Sbinkertn@umich.edu            # WS tokens are never passed to the parser
2484479Sbinkertn@umich.edu            continue
2494479Sbinkertn@umich.edu
2504479Sbinkertn@umich.edu        if token.type == "NEWLINE":
2514479Sbinkertn@umich.edu            depth = 0
2524479Sbinkertn@umich.edu            if prev_was_ws or token.at_line_start:
2534479Sbinkertn@umich.edu                # ignore blank lines
2544479Sbinkertn@umich.edu                continue
2554479Sbinkertn@umich.edu            # pass the other cases on through
2564479Sbinkertn@umich.edu            yield token
2574479Sbinkertn@umich.edu            continue
2584479Sbinkertn@umich.edu
2594479Sbinkertn@umich.edu        # then it must be a real token (not WS, not NEWLINE)
2604479Sbinkertn@umich.edu        # which can affect the indentation level
2614479Sbinkertn@umich.edu
2624479Sbinkertn@umich.edu        prev_was_ws = False
2634479Sbinkertn@umich.edu        if token.must_indent:
2644479Sbinkertn@umich.edu            # The current depth must be larger than the previous level
2654479Sbinkertn@umich.edu            if not (depth > levels[-1]):
2664479Sbinkertn@umich.edu                raise IndentationError("expected an indented block")
2674479Sbinkertn@umich.edu
2684479Sbinkertn@umich.edu            levels.append(depth)
2694479Sbinkertn@umich.edu            yield INDENT(token.lineno)
2704479Sbinkertn@umich.edu
2714479Sbinkertn@umich.edu        elif token.at_line_start:
2724479Sbinkertn@umich.edu            # Must be on the same level or one of the previous levels
2734479Sbinkertn@umich.edu            if depth == levels[-1]:
2744479Sbinkertn@umich.edu                # At the same level
2754479Sbinkertn@umich.edu                pass
2764479Sbinkertn@umich.edu            elif depth > levels[-1]:
2774479Sbinkertn@umich.edu                raise IndentationError("indentation increase but not in new block")
2784479Sbinkertn@umich.edu            else:
2794479Sbinkertn@umich.edu                # Back up; but only if it matches a previous level
2804479Sbinkertn@umich.edu                try:
2814479Sbinkertn@umich.edu                    i = levels.index(depth)
2824479Sbinkertn@umich.edu                except ValueError:
2834479Sbinkertn@umich.edu                    raise IndentationError("inconsistent indentation")
2844479Sbinkertn@umich.edu                for _ in range(i+1, len(levels)):
2854479Sbinkertn@umich.edu                    yield DEDENT(token.lineno)
2864479Sbinkertn@umich.edu                    levels.pop()
2874479Sbinkertn@umich.edu
2884479Sbinkertn@umich.edu        yield token
2894479Sbinkertn@umich.edu
2904479Sbinkertn@umich.edu    ### Finished processing ###
2914479Sbinkertn@umich.edu
2924479Sbinkertn@umich.edu    # Must dedent any remaining levels
2934479Sbinkertn@umich.edu    if len(levels) > 1:
2944479Sbinkertn@umich.edu        assert token is not None
2954479Sbinkertn@umich.edu        for _ in range(1, len(levels)):
2964479Sbinkertn@umich.edu            yield DEDENT(token.lineno)
2976498Snate@binkert.org
2984479Sbinkertn@umich.edu
2994479Sbinkertn@umich.edu# The top-level filter adds an ENDMARKER, if requested.
3004479Sbinkertn@umich.edu# Python's grammar uses it.
3014479Sbinkertn@umich.edudef filter(lexer, add_endmarker = True):
3024479Sbinkertn@umich.edu    token = None
3034479Sbinkertn@umich.edu    tokens = iter(lexer.token, None)
3044479Sbinkertn@umich.edu    tokens = track_tokens_filter(lexer, tokens)
3054479Sbinkertn@umich.edu    for token in indentation_filter(tokens):
3064479Sbinkertn@umich.edu        yield token
3074479Sbinkertn@umich.edu
3084479Sbinkertn@umich.edu    if add_endmarker:
3094479Sbinkertn@umich.edu        lineno = 1
3104479Sbinkertn@umich.edu        if token is not None:
3114479Sbinkertn@umich.edu            lineno = token.lineno
3124479Sbinkertn@umich.edu        yield _new_token("ENDMARKER", lineno)
3134479Sbinkertn@umich.edu
3144479Sbinkertn@umich.edu# Combine Ply and my filters into a new lexer
3154479Sbinkertn@umich.edu
3164479Sbinkertn@umich.educlass IndentLexer(object):
3174479Sbinkertn@umich.edu    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
3184479Sbinkertn@umich.edu        self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags)
3194479Sbinkertn@umich.edu        self.token_stream = None
3204479Sbinkertn@umich.edu    def input(self, s, add_endmarker=True):
3214479Sbinkertn@umich.edu        self.lexer.paren_count = 0
3224479Sbinkertn@umich.edu        self.lexer.input(s)
3234479Sbinkertn@umich.edu        self.token_stream = filter(self.lexer, add_endmarker)
3244479Sbinkertn@umich.edu    def token(self):
3254479Sbinkertn@umich.edu        try:
3264479Sbinkertn@umich.edu            return self.token_stream.next()
3274479Sbinkertn@umich.edu        except StopIteration:
3284479Sbinkertn@umich.edu            return None
3294479Sbinkertn@umich.edu
3304479Sbinkertn@umich.edu##########   Parser (tokens -> AST) ######
3314479Sbinkertn@umich.edu
3324479Sbinkertn@umich.edu# also part of Ply
3334479Sbinkertn@umich.edu#import yacc
3344479Sbinkertn@umich.edu
3354479Sbinkertn@umich.edu# I use the Python AST
3364479Sbinkertn@umich.edufrom compiler import ast
3374479Sbinkertn@umich.edu
3384479Sbinkertn@umich.edu# Helper function
3394479Sbinkertn@umich.edudef Assign(left, right):
3404479Sbinkertn@umich.edu    names = []
3414479Sbinkertn@umich.edu    if isinstance(left, ast.Name):
3424479Sbinkertn@umich.edu        # Single assignment on left
3434479Sbinkertn@umich.edu        return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)
3444479Sbinkertn@umich.edu    elif isinstance(left, ast.Tuple):
3454479Sbinkertn@umich.edu        # List of things - make sure they are Name nodes
3464479Sbinkertn@umich.edu        names = []
3474479Sbinkertn@umich.edu        for child in left.getChildren():
3484479Sbinkertn@umich.edu            if not isinstance(child, ast.Name):
3494479Sbinkertn@umich.edu                raise SyntaxError("that assignment not supported")
3504479Sbinkertn@umich.edu            names.append(child.name)
3514479Sbinkertn@umich.edu        ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
3524479Sbinkertn@umich.edu        return ast.Assign([ast.AssTuple(ass_list)], right)
3534479Sbinkertn@umich.edu    else:
3544479Sbinkertn@umich.edu        raise SyntaxError("Can't do that yet")
3554479Sbinkertn@umich.edu
3564479Sbinkertn@umich.edu
3574479Sbinkertn@umich.edu# The grammar comments come from Python's Grammar/Grammar file
3584479Sbinkertn@umich.edu
3594479Sbinkertn@umich.edu## NB: compound_stmt in single_input is followed by extra NEWLINE!
3604479Sbinkertn@umich.edu# file_input: (NEWLINE | stmt)* ENDMARKER
3614479Sbinkertn@umich.edudef p_file_input_end(p):
3624479Sbinkertn@umich.edu    """file_input_end : file_input ENDMARKER"""
3634479Sbinkertn@umich.edu    p[0] = ast.Stmt(p[1])
3644479Sbinkertn@umich.edudef p_file_input(p):
3654479Sbinkertn@umich.edu    """file_input : file_input NEWLINE
3664479Sbinkertn@umich.edu                  | file_input stmt
3674479Sbinkertn@umich.edu                  | NEWLINE
3684479Sbinkertn@umich.edu                  | stmt"""
3694479Sbinkertn@umich.edu    if isinstance(p[len(p)-1], basestring):
3704479Sbinkertn@umich.edu        if len(p) == 3:
3714479Sbinkertn@umich.edu            p[0] = p[1]
3724479Sbinkertn@umich.edu        else:
3734479Sbinkertn@umich.edu            p[0] = [] # p == 2 --> only a blank line
3744479Sbinkertn@umich.edu    else:
3754479Sbinkertn@umich.edu        if len(p) == 3:
3764479Sbinkertn@umich.edu            p[0] = p[1] + p[2]
3774479Sbinkertn@umich.edu        else:
3784479Sbinkertn@umich.edu            p[0] = p[1]
3796498Snate@binkert.org
3804479Sbinkertn@umich.edu
3814479Sbinkertn@umich.edu# funcdef: [decorators] 'def' NAME parameters ':' suite
3824479Sbinkertn@umich.edu# ignoring decorators
3834479Sbinkertn@umich.edudef p_funcdef(p):
3844479Sbinkertn@umich.edu    "funcdef : DEF NAME parameters COLON suite"
3854479Sbinkertn@umich.edu    p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5])
3866498Snate@binkert.org
3874479Sbinkertn@umich.edu# parameters: '(' [varargslist] ')'
3884479Sbinkertn@umich.edudef p_parameters(p):
3894479Sbinkertn@umich.edu    """parameters : LPAR RPAR
3904479Sbinkertn@umich.edu                  | LPAR varargslist RPAR"""
3914479Sbinkertn@umich.edu    if len(p) == 3:
3924479Sbinkertn@umich.edu        p[0] = []
3934479Sbinkertn@umich.edu    else:
3944479Sbinkertn@umich.edu        p[0] = p[2]
3956498Snate@binkert.org
3964479Sbinkertn@umich.edu
3976498Snate@binkert.org# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) |
3984479Sbinkertn@umich.edu# highly simplified
3994479Sbinkertn@umich.edudef p_varargslist(p):
4004479Sbinkertn@umich.edu    """varargslist : varargslist COMMA NAME
4014479Sbinkertn@umich.edu                   | NAME"""
4024479Sbinkertn@umich.edu    if len(p) == 4:
4034479Sbinkertn@umich.edu        p[0] = p[1] + p[3]
4044479Sbinkertn@umich.edu    else:
4054479Sbinkertn@umich.edu        p[0] = [p[1]]
4064479Sbinkertn@umich.edu
4074479Sbinkertn@umich.edu# stmt: simple_stmt | compound_stmt
4084479Sbinkertn@umich.edudef p_stmt_simple(p):
4094479Sbinkertn@umich.edu    """stmt : simple_stmt"""
4104479Sbinkertn@umich.edu    # simple_stmt is a list
4114479Sbinkertn@umich.edu    p[0] = p[1]
4126498Snate@binkert.org
4134479Sbinkertn@umich.edudef p_stmt_compound(p):
4144479Sbinkertn@umich.edu    """stmt : compound_stmt"""
4154479Sbinkertn@umich.edu    p[0] = [p[1]]
4164479Sbinkertn@umich.edu
4174479Sbinkertn@umich.edu# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
4184479Sbinkertn@umich.edudef p_simple_stmt(p):
4194479Sbinkertn@umich.edu    """simple_stmt : small_stmts NEWLINE
4204479Sbinkertn@umich.edu                   | small_stmts SEMICOLON NEWLINE"""
4214479Sbinkertn@umich.edu    p[0] = p[1]
4224479Sbinkertn@umich.edu
4234479Sbinkertn@umich.edudef p_small_stmts(p):
4244479Sbinkertn@umich.edu    """small_stmts : small_stmts SEMICOLON small_stmt
4254479Sbinkertn@umich.edu                   | small_stmt"""
4264479Sbinkertn@umich.edu    if len(p) == 4:
4274479Sbinkertn@umich.edu        p[0] = p[1] + [p[3]]
4284479Sbinkertn@umich.edu    else:
4294479Sbinkertn@umich.edu        p[0] = [p[1]]
4304479Sbinkertn@umich.edu
4314479Sbinkertn@umich.edu# small_stmt: expr_stmt | print_stmt  | del_stmt | pass_stmt | flow_stmt |
4324479Sbinkertn@umich.edu#    import_stmt | global_stmt | exec_stmt | assert_stmt
4334479Sbinkertn@umich.edudef p_small_stmt(p):
4344479Sbinkertn@umich.edu    """small_stmt : flow_stmt
4354479Sbinkertn@umich.edu                  | expr_stmt"""
4364479Sbinkertn@umich.edu    p[0] = p[1]
4374479Sbinkertn@umich.edu
4384479Sbinkertn@umich.edu# expr_stmt: testlist (augassign (yield_expr|testlist) |
4394479Sbinkertn@umich.edu#                      ('=' (yield_expr|testlist))*)
4404479Sbinkertn@umich.edu# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
4414479Sbinkertn@umich.edu#             '<<=' | '>>=' | '**=' | '//=')
4424479Sbinkertn@umich.edudef p_expr_stmt(p):
4434479Sbinkertn@umich.edu    """expr_stmt : testlist ASSIGN testlist
4444479Sbinkertn@umich.edu                 | testlist """
4454479Sbinkertn@umich.edu    if len(p) == 2:
4464479Sbinkertn@umich.edu        # a list of expressions
4474479Sbinkertn@umich.edu        p[0] = ast.Discard(p[1])
4484479Sbinkertn@umich.edu    else:
4494479Sbinkertn@umich.edu        p[0] = Assign(p[1], p[3])
4504479Sbinkertn@umich.edu
4514479Sbinkertn@umich.edudef p_flow_stmt(p):
4524479Sbinkertn@umich.edu    "flow_stmt : return_stmt"
4534479Sbinkertn@umich.edu    p[0] = p[1]
4544479Sbinkertn@umich.edu
4554479Sbinkertn@umich.edu# return_stmt: 'return' [testlist]
4564479Sbinkertn@umich.edudef p_return_stmt(p):
4574479Sbinkertn@umich.edu    "return_stmt : RETURN testlist"
4584479Sbinkertn@umich.edu    p[0] = ast.Return(p[2])
4594479Sbinkertn@umich.edu
4604479Sbinkertn@umich.edu
4614479Sbinkertn@umich.edudef p_compound_stmt(p):
4624479Sbinkertn@umich.edu    """compound_stmt : if_stmt
4634479Sbinkertn@umich.edu                     | funcdef"""
4644479Sbinkertn@umich.edu    p[0] = p[1]
4654479Sbinkertn@umich.edu
4664479Sbinkertn@umich.edudef p_if_stmt(p):
4674479Sbinkertn@umich.edu    'if_stmt : IF test COLON suite'
4684479Sbinkertn@umich.edu    p[0] = ast.If([(p[2], p[4])], None)
4694479Sbinkertn@umich.edu
4704479Sbinkertn@umich.edudef p_suite(p):
4714479Sbinkertn@umich.edu    """suite : simple_stmt
4724479Sbinkertn@umich.edu             | NEWLINE INDENT stmts DEDENT"""
4734479Sbinkertn@umich.edu    if len(p) == 2:
4744479Sbinkertn@umich.edu        p[0] = ast.Stmt(p[1])
4754479Sbinkertn@umich.edu    else:
4764479Sbinkertn@umich.edu        p[0] = ast.Stmt(p[3])
4776498Snate@binkert.org
4784479Sbinkertn@umich.edu
4794479Sbinkertn@umich.edudef p_stmts(p):
4804479Sbinkertn@umich.edu    """stmts : stmts stmt
4814479Sbinkertn@umich.edu             | stmt"""
4824479Sbinkertn@umich.edu    if len(p) == 3:
4834479Sbinkertn@umich.edu        p[0] = p[1] + p[2]
4844479Sbinkertn@umich.edu    else:
4854479Sbinkertn@umich.edu        p[0] = p[1]
4864479Sbinkertn@umich.edu
4874479Sbinkertn@umich.edu## No using Python's approach because Ply supports precedence
4884479Sbinkertn@umich.edu
4894479Sbinkertn@umich.edu# comparison: expr (comp_op expr)*
4904479Sbinkertn@umich.edu# arith_expr: term (('+'|'-') term)*
4914479Sbinkertn@umich.edu# term: factor (('*'|'/'|'%'|'//') factor)*
4924479Sbinkertn@umich.edu# factor: ('+'|'-'|'~') factor | power
4934479Sbinkertn@umich.edu# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
4944479Sbinkertn@umich.edu
4954479Sbinkertn@umich.edudef make_lt_compare((left, right)):
4964479Sbinkertn@umich.edu    return ast.Compare(left, [('<', right),])
4974479Sbinkertn@umich.edudef make_gt_compare((left, right)):
4984479Sbinkertn@umich.edu    return ast.Compare(left, [('>', right),])
4994479Sbinkertn@umich.edudef make_eq_compare((left, right)):
5004479Sbinkertn@umich.edu    return ast.Compare(left, [('==', right),])
5014479Sbinkertn@umich.edu
5024479Sbinkertn@umich.edu
5034479Sbinkertn@umich.edubinary_ops = {
5044479Sbinkertn@umich.edu    "+": ast.Add,
5054479Sbinkertn@umich.edu    "-": ast.Sub,
5064479Sbinkertn@umich.edu    "*": ast.Mul,
5074479Sbinkertn@umich.edu    "/": ast.Div,
5084479Sbinkertn@umich.edu    "<": make_lt_compare,
5094479Sbinkertn@umich.edu    ">": make_gt_compare,
5104479Sbinkertn@umich.edu    "==": make_eq_compare,
5114479Sbinkertn@umich.edu}
5124479Sbinkertn@umich.eduunary_ops = {
5134479Sbinkertn@umich.edu    "+": ast.UnaryAdd,
5144479Sbinkertn@umich.edu    "-": ast.UnarySub,
5154479Sbinkertn@umich.edu    }
5164479Sbinkertn@umich.eduprecedence = (
5174479Sbinkertn@umich.edu    ("left", "EQ", "GT", "LT"),
5184479Sbinkertn@umich.edu    ("left", "PLUS", "MINUS"),
5194479Sbinkertn@umich.edu    ("left", "MULT", "DIV"),
5204479Sbinkertn@umich.edu    )
5214479Sbinkertn@umich.edu
5224479Sbinkertn@umich.edudef p_comparison(p):
5234479Sbinkertn@umich.edu    """comparison : comparison PLUS comparison
5244479Sbinkertn@umich.edu                  | comparison MINUS comparison
5254479Sbinkertn@umich.edu                  | comparison MULT comparison
5264479Sbinkertn@umich.edu                  | comparison DIV comparison
5274479Sbinkertn@umich.edu                  | comparison LT comparison
5284479Sbinkertn@umich.edu                  | comparison EQ comparison
5294479Sbinkertn@umich.edu                  | comparison GT comparison
5304479Sbinkertn@umich.edu                  | PLUS comparison
5314479Sbinkertn@umich.edu                  | MINUS comparison
5324479Sbinkertn@umich.edu                  | power"""
5334479Sbinkertn@umich.edu    if len(p) == 4:
5344479Sbinkertn@umich.edu        p[0] = binary_ops[p[2]]((p[1], p[3]))
5354479Sbinkertn@umich.edu    elif len(p) == 3:
5364479Sbinkertn@umich.edu        p[0] = unary_ops[p[1]](p[2])
5374479Sbinkertn@umich.edu    else:
5384479Sbinkertn@umich.edu        p[0] = p[1]
5396498Snate@binkert.org
5404479Sbinkertn@umich.edu# power: atom trailer* ['**' factor]
5414479Sbinkertn@umich.edu# trailers enables function calls.  I only allow one level of calls
5424479Sbinkertn@umich.edu# so this is 'trailer'
5434479Sbinkertn@umich.edudef p_power(p):
5444479Sbinkertn@umich.edu    """power : atom
5454479Sbinkertn@umich.edu             | atom trailer"""
5464479Sbinkertn@umich.edu    if len(p) == 2:
5474479Sbinkertn@umich.edu        p[0] = p[1]
5484479Sbinkertn@umich.edu    else:
5494479Sbinkertn@umich.edu        if p[2][0] == "CALL":
5504479Sbinkertn@umich.edu            p[0] = ast.CallFunc(p[1], p[2][1], None, None)
5514479Sbinkertn@umich.edu        else:
5524479Sbinkertn@umich.edu            raise AssertionError("not implemented")
5534479Sbinkertn@umich.edu
5544479Sbinkertn@umich.edudef p_atom_name(p):
5554479Sbinkertn@umich.edu    """atom : NAME"""
5564479Sbinkertn@umich.edu    p[0] = ast.Name(p[1])
5574479Sbinkertn@umich.edu
5584479Sbinkertn@umich.edudef p_atom_number(p):
5594479Sbinkertn@umich.edu    """atom : NUMBER
5604479Sbinkertn@umich.edu            | STRING"""
5614479Sbinkertn@umich.edu    p[0] = ast.Const(p[1])
5624479Sbinkertn@umich.edu
5634479Sbinkertn@umich.edudef p_atom_tuple(p):
5644479Sbinkertn@umich.edu    """atom : LPAR testlist RPAR"""
5654479Sbinkertn@umich.edu    p[0] = p[2]
5664479Sbinkertn@umich.edu
5674479Sbinkertn@umich.edu# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
5684479Sbinkertn@umich.edudef p_trailer(p):
5694479Sbinkertn@umich.edu    "trailer : LPAR arglist RPAR"
5704479Sbinkertn@umich.edu    p[0] = ("CALL", p[2])
5714479Sbinkertn@umich.edu
5724479Sbinkertn@umich.edu# testlist: test (',' test)* [',']
5734479Sbinkertn@umich.edu# Contains shift/reduce error
5744479Sbinkertn@umich.edudef p_testlist(p):
5754479Sbinkertn@umich.edu    """testlist : testlist_multi COMMA
5764479Sbinkertn@umich.edu                | testlist_multi """
5774479Sbinkertn@umich.edu    if len(p) == 2:
5784479Sbinkertn@umich.edu        p[0] = p[1]
5794479Sbinkertn@umich.edu    else:
5804479Sbinkertn@umich.edu        # May need to promote singleton to tuple
5814479Sbinkertn@umich.edu        if isinstance(p[1], list):
5824479Sbinkertn@umich.edu            p[0] = p[1]
5834479Sbinkertn@umich.edu        else:
5844479Sbinkertn@umich.edu            p[0] = [p[1]]
5854479Sbinkertn@umich.edu    # Convert into a tuple?
5864479Sbinkertn@umich.edu    if isinstance(p[0], list):
5874479Sbinkertn@umich.edu        p[0] = ast.Tuple(p[0])
5884479Sbinkertn@umich.edu
5894479Sbinkertn@umich.edudef p_testlist_multi(p):
5904479Sbinkertn@umich.edu    """testlist_multi : testlist_multi COMMA test
5914479Sbinkertn@umich.edu                      | test"""
5924479Sbinkertn@umich.edu    if len(p) == 2:
5934479Sbinkertn@umich.edu        # singleton
5944479Sbinkertn@umich.edu        p[0] = p[1]
5954479Sbinkertn@umich.edu    else:
5964479Sbinkertn@umich.edu        if isinstance(p[1], list):
5974479Sbinkertn@umich.edu            p[0] = p[1] + [p[3]]
5984479Sbinkertn@umich.edu        else:
5994479Sbinkertn@umich.edu            # singleton -> tuple
6004479Sbinkertn@umich.edu            p[0] = [p[1], p[3]]
6014479Sbinkertn@umich.edu
6024479Sbinkertn@umich.edu
6034479Sbinkertn@umich.edu# test: or_test ['if' or_test 'else' test] | lambdef
6044479Sbinkertn@umich.edu#  as I don't support 'and', 'or', and 'not' this works down to 'comparison'
6054479Sbinkertn@umich.edudef p_test(p):
6064479Sbinkertn@umich.edu    "test : comparison"
6074479Sbinkertn@umich.edu    p[0] = p[1]
6086498Snate@binkert.org
6094479Sbinkertn@umich.edu
6104479Sbinkertn@umich.edu
6114479Sbinkertn@umich.edu# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
6124479Sbinkertn@umich.edu# XXX INCOMPLETE: this doesn't allow the trailing comma
6134479Sbinkertn@umich.edudef p_arglist(p):
6144479Sbinkertn@umich.edu    """arglist : arglist COMMA argument
6154479Sbinkertn@umich.edu               | argument"""
6164479Sbinkertn@umich.edu    if len(p) == 4:
6174479Sbinkertn@umich.edu        p[0] = p[1] + [p[3]]
6184479Sbinkertn@umich.edu    else:
6194479Sbinkertn@umich.edu        p[0] = [p[1]]
6204479Sbinkertn@umich.edu
6214479Sbinkertn@umich.edu# argument: test [gen_for] | test '=' test  # Really [keyword '='] test
6224479Sbinkertn@umich.edudef p_argument(p):
6234479Sbinkertn@umich.edu    "argument : test"
6244479Sbinkertn@umich.edu    p[0] = p[1]
6254479Sbinkertn@umich.edu
6264479Sbinkertn@umich.edudef p_error(p):
6274479Sbinkertn@umich.edu    #print "Error!", repr(p)
6284479Sbinkertn@umich.edu    raise SyntaxError(p)
6294479Sbinkertn@umich.edu
6304479Sbinkertn@umich.edu
6314479Sbinkertn@umich.educlass GardenSnakeParser(object):
6324479Sbinkertn@umich.edu    def __init__(self, lexer = None):
6334479Sbinkertn@umich.edu        if lexer is None:
6344479Sbinkertn@umich.edu            lexer = IndentLexer()
6354479Sbinkertn@umich.edu        self.lexer = lexer
6364479Sbinkertn@umich.edu        self.parser = yacc.yacc(start="file_input_end")
6374479Sbinkertn@umich.edu
6384479Sbinkertn@umich.edu    def parse(self, code):
6394479Sbinkertn@umich.edu        self.lexer.input(code)
6404479Sbinkertn@umich.edu        result = self.parser.parse(lexer = self.lexer)
6414479Sbinkertn@umich.edu        return ast.Module(None, result)
6424479Sbinkertn@umich.edu
6434479Sbinkertn@umich.edu
6444479Sbinkertn@umich.edu###### Code generation ######
6456498Snate@binkert.org
6464479Sbinkertn@umich.edufrom compiler import misc, syntax, pycodegen
6474479Sbinkertn@umich.edu
6484479Sbinkertn@umich.educlass GardenSnakeCompiler(object):
6494479Sbinkertn@umich.edu    def __init__(self):
6504479Sbinkertn@umich.edu        self.parser = GardenSnakeParser()
6514479Sbinkertn@umich.edu    def compile(self, code, filename="<string>"):
6524479Sbinkertn@umich.edu        tree = self.parser.parse(code)
6534479Sbinkertn@umich.edu        #print  tree
6544479Sbinkertn@umich.edu        misc.set_filename(filename, tree)
6554479Sbinkertn@umich.edu        syntax.check(tree)
6564479Sbinkertn@umich.edu        gen = pycodegen.ModuleCodeGenerator(tree)
6574479Sbinkertn@umich.edu        code = gen.getCode()
6584479Sbinkertn@umich.edu        return code
6594479Sbinkertn@umich.edu
6604479Sbinkertn@umich.edu####### Test code #######
6616498Snate@binkert.org
6624479Sbinkertn@umich.educompile = GardenSnakeCompiler().compile
6634479Sbinkertn@umich.edu
6644479Sbinkertn@umich.educode = r"""
6654479Sbinkertn@umich.edu
6664479Sbinkertn@umich.eduprint('LET\'S TRY THIS \\OUT')
6676498Snate@binkert.org
6684479Sbinkertn@umich.edu#Comment here
6694479Sbinkertn@umich.edudef x(a):
6704479Sbinkertn@umich.edu    print('called with',a)
6714479Sbinkertn@umich.edu    if a == 1:
6724479Sbinkertn@umich.edu        return 2
6734479Sbinkertn@umich.edu    if a*2 > 10: return 999 / 4
6744479Sbinkertn@umich.edu        # Another comment here
6754479Sbinkertn@umich.edu
6764479Sbinkertn@umich.edu    return a+2*3
6774479Sbinkertn@umich.edu
6784479Sbinkertn@umich.eduints = (1, 2,
6794479Sbinkertn@umich.edu   3, 4,
6804479Sbinkertn@umich.edu5)
6814479Sbinkertn@umich.eduprint('mutiline-expression', ints)
6824479Sbinkertn@umich.edu
6834479Sbinkertn@umich.edut = 4+1/3*2+6*(9-5+1)
6844479Sbinkertn@umich.eduprint('predence test; should be 34+2/3:', t, t==(34+2/3))
6854479Sbinkertn@umich.edu
6864479Sbinkertn@umich.eduprint('numbers', 1,2,3,4,5)
6874479Sbinkertn@umich.eduif 1:
6884479Sbinkertn@umich.edu 8
6894479Sbinkertn@umich.edu a=9
6904479Sbinkertn@umich.edu print(x(a))
6914479Sbinkertn@umich.edu
6924479Sbinkertn@umich.eduprint(x(1))
6934479Sbinkertn@umich.eduprint(x(2))
6944479Sbinkertn@umich.eduprint(x(8),'3')
6954479Sbinkertn@umich.eduprint('this is decimal', 1/5)
6964479Sbinkertn@umich.eduprint('BIG DECIMAL', 1.234567891234567e12345)
6974479Sbinkertn@umich.edu
6984479Sbinkertn@umich.edu"""
6994479Sbinkertn@umich.edu
7004479Sbinkertn@umich.edu# Set up the GardenSnake run-time environment
7014479Sbinkertn@umich.edudef print_(*args):
7024479Sbinkertn@umich.edu    print "-->", " ".join(map(str,args))
7034479Sbinkertn@umich.edu
7044479Sbinkertn@umich.eduglobals()["print"] = print_
7054479Sbinkertn@umich.edu
7064479Sbinkertn@umich.educompiled_code = compile(code)
7074479Sbinkertn@umich.edu
7084479Sbinkertn@umich.eduexec compiled_code in globals()
7094479Sbinkertn@umich.eduprint "Done"
710