GardenSnake.py (4479:61d3ed46e373) | GardenSnake.py (6498:e21e9ab5fad0) |
---|---|
1# GardenSnake - a parser generator demonstration program 2# 3# This implements a modified version of a subset of Python: 4# - only 'def', 'return' and 'if' statements 5# - 'if' only has 'then' clause (no elif nor else) 6# - single-quoted strings only, content in raw format 7# - numbers are decimal.Decimal instances (not integers or floats) 8# - no print statement; use the built-in 'print' function --- 166 unchanged lines hidden (view full) --- 175 saw_colon = False 176 for token in tokens: 177 token.at_line_start = at_line_start 178 179 if token.type == "COLON": 180 at_line_start = False 181 indent = MAY_INDENT 182 token.must_indent = False | 1# GardenSnake - a parser generator demonstration program 2# 3# This implements a modified version of a subset of Python: 4# - only 'def', 'return' and 'if' statements 5# - 'if' only has 'then' clause (no elif nor else) 6# - single-quoted strings only, content in raw format 7# - numbers are decimal.Decimal instances (not integers or floats) 8# - no print statement; use the built-in 'print' function --- 166 unchanged lines hidden (view full) --- 175 saw_colon = False 176 for token in tokens: 177 token.at_line_start = at_line_start 178 179 if token.type == "COLON": 180 at_line_start = False 181 indent = MAY_INDENT 182 token.must_indent = False |
183 | 183 |
184 elif token.type == "NEWLINE": 185 at_line_start = True 186 if indent == MAY_INDENT: 187 indent = MUST_INDENT 188 token.must_indent = False 189 190 elif token.type == "WS": 191 assert token.at_line_start == True --- 38 unchanged lines hidden (view full) --- 230 for token in tokens: 231## if 1: 232## print "Process", token, 233## if token.at_line_start: 234## print "at_line_start", 235## if token.must_indent: 236## print "must_indent", 237## print | 184 elif token.type == "NEWLINE": 185 at_line_start = True 186 if indent == MAY_INDENT: 187 indent = MUST_INDENT 188 token.must_indent = False 189 190 elif token.type == "WS": 191 assert token.at_line_start == True --- 38 unchanged lines hidden (view full) --- 230 for token in tokens: 231## if 1: 232## print "Process", token, 233## if token.at_line_start: 234## print "at_line_start", 235## if token.must_indent: 236## print "must_indent", 237## print |
238 | 238 |
239 # WS only occurs at the start of the line 240 # There may be WS followed by NEWLINE so 241 # only track the depth here. Don't indent/dedent 242 # until there's something real. 243 if token.type == "WS": 244 assert depth == 0 245 depth = len(token.value) 246 prev_was_ws = True --- 42 unchanged lines hidden (view full) --- 289 290 ### Finished processing ### 291 292 # Must dedent any remaining levels 293 if len(levels) > 1: 294 assert token is not None 295 for _ in range(1, len(levels)): 296 yield DEDENT(token.lineno) | 239 # WS only occurs at the start of the line 240 # There may be WS followed by NEWLINE so 241 # only track the depth here. Don't indent/dedent 242 # until there's something real. 243 if token.type == "WS": 244 assert depth == 0 245 depth = len(token.value) 246 prev_was_ws = True --- 42 unchanged lines hidden (view full) --- 289 290 ### Finished processing ### 291 292 # Must dedent any remaining levels 293 if len(levels) > 1: 294 assert token is not None 295 for _ in range(1, len(levels)): 296 yield DEDENT(token.lineno) |
297 |
|
297 | 298 |
298 | |
299# The top-level filter adds an ENDMARKER, if requested. 300# Python's grammar uses it. 301def filter(lexer, add_endmarker = True): 302 token = None 303 tokens = iter(lexer.token, None) 304 tokens = track_tokens_filter(lexer, tokens) 305 for token in indentation_filter(tokens): 306 yield token --- 64 unchanged lines hidden (view full) --- 371 p[0] = p[1] 372 else: 373 p[0] = [] # p == 2 --> only a blank line 374 else: 375 if len(p) == 3: 376 p[0] = p[1] + p[2] 377 else: 378 p[0] = p[1] | 299# The top-level filter adds an ENDMARKER, if requested. 300# Python's grammar uses it. 301def filter(lexer, add_endmarker = True): 302 token = None 303 tokens = iter(lexer.token, None) 304 tokens = track_tokens_filter(lexer, tokens) 305 for token in indentation_filter(tokens): 306 yield token --- 64 unchanged lines hidden (view full) --- 371 p[0] = p[1] 372 else: 373 p[0] = [] # p == 2 --> only a blank line 374 else: 375 if len(p) == 3: 376 p[0] = p[1] + p[2] 377 else: 378 p[0] = p[1] |
379 |
|
379 | 380 |
380 | |
# funcdef: [decorators] 'def' NAME parameters ':' suite
# ignoring decorators
def p_funcdef(p):
    "funcdef : DEF NAME parameters COLON suite"
    # The string above is the grammar production for this rule, so it is
    # kept exactly as written rather than used for documentation.
    #
    # Slots used: p[2] is the function NAME, p[3] the parameter list produced
    # by the 'parameters' rule, p[5] the already-built suite.
    func_name = p[2]
    param_names = tuple(p[3])
    body = p[5]
    # The remaining positional arguments are fixed values; presumably they are
    # decorators, defaults, flags and doc of compiler.ast.Function -- confirm
    # against the compiler.ast documentation.
    p[0] = ast.Function(None, func_name, param_names, (), 0, None, body)
386 | 386 |
# parameters: '(' [varargslist] ')'
def p_parameters(p):
    """parameters : LPAR RPAR
                  | LPAR varargslist RPAR"""
    # The string above is the grammar production (one alternative per line),
    # so it is kept exactly as written.
    #
    # len(p) == 3 means the empty "()" alternative matched: no parameters.
    # Otherwise p[2] is the value produced by 'varargslist'.
    p[0] = [] if len(p) == 3 else p[2]
395 |
|
395 | 396 |
396 397# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | | 397# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | |
# highly simplified
def p_varargslist(p):
    """varargslist : varargslist COMMA NAME
                   | NAME"""
    # The string above is the grammar production (one alternative per line),
    # so it is kept exactly as written.
    #
    # The result is always a flat list of NAME values, one per parameter.
    if len(p) == 4:
        # p[1] is the list accumulated so far; p[3] is a single NAME value,
        # not a list, so it must be wrapped before concatenating.  The
        # unwrapped form fails (list + str) on the second parameter.
        p[0] = p[1] + [p[3]]
    else:
        # Base case: a lone NAME starts a new one-element list.
        p[0] = [p[1]]

# stmt: simple_stmt | compound_stmt
def p_stmt_simple(p):
    """stmt : simple_stmt"""
    # simple_stmt is a list
    p[0] = p[1]
412 | 412 |
413def p_stmt_compound(p): 414 """stmt : compound_stmt""" 415 p[0] = [p[1]] 416 417# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 418def p_simple_stmt(p): 419 """simple_stmt : small_stmts NEWLINE 420 | small_stmts SEMICOLON NEWLINE""" --- 48 unchanged lines hidden (view full) --- 469 470def p_suite(p): 471 """suite : simple_stmt 472 | NEWLINE INDENT stmts DEDENT""" 473 if len(p) == 2: 474 p[0] = ast.Stmt(p[1]) 475 else: 476 p[0] = ast.Stmt(p[3]) | 413def p_stmt_compound(p): 414 """stmt : compound_stmt""" 415 p[0] = [p[1]] 416 417# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 418def p_simple_stmt(p): 419 """simple_stmt : small_stmts NEWLINE 420 | small_stmts SEMICOLON NEWLINE""" --- 48 unchanged lines hidden (view full) --- 469 470def p_suite(p): 471 """suite : simple_stmt 472 | NEWLINE INDENT stmts DEDENT""" 473 if len(p) == 2: 474 p[0] = ast.Stmt(p[1]) 475 else: 476 p[0] = ast.Stmt(p[3]) |
477 |
|
477 | 478 |
478 | |
479def p_stmts(p): 480 """stmts : stmts stmt 481 | stmt""" 482 if len(p) == 3: 483 p[0] = p[1] + p[2] 484 else: 485 p[0] = p[1] 486 --- 44 unchanged lines hidden (view full) --- 531 | MINUS comparison 532 | power""" 533 if len(p) == 4: 534 p[0] = binary_ops[p[2]]((p[1], p[3])) 535 elif len(p) == 3: 536 p[0] = unary_ops[p[1]](p[2]) 537 else: 538 p[0] = p[1] | 479def p_stmts(p): 480 """stmts : stmts stmt 481 | stmt""" 482 if len(p) == 3: 483 p[0] = p[1] + p[2] 484 else: 485 p[0] = p[1] 486 --- 44 unchanged lines hidden (view full) --- 531 | MINUS comparison 532 | power""" 533 if len(p) == 4: 534 p[0] = binary_ops[p[2]]((p[1], p[3])) 535 elif len(p) == 3: 536 p[0] = unary_ops[p[1]](p[2]) 537 else: 538 p[0] = p[1] |
539 | 539 |
540# power: atom trailer* ['**' factor] 541# trailers enables function calls. I only allow one level of calls 542# so this is 'trailer' 543def p_power(p): 544 """power : atom 545 | atom trailer""" 546 if len(p) == 2: 547 p[0] = p[1] --- 52 unchanged lines hidden (view full) --- 600 p[0] = [p[1], p[3]] 601 602 603# test: or_test ['if' or_test 'else' test] | lambdef 604# as I don't support 'and', 'or', and 'not' this works down to 'comparison' 605def p_test(p): 606 "test : comparison" 607 p[0] = p[1] | 540# power: atom trailer* ['**' factor] 541# trailers enables function calls. I only allow one level of calls 542# so this is 'trailer' 543def p_power(p): 544 """power : atom 545 | atom trailer""" 546 if len(p) == 2: 547 p[0] = p[1] --- 52 unchanged lines hidden (view full) --- 600 p[0] = [p[1], p[3]] 601 602 603# test: or_test ['if' or_test 'else' test] | lambdef 604# as I don't support 'and', 'or', and 'not' this works down to 'comparison' 605def p_test(p): 606 "test : comparison" 607 p[0] = p[1] |
608 |
|
608 609 | 609 610 |
610 | |
611# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) 612# XXX INCOMPLETE: this doesn't allow the trailing comma 613def p_arglist(p): 614 """arglist : arglist COMMA argument 615 | argument""" 616 if len(p) == 4: 617 p[0] = p[1] + [p[3]] 618 else: --- 18 unchanged lines hidden (view full) --- 637 638 def parse(self, code): 639 self.lexer.input(code) 640 result = self.parser.parse(lexer = self.lexer) 641 return ast.Module(None, result) 642 643 644###### Code generation ###### | 611# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) 612# XXX INCOMPLETE: this doesn't allow the trailing comma 613def p_arglist(p): 614 """arglist : arglist COMMA argument 615 | argument""" 616 if len(p) == 4: 617 p[0] = p[1] + [p[3]] 618 else: --- 18 unchanged lines hidden (view full) --- 637 638 def parse(self, code): 639 self.lexer.input(code) 640 result = self.parser.parse(lexer = self.lexer) 641 return ast.Module(None, result) 642 643 644###### Code generation ###### |
645 | 645 |
from compiler import misc, syntax, pycodegen

class GardenSnakeCompiler(object):
    """Front end that turns GardenSnake source text into compiled code.

    Parsing is delegated to a GardenSnakeParser instance; the resulting tree
    is passed through the 'compiler' package (misc, syntax, pycodegen).
    """

    def __init__(self):
        self.parser = GardenSnakeParser()

    def compile(self, code, filename="<string>"):
        """Parse *code* and return the result of the module code generator.

        *filename* is recorded on the tree via misc.set_filename before the
        syntax check runs.  The return value is whatever
        ModuleCodeGenerator.getCode() produces (presumably a code object --
        confirm against the compiler package documentation).
        """
        module_tree = self.parser.parse(code)
        misc.set_filename(filename, module_tree)
        syntax.check(module_tree)
        return pycodegen.ModuleCodeGenerator(module_tree).getCode()

####### Test code #######
661 | 661 |
662compile = GardenSnakeCompiler().compile 663 664code = r""" 665 666print('LET\'S TRY THIS \\OUT') | 662compile = GardenSnakeCompiler().compile 663 664code = r""" 665 666print('LET\'S TRY THIS \\OUT') |
667 | 667 |
668#Comment here 669def x(a): 670 print('called with',a) 671 if a == 1: 672 return 2 673 if a*2 > 10: return 999 / 4 674 # Another comment here 675 --- 34 unchanged lines hidden --- | 668#Comment here 669def x(a): 670 print('called with',a) 671 if a == 1: 672 return 2 673 if a*2 > 10: return 999 / 4 674 # Another comment here 675 --- 34 unchanged lines hidden --- |