# yacc.py revision 2632
#-----------------------------------------------------------------------------
# ply: yacc.py
#
# Author: David M. Beazley (beazley@cs.uchicago.edu)
#         Department of Computer Science
#         University of Chicago
#         Chicago, IL  60637
#
# Copyright (C) 2001, David M. Beazley
#
# $Header: /home/stever/bk/newmem2/ext/ply/yacc.py 1.3 03/06/06 14:59:28-00:00 stever@ $
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# See the file COPYING for a complete copy of the LGPL.
#
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions.  Roughly speaking, this module is a cross between
# John Aycock's Spark system and the GNU bison utility.
#
# Disclaimer: This is a work in progress. SLR parsing seems to work fairly
# well and there is extensive error checking. LALR(1) is in progress.  The
# rest of this file is a bit of a mess.  Please pardon the dust.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist).  However, most of the variables used during table
# construction are defined in terms of global variables.  Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#-----------------------------------------------------------------------------

__version__ = "1.3"

#-----------------------------------------------------------------------------
# === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#
# NOTE: this module is Python 2 code (print statements, cStringIO, md5).
#-----------------------------------------------------------------------------

yaccdebug = 1                      # Debugging mode.  If set, yacc generates a
                                   # a 'parser.out' file in the current directory

debug_file = 'parser.out'          # Default name of the debugging file
tab_module = 'parsetab'            # Default name of the table module
default_lr = 'SLR'                 # Default LR table generation method

error_count = 3                    # Number of symbols that must be shifted to leave recovery mode

import re, types, sys, cStringIO, md5, os.path

# Exception raised for yacc-related errors
class YaccError(Exception):   pass

#-----------------------------------------------------------------------------
# === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)

class YaccSymbol:
    # str()/repr() show only the grammar symbol type; this is what appears
    # in debugging traces of the symbol stack.
    def __str__(self):    return self.type
    def __repr__(self):   return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).   The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.

class YaccSlice:
    def __init__(self,s):
        # s is the list of YaccSymbol objects matched by the rule;
        # index 0 is the left-hand-side symbol being built.
        self.slice = s
        self.pbstack = []           # Tokens pushed back onto the input by pushback()

    def __getitem__(self,n):
        # p[n] reads the .value of the n-th matched symbol.
        return self.slice[n].value

    def __setitem__(self,n,v):
        # p[n] = v stores the rule's result on the underlying symbol.
        self.slice[n].value = v

    def __len__(self):
        return len(self.slice)

    def lineno(self,n):
        # Line number where symbol n started, or 0 if unknown.
        return getattr(self.slice[n],"lineno",0)

    def linespan(self,n):
        # (startline,endline) range covered by symbol n.
        startline = getattr(self.slice[n],"lineno",0)
        endline = getattr(self.slice[n],"endlineno",startline)
        return startline,endline

    def pushback(self,n):
        # Return the last n matched symbols to the input stream; the parser
        # drains pbstack back onto its lookahead stack after the rule runs.
        if n <= 0:
            raise ValueError, "Expected a positive value"
        if n > (len(self.slice)-1):
            raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1)
        for i in range(0,n):
            self.pbstack.append(self.slice[-i-1])

# The LR Parsing engine.   This is defined as a class so that multiple parsers
# can exist in the same process.  A user never instantiates this directly.
# Instead, the global yacc() function should be used to create a suitable Parser
# object.

class Parser:
    def __init__(self,magic=None):

        # This is a hack to keep users from trying to instantiate a Parser
        # object directly.

        if magic != "xyzzy":
            raise YaccError, "Can't instantiate Parser. Use yacc() instead."

        # Reset internal state
        self.productions = None          # List of productions
        self.errorfunc   = None          # Error handling function
        self.action      = { }           # LR Action table
        self.goto        = { }           # LR goto table
        self.require     = { }           # Attribute require table
        self.method      = "Unknown LR"  # Table construction method used

    def errok(self):
        # Called from within p_error() to signal that recovery succeeded
        # and the parser should leave error-recovery mode immediately.
        self.errorcount = 0

    def restart(self):
        # Called from within p_error() to throw away the entire parse so
        # far and return the stacks to the initial (state 0, '$') condition.
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self,input=None,lexer=None,debug=0):
        # Run the LR parsing loop over the token stream and return the
        # value attached to the start symbol (or None).
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table
        goto    = self.goto              # Local reference to goto table
        prod    = self.productions       # Local reference to production list
        pslice  = YaccSlice(None)        # Slice object passed to grammar rules
        pslice.parser = self             # Parser object
        self.errorcount = 0              # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            import lex as lexer

        pslice.lexer = lexer

        # If input was supplied, pass to lexer
        if input:
            lexer.input(input)

        # Tokenize function
        get_token = lexer.token

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        errtoken   = None               # Err token

        # The start state is assumed to be (0,$)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$'
        symstack.append(sym)

        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer
            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    # Lexer exhausted; synthesize the end-of-input marker.
                    lookahead = YaccSymbol()
                    lookahead.type = '$'
            if debug:
                print "%-20s : %s" % (lookahead, [xx.type for xx in symstack])

            # Check the action table
            s = statestack[-1]
            ltype = lookahead.type
            t = actions.get((s,ltype),None)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack (t is the new state number)
                    if ltype == '$':
                        # Error, end of input
                        print "yacc: Parse error. EOF"
                        return
                    statestack.append(t)
                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if self.errorcount > 0:
                        self.errorcount -= 1

                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    # (-t indexes the production being reduced)
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        # Build the rule's slice: slot 0 is the new LHS symbol,
                        # slots 1..plen are the matched RHS symbols.
                        targ = symstack[-plen-1:]
                        targ[0] = sym
                        try:
                            # Propagate line-number information to the LHS.
                            sym.lineno = targ[1].lineno
                            sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
                        except AttributeError:
                            sym.lineno = 0
                        del symstack[-plen:]
                        del statestack[-plen:]
                    else:
                        # Empty production; nothing is popped.
                        sym.lineno = 0
                        targ = [ sym ]
                    pslice.slice = targ
                    pslice.pbstack = []
                    # Call the grammar rule with our special slice object
                    p.func(pslice)

                    # Validate attributes of the resulting value attribute
#                  if require:
#                      try:
#                          t0 = targ[0]
#                          r = Requires.get(t0.type,None)
#                          t0d = t0.__dict__
#                          if r:
#                              for field in r:
#                                  tn = t0
#                                  for fname in field:
#                                      try:
#                                          tf = tn.__dict__
#                                          tn = tf.get(fname)
#                                      except StandardError:
#                                          tn = None
#                                      if not tn:
#                                          print "%s:%d: Rule %s doesn't set required attribute '%s'" % \
#                                                (p.file,p.line,p.name,".".join(field))
#                      except TypeError,LookupError:
#                          print "Bad requires directive " % r
#                          pass


                    # If there was a pushback, put that on the stack
                    if pslice.pbstack:
                        lookaheadstack.append(lookahead)
                        for _t in pslice.pbstack:
                            lookaheadstack.append(_t)
                        lookahead = None

                    symstack.append(sym)
                    statestack.append(goto[statestack[-1],pname])
                    continue

                if t == 0:
                    # Accept: return the value computed for the start symbol.
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:
                # We have some kind of parsing error here.  To handle this,
                # we are going to push the current token onto the tokenstack
                # and replace it with an 'error' token.  If there are any synchronization
                # rules, they may catch it.
                #
                # In addition to pushing the error token, we call call the user defined p_error()
                # function if this is the first syntax error.   This function is only called
                # if errorcount == 0.

                if not self.errorcount:
                    self.errorcount = error_count
                    errtoken = lookahead
                    if errtoken.type == '$':
                        errtoken = None              # End of file!
                    if self.errorfunc:
                        # Expose errok/token/restart as module globals so the
                        # user's p_error() can reach them during recovery.
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if not self.errorcount:
                            # User must have done some kind of panic mode recovery on their own.  The returned token
                            # is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        # No user error handler; report the error ourselves.
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                print "yacc: Syntax error at line %d, token=%s" % (lineno, errtoken.type)
                            else:
                                print "yacc: Syntax error, token=%s" % errtoken.type
                        else:
                            print "yacc: Parse error in input. EOF"
                            return

                else:
                    # Already recovering: just reset the countdown.
                    self.errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$':
                    lookahead = None
                    errtoken = None
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    # Wrap the offending token inside an 'error' symbol and
                    # push the original token back onto the input.
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # Lookahead is already 'error'; pop stack entries until a
                    # state that can shift it is (hopefully) exposed.
                    symstack.pop()
                    statestack.pop()

                continue

            # Call an error function here
            raise RuntimeError, "yacc: internal parser error!!!\n"

# -----------------------------------------------------------------------------
#                          === Parser Construction ===
#
# The following functions and variables are used to implement the yacc() function
# itself.   This is pretty hairy stuff involving lots of error checking,
# construction of LR items, kernels, and so forth.   Although a lot of
# this work is done using global variables, the resulting Parser object
# is completely self contained--meaning that it is safe to repeatedly
# call yacc() with different grammars in the same application.
393# ----------------------------------------------------------------------------- 394 395# ----------------------------------------------------------------------------- 396# validate_file() 397# 398# This function checks to see if there are duplicated p_rulename() functions 399# in the parser module file. Without this function, it is really easy for 400# users to make mistakes by cutting and pasting code fragments (and it's a real 401# bugger to try and figure out why the resulting parser doesn't work). Therefore, 402# we just do a little regular expression pattern matching of def statements 403# to try and detect duplicates. 404# ----------------------------------------------------------------------------- 405 406def validate_file(filename): 407 base,ext = os.path.splitext(filename) 408 if ext != '.py': return 1 # No idea. Assume it's okay. 409 410 try: 411 f = open(filename) 412 lines = f.readlines() 413 f.close() 414 except IOError: 415 return 1 # Oh well 416 417 # Match def p_funcname( 418 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') 419 counthash = { } 420 linen = 1 421 noerror = 1 422 for l in lines: 423 m = fre.match(l) 424 if m: 425 name = m.group(1) 426 prev = counthash.get(name) 427 if not prev: 428 counthash[name] = linen 429 else: 430 print "%s:%d: Function %s redefined. Previously defined on line %d" % (filename,linen,name,prev) 431 noerror = 0 432 linen += 1 433 return noerror 434 435# This function looks for functions that might be grammar rules, but which don't have the proper p_suffix. 436def validate_dict(d): 437 for n,v in d.items(): 438 if n[0:2] == 'p_' and isinstance(v,types.FunctionType): continue 439 if n[0:2] == 't_': continue 440 441 if n[0:2] == 'p_': 442 print "yacc: Warning. '%s' not defined as a function" % n 443 if isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1: 444 try: 445 doc = v.__doc__.split(" ") 446 if doc[1] == ':': 447 print "%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix." 
% (v.func_code.co_filename, v.func_code.co_firstlineno,n) 448 except StandardError: 449 pass 450 451# ----------------------------------------------------------------------------- 452# === GRAMMAR FUNCTIONS === 453# 454# The following global variables and functions are used to store, manipulate, 455# and verify the grammar rules specified by the user. 456# ----------------------------------------------------------------------------- 457 458# Initialize all of the global variables used during grammar construction 459def initialize_vars(): 460 global Productions, Prodnames, Prodmap, Terminals 461 global Nonterminals, First, Follow, Precedence, LRitems 462 global Errorfunc, Signature, Requires 463 464 Productions = [None] # A list of all of the productions. The first 465 # entry is always reserved for the purpose of 466 # building an augmented grammar 467 468 Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all 469 # productions of that nonterminal. 470 471 Prodmap = { } # A dictionary that is only used to detect duplicate 472 # productions. 473 474 Terminals = { } # A dictionary mapping the names of terminal symbols to a 475 # list of the rules where they are used. 476 477 Nonterminals = { } # A dictionary mapping names of nonterminals to a list 478 # of rule numbers where they are used. 479 480 First = { } # A dictionary of precomputed FIRST(x) symbols 481 482 Follow = { } # A dictionary of precomputed FOLLOW(x) symbols 483 484 Precedence = { } # Precedence rules for each terminal. Contains tuples of the 485 # form ('right',level) or ('nonassoc', level) or ('left',level) 486 487 LRitems = [ ] # A list of all LR items for the grammar. These are the 488 # productions with the "dot" like E -> E . PLUS E 489 490 Errorfunc = None # User defined error handler 491 492 Signature = md5.new() # Digital signature of the grammar rules, precedence 493 # and other information. Used to determined when a 494 # parsing table needs to be regenerated. 
495 496 Requires = { } # Requires list 497 498 # File objects used when creating the parser.out debugging file 499 global _vf, _vfc 500 _vf = cStringIO.StringIO() 501 _vfc = cStringIO.StringIO() 502 503# ----------------------------------------------------------------------------- 504# class Production: 505# 506# This class stores the raw information about a single production or grammar rule. 507# It has a few required attributes: 508# 509# name - Name of the production (nonterminal) 510# prod - A list of symbols making up its production 511# number - Production number. 512# 513# In addition, a few additional attributes are used to help with debugging or 514# optimization of table generation. 515# 516# file - File where production action is defined. 517# lineno - Line number where action is defined 518# func - Action function 519# prec - Precedence level 520# lr_next - Next LR item. Example, if we are ' E -> E . PLUS E' 521# then lr_next refers to 'E -> E PLUS . E' 522# lr_index - LR item index (location of the ".") in the prod list. 
523# len - Length of the production (number of symbols on right hand side) 524# ----------------------------------------------------------------------------- 525 526class Production: 527 def __init__(self,**kw): 528 for k,v in kw.items(): 529 setattr(self,k,v) 530 self.lr_index = -1 531 self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure 532 self.usyms = [ ] 533 534 def __str__(self): 535 if self.prod: 536 s = "%s -> %s" % (self.name," ".join(self.prod)) 537 else: 538 s = "%s -> <empty>" % self.name 539 return s 540 541 def __repr__(self): 542 return str(self) 543 544 # Compute lr_items from the production 545 def lr_item(self,n): 546 if n > len(self.prod): return None 547 p = Production() 548 p.name = self.name 549 p.prod = list(self.prod) 550 p.number = self.number 551 p.lr_index = n 552 p.prod.insert(n,".") 553 p.prod = tuple(p.prod) 554 p.len = len(p.prod) 555 p.usyms = self.usyms 556 557 # Precompute list of productions immediately following 558 try: 559 p.lrafter = Prodnames[p.prod[n+1]] 560 except (IndexError,KeyError),e: 561 p.lrafter = [] 562 try: 563 p.lrbefore = p.prod[n-1] 564 except IndexError: 565 p.lrbefore = None 566 567 return p 568 569class MiniProduction: 570 pass 571 572# Utility function 573def is_identifier(s): 574 for c in s: 575 if not (c.isalnum() or c == '_'): return 0 576 return 1 577 578# ----------------------------------------------------------------------------- 579# add_production() 580# 581# Given an action function, this function assembles a production rule. 582# The production rule is assumed to be found in the function's docstring. 583# This rule has the general syntax: 584# 585# name1 ::= production1 586# | production2 587# | production3 588# ... 589# | productionn 590# name2 ::= production1 591# | production2 592# ... 
# -----------------------------------------------------------------------------

def add_production(f,file,line,prodname,syms):
    # Register one production "prodname -> syms" whose action is f, defined
    # at file:line.  Returns 0 on success, -1 on error (after printing a
    # diagnostic).  Updates the global Productions/Prodmap/Prodnames/
    # Terminals/Nonterminals tables.

    if Terminals.has_key(prodname):
        print "%s:%d: Illegal rule name '%s'. Already defined as a token." % (file,line,prodname)
        return -1
    if prodname == 'error':
        print "%s:%d: Illegal rule name '%s'. error is a reserved word." % (file,line,prodname)
        return -1

    if not is_identifier(prodname):
        print "%s:%d: Illegal rule name '%s'" % (file,line,prodname)
        return -1

    for s in syms:
        if not is_identifier(s) and s != '%prec':
            print "%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)
            return -1

    # See if the rule is already in the rulemap
    # (note: 'map' intentionally shadows the builtin here)
    map = "%s -> %s" % (prodname,syms)
    if Prodmap.has_key(map):
        m = Prodmap[map]
        print "%s:%d: Duplicate rule %s." % (file,line, m)
        print "%s:%d: Previous definition at %s:%d" % (file,line, m.file, m.line)
        return -1

    p = Production()
    p.name = prodname
    p.prod = syms
    p.file = file
    p.line = line
    p.func = f
    p.number = len(Productions)


    Productions.append(p)
    Prodmap[map] = p
    if not Nonterminals.has_key(prodname):
        Nonterminals[prodname] = [ ]

    # Add all terminals to Terminals
    i = 0
    while i < len(p.prod):
        t = p.prod[i]
        if t == '%prec':
            # %prec NAME overrides the rule's precedence; the two tokens are
            # removed from the production once processed.
            try:
                precname = p.prod[i+1]
            except IndexError:
                print "%s:%d: Syntax error. Nothing follows %%prec." % (p.file,p.line)
                return -1

            prec = Precedence.get(precname,None)
            if not prec:
                print "%s:%d: Nothing known about the precedence of '%s'" % (p.file,p.line,precname)
                return -1
            else:
                p.prec = prec
            del p.prod[i]
            del p.prod[i]
            continue

        if Terminals.has_key(t):
            Terminals[t].append(p.number)
            # Is a terminal. We'll assign a precedence to p based on this
            if not hasattr(p,"prec"):
                p.prec = Precedence.get(t,('right',0))
        else:
            # Any unknown symbol is assumed to be a nonterminal; it gets a
            # Nonterminals entry even if no rule defines it (caught later).
            if not Nonterminals.has_key(t):
                Nonterminals[t] = [ ]
            Nonterminals[t].append(p.number)
        i += 1

    if not hasattr(p,"prec"):
        p.prec = ('right',0)

    # Set final length of productions
    p.len  = len(p.prod)
    p.prod = tuple(p.prod)

    # Calculate unique syms in the production
    p.usyms = [ ]
    for s in p.prod:
        if s not in p.usyms:
            p.usyms.append(s)

    # Add to the global productions list
    try:
        Prodnames[p.name].append(p)
    except KeyError:
        Prodnames[p.name] = [ p ]
    return 0

# Given a raw rule function, this function rips out its doc string
# and adds rules to the grammar.  Returns 0 on success; a negative
# count of the errors encountered otherwise.

def add_function(f):
    line = f.func_code.co_firstlineno
    file = f.func_code.co_filename
    error = 0

    if f.func_code.co_argcount > 1:
        print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__)
        return -1

    if f.func_code.co_argcount < 1:
        print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__)
        return -1

    if f.__doc__:
        # Split the doc string into lines
        pstrings = f.__doc__.splitlines()
        lastp = None
        dline = line
        for ps in pstrings:
            dline += 1
            p = ps.split()
            if not p: continue
            try:
                if p[0] == '|':
                    # This is a continuation of a previous rule
                    if not lastp:
                        print "%s:%d: Misplaced '|'." % (file,dline)
                        return -1
                    prodname = lastp
                    if len(p) > 1:
                        syms = p[1:]
                    else:
                        syms = [ ]
                else:
                    # New rule of the form "name : sym1 sym2 ..."
                    prodname = p[0]
                    lastp = prodname
                    assign = p[1]
                    if len(p) > 2:
                        syms = p[2:]
                    else:
                        syms = [ ]
                    if assign != ':' and assign != '::=':
                        print "%s:%d: Syntax error. Expected ':'" % (file,dline)
                        return -1
                e = add_production(f,file,dline,prodname,syms)
                error += e
            except StandardError:
                # e.g. p[1] missing -- docstring line isn't a valid rule
                print "%s:%d: Syntax error in rule '%s'" % (file,dline,ps)
                error -= 1
    else:
        print "%s:%d: No documentation string specified in function '%s'" % (file,line,f.__name__)
    return error


# Cycle checking code (Michael Dyck)

def compute_reachable():
    '''
    Find each symbol that can be reached from the start symbol.
    Print a warning for any nonterminals that can't be reached.
    (Unused terminals have already had their warning.)
    '''
    Reachable = { }
    for s in Terminals.keys() + Nonterminals.keys():
        Reachable[s] = 0

    # Productions[0] is the augmented start rule S' -> start.
    mark_reachable_from( Productions[0].prod[0], Reachable )

    for s in Nonterminals.keys():
        if not Reachable[s]:
            print "yacc: Symbol '%s' is unreachable." % s

def mark_reachable_from(s, Reachable):
    '''
    Mark all symbols that are reachable from symbol s.
    '''
    if Reachable[s]:
        # We've already reached symbol s.
        return
    Reachable[s] = 1
    # Recurse into every production defining s (terminals have none).
    for p in Prodnames.get(s,[]):
        for r in p.prod:
            mark_reachable_from(r, Reachable)

# -----------------------------------------------------------------------------
# compute_terminates()
#
# This function looks at the various parsing rules and tries to detect
# infinite recursion cycles (grammar rules where there is no possible way
# to derive a string of only terminals).
# -----------------------------------------------------------------------------
def compute_terminates():
    '''
    Raise an error for any symbols that don't terminate.
    Returns 1 if a non-terminating symbol was reported, else 0.
    '''
    Terminates = {}

    # Terminals:
    for t in Terminals.keys():
        Terminates[t] = 1

    Terminates['$'] = 1

    # Nonterminals:

    # Initialize to false:
    for n in Nonterminals.keys():
        Terminates[n] = 0

    # Then propagate termination until no change:
    # (fixed-point iteration over the productions)
    while 1:
        some_change = 0
        for (n,pl) in Prodnames.items():
            # Nonterminal n terminates iff any of its productions terminates.
            for p in pl:
                # Production p terminates iff all of its rhs symbols terminate.
                for s in p.prod:
                    if not Terminates[s]:
                        # The symbol s does not terminate,
                        # so production p does not terminate.
                        p_terminates = 0
                        break
                else:
                    # didn't break from the loop,
                    # so every symbol s terminates
                    # so production p terminates.
                    p_terminates = 1

                if p_terminates:
                    # symbol n terminates!
                    if not Terminates[n]:
                        Terminates[n] = 1
                        some_change = 1
                    # Don't need to consider any more productions for this n.
                    break

        if not some_change:
            break

    some_error = 0
    for (s,terminates) in Terminates.items():
        if not terminates:
            if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
                # s is used-but-not-defined, and we've already warned of that,
                # so it would be overkill to say that it's also non-terminating.
                pass
            else:
                print "yacc: Infinite recursion detected for symbol '%s'." % s
                some_error = 1

    return some_error

# -----------------------------------------------------------------------------
# verify_productions()
#
# This function examines all of the supplied rules to see if they seem valid.
846# ----------------------------------------------------------------------------- 847def verify_productions(cycle_check=1): 848 error = 0 849 for p in Productions: 850 if not p: continue 851 852 for s in p.prod: 853 if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': 854 print "%s:%d: Symbol '%s' used, but not defined as a token or a rule." % (p.file,p.line,s) 855 error = 1 856 continue 857 858 unused_tok = 0 859 # Now verify all of the tokens 860 if yaccdebug: 861 _vf.write("Unused terminals:\n\n") 862 for s,v in Terminals.items(): 863 if s != 'error' and not v: 864 print "yacc: Warning. Token '%s' defined, but not used." % s 865 if yaccdebug: _vf.write(" %s\n"% s) 866 unused_tok += 1 867 868 # Print out all of the productions 869 if yaccdebug: 870 _vf.write("\nGrammar\n\n") 871 for i in range(1,len(Productions)): 872 _vf.write("Rule %-5d %s\n" % (i, Productions[i])) 873 874 unused_prod = 0 875 # Verify the use of all productions 876 for s,v in Nonterminals.items(): 877 if not v: 878 p = Prodnames[s][0] 879 print "%s:%d: Warning. Rule '%s' defined, but not used." % (p.file,p.line, s) 880 unused_prod += 1 881 882 883 if unused_tok == 1: 884 print "yacc: Warning. There is 1 unused token." 885 if unused_tok > 1: 886 print "yacc: Warning. There are %d unused tokens." % unused_tok 887 888 if unused_prod == 1: 889 print "yacc: Warning. There is 1 unused rule." 890 if unused_prod > 1: 891 print "yacc: Warning. There are %d unused rules." 
% unused_prod 892 893 if yaccdebug: 894 _vf.write("\nTerminals, with rules where they appear\n\n") 895 ks = Terminals.keys() 896 ks.sort() 897 for k in ks: 898 _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]]))) 899 _vf.write("\nNonterminals, with rules where they appear\n\n") 900 ks = Nonterminals.keys() 901 ks.sort() 902 for k in ks: 903 _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]]))) 904 905 if (cycle_check): 906 compute_reachable() 907 error += compute_terminates() 908# error += check_cycles() 909 return error 910 911# ----------------------------------------------------------------------------- 912# build_lritems() 913# 914# This function walks the list of productions and builds a complete set of the 915# LR items. The LR items are stored in two ways: First, they are uniquely 916# numbered and placed in the list _lritems. Second, a linked list of LR items 917# is built for each production. For example: 918# 919# E -> E PLUS E 920# 921# Creates the list 922# 923# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] 924# ----------------------------------------------------------------------------- 925 926def build_lritems(): 927 for p in Productions: 928 lastlri = p 929 lri = p.lr_item(0) 930 i = 0 931 while 1: 932 lri = p.lr_item(i) 933 lastlri.lr_next = lri 934 if not lri: break 935 lri.lr_num = len(LRitems) 936 LRitems.append(lri) 937 lastlri = lri 938 i += 1 939 940 # In order for the rest of the parser generator to work, we need to 941 # guarantee that no more lritems are generated. Therefore, we nuke 942 # the p.lr_item method. (Only used in debugging) 943 # Production.lr_item = None 944 945# ----------------------------------------------------------------------------- 946# add_precedence() 947# 948# Given a list of precedence rules, add to the precedence table. 
949# ----------------------------------------------------------------------------- 950 951def add_precedence(plist): 952 plevel = 0 953 error = 0 954 for p in plist: 955 plevel += 1 956 try: 957 prec = p[0] 958 terms = p[1:] 959 if prec != 'left' and prec != 'right' and prec != 'nonassoc': 960 print "yacc: Invalid precedence '%s'" % prec 961 return -1 962 for t in terms: 963 if Precedence.has_key(t): 964 print "yacc: Precedence already specified for terminal '%s'" % t 965 error += 1 966 continue 967 Precedence[t] = (prec,plevel) 968 except: 969 print "yacc: Invalid precedence table." 970 error += 1 971 972 return error 973 974# ----------------------------------------------------------------------------- 975# augment_grammar() 976# 977# Compute the augmented grammar. This is just a rule S' -> start where start 978# is the starting symbol. 979# ----------------------------------------------------------------------------- 980 981def augment_grammar(start=None): 982 if not start: 983 start = Productions[1].name 984 Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None) 985 Productions[0].usyms = [ start ] 986 Nonterminals[start].append(0) 987 988 989# ------------------------------------------------------------------------- 990# first() 991# 992# Compute the value of FIRST1(beta) where beta is a tuple of symbols. 993# 994# During execution of compute_first1, the result may be incomplete. 995# Afterward (e.g., when called from compute_follow()), it will be complete. 996# ------------------------------------------------------------------------- 997def first(beta): 998 999 # We are computing First(x1,x2,x3,...,xn) 1000 result = [ ] 1001 for x in beta: 1002 x_produces_empty = 0 1003 1004 # Add all the non-<empty> symbols of First[x] to the result. 
1005 for f in First[x]: 1006 if f == '<empty>': 1007 x_produces_empty = 1 1008 else: 1009 if f not in result: result.append(f) 1010 1011 if x_produces_empty: 1012 # We have to consider the next x in beta, 1013 # i.e. stay in the loop. 1014 pass 1015 else: 1016 # We don't have to consider any further symbols in beta. 1017 break 1018 else: 1019 # There was no 'break' from the loop, 1020 # so x_produces_empty was true for all x in beta, 1021 # so beta produces empty as well. 1022 result.append('<empty>') 1023 1024 return result 1025 1026 1027# FOLLOW(x) 1028# Given a non-terminal. This function computes the set of all symbols 1029# that might follow it. Dragon book, p. 189. 1030 1031def compute_follow(start=None): 1032 # Add '$' to the follow list of the start symbol 1033 for k in Nonterminals.keys(): 1034 Follow[k] = [ ] 1035 1036 if not start: 1037 start = Productions[1].name 1038 1039 Follow[start] = [ '$' ] 1040 1041 while 1: 1042 didadd = 0 1043 for p in Productions[1:]: 1044 # Here is the production set 1045 for i in range(len(p.prod)): 1046 B = p.prod[i] 1047 if Nonterminals.has_key(B): 1048 # Okay. 
We got a non-terminal in a production 1049 fst = first(p.prod[i+1:]) 1050 hasempty = 0 1051 for f in fst: 1052 if f != '<empty>' and f not in Follow[B]: 1053 Follow[B].append(f) 1054 didadd = 1 1055 if f == '<empty>': 1056 hasempty = 1 1057 if hasempty or i == (len(p.prod)-1): 1058 # Add elements of follow(a) to follow(b) 1059 for f in Follow[p.name]: 1060 if f not in Follow[B]: 1061 Follow[B].append(f) 1062 didadd = 1 1063 if not didadd: break 1064 1065 if 0 and yaccdebug: 1066 _vf.write('\nFollow:\n') 1067 for k in Nonterminals.keys(): 1068 _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]]))) 1069 1070# ------------------------------------------------------------------------- 1071# compute_first1() 1072# 1073# Compute the value of FIRST1(X) for all symbols 1074# ------------------------------------------------------------------------- 1075def compute_first1(): 1076 1077 # Terminals: 1078 for t in Terminals.keys(): 1079 First[t] = [t] 1080 1081 First['$'] = ['$'] 1082 First['#'] = ['#'] # what's this for? 1083 1084 # Nonterminals: 1085 1086 # Initialize to the empty set: 1087 for n in Nonterminals.keys(): 1088 First[n] = [] 1089 1090 # Then propagate symbols until no change: 1091 while 1: 1092 some_change = 0 1093 for n in Nonterminals.keys(): 1094 for p in Prodnames[n]: 1095 for f in first(p.prod): 1096 if f not in First[n]: 1097 First[n].append( f ) 1098 some_change = 1 1099 if not some_change: 1100 break 1101 1102 if 0 and yaccdebug: 1103 _vf.write('\nFirst:\n') 1104 for k in Nonterminals.keys(): 1105 _vf.write("%-20s : %s\n" % 1106 (k, " ".join([str(s) for s in First[k]]))) 1107 1108# ----------------------------------------------------------------------------- 1109# === SLR Generation === 1110# 1111# The following functions are used to construct SLR (Simple LR) parsing tables 1112# as described on p.221-229 of the dragon book. 
1113# ----------------------------------------------------------------------------- 1114 1115# Global variables for the LR parsing engine 1116def lr_init_vars(): 1117 global _lr_action, _lr_goto, _lr_method 1118 global _lr_goto_cache 1119 1120 _lr_action = { } # Action table 1121 _lr_goto = { } # Goto table 1122 _lr_method = "Unknown" # LR method used 1123 _lr_goto_cache = { } 1124 1125# Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 1126# prodlist is a list of productions. 1127 1128_add_count = 0 # Counter used to detect cycles 1129 1130def lr0_closure(I): 1131 global _add_count 1132 1133 _add_count += 1 1134 prodlist = Productions 1135 1136 # Add everything in I to J 1137 J = I[:] 1138 didadd = 1 1139 while didadd: 1140 didadd = 0 1141 for j in J: 1142 for x in j.lrafter: 1143 if x.lr0_added == _add_count: continue 1144 # Add B --> .G to J 1145 J.append(x.lr_next) 1146 x.lr0_added = _add_count 1147 didadd = 1 1148 1149 return J 1150 1151# Compute the LR(0) goto function goto(I,X) where I is a set 1152# of LR(0) items and X is a grammar symbol. This function is written 1153# in a way that guarantees uniqueness of the generated goto sets 1154# (i.e. the same goto set will never be returned as two different Python 1155# objects). With uniqueness, we can later do fast set comparisons using 1156# id(obj) instead of element-wise comparison. 

def lr0_goto(I,x):
    # Compute goto(I,x).  Results are memoized two ways: directly under
    # (id(I),x), and structurally via a trie of dicts rooted at
    # _lr_goto_cache[x], keyed by the id() of each successive item, so
    # that equal goto sets are always the SAME list object.  That lets
    # callers compare sets by id() instead of element-wise.
    # First we look for a previously cached entry
    g = _lr_goto_cache.get((id(I),x),None)
    if g: return g

    # Now we generate the goto set in a way that guarantees uniqueness
    # of the result

    s = _lr_goto_cache.get(x,None)
    if not s:
        s = { }
        _lr_goto_cache[x] = s

    gs = [ ]
    for p in I:
        n = p.lr_next
        if n and n.lrbefore == x:
            # Descend one trie level per matching item.
            s1 = s.get(id(n),None)
            if not s1:
                s1 = { }
                s[id(n)] = s1
            gs.append(n)
            s = s1
    # '$' marks the trie leaf holding the finished (closed) goto set.
    g = s.get('$',None)
    if not g:
        if gs:
            g = lr0_closure(gs)
            s['$'] = g
        else:
            # Empty goto: cache the empty list, but g stays None below.
            s['$'] = gs
    _lr_goto_cache[(id(I),x)] = g
    return g

# Compute the kernel of a set of LR(0) items
def lr0_kernel(I):
    # Kernel items: the start item (S'), any item with the dot advanced
    # past position 0, and items of empty productions.
    KI = [ ]
    for p in I:
        if p.name == "S'" or p.lr_index > 0 or p.len == 0:
            KI.append(p)

    return KI

# Maps id(item set) -> state number for the sets produced by lr0_items().
_lr0_cidhash = { }

# Compute the LR(0) sets of item function
def lr0_items():
    # Build the canonical collection C = {I0, I1, ...} of LR(0) item
    # sets, numbering each set in _lr0_cidhash as it is discovered.
    C = [ lr0_closure([Productions[0].lr_next]) ]
    i = 0
    for I in C:
        _lr0_cidhash[id(I)] = i
        i += 1

    # Loop over the items in C and each grammar symbol
    i = 0
    while i < len(C):
        I = C[i]
        i += 1

        # Collect all of the symbols that could possibly be in the goto(I,X) sets
        asyms = { }
        for ii in I:
            for s in ii.usyms:
                asyms[s] = None

        for x in asyms.keys():
            g = lr0_goto(I,x)
            if not g: continue
            # Already numbered -> lr0_goto returned a cached set object.
            if _lr0_cidhash.has_key(id(g)): continue
            _lr0_cidhash[id(g)] = len(C)
            C.append(g)

    return C

# -----------------------------------------------------------------------------
# slr_parse_table()
#
# This function constructs an SLR table.
1236# ----------------------------------------------------------------------------- 1237def slr_parse_table(): 1238 global _lr_method 1239 goto = _lr_goto # Goto array 1240 action = _lr_action # Action array 1241 actionp = { } # Action production array (temporary) 1242 1243 _lr_method = "SLR" 1244 1245 n_srconflict = 0 1246 n_rrconflict = 0 1247 1248 if yaccdebug: 1249 _vf.write("\n\nParsing method: SLR\n\n") 1250 1251 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items 1252 # This determines the number of states 1253 1254 C = lr0_items() 1255 1256 # Build the parser table, state by state 1257 st = 0 1258 for I in C: 1259 # Loop over each production in I 1260 actlist = [ ] # List of actions 1261 1262 if yaccdebug: 1263 _vf.write("\nstate %d\n\n" % st) 1264 for p in I: 1265 _vf.write(" (%d) %s\n" % (p.number, str(p))) 1266 _vf.write("\n") 1267 1268 for p in I: 1269 try: 1270 if p.prod[-1] == ".": 1271 if p.name == "S'": 1272 # Start symbol. Accept! 1273 action[st,"$"] = 0 1274 actionp[st,"$"] = p 1275 else: 1276 # We are at the end of a production. Reduce! 1277 for a in Follow[p.name]: 1278 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) 1279 r = action.get((st,a),None) 1280 if r is not None: 1281 # Whoa. Have a shift/reduce or reduce/reduce conflict 1282 if r > 0: 1283 # Need to decide on shift or reduce here 1284 # By default we favor shifting. Need to add 1285 # some precedence rules here. 1286 sprec,slevel = Productions[actionp[st,a].number].prec 1287 rprec,rlevel = Precedence.get(a,('right',0)) 1288 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): 1289 # We really need to reduce here. 1290 action[st,a] = -p.number 1291 actionp[st,a] = p 1292 if not slevel and not rlevel: 1293 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) 1294 _vf.write(" ! 
shift/reduce conflict for %s resolved as reduce.\n" % a) 1295 n_srconflict += 1 1296 elif (slevel == rlevel) and (rprec == 'nonassoc'): 1297 action[st,a] = None 1298 else: 1299 # Hmmm. Guess we'll keep the shift 1300 if not slevel and not rlevel: 1301 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) 1302 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) 1303 n_srconflict +=1 1304 elif r < 0: 1305 # Reduce/reduce conflict. In this case, we favor the rule 1306 # that was defined first in the grammar file 1307 oldp = Productions[-r] 1308 pp = Productions[p.number] 1309 if oldp.line > pp.line: 1310 action[st,a] = -p.number 1311 actionp[st,a] = p 1312 # print "Reduce/reduce conflict in state %d" % st 1313 n_rrconflict += 1 1314 _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a])) 1315 _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a])) 1316 else: 1317 print "Unknown conflict in state %d" % st 1318 else: 1319 action[st,a] = -p.number 1320 actionp[st,a] = p 1321 else: 1322 i = p.lr_index 1323 a = p.prod[i+1] # Get symbol right after the "." 1324 if Terminals.has_key(a): 1325 g = lr0_goto(I,a) 1326 j = _lr0_cidhash.get(id(g),-1) 1327 if j >= 0: 1328 # We are in a shift state 1329 actlist.append((a,p,"shift and go to state %d" % j)) 1330 r = action.get((st,a),None) 1331 if r is not None: 1332 # Whoa have a shift/reduce or shift/shift conflict 1333 if r > 0: 1334 if r != j: 1335 print "Shift/shift conflict in state %d" % st 1336 elif r < 0: 1337 # Do a precedence check. 1338 # - if precedence of reduce rule is higher, we reduce. 1339 # - if precedence of reduce is same and left assoc, we reduce. 
1340 # - otherwise we shift 1341 rprec,rlevel = Productions[actionp[st,a].number].prec 1342 sprec,slevel = Precedence.get(a,('right',0)) 1343 if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')): 1344 # We decide to shift here... highest precedence to shift 1345 action[st,a] = j 1346 actionp[st,a] = p 1347 if not slevel and not rlevel: 1348 n_srconflict += 1 1349 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) 1350 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) 1351 elif (slevel == rlevel) and (rprec == 'nonassoc'): 1352 action[st,a] = None 1353 else: 1354 # Hmmm. Guess we'll keep the reduce 1355 if not slevel and not rlevel: 1356 n_srconflict +=1 1357 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) 1358 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) 1359 1360 else: 1361 print "Unknown conflict in state %d" % st 1362 else: 1363 action[st,a] = j 1364 actionp[st,a] = p 1365 1366 except StandardError,e: 1367 raise YaccError, "Hosed in slr_parse_table", e 1368 1369 # Print the actions associated with each terminal 1370 if yaccdebug: 1371 for a,p,m in actlist: 1372 if action.has_key((st,a)): 1373 if p is actionp[st,a]: 1374 _vf.write(" %-15s %s\n" % (a,m)) 1375 _vf.write("\n") 1376 for a,p,m in actlist: 1377 if action.has_key((st,a)): 1378 if p is not actionp[st,a]: 1379 _vf.write(" ! 
%-15s [ %s ]\n" % (a,m)) 1380 1381 # Construct the goto table for this state 1382 if yaccdebug: 1383 _vf.write("\n") 1384 nkeys = { } 1385 for ii in I: 1386 for s in ii.usyms: 1387 if Nonterminals.has_key(s): 1388 nkeys[s] = None 1389 for n in nkeys.keys(): 1390 g = lr0_goto(I,n) 1391 j = _lr0_cidhash.get(id(g),-1) 1392 if j >= 0: 1393 goto[st,n] = j 1394 if yaccdebug: 1395 _vf.write(" %-15s shift and go to state %d\n" % (n,j)) 1396 1397 st += 1 1398 1399 if n_srconflict == 1: 1400 print "yacc: %d shift/reduce conflict" % n_srconflict 1401 if n_srconflict > 1: 1402 print "yacc: %d shift/reduce conflicts" % n_srconflict 1403 if n_rrconflict == 1: 1404 print "yacc: %d reduce/reduce conflict" % n_rrconflict 1405 if n_rrconflict > 1: 1406 print "yacc: %d reduce/reduce conflicts" % n_rrconflict 1407 1408 1409# ----------------------------------------------------------------------------- 1410# ==== LALR(1) Parsing ==== 1411# **** UNFINISHED! 6/16/01 1412# ----------------------------------------------------------------------------- 1413 1414 1415# Compute the lr1_closure of a set I. I is a list of tuples (p,a) where 1416# p is a LR0 item and a is a terminal 1417 1418_lr1_add_count = 0 1419 1420def lr1_closure(I): 1421 global _lr1_add_count 1422 1423 _lr1_add_count += 1 1424 1425 J = I[:] 1426 1427 # Loop over items (p,a) in I. 1428 ji = 0 1429 while ji < len(J): 1430 p,a = J[ji] 1431 # p = [ A -> alpha . B beta] 1432 1433 # For each production B -> gamma 1434 for B in p.lr1_after: 1435 f = tuple(p.lr1_beta + (a,)) 1436 1437 # For each terminal b in first(Beta a) 1438 for b in first(f): 1439 # Check if (B -> . 
gamma, b) is in J 1440 # Only way this can happen is if the add count mismatches 1441 pn = B.lr_next 1442 if pn.lr_added.get(b,0) == _lr1_add_count: continue 1443 pn.lr_added[b] = _lr1_add_count 1444 J.append((pn,b)) 1445 ji += 1 1446 1447 return J 1448 1449def lalr_parse_table(): 1450 1451 # Compute some lr1 information about all of the productions 1452 for p in LRitems: 1453 try: 1454 after = p.prod[p.lr_index + 1] 1455 p.lr1_after = Prodnames[after] 1456 p.lr1_beta = p.prod[p.lr_index + 2:] 1457 except LookupError: 1458 p.lr1_after = [ ] 1459 p.lr1_beta = [ ] 1460 p.lr_added = { } 1461 1462 # Compute the LR(0) items 1463 C = lr0_items() 1464 CK = [] 1465 for I in C: 1466 CK.append(lr0_kernel(I)) 1467 1468 print CK 1469 1470# ----------------------------------------------------------------------------- 1471# ==== LR Utility functions ==== 1472# ----------------------------------------------------------------------------- 1473 1474# ----------------------------------------------------------------------------- 1475# _lr_write_tables() 1476# 1477# This function writes the LR parsing tables to a file 1478# ----------------------------------------------------------------------------- 1479 1480def lr_write_tables(modulename=tab_module): 1481 filename = modulename + ".py" 1482 try: 1483 f = open(filename,"w") 1484 1485 f.write(""" 1486# %s 1487# This file is automatically generated. Do not edit. 
1488 1489_lr_method = %s 1490 1491_lr_signature = %s 1492""" % (filename, repr(_lr_method), repr(Signature.digest()))) 1493 1494 # Change smaller to 0 to go back to original tables 1495 smaller = 1 1496 1497 # Factor out names to try and make smaller 1498 if smaller: 1499 items = { } 1500 1501 for k,v in _lr_action.items(): 1502 i = items.get(k[1]) 1503 if not i: 1504 i = ([],[]) 1505 items[k[1]] = i 1506 i[0].append(k[0]) 1507 i[1].append(v) 1508 1509 f.write("\n_lr_action_items = {") 1510 for k,v in items.items(): 1511 f.write("%r:([" % k) 1512 for i in v[0]: 1513 f.write("%r," % i) 1514 f.write("],[") 1515 for i in v[1]: 1516 f.write("%r," % i) 1517 1518 f.write("]),") 1519 f.write("}\n") 1520 1521 f.write(""" 1522_lr_action = { } 1523for _k, _v in _lr_action_items.items(): 1524 for _x,_y in zip(_v[0],_v[1]): 1525 _lr_action[(_x,_k)] = _y 1526del _lr_action_items 1527""") 1528 1529 else: 1530 f.write("\n_lr_action = { "); 1531 for k,v in _lr_action.items(): 1532 f.write("(%r,%r):%r," % (k[0],k[1],v)) 1533 f.write("}\n"); 1534 1535 if smaller: 1536 # Factor out names to try and make smaller 1537 items = { } 1538 1539 for k,v in _lr_goto.items(): 1540 i = items.get(k[1]) 1541 if not i: 1542 i = ([],[]) 1543 items[k[1]] = i 1544 i[0].append(k[0]) 1545 i[1].append(v) 1546 1547 f.write("\n_lr_goto_items = {") 1548 for k,v in items.items(): 1549 f.write("%r:([" % k) 1550 for i in v[0]: 1551 f.write("%r," % i) 1552 f.write("],[") 1553 for i in v[1]: 1554 f.write("%r," % i) 1555 1556 f.write("]),") 1557 f.write("}\n") 1558 1559 f.write(""" 1560_lr_goto = { } 1561for _k, _v in _lr_goto_items.items(): 1562 for _x,_y in zip(_v[0],_v[1]): 1563 _lr_goto[(_x,_k)] = _y 1564del _lr_goto_items 1565""") 1566 else: 1567 f.write("\n_lr_goto = { "); 1568 for k,v in _lr_goto.items(): 1569 f.write("(%r,%r):%r," % (k[0],k[1],v)) 1570 f.write("}\n"); 1571 1572 # Write production table 1573 f.write("_lr_productions = [\n") 1574 for p in Productions: 1575 if p: 1576 if (p.func): 1577 
f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line)) 1578 else: 1579 f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len)) 1580 else: 1581 f.write(" None,\n") 1582 f.write("]\n") 1583 f.close() 1584 1585 except IOError,e: 1586 print "Unable to create '%s'" % filename 1587 print e 1588 return 1589 1590def lr_read_tables(module=tab_module,optimize=0): 1591 global _lr_action, _lr_goto, _lr_productions, _lr_method 1592 try: 1593 exec "import %s as parsetab" % module 1594 1595 if (optimize) or (Signature.digest() == parsetab._lr_signature): 1596 _lr_action = parsetab._lr_action 1597 _lr_goto = parsetab._lr_goto 1598 _lr_productions = parsetab._lr_productions 1599 _lr_method = parsetab._lr_method 1600 return 1 1601 else: 1602 return 0 1603 1604 except (ImportError,AttributeError): 1605 return 0 1606 1607# ----------------------------------------------------------------------------- 1608# yacc(module) 1609# 1610# Build the parser module 1611# ----------------------------------------------------------------------------- 1612 1613def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0): 1614 global yaccdebug 1615 yaccdebug = debug 1616 1617 initialize_vars() 1618 files = { } 1619 error = 0 1620 1621 # Add starting symbol to signature 1622 if start: 1623 Signature.update(start) 1624 1625 # Try to figure out what module we are working with 1626 if module: 1627 # User supplied a module object. 1628 if not isinstance(module, types.ModuleType): 1629 raise ValueError,"Expected a module" 1630 1631 ldict = module.__dict__ 1632 1633 else: 1634 # No module given. We might be able to get information from the caller. 
1635 # Throw an exception and unwind the traceback to get the globals 1636 1637 try: 1638 raise RuntimeError 1639 except RuntimeError: 1640 e,b,t = sys.exc_info() 1641 f = t.tb_frame 1642 f = f.f_back # Walk out to our calling function 1643 ldict = f.f_globals # Grab its globals dictionary 1644 1645 # If running in optimized mode. We're going to 1646 1647 if (optimize and lr_read_tables(tabmodule,1)): 1648 # Read parse table 1649 del Productions[:] 1650 for p in _lr_productions: 1651 if not p: 1652 Productions.append(None) 1653 else: 1654 m = MiniProduction() 1655 m.name = p[0] 1656 m.len = p[1] 1657 m.file = p[3] 1658 m.line = p[4] 1659 if p[2]: 1660 m.func = ldict[p[2]] 1661 Productions.append(m) 1662 1663 else: 1664 # Get the tokens map 1665 tokens = ldict.get("tokens",None) 1666 1667 if not tokens: 1668 raise YaccError,"module does not define a list 'tokens'" 1669 if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): 1670 raise YaccError,"tokens must be a list or tuple." 1671 1672 # Check to see if a requires dictionary is defined. 1673 requires = ldict.get("require",None) 1674 if requires: 1675 if not (isinstance(requires,types.DictType)): 1676 raise YaccError,"require must be a dictionary." 1677 1678 for r,v in requires.items(): 1679 try: 1680 if not (isinstance(v,types.ListType)): 1681 raise TypeError 1682 v1 = [x.split(".") for x in v] 1683 Requires[r] = v1 1684 except StandardError: 1685 print "Invalid specification for rule '%s' in require. Expected a list of strings" % r 1686 1687 1688 # Build the dictionary of terminals. We a record a 0 in the 1689 # dictionary to track whether or not a terminal is actually 1690 # used in the grammar 1691 1692 if 'error' in tokens: 1693 print "yacc: Illegal token 'error'. Is a reserved word." 1694 raise YaccError,"Illegal token name" 1695 1696 for n in tokens: 1697 if Terminals.has_key(n): 1698 print "yacc: Warning. Token '%s' multiply defined." 
% n 1699 Terminals[n] = [ ] 1700 1701 Terminals['error'] = [ ] 1702 1703 # Get the precedence map (if any) 1704 prec = ldict.get("precedence",None) 1705 if prec: 1706 if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)): 1707 raise YaccError,"precedence must be a list or tuple." 1708 add_precedence(prec) 1709 Signature.update(repr(prec)) 1710 1711 for n in tokens: 1712 if not Precedence.has_key(n): 1713 Precedence[n] = ('right',0) # Default, right associative, 0 precedence 1714 1715 # Look for error handler 1716 ef = ldict.get('p_error',None) 1717 if ef: 1718 if not isinstance(ef,types.FunctionType): 1719 raise YaccError,"'p_error' defined, but is not a function." 1720 eline = ef.func_code.co_firstlineno 1721 efile = ef.func_code.co_filename 1722 files[efile] = None 1723 1724 if (ef.func_code.co_argcount != 1): 1725 raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline) 1726 global Errorfunc 1727 Errorfunc = ef 1728 else: 1729 print "yacc: Warning. no p_error() function is defined." 1730 1731 # Get the list of built-in functions with p_ prefix 1732 symbols = [ldict[f] for f in ldict.keys() 1733 if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] == 'p_' 1734 and ldict[f].__name__ != 'p_error')] 1735 1736 # Check for non-empty symbols 1737 if len(symbols) == 0: 1738 raise YaccError,"no rules of the form p_rulename are defined." 1739 1740 # Sort the symbols by line number 1741 symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno)) 1742 1743 # Add all of the symbols to the grammar 1744 for f in symbols: 1745 if (add_function(f)) < 0: 1746 error += 1 1747 else: 1748 files[f.func_code.co_filename] = None 1749 1750 # Make a signature of the docstrings 1751 for f in symbols: 1752 if f.__doc__: 1753 Signature.update(f.__doc__) 1754 1755 lr_init_vars() 1756 1757 if error: 1758 raise YaccError,"Unable to construct parser." 
1759 1760 if not lr_read_tables(tabmodule): 1761 1762 # Validate files 1763 for filename in files.keys(): 1764 if not validate_file(filename): 1765 error = 1 1766 1767 # Validate dictionary 1768 validate_dict(ldict) 1769 1770 if start and not Prodnames.has_key(start): 1771 raise YaccError,"Bad starting symbol '%s'" % start 1772 1773 augment_grammar(start) 1774 error = verify_productions(cycle_check=check_recursion) 1775 otherfunc = [ldict[f] for f in ldict.keys() 1776 if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] != 'p_')] 1777 1778 if error: 1779 raise YaccError,"Unable to construct parser." 1780 1781 build_lritems() 1782 compute_first1() 1783 compute_follow(start) 1784 1785 if method == 'SLR': 1786 slr_parse_table() 1787 elif method == 'LALR1': 1788 lalr_parse_table() 1789 return 1790 else: 1791 raise YaccError, "Unknown parsing method '%s'" % method 1792 1793 lr_write_tables(tabmodule) 1794 1795 if yaccdebug: 1796 try: 1797 f = open(debug_file,"w") 1798 f.write(_vfc.getvalue()) 1799 f.write("\n\n") 1800 f.write(_vf.getvalue()) 1801 f.close() 1802 except IOError,e: 1803 print "yacc: can't create '%s'" % debug_file,e 1804 1805 # Made it here. Create a parser object and set up its internal state. 1806 # Set global parse() method to bound method of parser object. 1807 1808 p = Parser("xyzzy") 1809 p.productions = Productions 1810 p.errorfunc = Errorfunc 1811 p.action = _lr_action 1812 p.goto = _lr_goto 1813 p.method = _lr_method 1814 p.require = Requires 1815 1816 global parse 1817 parse = p.parse 1818 1819 # Clean up all of the globals we created 1820 if (not optimize): 1821 yacc_cleanup() 1822 return p 1823 1824# yacc_cleanup function. 
Delete all of the global variables 1825# used during table construction 1826 1827def yacc_cleanup(): 1828 global _lr_action, _lr_goto, _lr_method, _lr_goto_cache 1829 del _lr_action, _lr_goto, _lr_method, _lr_goto_cache 1830 1831 global Productions, Prodnames, Prodmap, Terminals 1832 global Nonterminals, First, Follow, Precedence, LRitems 1833 global Errorfunc, Signature, Requires 1834 1835 del Productions, Prodnames, Prodmap, Terminals 1836 del Nonterminals, First, Follow, Precedence, LRitems 1837 del Errorfunc, Signature, Requires 1838 1839 global _vf, _vfc 1840 del _vf, _vfc 1841 1842 1843# Stub that raises an error if parsing is attempted without first calling yacc() 1844def parse(*args,**kwargs): 1845 raise YaccError, "yacc: No parser built with yacc()" 1846 1847