16498Snate@binkert.org# ----------------------------------------------------------------------------- 22632SN/A# ply: yacc.py 32632SN/A# 46498Snate@binkert.org# Copyright (C) 2001-2009, 56498Snate@binkert.org# David M. Beazley (Dabeaz LLC) 66498Snate@binkert.org# All rights reserved. 72632SN/A# 86498Snate@binkert.org# Redistribution and use in source and binary forms, with or without 96498Snate@binkert.org# modification, are permitted provided that the following conditions are 106498Snate@binkert.org# met: 116498Snate@binkert.org# 126498Snate@binkert.org# * Redistributions of source code must retain the above copyright notice, 136498Snate@binkert.org# this list of conditions and the following disclaimer. 146498Snate@binkert.org# * Redistributions in binary form must reproduce the above copyright notice, 156498Snate@binkert.org# this list of conditions and the following disclaimer in the documentation 166498Snate@binkert.org# and/or other materials provided with the distribution. 176498Snate@binkert.org# * Neither the name of the David Beazley or Dabeaz LLC may be used to 186498Snate@binkert.org# endorse or promote products derived from this software without 196498Snate@binkert.org# specific prior written permission. 202632SN/A# 216498Snate@binkert.org# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 226498Snate@binkert.org# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 236498Snate@binkert.org# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 246498Snate@binkert.org# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 256498Snate@binkert.org# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 266498Snate@binkert.org# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 276498Snate@binkert.org# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 286498Snate@binkert.org# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 296498Snate@binkert.org# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 306498Snate@binkert.org# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 316498Snate@binkert.org# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 326498Snate@binkert.org# ----------------------------------------------------------------------------- 332632SN/A# 342632SN/A# This implements an LR parser that is constructed from grammar rules defined 354479Sbinkertn@umich.edu# as Python functions. The grammer is specified by supplying the BNF inside 364479Sbinkertn@umich.edu# Python documentation strings. The inspiration for this technique was borrowed 374479Sbinkertn@umich.edu# from John Aycock's Spark parsing system. PLY might be viewed as cross between 384479Sbinkertn@umich.edu# Spark and the GNU bison utility. 392632SN/A# 402632SN/A# The current implementation is only somewhat object-oriented. The 412632SN/A# LR parser itself is defined in terms of an object (which allows multiple 422632SN/A# parsers to co-exist). However, most of the variables used during table 432632SN/A# construction are defined in terms of global variables. Users shouldn't 442632SN/A# notice unless they are trying to define multiple parsers at the same 452632SN/A# time using threads (in which case they should have their head examined). 464479Sbinkertn@umich.edu# 474479Sbinkertn@umich.edu# This implementation supports both SLR and LALR(1) parsing. LALR(1) 484479Sbinkertn@umich.edu# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), 494479Sbinkertn@umich.edu# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, 504479Sbinkertn@umich.edu# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced 514479Sbinkertn@umich.edu# by the more efficient DeRemer and Pennello algorithm. 524479Sbinkertn@umich.edu# 534479Sbinkertn@umich.edu# :::::::: WARNING ::::::: 544479Sbinkertn@umich.edu# 554479Sbinkertn@umich.edu# Construction of LR parsing tables is fairly complicated and expensive. 564479Sbinkertn@umich.edu# To make this module run fast, a *LOT* of work has been put into 574479Sbinkertn@umich.edu# optimization---often at the expensive of readability and what might 584479Sbinkertn@umich.edu# consider to be good Python "coding style." Modify the code at your 594479Sbinkertn@umich.edu# own risk! 604479Sbinkertn@umich.edu# ---------------------------------------------------------------------------- 612632SN/A 626498Snate@binkert.org__version__ = "3.2" 636498Snate@binkert.org__tabversion__ = "3.2" # Table version 642632SN/A 652632SN/A#----------------------------------------------------------------------------- 662632SN/A# === User configurable parameters === 672632SN/A# 682632SN/A# Change these to modify the default behavior of yacc (if you wish) 692632SN/A#----------------------------------------------------------------------------- 702632SN/A 7110182SCurtis.Dunham@arm.comyaccdebug = 0 # Debugging mode. If set, yacc generates a 722632SN/A # a 'parser.out' file in the current directory 732632SN/A 742632SN/Adebug_file = 'parser.out' # Default name of the debugging file 752632SN/Atab_module = 'parsetab' # Default name of the table module 764479Sbinkertn@umich.edudefault_lr = 'LALR' # Default LR table generation method 772632SN/A 782632SN/Aerror_count = 3 # Number of symbols that must be shifted to leave recovery mode 792632SN/A 806498Snate@binkert.orgyaccdevel = 0 # Set to True if developing yacc. This turns off optimized 816498Snate@binkert.org # implementations of certain functions. 826498Snate@binkert.org 836498Snate@binkert.orgresultlimit = 40 # Size limit of results when running in debug mode. 846498Snate@binkert.org 856498Snate@binkert.orgpickle_protocol = 0 # Protocol to use when writing pickle files 866498Snate@binkert.org 876498Snate@binkert.orgimport re, types, sys, os.path 886498Snate@binkert.org 896498Snate@binkert.org# Compatibility function for python 2.6/3.0 906498Snate@binkert.orgif sys.version_info[0] < 3: 916498Snate@binkert.org def func_code(f): 926498Snate@binkert.org return f.func_code 936498Snate@binkert.orgelse: 946498Snate@binkert.org def func_code(f): 956498Snate@binkert.org return f.__code__ 966498Snate@binkert.org 976498Snate@binkert.org# Compatibility 986498Snate@binkert.orgtry: 996498Snate@binkert.org MAXINT = sys.maxint 1006498Snate@binkert.orgexcept AttributeError: 1016498Snate@binkert.org MAXINT = sys.maxsize 1026498Snate@binkert.org 1036498Snate@binkert.org# Python 2.x/3.0 compatibility. 1046498Snate@binkert.orgdef load_ply_lex(): 1056498Snate@binkert.org if sys.version_info[0] < 3: 1066498Snate@binkert.org import lex 1076498Snate@binkert.org else: 1086498Snate@binkert.org import ply.lex as lex 1096498Snate@binkert.org return lex 1106498Snate@binkert.org 1116498Snate@binkert.org# This object is a stand-in for a logging object created by the 1126498Snate@binkert.org# logging module. PLY will use this by default to create things 1136498Snate@binkert.org# such as the parser.out file. If a user wants more detailed 1146498Snate@binkert.org# information, they can create their own logging object and pass 1156498Snate@binkert.org# it into PLY. 1166498Snate@binkert.org 1176498Snate@binkert.orgclass PlyLogger(object): 1186498Snate@binkert.org def __init__(self,f): 1196498Snate@binkert.org self.f = f 1206498Snate@binkert.org def debug(self,msg,*args,**kwargs): 1216498Snate@binkert.org self.f.write((msg % args) + "\n") 1226498Snate@binkert.org info = debug 1236498Snate@binkert.org 1246498Snate@binkert.org def warning(self,msg,*args,**kwargs): 1256498Snate@binkert.org self.f.write("WARNING: "+ (msg % args) + "\n") 1266498Snate@binkert.org 1276498Snate@binkert.org def error(self,msg,*args,**kwargs): 1286498Snate@binkert.org self.f.write("ERROR: " + (msg % args) + "\n") 1296498Snate@binkert.org 1306498Snate@binkert.org critical = debug 1316498Snate@binkert.org 1326498Snate@binkert.org# Null logger is used when no output is generated. Does nothing. 1336498Snate@binkert.orgclass NullLogger(object): 1346498Snate@binkert.org def __getattribute__(self,name): 1356498Snate@binkert.org return self 1366498Snate@binkert.org def __call__(self,*args,**kwargs): 1376498Snate@binkert.org return self 1386498Snate@binkert.org 1392632SN/A# Exception raised for yacc-related errors 1402632SN/Aclass YaccError(Exception): pass 1412632SN/A 1426498Snate@binkert.org# Format the result message that the parser produces when running in debug mode. 1436498Snate@binkert.orgdef format_result(r): 1446498Snate@binkert.org repr_str = repr(r) 1456498Snate@binkert.org if '\n' in repr_str: repr_str = repr(repr_str) 1466498Snate@binkert.org if len(repr_str) > resultlimit: 1476498Snate@binkert.org repr_str = repr_str[:resultlimit]+" ..." 1486498Snate@binkert.org result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str) 1496498Snate@binkert.org return result 1506498Snate@binkert.org 1516498Snate@binkert.org 1526498Snate@binkert.org# Format stack entries when the parser is running in debug mode 1536498Snate@binkert.orgdef format_stack_entry(r): 1546498Snate@binkert.org repr_str = repr(r) 1556498Snate@binkert.org if '\n' in repr_str: repr_str = repr(repr_str) 1566498Snate@binkert.org if len(repr_str) < 16: 1576498Snate@binkert.org return repr_str 1586498Snate@binkert.org else: 1596498Snate@binkert.org return "<%s @ 0x%x>" % (type(r).__name__,id(r)) 1604479Sbinkertn@umich.edu 1612632SN/A#----------------------------------------------------------------------------- 1622632SN/A# === LR Parsing Engine === 1632632SN/A# 1642632SN/A# The following classes are used for the LR parser itself. These are not 1652632SN/A# used during table construction and are independent of the actual LR 1662632SN/A# table generation algorithm 1672632SN/A#----------------------------------------------------------------------------- 1682632SN/A 1692632SN/A# This class is used to hold non-terminal grammar symbols during parsing. 1702632SN/A# It normally has the following attributes set: 1712632SN/A# .type = Grammar symbol type 1722632SN/A# .value = Symbol value 1732632SN/A# .lineno = Starting line number 1742632SN/A# .endlineno = Ending line number (optional, set automatically) 1754479Sbinkertn@umich.edu# .lexpos = Starting lex position 1764479Sbinkertn@umich.edu# .endlexpos = Ending lex position (optional, set automatically) 1772632SN/A 1786498Snate@binkert.orgclass YaccSymbol: 1792632SN/A def __str__(self): return self.type 1802632SN/A def __repr__(self): return str(self) 1812632SN/A 1822632SN/A# This class is a wrapper around the objects actually passed to each 1832632SN/A# grammar rule. Index lookup and assignment actually assign the 1842632SN/A# .value attribute of the underlying YaccSymbol object. 1852632SN/A# The lineno() method returns the line number of a given 1862632SN/A# item (or 0 if not defined). The linespan() method returns 1872632SN/A# a tuple of (startline,endline) representing the range of lines 1884479Sbinkertn@umich.edu# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) 1894479Sbinkertn@umich.edu# representing the range of positional information for a symbol. 1902632SN/A 1914479Sbinkertn@umich.educlass YaccProduction: 1924479Sbinkertn@umich.edu def __init__(self,s,stack=None): 1932632SN/A self.slice = s 1944479Sbinkertn@umich.edu self.stack = stack 1956498Snate@binkert.org self.lexer = None 1966498Snate@binkert.org self.parser= None 1972632SN/A def __getitem__(self,n): 1984479Sbinkertn@umich.edu if n >= 0: return self.slice[n].value 1994479Sbinkertn@umich.edu else: return self.stack[n].value 2002632SN/A 2012632SN/A def __setitem__(self,n,v): 2022632SN/A self.slice[n].value = v 2032632SN/A 2044479Sbinkertn@umich.edu def __getslice__(self,i,j): 2054479Sbinkertn@umich.edu return [s.value for s in self.slice[i:j]] 2064479Sbinkertn@umich.edu 2072632SN/A def __len__(self): 2082632SN/A return len(self.slice) 2092632SN/A 2102632SN/A def lineno(self,n): 2112632SN/A return getattr(self.slice[n],"lineno",0) 2122632SN/A 2136498Snate@binkert.org def set_lineno(self,n,lineno): 2146498Snate@binkert.org self.slice[n].lineno = n 2156498Snate@binkert.org 2162632SN/A def linespan(self,n): 2172632SN/A startline = getattr(self.slice[n],"lineno",0) 2182632SN/A endline = getattr(self.slice[n],"endlineno",startline) 2192632SN/A return startline,endline 2202632SN/A 2214479Sbinkertn@umich.edu def lexpos(self,n): 2224479Sbinkertn@umich.edu return getattr(self.slice[n],"lexpos",0) 2234479Sbinkertn@umich.edu 2244479Sbinkertn@umich.edu def lexspan(self,n): 2254479Sbinkertn@umich.edu startpos = getattr(self.slice[n],"lexpos",0) 2264479Sbinkertn@umich.edu endpos = getattr(self.slice[n],"endlexpos",startpos) 2274479Sbinkertn@umich.edu return startpos,endpos 2284479Sbinkertn@umich.edu 2296498Snate@binkert.org def error(self): 2306498Snate@binkert.org raise SyntaxError 2316498Snate@binkert.org 2326498Snate@binkert.org 2336498Snate@binkert.org# ----------------------------------------------------------------------------- 2346498Snate@binkert.org# == LRParser == 2356498Snate@binkert.org# 2366498Snate@binkert.org# The LR Parsing engine. 2376498Snate@binkert.org# ----------------------------------------------------------------------------- 2386498Snate@binkert.org 2396498Snate@binkert.orgclass LRParser: 2406498Snate@binkert.org def __init__(self,lrtab,errorf): 2416498Snate@binkert.org self.productions = lrtab.lr_productions 2426498Snate@binkert.org self.action = lrtab.lr_action 2436498Snate@binkert.org self.goto = lrtab.lr_goto 2446498Snate@binkert.org self.errorfunc = errorf 2452632SN/A 2462632SN/A def errok(self): 2474479Sbinkertn@umich.edu self.errorok = 1 2482632SN/A 2492632SN/A def restart(self): 2502632SN/A del self.statestack[:] 2512632SN/A del self.symstack[:] 2522632SN/A sym = YaccSymbol() 2534479Sbinkertn@umich.edu sym.type = '$end' 2542632SN/A self.symstack.append(sym) 2552632SN/A self.statestack.append(0) 2562632SN/A 2576498Snate@binkert.org def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): 2586498Snate@binkert.org if debug or yaccdevel: 2596498Snate@binkert.org if isinstance(debug,int): 2606498Snate@binkert.org debug = PlyLogger(sys.stderr) 2616498Snate@binkert.org return self.parsedebug(input,lexer,debug,tracking,tokenfunc) 2626498Snate@binkert.org elif tracking: 2636498Snate@binkert.org return self.parseopt(input,lexer,debug,tracking,tokenfunc) 2646498Snate@binkert.org else: 2656498Snate@binkert.org return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc) 2666498Snate@binkert.org 2676498Snate@binkert.org 2686498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 2696498Snate@binkert.org # parsedebug(). 2706498Snate@binkert.org # 2716498Snate@binkert.org # This is the debugging enabled version of parse(). All changes made to the 2726498Snate@binkert.org # parsing engine should be made here. For the non-debugging version, 2736498Snate@binkert.org # copy this code to a method parseopt() and delete all of the sections 2746498Snate@binkert.org # enclosed in: 2756498Snate@binkert.org # 2766498Snate@binkert.org # #--! DEBUG 2776498Snate@binkert.org # statements 2786498Snate@binkert.org # #--! DEBUG 2796498Snate@binkert.org # 2806498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 2816498Snate@binkert.org 2826498Snate@binkert.org def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): 2832632SN/A lookahead = None # Current lookahead symbol 2842632SN/A lookaheadstack = [ ] # Stack of lookahead symbols 2856498Snate@binkert.org actions = self.action # Local reference to action table (to avoid lookup on self.) 2866498Snate@binkert.org goto = self.goto # Local reference to goto table (to avoid lookup on self.) 2876498Snate@binkert.org prod = self.productions # Local reference to production list (to avoid lookup on self.) 2884479Sbinkertn@umich.edu pslice = YaccProduction(None) # Production object passed to grammar rules 2896498Snate@binkert.org errorcount = 0 # Used during error recovery 2906498Snate@binkert.org 2916498Snate@binkert.org # --! DEBUG 2926498Snate@binkert.org debug.info("PLY: PARSE DEBUG START") 2936498Snate@binkert.org # --! DEBUG 2942632SN/A 2952632SN/A # If no lexer was given, we will try to use the lex module 2962632SN/A if not lexer: 2976498Snate@binkert.org lex = load_ply_lex() 2984479Sbinkertn@umich.edu lexer = lex.lexer 2992632SN/A 3006498Snate@binkert.org # Set up the lexer and parser objects on pslice 3012632SN/A pslice.lexer = lexer 3024479Sbinkertn@umich.edu pslice.parser = self 3032632SN/A 3042632SN/A # If input was supplied, pass to lexer 3056498Snate@binkert.org if input is not None: 3062632SN/A lexer.input(input) 3072632SN/A 3086498Snate@binkert.org if tokenfunc is None: 3096498Snate@binkert.org # Tokenize function 3106498Snate@binkert.org get_token = lexer.token 3116498Snate@binkert.org else: 3126498Snate@binkert.org get_token = tokenfunc 3136498Snate@binkert.org 3146498Snate@binkert.org # Set up the state and symbol stacks 3156498Snate@binkert.org 3166498Snate@binkert.org statestack = [ ] # Stack of parsing states 3176498Snate@binkert.org self.statestack = statestack 3186498Snate@binkert.org symstack = [ ] # Stack of grammar symbols 3196498Snate@binkert.org self.symstack = symstack 3206498Snate@binkert.org 3216498Snate@binkert.org pslice.stack = symstack # Put in the production 3226498Snate@binkert.org errtoken = None # Err token 3236498Snate@binkert.org 3246498Snate@binkert.org # The start state is assumed to be (0,$end) 3256498Snate@binkert.org 3266498Snate@binkert.org statestack.append(0) 3276498Snate@binkert.org sym = YaccSymbol() 3286498Snate@binkert.org sym.type = "$end" 3296498Snate@binkert.org symstack.append(sym) 3306498Snate@binkert.org state = 0 3316498Snate@binkert.org while 1: 3326498Snate@binkert.org # Get the next symbol on the input. If a lookahead symbol 3336498Snate@binkert.org # is already set, we just use that. Otherwise, we'll pull 3346498Snate@binkert.org # the next token off of the lookaheadstack or from the lexer 3356498Snate@binkert.org 3366498Snate@binkert.org # --! DEBUG 3376498Snate@binkert.org debug.debug('') 3386498Snate@binkert.org debug.debug('State : %s', state) 3396498Snate@binkert.org # --! DEBUG 3406498Snate@binkert.org 3416498Snate@binkert.org if not lookahead: 3426498Snate@binkert.org if not lookaheadstack: 3436498Snate@binkert.org lookahead = get_token() # Get the next token 3446498Snate@binkert.org else: 3456498Snate@binkert.org lookahead = lookaheadstack.pop() 3466498Snate@binkert.org if not lookahead: 3476498Snate@binkert.org lookahead = YaccSymbol() 3486498Snate@binkert.org lookahead.type = "$end" 3496498Snate@binkert.org 3506498Snate@binkert.org # --! DEBUG 3516498Snate@binkert.org debug.debug('Stack : %s', 3526498Snate@binkert.org ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) 3536498Snate@binkert.org # --! DEBUG 3546498Snate@binkert.org 3556498Snate@binkert.org # Check the action table 3566498Snate@binkert.org ltype = lookahead.type 3576498Snate@binkert.org t = actions[state].get(ltype) 3586498Snate@binkert.org 3596498Snate@binkert.org if t is not None: 3606498Snate@binkert.org if t > 0: 3616498Snate@binkert.org # shift a symbol on the stack 3626498Snate@binkert.org statestack.append(t) 3636498Snate@binkert.org state = t 3646498Snate@binkert.org 3656498Snate@binkert.org # --! DEBUG 3666498Snate@binkert.org debug.debug("Action : Shift and goto state %s", t) 3676498Snate@binkert.org # --! DEBUG 3686498Snate@binkert.org 3696498Snate@binkert.org symstack.append(lookahead) 3706498Snate@binkert.org lookahead = None 3716498Snate@binkert.org 3726498Snate@binkert.org # Decrease error count on successful shift 3736498Snate@binkert.org if errorcount: errorcount -=1 3746498Snate@binkert.org continue 3756498Snate@binkert.org 3766498Snate@binkert.org if t < 0: 3776498Snate@binkert.org # reduce a symbol on the stack, emit a production 3786498Snate@binkert.org p = prod[-t] 3796498Snate@binkert.org pname = p.name 3806498Snate@binkert.org plen = p.len 3816498Snate@binkert.org 3826498Snate@binkert.org # Get production function 3836498Snate@binkert.org sym = YaccSymbol() 3846498Snate@binkert.org sym.type = pname # Production name 3856498Snate@binkert.org sym.value = None 3866498Snate@binkert.org 3876498Snate@binkert.org # --! DEBUG 3886498Snate@binkert.org if plen: 3896498Snate@binkert.org debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) 3906498Snate@binkert.org else: 3916498Snate@binkert.org debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) 3926498Snate@binkert.org 3936498Snate@binkert.org # --! DEBUG 3946498Snate@binkert.org 3956498Snate@binkert.org if plen: 3966498Snate@binkert.org targ = symstack[-plen-1:] 3976498Snate@binkert.org targ[0] = sym 3986498Snate@binkert.org 3996498Snate@binkert.org # --! TRACKING 4006498Snate@binkert.org if tracking: 4016498Snate@binkert.org t1 = targ[1] 4026498Snate@binkert.org sym.lineno = t1.lineno 4036498Snate@binkert.org sym.lexpos = t1.lexpos 4046498Snate@binkert.org t1 = targ[-1] 4056498Snate@binkert.org sym.endlineno = getattr(t1,"endlineno",t1.lineno) 4066498Snate@binkert.org sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) 4076498Snate@binkert.org 4086498Snate@binkert.org # --! TRACKING 4096498Snate@binkert.org 4106498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 4116498Snate@binkert.org # The code enclosed in this section is duplicated 4126498Snate@binkert.org # below as a performance optimization. Make sure 4136498Snate@binkert.org # changes get made in both locations. 4146498Snate@binkert.org 4156498Snate@binkert.org pslice.slice = targ 4166498Snate@binkert.org 4176498Snate@binkert.org try: 4186498Snate@binkert.org # Call the grammar rule with our special slice object 4196498Snate@binkert.org del symstack[-plen:] 4206498Snate@binkert.org del statestack[-plen:] 4216498Snate@binkert.org p.callable(pslice) 4226498Snate@binkert.org # --! DEBUG 4236498Snate@binkert.org debug.info("Result : %s", format_result(pslice[0])) 4246498Snate@binkert.org # --! DEBUG 4256498Snate@binkert.org symstack.append(sym) 4266498Snate@binkert.org state = goto[statestack[-1]][pname] 4276498Snate@binkert.org statestack.append(state) 4286498Snate@binkert.org except SyntaxError: 4296498Snate@binkert.org # If an error was set. Enter error recovery state 4306498Snate@binkert.org lookaheadstack.append(lookahead) 4316498Snate@binkert.org symstack.pop() 4326498Snate@binkert.org statestack.pop() 4336498Snate@binkert.org state = statestack[-1] 4346498Snate@binkert.org sym.type = 'error' 4356498Snate@binkert.org lookahead = sym 4366498Snate@binkert.org errorcount = error_count 4376498Snate@binkert.org self.errorok = 0 4386498Snate@binkert.org continue 4396498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 4406498Snate@binkert.org 4416498Snate@binkert.org else: 4426498Snate@binkert.org 4436498Snate@binkert.org # --! TRACKING 4446498Snate@binkert.org if tracking: 4456498Snate@binkert.org sym.lineno = lexer.lineno 4466498Snate@binkert.org sym.lexpos = lexer.lexpos 4476498Snate@binkert.org # --! TRACKING 4486498Snate@binkert.org 4496498Snate@binkert.org targ = [ sym ] 4506498Snate@binkert.org 4516498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 4526498Snate@binkert.org # The code enclosed in this section is duplicated 4536498Snate@binkert.org # above as a performance optimization. Make sure 4546498Snate@binkert.org # changes get made in both locations. 4556498Snate@binkert.org 4566498Snate@binkert.org pslice.slice = targ 4576498Snate@binkert.org 4586498Snate@binkert.org try: 4596498Snate@binkert.org # Call the grammar rule with our special slice object 4606498Snate@binkert.org p.callable(pslice) 4616498Snate@binkert.org # --! DEBUG 4626498Snate@binkert.org debug.info("Result : %s", format_result(pslice[0])) 4636498Snate@binkert.org # --! DEBUG 4646498Snate@binkert.org symstack.append(sym) 4656498Snate@binkert.org state = goto[statestack[-1]][pname] 4666498Snate@binkert.org statestack.append(state) 4676498Snate@binkert.org except SyntaxError: 4686498Snate@binkert.org # If an error was set. Enter error recovery state 4696498Snate@binkert.org lookaheadstack.append(lookahead) 4706498Snate@binkert.org symstack.pop() 4716498Snate@binkert.org statestack.pop() 4726498Snate@binkert.org state = statestack[-1] 4736498Snate@binkert.org sym.type = 'error' 4746498Snate@binkert.org lookahead = sym 4756498Snate@binkert.org errorcount = error_count 4766498Snate@binkert.org self.errorok = 0 4776498Snate@binkert.org continue 4786498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 4796498Snate@binkert.org 4806498Snate@binkert.org if t == 0: 4816498Snate@binkert.org n = symstack[-1] 4826498Snate@binkert.org result = getattr(n,"value",None) 4836498Snate@binkert.org # --! DEBUG 4846498Snate@binkert.org debug.info("Done : Returning %s", format_result(result)) 4856498Snate@binkert.org debug.info("PLY: PARSE DEBUG END") 4866498Snate@binkert.org # --! DEBUG 4876498Snate@binkert.org return result 4886498Snate@binkert.org 4896498Snate@binkert.org if t == None: 4906498Snate@binkert.org 4916498Snate@binkert.org # --! DEBUG 4926498Snate@binkert.org debug.error('Error : %s', 4936498Snate@binkert.org ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) 4946498Snate@binkert.org # --! DEBUG 4956498Snate@binkert.org 4966498Snate@binkert.org # We have some kind of parsing error here. To handle 4976498Snate@binkert.org # this, we are going to push the current token onto 4986498Snate@binkert.org # the tokenstack and replace it with an 'error' token. 4996498Snate@binkert.org # If there are any synchronization rules, they may 5006498Snate@binkert.org # catch it. 5016498Snate@binkert.org # 5026498Snate@binkert.org # In addition to pushing the error token, we call call 5036498Snate@binkert.org # the user defined p_error() function if this is the 5046498Snate@binkert.org # first syntax error. This function is only called if 5056498Snate@binkert.org # errorcount == 0. 5066498Snate@binkert.org if errorcount == 0 or self.errorok: 5076498Snate@binkert.org errorcount = error_count 5086498Snate@binkert.org self.errorok = 0 5096498Snate@binkert.org errtoken = lookahead 5106498Snate@binkert.org if errtoken.type == "$end": 5116498Snate@binkert.org errtoken = None # End of file! 5126498Snate@binkert.org if self.errorfunc: 5136498Snate@binkert.org global errok,token,restart 5146498Snate@binkert.org errok = self.errok # Set some special functions available in error recovery 5156498Snate@binkert.org token = get_token 5166498Snate@binkert.org restart = self.restart 5176498Snate@binkert.org if errtoken and not hasattr(errtoken,'lexer'): 5186498Snate@binkert.org errtoken.lexer = lexer 5196498Snate@binkert.org tok = self.errorfunc(errtoken) 5206498Snate@binkert.org del errok, token, restart # Delete special functions 5216498Snate@binkert.org 5226498Snate@binkert.org if self.errorok: 5236498Snate@binkert.org # User must have done some kind of panic 5246498Snate@binkert.org # mode recovery on their own. The 5256498Snate@binkert.org # returned token is the next lookahead 5266498Snate@binkert.org lookahead = tok 5276498Snate@binkert.org errtoken = None 5286498Snate@binkert.org continue 5296498Snate@binkert.org else: 5306498Snate@binkert.org if errtoken: 5316498Snate@binkert.org if hasattr(errtoken,"lineno"): lineno = lookahead.lineno 5326498Snate@binkert.org else: lineno = 0 5336498Snate@binkert.org if lineno: 5346498Snate@binkert.org sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) 5356498Snate@binkert.org else: 5366498Snate@binkert.org sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) 5376498Snate@binkert.org else: 5386498Snate@binkert.org sys.stderr.write("yacc: Parse error in input. EOF\n") 5396498Snate@binkert.org return 5406498Snate@binkert.org 5416498Snate@binkert.org else: 5426498Snate@binkert.org errorcount = error_count 5436498Snate@binkert.org 5446498Snate@binkert.org # case 1: the statestack only has 1 entry on it. If we're in this state, the 5456498Snate@binkert.org # entire parse has been rolled back and we're completely hosed. The token is 5466498Snate@binkert.org # discarded and we just keep going. 5476498Snate@binkert.org 5486498Snate@binkert.org if len(statestack) <= 1 and lookahead.type != "$end": 5496498Snate@binkert.org lookahead = None 5506498Snate@binkert.org errtoken = None 5516498Snate@binkert.org state = 0 5526498Snate@binkert.org # Nuke the pushback stack 5536498Snate@binkert.org del lookaheadstack[:] 5546498Snate@binkert.org continue 5556498Snate@binkert.org 5566498Snate@binkert.org # case 2: the statestack has a couple of entries on it, but we're 5576498Snate@binkert.org # at the end of the file. nuke the top entry and generate an error token 5586498Snate@binkert.org 5596498Snate@binkert.org # Start nuking entries on the stack 5606498Snate@binkert.org if lookahead.type == "$end": 5616498Snate@binkert.org # Whoa. We're really hosed here. Bail out 5626498Snate@binkert.org return 5636498Snate@binkert.org 5646498Snate@binkert.org if lookahead.type != 'error': 5656498Snate@binkert.org sym = symstack[-1] 5666498Snate@binkert.org if sym.type == 'error': 5676498Snate@binkert.org # Hmmm. Error is on top of stack, we'll just nuke input 5686498Snate@binkert.org # symbol and continue 5696498Snate@binkert.org lookahead = None 5706498Snate@binkert.org continue 5716498Snate@binkert.org t = YaccSymbol() 5726498Snate@binkert.org t.type = 'error' 5736498Snate@binkert.org if hasattr(lookahead,"lineno"): 5746498Snate@binkert.org t.lineno = lookahead.lineno 5756498Snate@binkert.org t.value = lookahead 5766498Snate@binkert.org lookaheadstack.append(lookahead) 5776498Snate@binkert.org lookahead = t 5786498Snate@binkert.org else: 5796498Snate@binkert.org symstack.pop() 5806498Snate@binkert.org statestack.pop() 5816498Snate@binkert.org state = statestack[-1] # Potential bug fix 5826498Snate@binkert.org 5836498Snate@binkert.org continue 5846498Snate@binkert.org 5856498Snate@binkert.org # Call an error function here 5866498Snate@binkert.org raise RuntimeError("yacc: internal parser error!!!\n") 5876498Snate@binkert.org 5886498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 5896498Snate@binkert.org # parseopt(). 5906498Snate@binkert.org # 5916498Snate@binkert.org # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. 5926498Snate@binkert.org # Edit the debug version above, then copy any modifications to the method 5936498Snate@binkert.org # below while removing #--! DEBUG sections. 5946498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 5956498Snate@binkert.org 5966498Snate@binkert.org 5976498Snate@binkert.org def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): 5986498Snate@binkert.org lookahead = None # Current lookahead symbol 5996498Snate@binkert.org lookaheadstack = [ ] # Stack of lookahead symbols 6006498Snate@binkert.org actions = self.action # Local reference to action table (to avoid lookup on self.) 6016498Snate@binkert.org goto = self.goto # Local reference to goto table (to avoid lookup on self.) 6026498Snate@binkert.org prod = self.productions # Local reference to production list (to avoid lookup on self.) 6036498Snate@binkert.org pslice = YaccProduction(None) # Production object passed to grammar rules 6046498Snate@binkert.org errorcount = 0 # Used during error recovery 6056498Snate@binkert.org 6066498Snate@binkert.org # If no lexer was given, we will try to use the lex module 6076498Snate@binkert.org if not lexer: 6086498Snate@binkert.org lex = load_ply_lex() 6096498Snate@binkert.org lexer = lex.lexer 6106498Snate@binkert.org 6116498Snate@binkert.org # Set up the lexer and parser objects on pslice 6126498Snate@binkert.org pslice.lexer = lexer 6136498Snate@binkert.org pslice.parser = self 6146498Snate@binkert.org 6156498Snate@binkert.org # If input was supplied, pass to lexer 6166498Snate@binkert.org if input is not None: 6176498Snate@binkert.org lexer.input(input) 6186498Snate@binkert.org 6196498Snate@binkert.org if tokenfunc is None: 6206498Snate@binkert.org # Tokenize function 6216498Snate@binkert.org get_token = lexer.token 6226498Snate@binkert.org else: 6236498Snate@binkert.org get_token = tokenfunc 6246498Snate@binkert.org 6256498Snate@binkert.org # Set up the state and symbol stacks 6262632SN/A 6272632SN/A statestack = [ ] # Stack of parsing states 6282632SN/A self.statestack = statestack 6292632SN/A symstack = [ ] # Stack of grammar symbols 6302632SN/A self.symstack = symstack 6312632SN/A 6324479Sbinkertn@umich.edu pslice.stack = symstack # Put in the production 6332632SN/A errtoken = None # Err token 6342632SN/A 6354479Sbinkertn@umich.edu # The start state is assumed to be (0,$end) 6364479Sbinkertn@umich.edu 6372632SN/A statestack.append(0) 6382632SN/A sym = YaccSymbol() 6394479Sbinkertn@umich.edu sym.type = '$end' 6402632SN/A symstack.append(sym) 6414479Sbinkertn@umich.edu state = 0 6422632SN/A while 1: 6432632SN/A # Get the next symbol on the input. If a lookahead symbol 6442632SN/A # is already set, we just use that. Otherwise, we'll pull 6452632SN/A # the next token off of the lookaheadstack or from the lexer 6466498Snate@binkert.org 6472632SN/A if not lookahead: 6482632SN/A if not lookaheadstack: 6492632SN/A lookahead = get_token() # Get the next token 6502632SN/A else: 6512632SN/A lookahead = lookaheadstack.pop() 6522632SN/A if not lookahead: 6532632SN/A lookahead = YaccSymbol() 6544479Sbinkertn@umich.edu lookahead.type = '$end' 6552632SN/A 6562632SN/A # Check the action table 6572632SN/A ltype = lookahead.type 6584479Sbinkertn@umich.edu t = actions[state].get(ltype) 6592632SN/A 6602632SN/A if t is not None: 6612632SN/A if t > 0: 6622632SN/A # shift a symbol on the stack 6632632SN/A statestack.append(t) 6644479Sbinkertn@umich.edu state = t 6656498Snate@binkert.org 6662632SN/A symstack.append(lookahead) 6672632SN/A lookahead = None 6682632SN/A 6692632SN/A # Decrease error count on successful shift 6704479Sbinkertn@umich.edu if errorcount: errorcount -=1 6712632SN/A continue 6722632SN/A 6732632SN/A if t < 0: 6742632SN/A # reduce a symbol on the stack, emit a production 6752632SN/A p = prod[-t] 6762632SN/A pname = p.name 6772632SN/A plen = p.len 6782632SN/A 6792632SN/A # Get production function 6802632SN/A sym = YaccSymbol() 6812632SN/A sym.type = pname # Production name 6822632SN/A sym.value = None 6832632SN/A 6842632SN/A if plen: 6852632SN/A targ = symstack[-plen-1:] 6862632SN/A targ[0] = sym 6876498Snate@binkert.org 6886498Snate@binkert.org # --! TRACKING 6894479Sbinkertn@umich.edu if tracking: 6904479Sbinkertn@umich.edu t1 = targ[1] 6914479Sbinkertn@umich.edu sym.lineno = t1.lineno 6924479Sbinkertn@umich.edu sym.lexpos = t1.lexpos 6934479Sbinkertn@umich.edu t1 = targ[-1] 6944479Sbinkertn@umich.edu sym.endlineno = getattr(t1,"endlineno",t1.lineno) 6954479Sbinkertn@umich.edu sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) 6966498Snate@binkert.org 6976498Snate@binkert.org # --! TRACKING 6986498Snate@binkert.org 6996498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 7006498Snate@binkert.org # The code enclosed in this section is duplicated 7016498Snate@binkert.org # below as a performance optimization. Make sure 7026498Snate@binkert.org # changes get made in both locations. 7036498Snate@binkert.org 7046498Snate@binkert.org pslice.slice = targ 7056498Snate@binkert.org 7066498Snate@binkert.org try: 7076498Snate@binkert.org # Call the grammar rule with our special slice object 7086498Snate@binkert.org del symstack[-plen:] 7096498Snate@binkert.org del statestack[-plen:] 7106498Snate@binkert.org p.callable(pslice) 7116498Snate@binkert.org symstack.append(sym) 7126498Snate@binkert.org state = goto[statestack[-1]][pname] 7136498Snate@binkert.org statestack.append(state) 7146498Snate@binkert.org except SyntaxError: 7156498Snate@binkert.org # If an error was set. Enter error recovery state 7166498Snate@binkert.org lookaheadstack.append(lookahead) 7176498Snate@binkert.org symstack.pop() 7186498Snate@binkert.org statestack.pop() 7196498Snate@binkert.org state = statestack[-1] 7206498Snate@binkert.org sym.type = 'error' 7216498Snate@binkert.org lookahead = sym 7226498Snate@binkert.org errorcount = error_count 7236498Snate@binkert.org self.errorok = 0 7246498Snate@binkert.org continue 7256498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 7266498Snate@binkert.org 7272632SN/A else: 7286498Snate@binkert.org 7296498Snate@binkert.org # --! TRACKING 7304479Sbinkertn@umich.edu if tracking: 7314479Sbinkertn@umich.edu sym.lineno = lexer.lineno 7324479Sbinkertn@umich.edu sym.lexpos = lexer.lexpos 7336498Snate@binkert.org # --! TRACKING 7346498Snate@binkert.org 7352632SN/A targ = [ sym ] 7366498Snate@binkert.org 7376498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 7386498Snate@binkert.org # The code enclosed in this section is duplicated 7396498Snate@binkert.org # above as a performance optimization. Make sure 7406498Snate@binkert.org # changes get made in both locations. 7416498Snate@binkert.org 7426498Snate@binkert.org pslice.slice = targ 7436498Snate@binkert.org 7446498Snate@binkert.org try: 7456498Snate@binkert.org # Call the grammar rule with our special slice object 7466498Snate@binkert.org p.callable(pslice) 7476498Snate@binkert.org symstack.append(sym) 7486498Snate@binkert.org state = goto[statestack[-1]][pname] 7496498Snate@binkert.org statestack.append(state) 7506498Snate@binkert.org except SyntaxError: 7516498Snate@binkert.org # If an error was set. Enter error recovery state 7526498Snate@binkert.org lookaheadstack.append(lookahead) 7536498Snate@binkert.org symstack.pop() 7546498Snate@binkert.org statestack.pop() 7556498Snate@binkert.org state = statestack[-1] 7566498Snate@binkert.org sym.type = 'error' 7576498Snate@binkert.org lookahead = sym 7586498Snate@binkert.org errorcount = error_count 7596498Snate@binkert.org self.errorok = 0 7606498Snate@binkert.org continue 7616498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 7622632SN/A 7632632SN/A if t == 0: 7642632SN/A n = symstack[-1] 7652632SN/A return getattr(n,"value",None) 7662632SN/A 7672632SN/A if t == None: 7686498Snate@binkert.org 7694479Sbinkertn@umich.edu # We have some kind of parsing error here. To handle 7704479Sbinkertn@umich.edu # this, we are going to push the current token onto 7714479Sbinkertn@umich.edu # the tokenstack and replace it with an 'error' token. 7724479Sbinkertn@umich.edu # If there are any synchronization rules, they may 7734479Sbinkertn@umich.edu # catch it. 7742632SN/A # 7754479Sbinkertn@umich.edu # In addition to pushing the error token, we call call 7764479Sbinkertn@umich.edu # the user defined p_error() function if this is the 7774479Sbinkertn@umich.edu # first syntax error. This function is only called if 7784479Sbinkertn@umich.edu # errorcount == 0. 7794479Sbinkertn@umich.edu if errorcount == 0 or self.errorok: 7804479Sbinkertn@umich.edu errorcount = error_count 7814479Sbinkertn@umich.edu self.errorok = 0 7822632SN/A errtoken = lookahead 7834479Sbinkertn@umich.edu if errtoken.type == '$end': 7842632SN/A errtoken = None # End of file! 7852632SN/A if self.errorfunc: 7862632SN/A global errok,token,restart 7872632SN/A errok = self.errok # Set some special functions available in error recovery 7882632SN/A token = get_token 7892632SN/A restart = self.restart 7906498Snate@binkert.org if errtoken and not hasattr(errtoken,'lexer'): 7916498Snate@binkert.org errtoken.lexer = lexer 7922632SN/A tok = self.errorfunc(errtoken) 7932632SN/A del errok, token, restart # Delete special functions 7942632SN/A 7954479Sbinkertn@umich.edu if self.errorok: 7964479Sbinkertn@umich.edu # User must have done some kind of panic 7974479Sbinkertn@umich.edu # mode recovery on their own. The 7984479Sbinkertn@umich.edu # returned token is the next lookahead 7992632SN/A lookahead = tok 8002632SN/A errtoken = None 8012632SN/A continue 8022632SN/A else: 8032632SN/A if errtoken: 8042632SN/A if hasattr(errtoken,"lineno"): lineno = lookahead.lineno 8052632SN/A else: lineno = 0 8062632SN/A if lineno: 8074479Sbinkertn@umich.edu sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) 8082632SN/A else: 8094479Sbinkertn@umich.edu sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) 8102632SN/A else: 8114479Sbinkertn@umich.edu sys.stderr.write("yacc: Parse error in input. EOF\n") 8122632SN/A return 8132632SN/A 8142632SN/A else: 8154479Sbinkertn@umich.edu errorcount = error_count 8162632SN/A 8172632SN/A # case 1: the statestack only has 1 entry on it. If we're in this state, the 8182632SN/A # entire parse has been rolled back and we're completely hosed. The token is 8192632SN/A # discarded and we just keep going. 8202632SN/A 8214479Sbinkertn@umich.edu if len(statestack) <= 1 and lookahead.type != '$end': 8222632SN/A lookahead = None 8232632SN/A errtoken = None 8246498Snate@binkert.org state = 0 8252632SN/A # Nuke the pushback stack 8262632SN/A del lookaheadstack[:] 8272632SN/A continue 8282632SN/A 8292632SN/A # case 2: the statestack has a couple of entries on it, but we're 8302632SN/A # at the end of the file. nuke the top entry and generate an error token 8312632SN/A 8322632SN/A # Start nuking entries on the stack 8334479Sbinkertn@umich.edu if lookahead.type == '$end': 8342632SN/A # Whoa. We're really hosed here. Bail out 8352632SN/A return 8362632SN/A 8372632SN/A if lookahead.type != 'error': 8382632SN/A sym = symstack[-1] 8392632SN/A if sym.type == 'error': 8402632SN/A # Hmmm. Error is on top of stack, we'll just nuke input 8412632SN/A # symbol and continue 8422632SN/A lookahead = None 8432632SN/A continue 8442632SN/A t = YaccSymbol() 8452632SN/A t.type = 'error' 8462632SN/A if hasattr(lookahead,"lineno"): 8472632SN/A t.lineno = lookahead.lineno 8482632SN/A t.value = lookahead 8492632SN/A lookaheadstack.append(lookahead) 8502632SN/A lookahead = t 8512632SN/A else: 8522632SN/A symstack.pop() 8532632SN/A statestack.pop() 8546498Snate@binkert.org state = statestack[-1] # Potential bug fix 8552632SN/A 8562632SN/A continue 8572632SN/A 8582632SN/A # Call an error function here 8596498Snate@binkert.org raise RuntimeError("yacc: internal parser error!!!\n") 8606498Snate@binkert.org 8616498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 8626498Snate@binkert.org # parseopt_notrack(). 8636498Snate@binkert.org # 8646498Snate@binkert.org # Optimized version of parseopt() with line number tracking removed. 8656498Snate@binkert.org # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove 8666498Snate@binkert.org # code in the #--! TRACKING sections 8676498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 8686498Snate@binkert.org 8696498Snate@binkert.org def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): 8706498Snate@binkert.org lookahead = None # Current lookahead symbol 8716498Snate@binkert.org lookaheadstack = [ ] # Stack of lookahead symbols 8726498Snate@binkert.org actions = self.action # Local reference to action table (to avoid lookup on self.) 8736498Snate@binkert.org goto = self.goto # Local reference to goto table (to avoid lookup on self.) 8746498Snate@binkert.org prod = self.productions # Local reference to production list (to avoid lookup on self.) 8756498Snate@binkert.org pslice = YaccProduction(None) # Production object passed to grammar rules 8766498Snate@binkert.org errorcount = 0 # Used during error recovery 8776498Snate@binkert.org 8786498Snate@binkert.org # If no lexer was given, we will try to use the lex module 8796498Snate@binkert.org if not lexer: 8806498Snate@binkert.org lex = load_ply_lex() 8816498Snate@binkert.org lexer = lex.lexer 8826498Snate@binkert.org 8836498Snate@binkert.org # Set up the lexer and parser objects on pslice 8846498Snate@binkert.org pslice.lexer = lexer 8856498Snate@binkert.org pslice.parser = self 8866498Snate@binkert.org 8876498Snate@binkert.org # If input was supplied, pass to lexer 8886498Snate@binkert.org if input is not None: 8896498Snate@binkert.org lexer.input(input) 8906498Snate@binkert.org 8916498Snate@binkert.org if tokenfunc is None: 8926498Snate@binkert.org # Tokenize function 8936498Snate@binkert.org get_token = lexer.token 8946498Snate@binkert.org else: 8956498Snate@binkert.org get_token = tokenfunc 8966498Snate@binkert.org 8976498Snate@binkert.org # Set up the state and symbol stacks 8986498Snate@binkert.org 8996498Snate@binkert.org statestack = [ ] # Stack of parsing states 9006498Snate@binkert.org self.statestack = statestack 9016498Snate@binkert.org symstack = [ ] # Stack of grammar symbols 9026498Snate@binkert.org self.symstack = symstack 9036498Snate@binkert.org 9046498Snate@binkert.org pslice.stack = symstack # Put in the production 9056498Snate@binkert.org errtoken = None # Err token 9066498Snate@binkert.org 9076498Snate@binkert.org # The start state is assumed to be (0,$end) 9086498Snate@binkert.org 9096498Snate@binkert.org statestack.append(0) 9106498Snate@binkert.org sym = YaccSymbol() 9116498Snate@binkert.org sym.type = '$end' 9126498Snate@binkert.org symstack.append(sym) 9136498Snate@binkert.org state = 0 9146498Snate@binkert.org while 1: 9156498Snate@binkert.org # Get the next symbol on the input. If a lookahead symbol 9166498Snate@binkert.org # is already set, we just use that. Otherwise, we'll pull 9176498Snate@binkert.org # the next token off of the lookaheadstack or from the lexer 9186498Snate@binkert.org 9196498Snate@binkert.org if not lookahead: 9206498Snate@binkert.org if not lookaheadstack: 9216498Snate@binkert.org lookahead = get_token() # Get the next token 9226498Snate@binkert.org else: 9236498Snate@binkert.org lookahead = lookaheadstack.pop() 9246498Snate@binkert.org if not lookahead: 9256498Snate@binkert.org lookahead = YaccSymbol() 9266498Snate@binkert.org lookahead.type = '$end' 9276498Snate@binkert.org 9286498Snate@binkert.org # Check the action table 9296498Snate@binkert.org ltype = lookahead.type 9306498Snate@binkert.org t = actions[state].get(ltype) 9316498Snate@binkert.org 9326498Snate@binkert.org if t is not None: 9336498Snate@binkert.org if t > 0: 9346498Snate@binkert.org # shift a symbol on the stack 9356498Snate@binkert.org statestack.append(t) 9366498Snate@binkert.org state = t 9376498Snate@binkert.org 9386498Snate@binkert.org symstack.append(lookahead) 9396498Snate@binkert.org lookahead = None 9406498Snate@binkert.org 9416498Snate@binkert.org # Decrease error count on successful shift 9426498Snate@binkert.org if errorcount: errorcount -=1 9436498Snate@binkert.org continue 9446498Snate@binkert.org 9456498Snate@binkert.org if t < 0: 9466498Snate@binkert.org # reduce a symbol on the stack, emit a production 9476498Snate@binkert.org p = prod[-t] 9486498Snate@binkert.org pname = p.name 9496498Snate@binkert.org plen = p.len 9506498Snate@binkert.org 9516498Snate@binkert.org # Get production function 9526498Snate@binkert.org sym = YaccSymbol() 9536498Snate@binkert.org sym.type = pname # Production name 9546498Snate@binkert.org sym.value = None 9556498Snate@binkert.org 9566498Snate@binkert.org if plen: 9576498Snate@binkert.org targ = symstack[-plen-1:] 9586498Snate@binkert.org targ[0] = sym 9596498Snate@binkert.org 9606498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 9616498Snate@binkert.org # The code enclosed in this section is duplicated 9626498Snate@binkert.org # below as a performance optimization. Make sure 9636498Snate@binkert.org # changes get made in both locations. 9646498Snate@binkert.org 9656498Snate@binkert.org pslice.slice = targ 9666498Snate@binkert.org 9676498Snate@binkert.org try: 9686498Snate@binkert.org # Call the grammar rule with our special slice object 9696498Snate@binkert.org del symstack[-plen:] 9706498Snate@binkert.org del statestack[-plen:] 9716498Snate@binkert.org p.callable(pslice) 9726498Snate@binkert.org symstack.append(sym) 9736498Snate@binkert.org state = goto[statestack[-1]][pname] 9746498Snate@binkert.org statestack.append(state) 9756498Snate@binkert.org except SyntaxError: 9766498Snate@binkert.org # If an error was set. Enter error recovery state 9776498Snate@binkert.org lookaheadstack.append(lookahead) 9786498Snate@binkert.org symstack.pop() 9796498Snate@binkert.org statestack.pop() 9806498Snate@binkert.org state = statestack[-1] 9816498Snate@binkert.org sym.type = 'error' 9826498Snate@binkert.org lookahead = sym 9836498Snate@binkert.org errorcount = error_count 9846498Snate@binkert.org self.errorok = 0 9856498Snate@binkert.org continue 9866498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 9876498Snate@binkert.org 9886498Snate@binkert.org else: 9896498Snate@binkert.org 9906498Snate@binkert.org targ = [ sym ] 9916498Snate@binkert.org 9926498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 9936498Snate@binkert.org # The code enclosed in this section is duplicated 9946498Snate@binkert.org # above as a performance optimization. Make sure 9956498Snate@binkert.org # changes get made in both locations. 9966498Snate@binkert.org 9976498Snate@binkert.org pslice.slice = targ 9986498Snate@binkert.org 9996498Snate@binkert.org try: 10006498Snate@binkert.org # Call the grammar rule with our special slice object 10016498Snate@binkert.org p.callable(pslice) 10026498Snate@binkert.org symstack.append(sym) 10036498Snate@binkert.org state = goto[statestack[-1]][pname] 10046498Snate@binkert.org statestack.append(state) 10056498Snate@binkert.org except SyntaxError: 10066498Snate@binkert.org # If an error was set. Enter error recovery state 10076498Snate@binkert.org lookaheadstack.append(lookahead) 10086498Snate@binkert.org symstack.pop() 10096498Snate@binkert.org statestack.pop() 10106498Snate@binkert.org state = statestack[-1] 10116498Snate@binkert.org sym.type = 'error' 10126498Snate@binkert.org lookahead = sym 10136498Snate@binkert.org errorcount = error_count 10146498Snate@binkert.org self.errorok = 0 10156498Snate@binkert.org continue 10166498Snate@binkert.org # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 10176498Snate@binkert.org 10186498Snate@binkert.org if t == 0: 10196498Snate@binkert.org n = symstack[-1] 10206498Snate@binkert.org return getattr(n,"value",None) 10216498Snate@binkert.org 10226498Snate@binkert.org if t == None: 10236498Snate@binkert.org 10246498Snate@binkert.org # We have some kind of parsing error here. To handle 10256498Snate@binkert.org # this, we are going to push the current token onto 10266498Snate@binkert.org # the tokenstack and replace it with an 'error' token. 10276498Snate@binkert.org # If there are any synchronization rules, they may 10286498Snate@binkert.org # catch it. 10296498Snate@binkert.org # 10306498Snate@binkert.org # In addition to pushing the error token, we call call 10316498Snate@binkert.org # the user defined p_error() function if this is the 10326498Snate@binkert.org # first syntax error. This function is only called if 10336498Snate@binkert.org # errorcount == 0. 10346498Snate@binkert.org if errorcount == 0 or self.errorok: 10356498Snate@binkert.org errorcount = error_count 10366498Snate@binkert.org self.errorok = 0 10376498Snate@binkert.org errtoken = lookahead 10386498Snate@binkert.org if errtoken.type == '$end': 10396498Snate@binkert.org errtoken = None # End of file! 10406498Snate@binkert.org if self.errorfunc: 10416498Snate@binkert.org global errok,token,restart 10426498Snate@binkert.org errok = self.errok # Set some special functions available in error recovery 10436498Snate@binkert.org token = get_token 10446498Snate@binkert.org restart = self.restart 10456498Snate@binkert.org if errtoken and not hasattr(errtoken,'lexer'): 10466498Snate@binkert.org errtoken.lexer = lexer 10476498Snate@binkert.org tok = self.errorfunc(errtoken) 10486498Snate@binkert.org del errok, token, restart # Delete special functions 10496498Snate@binkert.org 10506498Snate@binkert.org if self.errorok: 10516498Snate@binkert.org # User must have done some kind of panic 10526498Snate@binkert.org # mode recovery on their own. The 10536498Snate@binkert.org # returned token is the next lookahead 10546498Snate@binkert.org lookahead = tok 10556498Snate@binkert.org errtoken = None 10566498Snate@binkert.org continue 10576498Snate@binkert.org else: 10586498Snate@binkert.org if errtoken: 10596498Snate@binkert.org if hasattr(errtoken,"lineno"): lineno = lookahead.lineno 10606498Snate@binkert.org else: lineno = 0 10616498Snate@binkert.org if lineno: 10626498Snate@binkert.org sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) 10636498Snate@binkert.org else: 10646498Snate@binkert.org sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) 10656498Snate@binkert.org else: 10666498Snate@binkert.org sys.stderr.write("yacc: Parse error in input. EOF\n") 10676498Snate@binkert.org return 10686498Snate@binkert.org 10696498Snate@binkert.org else: 10706498Snate@binkert.org errorcount = error_count 10716498Snate@binkert.org 10726498Snate@binkert.org # case 1: the statestack only has 1 entry on it. If we're in this state, the 10736498Snate@binkert.org # entire parse has been rolled back and we're completely hosed. The token is 10746498Snate@binkert.org # discarded and we just keep going. 10756498Snate@binkert.org 10766498Snate@binkert.org if len(statestack) <= 1 and lookahead.type != '$end': 10776498Snate@binkert.org lookahead = None 10786498Snate@binkert.org errtoken = None 10796498Snate@binkert.org state = 0 10806498Snate@binkert.org # Nuke the pushback stack 10816498Snate@binkert.org del lookaheadstack[:] 10826498Snate@binkert.org continue 10836498Snate@binkert.org 10846498Snate@binkert.org # case 2: the statestack has a couple of entries on it, but we're 10856498Snate@binkert.org # at the end of the file. nuke the top entry and generate an error token 10866498Snate@binkert.org 10876498Snate@binkert.org # Start nuking entries on the stack 10886498Snate@binkert.org if lookahead.type == '$end': 10896498Snate@binkert.org # Whoa. We're really hosed here. Bail out 10906498Snate@binkert.org return 10916498Snate@binkert.org 10926498Snate@binkert.org if lookahead.type != 'error': 10936498Snate@binkert.org sym = symstack[-1] 10946498Snate@binkert.org if sym.type == 'error': 10956498Snate@binkert.org # Hmmm. Error is on top of stack, we'll just nuke input 10966498Snate@binkert.org # symbol and continue 10976498Snate@binkert.org lookahead = None 10986498Snate@binkert.org continue 10996498Snate@binkert.org t = YaccSymbol() 11006498Snate@binkert.org t.type = 'error' 11016498Snate@binkert.org if hasattr(lookahead,"lineno"): 11026498Snate@binkert.org t.lineno = lookahead.lineno 11036498Snate@binkert.org t.value = lookahead 11046498Snate@binkert.org lookaheadstack.append(lookahead) 11056498Snate@binkert.org lookahead = t 11066498Snate@binkert.org else: 11076498Snate@binkert.org symstack.pop() 11086498Snate@binkert.org statestack.pop() 11096498Snate@binkert.org state = statestack[-1] # Potential bug fix 11106498Snate@binkert.org 11116498Snate@binkert.org continue 11126498Snate@binkert.org 11136498Snate@binkert.org # Call an error function here 11146498Snate@binkert.org raise RuntimeError("yacc: internal parser error!!!\n") 11152632SN/A 11162632SN/A# ----------------------------------------------------------------------------- 11176498Snate@binkert.org# === Grammar Representation === 11182632SN/A# 11196498Snate@binkert.org# The following functions, classes, and variables are used to represent and 11206498Snate@binkert.org# manipulate the rules that make up a grammar. 11212632SN/A# ----------------------------------------------------------------------------- 11222632SN/A 11236498Snate@binkert.orgimport re 11246498Snate@binkert.org 11256498Snate@binkert.org# regex matching identifiers 11266498Snate@binkert.org_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') 11272632SN/A 11282632SN/A# ----------------------------------------------------------------------------- 11292632SN/A# class Production: 11302632SN/A# 11312632SN/A# This class stores the raw information about a single production or grammar rule. 11326498Snate@binkert.org# A grammar rule refers to a specification such as this: 11332632SN/A# 11346498Snate@binkert.org# expr : expr PLUS term 11356498Snate@binkert.org# 11366498Snate@binkert.org# Here are the basic attributes defined on all productions 11376498Snate@binkert.org# 11386498Snate@binkert.org# name - Name of the production. For example 'expr' 11396498Snate@binkert.org# prod - A list of symbols on the right side ['expr','PLUS','term'] 11406498Snate@binkert.org# prec - Production precedence level 11412632SN/A# number - Production number. 11426498Snate@binkert.org# func - Function that executes on reduce 11436498Snate@binkert.org# file - File where production function is defined 11446498Snate@binkert.org# lineno - Line number where production function is defined 11452632SN/A# 11466498Snate@binkert.org# The following attributes are defined or optional. 11472632SN/A# 11486498Snate@binkert.org# len - Length of the production (number of symbols on right hand side) 11496498Snate@binkert.org# usyms - Set of unique symbols found in the production 11506498Snate@binkert.org# ----------------------------------------------------------------------------- 11516498Snate@binkert.org 11526498Snate@binkert.orgclass Production(object): 11536498Snate@binkert.org reduced = 0 11546498Snate@binkert.org def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0): 11556498Snate@binkert.org self.name = name 11566498Snate@binkert.org self.prod = tuple(prod) 11576498Snate@binkert.org self.number = number 11586498Snate@binkert.org self.func = func 11596498Snate@binkert.org self.callable = None 11606498Snate@binkert.org self.file = file 11616498Snate@binkert.org self.line = line 11626498Snate@binkert.org self.prec = precedence 11636498Snate@binkert.org 11646498Snate@binkert.org # Internal settings used during table construction 11656498Snate@binkert.org 11666498Snate@binkert.org self.len = len(self.prod) # Length of the production 11676498Snate@binkert.org 11686498Snate@binkert.org # Create a list of unique production symbols used in the production 11696498Snate@binkert.org self.usyms = [ ] 11706498Snate@binkert.org for s in self.prod: 11716498Snate@binkert.org if s not in self.usyms: 11726498Snate@binkert.org self.usyms.append(s) 11736498Snate@binkert.org 11746498Snate@binkert.org # List of all LR items for the production 11756498Snate@binkert.org self.lr_items = [] 11766498Snate@binkert.org self.lr_next = None 11776498Snate@binkert.org 11786498Snate@binkert.org # Create a string representation 11796498Snate@binkert.org if self.prod: 11806498Snate@binkert.org self.str = "%s -> %s" % (self.name," ".join(self.prod)) 11816498Snate@binkert.org else: 11826498Snate@binkert.org self.str = "%s -> <empty>" % self.name 11836498Snate@binkert.org 11846498Snate@binkert.org def __str__(self): 11856498Snate@binkert.org return self.str 11866498Snate@binkert.org 11876498Snate@binkert.org def __repr__(self): 11886498Snate@binkert.org return "Production("+str(self)+")" 11896498Snate@binkert.org 11906498Snate@binkert.org def __len__(self): 11916498Snate@binkert.org return len(self.prod) 11926498Snate@binkert.org 11936498Snate@binkert.org def __nonzero__(self): 11946498Snate@binkert.org return 1 11956498Snate@binkert.org 11966498Snate@binkert.org def __getitem__(self,index): 11976498Snate@binkert.org return self.prod[index] 11986498Snate@binkert.org 11996498Snate@binkert.org # Return the nth lr_item from the production (or None if at the end) 12006498Snate@binkert.org def lr_item(self,n): 12016498Snate@binkert.org if n > len(self.prod): return None 12026498Snate@binkert.org p = LRItem(self,n) 12036498Snate@binkert.org 12046498Snate@binkert.org # Precompute the list of productions immediately following. Hack. Remove later 12056498Snate@binkert.org try: 12066498Snate@binkert.org p.lr_after = Prodnames[p.prod[n+1]] 12076498Snate@binkert.org except (IndexError,KeyError): 12086498Snate@binkert.org p.lr_after = [] 12096498Snate@binkert.org try: 12106498Snate@binkert.org p.lr_before = p.prod[n-1] 12116498Snate@binkert.org except IndexError: 12126498Snate@binkert.org p.lr_before = None 12136498Snate@binkert.org 12146498Snate@binkert.org return p 12156498Snate@binkert.org 12166498Snate@binkert.org # Bind the production function name to a callable 12176498Snate@binkert.org def bind(self,pdict): 12186498Snate@binkert.org if self.func: 12196498Snate@binkert.org self.callable = pdict[self.func] 12206498Snate@binkert.org 12216498Snate@binkert.org# This class serves as a minimal standin for Production objects when 12226498Snate@binkert.org# reading table data from files. It only contains information 12236498Snate@binkert.org# actually used by the LR parsing engine, plus some additional 12246498Snate@binkert.org# debugging information. 12256498Snate@binkert.orgclass MiniProduction(object): 12266498Snate@binkert.org def __init__(self,str,name,len,func,file,line): 12276498Snate@binkert.org self.name = name 12286498Snate@binkert.org self.len = len 12296498Snate@binkert.org self.func = func 12306498Snate@binkert.org self.callable = None 12316498Snate@binkert.org self.file = file 12326498Snate@binkert.org self.line = line 12336498Snate@binkert.org self.str = str 12346498Snate@binkert.org def __str__(self): 12356498Snate@binkert.org return self.str 12366498Snate@binkert.org def __repr__(self): 12376498Snate@binkert.org return "MiniProduction(%s)" % self.str 12386498Snate@binkert.org 12396498Snate@binkert.org # Bind the production function name to a callable 12406498Snate@binkert.org def bind(self,pdict): 12416498Snate@binkert.org if self.func: 12426498Snate@binkert.org self.callable = pdict[self.func] 12436498Snate@binkert.org 12446498Snate@binkert.org 12456498Snate@binkert.org# ----------------------------------------------------------------------------- 12466498Snate@binkert.org# class LRItem 12476498Snate@binkert.org# 12486498Snate@binkert.org# This class represents a specific stage of parsing a production rule. For 12496498Snate@binkert.org# example: 12506498Snate@binkert.org# 12516498Snate@binkert.org# expr : expr . PLUS term 12526498Snate@binkert.org# 12536498Snate@binkert.org# In the above, the "." represents the current location of the parse. Here 12546498Snate@binkert.org# basic attributes: 12556498Snate@binkert.org# 12566498Snate@binkert.org# name - Name of the production. For example 'expr' 12576498Snate@binkert.org# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] 12586498Snate@binkert.org# number - Production number. 12596498Snate@binkert.org# 12606498Snate@binkert.org# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' 12616498Snate@binkert.org# then lr_next refers to 'expr -> expr PLUS . term' 12626498Snate@binkert.org# lr_index - LR item index (location of the ".") in the prod list. 12634479Sbinkertn@umich.edu# lookaheads - LALR lookahead symbols for this item 12646498Snate@binkert.org# len - Length of the production (number of symbols on right hand side) 12656498Snate@binkert.org# lr_after - List of all productions that immediately follow 12666498Snate@binkert.org# lr_before - Grammar symbol immediately before 12672632SN/A# ----------------------------------------------------------------------------- 12682632SN/A 12696498Snate@binkert.orgclass LRItem(object): 12706498Snate@binkert.org def __init__(self,p,n): 12716498Snate@binkert.org self.name = p.name 12726498Snate@binkert.org self.prod = list(p.prod) 12736498Snate@binkert.org self.number = p.number 12746498Snate@binkert.org self.lr_index = n 12754479Sbinkertn@umich.edu self.lookaheads = { } 12766498Snate@binkert.org self.prod.insert(n,".") 12776498Snate@binkert.org self.prod = tuple(self.prod) 12786498Snate@binkert.org self.len = len(self.prod) 12796498Snate@binkert.org self.usyms = p.usyms 12802632SN/A 12812632SN/A def __str__(self): 12822632SN/A if self.prod: 12832632SN/A s = "%s -> %s" % (self.name," ".join(self.prod)) 12842632SN/A else: 12852632SN/A s = "%s -> <empty>" % self.name 12862632SN/A return s 12872632SN/A 12882632SN/A def __repr__(self): 12896498Snate@binkert.org return "LRItem("+str(self)+")" 12906498Snate@binkert.org 12916498Snate@binkert.org# ----------------------------------------------------------------------------- 12926498Snate@binkert.org# rightmost_terminal() 12936498Snate@binkert.org# 12946498Snate@binkert.org# Return the rightmost terminal from a list of symbols. Used in add_production() 12956498Snate@binkert.org# ----------------------------------------------------------------------------- 12966498Snate@binkert.orgdef rightmost_terminal(symbols, terminals): 12976498Snate@binkert.org i = len(symbols) - 1 12986498Snate@binkert.org while i >= 0: 12996498Snate@binkert.org if symbols[i] in terminals: 13006498Snate@binkert.org return symbols[i] 13016498Snate@binkert.org i -= 1 13026498Snate@binkert.org return None 13036498Snate@binkert.org 13046498Snate@binkert.org# ----------------------------------------------------------------------------- 13056498Snate@binkert.org# === GRAMMAR CLASS === 13066498Snate@binkert.org# 13076498Snate@binkert.org# The following class represents the contents of the specified grammar along 13086498Snate@binkert.org# with various computed properties such as first sets, follow sets, LR items, etc. 13096498Snate@binkert.org# This data is used for critical parts of the table generation process later. 13106498Snate@binkert.org# ----------------------------------------------------------------------------- 13116498Snate@binkert.org 13126498Snate@binkert.orgclass GrammarError(YaccError): pass 13136498Snate@binkert.org 13146498Snate@binkert.orgclass Grammar(object): 13156498Snate@binkert.org def __init__(self,terminals): 13166498Snate@binkert.org self.Productions = [None] # A list of all of the productions. The first 13176498Snate@binkert.org # entry is always reserved for the purpose of 13186498Snate@binkert.org # building an augmented grammar 13196498Snate@binkert.org 13206498Snate@binkert.org self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all 13216498Snate@binkert.org # productions of that nonterminal. 13226498Snate@binkert.org 13236498Snate@binkert.org self.Prodmap = { } # A dictionary that is only used to detect duplicate 13246498Snate@binkert.org # productions. 13256498Snate@binkert.org 13266498Snate@binkert.org self.Terminals = { } # A dictionary mapping the names of terminal symbols to a 13276498Snate@binkert.org # list of the rules where they are used. 13286498Snate@binkert.org 13296498Snate@binkert.org for term in terminals: 13306498Snate@binkert.org self.Terminals[term] = [] 13316498Snate@binkert.org 13326498Snate@binkert.org self.Terminals['error'] = [] 13336498Snate@binkert.org 13346498Snate@binkert.org self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list 13356498Snate@binkert.org # of rule numbers where they are used. 13366498Snate@binkert.org 13376498Snate@binkert.org self.First = { } # A dictionary of precomputed FIRST(x) symbols 13386498Snate@binkert.org 13396498Snate@binkert.org self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols 13406498Snate@binkert.org 13416498Snate@binkert.org self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the 13426498Snate@binkert.org # form ('right',level) or ('nonassoc', level) or ('left',level) 13436498Snate@binkert.org 13446498Snate@binkert.org self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer. 13456498Snate@binkert.org # This is only used to provide error checking and to generate 13466498Snate@binkert.org # a warning about unused precedence rules. 13476498Snate@binkert.org 13486498Snate@binkert.org self.Start = None # Starting symbol for the grammar 13496498Snate@binkert.org 13506498Snate@binkert.org 13516498Snate@binkert.org def __len__(self): 13526498Snate@binkert.org return len(self.Productions) 13536498Snate@binkert.org 13546498Snate@binkert.org def __getitem__(self,index): 13556498Snate@binkert.org return self.Productions[index] 13566498Snate@binkert.org 13576498Snate@binkert.org # ----------------------------------------------------------------------------- 13586498Snate@binkert.org # set_precedence() 13596498Snate@binkert.org # 13606498Snate@binkert.org # Sets the precedence for a given terminal. assoc is the associativity such as 13616498Snate@binkert.org # 'left','right', or 'nonassoc'. level is a numeric level. 13626498Snate@binkert.org # 13636498Snate@binkert.org # ----------------------------------------------------------------------------- 13646498Snate@binkert.org 13656498Snate@binkert.org def set_precedence(self,term,assoc,level): 13666498Snate@binkert.org assert self.Productions == [None],"Must call set_precedence() before add_production()" 13676498Snate@binkert.org if term in self.Precedence: 13686498Snate@binkert.org raise GrammarError("Precedence already specified for terminal '%s'" % term) 13696498Snate@binkert.org if assoc not in ['left','right','nonassoc']: 13706498Snate@binkert.org raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") 13716498Snate@binkert.org self.Precedence[term] = (assoc,level) 13726498Snate@binkert.org 13736498Snate@binkert.org # ----------------------------------------------------------------------------- 13746498Snate@binkert.org # add_production() 13756498Snate@binkert.org # 13766498Snate@binkert.org # Given an action function, this function assembles a production rule and 13776498Snate@binkert.org # computes its precedence level. 13786498Snate@binkert.org # 13796498Snate@binkert.org # The production rule is supplied as a list of symbols. For example, 13806498Snate@binkert.org # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and 13816498Snate@binkert.org # symbols ['expr','PLUS','term']. 13826498Snate@binkert.org # 13836498Snate@binkert.org # Precedence is determined by the precedence of the right-most non-terminal 13846498Snate@binkert.org # or the precedence of a terminal specified by %prec. 13856498Snate@binkert.org # 13866498Snate@binkert.org # A variety of error checks are performed to make sure production symbols 13876498Snate@binkert.org # are valid and that %prec is used correctly. 13886498Snate@binkert.org # ----------------------------------------------------------------------------- 13896498Snate@binkert.org 13906498Snate@binkert.org def add_production(self,prodname,syms,func=None,file='',line=0): 13916498Snate@binkert.org 13926498Snate@binkert.org if prodname in self.Terminals: 13936498Snate@binkert.org raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname)) 13946498Snate@binkert.org if prodname == 'error': 13956498Snate@binkert.org raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname)) 13966498Snate@binkert.org if not _is_identifier.match(prodname): 13976498Snate@binkert.org raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname)) 13986498Snate@binkert.org 13996498Snate@binkert.org # Look for literal tokens 14006498Snate@binkert.org for n,s in enumerate(syms): 14016498Snate@binkert.org if s[0] in "'\"": 14026498Snate@binkert.org try: 14036498Snate@binkert.org c = eval(s) 14046498Snate@binkert.org if (len(c) > 1): 14056498Snate@binkert.org raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname)) 14066498Snate@binkert.org if not c in self.Terminals: 14076498Snate@binkert.org self.Terminals[c] = [] 14086498Snate@binkert.org syms[n] = c 14096498Snate@binkert.org continue 14106498Snate@binkert.org except SyntaxError: 14116498Snate@binkert.org pass 14126498Snate@binkert.org if not _is_identifier.match(s) and s != '%prec': 14136498Snate@binkert.org raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)) 14146498Snate@binkert.org 14156498Snate@binkert.org # Determine the precedence level 14166498Snate@binkert.org if '%prec' in syms: 14176498Snate@binkert.org if syms[-1] == '%prec': 14186498Snate@binkert.org raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line)) 14196498Snate@binkert.org if syms[-2] != '%prec': 14206498Snate@binkert.org raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line)) 14216498Snate@binkert.org precname = syms[-1] 14226498Snate@binkert.org prodprec = self.Precedence.get(precname,None) 14236498Snate@binkert.org if not prodprec: 14246498Snate@binkert.org raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname)) 14256498Snate@binkert.org else: 14266498Snate@binkert.org self.UsedPrecedence[precname] = 1 14276498Snate@binkert.org del syms[-2:] # Drop %prec from the rule 14286498Snate@binkert.org else: 14296498Snate@binkert.org # If no %prec, precedence is determined by the rightmost terminal symbol 14306498Snate@binkert.org precname = rightmost_terminal(syms,self.Terminals) 14316498Snate@binkert.org prodprec = self.Precedence.get(precname,('right',0)) 14326498Snate@binkert.org 14336498Snate@binkert.org # See if the rule is already in the rulemap 14346498Snate@binkert.org map = "%s -> %s" % (prodname,syms) 14356498Snate@binkert.org if map in self.Prodmap: 14366498Snate@binkert.org m = self.Prodmap[map] 14376498Snate@binkert.org raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) + 14386498Snate@binkert.org "Previous definition at %s:%d" % (m.file, m.line)) 14396498Snate@binkert.org 14406498Snate@binkert.org # From this point on, everything is valid. Create a new Production instance 14416498Snate@binkert.org pnumber = len(self.Productions) 14426498Snate@binkert.org if not prodname in self.Nonterminals: 14436498Snate@binkert.org self.Nonterminals[prodname] = [ ] 14446498Snate@binkert.org 14456498Snate@binkert.org # Add the production number to Terminals and Nonterminals 14466498Snate@binkert.org for t in syms: 14476498Snate@binkert.org if t in self.Terminals: 14486498Snate@binkert.org self.Terminals[t].append(pnumber) 14496498Snate@binkert.org else: 14506498Snate@binkert.org if not t in self.Nonterminals: 14516498Snate@binkert.org self.Nonterminals[t] = [ ] 14526498Snate@binkert.org self.Nonterminals[t].append(pnumber) 14536498Snate@binkert.org 14546498Snate@binkert.org # Create a production and add it to the list of productions 14556498Snate@binkert.org p = Production(pnumber,prodname,syms,prodprec,func,file,line) 14566498Snate@binkert.org self.Productions.append(p) 14576498Snate@binkert.org self.Prodmap[map] = p 14586498Snate@binkert.org 14596498Snate@binkert.org # Add to the global productions list 14602632SN/A try: 14616498Snate@binkert.org self.Prodnames[prodname].append(p) 14626498Snate@binkert.org except KeyError: 14636498Snate@binkert.org self.Prodnames[prodname] = [ p ] 14646498Snate@binkert.org return 0 14656498Snate@binkert.org 14666498Snate@binkert.org # ----------------------------------------------------------------------------- 14676498Snate@binkert.org # set_start() 14686498Snate@binkert.org # 14696498Snate@binkert.org # Sets the starting symbol and creates the augmented grammar. Production 14706498Snate@binkert.org # rule 0 is S' -> start where start is the start symbol. 14716498Snate@binkert.org # ----------------------------------------------------------------------------- 14726498Snate@binkert.org 14736498Snate@binkert.org def set_start(self,start=None): 14746498Snate@binkert.org if not start: 14756498Snate@binkert.org start = self.Productions[1].name 14766498Snate@binkert.org if start not in self.Nonterminals: 14776498Snate@binkert.org raise GrammarError("start symbol %s undefined" % start) 14786498Snate@binkert.org self.Productions[0] = Production(0,"S'",[start]) 14796498Snate@binkert.org self.Nonterminals[start].append(0) 14806498Snate@binkert.org self.Start = start 14816498Snate@binkert.org 14826498Snate@binkert.org # ----------------------------------------------------------------------------- 14836498Snate@binkert.org # find_unreachable() 14846498Snate@binkert.org # 14856498Snate@binkert.org # Find all of the nonterminal symbols that can't be reached from the starting 14866498Snate@binkert.org # symbol. Returns a list of nonterminals that can't be reached. 14876498Snate@binkert.org # ----------------------------------------------------------------------------- 14886498Snate@binkert.org 14896498Snate@binkert.org def find_unreachable(self): 14906498Snate@binkert.org 14916498Snate@binkert.org # Mark all symbols that are reachable from a symbol s 14926498Snate@binkert.org def mark_reachable_from(s): 14936498Snate@binkert.org if reachable[s]: 14946498Snate@binkert.org # We've already reached symbol s. 14956498Snate@binkert.org return 14966498Snate@binkert.org reachable[s] = 1 14976498Snate@binkert.org for p in self.Prodnames.get(s,[]): 14986498Snate@binkert.org for r in p.prod: 14996498Snate@binkert.org mark_reachable_from(r) 15006498Snate@binkert.org 15016498Snate@binkert.org reachable = { } 15026498Snate@binkert.org for s in list(self.Terminals) + list(self.Nonterminals): 15036498Snate@binkert.org reachable[s] = 0 15046498Snate@binkert.org 15056498Snate@binkert.org mark_reachable_from( self.Productions[0].prod[0] ) 15066498Snate@binkert.org 15076498Snate@binkert.org return [s for s in list(self.Nonterminals) 15086498Snate@binkert.org if not reachable[s]] 15096498Snate@binkert.org 15106498Snate@binkert.org # ----------------------------------------------------------------------------- 15116498Snate@binkert.org # infinite_cycles() 15126498Snate@binkert.org # 15136498Snate@binkert.org # This function looks at the various parsing rules and tries to detect 15146498Snate@binkert.org # infinite recursion cycles (grammar rules where there is no possible way 15156498Snate@binkert.org # to derive a string of only terminals). 15166498Snate@binkert.org # ----------------------------------------------------------------------------- 15176498Snate@binkert.org 15186498Snate@binkert.org def infinite_cycles(self): 15196498Snate@binkert.org terminates = {} 15206498Snate@binkert.org 15216498Snate@binkert.org # Terminals: 15226498Snate@binkert.org for t in self.Terminals: 15236498Snate@binkert.org terminates[t] = 1 15246498Snate@binkert.org 15256498Snate@binkert.org terminates['$end'] = 1 15266498Snate@binkert.org 15276498Snate@binkert.org # Nonterminals: 15286498Snate@binkert.org 15296498Snate@binkert.org # Initialize to false: 15306498Snate@binkert.org for n in self.Nonterminals: 15316498Snate@binkert.org terminates[n] = 0 15326498Snate@binkert.org 15336498Snate@binkert.org # Then propagate termination until no change: 15346498Snate@binkert.org while 1: 15356498Snate@binkert.org some_change = 0 15366498Snate@binkert.org for (n,pl) in self.Prodnames.items(): 15376498Snate@binkert.org # Nonterminal n terminates iff any of its productions terminates. 15386498Snate@binkert.org for p in pl: 15396498Snate@binkert.org # Production p terminates iff all of its rhs symbols terminate. 15406498Snate@binkert.org for s in p.prod: 15416498Snate@binkert.org if not terminates[s]: 15426498Snate@binkert.org # The symbol s does not terminate, 15436498Snate@binkert.org # so production p does not terminate. 15446498Snate@binkert.org p_terminates = 0 15456498Snate@binkert.org break 15466498Snate@binkert.org else: 15476498Snate@binkert.org # didn't break from the loop, 15486498Snate@binkert.org # so every symbol s terminates 15496498Snate@binkert.org # so production p terminates. 15506498Snate@binkert.org p_terminates = 1 15516498Snate@binkert.org 15526498Snate@binkert.org if p_terminates: 15536498Snate@binkert.org # symbol n terminates! 15546498Snate@binkert.org if not terminates[n]: 15556498Snate@binkert.org terminates[n] = 1 15566498Snate@binkert.org some_change = 1 15576498Snate@binkert.org # Don't need to consider any more productions for this n. 15586498Snate@binkert.org break 15596498Snate@binkert.org 15606498Snate@binkert.org if not some_change: 15616498Snate@binkert.org break 15626498Snate@binkert.org 15636498Snate@binkert.org infinite = [] 15646498Snate@binkert.org for (s,term) in terminates.items(): 15656498Snate@binkert.org if not term: 15666498Snate@binkert.org if not s in self.Prodnames and not s in self.Terminals and s != 'error': 15676498Snate@binkert.org # s is used-but-not-defined, and we've already warned of that, 15686498Snate@binkert.org # so it would be overkill to say that it's also non-terminating. 15696498Snate@binkert.org pass 15706498Snate@binkert.org else: 15716498Snate@binkert.org infinite.append(s) 15726498Snate@binkert.org 15736498Snate@binkert.org return infinite 15746498Snate@binkert.org 15756498Snate@binkert.org 15766498Snate@binkert.org # ----------------------------------------------------------------------------- 15776498Snate@binkert.org # undefined_symbols() 15786498Snate@binkert.org # 15796498Snate@binkert.org # Find all symbols that were used the grammar, but not defined as tokens or 15806498Snate@binkert.org # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol 15816498Snate@binkert.org # and prod is the production where the symbol was used. 15826498Snate@binkert.org # ----------------------------------------------------------------------------- 15836498Snate@binkert.org def undefined_symbols(self): 15846498Snate@binkert.org result = [] 15856498Snate@binkert.org for p in self.Productions: 15862632SN/A if not p: continue 15876498Snate@binkert.org 15886498Snate@binkert.org for s in p.prod: 15896498Snate@binkert.org if not s in self.Prodnames and not s in self.Terminals and s != 'error': 15906498Snate@binkert.org result.append((s,p)) 15916498Snate@binkert.org return result 15926498Snate@binkert.org 15936498Snate@binkert.org # ----------------------------------------------------------------------------- 15946498Snate@binkert.org # unused_terminals() 15956498Snate@binkert.org # 15966498Snate@binkert.org # Find all terminals that were defined, but not used by the grammar. Returns 15976498Snate@binkert.org # a list of all symbols. 15986498Snate@binkert.org # ----------------------------------------------------------------------------- 15996498Snate@binkert.org def unused_terminals(self): 16006498Snate@binkert.org unused_tok = [] 16016498Snate@binkert.org for s,v in self.Terminals.items(): 16026498Snate@binkert.org if s != 'error' and not v: 16036498Snate@binkert.org unused_tok.append(s) 16046498Snate@binkert.org 16056498Snate@binkert.org return unused_tok 16066498Snate@binkert.org 16076498Snate@binkert.org # ------------------------------------------------------------------------------ 16086498Snate@binkert.org # unused_rules() 16096498Snate@binkert.org # 16106498Snate@binkert.org # Find all grammar rules that were defined, but not used (maybe not reachable) 16116498Snate@binkert.org # Returns a list of productions. 16126498Snate@binkert.org # ------------------------------------------------------------------------------ 16136498Snate@binkert.org 16146498Snate@binkert.org def unused_rules(self): 16156498Snate@binkert.org unused_prod = [] 16166498Snate@binkert.org for s,v in self.Nonterminals.items(): 16176498Snate@binkert.org if not v: 16186498Snate@binkert.org p = self.Prodnames[s][0] 16196498Snate@binkert.org unused_prod.append(p) 16206498Snate@binkert.org return unused_prod 16216498Snate@binkert.org 16226498Snate@binkert.org # ----------------------------------------------------------------------------- 16236498Snate@binkert.org # unused_precedence() 16246498Snate@binkert.org # 16256498Snate@binkert.org # Returns a list of tuples (term,precedence) corresponding to precedence 16266498Snate@binkert.org # rules that were never used by the grammar. term is the name of the terminal 16276498Snate@binkert.org # on which precedence was applied and precedence is a string such as 'left' or 16286498Snate@binkert.org # 'right' corresponding to the type of precedence. 16296498Snate@binkert.org # ----------------------------------------------------------------------------- 16306498Snate@binkert.org 16316498Snate@binkert.org def unused_precedence(self): 16326498Snate@binkert.org unused = [] 16336498Snate@binkert.org for termname in self.Precedence: 16346498Snate@binkert.org if not (termname in self.Terminals or termname in self.UsedPrecedence): 16356498Snate@binkert.org unused.append((termname,self.Precedence[termname][0])) 16366498Snate@binkert.org 16376498Snate@binkert.org return unused 16386498Snate@binkert.org 16396498Snate@binkert.org # ------------------------------------------------------------------------- 16406498Snate@binkert.org # _first() 16416498Snate@binkert.org # 16426498Snate@binkert.org # Compute the value of FIRST1(beta) where beta is a tuple of symbols. 16436498Snate@binkert.org # 16446498Snate@binkert.org # During execution of compute_first1, the result may be incomplete. 16456498Snate@binkert.org # Afterward (e.g., when called from compute_follow()), it will be complete. 16466498Snate@binkert.org # ------------------------------------------------------------------------- 16476498Snate@binkert.org def _first(self,beta): 16486498Snate@binkert.org 16496498Snate@binkert.org # We are computing First(x1,x2,x3,...,xn) 16506498Snate@binkert.org result = [ ] 16516498Snate@binkert.org for x in beta: 16526498Snate@binkert.org x_produces_empty = 0 16536498Snate@binkert.org 16546498Snate@binkert.org # Add all the non-<empty> symbols of First[x] to the result. 16556498Snate@binkert.org for f in self.First[x]: 16566498Snate@binkert.org if f == '<empty>': 16576498Snate@binkert.org x_produces_empty = 1 16582632SN/A else: 16596498Snate@binkert.org if f not in result: result.append(f) 16606498Snate@binkert.org 16616498Snate@binkert.org if x_produces_empty: 16626498Snate@binkert.org # We have to consider the next x in beta, 16636498Snate@binkert.org # i.e. stay in the loop. 16642632SN/A pass 16652632SN/A else: 16666498Snate@binkert.org # We don't have to consider any further symbols in beta. 16676498Snate@binkert.org break 16686498Snate@binkert.org else: 16696498Snate@binkert.org # There was no 'break' from the loop, 16706498Snate@binkert.org # so x_produces_empty was true for all x in beta, 16716498Snate@binkert.org # so beta produces empty as well. 16726498Snate@binkert.org result.append('<empty>') 16736498Snate@binkert.org 16746498Snate@binkert.org return result 16756498Snate@binkert.org 16766498Snate@binkert.org # ------------------------------------------------------------------------- 16776498Snate@binkert.org # compute_first() 16786498Snate@binkert.org # 16796498Snate@binkert.org # Compute the value of FIRST1(X) for all symbols 16806498Snate@binkert.org # ------------------------------------------------------------------------- 16816498Snate@binkert.org def compute_first(self): 16826498Snate@binkert.org if self.First: 16836498Snate@binkert.org return self.First 16846498Snate@binkert.org 16856498Snate@binkert.org # Terminals: 16866498Snate@binkert.org for t in self.Terminals: 16876498Snate@binkert.org self.First[t] = [t] 16886498Snate@binkert.org 16896498Snate@binkert.org self.First['$end'] = ['$end'] 16906498Snate@binkert.org 16916498Snate@binkert.org # Nonterminals: 16926498Snate@binkert.org 16936498Snate@binkert.org # Initialize to the empty set: 16946498Snate@binkert.org for n in self.Nonterminals: 16956498Snate@binkert.org self.First[n] = [] 16966498Snate@binkert.org 16976498Snate@binkert.org # Then propagate symbols until no change: 16986498Snate@binkert.org while 1: 16996498Snate@binkert.org some_change = 0 17006498Snate@binkert.org for n in self.Nonterminals: 17016498Snate@binkert.org for p in self.Prodnames[n]: 17026498Snate@binkert.org for f in self._first(p.prod): 17036498Snate@binkert.org if f not in self.First[n]: 17046498Snate@binkert.org self.First[n].append( f ) 17056498Snate@binkert.org some_change = 1 17066498Snate@binkert.org if not some_change: 17076498Snate@binkert.org break 17086498Snate@binkert.org 17096498Snate@binkert.org return self.First 17106498Snate@binkert.org 17116498Snate@binkert.org # --------------------------------------------------------------------- 17126498Snate@binkert.org # compute_follow() 17136498Snate@binkert.org # 17146498Snate@binkert.org # Computes all of the follow sets for every non-terminal symbol. The 17156498Snate@binkert.org # follow set is the set of all symbols that might follow a given 17166498Snate@binkert.org # non-terminal. See the Dragon book, 2nd Ed. p. 189. 17176498Snate@binkert.org # --------------------------------------------------------------------- 17186498Snate@binkert.org def compute_follow(self,start=None): 17196498Snate@binkert.org # If already computed, return the result 17206498Snate@binkert.org if self.Follow: 17216498Snate@binkert.org return self.Follow 17226498Snate@binkert.org 17236498Snate@binkert.org # If first sets not computed yet, do that first. 17246498Snate@binkert.org if not self.First: 17256498Snate@binkert.org self.compute_first() 17266498Snate@binkert.org 17276498Snate@binkert.org # Add '$end' to the follow list of the start symbol 17286498Snate@binkert.org for k in self.Nonterminals: 17296498Snate@binkert.org self.Follow[k] = [ ] 17306498Snate@binkert.org 17316498Snate@binkert.org if not start: 17326498Snate@binkert.org start = self.Productions[1].name 17336498Snate@binkert.org 17346498Snate@binkert.org self.Follow[start] = [ '$end' ] 17356498Snate@binkert.org 17366498Snate@binkert.org while 1: 17376498Snate@binkert.org didadd = 0 17386498Snate@binkert.org for p in self.Productions[1:]: 17396498Snate@binkert.org # Here is the production set 17406498Snate@binkert.org for i in range(len(p.prod)): 17416498Snate@binkert.org B = p.prod[i] 17426498Snate@binkert.org if B in self.Nonterminals: 17436498Snate@binkert.org # Okay. We got a non-terminal in a production 17446498Snate@binkert.org fst = self._first(p.prod[i+1:]) 17456498Snate@binkert.org hasempty = 0 17466498Snate@binkert.org for f in fst: 17476498Snate@binkert.org if f != '<empty>' and f not in self.Follow[B]: 17486498Snate@binkert.org self.Follow[B].append(f) 17496498Snate@binkert.org didadd = 1 17506498Snate@binkert.org if f == '<empty>': 17516498Snate@binkert.org hasempty = 1 17526498Snate@binkert.org if hasempty or i == (len(p.prod)-1): 17536498Snate@binkert.org # Add elements of follow(a) to follow(b) 17546498Snate@binkert.org for f in self.Follow[p.name]: 17556498Snate@binkert.org if f not in self.Follow[B]: 17566498Snate@binkert.org self.Follow[B].append(f) 17576498Snate@binkert.org didadd = 1 17586498Snate@binkert.org if not didadd: break 17596498Snate@binkert.org return self.Follow 17606498Snate@binkert.org 17616498Snate@binkert.org 17626498Snate@binkert.org # ----------------------------------------------------------------------------- 17636498Snate@binkert.org # build_lritems() 17646498Snate@binkert.org # 17656498Snate@binkert.org # This function walks the list of productions and builds a complete set of the 17666498Snate@binkert.org # LR items. The LR items are stored in two ways: First, they are uniquely 17676498Snate@binkert.org # numbered and placed in the list _lritems. Second, a linked list of LR items 17686498Snate@binkert.org # is built for each production. For example: 17696498Snate@binkert.org # 17706498Snate@binkert.org # E -> E PLUS E 17716498Snate@binkert.org # 17726498Snate@binkert.org # Creates the list 17736498Snate@binkert.org # 17746498Snate@binkert.org # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] 17756498Snate@binkert.org # ----------------------------------------------------------------------------- 17766498Snate@binkert.org 17776498Snate@binkert.org def build_lritems(self): 17786498Snate@binkert.org for p in self.Productions: 17796498Snate@binkert.org lastlri = p 17806498Snate@binkert.org i = 0 17816498Snate@binkert.org lr_items = [] 17826498Snate@binkert.org while 1: 17836498Snate@binkert.org if i > len(p): 17846498Snate@binkert.org lri = None 17856498Snate@binkert.org else: 17866498Snate@binkert.org lri = LRItem(p,i) 17876498Snate@binkert.org # Precompute the list of productions immediately following 17886498Snate@binkert.org try: 17896498Snate@binkert.org lri.lr_after = self.Prodnames[lri.prod[i+1]] 17906498Snate@binkert.org except (IndexError,KeyError): 17916498Snate@binkert.org lri.lr_after = [] 17926498Snate@binkert.org try: 17936498Snate@binkert.org lri.lr_before = lri.prod[i-1] 17946498Snate@binkert.org except IndexError: 17956498Snate@binkert.org lri.lr_before = None 17966498Snate@binkert.org 17976498Snate@binkert.org lastlri.lr_next = lri 17986498Snate@binkert.org if not lri: break 17996498Snate@binkert.org lr_items.append(lri) 18006498Snate@binkert.org lastlri = lri 18016498Snate@binkert.org i += 1 18026498Snate@binkert.org p.lr_items = lr_items 18032632SN/A 18042632SN/A# ----------------------------------------------------------------------------- 18056498Snate@binkert.org# == Class LRTable == 18062632SN/A# 18076498Snate@binkert.org# This basic class represents a basic table of LR parsing information. 18086498Snate@binkert.org# Methods for generating the tables are not defined here. They are defined 18096498Snate@binkert.org# in the derived class LRGeneratedTable. 18102632SN/A# ----------------------------------------------------------------------------- 18116498Snate@binkert.org 18126498Snate@binkert.orgclass VersionError(YaccError): pass 18136498Snate@binkert.org 18146498Snate@binkert.orgclass LRTable(object): 18156498Snate@binkert.org def __init__(self): 18166498Snate@binkert.org self.lr_action = None 18176498Snate@binkert.org self.lr_goto = None 18186498Snate@binkert.org self.lr_productions = None 18196498Snate@binkert.org self.lr_method = None 18206498Snate@binkert.org 18216498Snate@binkert.org def read_table(self,module): 18226498Snate@binkert.org if isinstance(module,types.ModuleType): 18236498Snate@binkert.org parsetab = module 18246498Snate@binkert.org else: 18256498Snate@binkert.org if sys.version_info[0] < 3: 18266498Snate@binkert.org exec("import %s as parsetab" % module) 18276498Snate@binkert.org else: 18286498Snate@binkert.org env = { } 18296498Snate@binkert.org exec("import %s as parsetab" % module, env, env) 18306498Snate@binkert.org parsetab = env['parsetab'] 18316498Snate@binkert.org 18326498Snate@binkert.org if parsetab._tabversion != __tabversion__: 18336498Snate@binkert.org raise VersionError("yacc table file version is out of date") 18346498Snate@binkert.org 18356498Snate@binkert.org self.lr_action = parsetab._lr_action 18366498Snate@binkert.org self.lr_goto = parsetab._lr_goto 18376498Snate@binkert.org 18386498Snate@binkert.org self.lr_productions = [] 18396498Snate@binkert.org for p in parsetab._lr_productions: 18406498Snate@binkert.org self.lr_productions.append(MiniProduction(*p)) 18416498Snate@binkert.org 18426498Snate@binkert.org self.lr_method = parsetab._lr_method 18436498Snate@binkert.org return parsetab._lr_signature 18446498Snate@binkert.org 18456498Snate@binkert.org def read_pickle(self,filename): 18466498Snate@binkert.org try: 18476498Snate@binkert.org import cPickle as pickle 18486498Snate@binkert.org except ImportError: 18496498Snate@binkert.org import pickle 18506498Snate@binkert.org 18516498Snate@binkert.org in_f = open(filename,"rb") 18526498Snate@binkert.org 18536498Snate@binkert.org tabversion = pickle.load(in_f) 18546498Snate@binkert.org if tabversion != __tabversion__: 18556498Snate@binkert.org raise VersionError("yacc table file version is out of date") 18566498Snate@binkert.org self.lr_method = pickle.load(in_f) 18576498Snate@binkert.org signature = pickle.load(in_f) 18586498Snate@binkert.org self.lr_action = pickle.load(in_f) 18596498Snate@binkert.org self.lr_goto = pickle.load(in_f) 18606498Snate@binkert.org productions = pickle.load(in_f) 18616498Snate@binkert.org 18626498Snate@binkert.org self.lr_productions = [] 18636498Snate@binkert.org for p in productions: 18646498Snate@binkert.org self.lr_productions.append(MiniProduction(*p)) 18656498Snate@binkert.org 18666498Snate@binkert.org in_f.close() 18676498Snate@binkert.org return signature 18686498Snate@binkert.org 18696498Snate@binkert.org # Bind all production function names to callable objects in pdict 18706498Snate@binkert.org def bind_callables(self,pdict): 18716498Snate@binkert.org for p in self.lr_productions: 18726498Snate@binkert.org p.bind(pdict) 18736498Snate@binkert.org 18742632SN/A# ----------------------------------------------------------------------------- 18756498Snate@binkert.org# === LR Generator === 18762632SN/A# 18776498Snate@binkert.org# The following classes and functions are used to generate LR parsing tables on 18786498Snate@binkert.org# a grammar. 18792632SN/A# ----------------------------------------------------------------------------- 18802632SN/A 18814479Sbinkertn@umich.edu# ----------------------------------------------------------------------------- 18824479Sbinkertn@umich.edu# digraph() 18834479Sbinkertn@umich.edu# traverse() 18844479Sbinkertn@umich.edu# 18854479Sbinkertn@umich.edu# The following two functions are used to compute set valued functions 18864479Sbinkertn@umich.edu# of the form: 18874479Sbinkertn@umich.edu# 18884479Sbinkertn@umich.edu# F(x) = F'(x) U U{F(y) | x R y} 18894479Sbinkertn@umich.edu# 18904479Sbinkertn@umich.edu# This is used to compute the values of Read() sets as well as FOLLOW sets 18914479Sbinkertn@umich.edu# in LALR(1) generation. 18924479Sbinkertn@umich.edu# 18934479Sbinkertn@umich.edu# Inputs: X - An input set 18944479Sbinkertn@umich.edu# R - A relation 18954479Sbinkertn@umich.edu# FP - Set-valued function 18964479Sbinkertn@umich.edu# ------------------------------------------------------------------------------ 18974479Sbinkertn@umich.edu 18984479Sbinkertn@umich.edudef digraph(X,R,FP): 18994479Sbinkertn@umich.edu N = { } 19004479Sbinkertn@umich.edu for x in X: 19014479Sbinkertn@umich.edu N[x] = 0 19024479Sbinkertn@umich.edu stack = [] 19034479Sbinkertn@umich.edu F = { } 19044479Sbinkertn@umich.edu for x in X: 19054479Sbinkertn@umich.edu if N[x] == 0: traverse(x,N,stack,F,X,R,FP) 19064479Sbinkertn@umich.edu return F 19074479Sbinkertn@umich.edu 19084479Sbinkertn@umich.edudef traverse(x,N,stack,F,X,R,FP): 19094479Sbinkertn@umich.edu stack.append(x) 19104479Sbinkertn@umich.edu d = len(stack) 19114479Sbinkertn@umich.edu N[x] = d 19124479Sbinkertn@umich.edu F[x] = FP(x) # F(X) <- F'(x) 19134479Sbinkertn@umich.edu 19144479Sbinkertn@umich.edu rel = R(x) # Get y's related to x 19154479Sbinkertn@umich.edu for y in rel: 19164479Sbinkertn@umich.edu if N[y] == 0: 19174479Sbinkertn@umich.edu traverse(y,N,stack,F,X,R,FP) 19184479Sbinkertn@umich.edu N[x] = min(N[x],N[y]) 19194479Sbinkertn@umich.edu for a in F.get(y,[]): 19204479Sbinkertn@umich.edu if a not in F[x]: F[x].append(a) 19214479Sbinkertn@umich.edu if N[x] == d: 19226498Snate@binkert.org N[stack[-1]] = MAXINT 19234479Sbinkertn@umich.edu F[stack[-1]] = F[x] 19244479Sbinkertn@umich.edu element = stack.pop() 19254479Sbinkertn@umich.edu while element != x: 19266498Snate@binkert.org N[stack[-1]] = MAXINT 19274479Sbinkertn@umich.edu F[stack[-1]] = F[x] 19284479Sbinkertn@umich.edu element = stack.pop() 19294479Sbinkertn@umich.edu 19306498Snate@binkert.orgclass LALRError(YaccError): pass 19316498Snate@binkert.org 19324479Sbinkertn@umich.edu# ----------------------------------------------------------------------------- 19336498Snate@binkert.org# == LRGeneratedTable == 19344479Sbinkertn@umich.edu# 19356498Snate@binkert.org# This class implements the LR table generation algorithm. There are no 19366498Snate@binkert.org# public methods except for write() 19374479Sbinkertn@umich.edu# ----------------------------------------------------------------------------- 19384479Sbinkertn@umich.edu 19396498Snate@binkert.orgclass LRGeneratedTable(LRTable): 19406498Snate@binkert.org def __init__(self,grammar,method='LALR',log=None): 19416498Snate@binkert.org if method not in ['SLR','LALR']: 19426498Snate@binkert.org raise LALRError("Unsupported method %s" % method) 19436498Snate@binkert.org 19446498Snate@binkert.org self.grammar = grammar 19456498Snate@binkert.org self.lr_method = method 19466498Snate@binkert.org 19476498Snate@binkert.org # Set up the logger 19486498Snate@binkert.org if not log: 19496498Snate@binkert.org log = NullLogger() 19506498Snate@binkert.org self.log = log 19516498Snate@binkert.org 19526498Snate@binkert.org # Internal attributes 19536498Snate@binkert.org self.lr_action = {} # Action table 19546498Snate@binkert.org self.lr_goto = {} # Goto table 19556498Snate@binkert.org self.lr_productions = grammar.Productions # Copy of grammar Production array 19566498Snate@binkert.org self.lr_goto_cache = {} # Cache of computed gotos 19576498Snate@binkert.org self.lr0_cidhash = {} # Cache of closures 19586498Snate@binkert.org 19596498Snate@binkert.org self._add_count = 0 # Internal counter used to detect cycles 19606498Snate@binkert.org 19616498Snate@binkert.org # Diagonistic information filled in by the table generator 19626498Snate@binkert.org self.sr_conflict = 0 19636498Snate@binkert.org self.rr_conflict = 0 19646498Snate@binkert.org self.conflicts = [] # List of conflicts 19656498Snate@binkert.org 19666498Snate@binkert.org self.sr_conflicts = [] 19676498Snate@binkert.org self.rr_conflicts = [] 19686498Snate@binkert.org 19696498Snate@binkert.org # Build the tables 19706498Snate@binkert.org self.grammar.build_lritems() 19716498Snate@binkert.org self.grammar.compute_first() 19726498Snate@binkert.org self.grammar.compute_follow() 19736498Snate@binkert.org self.lr_parse_table() 19746498Snate@binkert.org 19756498Snate@binkert.org # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 19766498Snate@binkert.org 19776498Snate@binkert.org def lr0_closure(self,I): 19786498Snate@binkert.org self._add_count += 1 19796498Snate@binkert.org 19806498Snate@binkert.org # Add everything in I to J 19816498Snate@binkert.org J = I[:] 19826498Snate@binkert.org didadd = 1 19836498Snate@binkert.org while didadd: 19846498Snate@binkert.org didadd = 0 19856498Snate@binkert.org for j in J: 19866498Snate@binkert.org for x in j.lr_after: 19876498Snate@binkert.org if getattr(x,"lr0_added",0) == self._add_count: continue 19886498Snate@binkert.org # Add B --> .G to J 19896498Snate@binkert.org J.append(x.lr_next) 19906498Snate@binkert.org x.lr0_added = self._add_count 19916498Snate@binkert.org didadd = 1 19926498Snate@binkert.org 19936498Snate@binkert.org return J 19946498Snate@binkert.org 19956498Snate@binkert.org # Compute the LR(0) goto function goto(I,X) where I is a set 19966498Snate@binkert.org # of LR(0) items and X is a grammar symbol. This function is written 19976498Snate@binkert.org # in a way that guarantees uniqueness of the generated goto sets 19986498Snate@binkert.org # (i.e. the same goto set will never be returned as two different Python 19996498Snate@binkert.org # objects). With uniqueness, we can later do fast set comparisons using 20006498Snate@binkert.org # id(obj) instead of element-wise comparison. 20016498Snate@binkert.org 20026498Snate@binkert.org def lr0_goto(self,I,x): 20036498Snate@binkert.org # First we look for a previously cached entry 20046498Snate@binkert.org g = self.lr_goto_cache.get((id(I),x),None) 20056498Snate@binkert.org if g: return g 20066498Snate@binkert.org 20076498Snate@binkert.org # Now we generate the goto set in a way that guarantees uniqueness 20086498Snate@binkert.org # of the result 20096498Snate@binkert.org 20106498Snate@binkert.org s = self.lr_goto_cache.get(x,None) 20116498Snate@binkert.org if not s: 20126498Snate@binkert.org s = { } 20136498Snate@binkert.org self.lr_goto_cache[x] = s 20146498Snate@binkert.org 20156498Snate@binkert.org gs = [ ] 20166498Snate@binkert.org for p in I: 20176498Snate@binkert.org n = p.lr_next 20186498Snate@binkert.org if n and n.lr_before == x: 20196498Snate@binkert.org s1 = s.get(id(n),None) 20206498Snate@binkert.org if not s1: 20216498Snate@binkert.org s1 = { } 20226498Snate@binkert.org s[id(n)] = s1 20236498Snate@binkert.org gs.append(n) 20246498Snate@binkert.org s = s1 20256498Snate@binkert.org g = s.get('$end',None) 20266498Snate@binkert.org if not g: 20276498Snate@binkert.org if gs: 20286498Snate@binkert.org g = self.lr0_closure(gs) 20296498Snate@binkert.org s['$end'] = g 20306498Snate@binkert.org else: 20316498Snate@binkert.org s['$end'] = gs 20326498Snate@binkert.org self.lr_goto_cache[(id(I),x)] = g 20336498Snate@binkert.org return g 20346498Snate@binkert.org 20356498Snate@binkert.org # Compute the LR(0) sets of item function 20366498Snate@binkert.org def lr0_items(self): 20376498Snate@binkert.org 20386498Snate@binkert.org C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] 20396498Snate@binkert.org i = 0 20406498Snate@binkert.org for I in C: 20416498Snate@binkert.org self.lr0_cidhash[id(I)] = i 20426498Snate@binkert.org i += 1 20436498Snate@binkert.org 20446498Snate@binkert.org # Loop over the items in C and each grammar symbols 20456498Snate@binkert.org i = 0 20466498Snate@binkert.org while i < len(C): 20476498Snate@binkert.org I = C[i] 20486498Snate@binkert.org i += 1 20496498Snate@binkert.org 20506498Snate@binkert.org # Collect all of the symbols that could possibly be in the goto(I,X) sets 20516498Snate@binkert.org asyms = { } 20526498Snate@binkert.org for ii in I: 20536498Snate@binkert.org for s in ii.usyms: 20546498Snate@binkert.org asyms[s] = None 20556498Snate@binkert.org 20566498Snate@binkert.org for x in asyms: 20576498Snate@binkert.org g = self.lr0_goto(I,x) 20586498Snate@binkert.org if not g: continue 20596498Snate@binkert.org if id(g) in self.lr0_cidhash: continue 20606498Snate@binkert.org self.lr0_cidhash[id(g)] = len(C) 20616498Snate@binkert.org C.append(g) 20626498Snate@binkert.org 20636498Snate@binkert.org return C 20646498Snate@binkert.org 20656498Snate@binkert.org # ----------------------------------------------------------------------------- 20666498Snate@binkert.org # ==== LALR(1) Parsing ==== 20676498Snate@binkert.org # 20686498Snate@binkert.org # LALR(1) parsing is almost exactly the same as SLR except that instead of 20696498Snate@binkert.org # relying upon Follow() sets when performing reductions, a more selective 20706498Snate@binkert.org # lookahead set that incorporates the state of the LR(0) machine is utilized. 20716498Snate@binkert.org # Thus, we mainly just have to focus on calculating the lookahead sets. 20726498Snate@binkert.org # 20736498Snate@binkert.org # The method used here is due to DeRemer and Pennelo (1982). 20746498Snate@binkert.org # 20756498Snate@binkert.org # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) 20766498Snate@binkert.org # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 20776498Snate@binkert.org # Vol. 4, No. 4, Oct. 1982, pp. 615-649 20786498Snate@binkert.org # 20796498Snate@binkert.org # Further details can also be found in: 20806498Snate@binkert.org # 20816498Snate@binkert.org # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 20826498Snate@binkert.org # McGraw-Hill Book Company, (1985). 20836498Snate@binkert.org # 20846498Snate@binkert.org # ----------------------------------------------------------------------------- 20856498Snate@binkert.org 20866498Snate@binkert.org # ----------------------------------------------------------------------------- 20876498Snate@binkert.org # compute_nullable_nonterminals() 20886498Snate@binkert.org # 20896498Snate@binkert.org # Creates a dictionary containing all of the non-terminals that might produce 20906498Snate@binkert.org # an empty production. 20916498Snate@binkert.org # ----------------------------------------------------------------------------- 20926498Snate@binkert.org 20936498Snate@binkert.org def compute_nullable_nonterminals(self): 20946498Snate@binkert.org nullable = {} 20956498Snate@binkert.org num_nullable = 0 20966498Snate@binkert.org while 1: 20976498Snate@binkert.org for p in self.grammar.Productions[1:]: 20986498Snate@binkert.org if p.len == 0: 20996498Snate@binkert.org nullable[p.name] = 1 21006498Snate@binkert.org continue 21016498Snate@binkert.org for t in p.prod: 21026498Snate@binkert.org if not t in nullable: break 21036498Snate@binkert.org else: 21046498Snate@binkert.org nullable[p.name] = 1 21056498Snate@binkert.org if len(nullable) == num_nullable: break 21066498Snate@binkert.org num_nullable = len(nullable) 21076498Snate@binkert.org return nullable 21086498Snate@binkert.org 21096498Snate@binkert.org # ----------------------------------------------------------------------------- 21106498Snate@binkert.org # find_nonterminal_trans(C) 21116498Snate@binkert.org # 21126498Snate@binkert.org # Given a set of LR(0) items, this functions finds all of the non-terminal 21136498Snate@binkert.org # transitions. These are transitions in which a dot appears immediately before 21146498Snate@binkert.org # a non-terminal. Returns a list of tuples of the form (state,N) where state 21156498Snate@binkert.org # is the state number and N is the nonterminal symbol. 21166498Snate@binkert.org # 21176498Snate@binkert.org # The input C is the set of LR(0) items. 21186498Snate@binkert.org # ----------------------------------------------------------------------------- 21196498Snate@binkert.org 21206498Snate@binkert.org def find_nonterminal_transitions(self,C): 21216498Snate@binkert.org trans = [] 21226498Snate@binkert.org for state in range(len(C)): 21236498Snate@binkert.org for p in C[state]: 21246498Snate@binkert.org if p.lr_index < p.len - 1: 21256498Snate@binkert.org t = (state,p.prod[p.lr_index+1]) 21266498Snate@binkert.org if t[1] in self.grammar.Nonterminals: 21276498Snate@binkert.org if t not in trans: trans.append(t) 21286498Snate@binkert.org state = state + 1 21296498Snate@binkert.org return trans 21306498Snate@binkert.org 21316498Snate@binkert.org # ----------------------------------------------------------------------------- 21326498Snate@binkert.org # dr_relation() 21336498Snate@binkert.org # 21346498Snate@binkert.org # Computes the DR(p,A) relationships for non-terminal transitions. The input 21356498Snate@binkert.org # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 21366498Snate@binkert.org # 21376498Snate@binkert.org # Returns a list of terminals. 21386498Snate@binkert.org # ----------------------------------------------------------------------------- 21396498Snate@binkert.org 21406498Snate@binkert.org def dr_relation(self,C,trans,nullable): 21416498Snate@binkert.org dr_set = { } 21426498Snate@binkert.org state,N = trans 21436498Snate@binkert.org terms = [] 21446498Snate@binkert.org 21456498Snate@binkert.org g = self.lr0_goto(C[state],N) 21466498Snate@binkert.org for p in g: 21476498Snate@binkert.org if p.lr_index < p.len - 1: 21486498Snate@binkert.org a = p.prod[p.lr_index+1] 21496498Snate@binkert.org if a in self.grammar.Terminals: 21506498Snate@binkert.org if a not in terms: terms.append(a) 21516498Snate@binkert.org 21526498Snate@binkert.org # This extra bit is to handle the start state 21536498Snate@binkert.org if state == 0 and N == self.grammar.Productions[0].prod[0]: 21546498Snate@binkert.org terms.append('$end') 21556498Snate@binkert.org 21566498Snate@binkert.org return terms 21576498Snate@binkert.org 21586498Snate@binkert.org # ----------------------------------------------------------------------------- 21596498Snate@binkert.org # reads_relation() 21606498Snate@binkert.org # 21616498Snate@binkert.org # Computes the READS() relation (p,A) READS (t,C). 21626498Snate@binkert.org # ----------------------------------------------------------------------------- 21636498Snate@binkert.org 21646498Snate@binkert.org def reads_relation(self,C, trans, empty): 21656498Snate@binkert.org # Look for empty transitions 21666498Snate@binkert.org rel = [] 21676498Snate@binkert.org state, N = trans 21686498Snate@binkert.org 21696498Snate@binkert.org g = self.lr0_goto(C[state],N) 21706498Snate@binkert.org j = self.lr0_cidhash.get(id(g),-1) 21716498Snate@binkert.org for p in g: 21726498Snate@binkert.org if p.lr_index < p.len - 1: 21736498Snate@binkert.org a = p.prod[p.lr_index + 1] 21746498Snate@binkert.org if a in empty: 21756498Snate@binkert.org rel.append((j,a)) 21766498Snate@binkert.org 21776498Snate@binkert.org return rel 21786498Snate@binkert.org 21796498Snate@binkert.org # ----------------------------------------------------------------------------- 21806498Snate@binkert.org # compute_lookback_includes() 21816498Snate@binkert.org # 21826498Snate@binkert.org # Determines the lookback and includes relations 21836498Snate@binkert.org # 21846498Snate@binkert.org # LOOKBACK: 21856498Snate@binkert.org # 21866498Snate@binkert.org # This relation is determined by running the LR(0) state machine forward. 21876498Snate@binkert.org # For example, starting with a production "N : . A B C", we run it forward 21886498Snate@binkert.org # to obtain "N : A B C ." We then build a relationship between this final 21896498Snate@binkert.org # state and the starting state. These relationships are stored in a dictionary 21906498Snate@binkert.org # lookdict. 21916498Snate@binkert.org # 21926498Snate@binkert.org # INCLUDES: 21936498Snate@binkert.org # 21946498Snate@binkert.org # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 21956498Snate@binkert.org # 21966498Snate@binkert.org # This relation is used to determine non-terminal transitions that occur 21976498Snate@binkert.org # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 21986498Snate@binkert.org # if the following holds: 21996498Snate@binkert.org # 22006498Snate@binkert.org # B -> LAT, where T -> epsilon and p' -L-> p 22016498Snate@binkert.org # 22026498Snate@binkert.org # L is essentially a prefix (which may be empty), T is a suffix that must be 22036498Snate@binkert.org # able to derive an empty string. State p' must lead to state p with the string L. 22046498Snate@binkert.org # 22056498Snate@binkert.org # ----------------------------------------------------------------------------- 22066498Snate@binkert.org 22076498Snate@binkert.org def compute_lookback_includes(self,C,trans,nullable): 22086498Snate@binkert.org 22096498Snate@binkert.org lookdict = {} # Dictionary of lookback relations 22106498Snate@binkert.org includedict = {} # Dictionary of include relations 22116498Snate@binkert.org 22126498Snate@binkert.org # Make a dictionary of non-terminal transitions 22136498Snate@binkert.org dtrans = {} 22146498Snate@binkert.org for t in trans: 22156498Snate@binkert.org dtrans[t] = 1 22166498Snate@binkert.org 22176498Snate@binkert.org # Loop over all transitions and compute lookbacks and includes 22186498Snate@binkert.org for state,N in trans: 22196498Snate@binkert.org lookb = [] 22206498Snate@binkert.org includes = [] 22216498Snate@binkert.org for p in C[state]: 22226498Snate@binkert.org if p.name != N: continue 22236498Snate@binkert.org 22246498Snate@binkert.org # Okay, we have a name match. We now follow the production all the way 22256498Snate@binkert.org # through the state machine until we get the . on the right hand side 22266498Snate@binkert.org 22276498Snate@binkert.org lr_index = p.lr_index 22286498Snate@binkert.org j = state 22296498Snate@binkert.org while lr_index < p.len - 1: 22306498Snate@binkert.org lr_index = lr_index + 1 22316498Snate@binkert.org t = p.prod[lr_index] 22326498Snate@binkert.org 22336498Snate@binkert.org # Check to see if this symbol and state are a non-terminal transition 22346498Snate@binkert.org if (j,t) in dtrans: 22356498Snate@binkert.org # Yes. Okay, there is some chance that this is an includes relation 22366498Snate@binkert.org # the only way to know for certain is whether the rest of the 22376498Snate@binkert.org # production derives empty 22386498Snate@binkert.org 22396498Snate@binkert.org li = lr_index + 1 22406498Snate@binkert.org while li < p.len: 22416498Snate@binkert.org if p.prod[li] in self.grammar.Terminals: break # No forget it 22426498Snate@binkert.org if not p.prod[li] in nullable: break 22436498Snate@binkert.org li = li + 1 22446498Snate@binkert.org else: 22456498Snate@binkert.org # Appears to be a relation between (j,t) and (state,N) 22466498Snate@binkert.org includes.append((j,t)) 22476498Snate@binkert.org 22486498Snate@binkert.org g = self.lr0_goto(C[j],t) # Go to next set 22496498Snate@binkert.org j = self.lr0_cidhash.get(id(g),-1) # Go to next state 22506498Snate@binkert.org 22516498Snate@binkert.org # When we get here, j is the final state, now we have to locate the production 22526498Snate@binkert.org for r in C[j]: 22536498Snate@binkert.org if r.name != p.name: continue 22546498Snate@binkert.org if r.len != p.len: continue 22556498Snate@binkert.org i = 0 22566498Snate@binkert.org # This look is comparing a production ". A B C" with "A B C ." 22576498Snate@binkert.org while i < r.lr_index: 22586498Snate@binkert.org if r.prod[i] != p.prod[i+1]: break 22596498Snate@binkert.org i = i + 1 22606498Snate@binkert.org else: 22616498Snate@binkert.org lookb.append((j,r)) 22626498Snate@binkert.org for i in includes: 22636498Snate@binkert.org if not i in includedict: includedict[i] = [] 22646498Snate@binkert.org includedict[i].append((state,N)) 22656498Snate@binkert.org lookdict[(state,N)] = lookb 22666498Snate@binkert.org 22676498Snate@binkert.org return lookdict,includedict 22686498Snate@binkert.org 22696498Snate@binkert.org # ----------------------------------------------------------------------------- 22706498Snate@binkert.org # compute_read_sets() 22716498Snate@binkert.org # 22726498Snate@binkert.org # Given a set of LR(0) items, this function computes the read sets. 22736498Snate@binkert.org # 22746498Snate@binkert.org # Inputs: C = Set of LR(0) items 22756498Snate@binkert.org # ntrans = Set of nonterminal transitions 22766498Snate@binkert.org # nullable = Set of empty transitions 22776498Snate@binkert.org # 22786498Snate@binkert.org # Returns a set containing the read sets 22796498Snate@binkert.org # ----------------------------------------------------------------------------- 22806498Snate@binkert.org 22816498Snate@binkert.org def compute_read_sets(self,C, ntrans, nullable): 22826498Snate@binkert.org FP = lambda x: self.dr_relation(C,x,nullable) 22836498Snate@binkert.org R = lambda x: self.reads_relation(C,x,nullable) 22846498Snate@binkert.org F = digraph(ntrans,R,FP) 22856498Snate@binkert.org return F 22866498Snate@binkert.org 22876498Snate@binkert.org # ----------------------------------------------------------------------------- 22886498Snate@binkert.org # compute_follow_sets() 22896498Snate@binkert.org # 22906498Snate@binkert.org # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 22916498Snate@binkert.org # and an include set, this function computes the follow sets 22926498Snate@binkert.org # 22936498Snate@binkert.org # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} 22946498Snate@binkert.org # 22956498Snate@binkert.org # Inputs: 22966498Snate@binkert.org # ntrans = Set of nonterminal transitions 22976498Snate@binkert.org # readsets = Readset (previously computed) 22986498Snate@binkert.org # inclsets = Include sets (previously computed) 22996498Snate@binkert.org # 23006498Snate@binkert.org # Returns a set containing the follow sets 23016498Snate@binkert.org # ----------------------------------------------------------------------------- 23026498Snate@binkert.org 23036498Snate@binkert.org def compute_follow_sets(self,ntrans,readsets,inclsets): 23046498Snate@binkert.org FP = lambda x: readsets[x] 23056498Snate@binkert.org R = lambda x: inclsets.get(x,[]) 23066498Snate@binkert.org F = digraph(ntrans,R,FP) 23076498Snate@binkert.org return F 23086498Snate@binkert.org 23096498Snate@binkert.org # ----------------------------------------------------------------------------- 23106498Snate@binkert.org # add_lookaheads() 23116498Snate@binkert.org # 23126498Snate@binkert.org # Attaches the lookahead symbols to grammar rules. 23136498Snate@binkert.org # 23146498Snate@binkert.org # Inputs: lookbacks - Set of lookback relations 23156498Snate@binkert.org # followset - Computed follow set 23166498Snate@binkert.org # 23176498Snate@binkert.org # This function directly attaches the lookaheads to productions contained 23186498Snate@binkert.org # in the lookbacks set 23196498Snate@binkert.org # ----------------------------------------------------------------------------- 23206498Snate@binkert.org 23216498Snate@binkert.org def add_lookaheads(self,lookbacks,followset): 23226498Snate@binkert.org for trans,lb in lookbacks.items(): 23236498Snate@binkert.org # Loop over productions in lookback 23246498Snate@binkert.org for state,p in lb: 23256498Snate@binkert.org if not state in p.lookaheads: 23266498Snate@binkert.org p.lookaheads[state] = [] 23276498Snate@binkert.org f = followset.get(trans,[]) 23286498Snate@binkert.org for a in f: 23296498Snate@binkert.org if a not in p.lookaheads[state]: p.lookaheads[state].append(a) 23306498Snate@binkert.org 23316498Snate@binkert.org # ----------------------------------------------------------------------------- 23326498Snate@binkert.org # add_lalr_lookaheads() 23336498Snate@binkert.org # 23346498Snate@binkert.org # This function does all of the work of adding lookahead information for use 23356498Snate@binkert.org # with LALR parsing 23366498Snate@binkert.org # ----------------------------------------------------------------------------- 23376498Snate@binkert.org 23386498Snate@binkert.org def add_lalr_lookaheads(self,C): 23396498Snate@binkert.org # Determine all of the nullable nonterminals 23406498Snate@binkert.org nullable = self.compute_nullable_nonterminals() 23416498Snate@binkert.org 23426498Snate@binkert.org # Find all non-terminal transitions 23436498Snate@binkert.org trans = self.find_nonterminal_transitions(C) 23446498Snate@binkert.org 23456498Snate@binkert.org # Compute read sets 23466498Snate@binkert.org readsets = self.compute_read_sets(C,trans,nullable) 23476498Snate@binkert.org 23486498Snate@binkert.org # Compute lookback/includes relations 23496498Snate@binkert.org lookd, included = self.compute_lookback_includes(C,trans,nullable) 23506498Snate@binkert.org 23516498Snate@binkert.org # Compute LALR FOLLOW sets 23526498Snate@binkert.org followsets = self.compute_follow_sets(trans,readsets,included) 23536498Snate@binkert.org 23546498Snate@binkert.org # Add all of the lookaheads 23556498Snate@binkert.org self.add_lookaheads(lookd,followsets) 23566498Snate@binkert.org 23576498Snate@binkert.org # ----------------------------------------------------------------------------- 23586498Snate@binkert.org # lr_parse_table() 23596498Snate@binkert.org # 23606498Snate@binkert.org # This function constructs the parse tables for SLR or LALR 23616498Snate@binkert.org # ----------------------------------------------------------------------------- 23626498Snate@binkert.org def lr_parse_table(self): 23636498Snate@binkert.org Productions = self.grammar.Productions 23646498Snate@binkert.org Precedence = self.grammar.Precedence 23656498Snate@binkert.org goto = self.lr_goto # Goto array 23666498Snate@binkert.org action = self.lr_action # Action array 23676498Snate@binkert.org log = self.log # Logger for output 23686498Snate@binkert.org 23696498Snate@binkert.org actionp = { } # Action production array (temporary) 23706498Snate@binkert.org 23716498Snate@binkert.org log.info("Parsing method: %s", self.lr_method) 23726498Snate@binkert.org 23736498Snate@binkert.org # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items 23746498Snate@binkert.org # This determines the number of states 23756498Snate@binkert.org 23766498Snate@binkert.org C = self.lr0_items() 23776498Snate@binkert.org 23786498Snate@binkert.org if self.lr_method == 'LALR': 23796498Snate@binkert.org self.add_lalr_lookaheads(C) 23806498Snate@binkert.org 23816498Snate@binkert.org # Build the parser table, state by state 23826498Snate@binkert.org st = 0 23836498Snate@binkert.org for I in C: 23846498Snate@binkert.org # Loop over each production in I 23856498Snate@binkert.org actlist = [ ] # List of actions 23866498Snate@binkert.org st_action = { } 23876498Snate@binkert.org st_actionp = { } 23886498Snate@binkert.org st_goto = { } 23896498Snate@binkert.org log.info("") 23906498Snate@binkert.org log.info("state %d", st) 23916498Snate@binkert.org log.info("") 23922632SN/A for p in I: 23936498Snate@binkert.org log.info(" (%d) %s", p.number, str(p)) 23946498Snate@binkert.org log.info("") 23956498Snate@binkert.org 23966498Snate@binkert.org for p in I: 23976498Snate@binkert.org if p.len == p.lr_index + 1: 23986498Snate@binkert.org if p.name == "S'": 23996498Snate@binkert.org # Start symbol. Accept! 24006498Snate@binkert.org st_action["$end"] = 0 24016498Snate@binkert.org st_actionp["$end"] = p 24026498Snate@binkert.org else: 24036498Snate@binkert.org # We are at the end of a production. Reduce! 24046498Snate@binkert.org if self.lr_method == 'LALR': 24056498Snate@binkert.org laheads = p.lookaheads[st] 24066498Snate@binkert.org else: 24076498Snate@binkert.org laheads = self.grammar.Follow[p.name] 24086498Snate@binkert.org for a in laheads: 24096498Snate@binkert.org actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) 24106498Snate@binkert.org r = st_action.get(a,None) 24116498Snate@binkert.org if r is not None: 24126498Snate@binkert.org # Whoa. Have a shift/reduce or reduce/reduce conflict 24136498Snate@binkert.org if r > 0: 24146498Snate@binkert.org # Need to decide on shift or reduce here 24156498Snate@binkert.org # By default we favor shifting. Need to add 24166498Snate@binkert.org # some precedence rules here. 24176498Snate@binkert.org sprec,slevel = Productions[st_actionp[a].number].prec 24186498Snate@binkert.org rprec,rlevel = Precedence.get(a,('right',0)) 24196498Snate@binkert.org if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): 24206498Snate@binkert.org # We really need to reduce here. 24216498Snate@binkert.org st_action[a] = -p.number 24226498Snate@binkert.org st_actionp[a] = p 24236498Snate@binkert.org if not slevel and not rlevel: 24246498Snate@binkert.org log.info(" ! shift/reduce conflict for %s resolved as reduce",a) 24256498Snate@binkert.org self.sr_conflicts.append((st,a,'reduce')) 24266498Snate@binkert.org Productions[p.number].reduced += 1 24276498Snate@binkert.org elif (slevel == rlevel) and (rprec == 'nonassoc'): 24286498Snate@binkert.org st_action[a] = None 24296498Snate@binkert.org else: 24306498Snate@binkert.org # Hmmm. Guess we'll keep the shift 24316498Snate@binkert.org if not rlevel: 24326498Snate@binkert.org log.info(" ! shift/reduce conflict for %s resolved as shift",a) 24336498Snate@binkert.org self.sr_conflicts.append((st,a,'shift')) 24346498Snate@binkert.org elif r < 0: 24356498Snate@binkert.org # Reduce/reduce conflict. In this case, we favor the rule 24366498Snate@binkert.org # that was defined first in the grammar file 24376498Snate@binkert.org oldp = Productions[-r] 24386498Snate@binkert.org pp = Productions[p.number] 24396498Snate@binkert.org if oldp.line > pp.line: 24406498Snate@binkert.org st_action[a] = -p.number 24416498Snate@binkert.org st_actionp[a] = p 24426498Snate@binkert.org chosenp,rejectp = pp,oldp 24436498Snate@binkert.org Productions[p.number].reduced += 1 24446498Snate@binkert.org Productions[oldp.number].reduced -= 1 24456498Snate@binkert.org else: 24466498Snate@binkert.org chosenp,rejectp = oldp,pp 24476498Snate@binkert.org self.rr_conflicts.append((st,chosenp,rejectp)) 24486498Snate@binkert.org log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) 24496498Snate@binkert.org else: 24506498Snate@binkert.org raise LALRError("Unknown conflict in state %d" % st) 24516498Snate@binkert.org else: 24526498Snate@binkert.org st_action[a] = -p.number 24536498Snate@binkert.org st_actionp[a] = p 24546498Snate@binkert.org Productions[p.number].reduced += 1 24552632SN/A else: 24566498Snate@binkert.org i = p.lr_index 24576498Snate@binkert.org a = p.prod[i+1] # Get symbol right after the "." 24586498Snate@binkert.org if a in self.grammar.Terminals: 24596498Snate@binkert.org g = self.lr0_goto(I,a) 24606498Snate@binkert.org j = self.lr0_cidhash.get(id(g),-1) 24616498Snate@binkert.org if j >= 0: 24626498Snate@binkert.org # We are in a shift state 24636498Snate@binkert.org actlist.append((a,p,"shift and go to state %d" % j)) 24646498Snate@binkert.org r = st_action.get(a,None) 24656498Snate@binkert.org if r is not None: 24666498Snate@binkert.org # Whoa have a shift/reduce or shift/shift conflict 24676498Snate@binkert.org if r > 0: 24686498Snate@binkert.org if r != j: 24696498Snate@binkert.org raise LALRError("Shift/shift conflict in state %d" % st) 24706498Snate@binkert.org elif r < 0: 24716498Snate@binkert.org # Do a precedence check. 24726498Snate@binkert.org # - if precedence of reduce rule is higher, we reduce. 24736498Snate@binkert.org # - if precedence of reduce is same and left assoc, we reduce. 24746498Snate@binkert.org # - otherwise we shift 24756498Snate@binkert.org rprec,rlevel = Productions[st_actionp[a].number].prec 24766498Snate@binkert.org sprec,slevel = Precedence.get(a,('right',0)) 24776498Snate@binkert.org if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): 24786498Snate@binkert.org # We decide to shift here... highest precedence to shift 24796498Snate@binkert.org Productions[st_actionp[a].number].reduced -= 1 24806498Snate@binkert.org st_action[a] = j 24816498Snate@binkert.org st_actionp[a] = p 24826498Snate@binkert.org if not rlevel: 24836498Snate@binkert.org log.info(" ! shift/reduce conflict for %s resolved as shift",a) 24846498Snate@binkert.org self.sr_conflicts.append((st,a,'shift')) 24856498Snate@binkert.org elif (slevel == rlevel) and (rprec == 'nonassoc'): 24866498Snate@binkert.org st_action[a] = None 24876498Snate@binkert.org else: 24886498Snate@binkert.org # Hmmm. Guess we'll keep the reduce 24896498Snate@binkert.org if not slevel and not rlevel: 24906498Snate@binkert.org log.info(" ! shift/reduce conflict for %s resolved as reduce",a) 24916498Snate@binkert.org self.sr_conflicts.append((st,a,'reduce')) 24926498Snate@binkert.org 24932632SN/A else: 24946498Snate@binkert.org raise LALRError("Unknown conflict in state %d" % st) 24952632SN/A else: 24966498Snate@binkert.org st_action[a] = j 24976498Snate@binkert.org st_actionp[a] = p 24986498Snate@binkert.org 24996498Snate@binkert.org # Print the actions associated with each terminal 25006498Snate@binkert.org _actprint = { } 25016498Snate@binkert.org for a,p,m in actlist: 25026498Snate@binkert.org if a in st_action: 25036498Snate@binkert.org if p is st_actionp[a]: 25046498Snate@binkert.org log.info(" %-15s %s",a,m) 25054479Sbinkertn@umich.edu _actprint[(a,m)] = 1 25066498Snate@binkert.org log.info("") 25076498Snate@binkert.org # Print the actions that were not used. (debugging) 25086498Snate@binkert.org not_used = 0 25096498Snate@binkert.org for a,p,m in actlist: 25106498Snate@binkert.org if a in st_action: 25116498Snate@binkert.org if p is not st_actionp[a]: 25126498Snate@binkert.org if not (a,m) in _actprint: 25136498Snate@binkert.org log.debug(" ! %-15s [ %s ]",a,m) 25146498Snate@binkert.org not_used = 1 25156498Snate@binkert.org _actprint[(a,m)] = 1 25166498Snate@binkert.org if not_used: 25176498Snate@binkert.org log.debug("") 25186498Snate@binkert.org 25196498Snate@binkert.org # Construct the goto table for this state 25206498Snate@binkert.org 25216498Snate@binkert.org nkeys = { } 25226498Snate@binkert.org for ii in I: 25236498Snate@binkert.org for s in ii.usyms: 25246498Snate@binkert.org if s in self.grammar.Nonterminals: 25256498Snate@binkert.org nkeys[s] = None 25266498Snate@binkert.org for n in nkeys: 25276498Snate@binkert.org g = self.lr0_goto(I,n) 25286498Snate@binkert.org j = self.lr0_cidhash.get(id(g),-1) 25296498Snate@binkert.org if j >= 0: 25306498Snate@binkert.org st_goto[n] = j 25316498Snate@binkert.org log.info(" %-30s shift and go to state %d",n,j) 25326498Snate@binkert.org 25336498Snate@binkert.org action[st] = st_action 25346498Snate@binkert.org actionp[st] = st_actionp 25356498Snate@binkert.org goto[st] = st_goto 25366498Snate@binkert.org st += 1 25376498Snate@binkert.org 25386498Snate@binkert.org 25396498Snate@binkert.org # ----------------------------------------------------------------------------- 25406498Snate@binkert.org # write() 25416498Snate@binkert.org # 25426498Snate@binkert.org # This function writes the LR parsing tables to a file 25436498Snate@binkert.org # ----------------------------------------------------------------------------- 25446498Snate@binkert.org 25456498Snate@binkert.org def write_table(self,modulename,outputdir='',signature=""): 25466498Snate@binkert.org basemodulename = modulename.split(".")[-1] 25476498Snate@binkert.org filename = os.path.join(outputdir,basemodulename) + ".py" 25486498Snate@binkert.org try: 25496498Snate@binkert.org f = open(filename,"w") 25506498Snate@binkert.org 25516498Snate@binkert.org f.write(""" 25522632SN/A# %s 25532632SN/A# This file is automatically generated. Do not edit. 25546498Snate@binkert.org_tabversion = %r 25556498Snate@binkert.org 25566498Snate@binkert.org_lr_method = %r 25576498Snate@binkert.org 25586498Snate@binkert.org_lr_signature = %r 25596498Snate@binkert.org """ % (filename, __tabversion__, self.lr_method, signature)) 25606498Snate@binkert.org 25616498Snate@binkert.org # Change smaller to 0 to go back to original tables 25626498Snate@binkert.org smaller = 1 25636498Snate@binkert.org 25646498Snate@binkert.org # Factor out names to try and make smaller 25656498Snate@binkert.org if smaller: 25666498Snate@binkert.org items = { } 25676498Snate@binkert.org 25686498Snate@binkert.org for s,nd in self.lr_action.items(): 25696498Snate@binkert.org for name,v in nd.items(): 25706498Snate@binkert.org i = items.get(name) 25716498Snate@binkert.org if not i: 25726498Snate@binkert.org i = ([],[]) 25736498Snate@binkert.org items[name] = i 25746498Snate@binkert.org i[0].append(s) 25756498Snate@binkert.org i[1].append(v) 25766498Snate@binkert.org 25776498Snate@binkert.org f.write("\n_lr_action_items = {") 25786498Snate@binkert.org for k,v in items.items(): 25796498Snate@binkert.org f.write("%r:([" % k) 25806498Snate@binkert.org for i in v[0]: 25816498Snate@binkert.org f.write("%r," % i) 25826498Snate@binkert.org f.write("],[") 25836498Snate@binkert.org for i in v[1]: 25846498Snate@binkert.org f.write("%r," % i) 25856498Snate@binkert.org 25866498Snate@binkert.org f.write("]),") 25876498Snate@binkert.org f.write("}\n") 25886498Snate@binkert.org 25896498Snate@binkert.org f.write(""" 25902632SN/A_lr_action = { } 25912632SN/Afor _k, _v in _lr_action_items.items(): 25922632SN/A for _x,_y in zip(_v[0],_v[1]): 25936498Snate@binkert.org if not _x in _lr_action: _lr_action[_x] = { } 25944479Sbinkertn@umich.edu _lr_action[_x][_k] = _y 25952632SN/Adel _lr_action_items 25962632SN/A""") 25972632SN/A 25986498Snate@binkert.org else: 25996498Snate@binkert.org f.write("\n_lr_action = { "); 26006498Snate@binkert.org for k,v in self.lr_action.items(): 26016498Snate@binkert.org f.write("(%r,%r):%r," % (k[0],k[1],v)) 26026498Snate@binkert.org f.write("}\n"); 26036498Snate@binkert.org 26046498Snate@binkert.org if smaller: 26056498Snate@binkert.org # Factor out names to try and make smaller 26066498Snate@binkert.org items = { } 26076498Snate@binkert.org 26086498Snate@binkert.org for s,nd in self.lr_goto.items(): 26096498Snate@binkert.org for name,v in nd.items(): 26106498Snate@binkert.org i = items.get(name) 26116498Snate@binkert.org if not i: 26126498Snate@binkert.org i = ([],[]) 26136498Snate@binkert.org items[name] = i 26146498Snate@binkert.org i[0].append(s) 26156498Snate@binkert.org i[1].append(v) 26166498Snate@binkert.org 26176498Snate@binkert.org f.write("\n_lr_goto_items = {") 26186498Snate@binkert.org for k,v in items.items(): 26196498Snate@binkert.org f.write("%r:([" % k) 26206498Snate@binkert.org for i in v[0]: 26216498Snate@binkert.org f.write("%r," % i) 26226498Snate@binkert.org f.write("],[") 26236498Snate@binkert.org for i in v[1]: 26246498Snate@binkert.org f.write("%r," % i) 26256498Snate@binkert.org 26266498Snate@binkert.org f.write("]),") 26276498Snate@binkert.org f.write("}\n") 26286498Snate@binkert.org 26296498Snate@binkert.org f.write(""" 26302632SN/A_lr_goto = { } 26312632SN/Afor _k, _v in _lr_goto_items.items(): 26322632SN/A for _x,_y in zip(_v[0],_v[1]): 26336498Snate@binkert.org if not _x in _lr_goto: _lr_goto[_x] = { } 26344479Sbinkertn@umich.edu _lr_goto[_x][_k] = _y 26352632SN/Adel _lr_goto_items 26362632SN/A""") 26376498Snate@binkert.org else: 26386498Snate@binkert.org f.write("\n_lr_goto = { "); 26396498Snate@binkert.org for k,v in self.lr_goto.items(): 26406498Snate@binkert.org f.write("(%r,%r):%r," % (k[0],k[1],v)) 26416498Snate@binkert.org f.write("}\n"); 26426498Snate@binkert.org 26436498Snate@binkert.org # Write production table 26446498Snate@binkert.org f.write("_lr_productions = [\n") 26456498Snate@binkert.org for p in self.lr_productions: 26466498Snate@binkert.org if p.func: 26476498Snate@binkert.org f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) 26486498Snate@binkert.org else: 26496498Snate@binkert.org f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) 26506498Snate@binkert.org f.write("]\n") 26516498Snate@binkert.org f.close() 26526498Snate@binkert.org 26536498Snate@binkert.org except IOError: 26546498Snate@binkert.org e = sys.exc_info()[1] 26556498Snate@binkert.org sys.stderr.write("Unable to create '%s'\n" % filename) 26566498Snate@binkert.org sys.stderr.write(str(e)+"\n") 26576498Snate@binkert.org return 26586498Snate@binkert.org 26596498Snate@binkert.org 26606498Snate@binkert.org # ----------------------------------------------------------------------------- 26616498Snate@binkert.org # pickle_table() 26626498Snate@binkert.org # 26636498Snate@binkert.org # This function pickles the LR parsing tables to a supplied file object 26646498Snate@binkert.org # ----------------------------------------------------------------------------- 26656498Snate@binkert.org 26666498Snate@binkert.org def pickle_table(self,filename,signature=""): 26676498Snate@binkert.org try: 26686498Snate@binkert.org import cPickle as pickle 26696498Snate@binkert.org except ImportError: 26706498Snate@binkert.org import pickle 26716498Snate@binkert.org outf = open(filename,"wb") 26726498Snate@binkert.org pickle.dump(__tabversion__,outf,pickle_protocol) 26736498Snate@binkert.org pickle.dump(self.lr_method,outf,pickle_protocol) 26746498Snate@binkert.org pickle.dump(signature,outf,pickle_protocol) 26756498Snate@binkert.org pickle.dump(self.lr_action,outf,pickle_protocol) 26766498Snate@binkert.org pickle.dump(self.lr_goto,outf,pickle_protocol) 26776498Snate@binkert.org 26786498Snate@binkert.org outp = [] 26796498Snate@binkert.org for p in self.lr_productions: 26806498Snate@binkert.org if p.func: 26816498Snate@binkert.org outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) 26826498Snate@binkert.org else: 26836498Snate@binkert.org outp.append((str(p),p.name,p.len,None,None,None)) 26846498Snate@binkert.org pickle.dump(outp,outf,pickle_protocol) 26856498Snate@binkert.org outf.close() 26866498Snate@binkert.org 26876498Snate@binkert.org# ----------------------------------------------------------------------------- 26886498Snate@binkert.org# === INTROSPECTION === 26896498Snate@binkert.org# 26906498Snate@binkert.org# The following functions and classes are used to implement the PLY 26916498Snate@binkert.org# introspection features followed by the yacc() function itself. 26926498Snate@binkert.org# ----------------------------------------------------------------------------- 26936498Snate@binkert.org 26946498Snate@binkert.org# ----------------------------------------------------------------------------- 26956498Snate@binkert.org# get_caller_module_dict() 26966498Snate@binkert.org# 26976498Snate@binkert.org# This function returns a dictionary containing all of the symbols defined within 26986498Snate@binkert.org# a caller further down the call stack. This is used to get the environment 26996498Snate@binkert.org# associated with the yacc() call if none was provided. 27006498Snate@binkert.org# ----------------------------------------------------------------------------- 27016498Snate@binkert.org 27026498Snate@binkert.orgdef get_caller_module_dict(levels): 27036498Snate@binkert.org try: 27046498Snate@binkert.org raise RuntimeError 27056498Snate@binkert.org except RuntimeError: 27066498Snate@binkert.org e,b,t = sys.exc_info() 27076498Snate@binkert.org f = t.tb_frame 27086498Snate@binkert.org while levels > 0: 27096498Snate@binkert.org f = f.f_back 27106498Snate@binkert.org levels -= 1 27116498Snate@binkert.org ldict = f.f_globals.copy() 27126498Snate@binkert.org if f.f_globals != f.f_locals: 27136498Snate@binkert.org ldict.update(f.f_locals) 27146498Snate@binkert.org 27156498Snate@binkert.org return ldict 27166498Snate@binkert.org 27176498Snate@binkert.org# ----------------------------------------------------------------------------- 27186498Snate@binkert.org# parse_grammar() 27196498Snate@binkert.org# 27206498Snate@binkert.org# This takes a raw grammar rule string and parses it into production data 27216498Snate@binkert.org# ----------------------------------------------------------------------------- 27226498Snate@binkert.orgdef parse_grammar(doc,file,line): 27236498Snate@binkert.org grammar = [] 27246498Snate@binkert.org # Split the doc string into lines 27256498Snate@binkert.org pstrings = doc.splitlines() 27266498Snate@binkert.org lastp = None 27276498Snate@binkert.org dline = line 27286498Snate@binkert.org for ps in pstrings: 27296498Snate@binkert.org dline += 1 27306498Snate@binkert.org p = ps.split() 27316498Snate@binkert.org if not p: continue 27326498Snate@binkert.org try: 27336498Snate@binkert.org if p[0] == '|': 27346498Snate@binkert.org # This is a continuation of a previous rule 27356498Snate@binkert.org if not lastp: 27366498Snate@binkert.org raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline)) 27376498Snate@binkert.org prodname = lastp 27386498Snate@binkert.org syms = p[1:] 27396498Snate@binkert.org else: 27406498Snate@binkert.org prodname = p[0] 27416498Snate@binkert.org lastp = prodname 27426498Snate@binkert.org syms = p[2:] 27436498Snate@binkert.org assign = p[1] 27446498Snate@binkert.org if assign != ':' and assign != '::=': 27456498Snate@binkert.org raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline)) 27466498Snate@binkert.org 27476498Snate@binkert.org grammar.append((file,dline,prodname,syms)) 27486498Snate@binkert.org except SyntaxError: 27496498Snate@binkert.org raise 27506498Snate@binkert.org except Exception: 27516498Snate@binkert.org raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip())) 27526498Snate@binkert.org 27536498Snate@binkert.org return grammar 27546498Snate@binkert.org 27556498Snate@binkert.org# ----------------------------------------------------------------------------- 27566498Snate@binkert.org# ParserReflect() 27576498Snate@binkert.org# 27586498Snate@binkert.org# This class represents information extracted for building a parser including 27596498Snate@binkert.org# start symbol, error function, tokens, precedence list, action functions, 27606498Snate@binkert.org# etc. 27616498Snate@binkert.org# ----------------------------------------------------------------------------- 27626498Snate@binkert.orgclass ParserReflect(object): 27636498Snate@binkert.org def __init__(self,pdict,log=None): 27646498Snate@binkert.org self.pdict = pdict 27656498Snate@binkert.org self.start = None 27666498Snate@binkert.org self.error_func = None 27676498Snate@binkert.org self.tokens = None 27686498Snate@binkert.org self.files = {} 27696498Snate@binkert.org self.grammar = [] 27706498Snate@binkert.org self.error = 0 27716498Snate@binkert.org 27726498Snate@binkert.org if log is None: 27736498Snate@binkert.org self.log = PlyLogger(sys.stderr) 27742632SN/A else: 27756498Snate@binkert.org self.log = log 27766498Snate@binkert.org 27776498Snate@binkert.org # Get all of the basic information 27786498Snate@binkert.org def get_all(self): 27796498Snate@binkert.org self.get_start() 27806498Snate@binkert.org self.get_error_func() 27816498Snate@binkert.org self.get_tokens() 27826498Snate@binkert.org self.get_precedence() 27836498Snate@binkert.org self.get_pfunctions() 27846498Snate@binkert.org 27856498Snate@binkert.org # Validate all of the information 27866498Snate@binkert.org def validate_all(self): 27876498Snate@binkert.org self.validate_start() 27886498Snate@binkert.org self.validate_error_func() 27896498Snate@binkert.org self.validate_tokens() 27906498Snate@binkert.org self.validate_precedence() 27916498Snate@binkert.org self.validate_pfunctions() 27926498Snate@binkert.org self.validate_files() 27936498Snate@binkert.org return self.error 27946498Snate@binkert.org 27956498Snate@binkert.org # Compute a signature over the grammar 27966498Snate@binkert.org def signature(self): 27976498Snate@binkert.org try: 27986498Snate@binkert.org from hashlib import md5 27996498Snate@binkert.org except ImportError: 28006498Snate@binkert.org from md5 import md5 28016498Snate@binkert.org try: 28026498Snate@binkert.org sig = md5() 28036498Snate@binkert.org if self.start: 28046498Snate@binkert.org sig.update(self.start.encode('latin-1')) 28056498Snate@binkert.org if self.prec: 28066498Snate@binkert.org sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1')) 28076498Snate@binkert.org if self.tokens: 28086498Snate@binkert.org sig.update(" ".join(self.tokens).encode('latin-1')) 28096498Snate@binkert.org for f in self.pfuncs: 28106498Snate@binkert.org if f[3]: 28116498Snate@binkert.org sig.update(f[3].encode('latin-1')) 28126498Snate@binkert.org except (TypeError,ValueError): 28136498Snate@binkert.org pass 28146498Snate@binkert.org return sig.digest() 28156498Snate@binkert.org 28166498Snate@binkert.org # ----------------------------------------------------------------------------- 28176498Snate@binkert.org # validate_file() 28186498Snate@binkert.org # 28196498Snate@binkert.org # This method checks to see if there are duplicated p_rulename() functions 28206498Snate@binkert.org # in the parser module file. Without this function, it is really easy for 28216498Snate@binkert.org # users to make mistakes by cutting and pasting code fragments (and it's a real 28226498Snate@binkert.org # bugger to try and figure out why the resulting parser doesn't work). Therefore, 28236498Snate@binkert.org # we just do a little regular expression pattern matching of def statements 28246498Snate@binkert.org # to try and detect duplicates. 28256498Snate@binkert.org # ----------------------------------------------------------------------------- 28266498Snate@binkert.org 28276498Snate@binkert.org def validate_files(self): 28286498Snate@binkert.org # Match def p_funcname( 28296498Snate@binkert.org fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') 28306498Snate@binkert.org 28316498Snate@binkert.org for filename in self.files.keys(): 28326498Snate@binkert.org base,ext = os.path.splitext(filename) 28336498Snate@binkert.org if ext != '.py': return 1 # No idea. Assume it's okay. 28346498Snate@binkert.org 28356498Snate@binkert.org try: 28366498Snate@binkert.org f = open(filename) 28376498Snate@binkert.org lines = f.readlines() 28386498Snate@binkert.org f.close() 28396498Snate@binkert.org except IOError: 28406498Snate@binkert.org continue 28416498Snate@binkert.org 28426498Snate@binkert.org counthash = { } 28436498Snate@binkert.org for linen,l in enumerate(lines): 28446498Snate@binkert.org linen += 1 28456498Snate@binkert.org m = fre.match(l) 28466498Snate@binkert.org if m: 28476498Snate@binkert.org name = m.group(1) 28486498Snate@binkert.org prev = counthash.get(name) 28496498Snate@binkert.org if not prev: 28506498Snate@binkert.org counthash[name] = linen 28516498Snate@binkert.org else: 28526498Snate@binkert.org self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev) 28536498Snate@binkert.org 28546498Snate@binkert.org # Get the start symbol 28556498Snate@binkert.org def get_start(self): 28566498Snate@binkert.org self.start = self.pdict.get('start') 28576498Snate@binkert.org 28586498Snate@binkert.org # Validate the start symbol 28596498Snate@binkert.org def validate_start(self): 28606498Snate@binkert.org if self.start is not None: 28616498Snate@binkert.org if not isinstance(self.start,str): 28626498Snate@binkert.org self.log.error("'start' must be a string") 28636498Snate@binkert.org 28646498Snate@binkert.org # Look for error handler 28656498Snate@binkert.org def get_error_func(self): 28666498Snate@binkert.org self.error_func = self.pdict.get('p_error') 28676498Snate@binkert.org 28686498Snate@binkert.org # Validate the error function 28696498Snate@binkert.org def validate_error_func(self): 28706498Snate@binkert.org if self.error_func: 28716498Snate@binkert.org if isinstance(self.error_func,types.FunctionType): 28726498Snate@binkert.org ismethod = 0 28736498Snate@binkert.org elif isinstance(self.error_func, types.MethodType): 28746498Snate@binkert.org ismethod = 1 28752632SN/A else: 28766498Snate@binkert.org self.log.error("'p_error' defined, but is not a function or method") 28776498Snate@binkert.org self.error = 1 28786498Snate@binkert.org return 28796498Snate@binkert.org 28806498Snate@binkert.org eline = func_code(self.error_func).co_firstlineno 28816498Snate@binkert.org efile = func_code(self.error_func).co_filename 28826498Snate@binkert.org self.files[efile] = 1 28836498Snate@binkert.org 28846498Snate@binkert.org if (func_code(self.error_func).co_argcount != 1+ismethod): 28856498Snate@binkert.org self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) 28866498Snate@binkert.org self.error = 1 28876498Snate@binkert.org 28886498Snate@binkert.org # Get the tokens map 28896498Snate@binkert.org def get_tokens(self): 28906498Snate@binkert.org tokens = self.pdict.get("tokens",None) 28916498Snate@binkert.org if not tokens: 28926498Snate@binkert.org self.log.error("No token list is defined") 28936498Snate@binkert.org self.error = 1 28946498Snate@binkert.org return 28956498Snate@binkert.org 28966498Snate@binkert.org if not isinstance(tokens,(list, tuple)): 28976498Snate@binkert.org self.log.error("tokens must be a list or tuple") 28986498Snate@binkert.org self.error = 1 28996498Snate@binkert.org return 29006498Snate@binkert.org 29016498Snate@binkert.org if not tokens: 29026498Snate@binkert.org self.log.error("tokens is empty") 29036498Snate@binkert.org self.error = 1 29046498Snate@binkert.org return 29056498Snate@binkert.org 29066498Snate@binkert.org self.tokens = tokens 29076498Snate@binkert.org 29086498Snate@binkert.org # Validate the tokens 29096498Snate@binkert.org def validate_tokens(self): 29106498Snate@binkert.org # Validate the tokens. 29116498Snate@binkert.org if 'error' in self.tokens: 29126498Snate@binkert.org self.log.error("Illegal token name 'error'. Is a reserved word") 29136498Snate@binkert.org self.error = 1 29146498Snate@binkert.org return 29156498Snate@binkert.org 29166498Snate@binkert.org terminals = {} 29176498Snate@binkert.org for n in self.tokens: 29186498Snate@binkert.org if n in terminals: 29196498Snate@binkert.org self.log.warning("Token '%s' multiply defined", n) 29206498Snate@binkert.org terminals[n] = 1 29216498Snate@binkert.org 29226498Snate@binkert.org # Get the precedence map (if any) 29236498Snate@binkert.org def get_precedence(self): 29246498Snate@binkert.org self.prec = self.pdict.get("precedence",None) 29256498Snate@binkert.org 29266498Snate@binkert.org # Validate and parse the precedence map 29276498Snate@binkert.org def validate_precedence(self): 29286498Snate@binkert.org preclist = [] 29296498Snate@binkert.org if self.prec: 29306498Snate@binkert.org if not isinstance(self.prec,(list,tuple)): 29316498Snate@binkert.org self.log.error("precedence must be a list or tuple") 29326498Snate@binkert.org self.error = 1 29336498Snate@binkert.org return 29346498Snate@binkert.org for level,p in enumerate(self.prec): 29356498Snate@binkert.org if not isinstance(p,(list,tuple)): 29366498Snate@binkert.org self.log.error("Bad precedence table") 29376498Snate@binkert.org self.error = 1 29386498Snate@binkert.org return 29396498Snate@binkert.org 29406498Snate@binkert.org if len(p) < 2: 29416498Snate@binkert.org self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p) 29426498Snate@binkert.org self.error = 1 29436498Snate@binkert.org return 29446498Snate@binkert.org assoc = p[0] 29456498Snate@binkert.org if not isinstance(assoc,str): 29466498Snate@binkert.org self.log.error("precedence associativity must be a string") 29476498Snate@binkert.org self.error = 1 29486498Snate@binkert.org return 29496498Snate@binkert.org for term in p[1:]: 29506498Snate@binkert.org if not isinstance(term,str): 29516498Snate@binkert.org self.log.error("precedence items must be strings") 29526498Snate@binkert.org self.error = 1 29536498Snate@binkert.org return 29546498Snate@binkert.org preclist.append((term,assoc,level+1)) 29556498Snate@binkert.org self.preclist = preclist 29566498Snate@binkert.org 29576498Snate@binkert.org # Get all p_functions from the grammar 29586498Snate@binkert.org def get_pfunctions(self): 29596498Snate@binkert.org p_functions = [] 29606498Snate@binkert.org for name, item in self.pdict.items(): 29616498Snate@binkert.org if name[:2] != 'p_': continue 29626498Snate@binkert.org if name == 'p_error': continue 29636498Snate@binkert.org if isinstance(item,(types.FunctionType,types.MethodType)): 29646498Snate@binkert.org line = func_code(item).co_firstlineno 29656498Snate@binkert.org file = func_code(item).co_filename 29666498Snate@binkert.org p_functions.append((line,file,name,item.__doc__)) 29676498Snate@binkert.org 29686498Snate@binkert.org # Sort all of the actions by line number 29696498Snate@binkert.org p_functions.sort() 29706498Snate@binkert.org self.pfuncs = p_functions 29716498Snate@binkert.org 29726498Snate@binkert.org 29736498Snate@binkert.org # Validate all of the p_functions 29746498Snate@binkert.org def validate_pfunctions(self): 29756498Snate@binkert.org grammar = [] 29766498Snate@binkert.org # Check for non-empty symbols 29776498Snate@binkert.org if len(self.pfuncs) == 0: 29786498Snate@binkert.org self.log.error("no rules of the form p_rulename are defined") 29796498Snate@binkert.org self.error = 1 29806498Snate@binkert.org return 29816498Snate@binkert.org 29826498Snate@binkert.org for line, file, name, doc in self.pfuncs: 29836498Snate@binkert.org func = self.pdict[name] 29846498Snate@binkert.org if isinstance(func, types.MethodType): 29856498Snate@binkert.org reqargs = 2 29866498Snate@binkert.org else: 29876498Snate@binkert.org reqargs = 1 29886498Snate@binkert.org if func_code(func).co_argcount > reqargs: 29896498Snate@binkert.org self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__) 29906498Snate@binkert.org self.error = 1 29916498Snate@binkert.org elif func_code(func).co_argcount < reqargs: 29926498Snate@binkert.org self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) 29936498Snate@binkert.org self.error = 1 29946498Snate@binkert.org elif not func.__doc__: 29956498Snate@binkert.org self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) 29966498Snate@binkert.org else: 29976498Snate@binkert.org try: 29986498Snate@binkert.org parsed_g = parse_grammar(doc,file,line) 29996498Snate@binkert.org for g in parsed_g: 30006498Snate@binkert.org grammar.append((name, g)) 30016498Snate@binkert.org except SyntaxError: 30026498Snate@binkert.org e = sys.exc_info()[1] 30036498Snate@binkert.org self.log.error(str(e)) 30046498Snate@binkert.org self.error = 1 30056498Snate@binkert.org 30066498Snate@binkert.org # Looks like a valid grammar rule 30076498Snate@binkert.org # Mark the file in which defined. 30086498Snate@binkert.org self.files[file] = 1 30096498Snate@binkert.org 30106498Snate@binkert.org # Secondary validation step that looks for p_ definitions that are not functions 30116498Snate@binkert.org # or functions that look like they might be grammar rules. 30126498Snate@binkert.org 30136498Snate@binkert.org for n,v in self.pdict.items(): 30146498Snate@binkert.org if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue 30156498Snate@binkert.org if n[0:2] == 't_': continue 30166498Snate@binkert.org if n[0:2] == 'p_' and n != 'p_error': 30176498Snate@binkert.org self.log.warning("'%s' not defined as a function", n) 30186498Snate@binkert.org if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or 30196498Snate@binkert.org (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): 30206498Snate@binkert.org try: 30216498Snate@binkert.org doc = v.__doc__.split(" ") 30226498Snate@binkert.org if doc[1] == ':': 30236498Snate@binkert.org self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", 30246498Snate@binkert.org func_code(v).co_filename, func_code(v).co_firstlineno,n) 30256498Snate@binkert.org except Exception: 30266498Snate@binkert.org pass 30276498Snate@binkert.org 30286498Snate@binkert.org self.grammar = grammar 30294479Sbinkertn@umich.edu 30302632SN/A# ----------------------------------------------------------------------------- 30312632SN/A# yacc(module) 30322632SN/A# 30336498Snate@binkert.org# Build a parser 30342632SN/A# ----------------------------------------------------------------------------- 30352632SN/A 30366498Snate@binkert.orgdef yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, 30376498Snate@binkert.org check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', 30386498Snate@binkert.org debuglog=None, errorlog = None, picklefile=None): 30396498Snate@binkert.org 30406498Snate@binkert.org global parse # Reference to the parsing method of the last built parser 30416498Snate@binkert.org 30426498Snate@binkert.org # If pickling is enabled, table files are not created 30436498Snate@binkert.org 30446498Snate@binkert.org if picklefile: 30456498Snate@binkert.org write_tables = 0 30466498Snate@binkert.org 30476498Snate@binkert.org if errorlog is None: 30486498Snate@binkert.org errorlog = PlyLogger(sys.stderr) 30496498Snate@binkert.org 30506498Snate@binkert.org # Get the module dictionary used for the parser 30512632SN/A if module: 30526498Snate@binkert.org _items = [(k,getattr(module,k)) for k in dir(module)] 30536498Snate@binkert.org pdict = dict(_items) 30546498Snate@binkert.org else: 30556498Snate@binkert.org pdict = get_caller_module_dict(2) 30566498Snate@binkert.org 30576498Snate@binkert.org # Collect parser information from the dictionary 30586498Snate@binkert.org pinfo = ParserReflect(pdict,log=errorlog) 30596498Snate@binkert.org pinfo.get_all() 30606498Snate@binkert.org 30616498Snate@binkert.org if pinfo.error: 30626498Snate@binkert.org raise YaccError("Unable to build parser") 30636498Snate@binkert.org 30646498Snate@binkert.org # Check signature against table files (if any) 30656498Snate@binkert.org signature = pinfo.signature() 30666498Snate@binkert.org 30676498Snate@binkert.org # Read the tables 30686498Snate@binkert.org try: 30696498Snate@binkert.org lr = LRTable() 30706498Snate@binkert.org if picklefile: 30716498Snate@binkert.org read_signature = lr.read_pickle(picklefile) 30724479Sbinkertn@umich.edu else: 30736498Snate@binkert.org read_signature = lr.read_table(tabmodule) 30746498Snate@binkert.org if optimize or (read_signature == signature): 30756498Snate@binkert.org try: 30766498Snate@binkert.org lr.bind_callables(pinfo.pdict) 30776498Snate@binkert.org parser = LRParser(lr,pinfo.error_func) 30786498Snate@binkert.org parse = parser.parse 30796498Snate@binkert.org return parser 30806498Snate@binkert.org except Exception: 30816498Snate@binkert.org e = sys.exc_info()[1] 30826498Snate@binkert.org errorlog.warning("There was a problem loading the table file: %s", repr(e)) 30836498Snate@binkert.org except VersionError: 30846498Snate@binkert.org e = sys.exc_info() 30856498Snate@binkert.org errorlog.warning(str(e)) 30866498Snate@binkert.org except Exception: 30876498Snate@binkert.org pass 30886498Snate@binkert.org 30896498Snate@binkert.org if debuglog is None: 30906498Snate@binkert.org if debug: 30916498Snate@binkert.org debuglog = PlyLogger(open(debugfile,"w")) 30926498Snate@binkert.org else: 30936498Snate@binkert.org debuglog = NullLogger() 30946498Snate@binkert.org 30956498Snate@binkert.org debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) 30966498Snate@binkert.org 30976498Snate@binkert.org 30986498Snate@binkert.org errors = 0 30996498Snate@binkert.org 31006498Snate@binkert.org # Validate the parser information 31016498Snate@binkert.org if pinfo.validate_all(): 31026498Snate@binkert.org raise YaccError("Unable to build parser") 31036498Snate@binkert.org 31046498Snate@binkert.org if not pinfo.error_func: 31056498Snate@binkert.org errorlog.warning("no p_error() function is defined") 31066498Snate@binkert.org 31076498Snate@binkert.org # Create a grammar object 31086498Snate@binkert.org grammar = Grammar(pinfo.tokens) 31096498Snate@binkert.org 31106498Snate@binkert.org # Set precedence level for terminals 31116498Snate@binkert.org for term, assoc, level in pinfo.preclist: 31122632SN/A try: 31136498Snate@binkert.org grammar.set_precedence(term,assoc,level) 31146498Snate@binkert.org except GrammarError: 31156498Snate@binkert.org e = sys.exc_info()[1] 31166498Snate@binkert.org errorlog.warning("%s",str(e)) 31176498Snate@binkert.org 31186498Snate@binkert.org # Add productions to the grammar 31196498Snate@binkert.org for funcname, gram in pinfo.grammar: 31206498Snate@binkert.org file, line, prodname, syms = gram 31216498Snate@binkert.org try: 31226498Snate@binkert.org grammar.add_production(prodname,syms,funcname,file,line) 31236498Snate@binkert.org except GrammarError: 31246498Snate@binkert.org e = sys.exc_info()[1] 31256498Snate@binkert.org errorlog.error("%s",str(e)) 31266498Snate@binkert.org errors = 1 31276498Snate@binkert.org 31286498Snate@binkert.org # Set the grammar start symbols 31296498Snate@binkert.org try: 31306498Snate@binkert.org if start is None: 31316498Snate@binkert.org grammar.set_start(pinfo.start) 31324479Sbinkertn@umich.edu else: 31336498Snate@binkert.org grammar.set_start(start) 31346498Snate@binkert.org except GrammarError: 31356498Snate@binkert.org e = sys.exc_info()[1] 31366498Snate@binkert.org errorlog.error(str(e)) 31376498Snate@binkert.org errors = 1 31386498Snate@binkert.org 31396498Snate@binkert.org if errors: 31406498Snate@binkert.org raise YaccError("Unable to build parser") 31416498Snate@binkert.org 31426498Snate@binkert.org # Verify the grammar structure 31436498Snate@binkert.org undefined_symbols = grammar.undefined_symbols() 31446498Snate@binkert.org for sym, prod in undefined_symbols: 31456498Snate@binkert.org errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) 31466498Snate@binkert.org errors = 1 31476498Snate@binkert.org 31486498Snate@binkert.org unused_terminals = grammar.unused_terminals() 31496498Snate@binkert.org if unused_terminals: 31506498Snate@binkert.org debuglog.info("") 31516498Snate@binkert.org debuglog.info("Unused terminals:") 31526498Snate@binkert.org debuglog.info("") 31536498Snate@binkert.org for term in unused_terminals: 31546498Snate@binkert.org errorlog.warning("Token '%s' defined, but not used", term) 31556498Snate@binkert.org debuglog.info(" %s", term) 31566498Snate@binkert.org 31576498Snate@binkert.org # Print out all productions to the debug log 31586498Snate@binkert.org if debug: 31596498Snate@binkert.org debuglog.info("") 31606498Snate@binkert.org debuglog.info("Grammar") 31616498Snate@binkert.org debuglog.info("") 31626498Snate@binkert.org for n,p in enumerate(grammar.Productions): 31636498Snate@binkert.org debuglog.info("Rule %-5d %s", n, p) 31646498Snate@binkert.org 31656498Snate@binkert.org # Find unused non-terminals 31666498Snate@binkert.org unused_rules = grammar.unused_rules() 31676498Snate@binkert.org for prod in unused_rules: 31686498Snate@binkert.org errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) 31696498Snate@binkert.org 31706498Snate@binkert.org if len(unused_terminals) == 1: 31716498Snate@binkert.org errorlog.warning("There is 1 unused token") 31726498Snate@binkert.org if len(unused_terminals) > 1: 31736498Snate@binkert.org errorlog.warning("There are %d unused tokens", len(unused_terminals)) 31746498Snate@binkert.org 31756498Snate@binkert.org if len(unused_rules) == 1: 31766498Snate@binkert.org errorlog.warning("There is 1 unused rule") 31776498Snate@binkert.org if len(unused_rules) > 1: 31786498Snate@binkert.org errorlog.warning("There are %d unused rules", len(unused_rules)) 31796498Snate@binkert.org 31806498Snate@binkert.org if debug: 31816498Snate@binkert.org debuglog.info("") 31826498Snate@binkert.org debuglog.info("Terminals, with rules where they appear") 31836498Snate@binkert.org debuglog.info("") 31846498Snate@binkert.org terms = list(grammar.Terminals) 31856498Snate@binkert.org terms.sort() 31866498Snate@binkert.org for term in terms: 31876498Snate@binkert.org debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) 31886498Snate@binkert.org 31896498Snate@binkert.org debuglog.info("") 31906498Snate@binkert.org debuglog.info("Nonterminals, with rules where they appear") 31916498Snate@binkert.org debuglog.info("") 31926498Snate@binkert.org nonterms = list(grammar.Nonterminals) 31936498Snate@binkert.org nonterms.sort() 31946498Snate@binkert.org for nonterm in nonterms: 31956498Snate@binkert.org debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) 31966498Snate@binkert.org debuglog.info("") 31976498Snate@binkert.org 31986498Snate@binkert.org if check_recursion: 31996498Snate@binkert.org unreachable = grammar.find_unreachable() 32006498Snate@binkert.org for u in unreachable: 32016498Snate@binkert.org errorlog.warning("Symbol '%s' is unreachable",u) 32026498Snate@binkert.org 32036498Snate@binkert.org infinite = grammar.infinite_cycles() 32046498Snate@binkert.org for inf in infinite: 32056498Snate@binkert.org errorlog.error("Infinite recursion detected for symbol '%s'", inf) 32066498Snate@binkert.org errors = 1 32076498Snate@binkert.org 32086498Snate@binkert.org unused_prec = grammar.unused_precedence() 32096498Snate@binkert.org for term, assoc in unused_prec: 32106498Snate@binkert.org errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) 32116498Snate@binkert.org errors = 1 32126498Snate@binkert.org 32136498Snate@binkert.org if errors: 32146498Snate@binkert.org raise YaccError("Unable to build parser") 32156498Snate@binkert.org 32166498Snate@binkert.org # Run the LRGeneratedTable on the grammar 32176498Snate@binkert.org if debug: 32186498Snate@binkert.org errorlog.debug("Generating %s tables", method) 32196498Snate@binkert.org 32206498Snate@binkert.org lr = LRGeneratedTable(grammar,method,debuglog) 32216498Snate@binkert.org 32226498Snate@binkert.org if debug: 32236498Snate@binkert.org num_sr = len(lr.sr_conflicts) 32246498Snate@binkert.org 32256498Snate@binkert.org # Report shift/reduce and reduce/reduce conflicts 32266498Snate@binkert.org if num_sr == 1: 32276498Snate@binkert.org errorlog.warning("1 shift/reduce conflict") 32286498Snate@binkert.org elif num_sr > 1: 32296498Snate@binkert.org errorlog.warning("%d shift/reduce conflicts", num_sr) 32306498Snate@binkert.org 32316498Snate@binkert.org num_rr = len(lr.rr_conflicts) 32326498Snate@binkert.org if num_rr == 1: 32336498Snate@binkert.org errorlog.warning("1 reduce/reduce conflict") 32346498Snate@binkert.org elif num_rr > 1: 32356498Snate@binkert.org errorlog.warning("%d reduce/reduce conflicts", num_rr) 32366498Snate@binkert.org 32376498Snate@binkert.org # Write out conflicts to the output file 32386498Snate@binkert.org if debug and (lr.sr_conflicts or lr.rr_conflicts): 32396498Snate@binkert.org debuglog.warning("") 32406498Snate@binkert.org debuglog.warning("Conflicts:") 32416498Snate@binkert.org debuglog.warning("") 32426498Snate@binkert.org 32436498Snate@binkert.org for state, tok, resolution in lr.sr_conflicts: 32446498Snate@binkert.org debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) 32456498Snate@binkert.org 32466498Snate@binkert.org already_reported = {} 32476498Snate@binkert.org for state, rule, rejected in lr.rr_conflicts: 32486498Snate@binkert.org if (state,id(rule),id(rejected)) in already_reported: 32496498Snate@binkert.org continue 32506498Snate@binkert.org debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 32516498Snate@binkert.org debuglog.warning("rejected rule (%s) in state %d", rejected,state) 32526498Snate@binkert.org errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 32536498Snate@binkert.org errorlog.warning("rejected rule (%s) in state %d", rejected, state) 32546498Snate@binkert.org already_reported[state,id(rule),id(rejected)] = 1 32556498Snate@binkert.org 32566498Snate@binkert.org warned_never = [] 32576498Snate@binkert.org for state, rule, rejected in lr.rr_conflicts: 32586498Snate@binkert.org if not rejected.reduced and (rejected not in warned_never): 32596498Snate@binkert.org debuglog.warning("Rule (%s) is never reduced", rejected) 32606498Snate@binkert.org errorlog.warning("Rule (%s) is never reduced", rejected) 32616498Snate@binkert.org warned_never.append(rejected) 32626498Snate@binkert.org 32636498Snate@binkert.org # Write the table file if requested 32646498Snate@binkert.org if write_tables: 32656498Snate@binkert.org lr.write_table(tabmodule,outputdir,signature) 32666498Snate@binkert.org 32676498Snate@binkert.org # Write a pickled version of the tables 32686498Snate@binkert.org if picklefile: 32696498Snate@binkert.org lr.pickle_table(picklefile,signature) 32706498Snate@binkert.org 32716498Snate@binkert.org # Build the parser 32726498Snate@binkert.org lr.bind_callables(pinfo.pdict) 32736498Snate@binkert.org parser = LRParser(lr,pinfo.error_func) 32746498Snate@binkert.org 32756498Snate@binkert.org parse = parser.parse 32766498Snate@binkert.org return parser 3277