16498Snate@binkert.org# -----------------------------------------------------------------------------
22632SN/A# ply: yacc.py
32632SN/A#
46498Snate@binkert.org# Copyright (C) 2001-2009,
56498Snate@binkert.org# David M. Beazley (Dabeaz LLC)
66498Snate@binkert.org# All rights reserved.
72632SN/A#
86498Snate@binkert.org# Redistribution and use in source and binary forms, with or without
96498Snate@binkert.org# modification, are permitted provided that the following conditions are
106498Snate@binkert.org# met:
116498Snate@binkert.org#
126498Snate@binkert.org# * Redistributions of source code must retain the above copyright notice,
136498Snate@binkert.org#   this list of conditions and the following disclaimer.
146498Snate@binkert.org# * Redistributions in binary form must reproduce the above copyright notice,
156498Snate@binkert.org#   this list of conditions and the following disclaimer in the documentation
166498Snate@binkert.org#   and/or other materials provided with the distribution.
176498Snate@binkert.org# * Neither the name of the David Beazley or Dabeaz LLC may be used to
186498Snate@binkert.org#   endorse or promote products derived from this software without
196498Snate@binkert.org#  specific prior written permission.
202632SN/A#
216498Snate@binkert.org# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
226498Snate@binkert.org# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
236498Snate@binkert.org# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
246498Snate@binkert.org# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
256498Snate@binkert.org# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
266498Snate@binkert.org# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
276498Snate@binkert.org# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
286498Snate@binkert.org# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
296498Snate@binkert.org# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
306498Snate@binkert.org# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
316498Snate@binkert.org# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
326498Snate@binkert.org# -----------------------------------------------------------------------------
332632SN/A#
342632SN/A# This implements an LR parser that is constructed from grammar rules defined
354479Sbinkertn@umich.edu# as Python functions. The grammer is specified by supplying the BNF inside
364479Sbinkertn@umich.edu# Python documentation strings.  The inspiration for this technique was borrowed
374479Sbinkertn@umich.edu# from John Aycock's Spark parsing system.  PLY might be viewed as cross between
384479Sbinkertn@umich.edu# Spark and the GNU bison utility.
392632SN/A#
402632SN/A# The current implementation is only somewhat object-oriented. The
412632SN/A# LR parser itself is defined in terms of an object (which allows multiple
422632SN/A# parsers to co-exist).  However, most of the variables used during table
432632SN/A# construction are defined in terms of global variables.  Users shouldn't
442632SN/A# notice unless they are trying to define multiple parsers at the same
452632SN/A# time using threads (in which case they should have their head examined).
464479Sbinkertn@umich.edu#
474479Sbinkertn@umich.edu# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
484479Sbinkertn@umich.edu# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
494479Sbinkertn@umich.edu# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
504479Sbinkertn@umich.edu# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
514479Sbinkertn@umich.edu# by the more efficient DeRemer and Pennello algorithm.
524479Sbinkertn@umich.edu#
534479Sbinkertn@umich.edu# :::::::: WARNING :::::::
544479Sbinkertn@umich.edu#
554479Sbinkertn@umich.edu# Construction of LR parsing tables is fairly complicated and expensive.
564479Sbinkertn@umich.edu# To make this module run fast, a *LOT* of work has been put into
574479Sbinkertn@umich.edu# optimization---often at the expensive of readability and what might
584479Sbinkertn@umich.edu# consider to be good Python "coding style."   Modify the code at your
594479Sbinkertn@umich.edu# own risk!
604479Sbinkertn@umich.edu# ----------------------------------------------------------------------------
612632SN/A
626498Snate@binkert.org__version__    = "3.2"
636498Snate@binkert.org__tabversion__ = "3.2"       # Table version
642632SN/A
652632SN/A#-----------------------------------------------------------------------------
662632SN/A#                     === User configurable parameters ===
672632SN/A#
682632SN/A# Change these to modify the default behavior of yacc (if you wish)
692632SN/A#-----------------------------------------------------------------------------
702632SN/A
7110182SCurtis.Dunham@arm.comyaccdebug   = 0                # Debugging mode.  If set, yacc generates a
722632SN/A                               # a 'parser.out' file in the current directory
732632SN/A
742632SN/Adebug_file  = 'parser.out'     # Default name of the debugging file
752632SN/Atab_module  = 'parsetab'       # Default name of the table module
764479Sbinkertn@umich.edudefault_lr  = 'LALR'           # Default LR table generation method
772632SN/A
782632SN/Aerror_count = 3                # Number of symbols that must be shifted to leave recovery mode
792632SN/A
806498Snate@binkert.orgyaccdevel   = 0                # Set to True if developing yacc.  This turns off optimized
816498Snate@binkert.org                               # implementations of certain functions.
826498Snate@binkert.org
836498Snate@binkert.orgresultlimit = 40               # Size limit of results when running in debug mode.
846498Snate@binkert.org
856498Snate@binkert.orgpickle_protocol = 0            # Protocol to use when writing pickle files
866498Snate@binkert.org
876498Snate@binkert.orgimport re, types, sys, os.path
886498Snate@binkert.org
896498Snate@binkert.org# Compatibility function for python 2.6/3.0
906498Snate@binkert.orgif sys.version_info[0] < 3:
916498Snate@binkert.org    def func_code(f):
926498Snate@binkert.org        return f.func_code
936498Snate@binkert.orgelse:
946498Snate@binkert.org    def func_code(f):
956498Snate@binkert.org        return f.__code__
966498Snate@binkert.org
976498Snate@binkert.org# Compatibility
986498Snate@binkert.orgtry:
996498Snate@binkert.org    MAXINT = sys.maxint
1006498Snate@binkert.orgexcept AttributeError:
1016498Snate@binkert.org    MAXINT = sys.maxsize
1026498Snate@binkert.org
1036498Snate@binkert.org# Python 2.x/3.0 compatibility.
1046498Snate@binkert.orgdef load_ply_lex():
1056498Snate@binkert.org    if sys.version_info[0] < 3:
1066498Snate@binkert.org        import lex
1076498Snate@binkert.org    else:
1086498Snate@binkert.org        import ply.lex as lex
1096498Snate@binkert.org    return lex
1106498Snate@binkert.org
1116498Snate@binkert.org# This object is a stand-in for a logging object created by the
1126498Snate@binkert.org# logging module.   PLY will use this by default to create things
1136498Snate@binkert.org# such as the parser.out file.  If a user wants more detailed
1146498Snate@binkert.org# information, they can create their own logging object and pass
1156498Snate@binkert.org# it into PLY.
1166498Snate@binkert.org
1176498Snate@binkert.orgclass PlyLogger(object):
1186498Snate@binkert.org    def __init__(self,f):
1196498Snate@binkert.org        self.f = f
1206498Snate@binkert.org    def debug(self,msg,*args,**kwargs):
1216498Snate@binkert.org        self.f.write((msg % args) + "\n")
1226498Snate@binkert.org    info     = debug
1236498Snate@binkert.org
1246498Snate@binkert.org    def warning(self,msg,*args,**kwargs):
1256498Snate@binkert.org        self.f.write("WARNING: "+ (msg % args) + "\n")
1266498Snate@binkert.org
1276498Snate@binkert.org    def error(self,msg,*args,**kwargs):
1286498Snate@binkert.org        self.f.write("ERROR: " + (msg % args) + "\n")
1296498Snate@binkert.org
1306498Snate@binkert.org    critical = debug
1316498Snate@binkert.org
1326498Snate@binkert.org# Null logger is used when no output is generated. Does nothing.
1336498Snate@binkert.orgclass NullLogger(object):
1346498Snate@binkert.org    def __getattribute__(self,name):
1356498Snate@binkert.org        return self
1366498Snate@binkert.org    def __call__(self,*args,**kwargs):
1376498Snate@binkert.org        return self
1386498Snate@binkert.org
1392632SN/A# Exception raised for yacc-related errors
1402632SN/Aclass YaccError(Exception):   pass
1412632SN/A
1426498Snate@binkert.org# Format the result message that the parser produces when running in debug mode.
1436498Snate@binkert.orgdef format_result(r):
1446498Snate@binkert.org    repr_str = repr(r)
1456498Snate@binkert.org    if '\n' in repr_str: repr_str = repr(repr_str)
1466498Snate@binkert.org    if len(repr_str) > resultlimit:
1476498Snate@binkert.org        repr_str = repr_str[:resultlimit]+" ..."
1486498Snate@binkert.org    result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
1496498Snate@binkert.org    return result
1506498Snate@binkert.org
1516498Snate@binkert.org
1526498Snate@binkert.org# Format stack entries when the parser is running in debug mode
1536498Snate@binkert.orgdef format_stack_entry(r):
1546498Snate@binkert.org    repr_str = repr(r)
1556498Snate@binkert.org    if '\n' in repr_str: repr_str = repr(repr_str)
1566498Snate@binkert.org    if len(repr_str) < 16:
1576498Snate@binkert.org        return repr_str
1586498Snate@binkert.org    else:
1596498Snate@binkert.org        return "<%s @ 0x%x>" % (type(r).__name__,id(r))
1604479Sbinkertn@umich.edu
1612632SN/A#-----------------------------------------------------------------------------
1622632SN/A#                        ===  LR Parsing Engine ===
1632632SN/A#
1642632SN/A# The following classes are used for the LR parser itself.  These are not
1652632SN/A# used during table construction and are independent of the actual LR
1662632SN/A# table generation algorithm
1672632SN/A#-----------------------------------------------------------------------------
1682632SN/A
1692632SN/A# This class is used to hold non-terminal grammar symbols during parsing.
1702632SN/A# It normally has the following attributes set:
1712632SN/A#        .type       = Grammar symbol type
1722632SN/A#        .value      = Symbol value
1732632SN/A#        .lineno     = Starting line number
1742632SN/A#        .endlineno  = Ending line number (optional, set automatically)
1754479Sbinkertn@umich.edu#        .lexpos     = Starting lex position
1764479Sbinkertn@umich.edu#        .endlexpos  = Ending lex position (optional, set automatically)
1772632SN/A
1786498Snate@binkert.orgclass YaccSymbol:
1792632SN/A    def __str__(self):    return self.type
1802632SN/A    def __repr__(self):   return str(self)
1812632SN/A
1822632SN/A# This class is a wrapper around the objects actually passed to each
1832632SN/A# grammar rule.   Index lookup and assignment actually assign the
1842632SN/A# .value attribute of the underlying YaccSymbol object.
1852632SN/A# The lineno() method returns the line number of a given
1862632SN/A# item (or 0 if not defined).   The linespan() method returns
1872632SN/A# a tuple of (startline,endline) representing the range of lines
1884479Sbinkertn@umich.edu# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
1894479Sbinkertn@umich.edu# representing the range of positional information for a symbol.
1902632SN/A
1914479Sbinkertn@umich.educlass YaccProduction:
1924479Sbinkertn@umich.edu    def __init__(self,s,stack=None):
1932632SN/A        self.slice = s
1944479Sbinkertn@umich.edu        self.stack = stack
1956498Snate@binkert.org        self.lexer = None
1966498Snate@binkert.org        self.parser= None
1972632SN/A    def __getitem__(self,n):
1984479Sbinkertn@umich.edu        if n >= 0: return self.slice[n].value
1994479Sbinkertn@umich.edu        else: return self.stack[n].value
2002632SN/A
2012632SN/A    def __setitem__(self,n,v):
2022632SN/A        self.slice[n].value = v
2032632SN/A
2044479Sbinkertn@umich.edu    def __getslice__(self,i,j):
2054479Sbinkertn@umich.edu        return [s.value for s in self.slice[i:j]]
2064479Sbinkertn@umich.edu
2072632SN/A    def __len__(self):
2082632SN/A        return len(self.slice)
2092632SN/A
2102632SN/A    def lineno(self,n):
2112632SN/A        return getattr(self.slice[n],"lineno",0)
2122632SN/A
2136498Snate@binkert.org    def set_lineno(self,n,lineno):
2146498Snate@binkert.org        self.slice[n].lineno = n
2156498Snate@binkert.org
2162632SN/A    def linespan(self,n):
2172632SN/A        startline = getattr(self.slice[n],"lineno",0)
2182632SN/A        endline = getattr(self.slice[n],"endlineno",startline)
2192632SN/A        return startline,endline
2202632SN/A
2214479Sbinkertn@umich.edu    def lexpos(self,n):
2224479Sbinkertn@umich.edu        return getattr(self.slice[n],"lexpos",0)
2234479Sbinkertn@umich.edu
2244479Sbinkertn@umich.edu    def lexspan(self,n):
2254479Sbinkertn@umich.edu        startpos = getattr(self.slice[n],"lexpos",0)
2264479Sbinkertn@umich.edu        endpos = getattr(self.slice[n],"endlexpos",startpos)
2274479Sbinkertn@umich.edu        return startpos,endpos
2284479Sbinkertn@umich.edu
2296498Snate@binkert.org    def error(self):
2306498Snate@binkert.org       raise SyntaxError
2316498Snate@binkert.org
2326498Snate@binkert.org
2336498Snate@binkert.org# -----------------------------------------------------------------------------
2346498Snate@binkert.org#                               == LRParser ==
2356498Snate@binkert.org#
2366498Snate@binkert.org# The LR Parsing engine.
2376498Snate@binkert.org# -----------------------------------------------------------------------------
2386498Snate@binkert.org
2396498Snate@binkert.orgclass LRParser:
2406498Snate@binkert.org    def __init__(self,lrtab,errorf):
2416498Snate@binkert.org        self.productions = lrtab.lr_productions
2426498Snate@binkert.org        self.action      = lrtab.lr_action
2436498Snate@binkert.org        self.goto        = lrtab.lr_goto
2446498Snate@binkert.org        self.errorfunc   = errorf
2452632SN/A
2462632SN/A    def errok(self):
2474479Sbinkertn@umich.edu        self.errorok     = 1
2482632SN/A
2492632SN/A    def restart(self):
2502632SN/A        del self.statestack[:]
2512632SN/A        del self.symstack[:]
2522632SN/A        sym = YaccSymbol()
2534479Sbinkertn@umich.edu        sym.type = '$end'
2542632SN/A        self.symstack.append(sym)
2552632SN/A        self.statestack.append(0)
2562632SN/A
2576498Snate@binkert.org    def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
2586498Snate@binkert.org        if debug or yaccdevel:
2596498Snate@binkert.org            if isinstance(debug,int):
2606498Snate@binkert.org                debug = PlyLogger(sys.stderr)
2616498Snate@binkert.org            return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
2626498Snate@binkert.org        elif tracking:
2636498Snate@binkert.org            return self.parseopt(input,lexer,debug,tracking,tokenfunc)
2646498Snate@binkert.org        else:
2656498Snate@binkert.org            return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
2666498Snate@binkert.org
2676498Snate@binkert.org
2686498Snate@binkert.org    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2696498Snate@binkert.org    # parsedebug().
2706498Snate@binkert.org    #
2716498Snate@binkert.org    # This is the debugging enabled version of parse().  All changes made to the
2726498Snate@binkert.org    # parsing engine should be made here.   For the non-debugging version,
2736498Snate@binkert.org    # copy this code to a method parseopt() and delete all of the sections
2746498Snate@binkert.org    # enclosed in:
2756498Snate@binkert.org    #
2766498Snate@binkert.org    #      #--! DEBUG
2776498Snate@binkert.org    #      statements
2786498Snate@binkert.org    #      #--! DEBUG
2796498Snate@binkert.org    #
2806498Snate@binkert.org    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2816498Snate@binkert.org
2826498Snate@binkert.org    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
2832632SN/A        lookahead = None                 # Current lookahead symbol
2842632SN/A        lookaheadstack = [ ]             # Stack of lookahead symbols
2856498Snate@binkert.org        actions = self.action            # Local reference to action table (to avoid lookup on self.)
2866498Snate@binkert.org        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
2876498Snate@binkert.org        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
2884479Sbinkertn@umich.edu        pslice  = YaccProduction(None)   # Production object passed to grammar rules
2896498Snate@binkert.org        errorcount = 0                   # Used during error recovery
2906498Snate@binkert.org
2916498Snate@binkert.org        # --! DEBUG
2926498Snate@binkert.org        debug.info("PLY: PARSE DEBUG START")
2936498Snate@binkert.org        # --! DEBUG
2942632SN/A
2952632SN/A        # If no lexer was given, we will try to use the lex module
2962632SN/A        if not lexer:
2976498Snate@binkert.org            lex = load_ply_lex()
2984479Sbinkertn@umich.edu            lexer = lex.lexer
2992632SN/A
3006498Snate@binkert.org        # Set up the lexer and parser objects on pslice
3012632SN/A        pslice.lexer = lexer
3024479Sbinkertn@umich.edu        pslice.parser = self
3032632SN/A
3042632SN/A        # If input was supplied, pass to lexer
3056498Snate@binkert.org        if input is not None:
3062632SN/A            lexer.input(input)
3072632SN/A
3086498Snate@binkert.org        if tokenfunc is None:
3096498Snate@binkert.org           # Tokenize function
3106498Snate@binkert.org           get_token = lexer.token
3116498Snate@binkert.org        else:
3126498Snate@binkert.org           get_token = tokenfunc
3136498Snate@binkert.org
3146498Snate@binkert.org        # Set up the state and symbol stacks
3156498Snate@binkert.org
3166498Snate@binkert.org        statestack = [ ]                # Stack of parsing states
3176498Snate@binkert.org        self.statestack = statestack
3186498Snate@binkert.org        symstack   = [ ]                # Stack of grammar symbols
3196498Snate@binkert.org        self.symstack = symstack
3206498Snate@binkert.org
3216498Snate@binkert.org        pslice.stack = symstack         # Put in the production
3226498Snate@binkert.org        errtoken   = None               # Err token
3236498Snate@binkert.org
3246498Snate@binkert.org        # The start state is assumed to be (0,$end)
3256498Snate@binkert.org
3266498Snate@binkert.org        statestack.append(0)
3276498Snate@binkert.org        sym = YaccSymbol()
3286498Snate@binkert.org        sym.type = "$end"
3296498Snate@binkert.org        symstack.append(sym)
3306498Snate@binkert.org        state = 0
3316498Snate@binkert.org        while 1:
3326498Snate@binkert.org            # Get the next symbol on the input.  If a lookahead symbol
3336498Snate@binkert.org            # is already set, we just use that. Otherwise, we'll pull
3346498Snate@binkert.org            # the next token off of the lookaheadstack or from the lexer
3356498Snate@binkert.org
3366498Snate@binkert.org            # --! DEBUG
3376498Snate@binkert.org            debug.debug('')
3386498Snate@binkert.org            debug.debug('State  : %s', state)
3396498Snate@binkert.org            # --! DEBUG
3406498Snate@binkert.org
3416498Snate@binkert.org            if not lookahead:
3426498Snate@binkert.org                if not lookaheadstack:
3436498Snate@binkert.org                    lookahead = get_token()     # Get the next token
3446498Snate@binkert.org                else:
3456498Snate@binkert.org                    lookahead = lookaheadstack.pop()
3466498Snate@binkert.org                if not lookahead:
3476498Snate@binkert.org                    lookahead = YaccSymbol()
3486498Snate@binkert.org                    lookahead.type = "$end"
3496498Snate@binkert.org
3506498Snate@binkert.org            # --! DEBUG
3516498Snate@binkert.org            debug.debug('Stack  : %s',
3526498Snate@binkert.org                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
3536498Snate@binkert.org            # --! DEBUG
3546498Snate@binkert.org
3556498Snate@binkert.org            # Check the action table
3566498Snate@binkert.org            ltype = lookahead.type
3576498Snate@binkert.org            t = actions[state].get(ltype)
3586498Snate@binkert.org
3596498Snate@binkert.org            if t is not None:
3606498Snate@binkert.org                if t > 0:
3616498Snate@binkert.org                    # shift a symbol on the stack
3626498Snate@binkert.org                    statestack.append(t)
3636498Snate@binkert.org                    state = t
3646498Snate@binkert.org
3656498Snate@binkert.org                    # --! DEBUG
3666498Snate@binkert.org                    debug.debug("Action : Shift and goto state %s", t)
3676498Snate@binkert.org                    # --! DEBUG
3686498Snate@binkert.org
3696498Snate@binkert.org                    symstack.append(lookahead)
3706498Snate@binkert.org                    lookahead = None
3716498Snate@binkert.org
3726498Snate@binkert.org                    # Decrease error count on successful shift
3736498Snate@binkert.org                    if errorcount: errorcount -=1
3746498Snate@binkert.org                    continue
3756498Snate@binkert.org
3766498Snate@binkert.org                if t < 0:
3776498Snate@binkert.org                    # reduce a symbol on the stack, emit a production
3786498Snate@binkert.org                    p = prod[-t]
3796498Snate@binkert.org                    pname = p.name
3806498Snate@binkert.org                    plen  = p.len
3816498Snate@binkert.org
3826498Snate@binkert.org                    # Get production function
3836498Snate@binkert.org                    sym = YaccSymbol()
3846498Snate@binkert.org                    sym.type = pname       # Production name
3856498Snate@binkert.org                    sym.value = None
3866498Snate@binkert.org
3876498Snate@binkert.org                    # --! DEBUG
3886498Snate@binkert.org                    if plen:
3896498Snate@binkert.org                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
3906498Snate@binkert.org                    else:
3916498Snate@binkert.org                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
3926498Snate@binkert.org
3936498Snate@binkert.org                    # --! DEBUG
3946498Snate@binkert.org
3956498Snate@binkert.org                    if plen:
3966498Snate@binkert.org                        targ = symstack[-plen-1:]
3976498Snate@binkert.org                        targ[0] = sym
3986498Snate@binkert.org
3996498Snate@binkert.org                        # --! TRACKING
4006498Snate@binkert.org                        if tracking:
4016498Snate@binkert.org                           t1 = targ[1]
4026498Snate@binkert.org                           sym.lineno = t1.lineno
4036498Snate@binkert.org                           sym.lexpos = t1.lexpos
4046498Snate@binkert.org                           t1 = targ[-1]
4056498Snate@binkert.org                           sym.endlineno = getattr(t1,"endlineno",t1.lineno)
4066498Snate@binkert.org                           sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
4076498Snate@binkert.org
4086498Snate@binkert.org                        # --! TRACKING
4096498Snate@binkert.org
4106498Snate@binkert.org                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
4116498Snate@binkert.org                        # The code enclosed in this section is duplicated
4126498Snate@binkert.org                        # below as a performance optimization.  Make sure
4136498Snate@binkert.org                        # changes get made in both locations.
4146498Snate@binkert.org
4156498Snate@binkert.org                        pslice.slice = targ
4166498Snate@binkert.org
4176498Snate@binkert.org                        try:
4186498Snate@binkert.org                            # Call the grammar rule with our special slice object
4196498Snate@binkert.org                            del symstack[-plen:]
4206498Snate@binkert.org                            del statestack[-plen:]
4216498Snate@binkert.org                            p.callable(pslice)
4226498Snate@binkert.org                            # --! DEBUG
4236498Snate@binkert.org                            debug.info("Result : %s", format_result(pslice[0]))
4246498Snate@binkert.org                            # --! DEBUG
4256498Snate@binkert.org                            symstack.append(sym)
4266498Snate@binkert.org                            state = goto[statestack[-1]][pname]
4276498Snate@binkert.org                            statestack.append(state)
4286498Snate@binkert.org                        except SyntaxError:
4296498Snate@binkert.org                            # If an error was set. Enter error recovery state
4306498Snate@binkert.org                            lookaheadstack.append(lookahead)
4316498Snate@binkert.org                            symstack.pop()
4326498Snate@binkert.org                            statestack.pop()
4336498Snate@binkert.org                            state = statestack[-1]
4346498Snate@binkert.org                            sym.type = 'error'
4356498Snate@binkert.org                            lookahead = sym
4366498Snate@binkert.org                            errorcount = error_count
4376498Snate@binkert.org                            self.errorok = 0
4386498Snate@binkert.org                        continue
4396498Snate@binkert.org                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
4406498Snate@binkert.org
4416498Snate@binkert.org                    else:
4426498Snate@binkert.org
4436498Snate@binkert.org                        # --! TRACKING
4446498Snate@binkert.org                        if tracking:
4456498Snate@binkert.org                           sym.lineno = lexer.lineno
4466498Snate@binkert.org                           sym.lexpos = lexer.lexpos
4476498Snate@binkert.org                        # --! TRACKING
4486498Snate@binkert.org
4496498Snate@binkert.org                        targ = [ sym ]
4506498Snate@binkert.org
4516498Snate@binkert.org                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
4526498Snate@binkert.org                        # The code enclosed in this section is duplicated
4536498Snate@binkert.org                        # above as a performance optimization.  Make sure
4546498Snate@binkert.org                        # changes get made in both locations.
4556498Snate@binkert.org
4566498Snate@binkert.org                        pslice.slice = targ
4576498Snate@binkert.org
4586498Snate@binkert.org                        try:
4596498Snate@binkert.org                            # Call the grammar rule with our special slice object
4606498Snate@binkert.org                            p.callable(pslice)
4616498Snate@binkert.org                            # --! DEBUG
4626498Snate@binkert.org                            debug.info("Result : %s", format_result(pslice[0]))
4636498Snate@binkert.org                            # --! DEBUG
4646498Snate@binkert.org                            symstack.append(sym)
4656498Snate@binkert.org                            state = goto[statestack[-1]][pname]
4666498Snate@binkert.org                            statestack.append(state)
4676498Snate@binkert.org                        except SyntaxError:
4686498Snate@binkert.org                            # If an error was set. Enter error recovery state
4696498Snate@binkert.org                            lookaheadstack.append(lookahead)
4706498Snate@binkert.org                            symstack.pop()
4716498Snate@binkert.org                            statestack.pop()
4726498Snate@binkert.org                            state = statestack[-1]
4736498Snate@binkert.org                            sym.type = 'error'
4746498Snate@binkert.org                            lookahead = sym
4756498Snate@binkert.org                            errorcount = error_count
4766498Snate@binkert.org                            self.errorok = 0
4776498Snate@binkert.org                        continue
4786498Snate@binkert.org                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
4796498Snate@binkert.org
4806498Snate@binkert.org                if t == 0:
4816498Snate@binkert.org                    n = symstack[-1]
4826498Snate@binkert.org                    result = getattr(n,"value",None)
4836498Snate@binkert.org                    # --! DEBUG
4846498Snate@binkert.org                    debug.info("Done   : Returning %s", format_result(result))
4856498Snate@binkert.org                    debug.info("PLY: PARSE DEBUG END")
4866498Snate@binkert.org                    # --! DEBUG
4876498Snate@binkert.org                    return result
4886498Snate@binkert.org
4896498Snate@binkert.org            if t == None:
4906498Snate@binkert.org
4916498Snate@binkert.org                # --! DEBUG
4926498Snate@binkert.org                debug.error('Error  : %s',
4936498Snate@binkert.org                            ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
4946498Snate@binkert.org                # --! DEBUG
4956498Snate@binkert.org
4966498Snate@binkert.org                # We have some kind of parsing error here.  To handle
4976498Snate@binkert.org                # this, we are going to push the current token onto
4986498Snate@binkert.org                # the tokenstack and replace it with an 'error' token.
4996498Snate@binkert.org                # If there are any synchronization rules, they may
5006498Snate@binkert.org                # catch it.
5016498Snate@binkert.org                #
5026498Snate@binkert.org                # In addition to pushing the error token, we call call
5036498Snate@binkert.org                # the user defined p_error() function if this is the
5046498Snate@binkert.org                # first syntax error.  This function is only called if
5056498Snate@binkert.org                # errorcount == 0.
5066498Snate@binkert.org                if errorcount == 0 or self.errorok:
5076498Snate@binkert.org                    errorcount = error_count
5086498Snate@binkert.org                    self.errorok = 0
5096498Snate@binkert.org                    errtoken = lookahead
5106498Snate@binkert.org                    if errtoken.type == "$end":
5116498Snate@binkert.org                        errtoken = None               # End of file!
5126498Snate@binkert.org                    if self.errorfunc:
5136498Snate@binkert.org                        global errok,token,restart
5146498Snate@binkert.org                        errok = self.errok        # Set some special functions available in error recovery
5156498Snate@binkert.org                        token = get_token
5166498Snate@binkert.org                        restart = self.restart
5176498Snate@binkert.org                        if errtoken and not hasattr(errtoken,'lexer'):
5186498Snate@binkert.org                            errtoken.lexer = lexer
5196498Snate@binkert.org                        tok = self.errorfunc(errtoken)
5206498Snate@binkert.org                        del errok, token, restart   # Delete special functions
5216498Snate@binkert.org
5226498Snate@binkert.org                        if self.errorok:
5236498Snate@binkert.org                            # User must have done some kind of panic
5246498Snate@binkert.org                            # mode recovery on their own.  The
5256498Snate@binkert.org                            # returned token is the next lookahead
5266498Snate@binkert.org                            lookahead = tok
5276498Snate@binkert.org                            errtoken = None
5286498Snate@binkert.org                            continue
5296498Snate@binkert.org                    else:
5306498Snate@binkert.org                        if errtoken:
5316498Snate@binkert.org                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
5326498Snate@binkert.org                            else: lineno = 0
5336498Snate@binkert.org                            if lineno:
5346498Snate@binkert.org                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
5356498Snate@binkert.org                            else:
5366498Snate@binkert.org                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
5376498Snate@binkert.org                        else:
5386498Snate@binkert.org                            sys.stderr.write("yacc: Parse error in input. EOF\n")
5396498Snate@binkert.org                            return
5406498Snate@binkert.org
5416498Snate@binkert.org                else:
5426498Snate@binkert.org                    errorcount = error_count
5436498Snate@binkert.org
5446498Snate@binkert.org                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
5456498Snate@binkert.org                # entire parse has been rolled back and we're completely hosed.   The token is
5466498Snate@binkert.org                # discarded and we just keep going.
5476498Snate@binkert.org
5486498Snate@binkert.org                if len(statestack) <= 1 and lookahead.type != "$end":
5496498Snate@binkert.org                    lookahead = None
5506498Snate@binkert.org                    errtoken = None
5516498Snate@binkert.org                    state = 0
5526498Snate@binkert.org                    # Nuke the pushback stack
5536498Snate@binkert.org                    del lookaheadstack[:]
5546498Snate@binkert.org                    continue
5556498Snate@binkert.org
5566498Snate@binkert.org                # case 2: the statestack has a couple of entries on it, but we're
5576498Snate@binkert.org                # at the end of the file. nuke the top entry and generate an error token
5586498Snate@binkert.org
5596498Snate@binkert.org                # Start nuking entries on the stack
5606498Snate@binkert.org                if lookahead.type == "$end":
5616498Snate@binkert.org                    # Whoa. We're really hosed here. Bail out
5626498Snate@binkert.org                    return
5636498Snate@binkert.org
5646498Snate@binkert.org                if lookahead.type != 'error':
5656498Snate@binkert.org                    sym = symstack[-1]
5666498Snate@binkert.org                    if sym.type == 'error':
5676498Snate@binkert.org                        # Hmmm. Error is on top of stack, we'll just nuke input
5686498Snate@binkert.org                        # symbol and continue
5696498Snate@binkert.org                        lookahead = None
5706498Snate@binkert.org                        continue
5716498Snate@binkert.org                    t = YaccSymbol()
5726498Snate@binkert.org                    t.type = 'error'
5736498Snate@binkert.org                    if hasattr(lookahead,"lineno"):
5746498Snate@binkert.org                        t.lineno = lookahead.lineno
5756498Snate@binkert.org                    t.value = lookahead
5766498Snate@binkert.org                    lookaheadstack.append(lookahead)
5776498Snate@binkert.org                    lookahead = t
5786498Snate@binkert.org                else:
5796498Snate@binkert.org                    symstack.pop()
5806498Snate@binkert.org                    statestack.pop()
5816498Snate@binkert.org                    state = statestack[-1]       # Potential bug fix
5826498Snate@binkert.org
5836498Snate@binkert.org                continue
5846498Snate@binkert.org
5856498Snate@binkert.org            # Call an error function here
5866498Snate@binkert.org            raise RuntimeError("yacc: internal parser error!!!\n")
5876498Snate@binkert.org
5886498Snate@binkert.org    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
5896498Snate@binkert.org    # parseopt().
5906498Snate@binkert.org    #
5916498Snate@binkert.org    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
5926498Snate@binkert.org    # Edit the debug version above, then copy any modifications to the method
5936498Snate@binkert.org    # below while removing #--! DEBUG sections.
5946498Snate@binkert.org    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
5956498Snate@binkert.org
5966498Snate@binkert.org
5976498Snate@binkert.org    def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
5986498Snate@binkert.org        lookahead = None                 # Current lookahead symbol
5996498Snate@binkert.org        lookaheadstack = [ ]             # Stack of lookahead symbols
6006498Snate@binkert.org        actions = self.action            # Local reference to action table (to avoid lookup on self.)
6016498Snate@binkert.org        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
6026498Snate@binkert.org        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
6036498Snate@binkert.org        pslice  = YaccProduction(None)   # Production object passed to grammar rules
6046498Snate@binkert.org        errorcount = 0                   # Used during error recovery
6056498Snate@binkert.org
6066498Snate@binkert.org        # If no lexer was given, we will try to use the lex module
6076498Snate@binkert.org        if not lexer:
6086498Snate@binkert.org            lex = load_ply_lex()
6096498Snate@binkert.org            lexer = lex.lexer
6106498Snate@binkert.org
6116498Snate@binkert.org        # Set up the lexer and parser objects on pslice
6126498Snate@binkert.org        pslice.lexer = lexer
6136498Snate@binkert.org        pslice.parser = self
6146498Snate@binkert.org
6156498Snate@binkert.org        # If input was supplied, pass to lexer
6166498Snate@binkert.org        if input is not None:
6176498Snate@binkert.org            lexer.input(input)
6186498Snate@binkert.org
6196498Snate@binkert.org        if tokenfunc is None:
6206498Snate@binkert.org           # Tokenize function
6216498Snate@binkert.org           get_token = lexer.token
6226498Snate@binkert.org        else:
6236498Snate@binkert.org           get_token = tokenfunc
6246498Snate@binkert.org
6256498Snate@binkert.org        # Set up the state and symbol stacks
6262632SN/A
6272632SN/A        statestack = [ ]                # Stack of parsing states
6282632SN/A        self.statestack = statestack
6292632SN/A        symstack   = [ ]                # Stack of grammar symbols
6302632SN/A        self.symstack = symstack
6312632SN/A
6324479Sbinkertn@umich.edu        pslice.stack = symstack         # Put in the production
6332632SN/A        errtoken   = None               # Err token
6342632SN/A
6354479Sbinkertn@umich.edu        # The start state is assumed to be (0,$end)
6364479Sbinkertn@umich.edu
6372632SN/A        statestack.append(0)
6382632SN/A        sym = YaccSymbol()
6394479Sbinkertn@umich.edu        sym.type = '$end'
6402632SN/A        symstack.append(sym)
6414479Sbinkertn@umich.edu        state = 0
6422632SN/A        while 1:
6432632SN/A            # Get the next symbol on the input.  If a lookahead symbol
6442632SN/A            # is already set, we just use that. Otherwise, we'll pull
6452632SN/A            # the next token off of the lookaheadstack or from the lexer
6466498Snate@binkert.org
6472632SN/A            if not lookahead:
6482632SN/A                if not lookaheadstack:
6492632SN/A                    lookahead = get_token()     # Get the next token
6502632SN/A                else:
6512632SN/A                    lookahead = lookaheadstack.pop()
6522632SN/A                if not lookahead:
6532632SN/A                    lookahead = YaccSymbol()
6544479Sbinkertn@umich.edu                    lookahead.type = '$end'
6552632SN/A
6562632SN/A            # Check the action table
6572632SN/A            ltype = lookahead.type
6584479Sbinkertn@umich.edu            t = actions[state].get(ltype)
6592632SN/A
6602632SN/A            if t is not None:
6612632SN/A                if t > 0:
6622632SN/A                    # shift a symbol on the stack
6632632SN/A                    statestack.append(t)
6644479Sbinkertn@umich.edu                    state = t
6656498Snate@binkert.org
6662632SN/A                    symstack.append(lookahead)
6672632SN/A                    lookahead = None
6682632SN/A
6692632SN/A                    # Decrease error count on successful shift
6704479Sbinkertn@umich.edu                    if errorcount: errorcount -=1
6712632SN/A                    continue
6722632SN/A
6732632SN/A                if t < 0:
6742632SN/A                    # reduce a symbol on the stack, emit a production
6752632SN/A                    p = prod[-t]
6762632SN/A                    pname = p.name
6772632SN/A                    plen  = p.len
6782632SN/A
6792632SN/A                    # Get production function
6802632SN/A                    sym = YaccSymbol()
6812632SN/A                    sym.type = pname       # Production name
6822632SN/A                    sym.value = None
6832632SN/A
6842632SN/A                    if plen:
6852632SN/A                        targ = symstack[-plen-1:]
6862632SN/A                        targ[0] = sym
6876498Snate@binkert.org
6886498Snate@binkert.org                        # --! TRACKING
6894479Sbinkertn@umich.edu                        if tracking:
6904479Sbinkertn@umich.edu                           t1 = targ[1]
6914479Sbinkertn@umich.edu                           sym.lineno = t1.lineno
6924479Sbinkertn@umich.edu                           sym.lexpos = t1.lexpos
6934479Sbinkertn@umich.edu                           t1 = targ[-1]
6944479Sbinkertn@umich.edu                           sym.endlineno = getattr(t1,"endlineno",t1.lineno)
6954479Sbinkertn@umich.edu                           sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
6966498Snate@binkert.org
6976498Snate@binkert.org                        # --! TRACKING
6986498Snate@binkert.org
6996498Snate@binkert.org                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
7006498Snate@binkert.org                        # The code enclosed in this section is duplicated
7016498Snate@binkert.org                        # below as a performance optimization.  Make sure
7026498Snate@binkert.org                        # changes get made in both locations.
7036498Snate@binkert.org
7046498Snate@binkert.org                        pslice.slice = targ
7056498Snate@binkert.org
7066498Snate@binkert.org                        try:
7076498Snate@binkert.org                            # Call the grammar rule with our special slice object
7086498Snate@binkert.org                            del symstack[-plen:]
7096498Snate@binkert.org                            del statestack[-plen:]
7106498Snate@binkert.org                            p.callable(pslice)
7116498Snate@binkert.org                            symstack.append(sym)
7126498Snate@binkert.org                            state = goto[statestack[-1]][pname]
7136498Snate@binkert.org                            statestack.append(state)
7146498Snate@binkert.org                        except SyntaxError:
7156498Snate@binkert.org                            # If an error was set. Enter error recovery state
7166498Snate@binkert.org                            lookaheadstack.append(lookahead)
7176498Snate@binkert.org                            symstack.pop()
7186498Snate@binkert.org                            statestack.pop()
7196498Snate@binkert.org                            state = statestack[-1]
7206498Snate@binkert.org                            sym.type = 'error'
7216498Snate@binkert.org                            lookahead = sym
7226498Snate@binkert.org                            errorcount = error_count
7236498Snate@binkert.org                            self.errorok = 0
7246498Snate@binkert.org                        continue
7256498Snate@binkert.org                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
7266498Snate@binkert.org
7272632SN/A                    else:
7286498Snate@binkert.org
7296498Snate@binkert.org                        # --! TRACKING
7304479Sbinkertn@umich.edu                        if tracking:
7314479Sbinkertn@umich.edu                           sym.lineno = lexer.lineno
7324479Sbinkertn@umich.edu                           sym.lexpos = lexer.lexpos
7336498Snate@binkert.org                        # --! TRACKING
7346498Snate@binkert.org
7352632SN/A                        targ = [ sym ]
7366498Snate@binkert.org
7376498Snate@binkert.org                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
7386498Snate@binkert.org                        # The code enclosed in this section is duplicated
7396498Snate@binkert.org                        # above as a performance optimization.  Make sure
7406498Snate@binkert.org                        # changes get made in both locations.
7416498Snate@binkert.org
7426498Snate@binkert.org                        pslice.slice = targ
7436498Snate@binkert.org
7446498Snate@binkert.org                        try:
7456498Snate@binkert.org                            # Call the grammar rule with our special slice object
7466498Snate@binkert.org                            p.callable(pslice)
7476498Snate@binkert.org                            symstack.append(sym)
7486498Snate@binkert.org                            state = goto[statestack[-1]][pname]
7496498Snate@binkert.org                            statestack.append(state)
7506498Snate@binkert.org                        except SyntaxError:
7516498Snate@binkert.org                            # If an error was set. Enter error recovery state
7526498Snate@binkert.org                            lookaheadstack.append(lookahead)
7536498Snate@binkert.org                            symstack.pop()
7546498Snate@binkert.org                            statestack.pop()
7556498Snate@binkert.org                            state = statestack[-1]
7566498Snate@binkert.org                            sym.type = 'error'
7576498Snate@binkert.org                            lookahead = sym
7586498Snate@binkert.org                            errorcount = error_count
7596498Snate@binkert.org                            self.errorok = 0
7606498Snate@binkert.org                        continue
7616498Snate@binkert.org                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
7622632SN/A
7632632SN/A                if t == 0:
7642632SN/A                    n = symstack[-1]
7652632SN/A                    return getattr(n,"value",None)
7662632SN/A
7672632SN/A            if t == None:
7686498Snate@binkert.org
7694479Sbinkertn@umich.edu                # We have some kind of parsing error here.  To handle
7704479Sbinkertn@umich.edu                # this, we are going to push the current token onto
7714479Sbinkertn@umich.edu                # the tokenstack and replace it with an 'error' token.
7724479Sbinkertn@umich.edu                # If there are any synchronization rules, they may
7734479Sbinkertn@umich.edu                # catch it.
7742632SN/A                #
7754479Sbinkertn@umich.edu                # In addition to pushing the error token, we call call
7764479Sbinkertn@umich.edu                # the user defined p_error() function if this is the
7774479Sbinkertn@umich.edu                # first syntax error.  This function is only called if
7784479Sbinkertn@umich.edu                # errorcount == 0.
7794479Sbinkertn@umich.edu                if errorcount == 0 or self.errorok:
7804479Sbinkertn@umich.edu                    errorcount = error_count
7814479Sbinkertn@umich.edu                    self.errorok = 0
7822632SN/A                    errtoken = lookahead
7834479Sbinkertn@umich.edu                    if errtoken.type == '$end':
7842632SN/A                        errtoken = None               # End of file!
7852632SN/A                    if self.errorfunc:
7862632SN/A                        global errok,token,restart
7872632SN/A                        errok = self.errok        # Set some special functions available in error recovery
7882632SN/A                        token = get_token
7892632SN/A                        restart = self.restart
7906498Snate@binkert.org                        if errtoken and not hasattr(errtoken,'lexer'):
7916498Snate@binkert.org                            errtoken.lexer = lexer
7922632SN/A                        tok = self.errorfunc(errtoken)
7932632SN/A                        del errok, token, restart   # Delete special functions
7942632SN/A
7954479Sbinkertn@umich.edu                        if self.errorok:
7964479Sbinkertn@umich.edu                            # User must have done some kind of panic
7974479Sbinkertn@umich.edu                            # mode recovery on their own.  The
7984479Sbinkertn@umich.edu                            # returned token is the next lookahead
7992632SN/A                            lookahead = tok
8002632SN/A                            errtoken = None
8012632SN/A                            continue
8022632SN/A                    else:
8032632SN/A                        if errtoken:
8042632SN/A                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
8052632SN/A                            else: lineno = 0
8062632SN/A                            if lineno:
8074479Sbinkertn@umich.edu                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
8082632SN/A                            else:
8094479Sbinkertn@umich.edu                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
8102632SN/A                        else:
8114479Sbinkertn@umich.edu                            sys.stderr.write("yacc: Parse error in input. EOF\n")
8122632SN/A                            return
8132632SN/A
8142632SN/A                else:
8154479Sbinkertn@umich.edu                    errorcount = error_count
8162632SN/A
8172632SN/A                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
8182632SN/A                # entire parse has been rolled back and we're completely hosed.   The token is
8192632SN/A                # discarded and we just keep going.
8202632SN/A
8214479Sbinkertn@umich.edu                if len(statestack) <= 1 and lookahead.type != '$end':
8222632SN/A                    lookahead = None
8232632SN/A                    errtoken = None
8246498Snate@binkert.org                    state = 0
8252632SN/A                    # Nuke the pushback stack
8262632SN/A                    del lookaheadstack[:]
8272632SN/A                    continue
8282632SN/A
8292632SN/A                # case 2: the statestack has a couple of entries on it, but we're
8302632SN/A                # at the end of the file. nuke the top entry and generate an error token
8312632SN/A
8322632SN/A                # Start nuking entries on the stack
8334479Sbinkertn@umich.edu                if lookahead.type == '$end':
8342632SN/A                    # Whoa. We're really hosed here. Bail out
8352632SN/A                    return
8362632SN/A
8372632SN/A                if lookahead.type != 'error':
8382632SN/A                    sym = symstack[-1]
8392632SN/A                    if sym.type == 'error':
8402632SN/A                        # Hmmm. Error is on top of stack, we'll just nuke input
8412632SN/A                        # symbol and continue
8422632SN/A                        lookahead = None
8432632SN/A                        continue
8442632SN/A                    t = YaccSymbol()
8452632SN/A                    t.type = 'error'
8462632SN/A                    if hasattr(lookahead,"lineno"):
8472632SN/A                        t.lineno = lookahead.lineno
8482632SN/A                    t.value = lookahead
8492632SN/A                    lookaheadstack.append(lookahead)
8502632SN/A                    lookahead = t
8512632SN/A                else:
8522632SN/A                    symstack.pop()
8532632SN/A                    statestack.pop()
8546498Snate@binkert.org                    state = statestack[-1]       # Potential bug fix
8552632SN/A
8562632SN/A                continue
8572632SN/A
8582632SN/A            # Call an error function here
8596498Snate@binkert.org            raise RuntimeError("yacc: internal parser error!!!\n")
8606498Snate@binkert.org
8616498Snate@binkert.org    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
8626498Snate@binkert.org    # parseopt_notrack().
8636498Snate@binkert.org    #
8646498Snate@binkert.org    # Optimized version of parseopt() with line number tracking removed.
8656498Snate@binkert.org    # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
8666498Snate@binkert.org    # code in the #--! TRACKING sections
8676498Snate@binkert.org    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
8686498Snate@binkert.org
8696498Snate@binkert.org    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
8706498Snate@binkert.org        lookahead = None                 # Current lookahead symbol
8716498Snate@binkert.org        lookaheadstack = [ ]             # Stack of lookahead symbols
8726498Snate@binkert.org        actions = self.action            # Local reference to action table (to avoid lookup on self.)
8736498Snate@binkert.org        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
8746498Snate@binkert.org        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
8756498Snate@binkert.org        pslice  = YaccProduction(None)   # Production object passed to grammar rules
8766498Snate@binkert.org        errorcount = 0                   # Used during error recovery
8776498Snate@binkert.org
8786498Snate@binkert.org        # If no lexer was given, we will try to use the lex module
8796498Snate@binkert.org        if not lexer:
8806498Snate@binkert.org            lex = load_ply_lex()
8816498Snate@binkert.org            lexer = lex.lexer
8826498Snate@binkert.org
8836498Snate@binkert.org        # Set up the lexer and parser objects on pslice
8846498Snate@binkert.org        pslice.lexer = lexer
8856498Snate@binkert.org        pslice.parser = self
8866498Snate@binkert.org
8876498Snate@binkert.org        # If input was supplied, pass to lexer
8886498Snate@binkert.org        if input is not None:
8896498Snate@binkert.org            lexer.input(input)
8906498Snate@binkert.org
8916498Snate@binkert.org        if tokenfunc is None:
8926498Snate@binkert.org           # Tokenize function
8936498Snate@binkert.org           get_token = lexer.token
8946498Snate@binkert.org        else:
8956498Snate@binkert.org           get_token = tokenfunc
8966498Snate@binkert.org
8976498Snate@binkert.org        # Set up the state and symbol stacks
8986498Snate@binkert.org
8996498Snate@binkert.org        statestack = [ ]                # Stack of parsing states
9006498Snate@binkert.org        self.statestack = statestack
9016498Snate@binkert.org        symstack   = [ ]                # Stack of grammar symbols
9026498Snate@binkert.org        self.symstack = symstack
9036498Snate@binkert.org
9046498Snate@binkert.org        pslice.stack = symstack         # Put in the production
9056498Snate@binkert.org        errtoken   = None               # Err token
9066498Snate@binkert.org
9076498Snate@binkert.org        # The start state is assumed to be (0,$end)
9086498Snate@binkert.org
9096498Snate@binkert.org        statestack.append(0)
9106498Snate@binkert.org        sym = YaccSymbol()
9116498Snate@binkert.org        sym.type = '$end'
9126498Snate@binkert.org        symstack.append(sym)
9136498Snate@binkert.org        state = 0
9146498Snate@binkert.org        while 1:
9156498Snate@binkert.org            # Get the next symbol on the input.  If a lookahead symbol
9166498Snate@binkert.org            # is already set, we just use that. Otherwise, we'll pull
9176498Snate@binkert.org            # the next token off of the lookaheadstack or from the lexer
9186498Snate@binkert.org
9196498Snate@binkert.org            if not lookahead:
9206498Snate@binkert.org                if not lookaheadstack:
9216498Snate@binkert.org                    lookahead = get_token()     # Get the next token
9226498Snate@binkert.org                else:
9236498Snate@binkert.org                    lookahead = lookaheadstack.pop()
9246498Snate@binkert.org                if not lookahead:
9256498Snate@binkert.org                    lookahead = YaccSymbol()
9266498Snate@binkert.org                    lookahead.type = '$end'
9276498Snate@binkert.org
9286498Snate@binkert.org            # Check the action table
9296498Snate@binkert.org            ltype = lookahead.type
9306498Snate@binkert.org            t = actions[state].get(ltype)
9316498Snate@binkert.org
9326498Snate@binkert.org            if t is not None:
9336498Snate@binkert.org                if t > 0:
9346498Snate@binkert.org                    # shift a symbol on the stack
9356498Snate@binkert.org                    statestack.append(t)
9366498Snate@binkert.org                    state = t
9376498Snate@binkert.org
9386498Snate@binkert.org                    symstack.append(lookahead)
9396498Snate@binkert.org                    lookahead = None
9406498Snate@binkert.org
9416498Snate@binkert.org                    # Decrease error count on successful shift
9426498Snate@binkert.org                    if errorcount: errorcount -=1
9436498Snate@binkert.org                    continue
9446498Snate@binkert.org
9456498Snate@binkert.org                if t < 0:
9466498Snate@binkert.org                    # reduce a symbol on the stack, emit a production
9476498Snate@binkert.org                    p = prod[-t]
9486498Snate@binkert.org                    pname = p.name
9496498Snate@binkert.org                    plen  = p.len
9506498Snate@binkert.org
9516498Snate@binkert.org                    # Get production function
9526498Snate@binkert.org                    sym = YaccSymbol()
9536498Snate@binkert.org                    sym.type = pname       # Production name
9546498Snate@binkert.org                    sym.value = None
9556498Snate@binkert.org
9566498Snate@binkert.org                    if plen:
9576498Snate@binkert.org                        targ = symstack[-plen-1:]
9586498Snate@binkert.org                        targ[0] = sym
9596498Snate@binkert.org
9606498Snate@binkert.org                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
9616498Snate@binkert.org                        # The code enclosed in this section is duplicated
9626498Snate@binkert.org                        # below as a performance optimization.  Make sure
9636498Snate@binkert.org                        # changes get made in both locations.
9646498Snate@binkert.org
9656498Snate@binkert.org                        pslice.slice = targ
9666498Snate@binkert.org
9676498Snate@binkert.org                        try:
9686498Snate@binkert.org                            # Call the grammar rule with our special slice object
9696498Snate@binkert.org                            del symstack[-plen:]
9706498Snate@binkert.org                            del statestack[-plen:]
9716498Snate@binkert.org                            p.callable(pslice)
9726498Snate@binkert.org                            symstack.append(sym)
9736498Snate@binkert.org                            state = goto[statestack[-1]][pname]
9746498Snate@binkert.org                            statestack.append(state)
9756498Snate@binkert.org                        except SyntaxError:
9766498Snate@binkert.org                            # If an error was set. Enter error recovery state
9776498Snate@binkert.org                            lookaheadstack.append(lookahead)
9786498Snate@binkert.org                            symstack.pop()
9796498Snate@binkert.org                            statestack.pop()
9806498Snate@binkert.org                            state = statestack[-1]
9816498Snate@binkert.org                            sym.type = 'error'
9826498Snate@binkert.org                            lookahead = sym
9836498Snate@binkert.org                            errorcount = error_count
9846498Snate@binkert.org                            self.errorok = 0
9856498Snate@binkert.org                        continue
9866498Snate@binkert.org                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
9876498Snate@binkert.org
9886498Snate@binkert.org                    else:
9896498Snate@binkert.org
9906498Snate@binkert.org                        targ = [ sym ]
9916498Snate@binkert.org
9926498Snate@binkert.org                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
9936498Snate@binkert.org                        # The code enclosed in this section is duplicated
9946498Snate@binkert.org                        # above as a performance optimization.  Make sure
9956498Snate@binkert.org                        # changes get made in both locations.
9966498Snate@binkert.org
9976498Snate@binkert.org                        pslice.slice = targ
9986498Snate@binkert.org
9996498Snate@binkert.org                        try:
10006498Snate@binkert.org                            # Call the grammar rule with our special slice object
10016498Snate@binkert.org                            p.callable(pslice)
10026498Snate@binkert.org                            symstack.append(sym)
10036498Snate@binkert.org                            state = goto[statestack[-1]][pname]
10046498Snate@binkert.org                            statestack.append(state)
10056498Snate@binkert.org                        except SyntaxError:
10066498Snate@binkert.org                            # If an error was set. Enter error recovery state
10076498Snate@binkert.org                            lookaheadstack.append(lookahead)
10086498Snate@binkert.org                            symstack.pop()
10096498Snate@binkert.org                            statestack.pop()
10106498Snate@binkert.org                            state = statestack[-1]
10116498Snate@binkert.org                            sym.type = 'error'
10126498Snate@binkert.org                            lookahead = sym
10136498Snate@binkert.org                            errorcount = error_count
10146498Snate@binkert.org                            self.errorok = 0
10156498Snate@binkert.org                        continue
10166498Snate@binkert.org                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
10176498Snate@binkert.org
10186498Snate@binkert.org                if t == 0:
10196498Snate@binkert.org                    n = symstack[-1]
10206498Snate@binkert.org                    return getattr(n,"value",None)
10216498Snate@binkert.org
10226498Snate@binkert.org            if t == None:
10236498Snate@binkert.org
10246498Snate@binkert.org                # We have some kind of parsing error here.  To handle
10256498Snate@binkert.org                # this, we are going to push the current token onto
10266498Snate@binkert.org                # the tokenstack and replace it with an 'error' token.
10276498Snate@binkert.org                # If there are any synchronization rules, they may
10286498Snate@binkert.org                # catch it.
10296498Snate@binkert.org                #
10306498Snate@binkert.org                # In addition to pushing the error token, we call call
10316498Snate@binkert.org                # the user defined p_error() function if this is the
10326498Snate@binkert.org                # first syntax error.  This function is only called if
10336498Snate@binkert.org                # errorcount == 0.
10346498Snate@binkert.org                if errorcount == 0 or self.errorok:
10356498Snate@binkert.org                    errorcount = error_count
10366498Snate@binkert.org                    self.errorok = 0
10376498Snate@binkert.org                    errtoken = lookahead
10386498Snate@binkert.org                    if errtoken.type == '$end':
10396498Snate@binkert.org                        errtoken = None               # End of file!
10406498Snate@binkert.org                    if self.errorfunc:
10416498Snate@binkert.org                        global errok,token,restart
10426498Snate@binkert.org                        errok = self.errok        # Set some special functions available in error recovery
10436498Snate@binkert.org                        token = get_token
10446498Snate@binkert.org                        restart = self.restart
10456498Snate@binkert.org                        if errtoken and not hasattr(errtoken,'lexer'):
10466498Snate@binkert.org                            errtoken.lexer = lexer
10476498Snate@binkert.org                        tok = self.errorfunc(errtoken)
10486498Snate@binkert.org                        del errok, token, restart   # Delete special functions
10496498Snate@binkert.org
10506498Snate@binkert.org                        if self.errorok:
10516498Snate@binkert.org                            # User must have done some kind of panic
10526498Snate@binkert.org                            # mode recovery on their own.  The
10536498Snate@binkert.org                            # returned token is the next lookahead
10546498Snate@binkert.org                            lookahead = tok
10556498Snate@binkert.org                            errtoken = None
10566498Snate@binkert.org                            continue
10576498Snate@binkert.org                    else:
10586498Snate@binkert.org                        if errtoken:
10596498Snate@binkert.org                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
10606498Snate@binkert.org                            else: lineno = 0
10616498Snate@binkert.org                            if lineno:
10626498Snate@binkert.org                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
10636498Snate@binkert.org                            else:
10646498Snate@binkert.org                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
10656498Snate@binkert.org                        else:
10666498Snate@binkert.org                            sys.stderr.write("yacc: Parse error in input. EOF\n")
10676498Snate@binkert.org                            return
10686498Snate@binkert.org
10696498Snate@binkert.org                else:
10706498Snate@binkert.org                    errorcount = error_count
10716498Snate@binkert.org
10726498Snate@binkert.org                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
10736498Snate@binkert.org                # entire parse has been rolled back and we're completely hosed.   The token is
10746498Snate@binkert.org                # discarded and we just keep going.
10756498Snate@binkert.org
10766498Snate@binkert.org                if len(statestack) <= 1 and lookahead.type != '$end':
10776498Snate@binkert.org                    lookahead = None
10786498Snate@binkert.org                    errtoken = None
10796498Snate@binkert.org                    state = 0
10806498Snate@binkert.org                    # Nuke the pushback stack
10816498Snate@binkert.org                    del lookaheadstack[:]
10826498Snate@binkert.org                    continue
10836498Snate@binkert.org
10846498Snate@binkert.org                # case 2: the statestack has a couple of entries on it, but we're
10856498Snate@binkert.org                # at the end of the file. nuke the top entry and generate an error token
10866498Snate@binkert.org
10876498Snate@binkert.org                # Start nuking entries on the stack
10886498Snate@binkert.org                if lookahead.type == '$end':
10896498Snate@binkert.org                    # Whoa. We're really hosed here. Bail out
10906498Snate@binkert.org                    return
10916498Snate@binkert.org
10926498Snate@binkert.org                if lookahead.type != 'error':
10936498Snate@binkert.org                    sym = symstack[-1]
10946498Snate@binkert.org                    if sym.type == 'error':
10956498Snate@binkert.org                        # Hmmm. Error is on top of stack, we'll just nuke input
10966498Snate@binkert.org                        # symbol and continue
10976498Snate@binkert.org                        lookahead = None
10986498Snate@binkert.org                        continue
10996498Snate@binkert.org                    t = YaccSymbol()
11006498Snate@binkert.org                    t.type = 'error'
11016498Snate@binkert.org                    if hasattr(lookahead,"lineno"):
11026498Snate@binkert.org                        t.lineno = lookahead.lineno
11036498Snate@binkert.org                    t.value = lookahead
11046498Snate@binkert.org                    lookaheadstack.append(lookahead)
11056498Snate@binkert.org                    lookahead = t
11066498Snate@binkert.org                else:
11076498Snate@binkert.org                    symstack.pop()
11086498Snate@binkert.org                    statestack.pop()
11096498Snate@binkert.org                    state = statestack[-1]       # Potential bug fix
11106498Snate@binkert.org
11116498Snate@binkert.org                continue
11126498Snate@binkert.org
11136498Snate@binkert.org            # Call an error function here
11146498Snate@binkert.org            raise RuntimeError("yacc: internal parser error!!!\n")
11152632SN/A
11162632SN/A# -----------------------------------------------------------------------------
11176498Snate@binkert.org#                          === Grammar Representation ===
11182632SN/A#
11196498Snate@binkert.org# The following functions, classes, and variables are used to represent and
11206498Snate@binkert.org# manipulate the rules that make up a grammar.
11212632SN/A# -----------------------------------------------------------------------------
11222632SN/A
11236498Snate@binkert.orgimport re
11246498Snate@binkert.org
11256498Snate@binkert.org# regex matching identifiers
11266498Snate@binkert.org_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
11272632SN/A
11282632SN/A# -----------------------------------------------------------------------------
11292632SN/A# class Production:
11302632SN/A#
11312632SN/A# This class stores the raw information about a single production or grammar rule.
11326498Snate@binkert.org# A grammar rule refers to a specification such as this:
11332632SN/A#
11346498Snate@binkert.org#       expr : expr PLUS term
11356498Snate@binkert.org#
11366498Snate@binkert.org# Here are the basic attributes defined on all productions
11376498Snate@binkert.org#
11386498Snate@binkert.org#       name     - Name of the production.  For example 'expr'
11396498Snate@binkert.org#       prod     - A list of symbols on the right side ['expr','PLUS','term']
11406498Snate@binkert.org#       prec     - Production precedence level
11412632SN/A#       number   - Production number.
11426498Snate@binkert.org#       func     - Function that executes on reduce
11436498Snate@binkert.org#       file     - File where production function is defined
11446498Snate@binkert.org#       lineno   - Line number where production function is defined
11452632SN/A#
11466498Snate@binkert.org# The following attributes are defined or optional.
11472632SN/A#
11486498Snate@binkert.org#       len       - Length of the production (number of symbols on right hand side)
11496498Snate@binkert.org#       usyms     - Set of unique symbols found in the production
11506498Snate@binkert.org# -----------------------------------------------------------------------------
11516498Snate@binkert.org
11526498Snate@binkert.orgclass Production(object):
11536498Snate@binkert.org    reduced = 0
11546498Snate@binkert.org    def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
11556498Snate@binkert.org        self.name     = name
11566498Snate@binkert.org        self.prod     = tuple(prod)
11576498Snate@binkert.org        self.number   = number
11586498Snate@binkert.org        self.func     = func
11596498Snate@binkert.org        self.callable = None
11606498Snate@binkert.org        self.file     = file
11616498Snate@binkert.org        self.line     = line
11626498Snate@binkert.org        self.prec     = precedence
11636498Snate@binkert.org
11646498Snate@binkert.org        # Internal settings used during table construction
11656498Snate@binkert.org
11666498Snate@binkert.org        self.len  = len(self.prod)   # Length of the production
11676498Snate@binkert.org
11686498Snate@binkert.org        # Create a list of unique production symbols used in the production
11696498Snate@binkert.org        self.usyms = [ ]
11706498Snate@binkert.org        for s in self.prod:
11716498Snate@binkert.org            if s not in self.usyms:
11726498Snate@binkert.org                self.usyms.append(s)
11736498Snate@binkert.org
11746498Snate@binkert.org        # List of all LR items for the production
11756498Snate@binkert.org        self.lr_items = []
11766498Snate@binkert.org        self.lr_next = None
11776498Snate@binkert.org
11786498Snate@binkert.org        # Create a string representation
11796498Snate@binkert.org        if self.prod:
11806498Snate@binkert.org            self.str = "%s -> %s" % (self.name," ".join(self.prod))
11816498Snate@binkert.org        else:
11826498Snate@binkert.org            self.str = "%s -> <empty>" % self.name
11836498Snate@binkert.org
11846498Snate@binkert.org    def __str__(self):
11856498Snate@binkert.org        return self.str
11866498Snate@binkert.org
11876498Snate@binkert.org    def __repr__(self):
11886498Snate@binkert.org        return "Production("+str(self)+")"
11896498Snate@binkert.org
11906498Snate@binkert.org    def __len__(self):
11916498Snate@binkert.org        return len(self.prod)
11926498Snate@binkert.org
11936498Snate@binkert.org    def __nonzero__(self):
11946498Snate@binkert.org        return 1
11956498Snate@binkert.org
11966498Snate@binkert.org    def __getitem__(self,index):
11976498Snate@binkert.org        return self.prod[index]
11986498Snate@binkert.org
11996498Snate@binkert.org    # Return the nth lr_item from the production (or None if at the end)
12006498Snate@binkert.org    def lr_item(self,n):
12016498Snate@binkert.org        if n > len(self.prod): return None
12026498Snate@binkert.org        p = LRItem(self,n)
12036498Snate@binkert.org
12046498Snate@binkert.org        # Precompute the list of productions immediately following.  Hack. Remove later
12056498Snate@binkert.org        try:
12066498Snate@binkert.org            p.lr_after = Prodnames[p.prod[n+1]]
12076498Snate@binkert.org        except (IndexError,KeyError):
12086498Snate@binkert.org            p.lr_after = []
12096498Snate@binkert.org        try:
12106498Snate@binkert.org            p.lr_before = p.prod[n-1]
12116498Snate@binkert.org        except IndexError:
12126498Snate@binkert.org            p.lr_before = None
12136498Snate@binkert.org
12146498Snate@binkert.org        return p
12156498Snate@binkert.org
12166498Snate@binkert.org    # Bind the production function name to a callable
12176498Snate@binkert.org    def bind(self,pdict):
12186498Snate@binkert.org        if self.func:
12196498Snate@binkert.org            self.callable = pdict[self.func]
12206498Snate@binkert.org
12216498Snate@binkert.org# This class serves as a minimal standin for Production objects when
12226498Snate@binkert.org# reading table data from files.   It only contains information
12236498Snate@binkert.org# actually used by the LR parsing engine, plus some additional
12246498Snate@binkert.org# debugging information.
12256498Snate@binkert.orgclass MiniProduction(object):
12266498Snate@binkert.org    def __init__(self,str,name,len,func,file,line):
12276498Snate@binkert.org        self.name     = name
12286498Snate@binkert.org        self.len      = len
12296498Snate@binkert.org        self.func     = func
12306498Snate@binkert.org        self.callable = None
12316498Snate@binkert.org        self.file     = file
12326498Snate@binkert.org        self.line     = line
12336498Snate@binkert.org        self.str      = str
12346498Snate@binkert.org    def __str__(self):
12356498Snate@binkert.org        return self.str
12366498Snate@binkert.org    def __repr__(self):
12376498Snate@binkert.org        return "MiniProduction(%s)" % self.str
12386498Snate@binkert.org
12396498Snate@binkert.org    # Bind the production function name to a callable
12406498Snate@binkert.org    def bind(self,pdict):
12416498Snate@binkert.org        if self.func:
12426498Snate@binkert.org            self.callable = pdict[self.func]
12436498Snate@binkert.org
12446498Snate@binkert.org
12456498Snate@binkert.org# -----------------------------------------------------------------------------
12466498Snate@binkert.org# class LRItem
12476498Snate@binkert.org#
12486498Snate@binkert.org# This class represents a specific stage of parsing a production rule.  For
12496498Snate@binkert.org# example:
12506498Snate@binkert.org#
12516498Snate@binkert.org#       expr : expr . PLUS term
12526498Snate@binkert.org#
12536498Snate@binkert.org# In the above, the "." represents the current location of the parse.  Here
12546498Snate@binkert.org# basic attributes:
12556498Snate@binkert.org#
12566498Snate@binkert.org#       name       - Name of the production.  For example 'expr'
12576498Snate@binkert.org#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
12586498Snate@binkert.org#       number     - Production number.
12596498Snate@binkert.org#
12606498Snate@binkert.org#       lr_next      Next LR item. Example, if we are ' expr -> expr . PLUS term'
12616498Snate@binkert.org#                    then lr_next refers to 'expr -> expr PLUS . term'
12626498Snate@binkert.org#       lr_index   - LR item index (location of the ".") in the prod list.
12634479Sbinkertn@umich.edu#       lookaheads - LALR lookahead symbols for this item
12646498Snate@binkert.org#       len        - Length of the production (number of symbols on right hand side)
12656498Snate@binkert.org#       lr_after    - List of all productions that immediately follow
12666498Snate@binkert.org#       lr_before   - Grammar symbol immediately before
12672632SN/A# -----------------------------------------------------------------------------
12682632SN/A
12696498Snate@binkert.orgclass LRItem(object):
12706498Snate@binkert.org    def __init__(self,p,n):
12716498Snate@binkert.org        self.name       = p.name
12726498Snate@binkert.org        self.prod       = list(p.prod)
12736498Snate@binkert.org        self.number     = p.number
12746498Snate@binkert.org        self.lr_index   = n
12754479Sbinkertn@umich.edu        self.lookaheads = { }
12766498Snate@binkert.org        self.prod.insert(n,".")
12776498Snate@binkert.org        self.prod       = tuple(self.prod)
12786498Snate@binkert.org        self.len        = len(self.prod)
12796498Snate@binkert.org        self.usyms      = p.usyms
12802632SN/A
12812632SN/A    def __str__(self):
12822632SN/A        if self.prod:
12832632SN/A            s = "%s -> %s" % (self.name," ".join(self.prod))
12842632SN/A        else:
12852632SN/A            s = "%s -> <empty>" % self.name
12862632SN/A        return s
12872632SN/A
12882632SN/A    def __repr__(self):
12896498Snate@binkert.org        return "LRItem("+str(self)+")"
12906498Snate@binkert.org
12916498Snate@binkert.org# -----------------------------------------------------------------------------
12926498Snate@binkert.org# rightmost_terminal()
12936498Snate@binkert.org#
12946498Snate@binkert.org# Return the rightmost terminal from a list of symbols.  Used in add_production()
12956498Snate@binkert.org# -----------------------------------------------------------------------------
12966498Snate@binkert.orgdef rightmost_terminal(symbols, terminals):
12976498Snate@binkert.org    i = len(symbols) - 1
12986498Snate@binkert.org    while i >= 0:
12996498Snate@binkert.org        if symbols[i] in terminals:
13006498Snate@binkert.org            return symbols[i]
13016498Snate@binkert.org        i -= 1
13026498Snate@binkert.org    return None
13036498Snate@binkert.org
13046498Snate@binkert.org# -----------------------------------------------------------------------------
13056498Snate@binkert.org#                           === GRAMMAR CLASS ===
13066498Snate@binkert.org#
13076498Snate@binkert.org# The following class represents the contents of the specified grammar along
13086498Snate@binkert.org# with various computed properties such as first sets, follow sets, LR items, etc.
13096498Snate@binkert.org# This data is used for critical parts of the table generation process later.
13106498Snate@binkert.org# -----------------------------------------------------------------------------
13116498Snate@binkert.org
13126498Snate@binkert.orgclass GrammarError(YaccError): pass
13136498Snate@binkert.org
13146498Snate@binkert.orgclass Grammar(object):
13156498Snate@binkert.org    def __init__(self,terminals):
13166498Snate@binkert.org        self.Productions  = [None]  # A list of all of the productions.  The first
13176498Snate@binkert.org                                    # entry is always reserved for the purpose of
13186498Snate@binkert.org                                    # building an augmented grammar
13196498Snate@binkert.org
13206498Snate@binkert.org        self.Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
13216498Snate@binkert.org                                    # productions of that nonterminal.
13226498Snate@binkert.org
13236498Snate@binkert.org        self.Prodmap      = { }     # A dictionary that is only used to detect duplicate
13246498Snate@binkert.org                                    # productions.
13256498Snate@binkert.org
13266498Snate@binkert.org        self.Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
13276498Snate@binkert.org                                    # list of the rules where they are used.
13286498Snate@binkert.org
13296498Snate@binkert.org        for term in terminals:
13306498Snate@binkert.org            self.Terminals[term] = []
13316498Snate@binkert.org
13326498Snate@binkert.org        self.Terminals['error'] = []
13336498Snate@binkert.org
13346498Snate@binkert.org        self.Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
13356498Snate@binkert.org                                    # of rule numbers where they are used.
13366498Snate@binkert.org
13376498Snate@binkert.org        self.First        = { }     # A dictionary of precomputed FIRST(x) symbols
13386498Snate@binkert.org
13396498Snate@binkert.org        self.Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols
13406498Snate@binkert.org
13416498Snate@binkert.org        self.Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
13426498Snate@binkert.org                                    # form ('right',level) or ('nonassoc', level) or ('left',level)
13436498Snate@binkert.org
13446498Snate@binkert.org        self.UsedPrecedence = { }   # Precedence rules that were actually used by the grammer.
13456498Snate@binkert.org                                    # This is only used to provide error checking and to generate
13466498Snate@binkert.org                                    # a warning about unused precedence rules.
13476498Snate@binkert.org
13486498Snate@binkert.org        self.Start = None           # Starting symbol for the grammar
13496498Snate@binkert.org
13506498Snate@binkert.org
13516498Snate@binkert.org    def __len__(self):
13526498Snate@binkert.org        return len(self.Productions)
13536498Snate@binkert.org
13546498Snate@binkert.org    def __getitem__(self,index):
13556498Snate@binkert.org        return self.Productions[index]
13566498Snate@binkert.org
13576498Snate@binkert.org    # -----------------------------------------------------------------------------
13586498Snate@binkert.org    # set_precedence()
13596498Snate@binkert.org    #
13606498Snate@binkert.org    # Sets the precedence for a given terminal. assoc is the associativity such as
13616498Snate@binkert.org    # 'left','right', or 'nonassoc'.  level is a numeric level.
13626498Snate@binkert.org    #
13636498Snate@binkert.org    # -----------------------------------------------------------------------------
13646498Snate@binkert.org
13656498Snate@binkert.org    def set_precedence(self,term,assoc,level):
13666498Snate@binkert.org        assert self.Productions == [None],"Must call set_precedence() before add_production()"
13676498Snate@binkert.org        if term in self.Precedence:
13686498Snate@binkert.org            raise GrammarError("Precedence already specified for terminal '%s'" % term)
13696498Snate@binkert.org        if assoc not in ['left','right','nonassoc']:
13706498Snate@binkert.org            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
13716498Snate@binkert.org        self.Precedence[term] = (assoc,level)
13726498Snate@binkert.org
13736498Snate@binkert.org    # -----------------------------------------------------------------------------
13746498Snate@binkert.org    # add_production()
13756498Snate@binkert.org    #
13766498Snate@binkert.org    # Given an action function, this function assembles a production rule and
13776498Snate@binkert.org    # computes its precedence level.
13786498Snate@binkert.org    #
13796498Snate@binkert.org    # The production rule is supplied as a list of symbols.   For example,
13806498Snate@binkert.org    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
13816498Snate@binkert.org    # symbols ['expr','PLUS','term'].
13826498Snate@binkert.org    #
13836498Snate@binkert.org    # Precedence is determined by the precedence of the right-most non-terminal
13846498Snate@binkert.org    # or the precedence of a terminal specified by %prec.
13856498Snate@binkert.org    #
13866498Snate@binkert.org    # A variety of error checks are performed to make sure production symbols
13876498Snate@binkert.org    # are valid and that %prec is used correctly.
13886498Snate@binkert.org    # -----------------------------------------------------------------------------
13896498Snate@binkert.org
13906498Snate@binkert.org    def add_production(self,prodname,syms,func=None,file='',line=0):
13916498Snate@binkert.org
13926498Snate@binkert.org        if prodname in self.Terminals:
13936498Snate@binkert.org            raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
13946498Snate@binkert.org        if prodname == 'error':
13956498Snate@binkert.org            raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
13966498Snate@binkert.org        if not _is_identifier.match(prodname):
13976498Snate@binkert.org            raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))
13986498Snate@binkert.org
13996498Snate@binkert.org        # Look for literal tokens
14006498Snate@binkert.org        for n,s in enumerate(syms):
14016498Snate@binkert.org            if s[0] in "'\"":
14026498Snate@binkert.org                 try:
14036498Snate@binkert.org                     c = eval(s)
14046498Snate@binkert.org                     if (len(c) > 1):
14056498Snate@binkert.org                          raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
14066498Snate@binkert.org                     if not c in self.Terminals:
14076498Snate@binkert.org                          self.Terminals[c] = []
14086498Snate@binkert.org                     syms[n] = c
14096498Snate@binkert.org                     continue
14106498Snate@binkert.org                 except SyntaxError:
14116498Snate@binkert.org                     pass
14126498Snate@binkert.org            if not _is_identifier.match(s) and s != '%prec':
14136498Snate@binkert.org                raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))
14146498Snate@binkert.org
14156498Snate@binkert.org        # Determine the precedence level
14166498Snate@binkert.org        if '%prec' in syms:
14176498Snate@binkert.org            if syms[-1] == '%prec':
14186498Snate@binkert.org                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
14196498Snate@binkert.org            if syms[-2] != '%prec':
14206498Snate@binkert.org                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
14216498Snate@binkert.org            precname = syms[-1]
14226498Snate@binkert.org            prodprec = self.Precedence.get(precname,None)
14236498Snate@binkert.org            if not prodprec:
14246498Snate@binkert.org                raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
14256498Snate@binkert.org            else:
14266498Snate@binkert.org                self.UsedPrecedence[precname] = 1
14276498Snate@binkert.org            del syms[-2:]     # Drop %prec from the rule
14286498Snate@binkert.org        else:
14296498Snate@binkert.org            # If no %prec, precedence is determined by the rightmost terminal symbol
14306498Snate@binkert.org            precname = rightmost_terminal(syms,self.Terminals)
14316498Snate@binkert.org            prodprec = self.Precedence.get(precname,('right',0))
14326498Snate@binkert.org
14336498Snate@binkert.org        # See if the rule is already in the rulemap
14346498Snate@binkert.org        map = "%s -> %s" % (prodname,syms)
14356498Snate@binkert.org        if map in self.Prodmap:
14366498Snate@binkert.org            m = self.Prodmap[map]
14376498Snate@binkert.org            raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
14386498Snate@binkert.org                               "Previous definition at %s:%d" % (m.file, m.line))
14396498Snate@binkert.org
14406498Snate@binkert.org        # From this point on, everything is valid.  Create a new Production instance
14416498Snate@binkert.org        pnumber  = len(self.Productions)
14426498Snate@binkert.org        if not prodname in self.Nonterminals:
14436498Snate@binkert.org            self.Nonterminals[prodname] = [ ]
14446498Snate@binkert.org
14456498Snate@binkert.org        # Add the production number to Terminals and Nonterminals
14466498Snate@binkert.org        for t in syms:
14476498Snate@binkert.org            if t in self.Terminals:
14486498Snate@binkert.org                self.Terminals[t].append(pnumber)
14496498Snate@binkert.org            else:
14506498Snate@binkert.org                if not t in self.Nonterminals:
14516498Snate@binkert.org                    self.Nonterminals[t] = [ ]
14526498Snate@binkert.org                self.Nonterminals[t].append(pnumber)
14536498Snate@binkert.org
14546498Snate@binkert.org        # Create a production and add it to the list of productions
14556498Snate@binkert.org        p = Production(pnumber,prodname,syms,prodprec,func,file,line)
14566498Snate@binkert.org        self.Productions.append(p)
14576498Snate@binkert.org        self.Prodmap[map] = p
14586498Snate@binkert.org
14596498Snate@binkert.org        # Add to the global productions list
14602632SN/A        try:
14616498Snate@binkert.org            self.Prodnames[prodname].append(p)
14626498Snate@binkert.org        except KeyError:
14636498Snate@binkert.org            self.Prodnames[prodname] = [ p ]
14646498Snate@binkert.org        return 0
14656498Snate@binkert.org
14666498Snate@binkert.org    # -----------------------------------------------------------------------------
14676498Snate@binkert.org    # set_start()
14686498Snate@binkert.org    #
14696498Snate@binkert.org    # Sets the starting symbol and creates the augmented grammar.  Production
14706498Snate@binkert.org    # rule 0 is S' -> start where start is the start symbol.
14716498Snate@binkert.org    # -----------------------------------------------------------------------------
14726498Snate@binkert.org
14736498Snate@binkert.org    def set_start(self,start=None):
14746498Snate@binkert.org        if not start:
14756498Snate@binkert.org            start = self.Productions[1].name
14766498Snate@binkert.org        if start not in self.Nonterminals:
14776498Snate@binkert.org            raise GrammarError("start symbol %s undefined" % start)
14786498Snate@binkert.org        self.Productions[0] = Production(0,"S'",[start])
14796498Snate@binkert.org        self.Nonterminals[start].append(0)
14806498Snate@binkert.org        self.Start = start
14816498Snate@binkert.org
14826498Snate@binkert.org    # -----------------------------------------------------------------------------
14836498Snate@binkert.org    # find_unreachable()
14846498Snate@binkert.org    #
14856498Snate@binkert.org    # Find all of the nonterminal symbols that can't be reached from the starting
14866498Snate@binkert.org    # symbol.  Returns a list of nonterminals that can't be reached.
14876498Snate@binkert.org    # -----------------------------------------------------------------------------
14886498Snate@binkert.org
14896498Snate@binkert.org    def find_unreachable(self):
14906498Snate@binkert.org
14916498Snate@binkert.org        # Mark all symbols that are reachable from a symbol s
14926498Snate@binkert.org        def mark_reachable_from(s):
14936498Snate@binkert.org            if reachable[s]:
14946498Snate@binkert.org                # We've already reached symbol s.
14956498Snate@binkert.org                return
14966498Snate@binkert.org            reachable[s] = 1
14976498Snate@binkert.org            for p in self.Prodnames.get(s,[]):
14986498Snate@binkert.org                for r in p.prod:
14996498Snate@binkert.org                    mark_reachable_from(r)
15006498Snate@binkert.org
15016498Snate@binkert.org        reachable   = { }
15026498Snate@binkert.org        for s in list(self.Terminals) + list(self.Nonterminals):
15036498Snate@binkert.org            reachable[s] = 0
15046498Snate@binkert.org
15056498Snate@binkert.org        mark_reachable_from( self.Productions[0].prod[0] )
15066498Snate@binkert.org
15076498Snate@binkert.org        return [s for s in list(self.Nonterminals)
15086498Snate@binkert.org                        if not reachable[s]]
15096498Snate@binkert.org
15106498Snate@binkert.org    # -----------------------------------------------------------------------------
15116498Snate@binkert.org    # infinite_cycles()
15126498Snate@binkert.org    #
15136498Snate@binkert.org    # This function looks at the various parsing rules and tries to detect
15146498Snate@binkert.org    # infinite recursion cycles (grammar rules where there is no possible way
15156498Snate@binkert.org    # to derive a string of only terminals).
15166498Snate@binkert.org    # -----------------------------------------------------------------------------
15176498Snate@binkert.org
15186498Snate@binkert.org    def infinite_cycles(self):
15196498Snate@binkert.org        terminates = {}
15206498Snate@binkert.org
15216498Snate@binkert.org        # Terminals:
15226498Snate@binkert.org        for t in self.Terminals:
15236498Snate@binkert.org            terminates[t] = 1
15246498Snate@binkert.org
15256498Snate@binkert.org        terminates['$end'] = 1
15266498Snate@binkert.org
15276498Snate@binkert.org        # Nonterminals:
15286498Snate@binkert.org
15296498Snate@binkert.org        # Initialize to false:
15306498Snate@binkert.org        for n in self.Nonterminals:
15316498Snate@binkert.org            terminates[n] = 0
15326498Snate@binkert.org
15336498Snate@binkert.org        # Then propagate termination until no change:
15346498Snate@binkert.org        while 1:
15356498Snate@binkert.org            some_change = 0
15366498Snate@binkert.org            for (n,pl) in self.Prodnames.items():
15376498Snate@binkert.org                # Nonterminal n terminates iff any of its productions terminates.
15386498Snate@binkert.org                for p in pl:
15396498Snate@binkert.org                    # Production p terminates iff all of its rhs symbols terminate.
15406498Snate@binkert.org                    for s in p.prod:
15416498Snate@binkert.org                        if not terminates[s]:
15426498Snate@binkert.org                            # The symbol s does not terminate,
15436498Snate@binkert.org                            # so production p does not terminate.
15446498Snate@binkert.org                            p_terminates = 0
15456498Snate@binkert.org                            break
15466498Snate@binkert.org                    else:
15476498Snate@binkert.org                        # didn't break from the loop,
15486498Snate@binkert.org                        # so every symbol s terminates
15496498Snate@binkert.org                        # so production p terminates.
15506498Snate@binkert.org                        p_terminates = 1
15516498Snate@binkert.org
15526498Snate@binkert.org                    if p_terminates:
15536498Snate@binkert.org                        # symbol n terminates!
15546498Snate@binkert.org                        if not terminates[n]:
15556498Snate@binkert.org                            terminates[n] = 1
15566498Snate@binkert.org                            some_change = 1
15576498Snate@binkert.org                        # Don't need to consider any more productions for this n.
15586498Snate@binkert.org                        break
15596498Snate@binkert.org
15606498Snate@binkert.org            if not some_change:
15616498Snate@binkert.org                break
15626498Snate@binkert.org
15636498Snate@binkert.org        infinite = []
15646498Snate@binkert.org        for (s,term) in terminates.items():
15656498Snate@binkert.org            if not term:
15666498Snate@binkert.org                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
15676498Snate@binkert.org                    # s is used-but-not-defined, and we've already warned of that,
15686498Snate@binkert.org                    # so it would be overkill to say that it's also non-terminating.
15696498Snate@binkert.org                    pass
15706498Snate@binkert.org                else:
15716498Snate@binkert.org                    infinite.append(s)
15726498Snate@binkert.org
15736498Snate@binkert.org        return infinite
15746498Snate@binkert.org
15756498Snate@binkert.org
15766498Snate@binkert.org    # -----------------------------------------------------------------------------
15776498Snate@binkert.org    # undefined_symbols()
15786498Snate@binkert.org    #
15796498Snate@binkert.org    # Find all symbols that were used the grammar, but not defined as tokens or
15806498Snate@binkert.org    # grammar rules.  Returns a list of tuples (sym, prod) where sym in the symbol
15816498Snate@binkert.org    # and prod is the production where the symbol was used.
15826498Snate@binkert.org    # -----------------------------------------------------------------------------
15836498Snate@binkert.org    def undefined_symbols(self):
15846498Snate@binkert.org        result = []
15856498Snate@binkert.org        for p in self.Productions:
15862632SN/A            if not p: continue
15876498Snate@binkert.org
15886498Snate@binkert.org            for s in p.prod:
15896498Snate@binkert.org                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
15906498Snate@binkert.org                    result.append((s,p))
15916498Snate@binkert.org        return result
15926498Snate@binkert.org
15936498Snate@binkert.org    # -----------------------------------------------------------------------------
15946498Snate@binkert.org    # unused_terminals()
15956498Snate@binkert.org    #
15966498Snate@binkert.org    # Find all terminals that were defined, but not used by the grammar.  Returns
15976498Snate@binkert.org    # a list of all symbols.
15986498Snate@binkert.org    # -----------------------------------------------------------------------------
15996498Snate@binkert.org    def unused_terminals(self):
16006498Snate@binkert.org        unused_tok = []
16016498Snate@binkert.org        for s,v in self.Terminals.items():
16026498Snate@binkert.org            if s != 'error' and not v:
16036498Snate@binkert.org                unused_tok.append(s)
16046498Snate@binkert.org
16056498Snate@binkert.org        return unused_tok
16066498Snate@binkert.org
16076498Snate@binkert.org    # ------------------------------------------------------------------------------
16086498Snate@binkert.org    # unused_rules()
16096498Snate@binkert.org    #
16106498Snate@binkert.org    # Find all grammar rules that were defined,  but not used (maybe not reachable)
16116498Snate@binkert.org    # Returns a list of productions.
16126498Snate@binkert.org    # ------------------------------------------------------------------------------
16136498Snate@binkert.org
16146498Snate@binkert.org    def unused_rules(self):
16156498Snate@binkert.org        unused_prod = []
16166498Snate@binkert.org        for s,v in self.Nonterminals.items():
16176498Snate@binkert.org            if not v:
16186498Snate@binkert.org                p = self.Prodnames[s][0]
16196498Snate@binkert.org                unused_prod.append(p)
16206498Snate@binkert.org        return unused_prod
16216498Snate@binkert.org
16226498Snate@binkert.org    # -----------------------------------------------------------------------------
16236498Snate@binkert.org    # unused_precedence()
16246498Snate@binkert.org    #
16256498Snate@binkert.org    # Returns a list of tuples (term,precedence) corresponding to precedence
16266498Snate@binkert.org    # rules that were never used by the grammar.  term is the name of the terminal
16276498Snate@binkert.org    # on which precedence was applied and precedence is a string such as 'left' or
16286498Snate@binkert.org    # 'right' corresponding to the type of precedence.
16296498Snate@binkert.org    # -----------------------------------------------------------------------------
16306498Snate@binkert.org
16316498Snate@binkert.org    def unused_precedence(self):
16326498Snate@binkert.org        unused = []
16336498Snate@binkert.org        for termname in self.Precedence:
16346498Snate@binkert.org            if not (termname in self.Terminals or termname in self.UsedPrecedence):
16356498Snate@binkert.org                unused.append((termname,self.Precedence[termname][0]))
16366498Snate@binkert.org
16376498Snate@binkert.org        return unused
16386498Snate@binkert.org
16396498Snate@binkert.org    # -------------------------------------------------------------------------
16406498Snate@binkert.org    # _first()
16416498Snate@binkert.org    #
16426498Snate@binkert.org    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
16436498Snate@binkert.org    #
16446498Snate@binkert.org    # During execution of compute_first1, the result may be incomplete.
16456498Snate@binkert.org    # Afterward (e.g., when called from compute_follow()), it will be complete.
16466498Snate@binkert.org    # -------------------------------------------------------------------------
16476498Snate@binkert.org    def _first(self,beta):
16486498Snate@binkert.org
16496498Snate@binkert.org        # We are computing First(x1,x2,x3,...,xn)
16506498Snate@binkert.org        result = [ ]
16516498Snate@binkert.org        for x in beta:
16526498Snate@binkert.org            x_produces_empty = 0
16536498Snate@binkert.org
16546498Snate@binkert.org            # Add all the non-<empty> symbols of First[x] to the result.
16556498Snate@binkert.org            for f in self.First[x]:
16566498Snate@binkert.org                if f == '<empty>':
16576498Snate@binkert.org                    x_produces_empty = 1
16582632SN/A                else:
16596498Snate@binkert.org                    if f not in result: result.append(f)
16606498Snate@binkert.org
16616498Snate@binkert.org            if x_produces_empty:
16626498Snate@binkert.org                # We have to consider the next x in beta,
16636498Snate@binkert.org                # i.e. stay in the loop.
16642632SN/A                pass
16652632SN/A            else:
16666498Snate@binkert.org                # We don't have to consider any further symbols in beta.
16676498Snate@binkert.org                break
16686498Snate@binkert.org        else:
16696498Snate@binkert.org            # There was no 'break' from the loop,
16706498Snate@binkert.org            # so x_produces_empty was true for all x in beta,
16716498Snate@binkert.org            # so beta produces empty as well.
16726498Snate@binkert.org            result.append('<empty>')
16736498Snate@binkert.org
16746498Snate@binkert.org        return result
16756498Snate@binkert.org
16766498Snate@binkert.org    # -------------------------------------------------------------------------
16776498Snate@binkert.org    # compute_first()
16786498Snate@binkert.org    #
16796498Snate@binkert.org    # Compute the value of FIRST1(X) for all symbols
16806498Snate@binkert.org    # -------------------------------------------------------------------------
16816498Snate@binkert.org    def compute_first(self):
16826498Snate@binkert.org        if self.First:
16836498Snate@binkert.org            return self.First
16846498Snate@binkert.org
16856498Snate@binkert.org        # Terminals:
16866498Snate@binkert.org        for t in self.Terminals:
16876498Snate@binkert.org            self.First[t] = [t]
16886498Snate@binkert.org
16896498Snate@binkert.org        self.First['$end'] = ['$end']
16906498Snate@binkert.org
16916498Snate@binkert.org        # Nonterminals:
16926498Snate@binkert.org
16936498Snate@binkert.org        # Initialize to the empty set:
16946498Snate@binkert.org        for n in self.Nonterminals:
16956498Snate@binkert.org            self.First[n] = []
16966498Snate@binkert.org
16976498Snate@binkert.org        # Then propagate symbols until no change:
16986498Snate@binkert.org        while 1:
16996498Snate@binkert.org            some_change = 0
17006498Snate@binkert.org            for n in self.Nonterminals:
17016498Snate@binkert.org                for p in self.Prodnames[n]:
17026498Snate@binkert.org                    for f in self._first(p.prod):
17036498Snate@binkert.org                        if f not in self.First[n]:
17046498Snate@binkert.org                            self.First[n].append( f )
17056498Snate@binkert.org                            some_change = 1
17066498Snate@binkert.org            if not some_change:
17076498Snate@binkert.org                break
17086498Snate@binkert.org
17096498Snate@binkert.org        return self.First
17106498Snate@binkert.org
17116498Snate@binkert.org    # ---------------------------------------------------------------------
17126498Snate@binkert.org    # compute_follow()
17136498Snate@binkert.org    #
17146498Snate@binkert.org    # Computes all of the follow sets for every non-terminal symbol.  The
17156498Snate@binkert.org    # follow set is the set of all symbols that might follow a given
17166498Snate@binkert.org    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
17176498Snate@binkert.org    # ---------------------------------------------------------------------
17186498Snate@binkert.org    def compute_follow(self,start=None):
17196498Snate@binkert.org        # If already computed, return the result
17206498Snate@binkert.org        if self.Follow:
17216498Snate@binkert.org            return self.Follow
17226498Snate@binkert.org
17236498Snate@binkert.org        # If first sets not computed yet, do that first.
17246498Snate@binkert.org        if not self.First:
17256498Snate@binkert.org            self.compute_first()
17266498Snate@binkert.org
17276498Snate@binkert.org        # Add '$end' to the follow list of the start symbol
17286498Snate@binkert.org        for k in self.Nonterminals:
17296498Snate@binkert.org            self.Follow[k] = [ ]
17306498Snate@binkert.org
17316498Snate@binkert.org        if not start:
17326498Snate@binkert.org            start = self.Productions[1].name
17336498Snate@binkert.org
17346498Snate@binkert.org        self.Follow[start] = [ '$end' ]
17356498Snate@binkert.org
17366498Snate@binkert.org        while 1:
17376498Snate@binkert.org            didadd = 0
17386498Snate@binkert.org            for p in self.Productions[1:]:
17396498Snate@binkert.org                # Here is the production set
17406498Snate@binkert.org                for i in range(len(p.prod)):
17416498Snate@binkert.org                    B = p.prod[i]
17426498Snate@binkert.org                    if B in self.Nonterminals:
17436498Snate@binkert.org                        # Okay. We got a non-terminal in a production
17446498Snate@binkert.org                        fst = self._first(p.prod[i+1:])
17456498Snate@binkert.org                        hasempty = 0
17466498Snate@binkert.org                        for f in fst:
17476498Snate@binkert.org                            if f != '<empty>' and f not in self.Follow[B]:
17486498Snate@binkert.org                                self.Follow[B].append(f)
17496498Snate@binkert.org                                didadd = 1
17506498Snate@binkert.org                            if f == '<empty>':
17516498Snate@binkert.org                                hasempty = 1
17526498Snate@binkert.org                        if hasempty or i == (len(p.prod)-1):
17536498Snate@binkert.org                            # Add elements of follow(a) to follow(b)
17546498Snate@binkert.org                            for f in self.Follow[p.name]:
17556498Snate@binkert.org                                if f not in self.Follow[B]:
17566498Snate@binkert.org                                    self.Follow[B].append(f)
17576498Snate@binkert.org                                    didadd = 1
17586498Snate@binkert.org            if not didadd: break
17596498Snate@binkert.org        return self.Follow
17606498Snate@binkert.org
17616498Snate@binkert.org
17626498Snate@binkert.org    # -----------------------------------------------------------------------------
17636498Snate@binkert.org    # build_lritems()
17646498Snate@binkert.org    #
17656498Snate@binkert.org    # This function walks the list of productions and builds a complete set of the
17666498Snate@binkert.org    # LR items.  The LR items are stored in two ways:  First, they are uniquely
17676498Snate@binkert.org    # numbered and placed in the list _lritems.  Second, a linked list of LR items
17686498Snate@binkert.org    # is built for each production.  For example:
17696498Snate@binkert.org    #
17706498Snate@binkert.org    #   E -> E PLUS E
17716498Snate@binkert.org    #
17726498Snate@binkert.org    # Creates the list
17736498Snate@binkert.org    #
17746498Snate@binkert.org    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
17756498Snate@binkert.org    # -----------------------------------------------------------------------------
17766498Snate@binkert.org
17776498Snate@binkert.org    def build_lritems(self):
17786498Snate@binkert.org        for p in self.Productions:
17796498Snate@binkert.org            lastlri = p
17806498Snate@binkert.org            i = 0
17816498Snate@binkert.org            lr_items = []
17826498Snate@binkert.org            while 1:
17836498Snate@binkert.org                if i > len(p):
17846498Snate@binkert.org                    lri = None
17856498Snate@binkert.org                else:
17866498Snate@binkert.org                    lri = LRItem(p,i)
17876498Snate@binkert.org                    # Precompute the list of productions immediately following
17886498Snate@binkert.org                    try:
17896498Snate@binkert.org                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
17906498Snate@binkert.org                    except (IndexError,KeyError):
17916498Snate@binkert.org                        lri.lr_after = []
17926498Snate@binkert.org                    try:
17936498Snate@binkert.org                        lri.lr_before = lri.prod[i-1]
17946498Snate@binkert.org                    except IndexError:
17956498Snate@binkert.org                        lri.lr_before = None
17966498Snate@binkert.org
17976498Snate@binkert.org                lastlri.lr_next = lri
17986498Snate@binkert.org                if not lri: break
17996498Snate@binkert.org                lr_items.append(lri)
18006498Snate@binkert.org                lastlri = lri
18016498Snate@binkert.org                i += 1
18026498Snate@binkert.org            p.lr_items = lr_items
18032632SN/A
18042632SN/A# -----------------------------------------------------------------------------
18056498Snate@binkert.org#                            == Class LRTable ==
18062632SN/A#
18076498Snate@binkert.org# This basic class represents a basic table of LR parsing information.
18086498Snate@binkert.org# Methods for generating the tables are not defined here.  They are defined
18096498Snate@binkert.org# in the derived class LRGeneratedTable.
18102632SN/A# -----------------------------------------------------------------------------
18116498Snate@binkert.org
18126498Snate@binkert.orgclass VersionError(YaccError): pass
18136498Snate@binkert.org
18146498Snate@binkert.orgclass LRTable(object):
18156498Snate@binkert.org    def __init__(self):
18166498Snate@binkert.org        self.lr_action = None
18176498Snate@binkert.org        self.lr_goto = None
18186498Snate@binkert.org        self.lr_productions = None
18196498Snate@binkert.org        self.lr_method = None
18206498Snate@binkert.org
18216498Snate@binkert.org    def read_table(self,module):
18226498Snate@binkert.org        if isinstance(module,types.ModuleType):
18236498Snate@binkert.org            parsetab = module
18246498Snate@binkert.org        else:
18256498Snate@binkert.org            if sys.version_info[0] < 3:
18266498Snate@binkert.org                exec("import %s as parsetab" % module)
18276498Snate@binkert.org            else:
18286498Snate@binkert.org                env = { }
18296498Snate@binkert.org                exec("import %s as parsetab" % module, env, env)
18306498Snate@binkert.org                parsetab = env['parsetab']
18316498Snate@binkert.org
18326498Snate@binkert.org        if parsetab._tabversion != __tabversion__:
18336498Snate@binkert.org            raise VersionError("yacc table file version is out of date")
18346498Snate@binkert.org
18356498Snate@binkert.org        self.lr_action = parsetab._lr_action
18366498Snate@binkert.org        self.lr_goto = parsetab._lr_goto
18376498Snate@binkert.org
18386498Snate@binkert.org        self.lr_productions = []
18396498Snate@binkert.org        for p in parsetab._lr_productions:
18406498Snate@binkert.org            self.lr_productions.append(MiniProduction(*p))
18416498Snate@binkert.org
18426498Snate@binkert.org        self.lr_method = parsetab._lr_method
18436498Snate@binkert.org        return parsetab._lr_signature
18446498Snate@binkert.org
18456498Snate@binkert.org    def read_pickle(self,filename):
18466498Snate@binkert.org        try:
18476498Snate@binkert.org            import cPickle as pickle
18486498Snate@binkert.org        except ImportError:
18496498Snate@binkert.org            import pickle
18506498Snate@binkert.org
18516498Snate@binkert.org        in_f = open(filename,"rb")
18526498Snate@binkert.org
18536498Snate@binkert.org        tabversion = pickle.load(in_f)
18546498Snate@binkert.org        if tabversion != __tabversion__:
18556498Snate@binkert.org            raise VersionError("yacc table file version is out of date")
18566498Snate@binkert.org        self.lr_method = pickle.load(in_f)
18576498Snate@binkert.org        signature      = pickle.load(in_f)
18586498Snate@binkert.org        self.lr_action = pickle.load(in_f)
18596498Snate@binkert.org        self.lr_goto   = pickle.load(in_f)
18606498Snate@binkert.org        productions    = pickle.load(in_f)
18616498Snate@binkert.org
18626498Snate@binkert.org        self.lr_productions = []
18636498Snate@binkert.org        for p in productions:
18646498Snate@binkert.org            self.lr_productions.append(MiniProduction(*p))
18656498Snate@binkert.org
18666498Snate@binkert.org        in_f.close()
18676498Snate@binkert.org        return signature
18686498Snate@binkert.org
18696498Snate@binkert.org    # Bind all production function names to callable objects in pdict
18706498Snate@binkert.org    def bind_callables(self,pdict):
18716498Snate@binkert.org        for p in self.lr_productions:
18726498Snate@binkert.org            p.bind(pdict)
18736498Snate@binkert.org
18742632SN/A# -----------------------------------------------------------------------------
18756498Snate@binkert.org#                           === LR Generator ===
18762632SN/A#
18776498Snate@binkert.org# The following classes and functions are used to generate LR parsing tables on
18786498Snate@binkert.org# a grammar.
18792632SN/A# -----------------------------------------------------------------------------
18802632SN/A
18814479Sbinkertn@umich.edu# -----------------------------------------------------------------------------
18824479Sbinkertn@umich.edu# digraph()
18834479Sbinkertn@umich.edu# traverse()
18844479Sbinkertn@umich.edu#
18854479Sbinkertn@umich.edu# The following two functions are used to compute set valued functions
18864479Sbinkertn@umich.edu# of the form:
18874479Sbinkertn@umich.edu#
18884479Sbinkertn@umich.edu#     F(x) = F'(x) U U{F(y) | x R y}
18894479Sbinkertn@umich.edu#
18904479Sbinkertn@umich.edu# This is used to compute the values of Read() sets as well as FOLLOW sets
18914479Sbinkertn@umich.edu# in LALR(1) generation.
18924479Sbinkertn@umich.edu#
18934479Sbinkertn@umich.edu# Inputs:  X    - An input set
18944479Sbinkertn@umich.edu#          R    - A relation
18954479Sbinkertn@umich.edu#          FP   - Set-valued function
18964479Sbinkertn@umich.edu# ------------------------------------------------------------------------------
18974479Sbinkertn@umich.edu
18984479Sbinkertn@umich.edudef digraph(X,R,FP):
18994479Sbinkertn@umich.edu    N = { }
19004479Sbinkertn@umich.edu    for x in X:
19014479Sbinkertn@umich.edu       N[x] = 0
19024479Sbinkertn@umich.edu    stack = []
19034479Sbinkertn@umich.edu    F = { }
19044479Sbinkertn@umich.edu    for x in X:
19054479Sbinkertn@umich.edu        if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
19064479Sbinkertn@umich.edu    return F
19074479Sbinkertn@umich.edu
19084479Sbinkertn@umich.edudef traverse(x,N,stack,F,X,R,FP):
19094479Sbinkertn@umich.edu    stack.append(x)
19104479Sbinkertn@umich.edu    d = len(stack)
19114479Sbinkertn@umich.edu    N[x] = d
19124479Sbinkertn@umich.edu    F[x] = FP(x)             # F(X) <- F'(x)
19134479Sbinkertn@umich.edu
19144479Sbinkertn@umich.edu    rel = R(x)               # Get y's related to x
19154479Sbinkertn@umich.edu    for y in rel:
19164479Sbinkertn@umich.edu        if N[y] == 0:
19174479Sbinkertn@umich.edu             traverse(y,N,stack,F,X,R,FP)
19184479Sbinkertn@umich.edu        N[x] = min(N[x],N[y])
19194479Sbinkertn@umich.edu        for a in F.get(y,[]):
19204479Sbinkertn@umich.edu            if a not in F[x]: F[x].append(a)
19214479Sbinkertn@umich.edu    if N[x] == d:
19226498Snate@binkert.org       N[stack[-1]] = MAXINT
19234479Sbinkertn@umich.edu       F[stack[-1]] = F[x]
19244479Sbinkertn@umich.edu       element = stack.pop()
19254479Sbinkertn@umich.edu       while element != x:
19266498Snate@binkert.org           N[stack[-1]] = MAXINT
19274479Sbinkertn@umich.edu           F[stack[-1]] = F[x]
19284479Sbinkertn@umich.edu           element = stack.pop()
19294479Sbinkertn@umich.edu
19306498Snate@binkert.orgclass LALRError(YaccError): pass
19316498Snate@binkert.org
19324479Sbinkertn@umich.edu# -----------------------------------------------------------------------------
19336498Snate@binkert.org#                             == LRGeneratedTable ==
19344479Sbinkertn@umich.edu#
19356498Snate@binkert.org# This class implements the LR table generation algorithm.  There are no
19366498Snate@binkert.org# public methods except for write()
19374479Sbinkertn@umich.edu# -----------------------------------------------------------------------------
19384479Sbinkertn@umich.edu
19396498Snate@binkert.orgclass LRGeneratedTable(LRTable):
19406498Snate@binkert.org    def __init__(self,grammar,method='LALR',log=None):
19416498Snate@binkert.org        if method not in ['SLR','LALR']:
19426498Snate@binkert.org            raise LALRError("Unsupported method %s" % method)
19436498Snate@binkert.org
19446498Snate@binkert.org        self.grammar = grammar
19456498Snate@binkert.org        self.lr_method = method
19466498Snate@binkert.org
19476498Snate@binkert.org        # Set up the logger
19486498Snate@binkert.org        if not log:
19496498Snate@binkert.org            log = NullLogger()
19506498Snate@binkert.org        self.log = log
19516498Snate@binkert.org
19526498Snate@binkert.org        # Internal attributes
19536498Snate@binkert.org        self.lr_action     = {}        # Action table
19546498Snate@binkert.org        self.lr_goto       = {}        # Goto table
19556498Snate@binkert.org        self.lr_productions  = grammar.Productions    # Copy of grammar Production array
19566498Snate@binkert.org        self.lr_goto_cache = {}        # Cache of computed gotos
19576498Snate@binkert.org        self.lr0_cidhash   = {}        # Cache of closures
19586498Snate@binkert.org
19596498Snate@binkert.org        self._add_count    = 0         # Internal counter used to detect cycles
19606498Snate@binkert.org
19616498Snate@binkert.org        # Diagonistic information filled in by the table generator
19626498Snate@binkert.org        self.sr_conflict   = 0
19636498Snate@binkert.org        self.rr_conflict   = 0
19646498Snate@binkert.org        self.conflicts     = []        # List of conflicts
19656498Snate@binkert.org
19666498Snate@binkert.org        self.sr_conflicts  = []
19676498Snate@binkert.org        self.rr_conflicts  = []
19686498Snate@binkert.org
19696498Snate@binkert.org        # Build the tables
19706498Snate@binkert.org        self.grammar.build_lritems()
19716498Snate@binkert.org        self.grammar.compute_first()
19726498Snate@binkert.org        self.grammar.compute_follow()
19736498Snate@binkert.org        self.lr_parse_table()
19746498Snate@binkert.org
19756498Snate@binkert.org    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
19766498Snate@binkert.org
19776498Snate@binkert.org    def lr0_closure(self,I):
19786498Snate@binkert.org        self._add_count += 1
19796498Snate@binkert.org
19806498Snate@binkert.org        # Add everything in I to J
19816498Snate@binkert.org        J = I[:]
19826498Snate@binkert.org        didadd = 1
19836498Snate@binkert.org        while didadd:
19846498Snate@binkert.org            didadd = 0
19856498Snate@binkert.org            for j in J:
19866498Snate@binkert.org                for x in j.lr_after:
19876498Snate@binkert.org                    if getattr(x,"lr0_added",0) == self._add_count: continue
19886498Snate@binkert.org                    # Add B --> .G to J
19896498Snate@binkert.org                    J.append(x.lr_next)
19906498Snate@binkert.org                    x.lr0_added = self._add_count
19916498Snate@binkert.org                    didadd = 1
19926498Snate@binkert.org
19936498Snate@binkert.org        return J
19946498Snate@binkert.org
19956498Snate@binkert.org    # Compute the LR(0) goto function goto(I,X) where I is a set
19966498Snate@binkert.org    # of LR(0) items and X is a grammar symbol.   This function is written
19976498Snate@binkert.org    # in a way that guarantees uniqueness of the generated goto sets
19986498Snate@binkert.org    # (i.e. the same goto set will never be returned as two different Python
19996498Snate@binkert.org    # objects).  With uniqueness, we can later do fast set comparisons using
20006498Snate@binkert.org    # id(obj) instead of element-wise comparison.
20016498Snate@binkert.org
20026498Snate@binkert.org    def lr0_goto(self,I,x):
20036498Snate@binkert.org        # First we look for a previously cached entry
20046498Snate@binkert.org        g = self.lr_goto_cache.get((id(I),x),None)
20056498Snate@binkert.org        if g: return g
20066498Snate@binkert.org
20076498Snate@binkert.org        # Now we generate the goto set in a way that guarantees uniqueness
20086498Snate@binkert.org        # of the result
20096498Snate@binkert.org
20106498Snate@binkert.org        s = self.lr_goto_cache.get(x,None)
20116498Snate@binkert.org        if not s:
20126498Snate@binkert.org            s = { }
20136498Snate@binkert.org            self.lr_goto_cache[x] = s
20146498Snate@binkert.org
20156498Snate@binkert.org        gs = [ ]
20166498Snate@binkert.org        for p in I:
20176498Snate@binkert.org            n = p.lr_next
20186498Snate@binkert.org            if n and n.lr_before == x:
20196498Snate@binkert.org                s1 = s.get(id(n),None)
20206498Snate@binkert.org                if not s1:
20216498Snate@binkert.org                    s1 = { }
20226498Snate@binkert.org                    s[id(n)] = s1
20236498Snate@binkert.org                gs.append(n)
20246498Snate@binkert.org                s = s1
20256498Snate@binkert.org        g = s.get('$end',None)
20266498Snate@binkert.org        if not g:
20276498Snate@binkert.org            if gs:
20286498Snate@binkert.org                g = self.lr0_closure(gs)
20296498Snate@binkert.org                s['$end'] = g
20306498Snate@binkert.org            else:
20316498Snate@binkert.org                s['$end'] = gs
20326498Snate@binkert.org        self.lr_goto_cache[(id(I),x)] = g
20336498Snate@binkert.org        return g
20346498Snate@binkert.org
20356498Snate@binkert.org    # Compute the LR(0) sets of item function
20366498Snate@binkert.org    def lr0_items(self):
20376498Snate@binkert.org
20386498Snate@binkert.org        C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
20396498Snate@binkert.org        i = 0
20406498Snate@binkert.org        for I in C:
20416498Snate@binkert.org            self.lr0_cidhash[id(I)] = i
20426498Snate@binkert.org            i += 1
20436498Snate@binkert.org
20446498Snate@binkert.org        # Loop over the items in C and each grammar symbols
20456498Snate@binkert.org        i = 0
20466498Snate@binkert.org        while i < len(C):
20476498Snate@binkert.org            I = C[i]
20486498Snate@binkert.org            i += 1
20496498Snate@binkert.org
20506498Snate@binkert.org            # Collect all of the symbols that could possibly be in the goto(I,X) sets
20516498Snate@binkert.org            asyms = { }
20526498Snate@binkert.org            for ii in I:
20536498Snate@binkert.org                for s in ii.usyms:
20546498Snate@binkert.org                    asyms[s] = None
20556498Snate@binkert.org
20566498Snate@binkert.org            for x in asyms:
20576498Snate@binkert.org                g = self.lr0_goto(I,x)
20586498Snate@binkert.org                if not g:  continue
20596498Snate@binkert.org                if id(g) in self.lr0_cidhash: continue
20606498Snate@binkert.org                self.lr0_cidhash[id(g)] = len(C)
20616498Snate@binkert.org                C.append(g)
20626498Snate@binkert.org
20636498Snate@binkert.org        return C
20646498Snate@binkert.org
20656498Snate@binkert.org    # -----------------------------------------------------------------------------
20666498Snate@binkert.org    #                       ==== LALR(1) Parsing ====
20676498Snate@binkert.org    #
20686498Snate@binkert.org    # LALR(1) parsing is almost exactly the same as SLR except that instead of
20696498Snate@binkert.org    # relying upon Follow() sets when performing reductions, a more selective
20706498Snate@binkert.org    # lookahead set that incorporates the state of the LR(0) machine is utilized.
20716498Snate@binkert.org    # Thus, we mainly just have to focus on calculating the lookahead sets.
20726498Snate@binkert.org    #
20736498Snate@binkert.org    # The method used here is due to DeRemer and Pennelo (1982).
20746498Snate@binkert.org    #
20756498Snate@binkert.org    # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
20766498Snate@binkert.org    #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
20776498Snate@binkert.org    #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
20786498Snate@binkert.org    #
20796498Snate@binkert.org    # Further details can also be found in:
20806498Snate@binkert.org    #
20816498Snate@binkert.org    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
20826498Snate@binkert.org    #      McGraw-Hill Book Company, (1985).
20836498Snate@binkert.org    #
20846498Snate@binkert.org    # -----------------------------------------------------------------------------
20856498Snate@binkert.org
20866498Snate@binkert.org    # -----------------------------------------------------------------------------
20876498Snate@binkert.org    # compute_nullable_nonterminals()
20886498Snate@binkert.org    #
20896498Snate@binkert.org    # Creates a dictionary containing all of the non-terminals that might produce
20906498Snate@binkert.org    # an empty production.
20916498Snate@binkert.org    # -----------------------------------------------------------------------------
20926498Snate@binkert.org
20936498Snate@binkert.org    def compute_nullable_nonterminals(self):
20946498Snate@binkert.org        nullable = {}
20956498Snate@binkert.org        num_nullable = 0
20966498Snate@binkert.org        while 1:
20976498Snate@binkert.org           for p in self.grammar.Productions[1:]:
20986498Snate@binkert.org               if p.len == 0:
20996498Snate@binkert.org                    nullable[p.name] = 1
21006498Snate@binkert.org                    continue
21016498Snate@binkert.org               for t in p.prod:
21026498Snate@binkert.org                    if not t in nullable: break
21036498Snate@binkert.org               else:
21046498Snate@binkert.org                    nullable[p.name] = 1
21056498Snate@binkert.org           if len(nullable) == num_nullable: break
21066498Snate@binkert.org           num_nullable = len(nullable)
21076498Snate@binkert.org        return nullable
21086498Snate@binkert.org
21096498Snate@binkert.org    # -----------------------------------------------------------------------------
21106498Snate@binkert.org    # find_nonterminal_trans(C)
21116498Snate@binkert.org    #
21126498Snate@binkert.org    # Given a set of LR(0) items, this functions finds all of the non-terminal
21136498Snate@binkert.org    # transitions.    These are transitions in which a dot appears immediately before
21146498Snate@binkert.org    # a non-terminal.   Returns a list of tuples of the form (state,N) where state
21156498Snate@binkert.org    # is the state number and N is the nonterminal symbol.
21166498Snate@binkert.org    #
21176498Snate@binkert.org    # The input C is the set of LR(0) items.
21186498Snate@binkert.org    # -----------------------------------------------------------------------------
21196498Snate@binkert.org
21206498Snate@binkert.org    def find_nonterminal_transitions(self,C):
21216498Snate@binkert.org         trans = []
21226498Snate@binkert.org         for state in range(len(C)):
21236498Snate@binkert.org             for p in C[state]:
21246498Snate@binkert.org                 if p.lr_index < p.len - 1:
21256498Snate@binkert.org                      t = (state,p.prod[p.lr_index+1])
21266498Snate@binkert.org                      if t[1] in self.grammar.Nonterminals:
21276498Snate@binkert.org                            if t not in trans: trans.append(t)
21286498Snate@binkert.org             state = state + 1
21296498Snate@binkert.org         return trans
21306498Snate@binkert.org
21316498Snate@binkert.org    # -----------------------------------------------------------------------------
21326498Snate@binkert.org    # dr_relation()
21336498Snate@binkert.org    #
21346498Snate@binkert.org    # Computes the DR(p,A) relationships for non-terminal transitions.  The input
21356498Snate@binkert.org    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
21366498Snate@binkert.org    #
21376498Snate@binkert.org    # Returns a list of terminals.
21386498Snate@binkert.org    # -----------------------------------------------------------------------------
21396498Snate@binkert.org
21406498Snate@binkert.org    def dr_relation(self,C,trans,nullable):
21416498Snate@binkert.org        dr_set = { }
21426498Snate@binkert.org        state,N = trans
21436498Snate@binkert.org        terms = []
21446498Snate@binkert.org
21456498Snate@binkert.org        g = self.lr0_goto(C[state],N)
21466498Snate@binkert.org        for p in g:
21476498Snate@binkert.org           if p.lr_index < p.len - 1:
21486498Snate@binkert.org               a = p.prod[p.lr_index+1]
21496498Snate@binkert.org               if a in self.grammar.Terminals:
21506498Snate@binkert.org                   if a not in terms: terms.append(a)
21516498Snate@binkert.org
21526498Snate@binkert.org        # This extra bit is to handle the start state
21536498Snate@binkert.org        if state == 0 and N == self.grammar.Productions[0].prod[0]:
21546498Snate@binkert.org           terms.append('$end')
21556498Snate@binkert.org
21566498Snate@binkert.org        return terms
21576498Snate@binkert.org
21586498Snate@binkert.org    # -----------------------------------------------------------------------------
21596498Snate@binkert.org    # reads_relation()
21606498Snate@binkert.org    #
21616498Snate@binkert.org    # Computes the READS() relation (p,A) READS (t,C).
21626498Snate@binkert.org    # -----------------------------------------------------------------------------
21636498Snate@binkert.org
21646498Snate@binkert.org    def reads_relation(self,C, trans, empty):
21656498Snate@binkert.org        # Look for empty transitions
21666498Snate@binkert.org        rel = []
21676498Snate@binkert.org        state, N = trans
21686498Snate@binkert.org
21696498Snate@binkert.org        g = self.lr0_goto(C[state],N)
21706498Snate@binkert.org        j = self.lr0_cidhash.get(id(g),-1)
21716498Snate@binkert.org        for p in g:
21726498Snate@binkert.org            if p.lr_index < p.len - 1:
21736498Snate@binkert.org                 a = p.prod[p.lr_index + 1]
21746498Snate@binkert.org                 if a in empty:
21756498Snate@binkert.org                      rel.append((j,a))
21766498Snate@binkert.org
21776498Snate@binkert.org        return rel
21786498Snate@binkert.org
21796498Snate@binkert.org    # -----------------------------------------------------------------------------
21806498Snate@binkert.org    # compute_lookback_includes()
21816498Snate@binkert.org    #
21826498Snate@binkert.org    # Determines the lookback and includes relations
21836498Snate@binkert.org    #
21846498Snate@binkert.org    # LOOKBACK:
21856498Snate@binkert.org    #
21866498Snate@binkert.org    # This relation is determined by running the LR(0) state machine forward.
21876498Snate@binkert.org    # For example, starting with a production "N : . A B C", we run it forward
21886498Snate@binkert.org    # to obtain "N : A B C ."   We then build a relationship between this final
21896498Snate@binkert.org    # state and the starting state.   These relationships are stored in a dictionary
21906498Snate@binkert.org    # lookdict.
21916498Snate@binkert.org    #
21926498Snate@binkert.org    # INCLUDES:
21936498Snate@binkert.org    #
21946498Snate@binkert.org    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
21956498Snate@binkert.org    #
21966498Snate@binkert.org    # This relation is used to determine non-terminal transitions that occur
21976498Snate@binkert.org    # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
21986498Snate@binkert.org    # if the following holds:
21996498Snate@binkert.org    #
22006498Snate@binkert.org    #       B -> LAT, where T -> epsilon and p' -L-> p
22016498Snate@binkert.org    #
22026498Snate@binkert.org    # L is essentially a prefix (which may be empty), T is a suffix that must be
22036498Snate@binkert.org    # able to derive an empty string.  State p' must lead to state p with the string L.
22046498Snate@binkert.org    #
22056498Snate@binkert.org    # -----------------------------------------------------------------------------
22066498Snate@binkert.org
22076498Snate@binkert.org    def compute_lookback_includes(self,C,trans,nullable):
22086498Snate@binkert.org
22096498Snate@binkert.org        lookdict = {}          # Dictionary of lookback relations
22106498Snate@binkert.org        includedict = {}       # Dictionary of include relations
22116498Snate@binkert.org
22126498Snate@binkert.org        # Make a dictionary of non-terminal transitions
22136498Snate@binkert.org        dtrans = {}
22146498Snate@binkert.org        for t in trans:
22156498Snate@binkert.org            dtrans[t] = 1
22166498Snate@binkert.org
22176498Snate@binkert.org        # Loop over all transitions and compute lookbacks and includes
22186498Snate@binkert.org        for state,N in trans:
22196498Snate@binkert.org            lookb = []
22206498Snate@binkert.org            includes = []
22216498Snate@binkert.org            for p in C[state]:
22226498Snate@binkert.org                if p.name != N: continue
22236498Snate@binkert.org
22246498Snate@binkert.org                # Okay, we have a name match.  We now follow the production all the way
22256498Snate@binkert.org                # through the state machine until we get the . on the right hand side
22266498Snate@binkert.org
22276498Snate@binkert.org                lr_index = p.lr_index
22286498Snate@binkert.org                j = state
22296498Snate@binkert.org                while lr_index < p.len - 1:
22306498Snate@binkert.org                     lr_index = lr_index + 1
22316498Snate@binkert.org                     t = p.prod[lr_index]
22326498Snate@binkert.org
22336498Snate@binkert.org                     # Check to see if this symbol and state are a non-terminal transition
22346498Snate@binkert.org                     if (j,t) in dtrans:
22356498Snate@binkert.org                           # Yes.  Okay, there is some chance that this is an includes relation
22366498Snate@binkert.org                           # the only way to know for certain is whether the rest of the
22376498Snate@binkert.org                           # production derives empty
22386498Snate@binkert.org
22396498Snate@binkert.org                           li = lr_index + 1
22406498Snate@binkert.org                           while li < p.len:
22416498Snate@binkert.org                                if p.prod[li] in self.grammar.Terminals: break      # No forget it
22426498Snate@binkert.org                                if not p.prod[li] in nullable: break
22436498Snate@binkert.org                                li = li + 1
22446498Snate@binkert.org                           else:
22456498Snate@binkert.org                                # Appears to be a relation between (j,t) and (state,N)
22466498Snate@binkert.org                                includes.append((j,t))
22476498Snate@binkert.org
22486498Snate@binkert.org                     g = self.lr0_goto(C[j],t)               # Go to next set
22496498Snate@binkert.org                     j = self.lr0_cidhash.get(id(g),-1)     # Go to next state
22506498Snate@binkert.org
22516498Snate@binkert.org                # When we get here, j is the final state, now we have to locate the production
22526498Snate@binkert.org                for r in C[j]:
22536498Snate@binkert.org                     if r.name != p.name: continue
22546498Snate@binkert.org                     if r.len != p.len:   continue
22556498Snate@binkert.org                     i = 0
22566498Snate@binkert.org                     # This look is comparing a production ". A B C" with "A B C ."
22576498Snate@binkert.org                     while i < r.lr_index:
22586498Snate@binkert.org                          if r.prod[i] != p.prod[i+1]: break
22596498Snate@binkert.org                          i = i + 1
22606498Snate@binkert.org                     else:
22616498Snate@binkert.org                          lookb.append((j,r))
22626498Snate@binkert.org            for i in includes:
22636498Snate@binkert.org                 if not i in includedict: includedict[i] = []
22646498Snate@binkert.org                 includedict[i].append((state,N))
22656498Snate@binkert.org            lookdict[(state,N)] = lookb
22666498Snate@binkert.org
22676498Snate@binkert.org        return lookdict,includedict
22686498Snate@binkert.org
22696498Snate@binkert.org    # -----------------------------------------------------------------------------
22706498Snate@binkert.org    # compute_read_sets()
22716498Snate@binkert.org    #
22726498Snate@binkert.org    # Given a set of LR(0) items, this function computes the read sets.
22736498Snate@binkert.org    #
22746498Snate@binkert.org    # Inputs:  C        =  Set of LR(0) items
22756498Snate@binkert.org    #          ntrans   = Set of nonterminal transitions
22766498Snate@binkert.org    #          nullable = Set of empty transitions
22776498Snate@binkert.org    #
22786498Snate@binkert.org    # Returns a set containing the read sets
22796498Snate@binkert.org    # -----------------------------------------------------------------------------
22806498Snate@binkert.org
22816498Snate@binkert.org    def compute_read_sets(self,C, ntrans, nullable):
22826498Snate@binkert.org        FP = lambda x: self.dr_relation(C,x,nullable)
22836498Snate@binkert.org        R =  lambda x: self.reads_relation(C,x,nullable)
22846498Snate@binkert.org        F = digraph(ntrans,R,FP)
22856498Snate@binkert.org        return F
22866498Snate@binkert.org
22876498Snate@binkert.org    # -----------------------------------------------------------------------------
22886498Snate@binkert.org    # compute_follow_sets()
22896498Snate@binkert.org    #
22906498Snate@binkert.org    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
22916498Snate@binkert.org    # and an include set, this function computes the follow sets
22926498Snate@binkert.org    #
22936498Snate@binkert.org    # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
22946498Snate@binkert.org    #
22956498Snate@binkert.org    # Inputs:
22966498Snate@binkert.org    #            ntrans     = Set of nonterminal transitions
22976498Snate@binkert.org    #            readsets   = Readset (previously computed)
22986498Snate@binkert.org    #            inclsets   = Include sets (previously computed)
22996498Snate@binkert.org    #
23006498Snate@binkert.org    # Returns a set containing the follow sets
23016498Snate@binkert.org    # -----------------------------------------------------------------------------
23026498Snate@binkert.org
23036498Snate@binkert.org    def compute_follow_sets(self,ntrans,readsets,inclsets):
23046498Snate@binkert.org         FP = lambda x: readsets[x]
23056498Snate@binkert.org         R  = lambda x: inclsets.get(x,[])
23066498Snate@binkert.org         F = digraph(ntrans,R,FP)
23076498Snate@binkert.org         return F
23086498Snate@binkert.org
23096498Snate@binkert.org    # -----------------------------------------------------------------------------
23106498Snate@binkert.org    # add_lookaheads()
23116498Snate@binkert.org    #
23126498Snate@binkert.org    # Attaches the lookahead symbols to grammar rules.
23136498Snate@binkert.org    #
23146498Snate@binkert.org    # Inputs:    lookbacks         -  Set of lookback relations
23156498Snate@binkert.org    #            followset         -  Computed follow set
23166498Snate@binkert.org    #
23176498Snate@binkert.org    # This function directly attaches the lookaheads to productions contained
23186498Snate@binkert.org    # in the lookbacks set
23196498Snate@binkert.org    # -----------------------------------------------------------------------------
23206498Snate@binkert.org
23216498Snate@binkert.org    def add_lookaheads(self,lookbacks,followset):
23226498Snate@binkert.org        for trans,lb in lookbacks.items():
23236498Snate@binkert.org            # Loop over productions in lookback
23246498Snate@binkert.org            for state,p in lb:
23256498Snate@binkert.org                 if not state in p.lookaheads:
23266498Snate@binkert.org                      p.lookaheads[state] = []
23276498Snate@binkert.org                 f = followset.get(trans,[])
23286498Snate@binkert.org                 for a in f:
23296498Snate@binkert.org                      if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
23306498Snate@binkert.org
23316498Snate@binkert.org    # -----------------------------------------------------------------------------
23326498Snate@binkert.org    # add_lalr_lookaheads()
23336498Snate@binkert.org    #
23346498Snate@binkert.org    # This function does all of the work of adding lookahead information for use
23356498Snate@binkert.org    # with LALR parsing
23366498Snate@binkert.org    # -----------------------------------------------------------------------------
23376498Snate@binkert.org
23386498Snate@binkert.org    def add_lalr_lookaheads(self,C):
23396498Snate@binkert.org        # Determine all of the nullable nonterminals
23406498Snate@binkert.org        nullable = self.compute_nullable_nonterminals()
23416498Snate@binkert.org
23426498Snate@binkert.org        # Find all non-terminal transitions
23436498Snate@binkert.org        trans = self.find_nonterminal_transitions(C)
23446498Snate@binkert.org
23456498Snate@binkert.org        # Compute read sets
23466498Snate@binkert.org        readsets = self.compute_read_sets(C,trans,nullable)
23476498Snate@binkert.org
23486498Snate@binkert.org        # Compute lookback/includes relations
23496498Snate@binkert.org        lookd, included = self.compute_lookback_includes(C,trans,nullable)
23506498Snate@binkert.org
23516498Snate@binkert.org        # Compute LALR FOLLOW sets
23526498Snate@binkert.org        followsets = self.compute_follow_sets(trans,readsets,included)
23536498Snate@binkert.org
23546498Snate@binkert.org        # Add all of the lookaheads
23556498Snate@binkert.org        self.add_lookaheads(lookd,followsets)
23566498Snate@binkert.org
23576498Snate@binkert.org    # -----------------------------------------------------------------------------
23586498Snate@binkert.org    # lr_parse_table()
23596498Snate@binkert.org    #
23606498Snate@binkert.org    # This function constructs the parse tables for SLR or LALR
23616498Snate@binkert.org    # -----------------------------------------------------------------------------
23626498Snate@binkert.org    def lr_parse_table(self):
23636498Snate@binkert.org        Productions = self.grammar.Productions
23646498Snate@binkert.org        Precedence  = self.grammar.Precedence
23656498Snate@binkert.org        goto   = self.lr_goto         # Goto array
23666498Snate@binkert.org        action = self.lr_action       # Action array
23676498Snate@binkert.org        log    = self.log             # Logger for output
23686498Snate@binkert.org
23696498Snate@binkert.org        actionp = { }                 # Action production array (temporary)
23706498Snate@binkert.org
23716498Snate@binkert.org        log.info("Parsing method: %s", self.lr_method)
23726498Snate@binkert.org
23736498Snate@binkert.org        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
23746498Snate@binkert.org        # This determines the number of states
23756498Snate@binkert.org
23766498Snate@binkert.org        C = self.lr0_items()
23776498Snate@binkert.org
23786498Snate@binkert.org        if self.lr_method == 'LALR':
23796498Snate@binkert.org            self.add_lalr_lookaheads(C)
23806498Snate@binkert.org
23816498Snate@binkert.org        # Build the parser table, state by state
23826498Snate@binkert.org        st = 0
23836498Snate@binkert.org        for I in C:
23846498Snate@binkert.org            # Loop over each production in I
23856498Snate@binkert.org            actlist = [ ]              # List of actions
23866498Snate@binkert.org            st_action  = { }
23876498Snate@binkert.org            st_actionp = { }
23886498Snate@binkert.org            st_goto    = { }
23896498Snate@binkert.org            log.info("")
23906498Snate@binkert.org            log.info("state %d", st)
23916498Snate@binkert.org            log.info("")
23922632SN/A            for p in I:
23936498Snate@binkert.org                log.info("    (%d) %s", p.number, str(p))
23946498Snate@binkert.org            log.info("")
23956498Snate@binkert.org
23966498Snate@binkert.org            for p in I:
23976498Snate@binkert.org                    if p.len == p.lr_index + 1:
23986498Snate@binkert.org                        if p.name == "S'":
23996498Snate@binkert.org                            # Start symbol. Accept!
24006498Snate@binkert.org                            st_action["$end"] = 0
24016498Snate@binkert.org                            st_actionp["$end"] = p
24026498Snate@binkert.org                        else:
24036498Snate@binkert.org                            # We are at the end of a production.  Reduce!
24046498Snate@binkert.org                            if self.lr_method == 'LALR':
24056498Snate@binkert.org                                laheads = p.lookaheads[st]
24066498Snate@binkert.org                            else:
24076498Snate@binkert.org                                laheads = self.grammar.Follow[p.name]
24086498Snate@binkert.org                            for a in laheads:
24096498Snate@binkert.org                                actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
24106498Snate@binkert.org                                r = st_action.get(a,None)
24116498Snate@binkert.org                                if r is not None:
24126498Snate@binkert.org                                    # Whoa. Have a shift/reduce or reduce/reduce conflict
24136498Snate@binkert.org                                    if r > 0:
24146498Snate@binkert.org                                        # Need to decide on shift or reduce here
24156498Snate@binkert.org                                        # By default we favor shifting. Need to add
24166498Snate@binkert.org                                        # some precedence rules here.
24176498Snate@binkert.org                                        sprec,slevel = Productions[st_actionp[a].number].prec
24186498Snate@binkert.org                                        rprec,rlevel = Precedence.get(a,('right',0))
24196498Snate@binkert.org                                        if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
24206498Snate@binkert.org                                            # We really need to reduce here.
24216498Snate@binkert.org                                            st_action[a] = -p.number
24226498Snate@binkert.org                                            st_actionp[a] = p
24236498Snate@binkert.org                                            if not slevel and not rlevel:
24246498Snate@binkert.org                                                log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
24256498Snate@binkert.org                                                self.sr_conflicts.append((st,a,'reduce'))
24266498Snate@binkert.org                                            Productions[p.number].reduced += 1
24276498Snate@binkert.org                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
24286498Snate@binkert.org                                            st_action[a] = None
24296498Snate@binkert.org                                        else:
24306498Snate@binkert.org                                            # Hmmm. Guess we'll keep the shift
24316498Snate@binkert.org                                            if not rlevel:
24326498Snate@binkert.org                                                log.info("  ! shift/reduce conflict for %s resolved as shift",a)
24336498Snate@binkert.org                                                self.sr_conflicts.append((st,a,'shift'))
24346498Snate@binkert.org                                    elif r < 0:
24356498Snate@binkert.org                                        # Reduce/reduce conflict.   In this case, we favor the rule
24366498Snate@binkert.org                                        # that was defined first in the grammar file
24376498Snate@binkert.org                                        oldp = Productions[-r]
24386498Snate@binkert.org                                        pp = Productions[p.number]
24396498Snate@binkert.org                                        if oldp.line > pp.line:
24406498Snate@binkert.org                                            st_action[a] = -p.number
24416498Snate@binkert.org                                            st_actionp[a] = p
24426498Snate@binkert.org                                            chosenp,rejectp = pp,oldp
24436498Snate@binkert.org                                            Productions[p.number].reduced += 1
24446498Snate@binkert.org                                            Productions[oldp.number].reduced -= 1
24456498Snate@binkert.org                                        else:
24466498Snate@binkert.org                                            chosenp,rejectp = oldp,pp
24476498Snate@binkert.org                                        self.rr_conflicts.append((st,chosenp,rejectp))
24486498Snate@binkert.org                                        log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
24496498Snate@binkert.org                                    else:
24506498Snate@binkert.org                                        raise LALRError("Unknown conflict in state %d" % st)
24516498Snate@binkert.org                                else:
24526498Snate@binkert.org                                    st_action[a] = -p.number
24536498Snate@binkert.org                                    st_actionp[a] = p
24546498Snate@binkert.org                                    Productions[p.number].reduced += 1
24552632SN/A                    else:
24566498Snate@binkert.org                        i = p.lr_index
24576498Snate@binkert.org                        a = p.prod[i+1]       # Get symbol right after the "."
24586498Snate@binkert.org                        if a in self.grammar.Terminals:
24596498Snate@binkert.org                            g = self.lr0_goto(I,a)
24606498Snate@binkert.org                            j = self.lr0_cidhash.get(id(g),-1)
24616498Snate@binkert.org                            if j >= 0:
24626498Snate@binkert.org                                # We are in a shift state
24636498Snate@binkert.org                                actlist.append((a,p,"shift and go to state %d" % j))
24646498Snate@binkert.org                                r = st_action.get(a,None)
24656498Snate@binkert.org                                if r is not None:
24666498Snate@binkert.org                                    # Whoa have a shift/reduce or shift/shift conflict
24676498Snate@binkert.org                                    if r > 0:
24686498Snate@binkert.org                                        if r != j:
24696498Snate@binkert.org                                            raise LALRError("Shift/shift conflict in state %d" % st)
24706498Snate@binkert.org                                    elif r < 0:
24716498Snate@binkert.org                                        # Do a precedence check.
24726498Snate@binkert.org                                        #   -  if precedence of reduce rule is higher, we reduce.
24736498Snate@binkert.org                                        #   -  if precedence of reduce is same and left assoc, we reduce.
24746498Snate@binkert.org                                        #   -  otherwise we shift
24756498Snate@binkert.org                                        rprec,rlevel = Productions[st_actionp[a].number].prec
24766498Snate@binkert.org                                        sprec,slevel = Precedence.get(a,('right',0))
24776498Snate@binkert.org                                        if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
24786498Snate@binkert.org                                            # We decide to shift here... highest precedence to shift
24796498Snate@binkert.org                                            Productions[st_actionp[a].number].reduced -= 1
24806498Snate@binkert.org                                            st_action[a] = j
24816498Snate@binkert.org                                            st_actionp[a] = p
24826498Snate@binkert.org                                            if not rlevel:
24836498Snate@binkert.org                                                log.info("  ! shift/reduce conflict for %s resolved as shift",a)
24846498Snate@binkert.org                                                self.sr_conflicts.append((st,a,'shift'))
24856498Snate@binkert.org                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
24866498Snate@binkert.org                                            st_action[a] = None
24876498Snate@binkert.org                                        else:
24886498Snate@binkert.org                                            # Hmmm. Guess we'll keep the reduce
24896498Snate@binkert.org                                            if not slevel and not rlevel:
24906498Snate@binkert.org                                                log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
24916498Snate@binkert.org                                                self.sr_conflicts.append((st,a,'reduce'))
24926498Snate@binkert.org
24932632SN/A                                    else:
24946498Snate@binkert.org                                        raise LALRError("Unknown conflict in state %d" % st)
24952632SN/A                                else:
24966498Snate@binkert.org                                    st_action[a] = j
24976498Snate@binkert.org                                    st_actionp[a] = p
24986498Snate@binkert.org
24996498Snate@binkert.org            # Print the actions associated with each terminal
25006498Snate@binkert.org            _actprint = { }
25016498Snate@binkert.org            for a,p,m in actlist:
25026498Snate@binkert.org                if a in st_action:
25036498Snate@binkert.org                    if p is st_actionp[a]:
25046498Snate@binkert.org                        log.info("    %-15s %s",a,m)
25054479Sbinkertn@umich.edu                        _actprint[(a,m)] = 1
25066498Snate@binkert.org            log.info("")
25076498Snate@binkert.org            # Print the actions that were not used. (debugging)
25086498Snate@binkert.org            not_used = 0
25096498Snate@binkert.org            for a,p,m in actlist:
25106498Snate@binkert.org                if a in st_action:
25116498Snate@binkert.org                    if p is not st_actionp[a]:
25126498Snate@binkert.org                        if not (a,m) in _actprint:
25136498Snate@binkert.org                            log.debug("  ! %-15s [ %s ]",a,m)
25146498Snate@binkert.org                            not_used = 1
25156498Snate@binkert.org                            _actprint[(a,m)] = 1
25166498Snate@binkert.org            if not_used:
25176498Snate@binkert.org                log.debug("")
25186498Snate@binkert.org
25196498Snate@binkert.org            # Construct the goto table for this state
25206498Snate@binkert.org
25216498Snate@binkert.org            nkeys = { }
25226498Snate@binkert.org            for ii in I:
25236498Snate@binkert.org                for s in ii.usyms:
25246498Snate@binkert.org                    if s in self.grammar.Nonterminals:
25256498Snate@binkert.org                        nkeys[s] = None
25266498Snate@binkert.org            for n in nkeys:
25276498Snate@binkert.org                g = self.lr0_goto(I,n)
25286498Snate@binkert.org                j = self.lr0_cidhash.get(id(g),-1)
25296498Snate@binkert.org                if j >= 0:
25306498Snate@binkert.org                    st_goto[n] = j
25316498Snate@binkert.org                    log.info("    %-30s shift and go to state %d",n,j)
25326498Snate@binkert.org
25336498Snate@binkert.org            action[st] = st_action
25346498Snate@binkert.org            actionp[st] = st_actionp
25356498Snate@binkert.org            goto[st] = st_goto
25366498Snate@binkert.org            st += 1
25376498Snate@binkert.org
25386498Snate@binkert.org
25396498Snate@binkert.org    # -----------------------------------------------------------------------------
25406498Snate@binkert.org    # write()
25416498Snate@binkert.org    #
25426498Snate@binkert.org    # This function writes the LR parsing tables to a file
25436498Snate@binkert.org    # -----------------------------------------------------------------------------
25446498Snate@binkert.org
25456498Snate@binkert.org    def write_table(self,modulename,outputdir='',signature=""):
25466498Snate@binkert.org        basemodulename = modulename.split(".")[-1]
25476498Snate@binkert.org        filename = os.path.join(outputdir,basemodulename) + ".py"
25486498Snate@binkert.org        try:
25496498Snate@binkert.org            f = open(filename,"w")
25506498Snate@binkert.org
25516498Snate@binkert.org            f.write("""
25522632SN/A# %s
25532632SN/A# This file is automatically generated. Do not edit.
25546498Snate@binkert.org_tabversion = %r
25556498Snate@binkert.org
25566498Snate@binkert.org_lr_method = %r
25576498Snate@binkert.org
25586498Snate@binkert.org_lr_signature = %r
25596498Snate@binkert.org    """ % (filename, __tabversion__, self.lr_method, signature))
25606498Snate@binkert.org
25616498Snate@binkert.org            # Change smaller to 0 to go back to original tables
25626498Snate@binkert.org            smaller = 1
25636498Snate@binkert.org
25646498Snate@binkert.org            # Factor out names to try and make smaller
25656498Snate@binkert.org            if smaller:
25666498Snate@binkert.org                items = { }
25676498Snate@binkert.org
25686498Snate@binkert.org                for s,nd in self.lr_action.items():
25696498Snate@binkert.org                   for name,v in nd.items():
25706498Snate@binkert.org                      i = items.get(name)
25716498Snate@binkert.org                      if not i:
25726498Snate@binkert.org                         i = ([],[])
25736498Snate@binkert.org                         items[name] = i
25746498Snate@binkert.org                      i[0].append(s)
25756498Snate@binkert.org                      i[1].append(v)
25766498Snate@binkert.org
25776498Snate@binkert.org                f.write("\n_lr_action_items = {")
25786498Snate@binkert.org                for k,v in items.items():
25796498Snate@binkert.org                    f.write("%r:([" % k)
25806498Snate@binkert.org                    for i in v[0]:
25816498Snate@binkert.org                        f.write("%r," % i)
25826498Snate@binkert.org                    f.write("],[")
25836498Snate@binkert.org                    for i in v[1]:
25846498Snate@binkert.org                        f.write("%r," % i)
25856498Snate@binkert.org
25866498Snate@binkert.org                    f.write("]),")
25876498Snate@binkert.org                f.write("}\n")
25886498Snate@binkert.org
25896498Snate@binkert.org                f.write("""
25902632SN/A_lr_action = { }
25912632SN/Afor _k, _v in _lr_action_items.items():
25922632SN/A   for _x,_y in zip(_v[0],_v[1]):
25936498Snate@binkert.org      if not _x in _lr_action:  _lr_action[_x] = { }
25944479Sbinkertn@umich.edu      _lr_action[_x][_k] = _y
25952632SN/Adel _lr_action_items
25962632SN/A""")
25972632SN/A
25986498Snate@binkert.org            else:
25996498Snate@binkert.org                f.write("\n_lr_action = { ");
26006498Snate@binkert.org                for k,v in self.lr_action.items():
26016498Snate@binkert.org                    f.write("(%r,%r):%r," % (k[0],k[1],v))
26026498Snate@binkert.org                f.write("}\n");
26036498Snate@binkert.org
26046498Snate@binkert.org            if smaller:
26056498Snate@binkert.org                # Factor out names to try and make smaller
26066498Snate@binkert.org                items = { }
26076498Snate@binkert.org
26086498Snate@binkert.org                for s,nd in self.lr_goto.items():
26096498Snate@binkert.org                   for name,v in nd.items():
26106498Snate@binkert.org                      i = items.get(name)
26116498Snate@binkert.org                      if not i:
26126498Snate@binkert.org                         i = ([],[])
26136498Snate@binkert.org                         items[name] = i
26146498Snate@binkert.org                      i[0].append(s)
26156498Snate@binkert.org                      i[1].append(v)
26166498Snate@binkert.org
26176498Snate@binkert.org                f.write("\n_lr_goto_items = {")
26186498Snate@binkert.org                for k,v in items.items():
26196498Snate@binkert.org                    f.write("%r:([" % k)
26206498Snate@binkert.org                    for i in v[0]:
26216498Snate@binkert.org                        f.write("%r," % i)
26226498Snate@binkert.org                    f.write("],[")
26236498Snate@binkert.org                    for i in v[1]:
26246498Snate@binkert.org                        f.write("%r," % i)
26256498Snate@binkert.org
26266498Snate@binkert.org                    f.write("]),")
26276498Snate@binkert.org                f.write("}\n")
26286498Snate@binkert.org
26296498Snate@binkert.org                f.write("""
26302632SN/A_lr_goto = { }
26312632SN/Afor _k, _v in _lr_goto_items.items():
26322632SN/A   for _x,_y in zip(_v[0],_v[1]):
26336498Snate@binkert.org       if not _x in _lr_goto: _lr_goto[_x] = { }
26344479Sbinkertn@umich.edu       _lr_goto[_x][_k] = _y
26352632SN/Adel _lr_goto_items
26362632SN/A""")
26376498Snate@binkert.org            else:
26386498Snate@binkert.org                f.write("\n_lr_goto = { ");
26396498Snate@binkert.org                for k,v in self.lr_goto.items():
26406498Snate@binkert.org                    f.write("(%r,%r):%r," % (k[0],k[1],v))
26416498Snate@binkert.org                f.write("}\n");
26426498Snate@binkert.org
26436498Snate@binkert.org            # Write production table
26446498Snate@binkert.org            f.write("_lr_productions = [\n")
26456498Snate@binkert.org            for p in self.lr_productions:
26466498Snate@binkert.org                if p.func:
26476498Snate@binkert.org                    f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
26486498Snate@binkert.org                else:
26496498Snate@binkert.org                    f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
26506498Snate@binkert.org            f.write("]\n")
26516498Snate@binkert.org            f.close()
26526498Snate@binkert.org
26536498Snate@binkert.org        except IOError:
26546498Snate@binkert.org            e = sys.exc_info()[1]
26556498Snate@binkert.org            sys.stderr.write("Unable to create '%s'\n" % filename)
26566498Snate@binkert.org            sys.stderr.write(str(e)+"\n")
26576498Snate@binkert.org            return
26586498Snate@binkert.org
26596498Snate@binkert.org
26606498Snate@binkert.org    # -----------------------------------------------------------------------------
26616498Snate@binkert.org    # pickle_table()
26626498Snate@binkert.org    #
26636498Snate@binkert.org    # This function pickles the LR parsing tables to a supplied file object
26646498Snate@binkert.org    # -----------------------------------------------------------------------------
26656498Snate@binkert.org
26666498Snate@binkert.org    def pickle_table(self,filename,signature=""):
26676498Snate@binkert.org        try:
26686498Snate@binkert.org            import cPickle as pickle
26696498Snate@binkert.org        except ImportError:
26706498Snate@binkert.org            import pickle
26716498Snate@binkert.org        outf = open(filename,"wb")
26726498Snate@binkert.org        pickle.dump(__tabversion__,outf,pickle_protocol)
26736498Snate@binkert.org        pickle.dump(self.lr_method,outf,pickle_protocol)
26746498Snate@binkert.org        pickle.dump(signature,outf,pickle_protocol)
26756498Snate@binkert.org        pickle.dump(self.lr_action,outf,pickle_protocol)
26766498Snate@binkert.org        pickle.dump(self.lr_goto,outf,pickle_protocol)
26776498Snate@binkert.org
26786498Snate@binkert.org        outp = []
26796498Snate@binkert.org        for p in self.lr_productions:
26806498Snate@binkert.org            if p.func:
26816498Snate@binkert.org                outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
26826498Snate@binkert.org            else:
26836498Snate@binkert.org                outp.append((str(p),p.name,p.len,None,None,None))
26846498Snate@binkert.org        pickle.dump(outp,outf,pickle_protocol)
26856498Snate@binkert.org        outf.close()
26866498Snate@binkert.org
26876498Snate@binkert.org# -----------------------------------------------------------------------------
26886498Snate@binkert.org#                            === INTROSPECTION ===
26896498Snate@binkert.org#
26906498Snate@binkert.org# The following functions and classes are used to implement the PLY
26916498Snate@binkert.org# introspection features followed by the yacc() function itself.
26926498Snate@binkert.org# -----------------------------------------------------------------------------
26936498Snate@binkert.org
26946498Snate@binkert.org# -----------------------------------------------------------------------------
26956498Snate@binkert.org# get_caller_module_dict()
26966498Snate@binkert.org#
26976498Snate@binkert.org# This function returns a dictionary containing all of the symbols defined within
26986498Snate@binkert.org# a caller further down the call stack.  This is used to get the environment
26996498Snate@binkert.org# associated with the yacc() call if none was provided.
27006498Snate@binkert.org# -----------------------------------------------------------------------------
27016498Snate@binkert.org
27026498Snate@binkert.orgdef get_caller_module_dict(levels):
27036498Snate@binkert.org    try:
27046498Snate@binkert.org        raise RuntimeError
27056498Snate@binkert.org    except RuntimeError:
27066498Snate@binkert.org        e,b,t = sys.exc_info()
27076498Snate@binkert.org        f = t.tb_frame
27086498Snate@binkert.org        while levels > 0:
27096498Snate@binkert.org            f = f.f_back
27106498Snate@binkert.org            levels -= 1
27116498Snate@binkert.org        ldict = f.f_globals.copy()
27126498Snate@binkert.org        if f.f_globals != f.f_locals:
27136498Snate@binkert.org            ldict.update(f.f_locals)
27146498Snate@binkert.org
27156498Snate@binkert.org        return ldict
27166498Snate@binkert.org
27176498Snate@binkert.org# -----------------------------------------------------------------------------
27186498Snate@binkert.org# parse_grammar()
27196498Snate@binkert.org#
27206498Snate@binkert.org# This takes a raw grammar rule string and parses it into production data
27216498Snate@binkert.org# -----------------------------------------------------------------------------
27226498Snate@binkert.orgdef parse_grammar(doc,file,line):
27236498Snate@binkert.org    grammar = []
27246498Snate@binkert.org    # Split the doc string into lines
27256498Snate@binkert.org    pstrings = doc.splitlines()
27266498Snate@binkert.org    lastp = None
27276498Snate@binkert.org    dline = line
27286498Snate@binkert.org    for ps in pstrings:
27296498Snate@binkert.org        dline += 1
27306498Snate@binkert.org        p = ps.split()
27316498Snate@binkert.org        if not p: continue
27326498Snate@binkert.org        try:
27336498Snate@binkert.org            if p[0] == '|':
27346498Snate@binkert.org                # This is a continuation of a previous rule
27356498Snate@binkert.org                if not lastp:
27366498Snate@binkert.org                    raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
27376498Snate@binkert.org                prodname = lastp
27386498Snate@binkert.org                syms = p[1:]
27396498Snate@binkert.org            else:
27406498Snate@binkert.org                prodname = p[0]
27416498Snate@binkert.org                lastp = prodname
27426498Snate@binkert.org                syms   = p[2:]
27436498Snate@binkert.org                assign = p[1]
27446498Snate@binkert.org                if assign != ':' and assign != '::=':
27456498Snate@binkert.org                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
27466498Snate@binkert.org
27476498Snate@binkert.org            grammar.append((file,dline,prodname,syms))
27486498Snate@binkert.org        except SyntaxError:
27496498Snate@binkert.org            raise
27506498Snate@binkert.org        except Exception:
27516498Snate@binkert.org            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))
27526498Snate@binkert.org
27536498Snate@binkert.org    return grammar
27546498Snate@binkert.org
27556498Snate@binkert.org# -----------------------------------------------------------------------------
27566498Snate@binkert.org# ParserReflect()
27576498Snate@binkert.org#
27586498Snate@binkert.org# This class represents information extracted for building a parser including
27596498Snate@binkert.org# start symbol, error function, tokens, precedence list, action functions,
27606498Snate@binkert.org# etc.
27616498Snate@binkert.org# -----------------------------------------------------------------------------
27626498Snate@binkert.orgclass ParserReflect(object):
27636498Snate@binkert.org    def __init__(self,pdict,log=None):
27646498Snate@binkert.org        self.pdict      = pdict
27656498Snate@binkert.org        self.start      = None
27666498Snate@binkert.org        self.error_func = None
27676498Snate@binkert.org        self.tokens     = None
27686498Snate@binkert.org        self.files      = {}
27696498Snate@binkert.org        self.grammar    = []
27706498Snate@binkert.org        self.error      = 0
27716498Snate@binkert.org
27726498Snate@binkert.org        if log is None:
27736498Snate@binkert.org            self.log = PlyLogger(sys.stderr)
27742632SN/A        else:
27756498Snate@binkert.org            self.log = log
27766498Snate@binkert.org
27776498Snate@binkert.org    # Get all of the basic information
27786498Snate@binkert.org    def get_all(self):
27796498Snate@binkert.org        self.get_start()
27806498Snate@binkert.org        self.get_error_func()
27816498Snate@binkert.org        self.get_tokens()
27826498Snate@binkert.org        self.get_precedence()
27836498Snate@binkert.org        self.get_pfunctions()
27846498Snate@binkert.org
27856498Snate@binkert.org    # Validate all of the information
27866498Snate@binkert.org    def validate_all(self):
27876498Snate@binkert.org        self.validate_start()
27886498Snate@binkert.org        self.validate_error_func()
27896498Snate@binkert.org        self.validate_tokens()
27906498Snate@binkert.org        self.validate_precedence()
27916498Snate@binkert.org        self.validate_pfunctions()
27926498Snate@binkert.org        self.validate_files()
27936498Snate@binkert.org        return self.error
27946498Snate@binkert.org
27956498Snate@binkert.org    # Compute a signature over the grammar
27966498Snate@binkert.org    def signature(self):
27976498Snate@binkert.org        try:
27986498Snate@binkert.org            from hashlib import md5
27996498Snate@binkert.org        except ImportError:
28006498Snate@binkert.org            from md5 import md5
28016498Snate@binkert.org        try:
28026498Snate@binkert.org            sig = md5()
28036498Snate@binkert.org            if self.start:
28046498Snate@binkert.org                sig.update(self.start.encode('latin-1'))
28056498Snate@binkert.org            if self.prec:
28066498Snate@binkert.org                sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
28076498Snate@binkert.org            if self.tokens:
28086498Snate@binkert.org                sig.update(" ".join(self.tokens).encode('latin-1'))
28096498Snate@binkert.org            for f in self.pfuncs:
28106498Snate@binkert.org                if f[3]:
28116498Snate@binkert.org                    sig.update(f[3].encode('latin-1'))
28126498Snate@binkert.org        except (TypeError,ValueError):
28136498Snate@binkert.org            pass
28146498Snate@binkert.org        return sig.digest()
28156498Snate@binkert.org
28166498Snate@binkert.org    # -----------------------------------------------------------------------------
28176498Snate@binkert.org    # validate_file()
28186498Snate@binkert.org    #
28196498Snate@binkert.org    # This method checks to see if there are duplicated p_rulename() functions
28206498Snate@binkert.org    # in the parser module file.  Without this function, it is really easy for
28216498Snate@binkert.org    # users to make mistakes by cutting and pasting code fragments (and it's a real
28226498Snate@binkert.org    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
28236498Snate@binkert.org    # we just do a little regular expression pattern matching of def statements
28246498Snate@binkert.org    # to try and detect duplicates.
28256498Snate@binkert.org    # -----------------------------------------------------------------------------
28266498Snate@binkert.org
28276498Snate@binkert.org    def validate_files(self):
28286498Snate@binkert.org        # Match def p_funcname(
28296498Snate@binkert.org        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
28306498Snate@binkert.org
28316498Snate@binkert.org        for filename in self.files.keys():
28326498Snate@binkert.org            base,ext = os.path.splitext(filename)
28336498Snate@binkert.org            if ext != '.py': return 1          # No idea. Assume it's okay.
28346498Snate@binkert.org
28356498Snate@binkert.org            try:
28366498Snate@binkert.org                f = open(filename)
28376498Snate@binkert.org                lines = f.readlines()
28386498Snate@binkert.org                f.close()
28396498Snate@binkert.org            except IOError:
28406498Snate@binkert.org                continue
28416498Snate@binkert.org
28426498Snate@binkert.org            counthash = { }
28436498Snate@binkert.org            for linen,l in enumerate(lines):
28446498Snate@binkert.org                linen += 1
28456498Snate@binkert.org                m = fre.match(l)
28466498Snate@binkert.org                if m:
28476498Snate@binkert.org                    name = m.group(1)
28486498Snate@binkert.org                    prev = counthash.get(name)
28496498Snate@binkert.org                    if not prev:
28506498Snate@binkert.org                        counthash[name] = linen
28516498Snate@binkert.org                    else:
28526498Snate@binkert.org                        self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
28536498Snate@binkert.org
28546498Snate@binkert.org    # Get the start symbol
28556498Snate@binkert.org    def get_start(self):
28566498Snate@binkert.org        self.start = self.pdict.get('start')
28576498Snate@binkert.org
28586498Snate@binkert.org    # Validate the start symbol
28596498Snate@binkert.org    def validate_start(self):
28606498Snate@binkert.org        if self.start is not None:
28616498Snate@binkert.org            if not isinstance(self.start,str):
28626498Snate@binkert.org                self.log.error("'start' must be a string")
28636498Snate@binkert.org
28646498Snate@binkert.org    # Look for error handler
28656498Snate@binkert.org    def get_error_func(self):
28666498Snate@binkert.org        self.error_func = self.pdict.get('p_error')
28676498Snate@binkert.org
28686498Snate@binkert.org    # Validate the error function
28696498Snate@binkert.org    def validate_error_func(self):
28706498Snate@binkert.org        if self.error_func:
28716498Snate@binkert.org            if isinstance(self.error_func,types.FunctionType):
28726498Snate@binkert.org                ismethod = 0
28736498Snate@binkert.org            elif isinstance(self.error_func, types.MethodType):
28746498Snate@binkert.org                ismethod = 1
28752632SN/A            else:
28766498Snate@binkert.org                self.log.error("'p_error' defined, but is not a function or method")
28776498Snate@binkert.org                self.error = 1
28786498Snate@binkert.org                return
28796498Snate@binkert.org
28806498Snate@binkert.org            eline = func_code(self.error_func).co_firstlineno
28816498Snate@binkert.org            efile = func_code(self.error_func).co_filename
28826498Snate@binkert.org            self.files[efile] = 1
28836498Snate@binkert.org
28846498Snate@binkert.org            if (func_code(self.error_func).co_argcount != 1+ismethod):
28856498Snate@binkert.org                self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
28866498Snate@binkert.org                self.error = 1
28876498Snate@binkert.org
28886498Snate@binkert.org    # Get the tokens map
28896498Snate@binkert.org    def get_tokens(self):
28906498Snate@binkert.org        tokens = self.pdict.get("tokens",None)
28916498Snate@binkert.org        if not tokens:
28926498Snate@binkert.org            self.log.error("No token list is defined")
28936498Snate@binkert.org            self.error = 1
28946498Snate@binkert.org            return
28956498Snate@binkert.org
28966498Snate@binkert.org        if not isinstance(tokens,(list, tuple)):
28976498Snate@binkert.org            self.log.error("tokens must be a list or tuple")
28986498Snate@binkert.org            self.error = 1
28996498Snate@binkert.org            return
29006498Snate@binkert.org
29016498Snate@binkert.org        if not tokens:
29026498Snate@binkert.org            self.log.error("tokens is empty")
29036498Snate@binkert.org            self.error = 1
29046498Snate@binkert.org            return
29056498Snate@binkert.org
29066498Snate@binkert.org        self.tokens = tokens
29076498Snate@binkert.org
29086498Snate@binkert.org    # Validate the tokens
29096498Snate@binkert.org    def validate_tokens(self):
29106498Snate@binkert.org        # Validate the tokens.
29116498Snate@binkert.org        if 'error' in self.tokens:
29126498Snate@binkert.org            self.log.error("Illegal token name 'error'. Is a reserved word")
29136498Snate@binkert.org            self.error = 1
29146498Snate@binkert.org            return
29156498Snate@binkert.org
29166498Snate@binkert.org        terminals = {}
29176498Snate@binkert.org        for n in self.tokens:
29186498Snate@binkert.org            if n in terminals:
29196498Snate@binkert.org                self.log.warning("Token '%s' multiply defined", n)
29206498Snate@binkert.org            terminals[n] = 1
29216498Snate@binkert.org
29226498Snate@binkert.org    # Get the precedence map (if any)
29236498Snate@binkert.org    def get_precedence(self):
29246498Snate@binkert.org        self.prec = self.pdict.get("precedence",None)
29256498Snate@binkert.org
29266498Snate@binkert.org    # Validate and parse the precedence map
29276498Snate@binkert.org    def validate_precedence(self):
29286498Snate@binkert.org        preclist = []
29296498Snate@binkert.org        if self.prec:
29306498Snate@binkert.org            if not isinstance(self.prec,(list,tuple)):
29316498Snate@binkert.org                self.log.error("precedence must be a list or tuple")
29326498Snate@binkert.org                self.error = 1
29336498Snate@binkert.org                return
29346498Snate@binkert.org            for level,p in enumerate(self.prec):
29356498Snate@binkert.org                if not isinstance(p,(list,tuple)):
29366498Snate@binkert.org                    self.log.error("Bad precedence table")
29376498Snate@binkert.org                    self.error = 1
29386498Snate@binkert.org                    return
29396498Snate@binkert.org
29406498Snate@binkert.org                if len(p) < 2:
29416498Snate@binkert.org                    self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
29426498Snate@binkert.org                    self.error = 1
29436498Snate@binkert.org                    return
29446498Snate@binkert.org                assoc = p[0]
29456498Snate@binkert.org                if not isinstance(assoc,str):
29466498Snate@binkert.org                    self.log.error("precedence associativity must be a string")
29476498Snate@binkert.org                    self.error = 1
29486498Snate@binkert.org                    return
29496498Snate@binkert.org                for term in p[1:]:
29506498Snate@binkert.org                    if not isinstance(term,str):
29516498Snate@binkert.org                        self.log.error("precedence items must be strings")
29526498Snate@binkert.org                        self.error = 1
29536498Snate@binkert.org                        return
29546498Snate@binkert.org                    preclist.append((term,assoc,level+1))
29556498Snate@binkert.org        self.preclist = preclist
29566498Snate@binkert.org
29576498Snate@binkert.org    # Get all p_functions from the grammar
29586498Snate@binkert.org    def get_pfunctions(self):
29596498Snate@binkert.org        p_functions = []
29606498Snate@binkert.org        for name, item in self.pdict.items():
29616498Snate@binkert.org            if name[:2] != 'p_': continue
29626498Snate@binkert.org            if name == 'p_error': continue
29636498Snate@binkert.org            if isinstance(item,(types.FunctionType,types.MethodType)):
29646498Snate@binkert.org                line = func_code(item).co_firstlineno
29656498Snate@binkert.org                file = func_code(item).co_filename
29666498Snate@binkert.org                p_functions.append((line,file,name,item.__doc__))
29676498Snate@binkert.org
29686498Snate@binkert.org        # Sort all of the actions by line number
29696498Snate@binkert.org        p_functions.sort()
29706498Snate@binkert.org        self.pfuncs = p_functions
29716498Snate@binkert.org
29726498Snate@binkert.org
29736498Snate@binkert.org    # Validate all of the p_functions
29746498Snate@binkert.org    def validate_pfunctions(self):
29756498Snate@binkert.org        grammar = []
29766498Snate@binkert.org        # Check for non-empty symbols
29776498Snate@binkert.org        if len(self.pfuncs) == 0:
29786498Snate@binkert.org            self.log.error("no rules of the form p_rulename are defined")
29796498Snate@binkert.org            self.error = 1
29806498Snate@binkert.org            return
29816498Snate@binkert.org
29826498Snate@binkert.org        for line, file, name, doc in self.pfuncs:
29836498Snate@binkert.org            func = self.pdict[name]
29846498Snate@binkert.org            if isinstance(func, types.MethodType):
29856498Snate@binkert.org                reqargs = 2
29866498Snate@binkert.org            else:
29876498Snate@binkert.org                reqargs = 1
29886498Snate@binkert.org            if func_code(func).co_argcount > reqargs:
29896498Snate@binkert.org                self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
29906498Snate@binkert.org                self.error = 1
29916498Snate@binkert.org            elif func_code(func).co_argcount < reqargs:
29926498Snate@binkert.org                self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
29936498Snate@binkert.org                self.error = 1
29946498Snate@binkert.org            elif not func.__doc__:
29956498Snate@binkert.org                self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
29966498Snate@binkert.org            else:
29976498Snate@binkert.org                try:
29986498Snate@binkert.org                    parsed_g = parse_grammar(doc,file,line)
29996498Snate@binkert.org                    for g in parsed_g:
30006498Snate@binkert.org                        grammar.append((name, g))
30016498Snate@binkert.org                except SyntaxError:
30026498Snate@binkert.org                    e = sys.exc_info()[1]
30036498Snate@binkert.org                    self.log.error(str(e))
30046498Snate@binkert.org                    self.error = 1
30056498Snate@binkert.org
30066498Snate@binkert.org                # Looks like a valid grammar rule
30076498Snate@binkert.org                # Mark the file in which defined.
30086498Snate@binkert.org                self.files[file] = 1
30096498Snate@binkert.org
30106498Snate@binkert.org        # Secondary validation step that looks for p_ definitions that are not functions
30116498Snate@binkert.org        # or functions that look like they might be grammar rules.
30126498Snate@binkert.org
30136498Snate@binkert.org        for n,v in self.pdict.items():
30146498Snate@binkert.org            if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
30156498Snate@binkert.org            if n[0:2] == 't_': continue
30166498Snate@binkert.org            if n[0:2] == 'p_' and n != 'p_error':
30176498Snate@binkert.org                self.log.warning("'%s' not defined as a function", n)
30186498Snate@binkert.org            if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
30196498Snate@binkert.org                (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
30206498Snate@binkert.org                try:
30216498Snate@binkert.org                    doc = v.__doc__.split(" ")
30226498Snate@binkert.org                    if doc[1] == ':':
30236498Snate@binkert.org                        self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
30246498Snate@binkert.org                                         func_code(v).co_filename, func_code(v).co_firstlineno,n)
30256498Snate@binkert.org                except Exception:
30266498Snate@binkert.org                    pass
30276498Snate@binkert.org
30286498Snate@binkert.org        self.grammar = grammar
30294479Sbinkertn@umich.edu
30302632SN/A# -----------------------------------------------------------------------------
30312632SN/A# yacc(module)
30322632SN/A#
30336498Snate@binkert.org# Build a parser
30342632SN/A# -----------------------------------------------------------------------------
30352632SN/A
30366498Snate@binkert.orgdef yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
30376498Snate@binkert.org         check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
30386498Snate@binkert.org         debuglog=None, errorlog = None, picklefile=None):
30396498Snate@binkert.org
30406498Snate@binkert.org    global parse                 # Reference to the parsing method of the last built parser
30416498Snate@binkert.org
30426498Snate@binkert.org    # If pickling is enabled, table files are not created
30436498Snate@binkert.org
30446498Snate@binkert.org    if picklefile:
30456498Snate@binkert.org        write_tables = 0
30466498Snate@binkert.org
30476498Snate@binkert.org    if errorlog is None:
30486498Snate@binkert.org        errorlog = PlyLogger(sys.stderr)
30496498Snate@binkert.org
30506498Snate@binkert.org    # Get the module dictionary used for the parser
30512632SN/A    if module:
30526498Snate@binkert.org        _items = [(k,getattr(module,k)) for k in dir(module)]
30536498Snate@binkert.org        pdict = dict(_items)
30546498Snate@binkert.org    else:
30556498Snate@binkert.org        pdict = get_caller_module_dict(2)
30566498Snate@binkert.org
30576498Snate@binkert.org    # Collect parser information from the dictionary
30586498Snate@binkert.org    pinfo = ParserReflect(pdict,log=errorlog)
30596498Snate@binkert.org    pinfo.get_all()
30606498Snate@binkert.org
30616498Snate@binkert.org    if pinfo.error:
30626498Snate@binkert.org        raise YaccError("Unable to build parser")
30636498Snate@binkert.org
30646498Snate@binkert.org    # Check signature against table files (if any)
30656498Snate@binkert.org    signature = pinfo.signature()
30666498Snate@binkert.org
30676498Snate@binkert.org    # Read the tables
30686498Snate@binkert.org    try:
30696498Snate@binkert.org        lr = LRTable()
30706498Snate@binkert.org        if picklefile:
30716498Snate@binkert.org            read_signature = lr.read_pickle(picklefile)
30724479Sbinkertn@umich.edu        else:
30736498Snate@binkert.org            read_signature = lr.read_table(tabmodule)
30746498Snate@binkert.org        if optimize or (read_signature == signature):
30756498Snate@binkert.org            try:
30766498Snate@binkert.org                lr.bind_callables(pinfo.pdict)
30776498Snate@binkert.org                parser = LRParser(lr,pinfo.error_func)
30786498Snate@binkert.org                parse = parser.parse
30796498Snate@binkert.org                return parser
30806498Snate@binkert.org            except Exception:
30816498Snate@binkert.org                e = sys.exc_info()[1]
30826498Snate@binkert.org                errorlog.warning("There was a problem loading the table file: %s", repr(e))
30836498Snate@binkert.org    except VersionError:
30846498Snate@binkert.org        e = sys.exc_info()
30856498Snate@binkert.org        errorlog.warning(str(e))
30866498Snate@binkert.org    except Exception:
30876498Snate@binkert.org        pass
30886498Snate@binkert.org
30896498Snate@binkert.org    if debuglog is None:
30906498Snate@binkert.org        if debug:
30916498Snate@binkert.org            debuglog = PlyLogger(open(debugfile,"w"))
30926498Snate@binkert.org        else:
30936498Snate@binkert.org            debuglog = NullLogger()
30946498Snate@binkert.org
30956498Snate@binkert.org    debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
30966498Snate@binkert.org
30976498Snate@binkert.org
30986498Snate@binkert.org    errors = 0
30996498Snate@binkert.org
31006498Snate@binkert.org    # Validate the parser information
31016498Snate@binkert.org    if pinfo.validate_all():
31026498Snate@binkert.org        raise YaccError("Unable to build parser")
31036498Snate@binkert.org
31046498Snate@binkert.org    if not pinfo.error_func:
31056498Snate@binkert.org        errorlog.warning("no p_error() function is defined")
31066498Snate@binkert.org
31076498Snate@binkert.org    # Create a grammar object
31086498Snate@binkert.org    grammar = Grammar(pinfo.tokens)
31096498Snate@binkert.org
31106498Snate@binkert.org    # Set precedence level for terminals
31116498Snate@binkert.org    for term, assoc, level in pinfo.preclist:
31122632SN/A        try:
31136498Snate@binkert.org            grammar.set_precedence(term,assoc,level)
31146498Snate@binkert.org        except GrammarError:
31156498Snate@binkert.org            e = sys.exc_info()[1]
31166498Snate@binkert.org            errorlog.warning("%s",str(e))
31176498Snate@binkert.org
31186498Snate@binkert.org    # Add productions to the grammar
31196498Snate@binkert.org    for funcname, gram in pinfo.grammar:
31206498Snate@binkert.org        file, line, prodname, syms = gram
31216498Snate@binkert.org        try:
31226498Snate@binkert.org            grammar.add_production(prodname,syms,funcname,file,line)
31236498Snate@binkert.org        except GrammarError:
31246498Snate@binkert.org            e = sys.exc_info()[1]
31256498Snate@binkert.org            errorlog.error("%s",str(e))
31266498Snate@binkert.org            errors = 1
31276498Snate@binkert.org
31286498Snate@binkert.org    # Set the grammar start symbols
31296498Snate@binkert.org    try:
31306498Snate@binkert.org        if start is None:
31316498Snate@binkert.org            grammar.set_start(pinfo.start)
31324479Sbinkertn@umich.edu        else:
31336498Snate@binkert.org            grammar.set_start(start)
31346498Snate@binkert.org    except GrammarError:
31356498Snate@binkert.org        e = sys.exc_info()[1]
31366498Snate@binkert.org        errorlog.error(str(e))
31376498Snate@binkert.org        errors = 1
31386498Snate@binkert.org
31396498Snate@binkert.org    if errors:
31406498Snate@binkert.org        raise YaccError("Unable to build parser")
31416498Snate@binkert.org
31426498Snate@binkert.org    # Verify the grammar structure
31436498Snate@binkert.org    undefined_symbols = grammar.undefined_symbols()
31446498Snate@binkert.org    for sym, prod in undefined_symbols:
31456498Snate@binkert.org        errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
31466498Snate@binkert.org        errors = 1
31476498Snate@binkert.org
31486498Snate@binkert.org    unused_terminals = grammar.unused_terminals()
31496498Snate@binkert.org    if unused_terminals:
31506498Snate@binkert.org        debuglog.info("")
31516498Snate@binkert.org        debuglog.info("Unused terminals:")
31526498Snate@binkert.org        debuglog.info("")
31536498Snate@binkert.org        for term in unused_terminals:
31546498Snate@binkert.org            errorlog.warning("Token '%s' defined, but not used", term)
31556498Snate@binkert.org            debuglog.info("    %s", term)
31566498Snate@binkert.org
31576498Snate@binkert.org    # Print out all productions to the debug log
31586498Snate@binkert.org    if debug:
31596498Snate@binkert.org        debuglog.info("")
31606498Snate@binkert.org        debuglog.info("Grammar")
31616498Snate@binkert.org        debuglog.info("")
31626498Snate@binkert.org        for n,p in enumerate(grammar.Productions):
31636498Snate@binkert.org            debuglog.info("Rule %-5d %s", n, p)
31646498Snate@binkert.org
31656498Snate@binkert.org    # Find unused non-terminals
31666498Snate@binkert.org    unused_rules = grammar.unused_rules()
31676498Snate@binkert.org    for prod in unused_rules:
31686498Snate@binkert.org        errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)
31696498Snate@binkert.org
31706498Snate@binkert.org    if len(unused_terminals) == 1:
31716498Snate@binkert.org        errorlog.warning("There is 1 unused token")
31726498Snate@binkert.org    if len(unused_terminals) > 1:
31736498Snate@binkert.org        errorlog.warning("There are %d unused tokens", len(unused_terminals))
31746498Snate@binkert.org
31756498Snate@binkert.org    if len(unused_rules) == 1:
31766498Snate@binkert.org        errorlog.warning("There is 1 unused rule")
31776498Snate@binkert.org    if len(unused_rules) > 1:
31786498Snate@binkert.org        errorlog.warning("There are %d unused rules", len(unused_rules))
31796498Snate@binkert.org
31806498Snate@binkert.org    if debug:
31816498Snate@binkert.org        debuglog.info("")
31826498Snate@binkert.org        debuglog.info("Terminals, with rules where they appear")
31836498Snate@binkert.org        debuglog.info("")
31846498Snate@binkert.org        terms = list(grammar.Terminals)
31856498Snate@binkert.org        terms.sort()
31866498Snate@binkert.org        for term in terms:
31876498Snate@binkert.org            debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
31886498Snate@binkert.org
31896498Snate@binkert.org        debuglog.info("")
31906498Snate@binkert.org        debuglog.info("Nonterminals, with rules where they appear")
31916498Snate@binkert.org        debuglog.info("")
31926498Snate@binkert.org        nonterms = list(grammar.Nonterminals)
31936498Snate@binkert.org        nonterms.sort()
31946498Snate@binkert.org        for nonterm in nonterms:
31956498Snate@binkert.org            debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
31966498Snate@binkert.org        debuglog.info("")
31976498Snate@binkert.org
31986498Snate@binkert.org    if check_recursion:
31996498Snate@binkert.org        unreachable = grammar.find_unreachable()
32006498Snate@binkert.org        for u in unreachable:
32016498Snate@binkert.org            errorlog.warning("Symbol '%s' is unreachable",u)
32026498Snate@binkert.org
32036498Snate@binkert.org        infinite = grammar.infinite_cycles()
32046498Snate@binkert.org        for inf in infinite:
32056498Snate@binkert.org            errorlog.error("Infinite recursion detected for symbol '%s'", inf)
32066498Snate@binkert.org            errors = 1
32076498Snate@binkert.org
32086498Snate@binkert.org    unused_prec = grammar.unused_precedence()
32096498Snate@binkert.org    for term, assoc in unused_prec:
32106498Snate@binkert.org        errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
32116498Snate@binkert.org        errors = 1
32126498Snate@binkert.org
32136498Snate@binkert.org    if errors:
32146498Snate@binkert.org        raise YaccError("Unable to build parser")
32156498Snate@binkert.org
32166498Snate@binkert.org    # Run the LRGeneratedTable on the grammar
32176498Snate@binkert.org    if debug:
32186498Snate@binkert.org        errorlog.debug("Generating %s tables", method)
32196498Snate@binkert.org
32206498Snate@binkert.org    lr = LRGeneratedTable(grammar,method,debuglog)
32216498Snate@binkert.org
32226498Snate@binkert.org    if debug:
32236498Snate@binkert.org        num_sr = len(lr.sr_conflicts)
32246498Snate@binkert.org
32256498Snate@binkert.org        # Report shift/reduce and reduce/reduce conflicts
32266498Snate@binkert.org        if num_sr == 1:
32276498Snate@binkert.org            errorlog.warning("1 shift/reduce conflict")
32286498Snate@binkert.org        elif num_sr > 1:
32296498Snate@binkert.org            errorlog.warning("%d shift/reduce conflicts", num_sr)
32306498Snate@binkert.org
32316498Snate@binkert.org        num_rr = len(lr.rr_conflicts)
32326498Snate@binkert.org        if num_rr == 1:
32336498Snate@binkert.org            errorlog.warning("1 reduce/reduce conflict")
32346498Snate@binkert.org        elif num_rr > 1:
32356498Snate@binkert.org            errorlog.warning("%d reduce/reduce conflicts", num_rr)
32366498Snate@binkert.org
32376498Snate@binkert.org    # Write out conflicts to the output file
32386498Snate@binkert.org    if debug and (lr.sr_conflicts or lr.rr_conflicts):
32396498Snate@binkert.org        debuglog.warning("")
32406498Snate@binkert.org        debuglog.warning("Conflicts:")
32416498Snate@binkert.org        debuglog.warning("")
32426498Snate@binkert.org
32436498Snate@binkert.org        for state, tok, resolution in lr.sr_conflicts:
32446498Snate@binkert.org            debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s",  tok, state, resolution)
32456498Snate@binkert.org
32466498Snate@binkert.org        already_reported = {}
32476498Snate@binkert.org        for state, rule, rejected in lr.rr_conflicts:
32486498Snate@binkert.org            if (state,id(rule),id(rejected)) in already_reported:
32496498Snate@binkert.org                continue
32506498Snate@binkert.org            debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
32516498Snate@binkert.org            debuglog.warning("rejected rule (%s) in state %d", rejected,state)
32526498Snate@binkert.org            errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
32536498Snate@binkert.org            errorlog.warning("rejected rule (%s) in state %d", rejected, state)
32546498Snate@binkert.org            already_reported[state,id(rule),id(rejected)] = 1
32556498Snate@binkert.org
32566498Snate@binkert.org        warned_never = []
32576498Snate@binkert.org        for state, rule, rejected in lr.rr_conflicts:
32586498Snate@binkert.org            if not rejected.reduced and (rejected not in warned_never):
32596498Snate@binkert.org                debuglog.warning("Rule (%s) is never reduced", rejected)
32606498Snate@binkert.org                errorlog.warning("Rule (%s) is never reduced", rejected)
32616498Snate@binkert.org                warned_never.append(rejected)
32626498Snate@binkert.org
32636498Snate@binkert.org    # Write the table file if requested
32646498Snate@binkert.org    if write_tables:
32656498Snate@binkert.org        lr.write_table(tabmodule,outputdir,signature)
32666498Snate@binkert.org
32676498Snate@binkert.org    # Write a pickled version of the tables
32686498Snate@binkert.org    if picklefile:
32696498Snate@binkert.org        lr.pickle_table(picklefile,signature)
32706498Snate@binkert.org
32716498Snate@binkert.org    # Build the parser
32726498Snate@binkert.org    lr.bind_callables(pinfo.pdict)
32736498Snate@binkert.org    parser = LRParser(lr,pinfo.error_func)
32746498Snate@binkert.org
32756498Snate@binkert.org    parse = parser.parse
32766498Snate@binkert.org    return parser
3277