16498Snate@binkert.org# -----------------------------------------------------------------------------
22632SN/A# ply: lex.py
32632SN/A#
46498Snate@binkert.org# Copyright (C) 2001-2009,
56498Snate@binkert.org# David M. Beazley (Dabeaz LLC)
66498Snate@binkert.org# All rights reserved.
72632SN/A#
86498Snate@binkert.org# Redistribution and use in source and binary forms, with or without
96498Snate@binkert.org# modification, are permitted provided that the following conditions are
106498Snate@binkert.org# met:
116498Snate@binkert.org#
126498Snate@binkert.org# * Redistributions of source code must retain the above copyright notice,
136498Snate@binkert.org#   this list of conditions and the following disclaimer.
146498Snate@binkert.org# * Redistributions in binary form must reproduce the above copyright notice,
156498Snate@binkert.org#   this list of conditions and the following disclaimer in the documentation
166498Snate@binkert.org#   and/or other materials provided with the distribution.
176498Snate@binkert.org# * Neither the name of the David Beazley or Dabeaz LLC may be used to
186498Snate@binkert.org#   endorse or promote products derived from this software without
196498Snate@binkert.org#  specific prior written permission.
202632SN/A#
216498Snate@binkert.org# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
226498Snate@binkert.org# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
236498Snate@binkert.org# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
246498Snate@binkert.org# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
256498Snate@binkert.org# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
266498Snate@binkert.org# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
276498Snate@binkert.org# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
286498Snate@binkert.org# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
296498Snate@binkert.org# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
306498Snate@binkert.org# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
316498Snate@binkert.org# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
326498Snate@binkert.org# -----------------------------------------------------------------------------
332632SN/A
346498Snate@binkert.org__version__    = "3.2"
356498Snate@binkert.org__tabversion__ = "3.2"       # Version of table file used
362632SN/A
376498Snate@binkert.orgimport re, sys, types, copy, os
382632SN/A
396498Snate@binkert.org# This tuple contains known string types
406498Snate@binkert.orgtry:
416498Snate@binkert.org    # Python 2.6
426498Snate@binkert.org    StringTypes = (types.StringType, types.UnicodeType)
436498Snate@binkert.orgexcept AttributeError:
446498Snate@binkert.org    # Python 3.0
456498Snate@binkert.org    StringTypes = (str, bytes)
466498Snate@binkert.org
476498Snate@binkert.org# Extract the code attribute of a function. Different implementations
486498Snate@binkert.org# are for Python 2/3 compatibility.
496498Snate@binkert.org
506498Snate@binkert.orgif sys.version_info[0] < 3:
516498Snate@binkert.org    def func_code(f):
526498Snate@binkert.org        return f.func_code
536498Snate@binkert.orgelse:
546498Snate@binkert.org    def func_code(f):
556498Snate@binkert.org        return f.__code__
566498Snate@binkert.org
576498Snate@binkert.org# This regular expression is used to match valid token names
584479Sbinkertn@umich.edu_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
592632SN/A
604479Sbinkertn@umich.edu# Exception thrown when invalid token encountered and no default error
614479Sbinkertn@umich.edu# handler is defined.
626498Snate@binkert.org
632632SN/Aclass LexError(Exception):
642632SN/A    def __init__(self,message,s):
652632SN/A         self.args = (message,)
662632SN/A         self.text = s
672632SN/A
686498Snate@binkert.org# Token class.  This class is used to represent the tokens produced.
694479Sbinkertn@umich.educlass LexToken(object):
702632SN/A    def __str__(self):
714479Sbinkertn@umich.edu        return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos)
722632SN/A    def __repr__(self):
732632SN/A        return str(self)
746498Snate@binkert.org
756498Snate@binkert.org# This object is a stand-in for a logging object created by the
766498Snate@binkert.org# logging module.
776498Snate@binkert.org
786498Snate@binkert.orgclass PlyLogger(object):
796498Snate@binkert.org    def __init__(self,f):
806498Snate@binkert.org        self.f = f
816498Snate@binkert.org    def critical(self,msg,*args,**kwargs):
826498Snate@binkert.org        self.f.write((msg % args) + "\n")
836498Snate@binkert.org
846498Snate@binkert.org    def warning(self,msg,*args,**kwargs):
856498Snate@binkert.org        self.f.write("WARNING: "+ (msg % args) + "\n")
866498Snate@binkert.org
876498Snate@binkert.org    def error(self,msg,*args,**kwargs):
886498Snate@binkert.org        self.f.write("ERROR: " + (msg % args) + "\n")
896498Snate@binkert.org
906498Snate@binkert.org    info = critical
916498Snate@binkert.org    debug = critical
926498Snate@binkert.org
936498Snate@binkert.org# Null logger is used when no output is generated. Does nothing.
946498Snate@binkert.orgclass NullLogger(object):
956498Snate@binkert.org    def __getattribute__(self,name):
966498Snate@binkert.org        return self
976498Snate@binkert.org    def __call__(self,*args,**kwargs):
986498Snate@binkert.org        return self
992632SN/A
1002632SN/A# -----------------------------------------------------------------------------
1016498Snate@binkert.org#                        === Lexing Engine ===
1022632SN/A#
1036498Snate@binkert.org# The following Lexer class implements the lexer runtime.   There are only
1046498Snate@binkert.org# a few public methods and attributes:
1054479Sbinkertn@umich.edu#
1062632SN/A#    input()          -  Store a new string in the lexer
1072632SN/A#    token()          -  Get the next token
1086498Snate@binkert.org#    clone()          -  Clone the lexer
1096498Snate@binkert.org#
1106498Snate@binkert.org#    lineno           -  Current line number
1116498Snate@binkert.org#    lexpos           -  Current position in the input string
1122632SN/A# -----------------------------------------------------------------------------
1132632SN/A
1142632SN/Aclass Lexer:
1152632SN/A    def __init__(self):
1164479Sbinkertn@umich.edu        self.lexre = None             # Master regular expression. This is a list of
1174479Sbinkertn@umich.edu                                      # tuples (re,findex) where re is a compiled
1184479Sbinkertn@umich.edu                                      # regular expression and findex is a list
1194479Sbinkertn@umich.edu                                      # mapping regex group numbers to rules
1204479Sbinkertn@umich.edu        self.lexretext = None         # Current regular expression strings
1214479Sbinkertn@umich.edu        self.lexstatere = {}          # Dictionary mapping lexer states to master regexs
1224479Sbinkertn@umich.edu        self.lexstateretext = {}      # Dictionary mapping lexer states to regex strings
1236498Snate@binkert.org        self.lexstaterenames = {}     # Dictionary mapping lexer states to symbol names
1244479Sbinkertn@umich.edu        self.lexstate = "INITIAL"     # Current lexer state
1254479Sbinkertn@umich.edu        self.lexstatestack = []       # Stack of lexer states
1264479Sbinkertn@umich.edu        self.lexstateinfo = None      # State information
1274479Sbinkertn@umich.edu        self.lexstateignore = {}      # Dictionary of ignored characters for each state
1284479Sbinkertn@umich.edu        self.lexstateerrorf = {}      # Dictionary of error functions for each state
1294479Sbinkertn@umich.edu        self.lexreflags = 0           # Optional re compile flags
1304479Sbinkertn@umich.edu        self.lexdata = None           # Actual input data (as a string)
1314479Sbinkertn@umich.edu        self.lexpos = 0               # Current position in input text
1324479Sbinkertn@umich.edu        self.lexlen = 0               # Length of the input text
1334479Sbinkertn@umich.edu        self.lexerrorf = None         # Error rule (if any)
1344479Sbinkertn@umich.edu        self.lextokens = None         # List of valid tokens
1354479Sbinkertn@umich.edu        self.lexignore = ""           # Ignored characters
1364479Sbinkertn@umich.edu        self.lexliterals = ""         # Literal characters that can be passed through
1374479Sbinkertn@umich.edu        self.lexmodule = None         # Module
1384479Sbinkertn@umich.edu        self.lineno = 1               # Current line number
1394479Sbinkertn@umich.edu        self.lexoptimize = 0          # Optimized mode
1402632SN/A
1414479Sbinkertn@umich.edu    def clone(self,object=None):
1426498Snate@binkert.org        c = copy.copy(self)
1434479Sbinkertn@umich.edu
1444479Sbinkertn@umich.edu        # If the object parameter has been supplied, it means we are attaching the
1454479Sbinkertn@umich.edu        # lexer to a new object.  In this case, we have to rebind all methods in
1464479Sbinkertn@umich.edu        # the lexstatere and lexstateerrorf tables.
1474479Sbinkertn@umich.edu
1484479Sbinkertn@umich.edu        if object:
1494479Sbinkertn@umich.edu            newtab = { }
1504479Sbinkertn@umich.edu            for key, ritem in self.lexstatere.items():
1514479Sbinkertn@umich.edu                newre = []
1524479Sbinkertn@umich.edu                for cre, findex in ritem:
1534479Sbinkertn@umich.edu                     newfindex = []
1544479Sbinkertn@umich.edu                     for f in findex:
1554479Sbinkertn@umich.edu                         if not f or not f[0]:
1564479Sbinkertn@umich.edu                             newfindex.append(f)
1574479Sbinkertn@umich.edu                             continue
1584479Sbinkertn@umich.edu                         newfindex.append((getattr(object,f[0].__name__),f[1]))
1594479Sbinkertn@umich.edu                newre.append((cre,newfindex))
1604479Sbinkertn@umich.edu                newtab[key] = newre
1614479Sbinkertn@umich.edu            c.lexstatere = newtab
1624479Sbinkertn@umich.edu            c.lexstateerrorf = { }
1634479Sbinkertn@umich.edu            for key, ef in self.lexstateerrorf.items():
1644479Sbinkertn@umich.edu                c.lexstateerrorf[key] = getattr(object,ef.__name__)
1654479Sbinkertn@umich.edu            c.lexmodule = object
1664479Sbinkertn@umich.edu        return c
1674479Sbinkertn@umich.edu
1684479Sbinkertn@umich.edu    # ------------------------------------------------------------
1694479Sbinkertn@umich.edu    # writetab() - Write lexer information to a table file
1704479Sbinkertn@umich.edu    # ------------------------------------------------------------
1716498Snate@binkert.org    def writetab(self,tabfile,outputdir=""):
1726498Snate@binkert.org        if isinstance(tabfile,types.ModuleType):
1736498Snate@binkert.org            return
1746498Snate@binkert.org        basetabfilename = tabfile.split(".")[-1]
1756498Snate@binkert.org        filename = os.path.join(outputdir,basetabfilename)+".py"
1766498Snate@binkert.org        tf = open(filename,"w")
1774479Sbinkertn@umich.edu        tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
1786498Snate@binkert.org        tf.write("_tabversion   = %s\n" % repr(__version__))
1794479Sbinkertn@umich.edu        tf.write("_lextokens    = %s\n" % repr(self.lextokens))
1804479Sbinkertn@umich.edu        tf.write("_lexreflags   = %s\n" % repr(self.lexreflags))
1814479Sbinkertn@umich.edu        tf.write("_lexliterals  = %s\n" % repr(self.lexliterals))
1824479Sbinkertn@umich.edu        tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))
1834479Sbinkertn@umich.edu
1844479Sbinkertn@umich.edu        tabre = { }
1856498Snate@binkert.org        # Collect all functions in the initial state
1866498Snate@binkert.org        initial = self.lexstatere["INITIAL"]
1876498Snate@binkert.org        initialfuncs = []
1886498Snate@binkert.org        for part in initial:
1896498Snate@binkert.org            for f in part[1]:
1906498Snate@binkert.org                if f and f[0]:
1916498Snate@binkert.org                    initialfuncs.append(f)
1926498Snate@binkert.org
1934479Sbinkertn@umich.edu        for key, lre in self.lexstatere.items():
1944479Sbinkertn@umich.edu             titem = []
1954479Sbinkertn@umich.edu             for i in range(len(lre)):
1966498Snate@binkert.org                  titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i])))
1974479Sbinkertn@umich.edu             tabre[key] = titem
1984479Sbinkertn@umich.edu
1994479Sbinkertn@umich.edu        tf.write("_lexstatere   = %s\n" % repr(tabre))
2004479Sbinkertn@umich.edu        tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))
2014479Sbinkertn@umich.edu
2024479Sbinkertn@umich.edu        taberr = { }
2034479Sbinkertn@umich.edu        for key, ef in self.lexstateerrorf.items():
2044479Sbinkertn@umich.edu             if ef:
2054479Sbinkertn@umich.edu                  taberr[key] = ef.__name__
2064479Sbinkertn@umich.edu             else:
2074479Sbinkertn@umich.edu                  taberr[key] = None
2084479Sbinkertn@umich.edu        tf.write("_lexstateerrorf = %s\n" % repr(taberr))
2094479Sbinkertn@umich.edu        tf.close()
2104479Sbinkertn@umich.edu
2114479Sbinkertn@umich.edu    # ------------------------------------------------------------
2124479Sbinkertn@umich.edu    # readtab() - Read lexer information from a tab file
2134479Sbinkertn@umich.edu    # ------------------------------------------------------------
2144479Sbinkertn@umich.edu    def readtab(self,tabfile,fdict):
2156498Snate@binkert.org        if isinstance(tabfile,types.ModuleType):
2166498Snate@binkert.org            lextab = tabfile
2176498Snate@binkert.org        else:
2186498Snate@binkert.org            if sys.version_info[0] < 3:
2196498Snate@binkert.org                exec("import %s as lextab" % tabfile)
2206498Snate@binkert.org            else:
2216498Snate@binkert.org                env = { }
2226498Snate@binkert.org                exec("import %s as lextab" % tabfile, env,env)
2236498Snate@binkert.org                lextab = env['lextab']
2246498Snate@binkert.org
2256498Snate@binkert.org        if getattr(lextab,"_tabversion","0.0") != __version__:
2266498Snate@binkert.org            raise ImportError("Inconsistent PLY version")
2276498Snate@binkert.org
2284479Sbinkertn@umich.edu        self.lextokens      = lextab._lextokens
2294479Sbinkertn@umich.edu        self.lexreflags     = lextab._lexreflags
2304479Sbinkertn@umich.edu        self.lexliterals    = lextab._lexliterals
2314479Sbinkertn@umich.edu        self.lexstateinfo   = lextab._lexstateinfo
2324479Sbinkertn@umich.edu        self.lexstateignore = lextab._lexstateignore
2334479Sbinkertn@umich.edu        self.lexstatere     = { }
2344479Sbinkertn@umich.edu        self.lexstateretext = { }
2354479Sbinkertn@umich.edu        for key,lre in lextab._lexstatere.items():
2364479Sbinkertn@umich.edu             titem = []
2374479Sbinkertn@umich.edu             txtitem = []
2384479Sbinkertn@umich.edu             for i in range(len(lre)):
2394479Sbinkertn@umich.edu                  titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict)))
2404479Sbinkertn@umich.edu                  txtitem.append(lre[i][0])
2414479Sbinkertn@umich.edu             self.lexstatere[key] = titem
2424479Sbinkertn@umich.edu             self.lexstateretext[key] = txtitem
2434479Sbinkertn@umich.edu        self.lexstateerrorf = { }
2444479Sbinkertn@umich.edu        for key,ef in lextab._lexstateerrorf.items():
2454479Sbinkertn@umich.edu             self.lexstateerrorf[key] = fdict[ef]
2464479Sbinkertn@umich.edu        self.begin('INITIAL')
2472632SN/A
2482632SN/A    # ------------------------------------------------------------
2492632SN/A    # input() - Push a new string into the lexer
2502632SN/A    # ------------------------------------------------------------
2512632SN/A    def input(self,s):
2526498Snate@binkert.org        # Pull off the first character to see if s looks like a string
2536498Snate@binkert.org        c = s[:1]
2546498Snate@binkert.org        if not isinstance(c,StringTypes):
2556498Snate@binkert.org            raise ValueError("Expected a string")
2562632SN/A        self.lexdata = s
2572632SN/A        self.lexpos = 0
2582632SN/A        self.lexlen = len(s)
2592632SN/A
2602632SN/A    # ------------------------------------------------------------
2614479Sbinkertn@umich.edu    # begin() - Changes the lexing state
2622632SN/A    # ------------------------------------------------------------
2634479Sbinkertn@umich.edu    def begin(self,state):
2646498Snate@binkert.org        if not state in self.lexstatere:
2656498Snate@binkert.org            raise ValueError("Undefined state")
2664479Sbinkertn@umich.edu        self.lexre = self.lexstatere[state]
2674479Sbinkertn@umich.edu        self.lexretext = self.lexstateretext[state]
2684479Sbinkertn@umich.edu        self.lexignore = self.lexstateignore.get(state,"")
2694479Sbinkertn@umich.edu        self.lexerrorf = self.lexstateerrorf.get(state,None)
2704479Sbinkertn@umich.edu        self.lexstate = state
2714479Sbinkertn@umich.edu
2724479Sbinkertn@umich.edu    # ------------------------------------------------------------
2734479Sbinkertn@umich.edu    # push_state() - Changes the lexing state and saves old on stack
2744479Sbinkertn@umich.edu    # ------------------------------------------------------------
2754479Sbinkertn@umich.edu    def push_state(self,state):
2764479Sbinkertn@umich.edu        self.lexstatestack.append(self.lexstate)
2774479Sbinkertn@umich.edu        self.begin(state)
2784479Sbinkertn@umich.edu
2794479Sbinkertn@umich.edu    # ------------------------------------------------------------
2804479Sbinkertn@umich.edu    # pop_state() - Restores the previous state
2814479Sbinkertn@umich.edu    # ------------------------------------------------------------
2824479Sbinkertn@umich.edu    def pop_state(self):
2834479Sbinkertn@umich.edu        self.begin(self.lexstatestack.pop())
2844479Sbinkertn@umich.edu
2854479Sbinkertn@umich.edu    # ------------------------------------------------------------
2864479Sbinkertn@umich.edu    # current_state() - Returns the current lexing state
2874479Sbinkertn@umich.edu    # ------------------------------------------------------------
2884479Sbinkertn@umich.edu    def current_state(self):
2894479Sbinkertn@umich.edu        return self.lexstate
2904479Sbinkertn@umich.edu
2914479Sbinkertn@umich.edu    # ------------------------------------------------------------
2924479Sbinkertn@umich.edu    # skip() - Skip ahead n characters
2934479Sbinkertn@umich.edu    # ------------------------------------------------------------
2944479Sbinkertn@umich.edu    def skip(self,n):
2954479Sbinkertn@umich.edu        self.lexpos += n
2962632SN/A
2972632SN/A    # ------------------------------------------------------------
2986498Snate@binkert.org    # opttoken() - Return the next token from the Lexer
2992632SN/A    #
3002632SN/A    # Note: This function has been carefully implemented to be as fast
3012632SN/A    # as possible.  Don't make changes unless you really know what
3022632SN/A    # you are doing
3032632SN/A    # ------------------------------------------------------------
3044479Sbinkertn@umich.edu    def token(self):
3052632SN/A        # Make local copies of frequently referenced attributes
3062632SN/A        lexpos    = self.lexpos
3072632SN/A        lexlen    = self.lexlen
3082632SN/A        lexignore = self.lexignore
3092632SN/A        lexdata   = self.lexdata
3102632SN/A
3112632SN/A        while lexpos < lexlen:
3122632SN/A            # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
3132632SN/A            if lexdata[lexpos] in lexignore:
3142632SN/A                lexpos += 1
3152632SN/A                continue
3162632SN/A
3172632SN/A            # Look for a regular expression match
3184479Sbinkertn@umich.edu            for lexre,lexindexfunc in self.lexre:
3194479Sbinkertn@umich.edu                m = lexre.match(lexdata,lexpos)
3204479Sbinkertn@umich.edu                if not m: continue
3214479Sbinkertn@umich.edu
3224479Sbinkertn@umich.edu                # Create a token for return
3232632SN/A                tok = LexToken()
3242632SN/A                tok.value = m.group()
3252632SN/A                tok.lineno = self.lineno
3264479Sbinkertn@umich.edu                tok.lexpos = lexpos
3274479Sbinkertn@umich.edu
3284479Sbinkertn@umich.edu                i = m.lastindex
3294479Sbinkertn@umich.edu                func,tok.type = lexindexfunc[i]
3304479Sbinkertn@umich.edu
3312632SN/A                if not func:
3324479Sbinkertn@umich.edu                   # If no token type was set, it's an ignored token
3336498Snate@binkert.org                   if tok.type:
3346498Snate@binkert.org                      self.lexpos = m.end()
3356498Snate@binkert.org                      return tok
3366498Snate@binkert.org                   else:
3376498Snate@binkert.org                      lexpos = m.end()
3386498Snate@binkert.org                      break
3394479Sbinkertn@umich.edu
3406498Snate@binkert.org                lexpos = m.end()
3412632SN/A
3422632SN/A                # If token is processed by a function, call it
3436498Snate@binkert.org
3446498Snate@binkert.org                tok.lexer = self      # Set additional attributes useful in token rules
3456498Snate@binkert.org                self.lexmatch = m
3466498Snate@binkert.org                self.lexpos = lexpos
3476498Snate@binkert.org
3482632SN/A                newtok = func(tok)
3492632SN/A
3502632SN/A                # Every function must return a token, if nothing, we just move to next token
3514479Sbinkertn@umich.edu                if not newtok:
3526498Snate@binkert.org                    lexpos    = self.lexpos         # This is here in case user has updated lexpos.
3536498Snate@binkert.org                    lexignore = self.lexignore      # This is here in case there was a state change
3544479Sbinkertn@umich.edu                    break
3552632SN/A
3562632SN/A                # Verify type of the token.  If not in the token map, raise an error
3574479Sbinkertn@umich.edu                if not self.lexoptimize:
3586498Snate@binkert.org                    if not newtok.type in self.lextokens:
3596498Snate@binkert.org                        raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
3606498Snate@binkert.org                            func_code(func).co_filename, func_code(func).co_firstlineno,
3612632SN/A                            func.__name__, newtok.type),lexdata[lexpos:])
3622632SN/A
3632632SN/A                return newtok
3644479Sbinkertn@umich.edu            else:
3654479Sbinkertn@umich.edu                # No match, see if in literals
3664479Sbinkertn@umich.edu                if lexdata[lexpos] in self.lexliterals:
3674479Sbinkertn@umich.edu                    tok = LexToken()
3684479Sbinkertn@umich.edu                    tok.value = lexdata[lexpos]
3694479Sbinkertn@umich.edu                    tok.lineno = self.lineno
3704479Sbinkertn@umich.edu                    tok.type = tok.value
3714479Sbinkertn@umich.edu                    tok.lexpos = lexpos
3724479Sbinkertn@umich.edu                    self.lexpos = lexpos + 1
3734479Sbinkertn@umich.edu                    return tok
3742632SN/A
3754479Sbinkertn@umich.edu                # No match. Call t_error() if defined.
3764479Sbinkertn@umich.edu                if self.lexerrorf:
3774479Sbinkertn@umich.edu                    tok = LexToken()
3784479Sbinkertn@umich.edu                    tok.value = self.lexdata[lexpos:]
3794479Sbinkertn@umich.edu                    tok.lineno = self.lineno
3804479Sbinkertn@umich.edu                    tok.type = "error"
3814479Sbinkertn@umich.edu                    tok.lexer = self
3824479Sbinkertn@umich.edu                    tok.lexpos = lexpos
3832632SN/A                    self.lexpos = lexpos
3844479Sbinkertn@umich.edu                    newtok = self.lexerrorf(tok)
3854479Sbinkertn@umich.edu                    if lexpos == self.lexpos:
3864479Sbinkertn@umich.edu                        # Error method didn't change text position at all. This is an error.
3876498Snate@binkert.org                        raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
3884479Sbinkertn@umich.edu                    lexpos = self.lexpos
3894479Sbinkertn@umich.edu                    if not newtok: continue
3904479Sbinkertn@umich.edu                    return newtok
3914479Sbinkertn@umich.edu
3922632SN/A                self.lexpos = lexpos
3936498Snate@binkert.org                raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])
3942632SN/A
3952632SN/A        self.lexpos = lexpos + 1
3964479Sbinkertn@umich.edu        if self.lexdata is None:
3976498Snate@binkert.org             raise RuntimeError("No input string given with input()")
3982632SN/A        return None
3992632SN/A
4006498Snate@binkert.org    # Iterator interface
4016498Snate@binkert.org    def __iter__(self):
4026498Snate@binkert.org        return self
4036498Snate@binkert.org
4046498Snate@binkert.org    def next(self):
4056498Snate@binkert.org        t = self.token()
4066498Snate@binkert.org        if t is None:
4076498Snate@binkert.org            raise StopIteration
4086498Snate@binkert.org        return t
4096498Snate@binkert.org
4106498Snate@binkert.org    __next__ = next
4116498Snate@binkert.org
4122632SN/A# -----------------------------------------------------------------------------
4136498Snate@binkert.org#                           ==== Lex Builder ===
4142632SN/A#
4156498Snate@binkert.org# The functions and classes below are used to collect lexing information
4166498Snate@binkert.org# and build a Lexer object from it.
4172632SN/A# -----------------------------------------------------------------------------
4182632SN/A
4196498Snate@binkert.org# -----------------------------------------------------------------------------
4206498Snate@binkert.org# get_caller_module_dict()
4216498Snate@binkert.org#
4226498Snate@binkert.org# This function returns a dictionary containing all of the symbols defined within
4236498Snate@binkert.org# a caller further down the call stack.  This is used to get the environment
4246498Snate@binkert.org# associated with the yacc() call if none was provided.
4256498Snate@binkert.org# -----------------------------------------------------------------------------
4262632SN/A
4276498Snate@binkert.orgdef get_caller_module_dict(levels):
4282632SN/A    try:
4296498Snate@binkert.org        raise RuntimeError
4306498Snate@binkert.org    except RuntimeError:
4316498Snate@binkert.org        e,b,t = sys.exc_info()
4326498Snate@binkert.org        f = t.tb_frame
4336498Snate@binkert.org        while levels > 0:
4346498Snate@binkert.org            f = f.f_back
4356498Snate@binkert.org            levels -= 1
4366498Snate@binkert.org        ldict = f.f_globals.copy()
4376498Snate@binkert.org        if f.f_globals != f.f_locals:
4386498Snate@binkert.org            ldict.update(f.f_locals)
4392632SN/A
4406498Snate@binkert.org        return ldict
4412632SN/A
4422632SN/A# -----------------------------------------------------------------------------
4434479Sbinkertn@umich.edu# _funcs_to_names()
4442632SN/A#
4454479Sbinkertn@umich.edu# Given a list of regular expression functions, this converts it to a list
4464479Sbinkertn@umich.edu# suitable for output to a table file
4472632SN/A# -----------------------------------------------------------------------------
4482632SN/A
4496498Snate@binkert.orgdef _funcs_to_names(funclist,namelist):
4504479Sbinkertn@umich.edu    result = []
4516498Snate@binkert.org    for f,name in zip(funclist,namelist):
4524479Sbinkertn@umich.edu         if f and f[0]:
4536498Snate@binkert.org             result.append((name, f[1]))
4544479Sbinkertn@umich.edu         else:
4554479Sbinkertn@umich.edu             result.append(f)
4564479Sbinkertn@umich.edu    return result
4574479Sbinkertn@umich.edu
4584479Sbinkertn@umich.edu# -----------------------------------------------------------------------------
4594479Sbinkertn@umich.edu# _names_to_funcs()
4604479Sbinkertn@umich.edu#
4614479Sbinkertn@umich.edu# Given a list of regular expression function names, this converts it back to
4624479Sbinkertn@umich.edu# functions.
4634479Sbinkertn@umich.edu# -----------------------------------------------------------------------------
4644479Sbinkertn@umich.edu
4654479Sbinkertn@umich.edudef _names_to_funcs(namelist,fdict):
4664479Sbinkertn@umich.edu     result = []
4674479Sbinkertn@umich.edu     for n in namelist:
4684479Sbinkertn@umich.edu          if n and n[0]:
4694479Sbinkertn@umich.edu              result.append((fdict[n[0]],n[1]))
4704479Sbinkertn@umich.edu          else:
4714479Sbinkertn@umich.edu              result.append(n)
4724479Sbinkertn@umich.edu     return result
4734479Sbinkertn@umich.edu
4744479Sbinkertn@umich.edu# -----------------------------------------------------------------------------
4754479Sbinkertn@umich.edu# _form_master_re()
4764479Sbinkertn@umich.edu#
4774479Sbinkertn@umich.edu# This function takes a list of all of the regex components and attempts to
4784479Sbinkertn@umich.edu# form the master regular expression.  Given limitations in the Python re
4794479Sbinkertn@umich.edu# module, it may be necessary to break the master regex into separate expressions.
4804479Sbinkertn@umich.edu# -----------------------------------------------------------------------------
4814479Sbinkertn@umich.edu
4824479Sbinkertn@umich.edudef _form_master_re(relist,reflags,ldict,toknames):
4834479Sbinkertn@umich.edu    if not relist: return []
4844479Sbinkertn@umich.edu    regex = "|".join(relist)
4854479Sbinkertn@umich.edu    try:
4864479Sbinkertn@umich.edu        lexre = re.compile(regex,re.VERBOSE | reflags)
4874479Sbinkertn@umich.edu
4884479Sbinkertn@umich.edu        # Build the index to function map for the matching engine
4894479Sbinkertn@umich.edu        lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
4906498Snate@binkert.org        lexindexnames = lexindexfunc[:]
4916498Snate@binkert.org
4924479Sbinkertn@umich.edu        for f,i in lexre.groupindex.items():
4934479Sbinkertn@umich.edu            handle = ldict.get(f,None)
4944479Sbinkertn@umich.edu            if type(handle) in (types.FunctionType, types.MethodType):
4956498Snate@binkert.org                lexindexfunc[i] = (handle,toknames[f])
4966498Snate@binkert.org                lexindexnames[i] = f
4974479Sbinkertn@umich.edu            elif handle is not None:
4986498Snate@binkert.org                lexindexnames[i] = f
4994479Sbinkertn@umich.edu                if f.find("ignore_") > 0:
5004479Sbinkertn@umich.edu                    lexindexfunc[i] = (None,None)
5014479Sbinkertn@umich.edu                else:
5024479Sbinkertn@umich.edu                    lexindexfunc[i] = (None, toknames[f])
5036498Snate@binkert.org
5046498Snate@binkert.org        return [(lexre,lexindexfunc)],[regex],[lexindexnames]
5056498Snate@binkert.org    except Exception:
5064479Sbinkertn@umich.edu        m = int(len(relist)/2)
5074479Sbinkertn@umich.edu        if m == 0: m = 1
5086498Snate@binkert.org        llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames)
5096498Snate@binkert.org        rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames)
5106498Snate@binkert.org        return llist+rlist, lre+rre, lnames+rnames
5114479Sbinkertn@umich.edu
5124479Sbinkertn@umich.edu# -----------------------------------------------------------------------------
5134479Sbinkertn@umich.edu# def _statetoken(s,names)
5144479Sbinkertn@umich.edu#
5154479Sbinkertn@umich.edu# Given a declaration name s of the form "t_" and a dictionary whose keys are
5164479Sbinkertn@umich.edu# state names, this function returns a tuple (states,tokenname) where states
5174479Sbinkertn@umich.edu# is a tuple of state names and tokenname is the name of the token.  For example,
5184479Sbinkertn@umich.edu# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
5194479Sbinkertn@umich.edu# -----------------------------------------------------------------------------
5204479Sbinkertn@umich.edu
5214479Sbinkertn@umich.edudef _statetoken(s,names):
5224479Sbinkertn@umich.edu    nonstate = 1
5234479Sbinkertn@umich.edu    parts = s.split("_")
5244479Sbinkertn@umich.edu    for i in range(1,len(parts)):
5256498Snate@binkert.org         if not parts[i] in names and parts[i] != 'ANY': break
5264479Sbinkertn@umich.edu    if i > 1:
5274479Sbinkertn@umich.edu       states = tuple(parts[1:i])
5284479Sbinkertn@umich.edu    else:
5294479Sbinkertn@umich.edu       states = ('INITIAL',)
5304479Sbinkertn@umich.edu
5314479Sbinkertn@umich.edu    if 'ANY' in states:
5326498Snate@binkert.org       states = tuple(names)
5334479Sbinkertn@umich.edu
5344479Sbinkertn@umich.edu    tokenname = "_".join(parts[i:])
5354479Sbinkertn@umich.edu    return (states,tokenname)
5362632SN/A
5376498Snate@binkert.org
5386498Snate@binkert.org# -----------------------------------------------------------------------------
5396498Snate@binkert.org# LexerReflect()
5406498Snate@binkert.org#
5416498Snate@binkert.org# This class represents information needed to build a lexer as extracted from a
5426498Snate@binkert.org# user's input file.
5436498Snate@binkert.org# -----------------------------------------------------------------------------
class LexerReflect(object):
    """Collect and validate a lexer specification.

    Harvests tokens, literals, states, and t_* rules from *ldict* (a
    dictionary, typically a module or instance namespace), then checks
    them for consistency.  Call get_all() to gather the data and
    validate_all() to check it; ``self.error`` is set nonzero whenever a
    problem is found, with details written to ``self.log``.
    """
    def __init__(self,ldict,log=None,reflags=0):
        self.ldict      = ldict        # namespace holding the lexer spec
        self.error_func = None
        self.tokens     = []
        self.reflags    = reflags      # extra flags for re.compile
        self.stateinfo  = { 'INITIAL' : 'inclusive'}
        self.files      = {}           # source files seen; checked for duplicate rules
        self.error      = 0            # nonzero once any validation error occurs

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        """Harvest tokens, literals, states, and rules from ldict."""
        self.get_tokens()
        self.get_literals()
        self.get_states()
        self.get_rules()

    # Validate all of the information
    def validate_all(self):
        """Run every validation pass; return nonzero if any error found."""
        self.validate_tokens()
        self.validate_literals()
        self.validate_rules()
        return self.error

    # Get the tokens map
    def get_tokens(self):
        """Fetch the 'tokens' list from ldict into self.tokens."""
        tokens = self.ldict.get("tokens",None)
        if not tokens:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens,(list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        """Check token names are valid identifiers; warn on duplicates."""
        terminals = {}
        for n in self.tokens:
            if not _is_identifier.match(n):
                self.log.error("Bad token name '%s'",n)
                self.error = 1
            # A duplicate is only a warning, not an error.
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1

    # Get the literals specifier
    def get_literals(self):
        """Fetch the 'literals' specification (defaults to empty string)."""
        self.literals = self.ldict.get("literals","")

    # Validate literals
    def validate_literals(self):
        """Check that every literal is a single-character string."""
        try:
            for c in self.literals:
                if not isinstance(c,StringTypes) or len(c) > 1:
                    self.log.error("Invalid literal %s. Must be a single character", repr(c))
                    self.error = 1
                    continue

        except TypeError:
            # literals was not iterable at all
            self.log.error("Invalid literals specification. literals must be a sequence of characters")
            self.error = 1

    def get_states(self):
        """Fetch and validate the 'states' declaration into self.stateinfo."""
        self.states = self.ldict.get("states",None)
        # Build statemap
        if self.states:
             if not isinstance(self.states,(tuple,list)):
                  self.log.error("states must be defined as a tuple or list")
                  self.error = 1
             else:
                  for s in self.states:
                        if not isinstance(s,tuple) or len(s) != 2:
                               self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s))
                               self.error = 1
                               continue
                        name, statetype = s
                        if not isinstance(name,StringTypes):
                               self.log.error("State name %s must be a string", repr(name))
                               self.error = 1
                               continue
                        if not (statetype == 'inclusive' or statetype == 'exclusive'):
                               self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name)
                               self.error = 1
                               continue
                        if name in self.stateinfo:
                               self.log.error("State '%s' already defined",name)
                               self.error = 1
                               continue
                        self.stateinfo[name] = statetype

    # Get all of the symbols with a t_ prefix and sort them into various
    # categories (functions, strings, error functions, and ignore characters)

    def get_rules(self):
        """Sort all t_* symbols into function/string/error/ignore buckets per state."""
        tsymbols = [f for f in self.ldict if f[:2] == 't_' ]

        # Now build up a list of functions and a list of strings

        self.toknames = { }        # Mapping of symbols to token names
        self.funcsym =  { }        # Symbols defined as functions
        self.strsym =   { }        # Symbols defined as strings
        self.ignore   = { }        # Ignore strings by state
        self.errorf   = { }        # Error functions by state

        for s in self.stateinfo:
             self.funcsym[s] = []
             self.strsym[s] = []

        if len(tsymbols) == 0:
            self.log.error("No rules of the form t_rulename are defined")
            self.error = 1
            return

        for f in tsymbols:
            t = self.ldict[f]
            states, tokname = _statetoken(f,self.stateinfo)
            self.toknames[f] = tokname

            if hasattr(t,"__call__"):
                if tokname == 'error':
                    for s in states:
                        self.errorf[s] = t
                elif tokname == 'ignore':
                    # t_ignore must be a plain string, never a function.
                    line = func_code(t).co_firstlineno
                    file = func_code(t).co_filename
                    self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__)
                    self.error = 1
                else:
                    for s in states:
                        self.funcsym[s].append((f,t))
            elif isinstance(t, StringTypes):
                if tokname == 'ignore':
                    for s in states:
                        self.ignore[s] = t
                    if "\\" in t:
                        self.log.warning("%s contains a literal backslash '\\'",f)

                elif tokname == 'error':
                    # t_error must be a function, never a string.
                    self.log.error("Rule '%s' must be defined as a function", f)
                    self.error = 1
                else:
                    for s in states:
                        self.strsym[s].append((f,t))
            else:
                self.log.error("%s not defined as a function or string", f)
                self.error = 1

        # Sort the functions by line number (i.e. source-definition order)
        for f in self.funcsym.values():
            if sys.version_info[0] < 3:
                f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno))
            else:
                # Python 3.0
                f.sort(key=lambda x: func_code(x[1]).co_firstlineno)

        # Sort the strings by regular expression length, longest first, so
        # that longer patterns take precedence in the master regex
        for s in self.strsym.values():
            if sys.version_info[0] < 3:
                # cmp-style comparator yielding a descending length order
                s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))
            else:
                # Python 3.0
                s.sort(key=lambda x: len(x[1]),reverse=True)

    # Validate all of the t_rules collected
    def validate_rules(self):
        """Check every collected rule: argument counts, docstring regexes,
        empty-match patterns, and per-state error/ignore coverage."""
        for state in self.stateinfo:
            # Validate all rules defined by functions



            for fname, f in self.funcsym[state]:
                line = func_code(f).co_firstlineno
                file = func_code(f).co_filename
                self.files[file] = 1

                tokname = self.toknames[fname]
                # Bound methods carry an implicit self, so expect one extra arg.
                if isinstance(f, types.MethodType):
                    reqargs = 2
                else:
                    reqargs = 1
                nargs = func_code(f).co_argcount
                if nargs > reqargs:
                    self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
                    self.error = 1
                    continue

                if nargs < reqargs:
                    self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
                    self.error = 1
                    continue

                if not f.__doc__:
                    self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__)
                    self.error = 1
                    continue

                try:
                    # Compile the rule's docstring the same way the master
                    # regex builder will, to surface errors early.
                    c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags)
                    if c.match(""):
                        self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__)
                        self.error = 1
                except re.error:
                    _etype, e, _etrace = sys.exc_info()
                    self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e)
                    # '#' starts a comment under re.VERBOSE unless escaped.
                    if '#' in f.__doc__:
                        self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__)
                    self.error = 1

            # Validate all rules defined by strings
            for name,r in self.strsym[state]:
                tokname = self.toknames[name]
                if tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", name)
                    self.error = 1
                    continue

                # t_ignore_XXX rules are exempt from the declared-token check.
                if not tokname in self.tokens and tokname.find("ignore_") < 0:
                    self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname)
                    self.error = 1
                    continue

                try:
                    c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags)
                    if (c.match("")):
                         self.log.error("Regular expression for rule '%s' matches empty string",name)
                         self.error = 1
                except re.error:
                    _etype, e, _etrace = sys.exc_info()
                    self.log.error("Invalid regular expression for rule '%s'. %s",name,e)
                    if '#' in r:
                         self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name)
                    self.error = 1

            if not self.funcsym[state] and not self.strsym[state]:
                self.log.error("No rules defined for state '%s'",state)
                self.error = 1

            # Validate the error function
            efunc = self.errorf.get(state,None)
            if efunc:
                f = efunc
                line = func_code(f).co_firstlineno
                file = func_code(f).co_filename
                self.files[file] = 1

                if isinstance(f, types.MethodType):
                    reqargs = 2
                else:
                    reqargs = 1
                nargs = func_code(f).co_argcount
                if nargs > reqargs:
                    self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
                    self.error = 1

                if nargs < reqargs:
                    self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
                    self.error = 1

        # Scan every contributing source file for duplicated rule names.
        for f in self.files:
            self.validate_file(f)


    # -----------------------------------------------------------------------------
    # validate_file()
    #
    # This checks to see if there are duplicated t_rulename() functions or strings
    # in the parser input file.  This is done using a simple regular expression
    # match on each line in the given file.
    # -----------------------------------------------------------------------------

    def validate_file(self,filename):
        """Warn about t_* rules defined more than once in *filename*."""
        import os.path
        base,ext = os.path.splitext(filename)
        if ext != '.py': return         # No idea what the file is. Return OK

        try:
            f = open(filename)
            lines = f.readlines()
            f.close()
        except IOError:
            return                      # Couldn't find the file.  Don't worry about it

        # Match "def t_NAME(" and "t_NAME =" forms, respectively.
        fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
        sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')

        counthash = { }
        linen = 1
        for l in lines:
            m = fre.match(l)
            if not m:
                m = sre.match(l)
            if m:
                name = m.group(1)
                prev = counthash.get(name)
                if not prev:
                    counthash[name] = linen
                else:
                    self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev)
                    self.error = 1
            linen += 1
8596498Snate@binkert.org
8602632SN/A# -----------------------------------------------------------------------------
8612632SN/A# lex(module)
8622632SN/A#
8632632SN/A# Build all of the regular expression rules from definitions in the supplied module
8642632SN/A# -----------------------------------------------------------------------------
def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None):
    """Build a Lexer from rule definitions in *module* (or *object*, or the
    caller's namespace when neither is given) and return it.

    With optimize=1 and a lextab name, a previously written lexer table is
    loaded if available, and validation of the rules is skipped; otherwise
    the rules are collected via LexerReflect, validated, and compiled into
    master regular expressions per lexer state.  Also installs the
    module-level token(), input(), and lexer globals.

    NOTE(review): the 'nowarn' parameter is accepted but never used in
    this function body.
    """
    global lexer
    ldict = None
    stateinfo  = { 'INITIAL' : 'inclusive'}
    lexobj = Lexer()
    lexobj.lexoptimize = optimize
    global token,input

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    if debug:
        if debuglog is None:
            debuglog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the lexer
    if object: module = object

    if module:
        _items = [(k,getattr(module,k)) for k in dir(module)]
        ldict = dict(_items)
    else:
        # No module given: reflect on the caller's frame (2 levels up).
        ldict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    linfo = LexerReflect(ldict,log=errorlog,reflags=reflags)
    linfo.get_all()
    if not optimize:
        if linfo.validate_all():
            raise SyntaxError("Can't build lexer")

    # Try to load a previously generated lexer table; fall through to a
    # full rebuild if the table module can't be imported.
    if optimize and lextab:
        try:
            lexobj.readtab(lextab,ldict)
            token = lexobj.token
            input = lexobj.input
            lexer = lexobj
            return lexobj

        except ImportError:
            pass

    # Dump some basic debugging information
    if debug:
        debuglog.info("lex: tokens   = %r", linfo.tokens)
        debuglog.info("lex: literals = %r", linfo.literals)
        debuglog.info("lex: states   = %r", linfo.stateinfo)

    # Build a dictionary of valid token names
    lexobj.lextokens = { }
    for n in linfo.tokens:
        lexobj.lextokens[n] = 1

    # Get literals specification; a list/tuple is joined into one string
    # of the same string type as its elements.
    if isinstance(linfo.literals,(list,tuple)):
        lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
    else:
        lexobj.lexliterals = linfo.literals

    # Get the stateinfo dictionary
    stateinfo = linfo.stateinfo

    regexs = { }
    # Build the master regular expressions
    for state in stateinfo:
        regex_list = []

        # Add rules defined by functions first (they sort by line number)
        for fname, f in linfo.funcsym[state]:
            line = func_code(f).co_firstlineno
            file = func_code(f).co_filename
            regex_list.append("(?P<%s>%s)" % (fname,f.__doc__))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state)

        # Now add all of the simple rules (sorted longest pattern first)
        for name,r in linfo.strsym[state]:
            regex_list.append("(?P<%s>%s)" % (name,r))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state)

        regexs[state] = regex_list

    # Build the master regular expressions

    if debug:
        debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")

    for state in regexs:
        lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames)
        lexobj.lexstatere[state] = lexre
        lexobj.lexstateretext[state] = re_text
        lexobj.lexstaterenames[state] = re_names
        if debug:
            for i in range(len(re_text)):
                debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i])

    # For inclusive states, we need to add the regular expressions from the INITIAL state
    for state,stype in stateinfo.items():
        if state != "INITIAL" and stype == 'inclusive':
             lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
             lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
             lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])

    lexobj.lexstateinfo = stateinfo
    lexobj.lexre = lexobj.lexstatere["INITIAL"]
    lexobj.lexretext = lexobj.lexstateretext["INITIAL"]

    # Set up ignore variables
    lexobj.lexstateignore = linfo.ignore
    lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")

    # Set up error functions
    lexobj.lexstateerrorf = linfo.errorf
    lexobj.lexerrorf = linfo.errorf.get("INITIAL",None)
    if not lexobj.lexerrorf:
        errorlog.warning("No t_error rule is defined")

    # Check state information for ignore and error rules; inclusive states
    # silently inherit INITIAL's error/ignore handling, exclusive states
    # only get a warning when theirs is missing.
    for s,stype in stateinfo.items():
        if stype == 'exclusive':
              if not s in linfo.errorf:
                   errorlog.warning("No error rule is defined for exclusive state '%s'", s)
              if not s in linfo.ignore and lexobj.lexignore:
                   errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
        elif stype == 'inclusive':
              if not s in linfo.errorf:
                   linfo.errorf[s] = linfo.errorf.get("INITIAL",None)
              if not s in linfo.ignore:
                   linfo.ignore[s] = linfo.ignore.get("INITIAL","")

    # Create global versions of the token() and input() functions
    token = lexobj.token
    input = lexobj.input
    lexer = lexobj

    # If in optimize mode, we write the lextab
    if lextab and optimize:
        lexobj.writetab(lextab,outputdir)

    return lexobj
10062632SN/A
10072632SN/A# -----------------------------------------------------------------------------
10084479Sbinkertn@umich.edu# runmain()
10092632SN/A#
10102632SN/A# This runs the lexer as a main program
10112632SN/A# -----------------------------------------------------------------------------
10122632SN/A
def runmain(lexer=None,data=None):
    """Run the lexer as a main program: tokenize *data* (or the file named
    by sys.argv[1], or standard input when no argument is given) and print
    one (type, value, lineno, lexpos) tuple per token."""
    if not data:
        try:
            filename = sys.argv[1]
        except IndexError:
            # No filename on the command line; read from stdin instead.
            sys.stdout.write("Reading from standard input (type EOF to end):\n")
            data = sys.stdin.read()
        else:
            f = open(filename)
            data = f.read()
            f.close()

    # Prefer the supplied lexer's methods; otherwise fall back to the
    # module-level input()/token() bindings installed by lex().
    _input = lexer.input if lexer else input
    _input(data)
    _token = lexer.token if lexer else token

    while True:
        tok = _token()
        if not tok:
            break
        sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno, tok.lexpos))
10382632SN/A
10394479Sbinkertn@umich.edu# -----------------------------------------------------------------------------
10404479Sbinkertn@umich.edu# @TOKEN(regex)
10414479Sbinkertn@umich.edu#
10424479Sbinkertn@umich.edu# This decorator function can be used to set the regex expression on a function
10434479Sbinkertn@umich.edu# when its docstring might need to be set in an alternative way
10444479Sbinkertn@umich.edu# -----------------------------------------------------------------------------
10452632SN/A
def TOKEN(r):
    """Decorator that attaches a regular expression to a token rule
    function by storing it in the function's __doc__ attribute (which is
    where the lexer builder looks for rule patterns).  *r* may be a
    pattern string, or a callable whose own docstring supplies the
    pattern."""
    def set_doc(f):
        # A callable donates its docstring; anything else is taken to be
        # the pattern string itself.
        f.__doc__ = r.__doc__ if hasattr(r, "__call__") else r
        return f
    return set_doc

# Alternative spelling of the TOKEN decorator
Token = TOKEN
10574479Sbinkertn@umich.edu
1058