16498Snate@binkert.org# ----------------------------------------------------------------------------- 22632SN/A# ply: lex.py 32632SN/A# 46498Snate@binkert.org# Copyright (C) 2001-2009, 56498Snate@binkert.org# David M. Beazley (Dabeaz LLC) 66498Snate@binkert.org# All rights reserved. 72632SN/A# 86498Snate@binkert.org# Redistribution and use in source and binary forms, with or without 96498Snate@binkert.org# modification, are permitted provided that the following conditions are 106498Snate@binkert.org# met: 116498Snate@binkert.org# 126498Snate@binkert.org# * Redistributions of source code must retain the above copyright notice, 136498Snate@binkert.org# this list of conditions and the following disclaimer. 146498Snate@binkert.org# * Redistributions in binary form must reproduce the above copyright notice, 156498Snate@binkert.org# this list of conditions and the following disclaimer in the documentation 166498Snate@binkert.org# and/or other materials provided with the distribution. 176498Snate@binkert.org# * Neither the name of the David Beazley or Dabeaz LLC may be used to 186498Snate@binkert.org# endorse or promote products derived from this software without 196498Snate@binkert.org# specific prior written permission. 202632SN/A# 216498Snate@binkert.org# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 226498Snate@binkert.org# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 236498Snate@binkert.org# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 246498Snate@binkert.org# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 256498Snate@binkert.org# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 266498Snate@binkert.org# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 276498Snate@binkert.org# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 286498Snate@binkert.org# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 296498Snate@binkert.org# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 306498Snate@binkert.org# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 316498Snate@binkert.org# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 326498Snate@binkert.org# ----------------------------------------------------------------------------- 332632SN/A 346498Snate@binkert.org__version__ = "3.2" 356498Snate@binkert.org__tabversion__ = "3.2" # Version of table file used 362632SN/A 376498Snate@binkert.orgimport re, sys, types, copy, os 382632SN/A 396498Snate@binkert.org# This tuple contains known string types 406498Snate@binkert.orgtry: 416498Snate@binkert.org # Python 2.6 426498Snate@binkert.org StringTypes = (types.StringType, types.UnicodeType) 436498Snate@binkert.orgexcept AttributeError: 446498Snate@binkert.org # Python 3.0 456498Snate@binkert.org StringTypes = (str, bytes) 466498Snate@binkert.org 476498Snate@binkert.org# Extract the code attribute of a function. Different implementations 486498Snate@binkert.org# are for Python 2/3 compatibility. 
496498Snate@binkert.org 506498Snate@binkert.orgif sys.version_info[0] < 3: 516498Snate@binkert.org def func_code(f): 526498Snate@binkert.org return f.func_code 536498Snate@binkert.orgelse: 546498Snate@binkert.org def func_code(f): 556498Snate@binkert.org return f.__code__ 566498Snate@binkert.org 576498Snate@binkert.org# This regular expression is used to match valid token names 584479Sbinkertn@umich.edu_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') 592632SN/A 604479Sbinkertn@umich.edu# Exception thrown when invalid token encountered and no default error 614479Sbinkertn@umich.edu# handler is defined. 626498Snate@binkert.org 632632SN/Aclass LexError(Exception): 642632SN/A def __init__(self,message,s): 652632SN/A self.args = (message,) 662632SN/A self.text = s 672632SN/A 686498Snate@binkert.org# Token class. This class is used to represent the tokens produced. 694479Sbinkertn@umich.educlass LexToken(object): 702632SN/A def __str__(self): 714479Sbinkertn@umich.edu return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) 722632SN/A def __repr__(self): 732632SN/A return str(self) 746498Snate@binkert.org 756498Snate@binkert.org# This object is a stand-in for a logging object created by the 766498Snate@binkert.org# logging module. 
# This object is a stand-in for a logging object created by the
# logging module.

class PlyLogger(object):
    """Minimal logger writing formatted messages to a file-like object."""
    def __init__(self, f):
        self.f = f

    def critical(self, msg, *args, **kwargs):
        self.f.write((msg % args) + "\n")

    def warning(self, msg, *args, **kwargs):
        self.f.write("WARNING: " + (msg % args) + "\n")

    def error(self, msg, *args, **kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    # info and debug share critical's behavior: message, no prefix.
    info = critical
    debug = critical


class NullLogger(object):
    """Logger used when no output is wanted.  Every attribute access and
    every call simply returns the logger itself, so chained use is a no-op."""
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self


# -----------------------------------------------------------------------------
# === Lexing Engine ===
#
# The following Lexer class implements the lexer runtime.   There are only
# a few public methods and attributes:
#
#    input()          -  Store a new string in the lexer
#    token()          -  Get the next token
#    clone()          -  Clone the lexer
#
#    lineno           -  Current line number
#    lexpos           -  Current position in the input string
# -----------------------------------------------------------------------------

class Lexer:
    def __init__(self):
        self.lexre = None               # Master regular expression. This is a list of
                                        # tuples (re, findex) where re is a compiled
                                        # regular expression and findex is a list
                                        # mapping regex group numbers to rules
        self.lexretext = None           # Current regular expression strings
        self.lexstatere = {}            # Dictionary mapping lexer states to master regexs
        self.lexstateretext = {}        # Dictionary mapping lexer states to regex strings
        self.lexstaterenames = {}       # Dictionary mapping lexer states to symbol names
        self.lexstate = "INITIAL"       # Current lexer state
        self.lexstatestack = []         # Stack of lexer states
        self.lexstateinfo = None        # State information
        self.lexstateignore = {}        # Dictionary of ignored characters for each state
        self.lexstateerrorf = {}        # Dictionary of error functions for each state
        self.lexreflags = 0             # Optional re compile flags
        self.lexdata = None             # Actual input data (as a string)
        self.lexpos = 0                 # Current position in input text
        self.lexlen = 0                 # Length of the input text
        self.lexerrorf = None           # Error rule (if any)
        self.lextokens = None           # List of valid tokens
        self.lexignore = ""             # Ignored characters
        self.lexliterals = ""           # Literal characters that can be passed through
        self.lexmodule = None           # Module
        self.lineno = 1                 # Current line number
        self.lexoptimize = 0            # Optimized mode

    def clone(self, object=None):
        """Return a shallow copy of this lexer.

        If *object* is supplied, the lexer is being attached to a new
        object, so every bound rule function in lexstatere and every
        error handler in lexstateerrorf is rebound to the methods of
        *object* (looked up by function name).
        """
        c = copy.copy(self)

        if object:
            rebound = {}
            for statename, ritem in self.lexstatere.items():
                newre = []
                for cre, findex in ritem:
                    newfindex = []
                    for f in findex:
                        # Entries with no function (plain string rules or
                        # unused groups) are carried over unchanged.
                        if not f or not f[0]:
                            newfindex.append(f)
                            continue
                        newfindex.append((getattr(object, f[0].__name__), f[1]))
                    newre.append((cre, newfindex))
                rebound[statename] = newre
            c.lexstatere = rebound
            c.lexstateerrorf = {}
            for statename, ef in self.lexstateerrorf.items():
                c.lexstateerrorf[statename] = getattr(object, ef.__name__)
            c.lexmodule = object
        return c

    # ------------------------------------------------------------
    # writetab() - Write lexer information to a table file
    # ------------------------------------------------------------
    def writetab(self, tabfile, outputdir=""):
        """Dump the lexer's tables to <outputdir>/<tabfile>.py so a later
        run can rebuild the lexer with readtab() instead of recompiling."""
        if isinstance(tabfile, types.ModuleType):
            return
        basetabfilename = tabfile.split(".")[-1]
        filename = os.path.join(outputdir, basetabfilename) + ".py"
        tf = open(filename, "w")
        tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile, __version__))
        tf.write("_tabversion   = %s\n" % repr(__version__))
        tf.write("_lextokens    = %s\n" % repr(self.lextokens))
        tf.write("_lexreflags   = %s\n" % repr(self.lexreflags))
        tf.write("_lexliterals  = %s\n" % repr(self.lexliterals))
        tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))

        tabre = {}
        # Collect all functions in the initial state
        # NOTE(review): initialfuncs is computed but never used below;
        # kept for fidelity with the original code.
        initial = self.lexstatere["INITIAL"]
        initialfuncs = []
        for part in initial:
            for f in part[1]:
                if f and f[0]:
                    initialfuncs.append(f)

        # Serialize each state's (regex text, function-name index) pairs.
        for statename, lre in self.lexstatere.items():
            titem = []
            for i in range(len(lre)):
                titem.append((self.lexstateretext[statename][i],
                              _funcs_to_names(lre[i][1], self.lexstaterenames[statename][i])))
            tabre[statename] = titem

        tf.write("_lexstatere   = %s\n" % repr(tabre))
        tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))

        # Error handlers are stored by name (or None when absent).
        taberr = {}
        for statename, ef in self.lexstateerrorf.items():
            if ef:
                taberr[statename] = ef.__name__
            else:
                taberr[statename] = None
        tf.write("_lexstateerrorf = %s\n" % repr(taberr))
        tf.close()

    # ------------------------------------------------------------
    # readtab() - Read lexer information from a tab file
    # ------------------------------------------------------------
    def readtab(self, tabfile, fdict):
        """Rebuild the lexer from a table module written by writetab().

        *tabfile* may be a module object or an importable module name;
        *fdict* maps rule-function names back to callables.  Raises
        ImportError when the table was written by a different PLY version.
        """
        if isinstance(tabfile, types.ModuleType):
            lextab = tabfile
        else:
            if sys.version_info[0] < 3:
                exec("import %s as lextab" % tabfile)
            else:
                env = {}
                exec("import %s as lextab" % tabfile, env, env)
                lextab = env['lextab']

        if getattr(lextab, "_tabversion", "0.0") != __version__:
            raise ImportError("Inconsistent PLY version")

        self.lextokens = lextab._lextokens
        self.lexreflags = lextab._lexreflags
        self.lexliterals = lextab._lexliterals
        self.lexstateinfo = lextab._lexstateinfo
        self.lexstateignore = lextab._lexstateignore
        self.lexstatere = {}
        self.lexstateretext = {}
        for statename, lre in lextab._lexstatere.items():
            titem = []
            txtitem = []
            for i in range(len(lre)):
                # Recompile the stored regex text and restore functions.
                titem.append((re.compile(lre[i][0], lextab._lexreflags),
                              _names_to_funcs(lre[i][1], fdict)))
                txtitem.append(lre[i][0])
            self.lexstatere[statename] = titem
            self.lexstateretext[statename] = txtitem
        self.lexstateerrorf = {}
        for statename, ef in lextab._lexstateerrorf.items():
            self.lexstateerrorf[statename] = fdict[ef]
        self.begin('INITIAL')

    # ------------------------------------------------------------
    # input() - Push a new string into the lexer
    # ------------------------------------------------------------
    def input(self, s):
        """Store *s* as the string to tokenize and reset the position."""
        # Pull off the first character to see if s looks like a string
        c = s[:1]
        if not isinstance(c, StringTypes):
            raise ValueError("Expected a string")
        self.lexdata = s
        self.lexpos = 0
        self.lexlen = len(s)

    # ------------------------------------------------------------
    # begin() - Changes the lexing state
    # ------------------------------------------------------------
    def begin(self, state):
        """Switch to *state*, loading its regexes, ignore set and error rule."""
        if not state in self.lexstatere:
            raise ValueError("Undefined state")
        self.lexre = self.lexstatere[state]
        self.lexretext = self.lexstateretext[state]
        self.lexignore = self.lexstateignore.get(state, "")
        self.lexerrorf = self.lexstateerrorf.get(state, None)
        self.lexstate = state

    # ------------------------------------------------------------
    # push_state() - Changes the lexing state and saves old on stack
    # ------------------------------------------------------------
    def push_state(self, state):
        self.lexstatestack.append(self.lexstate)
        self.begin(state)

    # ------------------------------------------------------------
    # pop_state() - Restores the previous state
    # ------------------------------------------------------------
    def pop_state(self):
        self.begin(self.lexstatestack.pop())

    # ------------------------------------------------------------
    # current_state() - Returns the current lexing state
    # ------------------------------------------------------------
    def current_state(self):
        return self.lexstate

    # ------------------------------------------------------------
    # skip() - Skip ahead n characters
    # ------------------------------------------------------------
    def skip(self, n):
        self.lexpos += n

    # ------------------------------------------------------------
    # opttoken() - Return the next token from the Lexer
    #
    # Note: This function has been carefully implemented to be as fast
    # as possible.  Don't make changes unless you really know what
    # you are doing
    # ------------------------------------------------------------
    def token(self):
        # Make local copies of frequently referenced attributes
        lexpos = self.lexpos
        lexlen = self.lexlen
        lexignore = self.lexignore
        lexdata = self.lexdata

        while lexpos < lexlen:
            # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
            if lexdata[lexpos] in lexignore:
                lexpos += 1
                continue

            # Look for a regular expression match
            for lexre, lexindexfunc in self.lexre:
                m = lexre.match(lexdata, lexpos)
                if not m: continue

                # Create a token for return
                tok = LexToken()
                tok.value = m.group()
                tok.lineno = self.lineno
                tok.lexpos = lexpos

                # lastindex identifies which named group (i.e. which rule)
                # matched; look up its (function, token-type) pair.
                i = m.lastindex
                func, tok.type = lexindexfunc[i]

                if not func:
                    # If no token type was set, it's an ignored token
                    if tok.type:
                        self.lexpos = m.end()
                        return tok
                    else:
                        lexpos = m.end()
                        break

                lexpos = m.end()

                # If token is processed by a function, call it

                tok.lexer = self      # Set additional attributes useful in token rules
                self.lexmatch = m
                self.lexpos = lexpos

                newtok = func(tok)

                # Every function must return a token, if nothing, we just move to next token
                if not newtok:
                    lexpos = self.lexpos         # This is here in case user has updated lexpos.
                    lexignore = self.lexignore   # This is here in case there was a state change
                    break

                # Verify type of the token.  If not in the token map, raise an error
                if not self.lexoptimize:
                    if not newtok.type in self.lextokens:
                        raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
                            func_code(func).co_filename, func_code(func).co_firstlineno,
                            func.__name__, newtok.type), lexdata[lexpos:])

                return newtok
            else:
                # No match, see if in literals
                if lexdata[lexpos] in self.lexliterals:
                    tok = LexToken()
                    tok.value = lexdata[lexpos]
                    tok.lineno = self.lineno
                    tok.type = tok.value
                    tok.lexpos = lexpos
                    self.lexpos = lexpos + 1
                    return tok

                # No match. Call t_error() if defined.
                if self.lexerrorf:
                    tok = LexToken()
                    tok.value = self.lexdata[lexpos:]
                    tok.lineno = self.lineno
                    tok.type = "error"
                    tok.lexer = self
                    tok.lexpos = lexpos
                    self.lexpos = lexpos
                    newtok = self.lexerrorf(tok)
                    if lexpos == self.lexpos:
                        # Error method didn't change text position at all. This is an error.
                        raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
                    lexpos = self.lexpos
                    if not newtok: continue
                    return newtok

                self.lexpos = lexpos
                raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:])

        self.lexpos = lexpos + 1
        if self.lexdata is None:
            raise RuntimeError("No input string given with input()")
        return None

    # Iterator interface
    def __iter__(self):
        return self

    def next(self):
        t = self.token()
        if t is None:
            raise StopIteration
        return t

    __next__ = next

# -----------------------------------------------------------------------------
#                           ==== Lex Builder ===
#
# The functions and classes below are used to collect lexing information
# and build a Lexer object from it.
# -----------------------------------------------------------------------------
This is used to get the environment 4246498Snate@binkert.org# associated with the yacc() call if none was provided. 4256498Snate@binkert.org# ----------------------------------------------------------------------------- 4262632SN/A 4276498Snate@binkert.orgdef get_caller_module_dict(levels): 4282632SN/A try: 4296498Snate@binkert.org raise RuntimeError 4306498Snate@binkert.org except RuntimeError: 4316498Snate@binkert.org e,b,t = sys.exc_info() 4326498Snate@binkert.org f = t.tb_frame 4336498Snate@binkert.org while levels > 0: 4346498Snate@binkert.org f = f.f_back 4356498Snate@binkert.org levels -= 1 4366498Snate@binkert.org ldict = f.f_globals.copy() 4376498Snate@binkert.org if f.f_globals != f.f_locals: 4386498Snate@binkert.org ldict.update(f.f_locals) 4392632SN/A 4406498Snate@binkert.org return ldict 4412632SN/A 4422632SN/A# ----------------------------------------------------------------------------- 4434479Sbinkertn@umich.edu# _funcs_to_names() 4442632SN/A# 4454479Sbinkertn@umich.edu# Given a list of regular expression functions, this converts it to a list 4464479Sbinkertn@umich.edu# suitable for output to a table file 4472632SN/A# ----------------------------------------------------------------------------- 4482632SN/A 4496498Snate@binkert.orgdef _funcs_to_names(funclist,namelist): 4504479Sbinkertn@umich.edu result = [] 4516498Snate@binkert.org for f,name in zip(funclist,namelist): 4524479Sbinkertn@umich.edu if f and f[0]: 4536498Snate@binkert.org result.append((name, f[1])) 4544479Sbinkertn@umich.edu else: 4554479Sbinkertn@umich.edu result.append(f) 4564479Sbinkertn@umich.edu return result 4574479Sbinkertn@umich.edu 4584479Sbinkertn@umich.edu# ----------------------------------------------------------------------------- 4594479Sbinkertn@umich.edu# _names_to_funcs() 4604479Sbinkertn@umich.edu# 4614479Sbinkertn@umich.edu# Given a list of regular expression function names, this converts it back to 4624479Sbinkertn@umich.edu# functions. 
4634479Sbinkertn@umich.edu# ----------------------------------------------------------------------------- 4644479Sbinkertn@umich.edu 4654479Sbinkertn@umich.edudef _names_to_funcs(namelist,fdict): 4664479Sbinkertn@umich.edu result = [] 4674479Sbinkertn@umich.edu for n in namelist: 4684479Sbinkertn@umich.edu if n and n[0]: 4694479Sbinkertn@umich.edu result.append((fdict[n[0]],n[1])) 4704479Sbinkertn@umich.edu else: 4714479Sbinkertn@umich.edu result.append(n) 4724479Sbinkertn@umich.edu return result 4734479Sbinkertn@umich.edu 4744479Sbinkertn@umich.edu# ----------------------------------------------------------------------------- 4754479Sbinkertn@umich.edu# _form_master_re() 4764479Sbinkertn@umich.edu# 4774479Sbinkertn@umich.edu# This function takes a list of all of the regex components and attempts to 4784479Sbinkertn@umich.edu# form the master regular expression. Given limitations in the Python re 4794479Sbinkertn@umich.edu# module, it may be necessary to break the master regex into separate expressions. 
4804479Sbinkertn@umich.edu# ----------------------------------------------------------------------------- 4814479Sbinkertn@umich.edu 4824479Sbinkertn@umich.edudef _form_master_re(relist,reflags,ldict,toknames): 4834479Sbinkertn@umich.edu if not relist: return [] 4844479Sbinkertn@umich.edu regex = "|".join(relist) 4854479Sbinkertn@umich.edu try: 4864479Sbinkertn@umich.edu lexre = re.compile(regex,re.VERBOSE | reflags) 4874479Sbinkertn@umich.edu 4884479Sbinkertn@umich.edu # Build the index to function map for the matching engine 4894479Sbinkertn@umich.edu lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) 4906498Snate@binkert.org lexindexnames = lexindexfunc[:] 4916498Snate@binkert.org 4924479Sbinkertn@umich.edu for f,i in lexre.groupindex.items(): 4934479Sbinkertn@umich.edu handle = ldict.get(f,None) 4944479Sbinkertn@umich.edu if type(handle) in (types.FunctionType, types.MethodType): 4956498Snate@binkert.org lexindexfunc[i] = (handle,toknames[f]) 4966498Snate@binkert.org lexindexnames[i] = f 4974479Sbinkertn@umich.edu elif handle is not None: 4986498Snate@binkert.org lexindexnames[i] = f 4994479Sbinkertn@umich.edu if f.find("ignore_") > 0: 5004479Sbinkertn@umich.edu lexindexfunc[i] = (None,None) 5014479Sbinkertn@umich.edu else: 5024479Sbinkertn@umich.edu lexindexfunc[i] = (None, toknames[f]) 5036498Snate@binkert.org 5046498Snate@binkert.org return [(lexre,lexindexfunc)],[regex],[lexindexnames] 5056498Snate@binkert.org except Exception: 5064479Sbinkertn@umich.edu m = int(len(relist)/2) 5074479Sbinkertn@umich.edu if m == 0: m = 1 5086498Snate@binkert.org llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames) 5096498Snate@binkert.org rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames) 5106498Snate@binkert.org return llist+rlist, lre+rre, lnames+rnames 5114479Sbinkertn@umich.edu 5124479Sbinkertn@umich.edu# ----------------------------------------------------------------------------- 5134479Sbinkertn@umich.edu# def 
_statetoken(s,names) 5144479Sbinkertn@umich.edu# 5154479Sbinkertn@umich.edu# Given a declaration name s of the form "t_" and a dictionary whose keys are 5164479Sbinkertn@umich.edu# state names, this function returns a tuple (states,tokenname) where states 5174479Sbinkertn@umich.edu# is a tuple of state names and tokenname is the name of the token. For example, 5184479Sbinkertn@umich.edu# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') 5194479Sbinkertn@umich.edu# ----------------------------------------------------------------------------- 5204479Sbinkertn@umich.edu 5214479Sbinkertn@umich.edudef _statetoken(s,names): 5224479Sbinkertn@umich.edu nonstate = 1 5234479Sbinkertn@umich.edu parts = s.split("_") 5244479Sbinkertn@umich.edu for i in range(1,len(parts)): 5256498Snate@binkert.org if not parts[i] in names and parts[i] != 'ANY': break 5264479Sbinkertn@umich.edu if i > 1: 5274479Sbinkertn@umich.edu states = tuple(parts[1:i]) 5284479Sbinkertn@umich.edu else: 5294479Sbinkertn@umich.edu states = ('INITIAL',) 5304479Sbinkertn@umich.edu 5314479Sbinkertn@umich.edu if 'ANY' in states: 5326498Snate@binkert.org states = tuple(names) 5334479Sbinkertn@umich.edu 5344479Sbinkertn@umich.edu tokenname = "_".join(parts[i:]) 5354479Sbinkertn@umich.edu return (states,tokenname) 5362632SN/A 5376498Snate@binkert.org 5386498Snate@binkert.org# ----------------------------------------------------------------------------- 5396498Snate@binkert.org# LexerReflect() 5406498Snate@binkert.org# 5416498Snate@binkert.org# This class represents information needed to build a lexer as extracted from a 5426498Snate@binkert.org# user's input file. 
# -----------------------------------------------------------------------------
class LexerReflect(object):
    """Collect and validate a lexer specification from a module dictionary.

    The dictionary (``ldict``) is expected to hold the user's declarations:
    ``tokens``, ``literals``, ``states``, and the ``t_*`` rule functions and
    strings.  ``get_all()`` extracts everything; ``validate_all()`` checks it
    and returns nonzero on failure.  Every detected problem is written to the
    logger and recorded by setting ``self.error`` to 1.
    """
    def __init__(self,ldict,log=None,reflags=0):
        self.ldict = ldict                            # Dictionary containing the lexer specification
        self.error_func = None                        # Error rule (filled in later)
        self.tokens = []                              # Declared token names
        self.reflags = reflags                        # Extra flags passed to re.compile()
        self.stateinfo = { 'INITIAL' : 'inclusive'}   # Map of state name -> 'inclusive' | 'exclusive'
        self.files = {}                               # Source files containing rule definitions (used as a set)
        self.error = 0                                # Set to 1 if any validation error was logged

        # Default to a stderr logger when none is supplied
        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        """Extract tokens, literals, states, and rules from ldict."""
        self.get_tokens()
        self.get_literals()
        self.get_states()
        self.get_rules()

    # Validate all of the information
    def validate_all(self):
        """Run every validation pass. Returns nonzero if any check failed."""
        self.validate_tokens()
        self.validate_literals()
        self.validate_rules()
        return self.error

    # Get the tokens map
    def get_tokens(self):
        """Fetch the 'tokens' declaration and require a non-empty list/tuple."""
        tokens = self.ldict.get("tokens",None)
        if not tokens:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens,(list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        # NOTE(review): this branch is unreachable -- the 'if not tokens'
        # check above already returned for an empty sequence.
        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        """Check each token name is a valid identifier; warn on duplicates."""
        terminals = {}
        for n in self.tokens:
            if not _is_identifier.match(n):
                self.log.error("Bad token name '%s'",n)
                self.error = 1
            if n in terminals:
                # Duplicates are only a warning, not a hard error
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1

    # Get the literals specifier
    def get_literals(self):
        """Fetch the 'literals' declaration (defaults to the empty string)."""
        self.literals = self.ldict.get("literals","")

    # Validate literals
    def validate_literals(self):
        """Check that literals is a sequence of single-character strings."""
        try:
            for c in self.literals:
                if not isinstance(c,StringTypes) or len(c) > 1:
                    self.log.error("Invalid literal %s. Must be a single character", repr(c))
                    self.error = 1
                    continue

        except TypeError:
            # Raised when self.literals is not iterable at all
            self.log.error("Invalid literals specification. literals must be a sequence of characters")
            self.error = 1

    def get_states(self):
        """Fetch the optional 'states' declaration and fill in stateinfo.

        Each entry must be a 2-tuple (statename, 'inclusive'|'exclusive').
        Valid entries are added to self.stateinfo; 'INITIAL' is predefined.
        """
        self.states = self.ldict.get("states",None)
        # Build statemap
        if self.states:
            if not isinstance(self.states,(tuple,list)):
                self.log.error("states must be defined as a tuple or list")
                self.error = 1
            else:
                for s in self.states:
                    if not isinstance(s,tuple) or len(s) != 2:
                        self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s))
                        self.error = 1
                        continue
                    name, statetype = s
                    if not isinstance(name,StringTypes):
                        self.log.error("State name %s must be a string", repr(name))
                        self.error = 1
                        continue
                    if not (statetype == 'inclusive' or statetype == 'exclusive'):
                        self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name)
                        self.error = 1
                        continue
                    if name in self.stateinfo:
                        self.log.error("State '%s' already defined",name)
                        self.error = 1
                        continue
                    self.stateinfo[name] = statetype

    # Get all of the symbols with a t_ prefix and sort them into various
    # categories (functions, strings, error functions, and ignore characters)

    def get_rules(self):
        """Partition every t_* symbol in ldict by kind and by lexer state.

        Callables become function rules (or the per-state error handler);
        strings become string rules (or per-state ignore sets).  Function
        rules are ordered by definition line number; string rules are
        ordered longest-regex-first so longer patterns take precedence.
        """
        tsymbols = [f for f in self.ldict if f[:2] == 't_' ]

        # Now build up a list of functions and a list of strings

        self.toknames = { }        # Mapping of symbols to token names
        self.funcsym =  { }        # Symbols defined as functions
        self.strsym =   { }        # Symbols defined as strings
        self.ignore   = { }        # Ignore strings by state
        self.errorf   = { }        # Error functions by state

        for s in self.stateinfo:
            self.funcsym[s] = []
            self.strsym[s] = []

        if len(tsymbols) == 0:
            self.log.error("No rules of the form t_rulename are defined")
            self.error = 1
            return

        for f in tsymbols:
            t = self.ldict[f]
            # _statetoken splits e.g. 't_foo_NUMBER' into its state list
            # and bare token name
            states, tokname = _statetoken(f,self.stateinfo)
            self.toknames[f] = tokname

            if hasattr(t,"__call__"):
                if tokname == 'error':
                    for s in states:
                        self.errorf[s] = t
                elif tokname == 'ignore':
                    # t_ignore must be a string, never a function
                    line = func_code(t).co_firstlineno
                    file = func_code(t).co_filename
                    self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__)
                    self.error = 1
                else:
                    for s in states:
                        self.funcsym[s].append((f,t))
            elif isinstance(t, StringTypes):
                if tokname == 'ignore':
                    for s in states:
                        self.ignore[s] = t
                    if "\\" in t:
                        self.log.warning("%s contains a literal backslash '\\'",f)

                elif tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", f)
                    self.error = 1
                else:
                    for s in states:
                        self.strsym[s].append((f,t))
            else:
                self.log.error("%s not defined as a function or string", f)
                self.error = 1

        # Sort the functions by line number
        for f in self.funcsym.values():
            if sys.version_info[0] < 3:
                f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno))
            else:
                # Python 3.0
                f.sort(key=lambda x: func_code(x[1]).co_firstlineno)

        # Sort the strings by regular expression length
        for s in self.strsym.values():
            if sys.version_info[0] < 3:
                s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))
            else:
                # Python 3.0
                s.sort(key=lambda x: len(x[1]),reverse=True)

    # Validate all of the t_rules collected
    def validate_rules(self):
        """Check every collected rule for each state.

        Function rules must take exactly one argument (two for bound
        methods), carry a docstring regex that compiles and does not match
        the empty string.  String rules must name a declared token (or be an
        ignore_* rule) and likewise compile to a non-empty-matching regex.
        The per-state error function's arity is also checked, and finally
        each source file is scanned for redefined rules.
        """
        for state in self.stateinfo:
            # Validate all rules defined by functions

            for fname, f in self.funcsym[state]:
                line = func_code(f).co_firstlineno
                file = func_code(f).co_filename
                self.files[file] = 1

                tokname = self.toknames[fname]
                # Bound methods carry an implicit 'self', hence one extra arg
                if isinstance(f, types.MethodType):
                    reqargs = 2
                else:
                    reqargs = 1
                nargs = func_code(f).co_argcount
                if nargs > reqargs:
                    self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
                    self.error = 1
                    continue

                if nargs < reqargs:
                    self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
                    self.error = 1
                    continue

                if not f.__doc__:
                    self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__)
                    self.error = 1
                    continue

                try:
                    # Compile the docstring regex exactly as the master
                    # regex builder will (named group, VERBOSE mode)
                    c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags)
                    if c.match(""):
                        self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__)
                        self.error = 1
                except re.error:
                    # sys.exc_info() form keeps py2/py3 source compatibility
                    _etype, e, _etrace = sys.exc_info()
                    self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e)
                    if '#' in f.__doc__:
                        # '#' starts a comment under re.VERBOSE unless escaped
                        self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__)
                    self.error = 1

            # Validate all rules defined by strings
            for name,r in self.strsym[state]:
                tokname = self.toknames[name]
                if tokname == 'error':
                    self.log.error("Rule '%s' must be defined as a function", name)
                    self.error = 1
                    continue

                if not tokname in self.tokens and tokname.find("ignore_") < 0:
                    self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname)
                    self.error = 1
                    continue

                try:
                    c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags)
                    if (c.match("")):
                        self.log.error("Regular expression for rule '%s' matches empty string",name)
                        self.error = 1
                except re.error:
                    _etype, e, _etrace = sys.exc_info()
                    self.log.error("Invalid regular expression for rule '%s'. %s",name,e)
                    if '#' in r:
                        self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name)
                    self.error = 1

            if not self.funcsym[state] and not self.strsym[state]:
                self.log.error("No rules defined for state '%s'",state)
                self.error = 1

            # Validate the error function
            efunc = self.errorf.get(state,None)
            if efunc:
                f = efunc
                line = func_code(f).co_firstlineno
                file = func_code(f).co_filename
                self.files[file] = 1

                if isinstance(f, types.MethodType):
                    reqargs = 2
                else:
                    reqargs = 1
                nargs = func_code(f).co_argcount
                if nargs > reqargs:
                    self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
                    self.error = 1

                if nargs < reqargs:
                    self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
                    self.error = 1

        for f in self.files:
            self.validate_file(f)


    # -----------------------------------------------------------------------------
    # validate_file()
    #
    # This checks to see if there are duplicated t_rulename() functions or strings
    # in the parser input file. This is done using a simple regular expression
    # match on each line in the given file.
    # -----------------------------------------------------------------------------

    def validate_file(self,filename):
        """Scan a rule source file for duplicated t_* definitions.

        Non-.py files and unreadable files are silently skipped; a
        redefinition is reported with both line numbers.
        """
        import os.path
        base,ext = os.path.splitext(filename)
        if ext != '.py': return         # No idea what the file is. Return OK

        try:
            f = open(filename)
            lines = f.readlines()
            f.close()
        except IOError:
            return                      # Couldn't find the file.  Don't worry about it

        # Matches 'def t_NAME(' and 't_NAME =' respectively
        fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
        sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')

        counthash = { }                 # Rule name -> first line of definition
        linen = 1
        for l in lines:
            m = fre.match(l)
            if not m:
                m = sre.match(l)
            if m:
                name = m.group(1)
                prev = counthash.get(name)
                if not prev:
                    counthash[name] = linen
                else:
                    self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev)
                    self.error = 1
            linen += 1

# -----------------------------------------------------------------------------
# lex(module)
#
# Build all of the regular expression rules from definitions in the supplied module
# -----------------------------------------------------------------------------
def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None):
    """Build and return a Lexer from rule definitions.

    Definitions are taken from *object* if given, otherwise *module*,
    otherwise the caller's own namespace.  With optimize=1 validation is
    skipped and a precompiled table module (*lextab*) is read/written.
    Also rebinds the module-level globals ``lexer``, ``token`` and
    ``input`` to the new lexer for the convenience functions.
    Raises SyntaxError when validation of the rules fails.
    """
    global lexer
    ldict = None
    stateinfo  = { 'INITIAL' : 'inclusive'}
    lexobj = Lexer()
    lexobj.lexoptimize = optimize
    global token,input

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    if debug:
        if debuglog is None:
            debuglog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the lexer
    if object: module = object

    if module:
        _items = [(k,getattr(module,k)) for k in dir(module)]
        ldict = dict(_items)
    else:
        # No module given: reflect on the caller's stack frame
        ldict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    linfo = LexerReflect(ldict,log=errorlog,reflags=reflags)
    linfo.get_all()
    if not optimize:
        if linfo.validate_all():
            raise SyntaxError("Can't build lexer")

    # Optimized mode: try to reuse a previously written table module
    if optimize and lextab:
        try:
            lexobj.readtab(lextab,ldict)
            token = lexobj.token
            input = lexobj.input
            lexer = lexobj
            return lexobj

        except ImportError:
            # No table module yet -- fall through and build from scratch
            pass

    # Dump some basic debugging information
    if debug:
        debuglog.info("lex: tokens = %r", linfo.tokens)
        debuglog.info("lex: literals = %r", linfo.literals)
        debuglog.info("lex: states = %r", linfo.stateinfo)

    # Build a dictionary of valid token names
    lexobj.lextokens = { }
    for n in linfo.tokens:
        lexobj.lextokens[n] = 1

    # Get literals specification
    # NOTE(review): raises IndexError when literals is an empty list/tuple
    # (type(linfo.literals[0]) is evaluated unconditionally) -- confirm intended
    if isinstance(linfo.literals,(list,tuple)):
        lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
    else:
        lexobj.lexliterals = linfo.literals

    # Get the stateinfo dictionary
    stateinfo = linfo.stateinfo

    regexs = { }
    # Build the master regular expressions
    for state in stateinfo:
        regex_list = []

        # Add rules defined by functions first
        for fname, f in linfo.funcsym[state]:
            line = func_code(f).co_firstlineno
            file = func_code(f).co_filename
            regex_list.append("(?P<%s>%s)" % (fname,f.__doc__))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state)

        # Now add all of the simple rules
        for name,r in linfo.strsym[state]:
            regex_list.append("(?P<%s>%s)" % (name,r))
            if debug:
                debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state)

        regexs[state] = regex_list

    # Build the master regular expressions

    if debug:
        debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")

    for state in regexs:
        lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames)
        lexobj.lexstatere[state] = lexre
        lexobj.lexstateretext[state] = re_text
        lexobj.lexstaterenames[state] = re_names
        if debug:
            for i in range(len(re_text)):
                debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i])

    # For inclusive states, we need to add the regular expressions from the INITIAL state
    for state,stype in stateinfo.items():
        if state != "INITIAL" and stype == 'inclusive':
            lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
            lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
            lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])

    lexobj.lexstateinfo = stateinfo
    lexobj.lexre = lexobj.lexstatere["INITIAL"]
    lexobj.lexretext = lexobj.lexstateretext["INITIAL"]

    # Set up ignore variables
    lexobj.lexstateignore = linfo.ignore
    lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")

    # Set up error functions
    lexobj.lexstateerrorf = linfo.errorf
    lexobj.lexerrorf = linfo.errorf.get("INITIAL",None)
    if not lexobj.lexerrorf:
        errorlog.warning("No t_error rule is defined")

    # Check state information for ignore and error rules
    for s,stype in stateinfo.items():
        if stype == 'exclusive':
            if not s in linfo.errorf:
                errorlog.warning("No error rule is defined for exclusive state '%s'", s)
            if not s in linfo.ignore and lexobj.lexignore:
                errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
        elif stype == 'inclusive':
            # Inclusive states inherit INITIAL's error and ignore rules
            if not s in linfo.errorf:
                linfo.errorf[s] = linfo.errorf.get("INITIAL",None)
            if not s in linfo.ignore:
                linfo.ignore[s] = linfo.ignore.get("INITIAL","")

    # Create global versions of the token() and input() functions
    token = lexobj.token
    input = lexobj.input
    lexer = lexobj

    # If in optimize mode, we write the lextab
    if lextab and optimize:
        lexobj.writetab(lextab,outputdir)

    return lexobj
# -----------------------------------------------------------------------------
# runmain()
#
# This runs the lexer as a main program
# -----------------------------------------------------------------------------

def runmain(lexer=None,data=None):
    """Tokenize *data* with *lexer* (or the module-level lexer) and print
    each token as (type,value,lineno,lexpos), one per line, to stdout.

    When *data* is not supplied, it is read from the file named by
    sys.argv[1], or from standard input if no argument was given.
    """
    if not data:
        if len(sys.argv) > 1:
            # Read the whole input file named on the command line
            fp = open(sys.argv[1])
            data = fp.read()
            fp.close()
        else:
            sys.stdout.write("Reading from standard input (type EOF to end):\n")
            data = sys.stdin.read()

    # Prefer the explicitly supplied lexer; otherwise fall back to the
    # module-level input()/token() convenience functions
    _input = lexer.input if lexer else input
    _input(data)
    _token = lexer.token if lexer else token

    while True:
        tok = _token()
        if not tok:
            break
        sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos))

# -----------------------------------------------------------------------------
# @TOKEN(regex)
#
# This decorator function can be used to set the regex expression on a function
# when its docstring might need to be set in an alternative way
# -----------------------------------------------------------------------------

def TOKEN(r):
    """Decorator that stores the token regex *r* on the decorated rule.

    *r* may be a pattern string, or a callable whose docstring supplies
    the pattern; either way it becomes the rule function's __doc__.
    """
    def set_doc(f):
        f.__doc__ = r.__doc__ if hasattr(r,"__call__") else r
        return f
    return set_doc
# Alternative spelling of the TOKEN decorator (kept for backward compatibility)
Token = TOKEN