# isa_parser.py revision 6985
# Copyright (c) 2003-2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Steve Reinhardt

import os
import sys
import re
import string
import traceback
# get type names
from types import *

from m5.util.grammar import Grammar

###################
# Utility functions

#
# Indent every line in string 's' by two spaces
# (except preprocessor directives).
# Used to make nested code blocks look pretty.
#
def indent(s):
    # The zero-width match sits at the start of every line that does not
    # begin with '#', so preprocessor directives stay in column 0.
    return re.sub(r'(?m)^(?!#)', '  ', s)

#
# Munge a somewhat arbitrarily formatted piece of Python code
# (e.g. from a format 'let' block) into something whose indentation
# will get by the Python parser.
#
# The two keys here are that Python will give a syntax error if
# there's any whitespace at the beginning of the first line, and that
# all lines at the same lexical nesting level must have identical
# indentation. Unfortunately the way code literals work, an entire
# let block tends to have some initial indentation. Rather than
# trying to figure out what that is and strip it off, we prepend 'if
# 1:' to make the let code the nested block inside the if (and have
# the parser automatically deal with the indentation for us).
#
# We don't want to do this if (1) the code block is empty or (2) the
# first line of the block doesn't have any whitespace at the front.

def fixPythonIndentation(s):
    # get rid of blank lines first
    s = re.sub(r'(?m)^\s*\n', '', s)
    if s != '' and re.match(r'[ \t]', s[0]):
        s = 'if 1:\n' + s
    return s

# Error handler. Just call exit. Output formatted to work under
# Emacs compile-mode. Optional 'print_traceback' arg, if set to True,
# prints a Python stack backtrace too (can be handy when trying to
# debug the parser itself).
def error(lineno, string, print_traceback = False):
    '''Report a fatal error and exit.

    Prints one "In file included from" line for every entry of
    fileNameStack except the last, optionally prints the current
    exception traceback, then calls sys.exit() with a message of the
    form "file:line: text" (the line part is omitted when lineno is 0).'''
    spaces = ""
    for (filename, line) in fileNameStack[0:-1]:
        print(spaces + "In file included from " + filename + ":")
        spaces += " "
    # Print a Python stack backtrace if requested.
    if print_traceback:
        traceback.print_exc()
    if lineno != 0:
        line_str = "%d:" % lineno
    else:
        line_str = ""
    sys.exit(spaces + "%s:%s %s" % (fileNameStack[-1][0], line_str, string))

####################
# Template objects.
#
# Template objects are format strings that allow substitution from
# the attribute spaces of other objects (e.g. InstObjParams instances).

# Matches %(label)s / %(label)d references that are not escaped by a
# preceding '%'; group 1 is the label.
labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')

class Template(object):
    def __init__(self, t):
        self.template = t

    def subst(self, d):
        '''Return the template text with its %(name)s references
        substituted from 'd', which may be an InstObjParams instance, a
        dictionary, or any object with a __dict__.  Raises TypeError
        for anything else.'''
        # Protect non-Python-dict substitutions (e.g. if there's a printf
        # in the templated C++ code)
        template = protect_non_subst_percents(self.template)
        # CPU-model-specific substitutions are handled later (in GenCode).
        template = protect_cpu_symbols(template)

        # Build a dict ('myDict') to use for the template substitution.
        # Start with the template namespace. Make a copy since we're
        # going to modify it.
        myDict = parser.templateMap.copy()

        if isinstance(d, InstObjParams):
            # If we're dealing with an InstObjParams object, we need
            # to be a little more sophisticated. The instruction-wide
            # parameters are already formed, but the parameters which
            # are only function wide still need to be generated.
            myDict.update(d.__dict__)
            # The "operands" and "snippets" attributes of the InstObjParams
            # objects are for internal use and not substitution.
            del myDict['operands']
            del myDict['snippets']

            snippetLabels = [l for l in labelRE.findall(template)
                             if l in d.snippets]

            snippets = dict([(s, mungeSnippet(d.snippets[s]))
                             for s in snippetLabels])

            myDict.update(snippets)

            compositeCode = ' '.join(map(str, snippets.values()))

            # Add in template itself in case it references any
            # operands explicitly (like Mem)
            compositeCode += ' ' + template

            operands = SubOperandList(compositeCode, d.operands)

            myDict['op_decl'] = operands.concatAttrStrings('op_decl')

            is_src = lambda op: op.is_src
            is_dest = lambda op: op.is_dest

            myDict['op_src_decl'] = \
                operands.concatSomeAttrStrings(is_src, 'op_src_decl')
            myDict['op_dest_decl'] = \
                operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')

            myDict['op_rd'] = operands.concatAttrStrings('op_rd')
            myDict['op_wb'] = operands.concatAttrStrings('op_wb')

            if d.operands.memOperand:
                myDict['mem_acc_size'] = d.operands.memOperand.mem_acc_size
                myDict['mem_acc_type'] = d.operands.memOperand.mem_acc_type

        elif isinstance(d, dict):
            # if the argument is a dictionary, we just use it.
            myDict.update(d)
        elif hasattr(d, '__dict__'):
            # if the argument is an object, we use its attribute map.
            myDict.update(d.__dict__)
        else:
            raise TypeError("Template.subst() arg must be or have dictionary")
        return template % myDict

    # Convert to string. This handles the case when a template with a
    # CPU-specific term gets interpolated into another template or into
    # an output block.
    def __str__(self):
        return expand_cpu_symbols_to_string(self.template)

################
# Format object.
#
# A format object encapsulates an instruction format. It must provide
# a defineInst() method that generates the code for an instruction
# definition.

# Names from this module that are made visible to user format code.
exportContextSymbols = ('InstObjParams', 'makeList', 're', 'string')

exportContext = {}

def updateExportContext():
    '''Refresh exportContext with the exported symbols and the current
    template map.'''
    exportContext.update(exportDict(*exportContextSymbols))
    exportContext.update(parser.templateMap)

def exportDict(*symNames):
    '''Return a dict mapping each given name to the object it
    evaluates to in this module.'''
    return dict([(s, eval(s)) for s in symNames])


class Format(object):
    def __init__(self, id, params, code):
        # constructor: just save away arguments
        self.id = id
        self.params = params
        label = 'def format ' + id
        self.user_code = compile(fixPythonIndentation(code), label, 'exec')
        param_list = ", ".join(params)
        # Build a wrapper function whose signature carries the format's
        # parameter list, so Python itself does the argument binding.
        f = '''def defInst(_code, _context, %s):
                my_locals = vars().copy()
                exec(_code, _context, my_locals)
                return my_locals\n''' % param_list
        c = compile(f, label + ' wrapper', 'exec')
        # Execute the wrapper definition in an explicit namespace and
        # fetch the function from it; relying on exec to create a local
        # name only works with the Python 2 exec statement.
        wrapper_ns = {}
        exec(c, wrapper_ns)
        self.func = wrapper_ns['defInst']

    def defineInst(self, name, args, lineno):
        '''Run the format's code for instruction 'name' and return the
        resulting GenCode.  'args' is a (positional, keyword) pair that
        is passed on to the format.'''
        context = {}
        updateExportContext()
        context.update(exportContext)
        # 'Name' is 'name' with its first character upper-cased; slicing
        # keeps this well-defined for an empty name as well.
        Name = name[:1].upper() + name[1:]
        context.update({ 'name': name, 'Name': Name })
        try:
            inst_locals = self.func(self.user_code, context,
                                    *args[0], **args[1])
        except Exception as exc:
            error(lineno, 'error defining "%s": %s.' % (name, exc))
        # Only the recognized output variables are handed to GenCode.
        # Build a new dict rather than deleting from the one being
        # iterated over.
        output_names = ('header_output', 'decoder_output',
                        'exec_output', 'decode_block')
        outputs = dict((k, v) for (k, v) in inst_locals.items()
                       if k in output_names)
        return GenCode(**outputs)

# Special null format to catch an implicit-format instruction
# definition outside of any format block.
class NoFormat(object):
    def __init__(self):
        self.defaultInst = ''

    def defineInst(self, name, args, lineno):
        error(lineno,
              'instruction definition "%s" with no active format!' % name)

# This dictionary maps format name strings to Format objects.
formatMap = {}

# Define a new format
def defFormat(id, params, code, lineno):
    # make sure we haven't already defined this one
    if formatMap.get(id, None) is not None:
        error(lineno, 'format %s redefined.' % id)
    # create new object and store in global map
    formatMap[id] = Format(id, params, code)

#####################################################################
#
#                         Support Classes
#
#####################################################################

# Expand template with CPU-specific references into a dictionary with
# an entry for each CPU model name. The entry key is the model name
# and the corresponding value is the template with the CPU-specific
# refs substituted for that model.
def expand_cpu_symbols_to_dict(template):
    # Protect '%'s that don't go with CPU-specific terms
    t = re.sub(r'%(?!\(CPU_)', '%%', template)
    result = {}
    for cpu in cpu_models:
        result[cpu.name] = t % cpu.strings
    return result

# *If* the template has CPU-specific references, return a single
# string containing a copy of the template for each CPU model with the
# corresponding values substituted in. If the template has no
# CPU-specific references, it is returned unmodified.
def expand_cpu_symbols_to_string(template):
    '''Return 'template' unchanged if it contains no '%(CPU_'
    reference; otherwise return the concatenation of its per-CPU-model
    expansions.'''
    if template.find('%(CPU_') != -1:
        return ''.join(expand_cpu_symbols_to_dict(template).values())
    else:
        return template

# Protect CPU-specific references by doubling the corresponding '%'s
# (in preparation for substituting a different set of references into
# the template).
def protect_cpu_symbols(template):
    return re.sub(r'%(?=\(CPU_)', '%%', template)

# Protect any non-dict-substitution '%'s in a format string
# (i.e. those not followed by '(')
def protect_non_subst_percents(s):
    return re.sub(r'%(?!\()', '%%', s)

###############
# GenCode class
#
# The GenCode class encapsulates generated code destined for various
# output files. The header_output and decoder_output attributes are
# strings containing code destined for decoder.hh and decoder.cc
# respectively. The decode_block attribute contains code to be
# incorporated in the decode function itself (that will also end up in
# decoder.cc). The exec_output attribute is a dictionary with a key
# for each CPU model name; the value associated with a particular key
# is the string of code for that CPU model's exec.cc file. The
# has_decode_default attribute is used in the decode block to allow
# explicit default clauses to override default default clauses.

class GenCode(object):
    # Constructor. At this point we substitute out all CPU-specific
    # symbols. For the exec output, these go into the per-model
    # dictionary. For all other output types they get collapsed into
    # a single string.
    def __init__(self,
                 header_output = '', decoder_output = '', exec_output = '',
                 decode_block = '', has_decode_default = False):
        self.header_output = expand_cpu_symbols_to_string(header_output)
        self.decoder_output = expand_cpu_symbols_to_string(decoder_output)
        if isinstance(exec_output, dict):
            self.exec_output = exec_output
        elif isinstance(exec_output, str):
            # If the exec_output arg is a single string, we replicate
            # it for each of the CPU models, substituting any
            # %(CPU_foo)s params appropriately.
            self.exec_output = expand_cpu_symbols_to_dict(exec_output)
        self.decode_block = expand_cpu_symbols_to_string(decode_block)
        self.has_decode_default = has_decode_default

    # Override '+' operator: generate a new GenCode object that
    # concatenates all the individual strings in the operands.
    def __add__(self, other):
        exec_output = {}
        for cpu in cpu_models:
            n = cpu.name
            exec_output[n] = self.exec_output[n] + other.exec_output[n]
        return GenCode(self.header_output + other.header_output,
                       self.decoder_output + other.decoder_output,
                       exec_output,
                       self.decode_block + other.decode_block,
                       self.has_decode_default or other.has_decode_default)

    # Prepend a string (typically a comment) to all the strings.
    def prepend_all(self, pre):
        self.header_output = pre + self.header_output
        self.decoder_output = pre + self.decoder_output
        self.decode_block = pre + self.decode_block
        for cpu in cpu_models:
            self.exec_output[cpu.name] = pre + self.exec_output[cpu.name]

    # Wrap the decode block in a pair of strings (e.g., 'case foo:'
    # and 'break;'). Used to build the big nested switch statement.
    def wrap_decode_block(self, pre, post = ''):
        self.decode_block = pre + indent(self.decode_block) + post

#####################################################################
#
#                      Bitfield Operator Support
#
#####################################################################

bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')

bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')

def substBitOps(code):
    '''Rewrite the bit-selection operators in 'code' as bits() calls
    and return the result.  Exits via sys.exit() if a selector follows
    a ')' that has no matching '('.'''
    # first convert single-index selectors to two-index form
    # i.e., <n:> --> <n:n>
    code = bitOp1ArgRE.sub(r'<\1:\1>', code)
    # simple case: selector applied to ID (name)
    # i.e., foo<a:b> --> bits(foo, a, b)
    code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
    # if selector is applied to expression (ending in ')'),
    # we need to search backward for matching '('
    match = bitOpExprRE.search(code)
    while match:
        exprEnd = match.start()
        here = exprEnd - 1
        nestLevel = 1
        while nestLevel > 0:
            # Check the bound before looking at the character, so a
            # matching '(' at index 0 is accepted and only a genuinely
            # unbalanced expression is an error.
            if here < 0:
                sys.exit("Didn't find '('!")
            if code[here] == '(':
                nestLevel -= 1
            elif code[here] == ')':
                nestLevel += 1
            here -= 1
        exprStart = here+1
        newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
                                         match.group(1), match.group(2))
        code = code[:exprStart] + newExpr + code[match.end():]
        match = bitOpExprRE.search(code)
    return code


#####################################################################
#
#                             Code Parser
#
# The remaining code is the support for automatically extracting
# instruction characteristics from pseudocode.
#
#####################################################################

# Force the argument to be a list. Useful for flags, where a caller
# can specify a singleton flag or a list of flags. Also useful for
# converting tuples to lists so they can be modified.
def makeList(arg):
    if isinstance(arg, list):
        return arg
    elif isinstance(arg, tuple):
        return list(arg)
    elif not arg:
        return []
    else:
        return [ arg ]

# Generate operandTypeMap from the user's 'def operand_types'
# statement.
def buildOperandTypeMap(user_dict, lineno):
    '''Rebuild the global operandTypeMap from 'user_dict', which maps
    each type extension to a (description, size) pair.  Every entry
    becomes a (size, ctype, is_signed) tuple.  A description (or float
    size) that yields no C type is reported through error().'''
    global operandTypeMap
    operandTypeMap = {}
    for (ext, (desc, size)) in user_dict.items():
        # Reset for every entry so that an unrecognized description is
        # detected below instead of reusing the previous entry's type.
        ctype = ''
        is_signed = 0
        if desc == 'signed int':
            ctype = 'int%d_t' % size
            is_signed = 1
        elif desc == 'unsigned int':
            ctype = 'uint%d_t' % size
            is_signed = 0
        elif desc == 'float':
            is_signed = 1       # shouldn't really matter
            if size == 32:
                ctype = 'float'
            elif size == 64:
                ctype = 'double'
        elif desc == 'twin64 int':
            is_signed = 0
            ctype = 'Twin64_t'
        elif desc == 'twin32 int':
            is_signed = 0
            ctype = 'Twin32_t'
        if ctype == '':
            error(lineno,
                  'Unrecognized type description "%s" in user_dict' % desc)
        operandTypeMap[ext] = (size, ctype, is_signed)

class Operand(object):
    '''Base class for operand descriptors. An instance of this class
    (or actually a class derived from this one) represents a specific
    operand for a code block (e.g, "Rc.sq" as a dest). Intermediate
    derived classes encapsulates the traits of a particular operand
    type (e.g., "32-bit integer register").'''

    def buildReadCode(self, func = None):
        code = self.read_code % {"name": self.base_name,
                                 "func": func,
                                 "op_idx": self.src_reg_idx,
                                 "reg_idx": self.reg_spec,
                                 "size": self.size,
                                 "ctype": self.ctype}
        if self.size != self.dflt_size:
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, code, self.size-1)
        else:
            return '%s = %s;\n' % \
                   (self.base_name, code)

    def buildWriteCode(self, func = None):
        if (self.size != self.dflt_size and self.is_signed):
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        else:
            final_val = self.base_name
        code = self.write_code % {"name": self.base_name,
                                  "func": func,
                                  "op_idx": self.dest_reg_idx,
                                  "reg_idx": self.reg_spec,
                                  "size": self.size,
                                  "ctype": self.ctype,
                                  "final_val": final_val}
        return '''
        {
            %s final_val = %s;
            %s;
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, code)

    def __init__(self, full_name, ext, is_src, is_dest):
        self.full_name = full_name
        self.ext = ext
        self.is_src = is_src
        self.is_dest = is_dest
        # The 'effective extension' (eff_ext) is either the actual
        # extension, if one was explicitly provided, or the default.
        if ext:
            self.eff_ext = ext
        else:
            self.eff_ext = self.dflt_ext

        (self.size, self.ctype, self.is_signed) = operandTypeMap[self.eff_ext]

        # note that mem_acc_size is undefined for non-mem operands...
        # template must be careful not to use it if it doesn't apply.
        if self.isMem():
            self.mem_acc_size = self.makeAccSize()
            if self.ctype in ['Twin32_t', 'Twin64_t']:
                self.mem_acc_type = 'Twin'
            else:
                self.mem_acc_type = 'uint'

    # Finalize additional fields (primarily code fields). This step
    # is done separately since some of these fields may depend on the
    # register index enumeration that hasn't been performed yet at the
    # time of __init__().
    def finalize(self):
        self.flags = self.getFlags()
        self.constructor = self.makeConstructor()
        self.op_decl = self.makeDecl()

        if self.is_src:
            self.op_rd = self.makeRead()
            self.op_src_decl = self.makeDecl()
        else:
            self.op_rd = ''
            self.op_src_decl = ''

        if self.is_dest:
            self.op_wb = self.makeWrite()
            self.op_dest_decl = self.makeDecl()
        else:
            self.op_wb = ''
            self.op_dest_decl = ''

    def isMem(self):
        return 0

    def isReg(self):
        return 0

    def isFloatReg(self):
        return 0

    def isIntReg(self):
        return 0

    def isControlReg(self):
        return 0

    def getFlags(self):
        # note the empty slice '[:]' gives us a copy of self.flags[0]
        # instead of a reference to it
        my_flags = self.flags[0][:]
        if self.is_src:
            my_flags += self.flags[1]
        if self.is_dest:
            my_flags += self.flags[2]
        return my_flags

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        return self.ctype + ' ' + self.base_name + ' = 0;\n'

class IntRegOperand(Operand):
    def isReg(self):
        return 1

    def isIntReg(self):
        return 1

    def makeConstructor(self):
        c = ''
        if self.is_src:
            c += '\n\t_srcRegIdx[%d] = %s;' % \
                 (self.src_reg_idx, self.reg_spec)
        if self.is_dest:
            c += '\n\t_destRegIdx[%d] = %s;' % \
                 (self.dest_reg_idx, self.reg_spec)
        return c

    def makeRead(self):
        if (self.ctype == 'float' or self.ctype == 'double'):
            error(0, 'Attempt to read integer register as FP')
        if self.read_code != None:
            return self.buildReadCode('readIntRegOperand')
        if (self.size == self.dflt_size):
            return '%s = xc->readIntRegOperand(this, %d);\n' % \
                   (self.base_name, self.src_reg_idx)
        elif (self.size > self.dflt_size):
            int_reg_val = 'xc->readIntRegOperand(this, %d)' % \
                          (self.src_reg_idx)
            if (self.is_signed):
                int_reg_val = 'sext<%d>(%s)' % (self.dflt_size, int_reg_val)
            return '%s = %s;\n' % (self.base_name, int_reg_val)
        else:
            return '%s = bits(xc->readIntRegOperand(this, %d), %d, 0);\n' % \
                   (self.base_name, self.src_reg_idx, self.size-1)

    def makeWrite(self):
        if (self.ctype == 'float' or self.ctype == 'double'):
            error(0, 'Attempt to write integer register as FP')
        if self.write_code != None:
            return self.buildWriteCode('setIntRegOperand')
        if (self.size != self.dflt_size and self.is_signed):
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        else:
            final_val = self.base_name
        wb = '''
        {
            %s final_val = %s;
            xc->setIntRegOperand(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, self.dest_reg_idx)
        return wb

class FloatRegOperand(Operand):
    def isReg(self):
        return 1

    def isFloatReg(self):
        return 1

    def makeConstructor(self):
        c = ''
        if self.is_src:
            c += '\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' % \
                 (self.src_reg_idx, self.reg_spec)
        if self.is_dest:
            c += '\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' % \
                 (self.dest_reg_idx, self.reg_spec)
        return c

    def makeRead(self):
        bit_select = 0
        if (self.ctype == 'float' or self.ctype == 'double'):
            func = 'readFloatRegOperand'
        else:
            func = 'readFloatRegOperandBits'
            if (self.size != self.dflt_size):
                bit_select = 1
        base = 'xc->%s(this, %d)' % (func, self.src_reg_idx)
        if self.read_code != None:
            return self.buildReadCode(func)
        if bit_select:
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, base, self.size-1)
        else:
            return '%s = %s;\n' % (self.base_name, base)

    def makeWrite(self):
        final_val = self.base_name
        final_ctype = self.ctype
        if (self.ctype == 'float' or self.ctype == 'double'):
            func = 'setFloatRegOperand'
        elif (self.ctype == 'uint32_t' or self.ctype == 'uint64_t'):
            func = 'setFloatRegOperandBits'
        else:
            func = 'setFloatRegOperandBits'
            final_ctype = 'uint%d_t' % self.dflt_size
        if (self.size != self.dflt_size and self.is_signed):
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        if self.write_code != None:
            return self.buildWriteCode(func)
        wb = '''
        {
            %s final_val = %s;
            xc->%s(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (final_ctype, final_val, func, self.dest_reg_idx)
        return wb

class ControlRegOperand(Operand):
    def isReg(self):
        return 1

    def isControlReg(self):
        return 1

    def makeConstructor(self):
        c = ''
        if self.is_src:
            c += '\n\t_srcRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
                 (self.src_reg_idx, self.reg_spec)
        if self.is_dest:
            c += '\n\t_destRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
                 (self.dest_reg_idx, self.reg_spec)
        return c

    def makeRead(self):
        if (self.ctype == 'float' or self.ctype == 'double'):
            error(0, 'Attempt to read control register as FP')
        if self.read_code != None:
            return self.buildReadCode('readMiscRegOperand')
        base = 'xc->readMiscRegOperand(this, %s)' % self.src_reg_idx
        if self.size == self.dflt_size:
            return '%s = %s;\n' % (self.base_name, base)
        else:
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, base, self.size-1)

    def makeWrite(self):
        if (self.ctype == 'float' or self.ctype == 'double'):
            error(0, 'Attempt to write control register as FP')
        if self.write_code != None:
            return self.buildWriteCode('setMiscRegOperand')
        wb = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
             (self.dest_reg_idx, self.base_name)
        wb += 'if (traceData) { traceData->setData(%s); }' % \
              self.base_name
        return wb

class MemOperand(Operand):
    def isMem(self):
        return 1

    def makeConstructor(self):
        return ''

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        # Declare memory data variable.
        if self.ctype in ['Twin32_t','Twin64_t']:
            return "%s %s; %s.a = 0; %s.b = 0;\n" % \
                   (self.ctype, self.base_name, self.base_name, self.base_name)
        return '%s %s = 0;\n' % (self.ctype, self.base_name)

    def makeRead(self):
        if self.read_code != None:
            return self.buildReadCode()
        return ''

    def makeWrite(self):
        if self.write_code != None:
            return self.buildWriteCode()
        return ''

    # Return the memory access size *in bits*, suitable for
    # forming a type via "uint%d_t". Divide by 8 if you want bytes.
739 def makeAccSize(self): 740 return self.size 741 742class PCOperand(Operand): 743 def makeConstructor(self): 744 return '' 745 746 def makeRead(self): 747 return '%s = xc->readPC();\n' % self.base_name 748 749 def makeWrite(self): 750 return 'xc->setPC(%s);\n' % self.base_name 751 752class UPCOperand(Operand): 753 def makeConstructor(self): 754 return '' 755 756 def makeRead(self): 757 if self.read_code != None: 758 return self.buildReadCode('readMicroPC') 759 return '%s = xc->readMicroPC();\n' % self.base_name 760 761 def makeWrite(self): 762 if self.write_code != None: 763 return self.buildWriteCode('setMicroPC') 764 return 'xc->setMicroPC(%s);\n' % self.base_name 765 766class NUPCOperand(Operand): 767 def makeConstructor(self): 768 return '' 769 770 def makeRead(self): 771 if self.read_code != None: 772 return self.buildReadCode('readNextMicroPC') 773 return '%s = xc->readNextMicroPC();\n' % self.base_name 774 775 def makeWrite(self): 776 if self.write_code != None: 777 return self.buildWriteCode('setNextMicroPC') 778 return 'xc->setNextMicroPC(%s);\n' % self.base_name 779 780class NPCOperand(Operand): 781 def makeConstructor(self): 782 return '' 783 784 def makeRead(self): 785 if self.read_code != None: 786 return self.buildReadCode('readNextPC') 787 return '%s = xc->readNextPC();\n' % self.base_name 788 789 def makeWrite(self): 790 if self.write_code != None: 791 return self.buildWriteCode('setNextPC') 792 return 'xc->setNextPC(%s);\n' % self.base_name 793 794class NNPCOperand(Operand): 795 def makeConstructor(self): 796 return '' 797 798 def makeRead(self): 799 if self.read_code != None: 800 return self.buildReadCode('readNextNPC') 801 return '%s = xc->readNextNPC();\n' % self.base_name 802 803 def makeWrite(self): 804 if self.write_code != None: 805 return self.buildWriteCode('setNextNPC') 806 return 'xc->setNextNPC(%s);\n' % self.base_name 807 808def buildOperandNameMap(user_dict, lineno): 809 global operandNameMap 810 operandNameMap = {} 811 for 
(op_name, val) in user_dict.iteritems(): 812 (base_cls_name, dflt_ext, reg_spec, flags, sort_pri) = val[:5] 813 if len(val) > 5: 814 read_code = val[5] 815 else: 816 read_code = None 817 if len(val) > 6: 818 write_code = val[6] 819 else: 820 write_code = None 821 if len(val) > 7: 822 error(lineno, 823 'error: too many attributes for operand "%s"' % 824 base_cls_name) 825 826 (dflt_size, dflt_ctype, dflt_is_signed) = operandTypeMap[dflt_ext] 827 # Canonical flag structure is a triple of lists, where each list 828 # indicates the set of flags implied by this operand always, when 829 # used as a source, and when used as a dest, respectively. 830 # For simplicity this can be initialized using a variety of fairly 831 # obvious shortcuts; we convert these to canonical form here. 832 if not flags: 833 # no flags specified (e.g., 'None') 834 flags = ( [], [], [] ) 835 elif isinstance(flags, str): 836 # a single flag: assumed to be unconditional 837 flags = ( [ flags ], [], [] ) 838 elif isinstance(flags, list): 839 # a list of flags: also assumed to be unconditional 840 flags = ( flags, [], [] ) 841 elif isinstance(flags, tuple): 842 # it's a tuple: it should be a triple, 843 # but each item could be a single string or a list 844 (uncond_flags, src_flags, dest_flags) = flags 845 flags = (makeList(uncond_flags), 846 makeList(src_flags), makeList(dest_flags)) 847 # Accumulate attributes of new operand class in tmp_dict 848 tmp_dict = {} 849 for attr in ('dflt_ext', 'reg_spec', 'flags', 'sort_pri', 850 'dflt_size', 'dflt_ctype', 'dflt_is_signed', 851 'read_code', 'write_code'): 852 tmp_dict[attr] = eval(attr) 853 tmp_dict['base_name'] = op_name 854 # New class name will be e.g. "IntReg_Ra" 855 cls_name = base_cls_name + '_' + op_name 856 # Evaluate string arg to get class object. Note that the 857 # actual base class for "IntReg" is "IntRegOperand", i.e. we 858 # have to append "Operand". 
859 try: 860 base_cls = eval(base_cls_name + 'Operand') 861 except NameError: 862 error(lineno, 863 'error: unknown operand base class "%s"' % base_cls_name) 864 # The following statement creates a new class called 865 # <cls_name> as a subclass of <base_cls> with the attributes 866 # in tmp_dict, just as if we evaluated a class declaration. 867 operandNameMap[op_name] = type(cls_name, (base_cls,), tmp_dict) 868 869 # Define operand variables. 870 operands = user_dict.keys() 871 872 operandsREString = (r''' 873 (?<![\w\.]) # neg. lookbehind assertion: prevent partial matches 874 ((%s)(?:\.(\w+))?) # match: operand with optional '.' then suffix 875 (?![\w\.]) # neg. lookahead assertion: prevent partial matches 876 ''' 877 % string.join(operands, '|')) 878 879 global operandsRE 880 operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE) 881 882 # Same as operandsREString, but extension is mandatory, and only two 883 # groups are returned (base and ext, not full name as above). 884 # Used for subtituting '_' for '.' to make C++ identifiers. 885 operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])' 886 % string.join(operands, '|')) 887 888 global operandsWithExtRE 889 operandsWithExtRE = re.compile(operandsWithExtREString, re.MULTILINE) 890 891maxInstSrcRegs = 0 892maxInstDestRegs = 0 893 894class OperandList(object): 895 '''Find all the operands in the given code block. 
Returns an operand 896 descriptor list (instance of class OperandList).''' 897 def __init__(self, code): 898 self.items = [] 899 self.bases = {} 900 # delete comments so we don't match on reg specifiers inside 901 code = commentRE.sub('', code) 902 # search for operands 903 next_pos = 0 904 while 1: 905 match = operandsRE.search(code, next_pos) 906 if not match: 907 # no more matches: we're done 908 break 909 op = match.groups() 910 # regexp groups are operand full name, base, and extension 911 (op_full, op_base, op_ext) = op 912 # if the token following the operand is an assignment, this is 913 # a destination (LHS), else it's a source (RHS) 914 is_dest = (assignRE.match(code, match.end()) != None) 915 is_src = not is_dest 916 # see if we've already seen this one 917 op_desc = self.find_base(op_base) 918 if op_desc: 919 if op_desc.ext != op_ext: 920 error(0, 'Inconsistent extensions for operand %s' % \ 921 op_base) 922 op_desc.is_src = op_desc.is_src or is_src 923 op_desc.is_dest = op_desc.is_dest or is_dest 924 else: 925 # new operand: create new descriptor 926 op_desc = operandNameMap[op_base](op_full, op_ext, 927 is_src, is_dest) 928 self.append(op_desc) 929 # start next search after end of current match 930 next_pos = match.end() 931 self.sort() 932 # enumerate source & dest register operands... 
used in building 933 # constructor later 934 self.numSrcRegs = 0 935 self.numDestRegs = 0 936 self.numFPDestRegs = 0 937 self.numIntDestRegs = 0 938 self.memOperand = None 939 for op_desc in self.items: 940 if op_desc.isReg(): 941 if op_desc.is_src: 942 op_desc.src_reg_idx = self.numSrcRegs 943 self.numSrcRegs += 1 944 if op_desc.is_dest: 945 op_desc.dest_reg_idx = self.numDestRegs 946 self.numDestRegs += 1 947 if op_desc.isFloatReg(): 948 self.numFPDestRegs += 1 949 elif op_desc.isIntReg(): 950 self.numIntDestRegs += 1 951 elif op_desc.isMem(): 952 if self.memOperand: 953 error(0, "Code block has more than one memory operand.") 954 self.memOperand = op_desc 955 global maxInstSrcRegs 956 global maxInstDestRegs 957 if maxInstSrcRegs < self.numSrcRegs: 958 maxInstSrcRegs = self.numSrcRegs 959 if maxInstDestRegs < self.numDestRegs: 960 maxInstDestRegs = self.numDestRegs 961 # now make a final pass to finalize op_desc fields that may depend 962 # on the register enumeration 963 for op_desc in self.items: 964 op_desc.finalize() 965 966 def __len__(self): 967 return len(self.items) 968 969 def __getitem__(self, index): 970 return self.items[index] 971 972 def append(self, op_desc): 973 self.items.append(op_desc) 974 self.bases[op_desc.base_name] = op_desc 975 976 def find_base(self, base_name): 977 # like self.bases[base_name], but returns None if not found 978 # (rather than raising exception) 979 return self.bases.get(base_name) 980 981 # internal helper function for concat[Some]Attr{Strings|Lists} 982 def __internalConcatAttrs(self, attr_name, filter, result): 983 for op_desc in self.items: 984 if filter(op_desc): 985 result += getattr(op_desc, attr_name) 986 return result 987 988 # return a single string that is the concatenation of the (string) 989 # values of the specified attribute for all operands 990 def concatAttrStrings(self, attr_name): 991 return self.__internalConcatAttrs(attr_name, lambda x: 1, '') 992 993 # like concatAttrStrings, but only include the 
values for the operands 994 # for which the provided filter function returns true 995 def concatSomeAttrStrings(self, filter, attr_name): 996 return self.__internalConcatAttrs(attr_name, filter, '') 997 998 # return a single list that is the concatenation of the (list) 999 # values of the specified attribute for all operands 1000 def concatAttrLists(self, attr_name): 1001 return self.__internalConcatAttrs(attr_name, lambda x: 1, []) 1002 1003 # like concatAttrLists, but only include the values for the operands 1004 # for which the provided filter function returns true 1005 def concatSomeAttrLists(self, filter, attr_name): 1006 return self.__internalConcatAttrs(attr_name, filter, []) 1007 1008 def sort(self): 1009 self.items.sort(lambda a, b: a.sort_pri - b.sort_pri) 1010 1011class SubOperandList(OperandList): 1012 '''Find all the operands in the given code block. Returns an operand 1013 descriptor list (instance of class OperandList).''' 1014 def __init__(self, code, master_list): 1015 self.items = [] 1016 self.bases = {} 1017 # delete comments so we don't match on reg specifiers inside 1018 code = commentRE.sub('', code) 1019 # search for operands 1020 next_pos = 0 1021 while 1: 1022 match = operandsRE.search(code, next_pos) 1023 if not match: 1024 # no more matches: we're done 1025 break 1026 op = match.groups() 1027 # regexp groups are operand full name, base, and extension 1028 (op_full, op_base, op_ext) = op 1029 # find this op in the master list 1030 op_desc = master_list.find_base(op_base) 1031 if not op_desc: 1032 error(0, 'Found operand %s which is not in the master list!' 
\ 1033 ' This is an internal error' % \ 1034 op_base) 1035 else: 1036 # See if we've already found this operand 1037 op_desc = self.find_base(op_base) 1038 if not op_desc: 1039 # if not, add a reference to it to this sub list 1040 self.append(master_list.bases[op_base]) 1041 1042 # start next search after end of current match 1043 next_pos = match.end() 1044 self.sort() 1045 self.memOperand = None 1046 for op_desc in self.items: 1047 if op_desc.isMem(): 1048 if self.memOperand: 1049 error(0, "Code block has more than one memory operand.") 1050 self.memOperand = op_desc 1051 1052# Regular expression object to match C++ comments 1053# (used in findOperands()) 1054commentRE = re.compile(r'//.*\n') 1055 1056# Regular expression object to match assignment statements 1057# (used in findOperands()) 1058assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE) 1059 1060# Munge operand names in code string to make legal C++ variable names. 1061# This means getting rid of the type extension if any. 1062# (Will match base_name attribute of Operand object.) 1063def substMungedOpNames(code): 1064 return operandsWithExtRE.sub(r'\1', code) 1065 1066# Fix up code snippets for final substitution in templates. 
def mungeSnippet(s):
    # Only strings are rewritten; any other value is passed through
    # unchanged.
    if isinstance(s, str):
        return substMungedOpNames(substBitOps(s))
    else:
        return s

def makeFlagConstructor(flag_list):
    '''Return the C++ statements that set every flag in flag_list.

    As a side effect flag_list is sorted and stripped of duplicates
    in place.  An empty list yields the empty string.
    '''
    if len(flag_list) == 0:
        return ''
    # filter out repeated flags (in place, so the caller sees the
    # sorted, duplicate-free list)
    flag_list[:] = sorted(set(flag_list))
    pre = '\n\tflags['
    post = '] = true;'
    code = pre + (post + pre).join(flag_list) + post
    return code

# Assume all instruction flags are of the form 'IsFoo'
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass
opClassRE = re.compile(r'.*Op|No_OpClass')

class InstObjParams(object):
    '''Parameters describing one instruction object: mnemonic, class
    names, code snippets, the operands found in those snippets, the
    generated constructor code, flags and op class.'''

    # The default values of snippets and opt_args are shared between
    # calls; they are only read here, never modified.
    def __init__(self, mnem, class_name, base_class = '',
                 snippets = {}, opt_args = []):
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        # a bare (non-dict) snippet is treated as the 'code' snippet
        if not isinstance(snippets, dict):
            snippets = {'code' : snippets}
        compositeCode = ' '.join(map(str, snippets.values()))
        self.snippets = snippets

        self.operands = OperandList(compositeCode)
        self.constructor = self.operands.concatAttrStrings('constructor')
        self.constructor += \
                 '\n\t_numSrcRegs = %d;' % self.operands.numSrcRegs
        self.constructor += \
                 '\n\t_numDestRegs = %d;' % self.operands.numDestRegs
        self.constructor += \
                 '\n\t_numFPDestRegs = %d;' % self.operands.numFPDestRegs
        self.constructor += \
                 '\n\t_numIntDestRegs = %d;' % self.operands.numIntDestRegs
        self.flags = self.operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and can be overridden
        # later otherwise.
        if 'IsStore' in self.flags:
            self.op_class = 'MemWriteOp'
        elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
            self.op_class = 'MemReadOp'
        elif 'IsFloating' in self.flags:
            self.op_class = 'FloatAddOp'
        else:
            self.op_class = 'IntAluOp'

        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error(0, 'InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to contructor here to include
        # any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
        else:
            self.fp_enable_check = ''

##############
# Stack: a simple stack object.  Used for both formats (formatStack)
# and default cases (defaultStack).  Simply wraps a list to give more
# stack-like syntax and enable initialization with an argument list
# (as opposed to an argument that's a list).

class Stack(list):
    def __init__(self, *items):
        list.__init__(self, items)

    def push(self, item):
        self.append(item)

    def top(self):
        return self[-1]

# The global format stack.
formatStack = Stack(NoFormat())

# The global default case stack.
defaultStack = Stack(None)

# Global stack that tracks current file and line number.
1177# Each element is a tuple (filename, lineno) that records the 1178# *current* filename and the line number in the *previous* file where 1179# it was included. 1180fileNameStack = Stack() 1181 1182 1183####################### 1184# 1185# Output file template 1186# 1187 1188file_template = ''' 1189/* 1190 * DO NOT EDIT THIS FILE!!! 1191 * 1192 * It was automatically generated from the ISA description in %(filename)s 1193 */ 1194 1195%(includes)s 1196 1197%(global_output)s 1198 1199namespace %(namespace)s { 1200 1201%(namespace_output)s 1202 1203} // namespace %(namespace)s 1204 1205%(decode_function)s 1206''' 1207 1208max_inst_regs_template = ''' 1209/* 1210 * DO NOT EDIT THIS FILE!!! 1211 * 1212 * It was automatically generated from the ISA description in %(filename)s 1213 */ 1214 1215namespace %(namespace)s { 1216 1217 const int MaxInstSrcRegs = %(MaxInstSrcRegs)d; 1218 const int MaxInstDestRegs = %(MaxInstDestRegs)d; 1219 1220} // namespace %(namespace)s 1221 1222''' 1223 1224class ISAParser(Grammar): 1225 def __init__(self, output_dir): 1226 super(ISAParser, self).__init__() 1227 self.output_dir = output_dir 1228 1229 self.templateMap = {} 1230 1231 ##################################################################### 1232 # 1233 # Lexer 1234 # 1235 # The PLY lexer module takes two things as input: 1236 # - A list of token names (the string list 'tokens') 1237 # - A regular expression describing a match for each token. The 1238 # regexp for token FOO can be provided in two ways: 1239 # - as a string variable named t_FOO 1240 # - as the doc string for a function named t_FOO. In this case, 1241 # the function is also executed, allowing an action to be 1242 # associated with each token match. 1243 # 1244 ##################################################################### 1245 1246 # Reserved words. These are listed separately as they are matched 1247 # using the same regexp as generic IDs, but distinguished in the 1248 # t_ID() function. 
The PLY documentation suggests this approach. 1249 reserved = ( 1250 'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT', 1251 'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS', 1252 'OUTPUT', 'SIGNED', 'TEMPLATE' 1253 ) 1254 1255 # List of tokens. The lex module requires this. 1256 tokens = reserved + ( 1257 # identifier 1258 'ID', 1259 1260 # integer literal 1261 'INTLIT', 1262 1263 # string literal 1264 'STRLIT', 1265 1266 # code literal 1267 'CODELIT', 1268 1269 # ( ) [ ] { } < > , ; . : :: * 1270 'LPAREN', 'RPAREN', 1271 'LBRACKET', 'RBRACKET', 1272 'LBRACE', 'RBRACE', 1273 'LESS', 'GREATER', 'EQUALS', 1274 'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON', 1275 'ASTERISK', 1276 1277 # C preprocessor directives 1278 'CPPDIRECTIVE' 1279 1280 # The following are matched but never returned. commented out to 1281 # suppress PLY warning 1282 # newfile directive 1283 # 'NEWFILE', 1284 1285 # endfile directive 1286 # 'ENDFILE' 1287 ) 1288 1289 # Regular expressions for token matching 1290 t_LPAREN = r'\(' 1291 t_RPAREN = r'\)' 1292 t_LBRACKET = r'\[' 1293 t_RBRACKET = r'\]' 1294 t_LBRACE = r'\{' 1295 t_RBRACE = r'\}' 1296 t_LESS = r'\<' 1297 t_GREATER = r'\>' 1298 t_EQUALS = r'=' 1299 t_COMMA = r',' 1300 t_SEMI = r';' 1301 t_DOT = r'\.' 1302 t_COLON = r':' 1303 t_DBLCOLON = r'::' 1304 t_ASTERISK = r'\*' 1305 1306 # Identifiers and reserved words 1307 reserved_map = { } 1308 for r in reserved: 1309 reserved_map[r.lower()] = r 1310 1311 def t_ID(self, t): 1312 r'[A-Za-z_]\w*' 1313 t.type = self.reserved_map.get(t.value, 'ID') 1314 return t 1315 1316 # Integer literal 1317 def t_INTLIT(self, t): 1318 r'-?(0x[\da-fA-F]+)|\d+' 1319 try: 1320 t.value = int(t.value,0) 1321 except ValueError: 1322 error(t.lexer.lineno, 'Integer value "%s" too large' % t.value) 1323 t.value = 0 1324 return t 1325 1326 # String literal. Note that these use only single quotes, and 1327 # can span multiple lines. 
1328 def t_STRLIT(self, t): 1329 r"(?m)'([^'])+'" 1330 # strip off quotes 1331 t.value = t.value[1:-1] 1332 t.lexer.lineno += t.value.count('\n') 1333 return t 1334 1335 1336 # "Code literal"... like a string literal, but delimiters are 1337 # '{{' and '}}' so they get formatted nicely under emacs c-mode 1338 def t_CODELIT(self, t): 1339 r"(?m)\{\{([^\}]|}(?!\}))+\}\}" 1340 # strip off {{ & }} 1341 t.value = t.value[2:-2] 1342 t.lexer.lineno += t.value.count('\n') 1343 return t 1344 1345 def t_CPPDIRECTIVE(self, t): 1346 r'^\#[^\#].*\n' 1347 t.lexer.lineno += t.value.count('\n') 1348 return t 1349 1350 def t_NEWFILE(self, t): 1351 r'^\#\#newfile\s+"[\w/.-]*"' 1352 fileNameStack.push((t.value[11:-1], t.lexer.lineno)) 1353 t.lexer.lineno = 0 1354 1355 def t_ENDFILE(self, t): 1356 r'^\#\#endfile' 1357 (old_filename, t.lexer.lineno) = fileNameStack.pop() 1358 1359 # 1360 # The functions t_NEWLINE, t_ignore, and t_error are 1361 # special for the lex module. 1362 # 1363 1364 # Newlines 1365 def t_NEWLINE(self, t): 1366 r'\n+' 1367 t.lexer.lineno += t.value.count('\n') 1368 1369 # Comments 1370 def t_comment(self, t): 1371 r'//.*' 1372 1373 # Completely ignored characters 1374 t_ignore = ' \t\x0c' 1375 1376 # Error handler 1377 def t_error(self, t): 1378 error(t.lexer.lineno, "illegal character '%s'" % t.value[0]) 1379 t.skip(1) 1380 1381 ##################################################################### 1382 # 1383 # Parser 1384 # 1385 # Every function whose name starts with 'p_' defines a grammar 1386 # rule. The rule is encoded in the function's doc string, while 1387 # the function body provides the action taken when the rule is 1388 # matched. The argument to each function is a list of the values 1389 # of the rule's symbols: t[0] for the LHS, and t[1..n] for the 1390 # symbols on the RHS. For tokens, the value is copied from the 1391 # t.value attribute provided by the lexer. 
For non-terminals, the 1392 # value is assigned by the producing rule; i.e., the job of the 1393 # grammar rule function is to set the value for the non-terminal 1394 # on the LHS (by assigning to t[0]). 1395 ##################################################################### 1396 1397 # The LHS of the first grammar rule is used as the start symbol 1398 # (in this case, 'specification'). Note that this rule enforces 1399 # that there will be exactly one namespace declaration, with 0 or 1400 # more global defs/decls before and after it. The defs & decls 1401 # before the namespace decl will be outside the namespace; those 1402 # after will be inside. The decoder function is always inside the 1403 # namespace. 1404 def p_specification(self, t): 1405 'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block' 1406 global_code = t[1] 1407 isa_name = t[2] 1408 namespace = isa_name + "Inst" 1409 # wrap the decode block as a function definition 1410 t[4].wrap_decode_block(''' 1411StaticInstPtr 1412%(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst) 1413{ 1414 using namespace %(namespace)s; 1415''' % vars(), '}') 1416 # both the latter output blocks and the decode block are in 1417 # the namespace 1418 namespace_code = t[3] + t[4] 1419 # pass it all back to the caller of yacc.parse() 1420 t[0] = (isa_name, namespace, global_code, namespace_code) 1421 1422 # ISA name declaration looks like "namespace <foo>;" 1423 def p_name_decl(self, t): 1424 'name_decl : NAMESPACE ID SEMI' 1425 t[0] = t[2] 1426 1427 # 'opt_defs_and_outputs' is a possibly empty sequence of 1428 # def and/or output statements. 
    def p_opt_defs_and_outputs_0(self, t):
        'opt_defs_and_outputs : empty'
        # nothing declared: contribute an empty GenCode object
        t[0] = GenCode()

    def p_opt_defs_and_outputs_1(self, t):
        'opt_defs_and_outputs : defs_and_outputs'
        t[0] = t[1]

    def p_defs_and_outputs_0(self, t):
        'defs_and_outputs : def_or_output'
        t[0] = t[1]

    def p_defs_and_outputs_1(self, t):
        'defs_and_outputs : defs_and_outputs def_or_output'
        # combine the sequence seen so far with the new statement
        t[0] = t[1] + t[2]

    # The list of possible definition/output statements.
    def p_def_or_output(self, t):
        '''def_or_output : def_format
                         | def_bitfield
                         | def_bitfield_struct
                         | def_template
                         | def_operand_types
                         | def_operands
                         | output_header
                         | output_decoder
                         | output_exec
                         | global_let'''
        # pass the value of whichever alternative matched straight through
        t[0] = t[1]

    # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
    # directly to the appropriate output section.

    # Massage output block by substituting in template definitions and
    # bit operators.  We handle '%'s embedded in the string that don't
    # indicate template substitutions (or CPU-specific symbols, which
    # get handled in GenCode) by doubling them first so that the
    # format operation will reduce them back to single '%'s.
1467 def process_output(self, s): 1468 s = protect_non_subst_percents(s) 1469 # protects cpu-specific symbols too 1470 s = protect_cpu_symbols(s) 1471 return substBitOps(s % self.templateMap) 1472 1473 def p_output_header(self, t): 1474 'output_header : OUTPUT HEADER CODELIT SEMI' 1475 t[0] = GenCode(header_output = self.process_output(t[3])) 1476 1477 def p_output_decoder(self, t): 1478 'output_decoder : OUTPUT DECODER CODELIT SEMI' 1479 t[0] = GenCode(decoder_output = self.process_output(t[3])) 1480 1481 def p_output_exec(self, t): 1482 'output_exec : OUTPUT EXEC CODELIT SEMI' 1483 t[0] = GenCode(exec_output = self.process_output(t[3])) 1484 1485 # global let blocks 'let {{...}}' (Python code blocks) are 1486 # executed directly when seen. Note that these execute in a 1487 # special variable context 'exportContext' to prevent the code 1488 # from polluting this script's namespace. 1489 def p_global_let(self, t): 1490 'global_let : LET CODELIT SEMI' 1491 updateExportContext() 1492 exportContext["header_output"] = '' 1493 exportContext["decoder_output"] = '' 1494 exportContext["exec_output"] = '' 1495 exportContext["decode_block"] = '' 1496 try: 1497 exec fixPythonIndentation(t[2]) in exportContext 1498 except Exception, exc: 1499 error(t.lexer.lineno, 1500 'error: %s in global let block "%s".' % (exc, t[2])) 1501 t[0] = GenCode(header_output = exportContext["header_output"], 1502 decoder_output = exportContext["decoder_output"], 1503 exec_output = exportContext["exec_output"], 1504 decode_block = exportContext["decode_block"]) 1505 1506 # Define the mapping from operand type extensions to C++ types and 1507 # bit widths (stored in operandTypeMap). 1508 def p_def_operand_types(self, t): 1509 'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI' 1510 try: 1511 user_dict = eval('{' + t[3] + '}') 1512 except Exception, exc: 1513 error(t.lexer.lineno, 1514 'error: %s in def operand_types block "%s".' 
% (exc, t[3])) 1515 buildOperandTypeMap(user_dict, t.lexer.lineno) 1516 t[0] = GenCode() # contributes nothing to the output C++ file 1517 1518 # Define the mapping from operand names to operand classes and 1519 # other traits. Stored in operandNameMap. 1520 def p_def_operands(self, t): 1521 'def_operands : DEF OPERANDS CODELIT SEMI' 1522 if not globals().has_key('operandTypeMap'): 1523 error(t.lexer.lineno, 1524 'error: operand types must be defined before operands') 1525 try: 1526 user_dict = eval('{' + t[3] + '}', exportContext) 1527 except Exception, exc: 1528 error(t.lexer.lineno, 1529 'error: %s in def operands block "%s".' % (exc, t[3])) 1530 buildOperandNameMap(user_dict, t.lexer.lineno) 1531 t[0] = GenCode() # contributes nothing to the output C++ file 1532 1533 # A bitfield definition looks like: 1534 # 'def [signed] bitfield <ID> [<first>:<last>]' 1535 # This generates a preprocessor macro in the output file. 1536 def p_def_bitfield_0(self, t): 1537 'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI' 1538 expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8]) 1539 if (t[2] == 'signed'): 1540 expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr) 1541 hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr) 1542 t[0] = GenCode(header_output = hash_define) 1543 1544 # alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]' 1545 def p_def_bitfield_1(self, t): 1546 'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI' 1547 expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6]) 1548 if (t[2] == 'signed'): 1549 expr = 'sext<%d>(%s)' % (1, expr) 1550 hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr) 1551 t[0] = GenCode(header_output = hash_define) 1552 1553 # alternate form for structure member: 'def bitfield <ID> <ID>' 1554 def p_def_bitfield_struct(self, t): 1555 'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI' 1556 if (t[2] != ''): 1557 error(t.lexer.lineno, 1558 'error: 
structure bitfields are always unsigned.') 1559 expr = 'machInst.%s' % t[5] 1560 hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr) 1561 t[0] = GenCode(header_output = hash_define) 1562 1563 def p_id_with_dot_0(self, t): 1564 'id_with_dot : ID' 1565 t[0] = t[1] 1566 1567 def p_id_with_dot_1(self, t): 1568 'id_with_dot : ID DOT id_with_dot' 1569 t[0] = t[1] + t[2] + t[3] 1570 1571 def p_opt_signed_0(self, t): 1572 'opt_signed : SIGNED' 1573 t[0] = t[1] 1574 1575 def p_opt_signed_1(self, t): 1576 'opt_signed : empty' 1577 t[0] = '' 1578 1579 def p_def_template(self, t): 1580 'def_template : DEF TEMPLATE ID CODELIT SEMI' 1581 self.templateMap[t[3]] = Template(t[4]) 1582 t[0] = GenCode() 1583 1584 # An instruction format definition looks like 1585 # "def format <fmt>(<params>) {{...}};" 1586 def p_def_format(self, t): 1587 'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI' 1588 (id, params, code) = (t[3], t[5], t[7]) 1589 defFormat(id, params, code, t.lexer.lineno) 1590 t[0] = GenCode() 1591 1592 # The formal parameter list for an instruction format is a 1593 # possibly empty list of comma-separated parameters. Positional 1594 # (standard, non-keyword) parameters must come first, followed by 1595 # keyword parameters, followed by a '*foo' parameter that gets 1596 # excess positional arguments (as in Python). Each of these three 1597 # parameter categories is optional. 1598 # 1599 # Note that we do not support the '**foo' parameter for collecting 1600 # otherwise undefined keyword args. Otherwise the parameter list 1601 # is (I believe) identical to what is supported in Python. 1602 # 1603 # The param list generates a tuple, where the first element is a 1604 # list of the positional params and the second element is a dict 1605 # containing the keyword params. 
    # Positional params followed by keyword and/or '*foo' params: the
    # two lists are joined into one.
    def p_param_list_0(self, t):
        'param_list : positional_param_list COMMA nonpositional_param_list'
        t[0] = t[1] + t[3]

    # Only one of the two categories is present: pass its list through.
    def p_param_list_1(self, t):
        '''param_list : positional_param_list
                      | nonpositional_param_list'''
        t[0] = t[1]

    # Positional params: a (possibly empty) list of identifier strings.
    def p_positional_param_list_0(self, t):
        'positional_param_list : empty'
        t[0] = []

    def p_positional_param_list_1(self, t):
        'positional_param_list : ID'
        t[0] = [t[1]]

    def p_positional_param_list_2(self, t):
        'positional_param_list : positional_param_list COMMA ID'
        # builds a new list rather than appending to t[1] in place
        t[0] = t[1] + [t[3]]

    # Keyword params followed by the '*foo' param.
    def p_nonpositional_param_list_0(self, t):
        'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
        t[0] = t[1] + t[3]

    def p_nonpositional_param_list_1(self, t):
        '''nonpositional_param_list : keyword_param_list
                                    | excess_args_param'''
        t[0] = t[1]

    # Keyword params: a non-empty list of 'ID = <default>' strings.
    def p_keyword_param_list_0(self, t):
        'keyword_param_list : keyword_param'
        t[0] = [t[1]]

    def p_keyword_param_list_1(self, t):
        'keyword_param_list : keyword_param_list COMMA keyword_param'
        t[0] = t[1] + [t[3]]

    # A single keyword param is rendered as text: the name, ' = ', and
    # the repr of the default value produced by the 'expr' rule.
    def p_keyword_param(self, t):
        'keyword_param : ID EQUALS expr'
        t[0] = t[1] + ' = ' + t[3].__repr__()

    def p_excess_args_param(self, t):
        'excess_args_param : ASTERISK ID'
        # Just concatenate them: '*ID'.  Wrap in list to be consistent
        # with positional_param_list and keyword_param_list.
        t[0] = [t[1] + t[2]]

    # End of format definition-related rules.
    ##############

    #
    # A decode block looks like:
    #       decode <field1> [, <field2>]* [default <inst>] { ... }
    #
    # NOTE(review): the rule below accepts exactly one ID after DECODE,
    # not a comma-separated list of fields -- confirm whether the
    # syntax line above is out of date.
    #
    # The entry pushed on defaultStack by opt_default is popped here;
    # the finished code is wrapped in 'switch (<ID>) {' ... '}'.
    def p_decode_block(self, t):
        'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
        default_defaults = defaultStack.pop()
        codeObj = t[5]
        # use the "default defaults" only if there was no explicit
        # default statement in decode_stmt_list
        if not codeObj.has_decode_default:
            codeObj += default_defaults
        codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
        t[0] = codeObj

    # The opt_default statement serves only to push the "default
    # defaults" onto defaultStack.  This value will be used by nested
    # decode blocks, and used and popped off when the current
    # decode_block is processed (in p_decode_block() above).
    def p_opt_default_0(self, t):
        'opt_default : empty'
        # no default specified: reuse the one currently at the top of
        # the stack
        defaultStack.push(defaultStack.top())
        # no meaningful value returned
        t[0] = None

    def p_opt_default_1(self, t):
        'opt_default : DEFAULT inst'
        # push the new default
        codeObj = t[2]
        # wrapped as a 'default:' case ending in 'break;'
        codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
        defaultStack.push(codeObj)
        # no meaningful value returned
        t[0] = None

    # A decode statement list is one or more decode statements whose
    # code objects are combined with '+'.
    def p_decode_stmt_list_0(self, t):
        'decode_stmt_list : decode_stmt'
        t[0] = t[1]

    def p_decode_stmt_list_1(self, t):
        'decode_stmt_list : decode_stmt decode_stmt_list'
        # at most one statement in a list may carry the default case
        if (t[1].has_decode_default and t[2].has_decode_default):
            error(t.lexer.lineno, 'Two default cases in decode block')
        t[0] = t[1] + t[2]

    #
    # Decode statement rules
    #
    # There are four types of statements allowed in a decode block:
    # 1. Format blocks 'format <foo> { ... }'
    # 2. Nested decode blocks
    # 3. Instruction definitions.
    # 4. C preprocessor directives.


    # Preprocessor directives found in a decode statement list are
    # passed through to the output, replicated to all of the output
    # code streams.  This works well for ifdefs, so we can ifdef out
    # both the declarations and the decode cases generated by an
    # instruction definition.  Handling them as part of the grammar
    # makes it easy to keep them in the right place with respect to
    # the code generated by the other statements.
    def p_decode_stmt_cpp(self, t):
        'decode_stmt : CPPDIRECTIVE'
        # the directive text is passed for each of GenCode's four
        # positional arguments
        t[0] = GenCode(t[1], t[1], t[1], t[1])

    # A format block 'format <foo> { ... }' sets the default
    # instruction format used to handle instruction definitions inside
    # the block.  This format can be overridden by using an explicit
    # format on the instruction definition or with a nested format
    # block.
    def p_decode_stmt_format(self, t):
        'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
        # The format will be pushed on the stack when 'push_format_id'
        # is processed (see below).  Once the parser has recognized
        # the full production (through the right brace), we're done
        # with the format, so now we can pop it.
        formatStack.pop()
        # the block's value is just the enclosed statement list
        t[0] = t[4]

    # This rule exists so we can set the current format (& push the
    # stack) when we recognize the format name part of the format
    # block.
    def p_push_format_id(self, t):
        'push_format_id : ID'
        try:
            # an unknown format name raises KeyError on the lookup
            formatStack.push(formatMap[t[1]])
            # NOTE(review): p_decode_stmt_format above only reads t[4],
            # so this value looks unused by that rule -- confirm.
            t[0] = ('', '// format %s' % t[1])
        except KeyError:
            error(t.lexer.lineno,
                  'instruction format "%s" not defined.' % t[1])

    # Nested decode block: if the value of the current field matches
    # the specified constant, do a nested decode on some other field.
1752 def p_decode_stmt_decode(self, t): 1753 'decode_stmt : case_label COLON decode_block' 1754 label = t[1] 1755 codeObj = t[3] 1756 # just wrap the decoding code from the block as a case in the 1757 # outer switch statement. 1758 codeObj.wrap_decode_block('\n%s:\n' % label) 1759 codeObj.has_decode_default = (label == 'default') 1760 t[0] = codeObj 1761 1762 # Instruction definition (finally!). 1763 def p_decode_stmt_inst(self, t): 1764 'decode_stmt : case_label COLON inst SEMI' 1765 label = t[1] 1766 codeObj = t[3] 1767 codeObj.wrap_decode_block('\n%s:' % label, 'break;\n') 1768 codeObj.has_decode_default = (label == 'default') 1769 t[0] = codeObj 1770 1771 # The case label is either a list of one or more constants or 1772 # 'default' 1773 def p_case_label_0(self, t): 1774 'case_label : intlit_list' 1775 def make_case(intlit): 1776 if intlit >= 2**32: 1777 return 'case ULL(%#x)' % intlit 1778 else: 1779 return 'case %#x' % intlit 1780 t[0] = ': '.join(map(make_case, t[1])) 1781 1782 def p_case_label_1(self, t): 1783 'case_label : DEFAULT' 1784 t[0] = 'default' 1785 1786 # 1787 # The constant list for a decode case label must be non-empty, but 1788 # may have one or more comma-separated integer literals in it. 1789 # 1790 def p_intlit_list_0(self, t): 1791 'intlit_list : INTLIT' 1792 t[0] = [t[1]] 1793 1794 def p_intlit_list_1(self, t): 1795 'intlit_list : intlit_list COMMA INTLIT' 1796 t[0] = t[1] 1797 t[0].append(t[3]) 1798 1799 # Define an instruction using the current instruction format 1800 # (specified by an enclosing format block). 1801 # "<mnemonic>(<args>)" 1802 def p_inst_0(self, t): 1803 'inst : ID LPAREN arg_list RPAREN' 1804 # Pass the ID and arg list to the current format class to deal with. 
1805 currentFormat = formatStack.top() 1806 codeObj = currentFormat.defineInst(t[1], t[3], t.lexer.lineno) 1807 args = ','.join(map(str, t[3])) 1808 args = re.sub('(?m)^', '//', args) 1809 args = re.sub('^//', '', args) 1810 comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args) 1811 codeObj.prepend_all(comment) 1812 t[0] = codeObj 1813 1814 # Define an instruction using an explicitly specified format: 1815 # "<fmt>::<mnemonic>(<args>)" 1816 def p_inst_1(self, t): 1817 'inst : ID DBLCOLON ID LPAREN arg_list RPAREN' 1818 try: 1819 format = formatMap[t[1]] 1820 except KeyError: 1821 error(t.lexer.lineno, 1822 'instruction format "%s" not defined.' % t[1]) 1823 codeObj = format.defineInst(t[3], t[5], t.lexer.lineno) 1824 comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5]) 1825 codeObj.prepend_all(comment) 1826 t[0] = codeObj 1827 1828 # The arg list generates a tuple, where the first element is a 1829 # list of the positional args and the second element is a dict 1830 # containing the keyword args. 
    # Both positional and keyword args are present.
    def p_arg_list_0(self, t):
        'arg_list : positional_arg_list COMMA keyword_arg_list'
        t[0] = ( t[1], t[3] )

    # Positional args only: the keyword dict is empty.
    def p_arg_list_1(self, t):
        'arg_list : positional_arg_list'
        t[0] = ( t[1], {} )

    # Keyword args only: the positional list is empty.
    def p_arg_list_2(self, t):
        'arg_list : keyword_arg_list'
        t[0] = ( [], t[1] )

    # Positional args: a (possibly empty) list of expr values.
    def p_positional_arg_list_0(self, t):
        'positional_arg_list : empty'
        t[0] = []

    def p_positional_arg_list_1(self, t):
        'positional_arg_list : expr'
        t[0] = [t[1]]

    def p_positional_arg_list_2(self, t):
        'positional_arg_list : positional_arg_list COMMA expr'
        t[0] = t[1] + [t[3]]

    # Keyword args: a dict, started from the single-entry dict that
    # keyword_arg produces ...
    def p_keyword_arg_list_0(self, t):
        'keyword_arg_list : keyword_arg'
        t[0] = t[1]

    # ... and updated in place with each further keyword_arg, so a
    # repeated name keeps the value given last.
    def p_keyword_arg_list_1(self, t):
        'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
        t[0] = t[1]
        t[0].update(t[3])

    def p_keyword_arg(self, t):
        'keyword_arg : ID EQUALS expr'
        t[0] = { t[1] : t[3] }

    #
    # Basic expressions.  These constitute the argument values of
    # "function calls" (i.e. instruction definitions in the decode
    # block) and default values for formal parameters of format
    # functions.
    #
    # Right now, these are either strings, integers, or (recursively)
    # lists of exprs (using Python square-bracket list syntax).  Note
    # that bare identifiers are treated as string constants here (since
    # there isn't really a variable namespace to refer to).
1878 # 1879 def p_expr_0(self, t): 1880 '''expr : ID 1881 | INTLIT 1882 | STRLIT 1883 | CODELIT''' 1884 t[0] = t[1] 1885 1886 def p_expr_1(self, t): 1887 '''expr : LBRACKET list_expr RBRACKET''' 1888 t[0] = t[2] 1889 1890 def p_list_expr_0(self, t): 1891 'list_expr : expr' 1892 t[0] = [t[1]] 1893 1894 def p_list_expr_1(self, t): 1895 'list_expr : list_expr COMMA expr' 1896 t[0] = t[1] + [t[3]] 1897 1898 def p_list_expr_2(self, t): 1899 'list_expr : empty' 1900 t[0] = [] 1901 1902 # 1903 # Empty production... use in other rules for readability. 1904 # 1905 def p_empty(self, t): 1906 'empty :' 1907 pass 1908 1909 # Parse error handler. Note that the argument here is the 1910 # offending *token*, not a grammar symbol (hence the need to use 1911 # t.value) 1912 def p_error(self, t): 1913 if t: 1914 error(t.lexer.lineno, "syntax error at '%s'" % t.value) 1915 else: 1916 error(0, "unknown syntax error", True) 1917 1918 # END OF GRAMMAR RULES 1919 1920 def update_if_needed(self, file, contents): 1921 '''Update the output file only if the new contents are 1922 different from the current contents. 
Minimizes the files that 1923 need to be rebuilt after minor changes.''' 1924 1925 file = os.path.join(self.output_dir, file) 1926 update = False 1927 if os.access(file, os.R_OK): 1928 f = open(file, 'r') 1929 old_contents = f.read() 1930 f.close() 1931 if contents != old_contents: 1932 print 'Updating', file 1933 os.remove(file) # in case it's write-protected 1934 update = True 1935 else: 1936 print 'File', file, 'is unchanged' 1937 else: 1938 print 'Generating', file 1939 update = True 1940 if update: 1941 f = open(file, 'w') 1942 f.write(contents) 1943 f.close() 1944 1945 # This regular expression matches '##include' directives 1946 includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[\w/.-]*)".*$', 1947 re.MULTILINE) 1948 1949 def replace_include(self, matchobj, dirname): 1950 """Function to replace a matched '##include' directive with the 1951 contents of the specified file (with nested ##includes 1952 replaced recursively). 'matchobj' is an re match object 1953 (from a match of includeRE) and 'dirname' is the directory 1954 relative to which the file path should be resolved.""" 1955 1956 fname = matchobj.group('filename') 1957 full_fname = os.path.normpath(os.path.join(dirname, fname)) 1958 contents = '##newfile "%s"\n%s\n##endfile\n' % \ 1959 (full_fname, self.read_and_flatten(full_fname)) 1960 return contents 1961 1962 def read_and_flatten(self, filename): 1963 """Read a file and recursively flatten nested '##include' files.""" 1964 1965 current_dir = os.path.dirname(filename) 1966 try: 1967 contents = open(filename).read() 1968 except IOError: 1969 error(0, 'Error including file "%s"' % filename) 1970 1971 fileNameStack.push((filename, 0)) 1972 1973 # Find any includes and include them 1974 def replace(matchobj): 1975 return self.replace_include(matchobj, current_dir) 1976 contents = self.includeRE.sub(replace, contents) 1977 1978 fileNameStack.pop() 1979 return contents 1980 1981 def parse_isa_desc(self, isa_desc_file): 1982 '''Read in and parse the 
ISA description.''' 1983 1984 # Read file and (recursively) all included files into a string. 1985 # PLY requires that the input be in a single string so we have to 1986 # do this up front. 1987 isa_desc = self.read_and_flatten(isa_desc_file) 1988 1989 # Initialize filename stack with outer file. 1990 fileNameStack.push((isa_desc_file, 0)) 1991 1992 # Parse it. 1993 (isa_name, namespace, global_code, namespace_code) = \ 1994 self.parse(isa_desc) 1995 1996 # grab the last three path components of isa_desc_file to put in 1997 # the output 1998 filename = '/'.join(isa_desc_file.split('/')[-3:]) 1999 2000 # generate decoder.hh 2001 includes = '#include "base/bitfield.hh" // for bitfield support' 2002 global_output = global_code.header_output 2003 namespace_output = namespace_code.header_output 2004 decode_function = '' 2005 self.update_if_needed('decoder.hh', file_template % vars()) 2006 2007 # generate decoder.cc 2008 includes = '#include "decoder.hh"' 2009 global_output = global_code.decoder_output 2010 namespace_output = namespace_code.decoder_output 2011 # namespace_output += namespace_code.decode_block 2012 decode_function = namespace_code.decode_block 2013 self.update_if_needed('decoder.cc', file_template % vars()) 2014 2015 # generate per-cpu exec files 2016 for cpu in cpu_models: 2017 includes = '#include "decoder.hh"\n' 2018 includes += cpu.includes 2019 global_output = global_code.exec_output[cpu.name] 2020 namespace_output = namespace_code.exec_output[cpu.name] 2021 decode_function = '' 2022 self.update_if_needed(cpu.filename, file_template % vars()) 2023 2024 # The variable names here are hacky, but this will creat local 2025 # variables which will be referenced in vars() which have the 2026 # value of the globals. 
2027 global maxInstSrcRegs 2028 MaxInstSrcRegs = maxInstSrcRegs 2029 global maxInstDestRegs 2030 MaxInstDestRegs = maxInstDestRegs 2031 # max_inst_regs.hh 2032 self.update_if_needed('max_inst_regs.hh', 2033 max_inst_regs_template % vars()) 2034 2035# global list of CpuModel objects (see cpu_models.py) 2036cpu_models = [] 2037 2038# Called as script: get args from command line. 2039# Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models> 2040if __name__ == '__main__': 2041 execfile(sys.argv[1]) # read in CpuModel definitions 2042 cpu_models = [CpuModel.dict[cpu] for cpu in sys.argv[4:]] 2043 parser = ISAParser(sys.argv[3]) 2044 parser.parse_isa_desc(sys.argv[2]) 2045