isa_parser.py revision 6987:31ba8b062d08
1# Copyright (c) 2003-2005 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26#
27# Authors: Steve Reinhardt
28
29import os
30import sys
31import re
32import string
33import inspect, traceback
34# get type names
35from types import *
36
37from m5.util.grammar import Grammar
38
# Global debug switch.  It is the default value of the 'print_traceback'
# argument of ISAParserError.display()/exit(), and when it is set
# Format.defineInst() re-raises the original exception instead of
# converting it into a parser error.
debug=False
40
41###################
42# Utility functions
43
44#
45# Indent every line in string 's' by two spaces
46# (except preprocessor directives).
47# Used to make nested code blocks look pretty.
48#
def indent(s):
    '''Return 's' with two spaces inserted at the start of every line,
    except for lines that begin with '#' (preprocessor directives).'''
    # '^' in MULTILINE mode matches at each line start; the negative
    # lookahead skips the lines whose first character is '#'.
    line_start = re.compile(r'^(?!#)', re.MULTILINE)
    return line_start.sub('  ', s)
51
52#
53# Munge a somewhat arbitrarily formatted piece of Python code
54# (e.g. from a format 'let' block) into something whose indentation
55# will get by the Python parser.
56#
57# The two keys here are that Python will give a syntax error if
58# there's any whitespace at the beginning of the first line, and that
59# all lines at the same lexical nesting level must have identical
60# indentation.  Unfortunately the way code literals work, an entire
61# let block tends to have some initial indentation.  Rather than
62# trying to figure out what that is and strip it off, we prepend 'if
63# 1:' to make the let code the nested block inside the if (and have
64# the parser automatically deal with the indentation for us).
65#
66# We don't want to do this if (1) the code block is empty or (2) the
67# first line of the block doesn't have any whitespace at the front.
68
def fixPythonIndentation(s):
    '''Return 's' in a form whose indentation the Python parser accepts.

    Blank lines are removed; if the remaining text starts with a space
    or tab, it is wrapped in an 'if 1:' block so that the leading
    indentation becomes legal.  Empty text and text that starts in
    column 0 are returned without the wrapper.
    '''
    # get rid of blank (whitespace-only) lines first
    blank_line = re.compile(r'^\s*\n', re.MULTILINE)
    text = blank_line.sub('', s)
    if text == '':
        return text
    if text[0] == ' ' or text[0] == '\t':
        return 'if 1:\n' + text
    return text
75
class ISAParserError(Exception):
    """Error handler for parser errors.

    Construct with a message only, ISAParserError(msg), or with a
    location and a message, ISAParserError(where, msg).  'where' is a
    line number or any object that has a 'lexer' attribute, in which
    case 'where.lexer.lineno' is used.  The results are stored in the
    'lineno' and 'string' attributes; lineno is 0 when no location was
    given.
    """
    def __init__(self, first, second=None):
        if second is None:
            self.lineno = 0
            self.string = first
        else:
            if hasattr(first, 'lexer'):
                first = first.lexer.lineno
            self.lineno = first
            self.string = second

    def __str__(self):
        # Without this, formatting the exception with '%s' (as
        # Format.defineInst() does for exceptions raised while defining
        # an instruction) does not yield the error message.
        return str(self.string)

    def display(self, filename_stack, print_traceback=debug):
        """Return the error formatted as '<file>:<line>: <message>'.

        'filename_stack' is a sequence of (filename, line) pairs whose
        last entry is the file being parsed; an 'In file included
        from' line is printed to stdout for every other entry.  A
        Python traceback is also printed when 'print_traceback' is
        true or when no line number is known.
        """
        # Output formatted to work under Emacs compile-mode.  Optional
        # 'print_traceback' arg, if set to True, prints a Python stack
        # backtrace too (can be handy when trying to debug the parser
        # itself).

        spaces = ""
        for (filename, line) in filename_stack[:-1]:
            # sys.stdout.write() emits the same text as the former
            # 'print' statement but is valid in Python 2 and Python 3.
            sys.stdout.write("%sIn file included from %s:\n" %
                             (spaces, filename))
            spaces += "  "

        # Print a Python stack backtrace if requested.
        if print_traceback or not self.lineno:
            traceback.print_exc()

        line_str = "%s:" % (filename_stack[-1][0], )
        if self.lineno:
            line_str += "%d:" % (self.lineno, )

        return "%s%s %s" % (spaces, line_str, self.string)

    def exit(self, filename_stack, print_traceback=debug):
        """Terminate via sys.exit() with the text returned by display()."""
        sys.exit(self.display(filename_stack, print_traceback))
113
def error(*args):
    '''Raise an ISAParserError constructed from 'args', i.e. either
    (message,) or (location, message).'''
    exc = ISAParserError(*args)
    raise exc
116
117####################
118# Template objects.
119#
120# Template objects are format strings that allow substitution from
121# the attribute spaces of other objects (e.g. InstObjParams instances).
122
# Matches a dictionary-style format specifier of the form %(label)s or
# %(label)d that is not preceded by another '%'; group 1 is the label.
labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
124
class Template(object):
    '''Format string with %(name)s-style fields that are filled in from
    the parser's template namespace plus a caller-supplied object.'''

    def __init__(self, t):
        # 't' is the raw template text.
        self.template = t

    def subst(self, d):
        '''Return the template text with its %(name)s fields substituted.

        'd' supplies the substitution values and may be an
        InstObjParams instance, a dictionary, or any object with a
        __dict__; anything else raises TypeError.  Values taken from
        'd' override same-named entries of parser.templateMap.
        '''
        # Protect non-Python-dict substitutions (e.g. if there's a printf
        # in the templated C++ code)
        template = protect_non_subst_percents(self.template)
        # CPU-model-specific substitutions are handled later (in GenCode).
        template = protect_cpu_symbols(template)

        # Build a dict ('myDict') to use for the template substitution.
        # Start with the template namespace.  Make a copy since we're
        # going to modify it.
        myDict = parser.templateMap.copy()

        if isinstance(d, InstObjParams):
            # If we're dealing with an InstObjParams object, we need
            # to be a little more sophisticated.  The instruction-wide
            # parameters are already formed, but the parameters which
            # are only function wide still need to be generated.
            myDict.update(d.__dict__)
            # The "operands" and "snippets" attributes of the InstObjParams
            # objects are for internal use and not substitution.
            del myDict['operands']
            del myDict['snippets']

            # Only the snippets that this template actually references
            # are munged and made available for substitution.
            snippetLabels = [l for l in labelRE.findall(template)
                             if l in d.snippets]

            snippets = dict([(s, mungeSnippet(d.snippets[s]))
                             for s in snippetLabels])

            myDict.update(snippets)

            compositeCode = ' '.join(map(str, snippets.values()))

            # Add in template itself in case it references any
            # operands explicitly (like Mem)
            compositeCode += ' ' + template

            operands = SubOperandList(compositeCode, d.operands)

            myDict['op_decl'] = operands.concatAttrStrings('op_decl')

            is_src = lambda op: op.is_src
            is_dest = lambda op: op.is_dest

            myDict['op_src_decl'] = \
                      operands.concatSomeAttrStrings(is_src, 'op_src_decl')
            myDict['op_dest_decl'] = \
                      operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')

            myDict['op_rd'] = operands.concatAttrStrings('op_rd')
            myDict['op_wb'] = operands.concatAttrStrings('op_wb')

            # The memory access attributes are only provided when the
            # instruction has a memory operand.
            if d.operands.memOperand:
                myDict['mem_acc_size'] = d.operands.memOperand.mem_acc_size
                myDict['mem_acc_type'] = d.operands.memOperand.mem_acc_type

        elif isinstance(d, dict):
            # if the argument is a dictionary, we just use it.
            myDict.update(d)
        elif hasattr(d, '__dict__'):
            # if the argument is an object, we use its attribute map.
            myDict.update(d.__dict__)
        else:
            raise TypeError("Template.subst() arg must be or have dictionary")
        return template % myDict

    # Convert to string.  This handles the case when a template with a
    # CPU-specific term gets interpolated into another template or into
    # an output block.
    def __str__(self):
        return expand_cpu_symbols_to_string(self.template)
204
205################
206# Format object.
207#
208# A format object encapsulates an instruction format.  It must provide
209# a defineInst() method that generates the code for an instruction
210# definition.
211
# Names that updateExportContext() resolves (through exportDict(), i.e.
# by evaluating each name in this module) and adds to exportContext.
exportContextSymbols = ('InstObjParams', 'makeList', 're', 'string')

# Symbol dictionary filled in by updateExportContext() and copied into
# the execution context of every Format.defineInst() call.
exportContext = {}
215
def updateExportContext():
    '''Refresh exportContext with the objects named in
    exportContextSymbols and then with the entries of
    parser.templateMap.'''
    exported = exportDict(*exportContextSymbols)
    exportContext.update(exported)
    templates = parser.templateMap
    exportContext.update(templates)
219
def exportDict(*symNames):
    '''Return a dictionary that maps each name in 'symNames' to the
    object obtained by evaluating that name.'''
    exported = {}
    for s in symNames:
        exported[s] = eval(s)
    return exported
222
223
224class Format(object):
225    def __init__(self, id, params, code):
226        # constructor: just save away arguments
227        self.id = id
228        self.params = params
229        label = 'def format ' + id
230        self.user_code = compile(fixPythonIndentation(code), label, 'exec')
231        param_list = string.join(params, ", ")
232        f = '''def defInst(_code, _context, %s):
233                my_locals = vars().copy()
234                exec _code in _context, my_locals
235                return my_locals\n''' % param_list
236        c = compile(f, label + ' wrapper', 'exec')
237        exec c
238        self.func = defInst
239
240    def defineInst(self, name, args, lineno):
241        context = {}
242        updateExportContext()
243        context.update(exportContext)
244        if len(name):
245            Name = name[0].upper()
246            if len(name) > 1:
247                Name += name[1:]
248        context.update({ 'name': name, 'Name': Name })
249        try:
250            vars = self.func(self.user_code, context, *args[0], **args[1])
251        except Exception, exc:
252            if debug:
253                raise
254            error(lineno, 'error defining "%s": %s.' % (name, exc))
255        for k in vars.keys():
256            if k not in ('header_output', 'decoder_output',
257                         'exec_output', 'decode_block'):
258                del vars[k]
259        return GenCode(**vars)
260
261# Special null format to catch an implicit-format instruction
262# definition outside of any format block.
class NoFormat(object):
    '''Stand-in format whose defineInst() always reports an error;
    it catches instruction definitions made outside any format.'''

    def __init__(self):
        self.defaultInst = ''

    def defineInst(self, name, args, lineno):
        # 'args' is accepted for interface compatibility but unused.
        message = 'instruction definition "%s" with no active format!' % name
        error(lineno, message)
270
# This dictionary maps format name strings to Format objects.  Entries
# are added by defFormat(), which reports an error for a name that is
# already present.
formatMap = {}
273
274# Define a new format
def defFormat(id, params, code, lineno):
    '''Create the Format named 'id' from 'params' and 'code' and store
    it in formatMap; a name that is already defined is reported as an
    error at 'lineno'.'''
    already_defined = formatMap.get(id) is not None
    if already_defined:
        error(lineno, 'format %s redefined.' % id)
    new_format = Format(id, params, code)
    formatMap[id] = new_format
281
282#####################################################################
283#
284#                           Support Classes
285#
286#####################################################################
287
288# Expand template with CPU-specific references into a dictionary with
289# an entry for each CPU model name.  The entry key is the model name
290# and the corresponding value is the template with the CPU-specific
291# refs substituted for that model.
def expand_cpu_symbols_to_dict(template):
    '''Return {model name: template text} with the %(CPU_...)
    references substituted separately for every entry of cpu_models.'''
    # Double every '%' that does not start a CPU-specific term so that
    # the '%' operator below substitutes the CPU-specific terms only.
    protected = re.sub(r'%(?!\(CPU_)', '%%', template)
    return dict([(model.name, protected % model.strings)
                 for model in cpu_models])
299
300# *If* the template has CPU-specific references, return a single
301# string containing a copy of the template for each CPU model with the
302# corresponding values substituted in.  If the template has no
303# CPU-specific references, it is returned unmodified.
def expand_cpu_symbols_to_string(template):
    '''Return 'template' expanded for all CPU models as one string.

    A template without any '%(CPU_' reference is returned unmodified.
    Otherwise the result is the concatenation of the per-model
    expansions produced by expand_cpu_symbols_to_dict(), which is the
    empty string when there are no CPU models.
    '''
    if '%(CPU_' not in template:
        return template
    # ''.join() also copes with an empty set of expansions, for which
    # reduce() without an initial value raises TypeError.
    return ''.join(expand_cpu_symbols_to_dict(template).values())
310
311# Protect CPU-specific references by doubling the corresponding '%'s
312# (in preparation for substituting a different set of references into
313# the template).
def protect_cpu_symbols(template):
    '''Return 'template' with the '%' of every '%(CPU_' reference
    doubled, so that a later '%' substitution leaves it in place.'''
    cpu_ref_percent = re.compile(r'%(?=\(CPU_)')
    return cpu_ref_percent.sub('%%', template)
316
317# Protect any non-dict-substitution '%'s in a format string
318# (i.e. those not followed by '(')
def protect_non_subst_percents(s):
    '''Return 's' with every '%' that is not followed by '(' doubled.'''
    bare_percent = re.compile(r'%(?!\()')
    return bare_percent.sub('%%', s)
321
322###############
323# GenCode class
324#
325# The GenCode class encapsulates generated code destined for various
326# output files.  The header_output and decoder_output attributes are
327# strings containing code destined for decoder.hh and decoder.cc
328# respectively.  The decode_block attribute contains code to be
329# incorporated in the decode function itself (that will also end up in
330# decoder.cc).  The exec_output attribute is a dictionary with a key
331# for each CPU model name; the value associated with a particular key
332# is the string of code for that CPU model's exec.cc file.  The
333# has_decode_default attribute is used in the decode block to allow
334# explicit default clauses to override default default clauses.
335
class GenCode(object):
    '''Container for the generated code of one or more instructions:
    header_output, decoder_output and decode_block strings, a
    per-CPU-model exec_output dictionary and the has_decode_default
    flag.'''

    # Constructor.  At this point we substitute out all CPU-specific
    # symbols.  For the exec output, these go into the per-model
    # dictionary.  For all other output types they get collapsed into
    # a single string.
    def __init__(self,
                 header_output = '', decoder_output = '', exec_output = '',
                 decode_block = '', has_decode_default = False):
        collapse = expand_cpu_symbols_to_string
        self.header_output = collapse(header_output)
        self.decoder_output = collapse(decoder_output)
        # Note that exec_output is only assigned for dict and str
        # arguments.
        if isinstance(exec_output, dict):
            # Already keyed by CPU model name: stored as given.
            self.exec_output = exec_output
        elif isinstance(exec_output, str):
            # If the exec_output arg is a single string, we replicate
            # it for each of the CPU models, substituting any
            # %(CPU_foo)s params appropriately.
            self.exec_output = expand_cpu_symbols_to_dict(exec_output)
        self.decode_block = collapse(decode_block)
        self.has_decode_default = has_decode_default

    # Override '+' operator: generate a new GenCode object that
    # concatenates all the individual strings in the operands.
    def __add__(self, other):
        combined_exec = dict([(model.name,
                               self.exec_output[model.name] +
                               other.exec_output[model.name])
                              for model in cpu_models])
        header = self.header_output + other.header_output
        decoder = self.decoder_output + other.decoder_output
        decode = self.decode_block + other.decode_block
        has_default = self.has_decode_default or other.has_decode_default
        return GenCode(header, decoder, combined_exec, decode, has_default)

    # Prepend a string (typically a comment) to all the strings.
    def prepend_all(self, pre):
        for attr in ('header_output', 'decoder_output', 'decode_block'):
            setattr(self, attr, pre + getattr(self, attr))
        for model in cpu_models:
            key = model.name
            self.exec_output[key] = pre + self.exec_output[key]

    # Wrap the decode block in a pair of strings (e.g., 'case foo:'
    # and 'break;').  Used to build the big nested switch statement.
    def wrap_decode_block(self, pre, post = ''):
        body = indent(self.decode_block)
        self.decode_block = pre + body + post
381
382#####################################################################
383#
384#                      Bitfield Operator Support
385#
386#####################################################################
387
# Matches the single-index bit selector '<n:>'; group 1 is the index.
bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')

# Matches a two-index selector applied to a (possibly dotted) name,
# e.g. 'foo.bar<a:b>'; the groups are (name, first index, second index).
bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
# Matches a two-index selector that immediately follows a ')'; the
# groups are (first index, second index).
bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')

def substBitOps(code):
    '''Return 'code' with bit-select operators rewritten as bits() calls.

    'x<a:b>' becomes 'bits(x, a, b)', where x is either a (possibly
    dotted) name or a parenthesized expression, and the single-index
    form 'x<a:>' is treated as 'x<a:a>'.  Calls sys.exit() if a
    selector follows a ')' that has no matching '('.
    '''
    # first convert single-bit selectors to two-index form
    # i.e., <n:> --> <n:n>
    code = bitOp1ArgRE.sub(r'<\1:\1>', code)
    # simple case: selector applied to ID (name)
    # i.e., foo<a:b> --> bits(foo, a, b)
    code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
    # if selector is applied to expression (ending in ')'),
    # we need to search backward for matching '('
    match = bitOpExprRE.search(code)
    while match:
        exprEnd = match.start()
        here = exprEnd - 1
        nestLevel = 1
        while nestLevel > 0:
            # Check for running off the front *before* indexing: this
            # keeps a '(' at index 0 valid and stops a negative index
            # from wrapping around to the end of the string.
            if here < 0:
                sys.exit("Didn't find '('!")
            if code[here] == '(':
                nestLevel -= 1
            elif code[here] == ')':
                nestLevel += 1
            here -= 1
        # 'here' was decremented once past the matching '('.
        exprStart = here+1
        newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
                                         match.group(1), match.group(2))
        code = code[:exprStart] + newExpr + code[match.end():]
        match = bitOpExprRE.search(code)
    return code
421
422
423#####################################################################
424#
425#                             Code Parser
426#
427# The remaining code is the support for automatically extracting
428# instruction characteristics from pseudocode.
429#
430#####################################################################
431
# Force the argument to be a list.  Useful for flags, where a caller
# can specify a singleton flag or a list of flags.  Also useful for
# converting tuples to lists so they can be modified.
def makeList(arg):
    '''Return 'arg' as a list: a list is returned as is, a tuple is
    copied into a new list, any other false value gives [], and
    anything else is wrapped in a one-element list.'''
    if isinstance(arg, list):
        return arg
    if isinstance(arg, tuple):
        return list(arg)
    if arg:
        return [ arg ]
    return []
444
445# Generate operandTypeMap from the user's 'def operand_types'
446# statement.
def buildOperandTypeMap(user_dict, lineno):
    '''(Re)build the global operandTypeMap from 'user_dict'.

    'user_dict' maps a type extension to a (description, size) pair.
    Each entry is stored as operandTypeMap[ext] = (size, ctype,
    is_signed).  A description that is not recognized (or a 'float'
    whose size is neither 32 nor 64) is reported through error() at
    'lineno'.
    '''
    global operandTypeMap
    operandTypeMap = {}
    for (ext, (desc, size)) in user_dict.items():
        # Reset for every entry so that an unrecognized description is
        # detected below instead of raising NameError or silently
        # reusing the values of the previous entry.
        ctype = ''
        is_signed = 0
        if desc == 'signed int':
            ctype = 'int%d_t' % size
            is_signed = 1
        elif desc == 'unsigned int':
            ctype = 'uint%d_t' % size
            is_signed = 0
        elif desc == 'float':
            is_signed = 1       # shouldn't really matter
            if size == 32:
                ctype = 'float'
            elif size == 64:
                ctype = 'double'
        elif desc == 'twin64 int':
            is_signed = 0
            ctype = 'Twin64_t'
        elif desc == 'twin32 int':
            is_signed = 0
            ctype = 'Twin32_t'
        if ctype == '':
            error(lineno,
                  'Unrecognized type description "%s" in user_dict' % desc)
        operandTypeMap[ext] = (size, ctype, is_signed)
472
class Operand(object):
    '''Base class for operand descriptors.  An instance of this class
    (or actually of a class derived from this one) represents a
    specific operand for a code block (e.g., "Rc.sq" as a dest).
    Intermediate derived classes encapsulate the traits of a
    particular operand type (e.g., "32-bit integer register").'''

    def buildReadCode(self, func = None):
        # Expand the operand's custom read_code template and assign the
        # result to the operand variable, selecting the low 'size' bits
        # when the operand size differs from the default size.
        fields = {"name": self.base_name,
                  "func": func,
                  "op_idx": self.src_reg_idx,
                  "reg_idx": self.reg_spec,
                  "size": self.size,
                  "ctype": self.ctype}
        code = self.read_code % fields
        if self.size == self.dflt_size:
            return '%s = %s;\n' % (self.base_name, code)
        return '%s = bits(%s, %d, 0);\n' % (self.base_name, code,
                                            self.size-1)

    def buildWriteCode(self, func = None):
        # Expand the operand's custom write_code template; the value
        # written is sign-extended when a signed operand's size differs
        # from the default size.
        needs_sext = self.size != self.dflt_size and self.is_signed
        if needs_sext:
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        else:
            final_val = self.base_name
        fields = {"name": self.base_name,
                  "func": func,
                  "op_idx": self.dest_reg_idx,
                  "reg_idx": self.reg_spec,
                  "size": self.size,
                  "ctype": self.ctype,
                  "final_val": final_val}
        code = self.write_code % fields
        return '''
        {
            %s final_val = %s;
            %s;
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, code)

    def __init__(self, full_name, ext, is_src, is_dest):
        self.full_name = full_name
        self.ext = ext
        self.is_src = is_src
        self.is_dest = is_dest
        # The 'effective extension' (eff_ext) is either the actual
        # extension, if one was explicitly provided, or the default.
        self.eff_ext = ext or self.dflt_ext

        type_info = operandTypeMap[self.eff_ext]
        (self.size, self.ctype, self.is_signed) = type_info

        # note that mem_acc_size is undefined for non-mem operands...
        # template must be careful not to use it if it doesn't apply.
        if self.isMem():
            self.mem_acc_size = self.makeAccSize()
            if self.ctype in ('Twin32_t', 'Twin64_t'):
                self.mem_acc_type = 'Twin'
            else:
                self.mem_acc_type = 'uint'

    # Finalize additional fields (primarily code fields).  This step
    # is done separately since some of these fields may depend on the
    # register index enumeration that hasn't been performed yet at the
    # time of __init__().
    def finalize(self):
        self.flags = self.getFlags()
        self.constructor = self.makeConstructor()
        self.op_decl = self.makeDecl()

        # Read code and source declaration exist only for sources.
        if self.is_src:
            (self.op_rd, self.op_src_decl) = (self.makeRead(),
                                              self.makeDecl())
        else:
            self.op_rd = self.op_src_decl = ''

        # Writeback code and dest declaration exist only for dests.
        if self.is_dest:
            (self.op_wb, self.op_dest_decl) = (self.makeWrite(),
                                               self.makeDecl())
        else:
            self.op_wb = self.op_dest_decl = ''

    # Operand kind predicates; derived classes override the ones that
    # apply to them.
    def isMem(self):
        return 0

    def isReg(self):
        return 0

    def isFloatReg(self):
        return 0

    def isIntReg(self):
        return 0

    def isControlReg(self):
        return 0

    def getFlags(self):
        # Start from a copy (via the empty slice) of the always-implied
        # flags, so that self.flags[0] itself is never modified, then
        # add the source-only and dest-only flags as applicable.
        my_flags = self.flags[0][:]
        for (applies, idx) in ((self.is_src, 1), (self.is_dest, 2)):
            if applies:
                my_flags += self.flags[idx]
        return my_flags

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        return '%s %s = 0;\n' % (self.ctype, self.base_name)
588
class IntRegOperand(Operand):
    '''Operand descriptor for an integer register.'''

    def isReg(self):
        return 1

    def isIntReg(self):
        return 1

    def makeConstructor(self):
        # Record the register in the source and/or destination index
        # arrays, depending on how the operand is used.
        pieces = []
        if self.is_src:
            pieces.append('\n\t_srcRegIdx[%d] = %s;' %
                          (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            pieces.append('\n\t_destRegIdx[%d] = %s;' %
                          (self.dest_reg_idx, self.reg_spec))
        return ''.join(pieces)

    def makeRead(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to read integer register as FP')
        if self.read_code != None:
            return self.buildReadCode('readIntRegOperand')
        reg_val = 'xc->readIntRegOperand(this, %d)' % self.src_reg_idx
        if self.size == self.dflt_size:
            return '%s = %s;\n' % (self.base_name, reg_val)
        if self.size < self.dflt_size:
            # Narrower than the register: keep only the low bits.
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, reg_val, self.size-1)
        # Wider than the register: sign-extend signed operands from
        # the default size.
        if self.is_signed:
            reg_val = 'sext<%d>(%s)' % (self.dflt_size, reg_val)
        return '%s = %s;\n' % (self.base_name, reg_val)

    def makeWrite(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to write integer register as FP')
        if self.write_code != None:
            return self.buildWriteCode('setIntRegOperand')
        final_val = self.base_name
        if self.size != self.dflt_size and self.is_signed:
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        return '''
        {
            %s final_val = %s;
            xc->setIntRegOperand(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, self.dest_reg_idx)
640
class FloatRegOperand(Operand):
    '''Operand descriptor for a floating-point register.'''

    def isReg(self):
        return 1

    def isFloatReg(self):
        return 1

    def makeConstructor(self):
        # Register indices are offset by FP_Base_DepTag.
        pieces = []
        if self.is_src:
            pieces.append('\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' %
                          (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            pieces.append('\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' %
                          (self.dest_reg_idx, self.reg_spec))
        return ''.join(pieces)

    def makeRead(self):
        # float/double operands are read as FP values; every other
        # type is read as raw bits, with a bit-select when the operand
        # size differs from the default size.
        is_fp = self.ctype in ('float', 'double')
        if is_fp:
            func = 'readFloatRegOperand'
        else:
            func = 'readFloatRegOperandBits'
        bit_select = not is_fp and self.size != self.dflt_size
        base = 'xc->%s(this, %d)' % (func, self.src_reg_idx)
        if self.read_code != None:
            return self.buildReadCode(func)
        if not bit_select:
            return '%s = %s;\n' % (self.base_name, base)
        return '%s = bits(%s, %d, 0);\n' % \
               (self.base_name, base, self.size-1)

    def makeWrite(self):
        final_val = self.base_name
        final_ctype = self.ctype
        if self.ctype in ('float', 'double'):
            func = 'setFloatRegOperand'
        else:
            func = 'setFloatRegOperandBits'
            if self.ctype not in ('uint32_t', 'uint64_t'):
                # Other integer types are written as an unsigned value
                # of the default size, sign-extended if necessary.
                final_ctype = 'uint%d_t' % self.dflt_size
                if self.size != self.dflt_size and self.is_signed:
                    final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        if self.write_code != None:
            return self.buildWriteCode(func)
        return '''
        {
            %s final_val = %s;
            xc->%s(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (final_ctype, final_val, func, self.dest_reg_idx)
696
class ControlRegOperand(Operand):
    '''Operand descriptor for a control (misc) register.'''

    def isReg(self):
        return 1

    def isControlReg(self):
        return 1

    def makeConstructor(self):
        # Register indices are offset by Ctrl_Base_DepTag.
        pieces = []
        if self.is_src:
            pieces.append('\n\t_srcRegIdx[%d] = %s + Ctrl_Base_DepTag;' %
                          (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            pieces.append('\n\t_destRegIdx[%d] = %s + Ctrl_Base_DepTag;' %
                          (self.dest_reg_idx, self.reg_spec))
        return ''.join(pieces)

    def makeRead(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to read control register as FP')
        if self.read_code != None:
            return self.buildReadCode('readMiscRegOperand')
        base = 'xc->readMiscRegOperand(this, %s)' % self.src_reg_idx
        if self.size != self.dflt_size:
            # Non-default size: keep only the low 'size' bits.
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, base, self.size-1)
        return '%s = %s;\n' % (self.base_name, base)

    def makeWrite(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to write control register as FP')
        if self.write_code != None:
            return self.buildWriteCode('setMiscRegOperand')
        statements = [
            'xc->setMiscRegOperand(this, %s, %s);\n' %
            (self.dest_reg_idx, self.base_name),
            'if (traceData) { traceData->setData(%s); }' %
            self.base_name]
        return ''.join(statements)
737
class MemOperand(Operand):
    '''Operand descriptor for a memory operand.'''

    def isMem(self):
        return 1

    def makeConstructor(self):
        # Memory operands add nothing to the constructor.
        return ''

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        # Declare memory data variable.
        ctype = self.ctype
        name = self.base_name
        if ctype not in ('Twin32_t', 'Twin64_t'):
            return '%s %s = 0;\n' % (ctype, name)
        # Twin types are initialized member by member.
        return "%s %s; %s.a = 0; %s.b = 0;\n" % (ctype, name, name, name)

    def makeRead(self):
        # Code is generated only when custom read code was supplied.
        if self.read_code is None:
            return ''
        return self.buildReadCode()

    def makeWrite(self):
        # Code is generated only when custom write code was supplied.
        if self.write_code is None:
            return ''
        return self.buildWriteCode()

    # Return the memory access size *in bits*, suitable for
    # forming a type via "uint%d_t".  Divide by 8 if you want bytes.
    def makeAccSize(self):
        return self.size
768
class PCOperand(Operand):
    '''Operand descriptor for the PC, accessed through xc->readPC()
    and xc->setPC().'''

    def makeConstructor(self):
        # Nothing to add to the constructor.
        return ''

    def makeRead(self):
        name = self.base_name
        return '%s = xc->readPC();\n' % name

    def makeWrite(self):
        name = self.base_name
        return 'xc->setPC(%s);\n' % name
778
class UPCOperand(Operand):
    '''Operand descriptor for the micro-PC; custom read/write code,
    when supplied, takes precedence over the default accessors.'''

    def makeConstructor(self):
        # Nothing to add to the constructor.
        return ''

    def makeRead(self):
        if self.read_code is None:
            return '%s = xc->readMicroPC();\n' % self.base_name
        return self.buildReadCode('readMicroPC')

    def makeWrite(self):
        if self.write_code is None:
            return 'xc->setMicroPC(%s);\n' % self.base_name
        return self.buildWriteCode('setMicroPC')
792
class NUPCOperand(Operand):
    '''Operand descriptor for the next micro-PC; custom read/write
    code, when supplied, takes precedence over the default accessors.'''

    def makeConstructor(self):
        # Nothing to add to the constructor.
        return ''

    def makeRead(self):
        if self.read_code is None:
            return '%s = xc->readNextMicroPC();\n' % self.base_name
        return self.buildReadCode('readNextMicroPC')

    def makeWrite(self):
        if self.write_code is None:
            return 'xc->setNextMicroPC(%s);\n' % self.base_name
        return self.buildWriteCode('setNextMicroPC')
806
class NPCOperand(Operand):
    '''Operand accessed through readNextPC()/setNextPC() on xc.'''

    def makeConstructor(self):
        # Contributes nothing to the generated constructor.
        return ''

    def makeRead(self):
        # Default accessor unless the operand supplies its own read code.
        if self.read_code is None:
            return '%s = xc->readNextPC();\n' % self.base_name
        return self.buildReadCode('readNextPC')

    def makeWrite(self):
        # Default accessor unless the operand supplies its own write code.
        if self.write_code is None:
            return 'xc->setNextPC(%s);\n' % self.base_name
        return self.buildWriteCode('setNextPC')
820
class NNPCOperand(Operand):
    '''Operand accessed through readNextNPC()/setNextNPC() on xc.'''

    def makeConstructor(self):
        # Contributes nothing to the generated constructor.
        return ''

    def makeRead(self):
        # Default accessor unless the operand supplies its own read code.
        if self.read_code is None:
            return '%s = xc->readNextNPC();\n' % self.base_name
        return self.buildReadCode('readNextNPC')

    def makeWrite(self):
        # Default accessor unless the operand supplies its own write code.
        if self.write_code is None:
            return 'xc->setNextNPC(%s);\n' % self.base_name
        return self.buildWriteCode('setNextNPC')
834
def buildOperandNameMap(user_dict, lineno):
    '''Build the operand tables from the contents of a 'def operands' block.

    user_dict maps each operand name to a sequence of five to seven
    attributes: (base class name, default type extension, register
    specifier, flags, sort priority[, read code[, write code]]).
    lineno is passed through to error() when something is wrong.

    Side effects: rebinds the globals operandNameMap (operand name ->
    generated operand class), operandsRE and operandsWithExtRE.
    '''
    global operandNameMap
    operandNameMap = {}
    for (op_name, val) in user_dict.items():
        (base_cls_name, dflt_ext, reg_spec, flags, sort_pri) = val[:5]
        # The read and write code snippets are optional trailing attributes.
        read_code = None
        write_code = None
        if len(val) > 5:
            read_code = val[5]
        if len(val) > 6:
            write_code = val[6]
        if len(val) > 7:
            # Name the offending operand itself; the base class name is
            # shared by many operands and does not identify the culprit.
            error(lineno,
                  'error: too many attributes for operand "%s"' % op_name)

        (dflt_size, dflt_ctype, dflt_is_signed) = operandTypeMap[dflt_ext]
        # Canonical flag structure is a triple of lists, where each list
        # indicates the set of flags implied by this operand always, when
        # used as a source, and when used as a dest, respectively.
        # For simplicity this can be initialized using a variety of fairly
        # obvious shortcuts; we convert these to canonical form here.
        if not flags:
            # no flags specified (e.g., 'None')
            flags = ( [], [], [] )
        elif isinstance(flags, str):
            # a single flag: assumed to be unconditional
            flags = ( [ flags ], [], [] )
        elif isinstance(flags, list):
            # a list of flags: also assumed to be unconditional
            flags = ( flags, [], [] )
        elif isinstance(flags, tuple):
            # it's a tuple: it should be a triple,
            # but each item could be a single string or a list
            (uncond_flags, src_flags, dest_flags) = flags
            flags = (makeList(uncond_flags),
                     makeList(src_flags), makeList(dest_flags))
        # Attributes of the new operand class.
        tmp_dict = {
            'dflt_ext': dflt_ext,
            'reg_spec': reg_spec,
            'flags': flags,
            'sort_pri': sort_pri,
            'dflt_size': dflt_size,
            'dflt_ctype': dflt_ctype,
            'dflt_is_signed': dflt_is_signed,
            'read_code': read_code,
            'write_code': write_code,
            'base_name': op_name,
        }
        # New class name will be e.g. "IntReg_Ra"
        cls_name = base_cls_name + '_' + op_name
        # Evaluate string arg to get class object.  Note that the
        # actual base class for "IntReg" is "IntRegOperand", i.e. we
        # have to append "Operand".
        try:
            base_cls = eval(base_cls_name + 'Operand')
        except NameError:
            if debug:
                raise
            error(lineno,
                  'error: unknown operand base class "%s"' % base_cls_name)
        # The following statement creates a new class called
        # <cls_name> as a subclass of <base_cls> with the attributes
        # in tmp_dict, just as if we evaluated a class declaration.
        operandNameMap[op_name] = type(cls_name, (base_cls,), tmp_dict)

    # Alternation of all operand names, shared by both regexps below.
    operand_alternatives = '|'.join(user_dict.keys())

    operandsREString = (r'''
    (?<![\w\.])      # neg. lookbehind assertion: prevent partial matches
    ((%s)(?:\.(\w+))?)   # match: operand with optional '.' then suffix
    (?![\w\.])       # neg. lookahead assertion: prevent partial matches
    '''
                        % operand_alternatives)

    global operandsRE
    operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)

    # Same as operandsREString, but extension is mandatory, and only two
    # groups are returned (base and ext, not full name as above).
    # Used for substituting '_' for '.' to make C++ identifiers.
    operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])'
                               % operand_alternatives)

    global operandsWithExtRE
    operandsWithExtRE = re.compile(operandsWithExtREString, re.MULTILINE)
919
# Largest numbers of source and destination register operands found in
# any single OperandList built so far; OperandList.__init__ raises these
# whenever a code block exceeds the current maximum.
maxInstSrcRegs = 0
maxInstDestRegs = 0
922
class OperandList(object):
    '''Find all the operands in the given code block.  Returns an operand
    descriptor list (instance of class OperandList).

    The list is kept sorted by each descriptor's sort_pri.  Register
    operands are numbered as sources and/or destinations, and at most
    one memory operand is remembered in memOperand.'''
    def __init__(self, code):
        global maxInstSrcRegs
        global maxInstDestRegs
        self.items = []
        self.bases = {}
        # delete comments so we don't match on reg specifiers inside
        code = commentRE.sub('', code)
        # search for operands, resuming after each match
        match = operandsRE.search(code)
        while match:
            # regexp groups are operand full name, base, and extension
            (op_full, op_base, op_ext) = match.groups()
            # if the token following the operand is an assignment, this is
            # a destination (LHS), else it's a source (RHS)
            is_dest = assignRE.match(code, match.end()) is not None
            is_src = not is_dest
            # see if we've already seen this one
            op_desc = self.find_base(op_base)
            if op_desc:
                if op_desc.ext != op_ext:
                    error('Inconsistent extensions for operand %s' % \
                          op_base)
                op_desc.is_src = op_desc.is_src or is_src
                op_desc.is_dest = op_desc.is_dest or is_dest
            else:
                # new operand: create new descriptor
                op_desc = operandNameMap[op_base](op_full, op_ext,
                                                  is_src, is_dest)
                self.append(op_desc)
            # start next search after end of current match
            match = operandsRE.search(code, match.end())
        self.sort()
        # enumerate source & dest register operands... used in building
        # constructor later
        self.numSrcRegs = 0
        self.numDestRegs = 0
        self.numFPDestRegs = 0
        self.numIntDestRegs = 0
        self.memOperand = None
        for op_desc in self.items:
            if op_desc.isReg():
                if op_desc.is_src:
                    op_desc.src_reg_idx = self.numSrcRegs
                    self.numSrcRegs += 1
                if op_desc.is_dest:
                    op_desc.dest_reg_idx = self.numDestRegs
                    self.numDestRegs += 1
                    if op_desc.isFloatReg():
                        self.numFPDestRegs += 1
                    elif op_desc.isIntReg():
                        self.numIntDestRegs += 1
            elif op_desc.isMem():
                if self.memOperand:
                    error("Code block has more than one memory operand.")
                self.memOperand = op_desc
        # keep track of the largest register counts seen in any code block
        maxInstSrcRegs = max(maxInstSrcRegs, self.numSrcRegs)
        maxInstDestRegs = max(maxInstDestRegs, self.numDestRegs)
        # now make a final pass to finalize op_desc fields that may depend
        # on the register enumeration
        for op_desc in self.items:
            op_desc.finalize()

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        return self.items[index]

    def append(self, op_desc):
        # keep the by-name index in step with the list itself
        self.items.append(op_desc)
        self.bases[op_desc.base_name] = op_desc

    def find_base(self, base_name):
        # like self.bases[base_name], but returns None if not found
        # (rather than raising exception)
        return self.bases.get(base_name)

    # internal helper function for concat[Some]Attr{Strings|Lists}
    def __internalConcatAttrs(self, attr_name, filter, result):
        for op_desc in self.items:
            if filter(op_desc):
                result += getattr(op_desc, attr_name)
        return result

    # return a single string that is the concatenation of the (string)
    # values of the specified attribute for all operands
    def concatAttrStrings(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, '')

    # like concatAttrStrings, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrStrings(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, '')

    # return a single list that is the concatenation of the (list)
    # values of the specified attribute for all operands
    def concatAttrLists(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, [])

    # like concatAttrLists, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrLists(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, [])

    def sort(self):
        # Order by sort priority.  A key function gives the same stable
        # ordering as the former comparison function, without relying on
        # cmp-style sorting.
        self.items.sort(key=lambda op_desc: op_desc.sort_pri)
1039
class SubOperandList(OperandList):
    '''Operand list for a piece of code whose operands must all already
    appear in a master OperandList.  The descriptors are the master
    list's own objects, not new ones.'''
    def __init__(self, code, master_list):
        self.items = []
        self.bases = {}
        # strip comments first so operand names inside them are ignored
        code = commentRE.sub('', code)
        # walk through every operand reference in the code
        found = operandsRE.search(code)
        while found:
            # regexp groups are operand full name, base, and extension;
            # only the base name is needed here
            (op_full, op_base, op_ext) = found.groups()
            if not master_list.find_base(op_base):
                # every operand must be known to the master list
                error('Found operand %s which is not in the master list!' \
                      ' This is an internal error' % op_base)
            elif not self.find_base(op_base):
                # first sighting in this code: reference the master's
                # descriptor from this sub list
                self.append(master_list.bases[op_base])
            # resume searching after the end of this match
            found = operandsRE.search(code, found.end())
        self.sort()
        # remember the (single) memory operand, if there is one
        self.memOperand = None
        for candidate in self.items:
            if not candidate.isMem():
                continue
            if self.memOperand:
                error("Code block has more than one memory operand.")
            self.memOperand = candidate
1079
# Regular expression object to match C++ '//' comments, up to and
# including the newline that ends them.  OperandList and SubOperandList
# use it to delete comments before scanning code for operands.
commentRE = re.compile(r'//.*\n')

# Regular expression object to match the '=' of an assignment: optional
# whitespace, then '=' not followed by a second '=' (so '==' does not
# count).  OperandList matches it immediately after an operand to decide
# whether that operand is a destination.
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
1087
# Munge operand names in code string to make legal C++ variable names.
# Every 'name.ext' operand reference is replaced by just 'name', i.e.
# the type extension is dropped.
# (Will match base_name attribute of Operand object.)
def substMungedOpNames(code):
    munged = operandsWithExtRE.sub(r'\1', code)
    return munged
1093
# Fix up code snippets for final substitution in templates.
# Strings get bit operators and operand names substituted; anything that
# is not a string is handed back untouched.
def mungeSnippet(s):
    if not isinstance(s, str):
        return s
    return substMungedOpNames(substBitOps(s))
1100
def makeFlagConstructor(flag_list):
    '''Return C++ statements that set each flag in flag_list to true.

    The result is '' for an empty list; otherwise it holds one
    "\\n\\tflags[<flag>] = true;" statement per distinct flag, in sorted
    order.

    As before, flag_list itself is sorted and stripped of duplicates in
    place, so the caller's list is modified.
    '''
    if not flag_list:
        return ''
    # filter out repeated flags (in place: callers keep using this list)
    flag_list[:] = sorted(set(flag_list))
    pre = '\n\tflags['
    post = '] = true;'
    return pre + (post + pre).join(flag_list) + post
1116
# Assume all instruction flags are of the form 'IsFoo'.  InstObjParams
# applies this with match(), so it is tested at the start of the string.
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass.  InstObjParams also
# applies this with match(), after instFlagRE has failed to match.
opClassRE = re.compile(r'.*Op|No_OpClass')
1122
class InstObjParams(object):
    '''Bundle of per-instruction values: mnemonic and class names, the
    code snippets, the operands found in them, the constructor code,
    the flags, the op class and the FP enable check.'''
    def __init__(self, mnem, class_name, base_class = '',
                 snippets = {}, opt_args = []):
        # NOTE(review): both defaults are mutable and shared between
        # calls, and the default dict is stored as self.snippets.  Nothing
        # in this method mutates them; confirm no caller does either.
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        # a bare (non-dict) argument is taken as the single 'code' snippet
        if not isinstance(snippets, dict):
            snippets = {'code' : snippets}
        self.snippets = snippets

        # scan all the snippets together as one piece of code
        compositeCode = ' '.join([str(text) for text in snippets.values()])
        self.operands = OperandList(compositeCode)

        # constructor code: per-operand pieces, then the register counts
        ctor = self.operands.concatAttrStrings('constructor')
        for (counter, value) in (
                ('_numSrcRegs', self.operands.numSrcRegs),
                ('_numDestRegs', self.operands.numDestRegs),
                ('_numFPDestRegs', self.operands.numFPDestRegs),
                ('_numIntDestRegs', self.operands.numIntDestRegs)):
            ctor += '\n\t%s = %d;' % (counter, value)
        self.constructor = ctor
        self.flags = self.operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and can be overridden
        # later otherwise.
        if 'IsStore' in self.flags:
            guessed_class = 'MemWriteOp'
        elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
            guessed_class = 'MemReadOp'
        elif 'IsFloating' in self.flags:
            guessed_class = 'FloatAddOp'
        else:
            guessed_class = 'IntAluOp'
        self.op_class = guessed_class

        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error('InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to constructor here to include
        # any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        self.fp_enable_check = ''
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
1181
1182##############
1183# Stack: a simple stack object.  Used for both formats (formatStack)
1184# and default cases (defaultStack).  Simply wraps a list to give more
1185# stack-like syntax and enable initialization with an argument list
1186# (as opposed to an argument that's a list).
1187
class Stack(list):
    '''A list with stack-style push()/top(); the constructor takes the
    initial items as separate arguments rather than as one list.'''

    def __init__(self, *items):
        super(Stack, self).__init__(items)

    def push(self, item):
        # the top of the stack is the end of the list
        self.append(item)

    def top(self):
        # raises IndexError if the stack is empty
        return self[-1]
1197
# The global format stack.  It starts out holding a single NoFormat
# instance.
formatStack = Stack(NoFormat())

# The global default case stack.  It starts out holding a single None.
defaultStack = Stack(None)

# Global stack that tracks current file and line number.
# Each element is a tuple (filename, lineno) that records the
# *current* filename and the line number in the *previous* file where
# it was included.  The lexer pushes an entry for each '##newfile'
# directive and pops one for each '##endfile'.
fileNameStack = Stack()
1209
1210
1211#######################
1212#
1213# Output file template
1214#
1215
# Skeleton of a generated C++ file.  It is a %-format string with named
# fields, so it must be formatted with a mapping that supplies: filename,
# includes, global_output, namespace, namespace_output, decode_function.
file_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s

%(decode_function)s
'''
1235
# Skeleton of the generated C++ text that declares the MaxInstSrcRegs
# and MaxInstDestRegs constants.  Named %-format fields: filename,
# namespace (both strings), MaxInstSrcRegs and MaxInstDestRegs (both
# formatted with %d, so they must be integers).
max_inst_regs_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

namespace %(namespace)s {

    const int MaxInstSrcRegs = %(MaxInstSrcRegs)d;
    const int MaxInstDestRegs = %(MaxInstDestRegs)d;

} // namespace %(namespace)s

'''
1251
1252class ISAParser(Grammar):
1253    def __init__(self, output_dir):
1254        super(ISAParser, self).__init__()
1255        self.output_dir = output_dir
1256
1257        self.templateMap = {}
1258
1259    #####################################################################
1260    #
1261    #                                Lexer
1262    #
1263    # The PLY lexer module takes two things as input:
1264    # - A list of token names (the string list 'tokens')
1265    # - A regular expression describing a match for each token.  The
1266    #   regexp for token FOO can be provided in two ways:
1267    #   - as a string variable named t_FOO
1268    #   - as the doc string for a function named t_FOO.  In this case,
1269    #     the function is also executed, allowing an action to be
1270    #     associated with each token match.
1271    #
1272    #####################################################################
1273
1274    # Reserved words.  These are listed separately as they are matched
1275    # using the same regexp as generic IDs, but distinguished in the
1276    # t_ID() function.  The PLY documentation suggests this approach.
    # These are the token type names; the keyword written in the input
    # is the lower-case form of each (see reserved_map below).
    reserved = (
        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
        'OUTPUT', 'SIGNED', 'TEMPLATE'
        )
1282
1283    # List of tokens.  The lex module requires this.
    tokens = reserved + (
        # identifier
        'ID',

        # integer literal
        'INTLIT',

        # string literal
        'STRLIT',

        # code literal
        'CODELIT',

        # ( ) [ ] { } < > = , ; . : :: *
        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'LBRACE', 'RBRACE',
        'LESS', 'GREATER', 'EQUALS',
        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
        'ASTERISK',

        # C preprocessor directives
        'CPPDIRECTIVE'

    # The following are matched (by t_NEWFILE and t_ENDFILE) but never
    # returned as tokens.  They are commented out to suppress a PLY
    # warning.
        # newfile directive
    #    'NEWFILE',

        # endfile directive
    #    'ENDFILE'
    )
1316
1317    # Regular expressions for token matching
    # Punctuation tokens, each defined by a plain regexp string.
    # NOTE(review): per the PLY documentation, string-defined rules are
    # tried longest-regexp-first, which is presumably what lets '::'
    # (t_DBLCOLON) win over ':' (t_COLON) -- confirm against the PLY
    # version in use.
    t_LPAREN           = r'\('
    t_RPAREN           = r'\)'
    t_LBRACKET         = r'\['
    t_RBRACKET         = r'\]'
    t_LBRACE           = r'\{'
    t_RBRACE           = r'\}'
    t_LESS             = r'\<'
    t_GREATER          = r'\>'
    t_EQUALS           = r'='
    t_COMMA            = r','
    t_SEMI             = r';'
    t_DOT              = r'\.'
    t_COLON            = r':'
    t_DBLCOLON         = r'::'
    t_ASTERISK         = r'\*'
1333
1334    # Identifiers and reserved words
    # Maps each keyword as written in the input (lower case) to its
    # token type name (upper case); consulted by t_ID.
    reserved_map = { }
    for r in reserved:
        reserved_map[r.lower()] = r
1338
1339    def t_ID(self, t):
1340        r'[A-Za-z_]\w*'
1341        t.type = self.reserved_map.get(t.value, 'ID')
1342        return t
1343
1344    # Integer literal
1345    def t_INTLIT(self, t):
1346        r'-?(0x[\da-fA-F]+)|\d+'
1347        try:
1348            t.value = int(t.value,0)
1349        except ValueError:
1350            error(t, 'Integer value "%s" too large' % t.value)
1351            t.value = 0
1352        return t
1353
1354    # String literal.  Note that these use only single quotes, and
1355    # can span multiple lines.
1356    def t_STRLIT(self, t):
1357        r"(?m)'([^'])+'"
1358        # strip off quotes
1359        t.value = t.value[1:-1]
1360        t.lexer.lineno += t.value.count('\n')
1361        return t
1362
1363
1364    # "Code literal"... like a string literal, but delimiters are
1365    # '{{' and '}}' so they get formatted nicely under emacs c-mode
1366    def t_CODELIT(self, t):
1367        r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
1368        # strip off {{ & }}
1369        t.value = t.value[2:-2]
1370        t.lexer.lineno += t.value.count('\n')
1371        return t
1372
1373    def t_CPPDIRECTIVE(self, t):
1374        r'^\#[^\#].*\n'
1375        t.lexer.lineno += t.value.count('\n')
1376        return t
1377
1378    def t_NEWFILE(self, t):
1379        r'^\#\#newfile\s+"[\w/.-]*"'
1380        fileNameStack.push((t.value[11:-1], t.lexer.lineno))
1381        t.lexer.lineno = 0
1382
1383    def t_ENDFILE(self, t):
1384        r'^\#\#endfile'
1385        (old_filename, t.lexer.lineno) = fileNameStack.pop()
1386
1387    #
1388    # The functions t_NEWLINE, t_ignore, and t_error are
1389    # special for the lex module.
1390    #
1391
1392    # Newlines
1393    def t_NEWLINE(self, t):
1394        r'\n+'
1395        t.lexer.lineno += t.value.count('\n')
1396
1397    # Comments
    def t_comment(self, t):
        r'//.*'
        # The docstring above is the token regexp: '//' and the rest of
        # the line.  There is deliberately no body and no return value,
        # which (per PLY's convention for rule functions) discards the
        # matched text.
1400
    # Completely ignored characters: space, tab and form feed ('\x0c').
    t_ignore = ' \t\x0c'
1403
1404    # Error handler
1405    def t_error(self, t):
1406        error(t, "illegal character '%s'" % t.value[0])
1407        t.skip(1)
1408
1409    #####################################################################
1410    #
1411    #                                Parser
1412    #
1413    # Every function whose name starts with 'p_' defines a grammar
1414    # rule.  The rule is encoded in the function's doc string, while
1415    # the function body provides the action taken when the rule is
1416    # matched.  The argument to each function is a list of the values
1417    # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
1418    # symbols on the RHS.  For tokens, the value is copied from the
1419    # t.value attribute provided by the lexer.  For non-terminals, the
1420    # value is assigned by the producing rule; i.e., the job of the
1421    # grammar rule function is to set the value for the non-terminal
1422    # on the LHS (by assigning to t[0]).
1423    #####################################################################
1424
1425    # The LHS of the first grammar rule is used as the start symbol
1426    # (in this case, 'specification').  Note that this rule enforces
1427    # that there will be exactly one namespace declaration, with 0 or
1428    # more global defs/decls before and after it.  The defs & decls
1429    # before the namespace decl will be outside the namespace; those
1430    # after will be inside.  The decoder function is always inside the
1431    # namespace.
1432    def p_specification(self, t):
1433        'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
1434        global_code = t[1]
1435        isa_name = t[2]
1436        namespace = isa_name + "Inst"
1437        # wrap the decode block as a function definition
1438        t[4].wrap_decode_block('''
1439StaticInstPtr
1440%(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst)
1441{
1442    using namespace %(namespace)s;
1443''' % vars(), '}')
1444        # both the latter output blocks and the decode block are in
1445        # the namespace
1446        namespace_code = t[3] + t[4]
1447        # pass it all back to the caller of yacc.parse()
1448        t[0] = (isa_name, namespace, global_code, namespace_code)
1449
1450    # ISA name declaration looks like "namespace <foo>;"
1451    def p_name_decl(self, t):
1452        'name_decl : NAMESPACE ID SEMI'
1453        t[0] = t[2]
1454
1455    # 'opt_defs_and_outputs' is a possibly empty sequence of
1456    # def and/or output statements.
1457    def p_opt_defs_and_outputs_0(self, t):
1458        'opt_defs_and_outputs : empty'
1459        t[0] = GenCode()
1460
1461    def p_opt_defs_and_outputs_1(self, t):
1462        'opt_defs_and_outputs : defs_and_outputs'
1463        t[0] = t[1]
1464
1465    def p_defs_and_outputs_0(self, t):
1466        'defs_and_outputs : def_or_output'
1467        t[0] = t[1]
1468
1469    def p_defs_and_outputs_1(self, t):
1470        'defs_and_outputs : defs_and_outputs def_or_output'
1471        t[0] = t[1] + t[2]
1472
1473    # The list of possible definition/output statements.
1474    def p_def_or_output(self, t):
1475        '''def_or_output : def_format
1476                         | def_bitfield
1477                         | def_bitfield_struct
1478                         | def_template
1479                         | def_operand_types
1480                         | def_operands
1481                         | output_header
1482                         | output_decoder
1483                         | output_exec
1484                         | global_let'''
1485        t[0] = t[1]
1486
1487    # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
1488    # directly to the appropriate output section.
1489
1490    # Massage output block by substituting in template definitions and
1491    # bit operators.  We handle '%'s embedded in the string that don't
1492    # indicate template substitutions (or CPU-specific symbols, which
1493    # get handled in GenCode) by doubling them first so that the
1494    # format operation will reduce them back to single '%'s.
1495    def process_output(self, s):
1496        s = protect_non_subst_percents(s)
1497        # protects cpu-specific symbols too
1498        s = protect_cpu_symbols(s)
1499        return substBitOps(s % self.templateMap)
1500
1501    def p_output_header(self, t):
1502        'output_header : OUTPUT HEADER CODELIT SEMI'
1503        t[0] = GenCode(header_output = self.process_output(t[3]))
1504
1505    def p_output_decoder(self, t):
1506        'output_decoder : OUTPUT DECODER CODELIT SEMI'
1507        t[0] = GenCode(decoder_output = self.process_output(t[3]))
1508
1509    def p_output_exec(self, t):
1510        'output_exec : OUTPUT EXEC CODELIT SEMI'
1511        t[0] = GenCode(exec_output = self.process_output(t[3]))
1512
1513    # global let blocks 'let {{...}}' (Python code blocks) are
1514    # executed directly when seen.  Note that these execute in a
1515    # special variable context 'exportContext' to prevent the code
1516    # from polluting this script's namespace.
1517    def p_global_let(self, t):
1518        'global_let : LET CODELIT SEMI'
1519        updateExportContext()
1520        exportContext["header_output"] = ''
1521        exportContext["decoder_output"] = ''
1522        exportContext["exec_output"] = ''
1523        exportContext["decode_block"] = ''
1524        try:
1525            exec fixPythonIndentation(t[2]) in exportContext
1526        except Exception, exc:
1527            if debug:
1528                raise
1529            error(t, 'error: %s in global let block "%s".' % (exc, t[2]))
1530        t[0] = GenCode(header_output = exportContext["header_output"],
1531                       decoder_output = exportContext["decoder_output"],
1532                       exec_output = exportContext["exec_output"],
1533                       decode_block = exportContext["decode_block"])
1534
1535    # Define the mapping from operand type extensions to C++ types and
1536    # bit widths (stored in operandTypeMap).
1537    def p_def_operand_types(self, t):
1538        'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
1539        try:
1540            user_dict = eval('{' + t[3] + '}')
1541        except Exception, exc:
1542            if debug:
1543                raise
1544            error(t,
1545                  'error: %s in def operand_types block "%s".' % (exc, t[3]))
1546        buildOperandTypeMap(user_dict, t.lexer.lineno)
1547        t[0] = GenCode() # contributes nothing to the output C++ file
1548
1549    # Define the mapping from operand names to operand classes and
1550    # other traits.  Stored in operandNameMap.
1551    def p_def_operands(self, t):
1552        'def_operands : DEF OPERANDS CODELIT SEMI'
1553        if not globals().has_key('operandTypeMap'):
1554            error(t, 'error: operand types must be defined before operands')
1555        try:
1556            user_dict = eval('{' + t[3] + '}', exportContext)
1557        except Exception, exc:
1558            if debug:
1559                raise
1560            error(t, 'error: %s in def operands block "%s".' % (exc, t[3]))
1561        buildOperandNameMap(user_dict, t.lexer.lineno)
1562        t[0] = GenCode() # contributes nothing to the output C++ file
1563
1564    # A bitfield definition looks like:
1565    # 'def [signed] bitfield <ID> [<first>:<last>]'
1566    # This generates a preprocessor macro in the output file.
1567    def p_def_bitfield_0(self, t):
1568        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
1569        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
1570        if (t[2] == 'signed'):
1571            expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
1572        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1573        t[0] = GenCode(header_output = hash_define)
1574
1575    # alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
1576    def p_def_bitfield_1(self, t):
1577        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
1578        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
1579        if (t[2] == 'signed'):
1580            expr = 'sext<%d>(%s)' % (1, expr)
1581        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1582        t[0] = GenCode(header_output = hash_define)
1583
1584    # alternate form for structure member: 'def bitfield <ID> <ID>'
1585    def p_def_bitfield_struct(self, t):
1586        'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
1587        if (t[2] != ''):
1588            error(t, 'error: structure bitfields are always unsigned.')
1589        expr = 'machInst.%s' % t[5]
1590        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1591        t[0] = GenCode(header_output = hash_define)
1592
1593    def p_id_with_dot_0(self, t):
1594        'id_with_dot : ID'
1595        t[0] = t[1]
1596
1597    def p_id_with_dot_1(self, t):
1598        'id_with_dot : ID DOT id_with_dot'
1599        t[0] = t[1] + t[2] + t[3]
1600
1601    def p_opt_signed_0(self, t):
1602        'opt_signed : SIGNED'
1603        t[0] = t[1]
1604
1605    def p_opt_signed_1(self, t):
1606        'opt_signed : empty'
1607        t[0] = ''
1608
1609    def p_def_template(self, t):
1610        'def_template : DEF TEMPLATE ID CODELIT SEMI'
1611        self.templateMap[t[3]] = Template(t[4])
1612        t[0] = GenCode()
1613
1614    # An instruction format definition looks like
1615    # "def format <fmt>(<params>) {{...}};"
1616    def p_def_format(self, t):
1617        'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
1618        (id, params, code) = (t[3], t[5], t[7])
1619        defFormat(id, params, code, t.lexer.lineno)
1620        t[0] = GenCode()
1621
1622    # The formal parameter list for an instruction format is a
1623    # possibly empty list of comma-separated parameters.  Positional
1624    # (standard, non-keyword) parameters must come first, followed by
1625    # keyword parameters, followed by a '*foo' parameter that gets
1626    # excess positional arguments (as in Python).  Each of these three
1627    # parameter categories is optional.
1628    #
1629    # Note that we do not support the '**foo' parameter for collecting
1630    # otherwise undefined keyword args.  Otherwise the parameter list
1631    # is (I believe) identical to what is supported in Python.
1632    #
    # The param list generates a flat list of strings: the positional
    # parameter names first, then the keyword params (each rendered as
    # 'name = <repr of default>'), then an optional '*name' entry.
1636    def p_param_list_0(self, t):
1637        'param_list : positional_param_list COMMA nonpositional_param_list'
1638        t[0] = t[1] + t[3]
1639
1640    def p_param_list_1(self, t):
1641        '''param_list : positional_param_list
1642                      | nonpositional_param_list'''
1643        t[0] = t[1]
1644
1645    def p_positional_param_list_0(self, t):
1646        'positional_param_list : empty'
1647        t[0] = []
1648
1649    def p_positional_param_list_1(self, t):
1650        'positional_param_list : ID'
1651        t[0] = [t[1]]
1652
1653    def p_positional_param_list_2(self, t):
1654        'positional_param_list : positional_param_list COMMA ID'
1655        t[0] = t[1] + [t[3]]
1656
1657    def p_nonpositional_param_list_0(self, t):
1658        'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
1659        t[0] = t[1] + t[3]
1660
1661    def p_nonpositional_param_list_1(self, t):
1662        '''nonpositional_param_list : keyword_param_list
1663                                    | excess_args_param'''
1664        t[0] = t[1]
1665
1666    def p_keyword_param_list_0(self, t):
1667        'keyword_param_list : keyword_param'
1668        t[0] = [t[1]]
1669
1670    def p_keyword_param_list_1(self, t):
1671        'keyword_param_list : keyword_param_list COMMA keyword_param'
1672        t[0] = t[1] + [t[3]]
1673
1674    def p_keyword_param(self, t):
1675        'keyword_param : ID EQUALS expr'
1676        t[0] = t[1] + ' = ' + t[3].__repr__()
1677
1678    def p_excess_args_param(self, t):
1679        'excess_args_param : ASTERISK ID'
1680        # Just concatenate them: '*ID'.  Wrap in list to be consistent
1681        # with positional_param_list and keyword_param_list.
1682        t[0] = [t[1] + t[2]]
1683
1684    # End of format definition-related rules.
1685    ##############
1686
1687    #
1688    # A decode block looks like:
    #       decode <field> [default <inst>] { ... }
1690    #
1691    def p_decode_block(self, t):
1692        'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
1693        default_defaults = defaultStack.pop()
1694        codeObj = t[5]
1695        # use the "default defaults" only if there was no explicit
1696        # default statement in decode_stmt_list
1697        if not codeObj.has_decode_default:
1698            codeObj += default_defaults
1699        codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
1700        t[0] = codeObj
1701
1702    # The opt_default statement serves only to push the "default
1703    # defaults" onto defaultStack.  This value will be used by nested
1704    # decode blocks, and used and popped off when the current
1705    # decode_block is processed (in p_decode_block() above).
1706    def p_opt_default_0(self, t):
1707        'opt_default : empty'
1708        # no default specified: reuse the one currently at the top of
1709        # the stack
1710        defaultStack.push(defaultStack.top())
1711        # no meaningful value returned
1712        t[0] = None
1713
1714    def p_opt_default_1(self, t):
1715        'opt_default : DEFAULT inst'
1716        # push the new default
1717        codeObj = t[2]
1718        codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
1719        defaultStack.push(codeObj)
1720        # no meaningful value returned
1721        t[0] = None
1722
1723    def p_decode_stmt_list_0(self, t):
1724        'decode_stmt_list : decode_stmt'
1725        t[0] = t[1]
1726
1727    def p_decode_stmt_list_1(self, t):
1728        'decode_stmt_list : decode_stmt decode_stmt_list'
1729        if (t[1].has_decode_default and t[2].has_decode_default):
1730            error(t, 'Two default cases in decode block')
1731        t[0] = t[1] + t[2]
1732
1733    #
1734    # Decode statement rules
1735    #
1736    # There are four types of statements allowed in a decode block:
1737    # 1. Format blocks 'format <foo> { ... }'
1738    # 2. Nested decode blocks
1739    # 3. Instruction definitions.
1740    # 4. C preprocessor directives.
1741
1742
1743    # Preprocessor directives found in a decode statement list are
1744    # passed through to the output, replicated to all of the output
1745    # code streams.  This works well for ifdefs, so we can ifdef out
1746    # both the declarations and the decode cases generated by an
1747    # instruction definition.  Handling them as part of the grammar
1748    # makes it easy to keep them in the right place with respect to
1749    # the code generated by the other statements.
1750    def p_decode_stmt_cpp(self, t):
1751        'decode_stmt : CPPDIRECTIVE'
1752        t[0] = GenCode(t[1], t[1], t[1], t[1])
1753
1754    # A format block 'format <foo> { ... }' sets the default
1755    # instruction format used to handle instruction definitions inside
1756    # the block.  This format can be overridden by using an explicit
1757    # format on the instruction definition or with a nested format
1758    # block.
1759    def p_decode_stmt_format(self, t):
1760        'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
1761        # The format will be pushed on the stack when 'push_format_id'
1762        # is processed (see below).  Once the parser has recognized
1763        # the full production (though the right brace), we're done
1764        # with the format, so now we can pop it.
1765        formatStack.pop()
1766        t[0] = t[4]
1767
1768    # This rule exists so we can set the current format (& push the
1769    # stack) when we recognize the format name part of the format
1770    # block.
1771    def p_push_format_id(self, t):
1772        'push_format_id : ID'
1773        try:
1774            formatStack.push(formatMap[t[1]])
1775            t[0] = ('', '// format %s' % t[1])
1776        except KeyError:
1777            error(t, 'instruction format "%s" not defined.' % t[1])
1778
1779    # Nested decode block: if the value of the current field matches
1780    # the specified constant, do a nested decode on some other field.
1781    def p_decode_stmt_decode(self, t):
1782        'decode_stmt : case_label COLON decode_block'
1783        label = t[1]
1784        codeObj = t[3]
1785        # just wrap the decoding code from the block as a case in the
1786        # outer switch statement.
1787        codeObj.wrap_decode_block('\n%s:\n' % label)
1788        codeObj.has_decode_default = (label == 'default')
1789        t[0] = codeObj
1790
1791    # Instruction definition (finally!).
1792    def p_decode_stmt_inst(self, t):
1793        'decode_stmt : case_label COLON inst SEMI'
1794        label = t[1]
1795        codeObj = t[3]
1796        codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
1797        codeObj.has_decode_default = (label == 'default')
1798        t[0] = codeObj
1799
1800    # The case label is either a list of one or more constants or
1801    # 'default'
1802    def p_case_label_0(self, t):
1803        'case_label : intlit_list'
1804        def make_case(intlit):
1805            if intlit >= 2**32:
1806                return 'case ULL(%#x)' % intlit
1807            else:
1808                return 'case %#x' % intlit
1809        t[0] = ': '.join(map(make_case, t[1]))
1810
1811    def p_case_label_1(self, t):
1812        'case_label : DEFAULT'
1813        t[0] = 'default'
1814
1815    #
1816    # The constant list for a decode case label must be non-empty, but
1817    # may have one or more comma-separated integer literals in it.
1818    #
1819    def p_intlit_list_0(self, t):
1820        'intlit_list : INTLIT'
1821        t[0] = [t[1]]
1822
1823    def p_intlit_list_1(self, t):
1824        'intlit_list : intlit_list COMMA INTLIT'
1825        t[0] = t[1]
1826        t[0].append(t[3])
1827
1828    # Define an instruction using the current instruction format
1829    # (specified by an enclosing format block).
1830    # "<mnemonic>(<args>)"
1831    def p_inst_0(self, t):
1832        'inst : ID LPAREN arg_list RPAREN'
1833        # Pass the ID and arg list to the current format class to deal with.
1834        currentFormat = formatStack.top()
1835        codeObj = currentFormat.defineInst(t[1], t[3], t.lexer.lineno)
1836        args = ','.join(map(str, t[3]))
1837        args = re.sub('(?m)^', '//', args)
1838        args = re.sub('^//', '', args)
1839        comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
1840        codeObj.prepend_all(comment)
1841        t[0] = codeObj
1842
1843    # Define an instruction using an explicitly specified format:
1844    # "<fmt>::<mnemonic>(<args>)"
1845    def p_inst_1(self, t):
1846        'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
1847        try:
1848            format = formatMap[t[1]]
1849        except KeyError:
1850            error(t, 'instruction format "%s" not defined.' % t[1])
1851        codeObj = format.defineInst(t[3], t[5], t.lexer.lineno)
1852        comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
1853        codeObj.prepend_all(comment)
1854        t[0] = codeObj
1855
1856    # The arg list generates a tuple, where the first element is a
1857    # list of the positional args and the second element is a dict
1858    # containing the keyword args.
1859    def p_arg_list_0(self, t):
1860        'arg_list : positional_arg_list COMMA keyword_arg_list'
1861        t[0] = ( t[1], t[3] )
1862
1863    def p_arg_list_1(self, t):
1864        'arg_list : positional_arg_list'
1865        t[0] = ( t[1], {} )
1866
1867    def p_arg_list_2(self, t):
1868        'arg_list : keyword_arg_list'
1869        t[0] = ( [], t[1] )
1870
1871    def p_positional_arg_list_0(self, t):
1872        'positional_arg_list : empty'
1873        t[0] = []
1874
1875    def p_positional_arg_list_1(self, t):
1876        'positional_arg_list : expr'
1877        t[0] = [t[1]]
1878
1879    def p_positional_arg_list_2(self, t):
1880        'positional_arg_list : positional_arg_list COMMA expr'
1881        t[0] = t[1] + [t[3]]
1882
1883    def p_keyword_arg_list_0(self, t):
1884        'keyword_arg_list : keyword_arg'
1885        t[0] = t[1]
1886
1887    def p_keyword_arg_list_1(self, t):
1888        'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
1889        t[0] = t[1]
1890        t[0].update(t[3])
1891
1892    def p_keyword_arg(self, t):
1893        'keyword_arg : ID EQUALS expr'
1894        t[0] = { t[1] : t[3] }
1895
1896    #
1897    # Basic expressions.  These constitute the argument values of
1898    # "function calls" (i.e. instruction definitions in the decode
1899    # block) and default values for formal parameters of format
1900    # functions.
1901    #
1902    # Right now, these are either strings, integers, or (recursively)
1903    # lists of exprs (using Python square-bracket list syntax).  Note
    # that bare identifiers are treated as string constants here (since
1905    # there isn't really a variable namespace to refer to).
1906    #
1907    def p_expr_0(self, t):
1908        '''expr : ID
1909                | INTLIT
1910                | STRLIT
1911                | CODELIT'''
1912        t[0] = t[1]
1913
1914    def p_expr_1(self, t):
1915        '''expr : LBRACKET list_expr RBRACKET'''
1916        t[0] = t[2]
1917
1918    def p_list_expr_0(self, t):
1919        'list_expr : expr'
1920        t[0] = [t[1]]
1921
1922    def p_list_expr_1(self, t):
1923        'list_expr : list_expr COMMA expr'
1924        t[0] = t[1] + [t[3]]
1925
1926    def p_list_expr_2(self, t):
1927        'list_expr : empty'
1928        t[0] = []
1929
1930    #
1931    # Empty production... use in other rules for readability.
1932    #
1933    def p_empty(self, t):
1934        'empty :'
1935        pass
1936
1937    # Parse error handler.  Note that the argument here is the
1938    # offending *token*, not a grammar symbol (hence the need to use
1939    # t.value)
1940    def p_error(self, t):
1941        if t:
1942            error(t, "syntax error at '%s'" % t.value)
1943        else:
1944            error("unknown syntax error")
1945
1946    # END OF GRAMMAR RULES
1947
1948    def update_if_needed(self, file, contents):
1949        '''Update the output file only if the new contents are
1950        different from the current contents.  Minimizes the files that
1951        need to be rebuilt after minor changes.'''
1952
1953        file = os.path.join(self.output_dir, file)
1954        update = False
1955        if os.access(file, os.R_OK):
1956            f = open(file, 'r')
1957            old_contents = f.read()
1958            f.close()
1959            if contents != old_contents:
1960                print 'Updating', file
1961                os.remove(file) # in case it's write-protected
1962                update = True
1963            else:
1964                print 'File', file, 'is unchanged'
1965        else:
1966            print 'Generating', file
1967            update = True
1968        if update:
1969            f = open(file, 'w')
1970            f.write(contents)
1971            f.close()
1972
    # This regular expression matches '##include' directives: a line
    # (re.MULTILINE) consisting of optional leading whitespace,
    # '##include', and a double-quoted path made up of word characters,
    # '/', '.' and '-'.  The path is captured in the named group
    # 'filename'; anything after the closing quote on the same line is
    # matched too and is therefore replaced along with the directive.
    includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[\w/.-]*)".*$',
                           re.MULTILINE)
1976
1977    def replace_include(self, matchobj, dirname):
1978        """Function to replace a matched '##include' directive with the
1979        contents of the specified file (with nested ##includes
1980        replaced recursively).  'matchobj' is an re match object
1981        (from a match of includeRE) and 'dirname' is the directory
1982        relative to which the file path should be resolved."""
1983
1984        fname = matchobj.group('filename')
1985        full_fname = os.path.normpath(os.path.join(dirname, fname))
1986        contents = '##newfile "%s"\n%s\n##endfile\n' % \
1987                   (full_fname, self.read_and_flatten(full_fname))
1988        return contents
1989
1990    def read_and_flatten(self, filename):
1991        """Read a file and recursively flatten nested '##include' files."""
1992
1993        current_dir = os.path.dirname(filename)
1994        try:
1995            contents = open(filename).read()
1996        except IOError:
1997            error('Error including file "%s"' % filename)
1998
1999        fileNameStack.push((filename, 0))
2000
2001        # Find any includes and include them
2002        def replace(matchobj):
2003            return self.replace_include(matchobj, current_dir)
2004        contents = self.includeRE.sub(replace, contents)
2005
2006        fileNameStack.pop()
2007        return contents
2008
    def _parse_isa_desc(self, isa_desc_file):
        '''Read in and parse the ISA description, then write the
        generated output files (decoder.hh, decoder.cc, one exec file
        per entry in cpu_models, and max_inst_regs.hh) through
        update_if_needed().

        NOTE: the output templates are filled in with "% vars()", i.e.
        from this function's local variables by name.  The local
        variable names are therefore part of the behavior and must not
        be renamed without checking the templates.'''

        # Read file and (recursively) all included files into a string.
        # PLY requires that the input be in a single string so we have to
        # do this up front.
        isa_desc = self.read_and_flatten(isa_desc_file)

        # Initialize filename stack with outer file.
        fileNameStack.push((isa_desc_file, 0))

        # Parse it.
        (isa_name, namespace, global_code, namespace_code) = \
                   self.parse(isa_desc)

        # grab the last three path components of isa_desc_file to put in
        # the output
        filename = '/'.join(isa_desc_file.split('/')[-3:])

        # generate decoder.hh
        # (the locals set here are presumably the fields file_template
        # substitutes -- the template is defined elsewhere in this file)
        includes = '#include "base/bitfield.hh" // for bitfield support'
        global_output = global_code.header_output
        namespace_output = namespace_code.header_output
        decode_function = ''
        self.update_if_needed('decoder.hh', file_template % vars())

        # generate decoder.cc
        includes = '#include "decoder.hh"'
        global_output = global_code.decoder_output
        namespace_output = namespace_code.decoder_output
        # namespace_output += namespace_code.decode_block
        decode_function = namespace_code.decode_block
        self.update_if_needed('decoder.cc', file_template % vars())

        # generate per-cpu exec files
        for cpu in cpu_models:
            # each exec file includes decoder.hh plus the model's own
            # include text, and gets that model's exec_output
            includes = '#include "decoder.hh"\n'
            includes += cpu.includes
            global_output = global_code.exec_output[cpu.name]
            namespace_output = namespace_code.exec_output[cpu.name]
            decode_function = ''
            self.update_if_needed(cpu.filename, file_template % vars())

        # The variable names here are hacky, but this will create local
        # variables which will be referenced in vars() which have the
        # value of the globals.  (The names declared 'global' below are
        # not locals, so they would not appear in vars() themselves.)
        global maxInstSrcRegs
        MaxInstSrcRegs = maxInstSrcRegs
        global maxInstDestRegs
        MaxInstDestRegs = maxInstDestRegs
        # max_inst_regs.hh
        self.update_if_needed('max_inst_regs.hh',
                              max_inst_regs_template % vars())
2062
2063    def parse_isa_desc(self, *args, **kwargs):
2064        try:
2065            self._parse_isa_desc(*args, **kwargs)
2066        except ISAParserError, e:
2067            e.exit(fileNameStack)
2068
# global list of CpuModel objects (see cpu_models.py)
cpu_models = []

# Called as script: get args from command line.
# Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models>
if __name__ == '__main__':
    # The first three arguments are required; without this check a
    # short command line fails with a bare IndexError traceback.
    if len(sys.argv) < 4:
        sys.exit('usage: %s <path to cpu_models.py> <isa desc file> '
                 '<output dir> [<cpu model> ...]' % sys.argv[0])
    execfile(sys.argv[1])  # read in CpuModel definitions
    # Every remaining argument names a CPU model, looked up in
    # CpuModel.dict (made available by the execfile above).
    cpu_models = [CpuModel.dict[cpu] for cpu in sys.argv[4:]]
    parser = ISAParser(sys.argv[3])
    parser.parse_isa_desc(sys.argv[2])
2079