isa_parser.py revision 9918:2c7219e2d999
1# Copyright (c) 2003-2005 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26#
27# Authors: Steve Reinhardt
28
29import os
30import sys
31import re
32import string
33import inspect, traceback
34# get type names
35from types import *
36
37from m5.util.grammar import Grammar
38
# Module-wide debugging switch.  It is the default value of the
# 'print_traceback' argument of ISAParserError.display()/exit(), and
# when it is true Format.defineInst() re-raises exceptions from format
# code instead of converting them into parser errors.
debug=False
40
41###################
42# Utility functions
43
44#
45# Indent every line in string 's' by two spaces
46# (except preprocessor directives).
47# Used to make nested code blocks look pretty.
48#
def indent(s):
    """Return 's' with two spaces prepended to every line, except for
    lines that begin with '#' (preprocessor directives)."""
    shifted = []
    # A "line" starts at the beginning of the string and after every
    # newline, so the (possibly empty) piece after a trailing newline
    # is indented too.
    for line in s.split('\n'):
        if line.startswith('#'):
            shifted.append(line)
        else:
            shifted.append('  ' + line)
    return '\n'.join(shifted)
51
52#
53# Munge a somewhat arbitrarily formatted piece of Python code
54# (e.g. from a format 'let' block) into something whose indentation
55# will get by the Python parser.
56#
57# The two keys here are that Python will give a syntax error if
58# there's any whitespace at the beginning of the first line, and that
59# all lines at the same lexical nesting level must have identical
60# indentation.  Unfortunately the way code literals work, an entire
61# let block tends to have some initial indentation.  Rather than
62# trying to figure out what that is and strip it off, we prepend 'if
63# 1:' to make the let code the nested block inside the if (and have
64# the parser automatically deal with the indentation for us).
65#
66# We don't want to do this if (1) the code block is empty or (2) the
67# first line of the block doesn't have any whitespace at the front.
68
def fixPythonIndentation(s):
    """Make an arbitrarily indented Python snippet acceptable to compile().

    Blank (whitespace-only) lines are removed.  If the first remaining
    line starts with a space or a tab, the snippet is wrapped in an
    'if 1:' block so that its leading indentation becomes legal.  Empty
    snippets and snippets that already start in column 0 are returned
    without the wrapper.
    """
    # get rid of blank lines first
    body = re.sub(r'(?m)^\s*\n', '', s)
    # body[:1] is '' for an empty snippet, which is never "indented".
    starts_indented = body[:1] in (' ', '\t')
    if starts_indented:
        return 'if 1:\n' + body
    return body
75
class ISAParserError(Exception):
    """Exception describing an error found while parsing.

    It can be built in two ways:
      ISAParserError(message)         -- line number is recorded as 0
      ISAParserError(where, message)  -- 'where' is a line number, or any
                                         object with a 'lexer' attribute,
                                         in which case where.lexer.lineno
                                         is used.
    """
    def __init__(self, first, second=None):
        if second is not None:
            # Two-argument form: location followed by message.
            lineno = first
            if hasattr(first, 'lexer'):
                lineno = first.lexer.lineno
            self.lineno = lineno
            self.string = second
        else:
            # One-argument form: message only, no line information.
            self.lineno = 0
            self.string = first

    def display(self, filename_stack, print_traceback=debug):
        # Build and return the error message for the innermost file in
        # 'filename_stack' (a sequence of (filename, line) pairs).  The
        # output is formatted to work under Emacs compile-mode.  One
        # "In file included from" line is printed for every enclosing
        # file.  A Python stack backtrace is printed too when
        # 'print_traceback' is true or when no line number is known
        # (handy when debugging the parser itself).

        nesting = ""
        for (filename, _line) in filename_stack[:-1]:
            print("%sIn file included from %s:" % (nesting, filename))
            nesting += "  "

        # Print a Python stack backtrace if requested.
        want_traceback = print_traceback or not self.lineno
        if want_traceback:
            traceback.print_exc()

        location = "%s:" % (filename_stack[-1][0], )
        if self.lineno:
            location += "%d:" % (self.lineno, )

        return "%s%s %s" % (nesting, location, self.string)

    def exit(self, filename_stack, print_traceback=debug):
        # Terminate the program, using the formatted error message as
        # the argument to sys.exit().
        message = self.display(filename_stack, print_traceback)
        sys.exit(message)
113
def error(*args):
    """Raise an ISAParserError constructed from 'args' (either a message
    alone, or a location followed by a message)."""
    failure = ISAParserError(*args)
    raise failure
116
117####################
118# Template objects.
119#
120# Template objects are format strings that allow substitution from
121# the attribute spaces of other objects (e.g. InstObjParams instances).
122
# Matches a '%(label)s' or '%(label)d' substitution field that is not
# preceded by another '%'; the single capture group is the label.
labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
124
class Template(object):
    '''A format string whose '%(name)s' fields are filled in from a
    dictionary, from an arbitrary object's attributes, or from an
    InstObjParams instance (which additionally gets operand-related
    fields generated for it).'''

    def __init__(self, parser, t):
        self.parser = parser
        self.template = t

    def subst(self, d):
        '''Return the template text with its fields substituted from 'd'.

        'd' may be an InstObjParams instance, a dict, or any object
        with a __dict__; anything else raises TypeError.'''
        parser = self.parser

        # Protect non-Python-dict substitutions (e.g. if there's a printf
        # in the templated C++ code)
        template = parser.protectNonSubstPercents(self.template)
        # CPU-model-specific substitutions are handled later (in GenCode).
        template = parser.protectCpuSymbols(template)

        # The substitution dictionary starts out as a copy of the
        # template namespace; a copy because it is modified below.
        myDict = parser.templateMap.copy()

        if isinstance(d, InstObjParams):
            # The instruction-wide parameters of an InstObjParams object
            # are already formed, but the parameters which are only
            # function wide still need to be generated.
            myDict.update(d.__dict__)
            # The "operands" and "snippets" attributes of the InstObjParams
            # objects are for internal use and not substitution.
            del myDict['operands']
            del myDict['snippets']

            # Munge every snippet that the template refers to by label.
            snippets = {}
            for label in labelRE.findall(template):
                if d.snippets.has_key(label):
                    snippets[label] = parser.mungeSnippet(d.snippets[label])
            myDict.update(snippets)

            # The code scanned for operands is all the snippets plus the
            # template itself, in case it references any operands
            # explicitly (like Mem).
            snippet_text = ' '.join([str(code) for code in snippets.values()])
            compositeCode = snippet_text + ' ' + template

            operands = SubOperandList(parser, compositeCode, d.operands)

            pc_state_decl = 'TheISA::PCState __parserAutoPCState;\n'

            op_decl = operands.concatAttrStrings('op_decl')
            if operands.readPC or operands.setPC:
                op_decl += pc_state_decl

            # In case there are predicated register reads and writes,
            # declare the variables for register indices. It is being
            # assumed that all the operands in the OperandList are also
            # in the SubOperandList and in the same order. Otherwise, it
            # is expected that predication would not be used for the
            # operands.
            if operands.predRead:
                op_decl += 'uint8_t _sourceIndex = 0;\n'
            if operands.predWrite:
                op_decl += 'uint8_t M5_VAR_USED _destIndex = 0;\n'
            myDict['op_decl'] = op_decl

            def is_src(op):
                return op.is_src

            def is_dest(op):
                return op.is_dest

            op_src_decl = operands.concatSomeAttrStrings(is_src,
                                                         'op_src_decl')
            op_dest_decl = operands.concatSomeAttrStrings(is_dest,
                                                          'op_dest_decl')
            if operands.readPC:
                op_src_decl += pc_state_decl
            if operands.setPC:
                op_dest_decl += pc_state_decl
            myDict['op_src_decl'] = op_src_decl
            myDict['op_dest_decl'] = op_dest_decl

            op_rd = operands.concatAttrStrings('op_rd')
            if operands.readPC:
                op_rd = '__parserAutoPCState = xc->pcState();\n' + op_rd
            myDict['op_rd'] = op_rd

            # Compose the op_wb string. If we're going to write back the
            # PC state because we changed some of its elements, we'll need
            # to do that as early as possible. That allows later
            # uncoordinated modifications to the PC to layer appropriately.
            # The operands are walked back to front so that the PC state
            # write-back is attached to the last destination operand that
            # is a part of the PC; the pieces are put back into forward
            # order when they are joined.
            pcWbStr = 'xc->pcState(__parserAutoPCState);\n'
            pieces = []
            backwards = list(operands.items)
            backwards.reverse()
            for op_desc in backwards:
                if op_desc.isPCPart() and op_desc.is_dest:
                    pieces.append(op_desc.op_wb + pcWbStr)
                    pcWbStr = ''
                else:
                    pieces.append(op_desc.op_wb)
            pieces.reverse()
            myDict['op_wb'] = ''.join(pieces)

        elif isinstance(d, dict):
            # if the argument is a dictionary, we just use it.
            myDict.update(d)
        elif hasattr(d, '__dict__'):
            # if the argument is an object, we use its attribute map.
            myDict.update(d.__dict__)
        else:
            raise TypeError("Template.subst() arg must be or have dictionary")
        return template % myDict

    # Convert to string.  This handles the case when a template with a
    # CPU-specific term gets interpolated into another template or into
    # an output block.
    def __str__(self):
        return self.parser.expandCpuSymbolsToString(self.template)
237
238################
239# Format object.
240#
241# A format object encapsulates an instruction format.  It must provide
242# a defineInst() method that generates the code for an instruction
243# definition.
244
245class Format(object):
246    def __init__(self, id, params, code):
247        self.id = id
248        self.params = params
249        label = 'def format ' + id
250        self.user_code = compile(fixPythonIndentation(code), label, 'exec')
251        param_list = string.join(params, ", ")
252        f = '''def defInst(_code, _context, %s):
253                my_locals = vars().copy()
254                exec _code in _context, my_locals
255                return my_locals\n''' % param_list
256        c = compile(f, label + ' wrapper', 'exec')
257        exec c
258        self.func = defInst
259
260    def defineInst(self, parser, name, args, lineno):
261        parser.updateExportContext()
262        context = parser.exportContext.copy()
263        if len(name):
264            Name = name[0].upper()
265            if len(name) > 1:
266                Name += name[1:]
267        context.update({ 'name' : name, 'Name' : Name })
268        try:
269            vars = self.func(self.user_code, context, *args[0], **args[1])
270        except Exception, exc:
271            if debug:
272                raise
273            error(lineno, 'error defining "%s": %s.' % (name, exc))
274        for k in vars.keys():
275            if k not in ('header_output', 'decoder_output',
276                         'exec_output', 'decode_block'):
277                del vars[k]
278        return GenCode(parser, **vars)
279
280# Special null format to catch an implicit-format instruction
281# definition outside of any format block.
class NoFormat(object):
    '''Placeholder format that is in effect when no real format is
    active; any attempt to define an instruction with it is an error.'''

    def __init__(self):
        self.defaultInst = ''

    def defineInst(self, parser, name, args, lineno):
        # Always fails: reports a parser error at 'lineno'.
        message = 'instruction definition "%s" with no active format!' % name
        error(lineno, message)
289
290###############
291# GenCode class
292#
293# The GenCode class encapsulates generated code destined for various
294# output files.  The header_output and decoder_output attributes are
295# strings containing code destined for decoder.hh and decoder.cc
296# respectively.  The decode_block attribute contains code to be
297# incorporated in the decode function itself (that will also end up in
298# decoder.cc).  The exec_output attribute is a dictionary with a key
299# for each CPU model name; the value associated with a particular key
300# is the string of code for that CPU model's exec.cc file.  The
301# has_decode_default attribute is used in the decode block to allow
302# explicit default clauses to override default default clauses.
303
class GenCode(object):
    '''Bundle of generated code: header, decoder and decode-block
    strings, plus a per-CPU-model dictionary of exec code.'''

    # Constructor.  All CPU-specific symbols are substituted out here.
    # The header, decoder and decode-block outputs are each collapsed
    # into a single string.  The exec output is kept per CPU model: a
    # dict argument is stored as given, while a string argument is
    # expanded into a dict by the parser.
    def __init__(self, parser,
                 header_output = '', decoder_output = '', exec_output = '',
                 decode_block = '', has_decode_default = False):
        self.parser = parser
        to_string = parser.expandCpuSymbolsToString
        self.header_output = to_string(header_output)
        self.decoder_output = to_string(decoder_output)
        if isinstance(exec_output, dict):
            self.exec_output = exec_output
        elif isinstance(exec_output, str):
            # A single string is replicated for each of the CPU models,
            # substituting any %(CPU_foo)s params appropriately.
            self.exec_output = parser.expandCpuSymbolsToDict(exec_output)
        self.decode_block = to_string(decode_block)
        self.has_decode_default = has_decode_default

    # '+' operator: build a new GenCode object in which every output
    # is the concatenation of the corresponding outputs of the two
    # operands (for the exec output, model by model).
    def __add__(self, other):
        model_names = [cpu.name for cpu in self.parser.cpuModels]
        combined_exec = dict([(n, self.exec_output[n] + other.exec_output[n])
                              for n in model_names])
        has_default = self.has_decode_default or other.has_decode_default
        return GenCode(self.parser,
                       self.header_output + other.header_output,
                       self.decoder_output + other.decoder_output,
                       combined_exec,
                       self.decode_block + other.decode_block,
                       has_default)

    # Put the string 'pre' (typically a comment) in front of every
    # output string, including the exec output of each CPU model.
    def prepend_all(self, pre):
        self.header_output = pre + self.header_output
        self.decoder_output = pre + self.decoder_output
        self.decode_block = pre + self.decode_block
        per_model = self.exec_output
        for cpu in self.parser.cpuModels:
            per_model[cpu.name] = pre + per_model[cpu.name]

    # Indent the decode block and surround it with 'pre' and 'post'
    # (e.g., 'case foo:' and 'break;').  Used to build the big nested
    # switch statement.
    def wrap_decode_block(self, pre, post = ''):
        body = indent(self.decode_block)
        self.decode_block = pre + body + post
351
352#####################################################################
353#
354#                      Bitfield Operator Support
355#
356#####################################################################
357
# Selector with a single index followed by a bare colon: '<n:>'.
bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')

# Two-index selector '<a:b>' applied to a (possibly dotted) name ...
bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
# ... and applied to an expression that ends in ')'.
bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')

def substBitOps(code):
    """Return 'code' with bitfield selectors rewritten as bits() calls.

    'foo<a:b>' becomes 'bits(foo, a, b)' and '(expr)<a:b>' becomes
    'bits((expr), a, b)'.  A selector of the form '<n:>' is treated as
    '<n:n>'.  The program exits (sys.exit) if the ')' in front of a
    selector has no matching '('.
    """
    # first convert single-index selectors to two-index form
    # i.e., <n:> --> <n:n>
    code = bitOp1ArgRE.sub(r'<\1:\1>', code)
    # simple case: selector applied to ID (name)
    # i.e., foo<a:b> --> bits(foo, a, b)
    code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
    # if selector is applied to expression (ending in ')'),
    # we need to search backward for matching '('
    match = bitOpExprRE.search(code)
    while match:
        # exprEnd is the index of the ')' that closes the expression.
        exprEnd = match.start()
        here = exprEnd
        nestLevel = 1
        while nestLevel > 0:
            here -= 1
            # Check the bound before indexing: a negative index would
            # silently wrap around to the end of the string, and a '('
            # found at index 0 must not be reported as missing.
            if here < 0:
                sys.exit("Didn't find '('!")
            if code[here] == '(':
                nestLevel -= 1
            elif code[here] == ')':
                nestLevel += 1
        # 'here' is now the index of the matching '('.
        exprStart = here
        newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
                                         match.group(1), match.group(2))
        code = code[:exprStart] + newExpr + code[match.end():]
        match = bitOpExprRE.search(code)
    return code
391
392
393#####################################################################
394#
395#                             Code Parser
396#
397# The remaining code is the support for automatically extracting
398# instruction characteristics from pseudocode.
399#
400#####################################################################
401
# Force the argument to be a list.  Useful for flags, where a caller
# can specify a singleton flag or a list of flags.  Also useful for
# converting tuples to lists so they can be modified.
def makeList(arg):
    """Return 'arg' as a list.

    A list is returned as is (not copied), a tuple is converted to a
    new list, any other false value gives an empty list, and anything
    else is wrapped in a one-element list.
    """
    if isinstance(arg, list):
        return arg
    if isinstance(arg, tuple):
        return list(arg)
    if arg:
        return [ arg ]
    return []
414
class Operand(object):
    '''Base class for operand descriptors.  An instance of this class
    (or actually a class derived from this one) represents a specific
    operand for a code block (e.g, "Rc.sq" as a dest). Intermediate
    derived classes encapsulates the traits of a particular operand
    type (e.g., "32-bit integer register").

    The methods below read a number of attributes that this class
    never assigns: base_name, reg_spec, read_code, write_code,
    read_predicate, write_predicate, dflt_ctype, flags (indexable with
    0, 1 and 2) and, optionally, dflt_ext.  They also call
    makeConstructor(), makeRead() and makeWrite(), which are not
    defined here.  Presumably all of these are supplied by the derived
    classes -- verify against the code that creates them.'''

    # Expand self.read_code (a '%(key)s' format string) and return a
    # C++ statement assigning the result to the operand's variable.
    # The keys available to read_code are 'name', 'func', 'reg_idx',
    # 'ctype' and, if src_reg_idx has been assigned, 'op_idx'.
    def buildReadCode(self, func = None):
        subst_dict = {"name": self.base_name,
                      "func": func,
                      "reg_idx": self.reg_spec,
                      "ctype": self.ctype}
        if hasattr(self, 'src_reg_idx'):
            subst_dict['op_idx'] = self.src_reg_idx
        code = self.read_code % subst_dict
        return '%s = %s;\n' % (self.base_name, code)

    # Expand self.write_code and return a C++ block that copies the
    # operand's variable into 'final_val', executes the expanded code
    # and records 'final_val' as trace data.  The keys available to
    # write_code are the ones of buildReadCode() plus 'final_val'
    # ('op_idx' comes from dest_reg_idx here).
    def buildWriteCode(self, func = None):
        subst_dict = {"name": self.base_name,
                      "func": func,
                      "reg_idx": self.reg_spec,
                      "ctype": self.ctype,
                      "final_val": self.base_name}
        if hasattr(self, 'dest_reg_idx'):
            subst_dict['op_idx'] = self.dest_reg_idx
        code = self.write_code % subst_dict
        return '''
        {
            %s final_val = %s;
            %s;
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, self.base_name, code)

    # 'full_name' is the operand name as written in the code and 'ext'
    # its extension (may be empty/None); 'is_src' and 'is_dest' tell
    # whether the operand is read and/or written.
    def __init__(self, parser, full_name, ext, is_src, is_dest):
        self.full_name = full_name
        self.ext = ext
        self.is_src = is_src
        self.is_dest = is_dest
        # The 'effective extension' (eff_ext) is either the actual
        # extension, if one was explicitly provided, or the default.
        if ext:
            self.eff_ext = ext
        elif hasattr(self, 'dflt_ext'):
            self.eff_ext = self.dflt_ext

        # Without any extension neither eff_ext nor ctype is set.
        if hasattr(self, 'eff_ext'):
            self.ctype = parser.operandTypeMap[self.eff_ext]

    # Finalize additional fields (primarily code fields).  This step
    # is done separately since some of these fields may depend on the
    # register index enumeration that hasn't been performed yet at the
    # time of __init__(). The register index enumeration is affected
    # by predicated register reads/writes. Hence, we forward the flags
    # that indicate whether or not predication is in use.
    def finalize(self, predRead, predWrite):
        # Note that this replaces 'flags' on the instance with the
        # flat list computed by getFlags().
        self.flags = self.getFlags()
        self.constructor = self.makeConstructor(predRead, predWrite)
        self.op_decl = self.makeDecl()

        if self.is_src:
            self.op_rd = self.makeRead(predRead)
            self.op_src_decl = self.makeDecl()
        else:
            self.op_rd = ''
            self.op_src_decl = ''

        if self.is_dest:
            self.op_wb = self.makeWrite(predWrite)
            self.op_dest_decl = self.makeDecl()
        else:
            self.op_wb = ''
            self.op_dest_decl = ''

    # Operand kind predicates.  They all answer "no" (0) here; derived
    # classes override the ones that apply to them.
    def isMem(self):
        return 0

    def isReg(self):
        return 0

    def isFloatReg(self):
        return 0

    def isIntReg(self):
        return 0

    def isControlReg(self):
        return 0

    def isPCState(self):
        return 0

    # True-ish if the operand is a PC state operand with a non-empty
    # reg_spec.  Because of the 'and', the value returned is either
    # the false result of isPCState() or reg_spec itself.
    def isPCPart(self):
        return self.isPCState() and self.reg_spec

    def hasReadPred(self):
        return self.read_predicate is not None

    def hasWritePred(self):
        return self.write_predicate is not None

    # Return a new list: flags[0], followed by flags[1] if the operand
    # is a source and by flags[2] if it is a destination.
    def getFlags(self):
        # note the empty slice '[:]' gives us a copy of self.flags[0]
        # instead of a reference to it
        my_flags = self.flags[0][:]
        if self.is_src:
            my_flags += self.flags[1]
        if self.is_dest:
            my_flags += self.flags[2]
        return my_flags

    # Return the C++ declaration of the operand's variable.
    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        return self.ctype + ' ' + self.base_name + ' = 0;\n'
529
class IntRegOperand(Operand):
    '''Operand descriptor for an integer register.'''

    def isReg(self):
        return 1

    def isIntReg(self):
        return 1

    # Return the C++ constructor code that records this register in
    # _srcRegIdx and/or _destRegIdx.  Each part is wrapped in an 'if'
    # on the corresponding predicate when the operand has one.
    def makeConstructor(self, predRead, predWrite):
        c_src = ''
        c_dest = ''

        if self.is_src:
            c_src = '\n\t_srcRegIdx[_numSrcRegs++] = %s;' % (self.reg_spec)
            if self.hasReadPred():
                c_src = '\n\tif (%s) {%s\n\t}' % \
                        (self.read_predicate, c_src)

        if self.is_dest:
            c_dest = '\n\t_destRegIdx[_numDestRegs++] = %s;' % \
                    (self.reg_spec)
            c_dest += '\n\t_numIntDestRegs++;'
            if self.hasWritePred():
                c_dest = '\n\tif (%s) {%s\n\t}' % \
                         (self.write_predicate, c_dest)

        return c_src + c_dest

    # Return the C++ statement that reads the register into the
    # operand's variable.  It is an error for the operand to have a
    # 'float' or 'double' C type.  Custom read_code, if any, takes
    # precedence over the generated read.
    def makeRead(self, predRead):
        if (self.ctype == 'float' or self.ctype == 'double'):
            error('Attempt to read integer register as FP')
        if self.read_code is not None:
            return self.buildReadCode('readIntRegOperand')

        if predRead:
            # With predicated reads the source index is not known
            # statically; the generated code counts it at run time.
            int_reg_val = 'xc->readIntRegOperand(this, _sourceIndex++)'
            if self.hasReadPred():
                int_reg_val = '(%s) ? %s : 0' % \
                              (self.read_predicate, int_reg_val)
        else:
            int_reg_val = 'xc->readIntRegOperand(this, %d)' % self.src_reg_idx

        return '%s = %s;\n' % (self.base_name, int_reg_val)

    # Return the C++ block that writes the operand's variable back to
    # the register and records it as trace data.  It is an error for
    # the operand to have a 'float' or 'double' C type.  Custom
    # write_code, if any, takes precedence over the generated write.
    def makeWrite(self, predWrite):
        if (self.ctype == 'float' or self.ctype == 'double'):
            error('Attempt to write integer register as FP')
        if self.write_code is not None:
            return self.buildWriteCode('setIntRegOperand')

        if predWrite:
            # With predicated writes every destination is guarded by a
            # condition ('true' if this operand has no predicate of
            # its own) and indexed through the run-time counter.
            wp = 'true'
            if self.hasWritePred():
                wp = self.write_predicate

            wcond = 'if (%s)' % (wp)
            windex = '_destIndex++'
        else:
            wcond = ''
            windex = '%d' % self.dest_reg_idx

        wb = '''
        %s
        {
            %s final_val = %s;
            xc->setIntRegOperand(this, %s, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (wcond, self.ctype, self.base_name, windex)

        return wb
600
class FloatRegOperand(Operand):
    '''Operand descriptor for a floating-point register.'''

    def isReg(self):
        return 1

    def isFloatReg(self):
        return 1

    # Return the C++ constructor code that records this register,
    # offset by FP_Reg_Base, in _srcRegIdx and/or _destRegIdx.
    def makeConstructor(self, predRead, predWrite):
        c_src = ''
        c_dest = ''

        if self.is_src:
            c_src = '\n\t_srcRegIdx[_numSrcRegs++] = %s + FP_Reg_Base;' % \
                    (self.reg_spec)

        if self.is_dest:
            c_dest = \
              '\n\t_destRegIdx[_numDestRegs++] = %s + FP_Reg_Base;' % \
              (self.reg_spec)
            c_dest += '\n\t_numFPDestRegs++;'

        return c_src + c_dest

    # Return the C++ statement that reads the register into the
    # operand's variable.  Operands whose C type is 'float' or
    # 'double' are read with readFloatRegOperand, all others with
    # readFloatRegOperandBits.  Custom read_code, if any, takes
    # precedence over the generated read.
    def makeRead(self, predRead):
        if (self.ctype == 'float' or self.ctype == 'double'):
            func = 'readFloatRegOperand'
        else:
            func = 'readFloatRegOperandBits'
        if self.read_code is not None:
            return self.buildReadCode(func)

        if predRead:
            rindex = '_sourceIndex++'
        else:
            rindex = '%d' % self.src_reg_idx

        return '%s = xc->%s(this, %s);\n' % \
            (self.base_name, func, rindex)

    # Return the C++ block that writes the operand's variable back to
    # the register and records it as trace data.  The setter is chosen
    # by C type in the same way as the reader in makeRead().  Custom
    # write_code, if any, takes precedence over the generated write.
    def makeWrite(self, predWrite):
        if (self.ctype == 'float' or self.ctype == 'double'):
            func = 'setFloatRegOperand'
        else:
            func = 'setFloatRegOperandBits'
        if self.write_code is not None:
            return self.buildWriteCode(func)

        if predWrite:
            windex = '_destIndex++'
        else:
            windex = '%d' % self.dest_reg_idx
        write_stmt = 'xc->%s(this, %s, final_val);' % (func, windex)

        wb = '''
        {
            %s final_val = %s;
            %s\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.ctype, self.base_name, write_stmt)
        return wb
662
class ControlRegOperand(Operand):
    '''Operand descriptor for a control (misc) register.'''

    def isReg(self):
        return 1

    def isControlReg(self):
        return 1

    # Return the C++ constructor code that records this register,
    # offset by Misc_Reg_Base, in _srcRegIdx and/or _destRegIdx.
    def makeConstructor(self, predRead, predWrite):
        c_src = ''
        c_dest = ''

        if self.is_src:
            c_src = \
              '\n\t_srcRegIdx[_numSrcRegs++] = %s + Misc_Reg_Base;' % \
              (self.reg_spec)

        if self.is_dest:
            c_dest = \
              '\n\t_destRegIdx[_numDestRegs++] = %s + Misc_Reg_Base;' % \
              (self.reg_spec)

        return c_src + c_dest

    # Return the C++ statement that reads the register into the
    # operand's variable.  It is an error for the operand to have a
    # 'float' or 'double' C type.  Custom read_code, if any, takes
    # precedence over the generated read.
    def makeRead(self, predRead):
        if (self.ctype == 'float' or self.ctype == 'double'):
            error('Attempt to read control register as FP')
        if self.read_code is not None:
            return self.buildReadCode('readMiscRegOperand')

        if predRead:
            rindex = '_sourceIndex++'
        else:
            rindex = '%d' % self.src_reg_idx

        return '%s = xc->readMiscRegOperand(this, %s);\n' % \
            (self.base_name, rindex)

    # Return the C++ code that writes the operand's variable back to
    # the register and records it as trace data.  It is an error for
    # the operand to have a 'float' or 'double' C type.  Custom
    # write_code, if any, takes precedence over the generated write.
    def makeWrite(self, predWrite):
        if (self.ctype == 'float' or self.ctype == 'double'):
            error('Attempt to write control register as FP')
        if self.write_code is not None:
            return self.buildWriteCode('setMiscRegOperand')

        if predWrite:
            windex = '_destIndex++'
        else:
            windex = '%d' % self.dest_reg_idx

        wb = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
             (windex, self.base_name)
        wb += 'if (traceData) { traceData->setData(%s); }' % \
              self.base_name

        return wb
718
class MemOperand(Operand):
    '''Operand descriptor for a memory operand.'''

    def isMem(self):
        return 1

    # A memory operand contributes nothing to the constructor.
    def makeConstructor(self, predRead, predWrite):
        return ''

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        # Declare memory data variable.
        return '%s %s = 0;\n' % (self.ctype, self.base_name)

    # Read code is generated only from custom read_code; without it
    # nothing is emitted.
    def makeRead(self, predRead):
        if self.read_code is not None:
            return self.buildReadCode()
        return ''

    # Write code is generated only from custom write_code; without it
    # nothing is emitted.
    def makeWrite(self, predWrite):
        if self.write_code is not None:
            return self.buildWriteCode()
        return ''
741
class PCStateOperand(Operand):
    '''Operand descriptor for the PC state.  With an empty reg_spec the
    operand is the whole PC state; otherwise reg_spec names the
    accessor of one component of it.'''

    def isPCState(self):
        return 1

    # The PC state contributes nothing to the constructor.
    def makeConstructor(self, predRead, predWrite):
        return ''

    def makeDecl(self):
        # A component is declared with the operand's own C type, the
        # whole PC state as TheISA::PCState.
        if self.isPCPart():
            ctype = self.ctype
        else:
            ctype = 'TheISA::PCState'
        return "%s %s;\n" % (ctype, self.base_name)

    def makeRead(self, predRead):
        if not self.reg_spec:
            # The whole PC state itself.
            return '%s = xc->pcState();\n' % self.base_name
        # A component of the PC state.
        return '%s = __parserAutoPCState.%s();\n' % \
            (self.base_name, self.reg_spec)

    def makeWrite(self, predWrite):
        if not self.reg_spec:
            # The whole PC state itself.
            return 'xc->pcState(%s);\n' % self.base_name
        # A component of the PC state.
        return '__parserAutoPCState.%s(%s);\n' % \
            (self.reg_spec, self.base_name)
772
class OperandList(object):
    '''Collection of the operand descriptors referenced by a code block.

    The constructor scans 'code' with parser.operandsRE, creates one
    descriptor per distinct operand base name (using the classes in
    parser.operandNameMap), records whether each operand is used as a
    source and/or a destination, and then numbers the register operands.
    Descriptors are reachable by index, by len(), and by base name via
    find_base().'''
    def __init__(self, parser, code):
        '''Build the list from the operands found in the string 'code'.

        Also raises parser.maxInstSrcRegs, parser.maxInstDestRegs and
        parser.maxMiscDestRegs when this block needs more registers
        than any block seen so far.  error() is called for an operand
        used with inconsistent extensions and for a second memory
        operand.'''
        self.items = []
        self.bases = {}
        # delete strings and comments so we don't match on operands inside
        for regEx in (stringRE, commentRE):
            code = regEx.sub('', code)
        # search for operands
        next_pos = 0
        while True:
            match = parser.operandsRE.search(code, next_pos)
            if not match:
                # no more matches: we're done
                break
            # regexp groups are operand full name, base, and extension
            (op_full, op_base, op_ext) = match.groups()
            # if the token following the operand is an assignment, this is
            # a destination (LHS), else it's a source (RHS)
            is_dest = assignRE.match(code, match.end()) is not None
            is_src = not is_dest
            # see if we've already seen this one
            op_desc = self.find_base(op_base)
            if op_desc:
                if op_desc.ext != op_ext:
                    error('Inconsistent extensions for operand %s' % \
                          op_base)
                op_desc.is_src = op_desc.is_src or is_src
                op_desc.is_dest = op_desc.is_dest or is_dest
            else:
                # new operand: create new descriptor
                op_desc = parser.operandNameMap[op_base](parser,
                    op_full, op_ext, is_src, is_dest)
                self.append(op_desc)
            # start next search after end of current match
            next_pos = match.end()
        self.sort()
        # enumerate source & dest register operands... used in building
        # constructor later
        self.numSrcRegs = 0
        self.numDestRegs = 0
        self.numFPDestRegs = 0
        self.numIntDestRegs = 0
        self.numMiscDestRegs = 0
        self.memOperand = None

        # Flags to keep track if one or more operands are to be read/written
        # conditionally.
        self.predRead = False
        self.predWrite = False

        for op_desc in self.items:
            if op_desc.isReg():
                if op_desc.is_src:
                    op_desc.src_reg_idx = self.numSrcRegs
                    self.numSrcRegs += 1
                if op_desc.is_dest:
                    op_desc.dest_reg_idx = self.numDestRegs
                    self.numDestRegs += 1
                    if op_desc.isFloatReg():
                        self.numFPDestRegs += 1
                    elif op_desc.isIntReg():
                        self.numIntDestRegs += 1
                    elif op_desc.isControlReg():
                        self.numMiscDestRegs += 1
            elif op_desc.isMem():
                if self.memOperand:
                    error("Code block has more than one memory operand.")
                self.memOperand = op_desc

            # Check if this operand has read/write predication. If true, then
            # the microop will dynamically index source/dest registers.
            self.predRead = self.predRead or op_desc.hasReadPred()
            self.predWrite = self.predWrite or op_desc.hasWritePred()

        # keep the parser-wide maxima up to date
        if parser.maxInstSrcRegs < self.numSrcRegs:
            parser.maxInstSrcRegs = self.numSrcRegs
        if parser.maxInstDestRegs < self.numDestRegs:
            parser.maxInstDestRegs = self.numDestRegs
        if parser.maxMiscDestRegs < self.numMiscDestRegs:
            parser.maxMiscDestRegs = self.numMiscDestRegs

        # now make a final pass to finalize op_desc fields that may depend
        # on the register enumeration
        for op_desc in self.items:
            op_desc.finalize(self.predRead, self.predWrite)

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        return self.items[index]

    def append(self, op_desc):
        # add a descriptor and index it by its base name
        self.items.append(op_desc)
        self.bases[op_desc.base_name] = op_desc

    def find_base(self, base_name):
        # like self.bases[base_name], but returns None if not found
        # (rather than raising exception)
        return self.bases.get(base_name)

    # internal helper function for concat[Some]Attr{Strings|Lists}:
    # appends attribute 'attr_name' of every operand accepted by 'filter'
    # to 'result' (an empty string or a fresh list) and returns it
    def __internalConcatAttrs(self, attr_name, filter, result):
        for op_desc in self.items:
            if filter(op_desc):
                result += getattr(op_desc, attr_name)
        return result

    # return a single string that is the concatenation of the (string)
    # values of the specified attribute for all operands
    def concatAttrStrings(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, '')

    # like concatAttrStrings, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrStrings(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, '')

    # return a single list that is the concatenation of the (list)
    # values of the specified attribute for all operands
    def concatAttrLists(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, [])

    # like concatAttrLists, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrLists(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, [])

    def sort(self):
        # Order the descriptors by ascending sort_pri.  A key function
        # yields the same stable ordering as the former cmp-style
        # comparator (a.sort_pri - b.sort_pri) and does not depend on
        # list.sort() accepting a comparison function.
        self.items.sort(key=lambda op_desc: op_desc.sort_pri)
906
class SubOperandList(OperandList):
    '''Operand list for one piece of a larger code block.

    Holds references to the descriptors in 'master_list' for the
    operands that appear in 'code'; no new descriptors are created.
    Also records the memory operand (if any), how the PC state is
    accessed, and whether any operand is read or written
    conditionally.'''
    def __init__(self, parser, code, master_list):
        self.items = []
        self.bases = {}
        # strip strings and comments first so operand names inside them
        # are not picked up
        for pattern in (stringRE, commentRE):
            code = pattern.sub('', code)
        # walk through every operand reference in what is left
        found = parser.operandsRE.search(code, 0)
        while found:
            # regexp groups are operand full name, base, and extension;
            # only the base name matters here
            (_full, base, _ext) = found.groups()
            if not master_list.find_base(base):
                error('Found operand %s which is not in the master list!' \
                      ' This is an internal error' % base)
            elif not self.find_base(base):
                # first sighting in this sub list: reference the
                # master list's descriptor
                self.append(master_list.bases[base])
            # resume the search after the end of this match
            found = parser.operandsRE.search(code, found.end())
        self.sort()
        self.memOperand = None
        # Whether the whole PC needs to be read so parts of it can be accessed
        self.readPC = False
        # Whether the whole PC needs to be written after parts of it were
        # changed
        self.setPC = False
        # Whether this instruction manipulates the whole PC or parts of it.
        # Mixing the two is a bad idea and flagged as an error.
        self.pcPart = None

        # Set when at least one operand is read/written conditionally.
        self.predRead = False
        self.predWrite = False

        for operand in self.items:
            is_pc_part = operand.isPCPart()
            if is_pc_part:
                self.readPC = True
                if operand.is_dest:
                    self.setPC = True

            if operand.isPCState():
                # an earlier PC state operand must agree on whole vs. part
                if self.pcPart is not None and \
                        bool(self.pcPart) != bool(is_pc_part):
                    error("Mixed whole and partial PC state operands.")
                self.pcPart = is_pc_part

            if operand.isMem():
                if self.memOperand:
                    error("Code block has more than one memory operand.")
                self.memOperand = operand

            # Check if this operand has read/write predication. If true, then
            # the microop will dynamically index source/dest registers.
            if not self.predRead:
                self.predRead = operand.hasReadPred()
            if not self.predWrite:
                self.predWrite = operand.hasWritePred()
978
# Regular expression object to match C++ strings: a double-quoted
# literal in which a backslash escapes the character that follows it.
# (used by OperandList and SubOperandList to strip string literals
# from code before searching it for operands)
stringRE = re.compile(r'"([^"\\]|\\.)*"')

# Regular expression object to match C++ comments: either /* ... */
# (which may span lines, hence re.DOTALL) or // up to the end of the
# line, together with the horizontal whitespace in front of the comment
# and, for the /* */ form, after it.
# (used by OperandList and SubOperandList to strip comments from code
# before searching it for operands)
commentRE = re.compile(r'(^)?[^\S\n]*/(?:\*(.*?)\*/[^\S\n]*|/[^\n]*)($)?',
        re.DOTALL | re.MULTILINE)

# Regular expression object to match an assignment operator: optional
# whitespace followed by an '=' that is not the start of '=='.
# (used by OperandList to decide whether the operand that precedes the
# match position is a destination)
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
990
def makeFlagConstructor(flag_list):
    '''Return C++ statements that set every flag in 'flag_list' to true.

    Each distinct flag produces one "flags[<flag>] = true;" statement on
    its own tab-indented line, in sorted order.  An empty list produces
    the empty string.

    As a side effect, a non-empty 'flag_list' is sorted and stripped of
    duplicates in place, so the caller's list is modified.'''
    if not flag_list:
        return ''
    # Sort and filter out repeated flags.  Slice assignment keeps the
    # update in place, so the caller still sees the sorted, de-duplicated
    # list afterwards.
    flag_list[:] = sorted(set(flag_list))
    pre = '\n\tflags['
    post = '] = true;'
    return pre + (post + pre).join(flag_list) + post
1006
# Assume all instruction flags are of the form 'IsFoo'.  InstObjParams
# applies this pattern with match(), so any optional argument that
# starts with 'Is' is treated as a flag.
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass.  InstObjParams
# applies this pattern with match(), which anchors only the start of
# the string, so text following 'Op' does not prevent a match.
opClassRE = re.compile(r'.*Op|No_OpClass')
1012
class InstObjParams(object):
    '''Bundle of values describing one instruction definition.

    Attributes set by the constructor: mnemonic, class_name, base_class,
    snippets (dict of code snippets), operands (an OperandList built
    from all snippets), constructor (generated C++ constructor body
    text), flags (list of flag names), op_class and fp_enable_check.'''
    def __init__(self, parser, mnem, class_name, base_class = '',
                 snippets = None, opt_args = None):
        '''Collect the parameters for one instruction.

        snippets may be a dict of named code snippets or a single
        snippet, which is stored under the key 'code'.  opt_args is a
        list of strings, each an instruction flag ('Is...') or an
        OpClass name; anything else is reported through error().
        Omitting snippets/opt_args (or passing None) means "none".'''
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        # Create fresh containers for omitted arguments.  The snippets
        # dict is stored on the instance, so a shared mutable default
        # would leak changes from one instance to the next.
        if snippets is None:
            snippets = {}
        if opt_args is None:
            opt_args = []
        if not isinstance(snippets, dict):
            snippets = {'code' : snippets}
        compositeCode = ' '.join(map(str, snippets.values()))
        self.snippets = snippets

        self.operands = OperandList(parser, compositeCode)

        # The header of the constructor declares the variables to be used
        # in the body of the constructor.
        header = ''
        header += '\n\t_numSrcRegs = 0;'
        header += '\n\t_numDestRegs = 0;'
        header += '\n\t_numFPDestRegs = 0;'
        header += '\n\t_numIntDestRegs = 0;'

        self.constructor = header + \
                           self.operands.concatAttrStrings('constructor')

        self.flags = self.operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and can be overridden
        # later otherwise.
        if 'IsStore' in self.flags:
            self.op_class = 'MemWriteOp'
        elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
            self.op_class = 'MemReadOp'
        elif 'IsFloating' in self.flags:
            self.op_class = 'FloatAddOp'
        else:
            self.op_class = 'IntAluOp'

        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error('InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to constructor here to include
        # any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
        else:
            self.fp_enable_check = ''
1073            self.fp_enable_check = ''
1074
1075##############
1076# Stack: a simple stack object.  Used for both formats (formatStack)
1077# and default cases (defaultStack).  Simply wraps a list to give more
1078# stack-like syntax and enable initialization with an argument list
1079# (as opposed to an argument that's a list).
1080
class Stack(list):
    '''A list with stack-style helpers.  The initial contents are given
    as separate positional arguments rather than as one iterable.'''
    def __init__(self, *items):
        super(Stack, self).__init__(items)

    def push(self, item):
        # place 'item' on top of the stack
        self.append(item)

    def top(self):
        # look at the most recently pushed item without removing it
        newest = self[-1]
        return newest
1089        return self[-1]
1090
1091#######################
1092#
1093# Output file template
1094#
1095
# Skeleton of a generated C++ file.  It is a '%'-format string that
# expects a mapping with the keys filename, includes, global_output,
# namespace, namespace_output and decode_function.
file_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s

%(decode_function)s
'''
1115
# Skeleton of the generated C++ file that declares the register-count
# limits.  It is a '%'-format string that expects a mapping with the
# keys filename and namespace (strings) and MaxInstSrcRegs,
# MaxInstDestRegs and MaxMiscDestRegs (formatted with %d).
max_inst_regs_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

namespace %(namespace)s {

    const int MaxInstSrcRegs = %(MaxInstSrcRegs)d;
    const int MaxInstDestRegs = %(MaxInstDestRegs)d;
    const int MaxMiscDestRegs = %(MaxMiscDestRegs)d;

} // namespace %(namespace)s

'''
1132
1133class ISAParser(Grammar):
1134    def __init__(self, output_dir, cpu_models):
1135        super(ISAParser, self).__init__()
1136        self.output_dir = output_dir
1137
1138        self.cpuModels = cpu_models
1139
1140        # variable to hold templates
1141        self.templateMap = {}
1142
1143        # This dictionary maps format name strings to Format objects.
1144        self.formatMap = {}
1145
1146        # The format stack.
1147        self.formatStack = Stack(NoFormat())
1148
1149        # The default case stack.
1150        self.defaultStack = Stack(None)
1151
1152        # Stack that tracks current file and line number.  Each
1153        # element is a tuple (filename, lineno) that records the
1154        # *current* filename and the line number in the *previous*
1155        # file where it was included.
1156        self.fileNameStack = Stack()
1157
1158        symbols = ('makeList', 're', 'string')
1159        self.exportContext = dict([(s, eval(s)) for s in symbols])
1160
1161        self.maxInstSrcRegs = 0
1162        self.maxInstDestRegs = 0
1163        self.maxMiscDestRegs = 0
1164
1165    #####################################################################
1166    #
1167    #                                Lexer
1168    #
1169    # The PLY lexer module takes two things as input:
1170    # - A list of token names (the string list 'tokens')
1171    # - A regular expression describing a match for each token.  The
1172    #   regexp for token FOO can be provided in two ways:
1173    #   - as a string variable named t_FOO
1174    #   - as the doc string for a function named t_FOO.  In this case,
1175    #     the function is also executed, allowing an action to be
1176    #     associated with each token match.
1177    #
1178    #####################################################################
1179
    # Reserved words.  These are listed separately as they are matched
    # using the same regexp as generic IDs, but distinguished in the
    # t_ID() function.  The PLY documentation suggests this approach.
    # The names here are the token types; reserved_map below maps the
    # lower-case spelling of each word to its entry in this tuple.
    reserved = (
        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
        'OUTPUT', 'SIGNED', 'TEMPLATE'
        )

    # List of tokens.  The lex module requires this.
    tokens = reserved + (
        # identifier
        'ID',

        # integer literal
        'INTLIT',

        # string literal
        'STRLIT',

        # code literal
        'CODELIT',

        # ( ) [ ] { } < > = , ; . : :: *
        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'LBRACE', 'RBRACE',
        'LESS', 'GREATER', 'EQUALS',
        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
        'ASTERISK',

        # C preprocessor directives
        'CPPDIRECTIVE'

    # The following are matched but never returned. commented out to
    # suppress PLY warning
        # newfile directive
    #    'NEWFILE',

        # endfile directive
    #    'ENDFILE'
    )
1222
    # Regular expressions for token matching.  Each t_FOO string is the
    # regexp for the token named FOO in the 'tokens' tuple.
    t_LPAREN           = r'\('
    t_RPAREN           = r'\)'
    t_LBRACKET         = r'\['
    t_RBRACKET         = r'\]'
    t_LBRACE           = r'\{'
    t_RBRACE           = r'\}'
    t_LESS             = r'\<'
    t_GREATER          = r'\>'
    t_EQUALS           = r'='
    t_COMMA            = r','
    t_SEMI             = r';'
    t_DOT              = r'\.'
    t_COLON            = r':'
    t_DBLCOLON         = r'::'
    t_ASTERISK         = r'\*'

    # Identifiers and reserved words.  reserved_map maps the lower-case
    # spelling of each reserved word to its token name from 'reserved';
    # t_ID() looks identifiers up here.
    reserved_map = { }
    for r in reserved:
        reserved_map[r.lower()] = r
1244
1245    def t_ID(self, t):
1246        r'[A-Za-z_]\w*'
1247        t.type = self.reserved_map.get(t.value, 'ID')
1248        return t
1249
1250    # Integer literal
1251    def t_INTLIT(self, t):
1252        r'-?(0x[\da-fA-F]+)|\d+'
1253        try:
1254            t.value = int(t.value,0)
1255        except ValueError:
1256            error(t, 'Integer value "%s" too large' % t.value)
1257            t.value = 0
1258        return t
1259
1260    # String literal.  Note that these use only single quotes, and
1261    # can span multiple lines.
1262    def t_STRLIT(self, t):
1263        r"(?m)'([^'])+'"
1264        # strip off quotes
1265        t.value = t.value[1:-1]
1266        t.lexer.lineno += t.value.count('\n')
1267        return t
1268
1269
1270    # "Code literal"... like a string literal, but delimiters are
1271    # '{{' and '}}' so they get formatted nicely under emacs c-mode
1272    def t_CODELIT(self, t):
1273        r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
1274        # strip off {{ & }}
1275        t.value = t.value[2:-2]
1276        t.lexer.lineno += t.value.count('\n')
1277        return t
1278
1279    def t_CPPDIRECTIVE(self, t):
1280        r'^\#[^\#].*\n'
1281        t.lexer.lineno += t.value.count('\n')
1282        return t
1283
1284    def t_NEWFILE(self, t):
1285        r'^\#\#newfile\s+"[^"]*"'
1286        self.fileNameStack.push((t.value[11:-1], t.lexer.lineno))
1287        t.lexer.lineno = 0
1288
1289    def t_ENDFILE(self, t):
1290        r'^\#\#endfile'
1291        (old_filename, t.lexer.lineno) = self.fileNameStack.pop()
1292
1293    #
1294    # The functions t_NEWLINE, t_ignore, and t_error are
1295    # special for the lex module.
1296    #
1297
1298    # Newlines
1299    def t_NEWLINE(self, t):
1300        r'\n+'
1301        t.lexer.lineno += t.value.count('\n')
1302
    # Comments: '//' through the end of the line.  The function has no
    # body besides its regexp, so it returns None for every comment.
    def t_comment(self, t):
        r'//.*'

    # Completely ignored characters: space, tab and form feed
    t_ignore = ' \t\x0c'
1309
1310    # Error handler
1311    def t_error(self, t):
1312        error(t, "illegal character '%s'" % t.value[0])
1313        t.skip(1)
1314
1315    #####################################################################
1316    #
1317    #                                Parser
1318    #
1319    # Every function whose name starts with 'p_' defines a grammar
1320    # rule.  The rule is encoded in the function's doc string, while
1321    # the function body provides the action taken when the rule is
1322    # matched.  The argument to each function is a list of the values
1323    # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
1324    # symbols on the RHS.  For tokens, the value is copied from the
1325    # t.value attribute provided by the lexer.  For non-terminals, the
1326    # value is assigned by the producing rule; i.e., the job of the
1327    # grammar rule function is to set the value for the non-terminal
1328    # on the LHS (by assigning to t[0]).
1329    #####################################################################
1330
1331    # The LHS of the first grammar rule is used as the start symbol
1332    # (in this case, 'specification').  Note that this rule enforces
1333    # that there will be exactly one namespace declaration, with 0 or
1334    # more global defs/decls before and after it.  The defs & decls
1335    # before the namespace decl will be outside the namespace; those
1336    # after will be inside.  The decoder function is always inside the
1337    # namespace.
1338    def p_specification(self, t):
1339        'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
1340        global_code = t[1]
1341        isa_name = t[2]
1342        namespace = isa_name + "Inst"
1343        # wrap the decode block as a function definition
1344        t[4].wrap_decode_block('''
1345StaticInstPtr
1346%(isa_name)s::Decoder::decodeInst(%(isa_name)s::ExtMachInst machInst)
1347{
1348    using namespace %(namespace)s;
1349''' % vars(), '}')
1350        # both the latter output blocks and the decode block are in
1351        # the namespace
1352        namespace_code = t[3] + t[4]
1353        # pass it all back to the caller of yacc.parse()
1354        t[0] = (isa_name, namespace, global_code, namespace_code)
1355
1356    # ISA name declaration looks like "namespace <foo>;"
    def p_name_decl(self, t):
        'name_decl : NAMESPACE ID SEMI'
        # the value of the rule is the declared name (the ID token)
        t[0] = t[2]
1360
1361    # 'opt_defs_and_outputs' is a possibly empty sequence of
1362    # def and/or output statements.
    def p_opt_defs_and_outputs_0(self, t):
        'opt_defs_and_outputs : empty'
        # nothing was given: the value is a GenCode built with no output
        # arguments
        t[0] = GenCode(self)
1366
    def p_opt_defs_and_outputs_1(self, t):
        'opt_defs_and_outputs : defs_and_outputs'
        # pass the value of the statement sequence through unchanged
        t[0] = t[1]
1370
    def p_defs_and_outputs_0(self, t):
        'defs_and_outputs : def_or_output'
        # a sequence of one statement has that statement's value
        t[0] = t[1]
1374
    def p_defs_and_outputs_1(self, t):
        'defs_and_outputs : defs_and_outputs def_or_output'
        # combine the value of the sequence so far with that of the new
        # statement using '+'
        t[0] = t[1] + t[2]
1378
1379    # The list of possible definition/output statements.
    def p_def_or_output(self, t):
        '''def_or_output : def_format
                         | def_bitfield
                         | def_bitfield_struct
                         | def_template
                         | def_operand_types
                         | def_operands
                         | output_header
                         | output_decoder
                         | output_exec
                         | global_let'''
        # whichever alternative matched, pass its value through unchanged
        t[0] = t[1]
1392
1393    # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
1394    # directly to the appropriate output section.
1395
1396    # Massage output block by substituting in template definitions and
1397    # bit operators.  We handle '%'s embedded in the string that don't
1398    # indicate template substitutions (or CPU-specific symbols, which
1399    # get handled in GenCode) by doubling them first so that the
1400    # format operation will reduce them back to single '%'s.
1401    def process_output(self, s):
1402        s = self.protectNonSubstPercents(s)
1403        # protects cpu-specific symbols too
1404        s = self.protectCpuSymbols(s)
1405        return substBitOps(s % self.templateMap)
1406
1407    def p_output_header(self, t):
1408        'output_header : OUTPUT HEADER CODELIT SEMI'
1409        t[0] = GenCode(self, header_output = self.process_output(t[3]))
1410
1411    def p_output_decoder(self, t):
1412        'output_decoder : OUTPUT DECODER CODELIT SEMI'
1413        t[0] = GenCode(self, decoder_output = self.process_output(t[3]))
1414
1415    def p_output_exec(self, t):
1416        'output_exec : OUTPUT EXEC CODELIT SEMI'
1417        t[0] = GenCode(self, exec_output = self.process_output(t[3]))
1418
1419    # global let blocks 'let {{...}}' (Python code blocks) are
1420    # executed directly when seen.  Note that these execute in a
1421    # special variable context 'exportContext' to prevent the code
1422    # from polluting this script's namespace.
1423    def p_global_let(self, t):
1424        'global_let : LET CODELIT SEMI'
1425        self.updateExportContext()
1426        self.exportContext["header_output"] = ''
1427        self.exportContext["decoder_output"] = ''
1428        self.exportContext["exec_output"] = ''
1429        self.exportContext["decode_block"] = ''
1430        try:
1431            exec fixPythonIndentation(t[2]) in self.exportContext
1432        except Exception, exc:
1433            if debug:
1434                raise
1435            error(t, 'error: %s in global let block "%s".' % (exc, t[2]))
1436        t[0] = GenCode(self,
1437                       header_output=self.exportContext["header_output"],
1438                       decoder_output=self.exportContext["decoder_output"],
1439                       exec_output=self.exportContext["exec_output"],
1440                       decode_block=self.exportContext["decode_block"])
1441
1442    # Define the mapping from operand type extensions to C++ types and
1443    # bit widths (stored in operandTypeMap).
    def p_def_operand_types(self, t):
        'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
        # The code literal is the body of a Python dict display.  eval()
        # is called without an explicit namespace here, so the text is
        # evaluated in this function's scope (p_def_operands, by
        # contrast, evaluates in exportContext).
        try:
            self.operandTypeMap = eval('{' + t[3] + '}')
        except Exception, exc:
            # in debug mode let the original exception propagate
            if debug:
                raise
            error(t,
                  'error: %s in def operand_types block "%s".' % (exc, t[3]))
        t[0] = GenCode(self) # contributes nothing to the output C++ file
1454
1455    # Define the mapping from operand names to operand classes and
1456    # other traits.  Stored in operandNameMap.
    def p_def_operands(self, t):
        'def_operands : DEF OPERANDS CODELIT SEMI'
        # operandTypeMap only exists once a 'def operand_types' block
        # has been processed
        if not hasattr(self, 'operandTypeMap'):
            error(t, 'error: operand types must be defined before operands')
        # The code literal is the body of a Python dict display,
        # evaluated with exportContext as its namespace.
        try:
            user_dict = eval('{' + t[3] + '}', self.exportContext)
        except Exception, exc:
            # in debug mode let the original exception propagate
            if debug:
                raise
            # NOTE(review): user_dict is unbound below unless error()
            # raises -- presumably it does; confirm.
            error(t, 'error: %s in def operands block "%s".' % (exc, t[3]))
        self.buildOperandNameMap(user_dict, t.lexer.lineno)
        t[0] = GenCode(self) # contributes nothing to the output C++ file
1469
    # A bitfield definition looks like:
    # 'def [signed] bitfield <ID> < <first>:<last> >;'
    # (the outer '<' and '>' are literal characters in the input).
    # This generates a preprocessor macro in the output file.
1473    def p_def_bitfield_0(self, t):
1474        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
1475        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
1476        if (t[2] == 'signed'):
1477            expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
1478        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1479        t[0] = GenCode(self, header_output=hash_define)
1480
    # alternate form for single bit: 'def [signed] bitfield <ID> < <bit> >;'
    # (the outer '<' and '>' are literal characters in the input)
1482    def p_def_bitfield_1(self, t):
1483        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
1484        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
1485        if (t[2] == 'signed'):
1486            expr = 'sext<%d>(%s)' % (1, expr)
1487        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1488        t[0] = GenCode(self, header_output=hash_define)
1489
1490    # alternate form for structure member: 'def bitfield <ID> <ID>'
1491    def p_def_bitfield_struct(self, t):
1492        'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
1493        if (t[2] != ''):
1494            error(t, 'error: structure bitfields are always unsigned.')
1495        expr = 'machInst.%s' % t[5]
1496        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1497        t[0] = GenCode(self, header_output=hash_define)
1498
    def p_id_with_dot_0(self, t):
        'id_with_dot : ID'
        # a name without dots is just the identifier itself
        t[0] = t[1]
1502
1503    def p_id_with_dot_1(self, t):
1504        'id_with_dot : ID DOT id_with_dot'
1505        t[0] = t[1] + t[2] + t[3]
1506
    def p_opt_signed_0(self, t):
        'opt_signed : SIGNED'
        # the value is the text of the SIGNED token; the bitfield rules
        # compare it against 'signed'
        t[0] = t[1]
1510
    def p_opt_signed_1(self, t):
        'opt_signed : empty'
        # the empty string stands for "not signed"
        t[0] = ''
1514
1515    def p_def_template(self, t):
1516        'def_template : DEF TEMPLATE ID CODELIT SEMI'
1517        self.templateMap[t[3]] = Template(self, t[4])
1518        t[0] = GenCode(self)
1519
1520    # An instruction format definition looks like
1521    # "def format <fmt>(<params>) {{...}};"
1522    def p_def_format(self, t):
1523        'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
1524        (id, params, code) = (t[3], t[5], t[7])
1525        self.defFormat(id, params, code, t.lexer.lineno)
1526        t[0] = GenCode(self)
1527
1528    # The formal parameter list for an instruction format is a
1529    # possibly empty list of comma-separated parameters.  Positional
1530    # (standard, non-keyword) parameters must come first, followed by
1531    # keyword parameters, followed by a '*foo' parameter that gets
1532    # excess positional arguments (as in Python).  Each of these three
1533    # parameter categories is optional.
1534    #
1535    # Note that we do not support the '**foo' parameter for collecting
1536    # otherwise undefined keyword args.  Otherwise the parameter list
1537    # is (I believe) identical to what is supported in Python.
1538    #
    # The param list generates a flat list of strings: the names of the
    # positional params, then one 'name = <repr of default>' string per
    # keyword param, then '*name' for the excess-args param (if any).
    def p_param_list_0(self, t):
        'param_list : positional_param_list COMMA nonpositional_param_list'
        # both parts are lists; the positional params come first
        t[0] = t[1] + t[3]
1545
    def p_param_list_1(self, t):
        '''param_list : positional_param_list
                      | nonpositional_param_list'''
        # only one kind of parameter is present: pass its list through
        t[0] = t[1]
1550
    def p_positional_param_list_0(self, t):
        'positional_param_list : empty'
        # no positional params: an empty list
        t[0] = []
1554
    def p_positional_param_list_1(self, t):
        'positional_param_list : ID'
        # a single positional param: a one-element list with its name
        t[0] = [t[1]]
1558
    def p_positional_param_list_2(self, t):
        'positional_param_list : positional_param_list COMMA ID'
        # new list: the names collected so far followed by the new one
        t[0] = t[1] + [t[3]]
1562
    def p_nonpositional_param_list_0(self, t):
        'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
        # both parts are lists; the keyword params come first
        t[0] = t[1] + t[3]
1566
    def p_nonpositional_param_list_1(self, t):
        '''nonpositional_param_list : keyword_param_list
                                    | excess_args_param'''
        # only one of the two parts is present: pass its list through
        t[0] = t[1]
1571
    def p_keyword_param_list_0(self, t):
        'keyword_param_list : keyword_param'
        # a single keyword param: a one-element list
        t[0] = [t[1]]
1575
    def p_keyword_param_list_1(self, t):
        'keyword_param_list : keyword_param_list COMMA keyword_param'
        # new list: the params collected so far followed by the new one
        t[0] = t[1] + [t[3]]
1579
1580    def p_keyword_param(self, t):
1581        'keyword_param : ID EQUALS expr'
1582        t[0] = t[1] + ' = ' + t[3].__repr__()
1583
    def p_excess_args_param(self, t):
        'excess_args_param : ASTERISK ID'
        # Just concatenate them: '*ID'.  Wrap in list to be consistent
        # with positional_param_list and keyword_param_list, so the
        # param_list rules can join all three kinds with '+'.
        t[0] = [t[1] + t[2]]
1589
1590    # End of format definition-related rules.
1591    ##############
1592
1593    #
1594    # A decode block looks like:
    #       decode <field> [default <inst>] { ... }
1596    #
1597    def p_decode_block(self, t):
1598        'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
1599        default_defaults = self.defaultStack.pop()
1600        codeObj = t[5]
1601        # use the "default defaults" only if there was no explicit
1602        # default statement in decode_stmt_list
1603        if not codeObj.has_decode_default:
1604            codeObj += default_defaults
1605        codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
1606        t[0] = codeObj
1607
1608    # The opt_default statement serves only to push the "default
1609    # defaults" onto defaultStack.  This value will be used by nested
1610    # decode blocks, and used and popped off when the current
1611    # decode_block is processed (in p_decode_block() above).
1612    def p_opt_default_0(self, t):
1613        'opt_default : empty'
1614        # no default specified: reuse the one currently at the top of
1615        # the stack
1616        self.defaultStack.push(self.defaultStack.top())
1617        # no meaningful value returned
1618        t[0] = None
1619
1620    def p_opt_default_1(self, t):
1621        'opt_default : DEFAULT inst'
1622        # push the new default
1623        codeObj = t[2]
1624        codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
1625        self.defaultStack.push(codeObj)
1626        # no meaningful value returned
1627        t[0] = None
1628
1629    def p_decode_stmt_list_0(self, t):
1630        'decode_stmt_list : decode_stmt'
1631        t[0] = t[1]
1632
1633    def p_decode_stmt_list_1(self, t):
1634        'decode_stmt_list : decode_stmt decode_stmt_list'
1635        if (t[1].has_decode_default and t[2].has_decode_default):
1636            error(t, 'Two default cases in decode block')
1637        t[0] = t[1] + t[2]
1638
1639    #
1640    # Decode statement rules
1641    #
1642    # There are four types of statements allowed in a decode block:
1643    # 1. Format blocks 'format <foo> { ... }'
1644    # 2. Nested decode blocks
1645    # 3. Instruction definitions.
1646    # 4. C preprocessor directives.
1647
1648
1649    # Preprocessor directives found in a decode statement list are
1650    # passed through to the output, replicated to all of the output
1651    # code streams.  This works well for ifdefs, so we can ifdef out
1652    # both the declarations and the decode cases generated by an
1653    # instruction definition.  Handling them as part of the grammar
1654    # makes it easy to keep them in the right place with respect to
1655    # the code generated by the other statements.
1656    def p_decode_stmt_cpp(self, t):
1657        'decode_stmt : CPPDIRECTIVE'
1658        t[0] = GenCode(self, t[1], t[1], t[1], t[1])
1659
1660    # A format block 'format <foo> { ... }' sets the default
1661    # instruction format used to handle instruction definitions inside
1662    # the block.  This format can be overridden by using an explicit
1663    # format on the instruction definition or with a nested format
1664    # block.
1665    def p_decode_stmt_format(self, t):
1666        'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
1667        # The format will be pushed on the stack when 'push_format_id'
1668        # is processed (see below).  Once the parser has recognized
1669        # the full production (though the right brace), we're done
1670        # with the format, so now we can pop it.
1671        self.formatStack.pop()
1672        t[0] = t[4]
1673
1674    # This rule exists so we can set the current format (& push the
1675    # stack) when we recognize the format name part of the format
1676    # block.
1677    def p_push_format_id(self, t):
1678        'push_format_id : ID'
1679        try:
1680            self.formatStack.push(self.formatMap[t[1]])
1681            t[0] = ('', '// format %s' % t[1])
1682        except KeyError:
1683            error(t, 'instruction format "%s" not defined.' % t[1])
1684
1685    # Nested decode block: if the value of the current field matches
1686    # the specified constant, do a nested decode on some other field.
1687    def p_decode_stmt_decode(self, t):
1688        'decode_stmt : case_label COLON decode_block'
1689        label = t[1]
1690        codeObj = t[3]
1691        # just wrap the decoding code from the block as a case in the
1692        # outer switch statement.
1693        codeObj.wrap_decode_block('\n%s:\n' % label)
1694        codeObj.has_decode_default = (label == 'default')
1695        t[0] = codeObj
1696
1697    # Instruction definition (finally!).
1698    def p_decode_stmt_inst(self, t):
1699        'decode_stmt : case_label COLON inst SEMI'
1700        label = t[1]
1701        codeObj = t[3]
1702        codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
1703        codeObj.has_decode_default = (label == 'default')
1704        t[0] = codeObj
1705
1706    # The case label is either a list of one or more constants or
1707    # 'default'
1708    def p_case_label_0(self, t):
1709        'case_label : intlit_list'
1710        def make_case(intlit):
1711            if intlit >= 2**32:
1712                return 'case ULL(%#x)' % intlit
1713            else:
1714                return 'case %#x' % intlit
1715        t[0] = ': '.join(map(make_case, t[1]))
1716
1717    def p_case_label_1(self, t):
1718        'case_label : DEFAULT'
1719        t[0] = 'default'
1720
1721    #
1722    # The constant list for a decode case label must be non-empty, but
1723    # may have one or more comma-separated integer literals in it.
1724    #
1725    def p_intlit_list_0(self, t):
1726        'intlit_list : INTLIT'
1727        t[0] = [t[1]]
1728
1729    def p_intlit_list_1(self, t):
1730        'intlit_list : intlit_list COMMA INTLIT'
1731        t[0] = t[1]
1732        t[0].append(t[3])
1733
1734    # Define an instruction using the current instruction format
1735    # (specified by an enclosing format block).
1736    # "<mnemonic>(<args>)"
1737    def p_inst_0(self, t):
1738        'inst : ID LPAREN arg_list RPAREN'
1739        # Pass the ID and arg list to the current format class to deal with.
1740        currentFormat = self.formatStack.top()
1741        codeObj = currentFormat.defineInst(self, t[1], t[3], t.lexer.lineno)
1742        args = ','.join(map(str, t[3]))
1743        args = re.sub('(?m)^', '//', args)
1744        args = re.sub('^//', '', args)
1745        comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
1746        codeObj.prepend_all(comment)
1747        t[0] = codeObj
1748
1749    # Define an instruction using an explicitly specified format:
1750    # "<fmt>::<mnemonic>(<args>)"
1751    def p_inst_1(self, t):
1752        'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
1753        try:
1754            format = self.formatMap[t[1]]
1755        except KeyError:
1756            error(t, 'instruction format "%s" not defined.' % t[1])
1757
1758        codeObj = format.defineInst(self, t[3], t[5], t.lexer.lineno)
1759        comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
1760        codeObj.prepend_all(comment)
1761        t[0] = codeObj
1762
1763    # The arg list generates a tuple, where the first element is a
1764    # list of the positional args and the second element is a dict
1765    # containing the keyword args.
1766    def p_arg_list_0(self, t):
1767        'arg_list : positional_arg_list COMMA keyword_arg_list'
1768        t[0] = ( t[1], t[3] )
1769
1770    def p_arg_list_1(self, t):
1771        'arg_list : positional_arg_list'
1772        t[0] = ( t[1], {} )
1773
1774    def p_arg_list_2(self, t):
1775        'arg_list : keyword_arg_list'
1776        t[0] = ( [], t[1] )
1777
1778    def p_positional_arg_list_0(self, t):
1779        'positional_arg_list : empty'
1780        t[0] = []
1781
1782    def p_positional_arg_list_1(self, t):
1783        'positional_arg_list : expr'
1784        t[0] = [t[1]]
1785
1786    def p_positional_arg_list_2(self, t):
1787        'positional_arg_list : positional_arg_list COMMA expr'
1788        t[0] = t[1] + [t[3]]
1789
1790    def p_keyword_arg_list_0(self, t):
1791        'keyword_arg_list : keyword_arg'
1792        t[0] = t[1]
1793
1794    def p_keyword_arg_list_1(self, t):
1795        'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
1796        t[0] = t[1]
1797        t[0].update(t[3])
1798
1799    def p_keyword_arg(self, t):
1800        'keyword_arg : ID EQUALS expr'
1801        t[0] = { t[1] : t[3] }
1802
1803    #
1804    # Basic expressions.  These constitute the argument values of
1805    # "function calls" (i.e. instruction definitions in the decode
1806    # block) and default values for formal parameters of format
1807    # functions.
1808    #
1809    # Right now, these are either strings, integers, or (recursively)
1810    # lists of exprs (using Python square-bracket list syntax).  Note
    # that bare identifiers are treated as string constants here (since
1812    # there isn't really a variable namespace to refer to).
1813    #
1814    def p_expr_0(self, t):
1815        '''expr : ID
1816                | INTLIT
1817                | STRLIT
1818                | CODELIT'''
1819        t[0] = t[1]
1820
1821    def p_expr_1(self, t):
1822        '''expr : LBRACKET list_expr RBRACKET'''
1823        t[0] = t[2]
1824
1825    def p_list_expr_0(self, t):
1826        'list_expr : expr'
1827        t[0] = [t[1]]
1828
1829    def p_list_expr_1(self, t):
1830        'list_expr : list_expr COMMA expr'
1831        t[0] = t[1] + [t[3]]
1832
1833    def p_list_expr_2(self, t):
1834        'list_expr : empty'
1835        t[0] = []
1836
1837    #
1838    # Empty production... use in other rules for readability.
1839    #
1840    def p_empty(self, t):
1841        'empty :'
1842        pass
1843
1844    # Parse error handler.  Note that the argument here is the
1845    # offending *token*, not a grammar symbol (hence the need to use
1846    # t.value)
1847    def p_error(self, t):
1848        if t:
1849            error(t, "syntax error at '%s'" % t.value)
1850        else:
1851            error("unknown syntax error")
1852
1853    # END OF GRAMMAR RULES
1854
1855    def updateExportContext(self):
1856
1857        # create a continuation that allows us to grab the current parser
1858        def wrapInstObjParams(*args):
1859            return InstObjParams(self, *args)
1860        self.exportContext['InstObjParams'] = wrapInstObjParams
1861        self.exportContext.update(self.templateMap)
1862
1863    def defFormat(self, id, params, code, lineno):
1864        '''Define a new format'''
1865
1866        # make sure we haven't already defined this one
1867        if id in self.formatMap:
1868            error(lineno, 'format %s redefined.' % id)
1869
1870        # create new object and store in global map
1871        self.formatMap[id] = Format(id, params, code)
1872
1873    def expandCpuSymbolsToDict(self, template):
1874        '''Expand template with CPU-specific references into a
1875        dictionary with an entry for each CPU model name.  The entry
1876        key is the model name and the corresponding value is the
1877        template with the CPU-specific refs substituted for that
1878        model.'''
1879
1880        # Protect '%'s that don't go with CPU-specific terms
1881        t = re.sub(r'%(?!\(CPU_)', '%%', template)
1882        result = {}
1883        for cpu in self.cpuModels:
1884            result[cpu.name] = t % cpu.strings
1885        return result
1886
1887    def expandCpuSymbolsToString(self, template):
1888        '''*If* the template has CPU-specific references, return a
1889        single string containing a copy of the template for each CPU
1890        model with the corresponding values substituted in.  If the
1891        template has no CPU-specific references, it is returned
1892        unmodified.'''
1893
1894        if template.find('%(CPU_') != -1:
1895            return reduce(lambda x,y: x+y,
1896                          self.expandCpuSymbolsToDict(template).values())
1897        else:
1898            return template
1899
1900    def protectCpuSymbols(self, template):
1901        '''Protect CPU-specific references by doubling the
1902        corresponding '%'s (in preparation for substituting a different
1903        set of references into the template).'''
1904
1905        return re.sub(r'%(?=\(CPU_)', '%%', template)
1906
1907    def protectNonSubstPercents(self, s):
1908        '''Protect any non-dict-substitution '%'s in a format string
1909        (i.e. those not followed by '(')'''
1910
1911        return re.sub(r'%(?!\()', '%%', s)
1912
1913    def buildOperandNameMap(self, user_dict, lineno):
1914        operand_name = {}
1915        for op_name, val in user_dict.iteritems():
1916
1917            # Check if extra attributes have been specified.
1918            if len(val) > 9:
1919                error(lineno, 'error: too many attributes for operand "%s"' %
1920                      base_cls_name)
1921
1922            # Pad val with None in case optional args are missing
1923            val += (None, None, None, None)
1924            base_cls_name, dflt_ext, reg_spec, flags, sort_pri, \
1925            read_code, write_code, read_predicate, write_predicate = val[:9]
1926
1927            # Canonical flag structure is a triple of lists, where each list
1928            # indicates the set of flags implied by this operand always, when
1929            # used as a source, and when used as a dest, respectively.
1930            # For simplicity this can be initialized using a variety of fairly
1931            # obvious shortcuts; we convert these to canonical form here.
1932            if not flags:
1933                # no flags specified (e.g., 'None')
1934                flags = ( [], [], [] )
1935            elif isinstance(flags, str):
1936                # a single flag: assumed to be unconditional
1937                flags = ( [ flags ], [], [] )
1938            elif isinstance(flags, list):
1939                # a list of flags: also assumed to be unconditional
1940                flags = ( flags, [], [] )
1941            elif isinstance(flags, tuple):
1942                # it's a tuple: it should be a triple,
1943                # but each item could be a single string or a list
1944                (uncond_flags, src_flags, dest_flags) = flags
1945                flags = (makeList(uncond_flags),
1946                         makeList(src_flags), makeList(dest_flags))
1947
1948            # Accumulate attributes of new operand class in tmp_dict
1949            tmp_dict = {}
1950            attrList = ['reg_spec', 'flags', 'sort_pri',
1951                        'read_code', 'write_code',
1952                        'read_predicate', 'write_predicate']
1953            if dflt_ext:
1954                dflt_ctype = self.operandTypeMap[dflt_ext]
1955                attrList.extend(['dflt_ctype', 'dflt_ext'])
1956            for attr in attrList:
1957                tmp_dict[attr] = eval(attr)
1958            tmp_dict['base_name'] = op_name
1959
1960            # New class name will be e.g. "IntReg_Ra"
1961            cls_name = base_cls_name + '_' + op_name
1962            # Evaluate string arg to get class object.  Note that the
1963            # actual base class for "IntReg" is "IntRegOperand", i.e. we
1964            # have to append "Operand".
1965            try:
1966                base_cls = eval(base_cls_name + 'Operand')
1967            except NameError:
1968                error(lineno,
1969                      'error: unknown operand base class "%s"' % base_cls_name)
1970            # The following statement creates a new class called
1971            # <cls_name> as a subclass of <base_cls> with the attributes
1972            # in tmp_dict, just as if we evaluated a class declaration.
1973            operand_name[op_name] = type(cls_name, (base_cls,), tmp_dict)
1974
1975        self.operandNameMap = operand_name
1976
1977        # Define operand variables.
1978        operands = user_dict.keys()
1979        extensions = self.operandTypeMap.keys()
1980
1981        operandsREString = r'''
1982        (?<!\w)      # neg. lookbehind assertion: prevent partial matches
1983        ((%s)(?:_(%s))?)   # match: operand with optional '_' then suffix
1984        (?!\w)       # neg. lookahead assertion: prevent partial matches
1985        ''' % (string.join(operands, '|'), string.join(extensions, '|'))
1986
1987        self.operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)
1988
1989        # Same as operandsREString, but extension is mandatory, and only two
1990        # groups are returned (base and ext, not full name as above).
1991        # Used for subtituting '_' for '.' to make C++ identifiers.
1992        operandsWithExtREString = r'(?<!\w)(%s)_(%s)(?!\w)' \
1993            % (string.join(operands, '|'), string.join(extensions, '|'))
1994
1995        self.operandsWithExtRE = \
1996            re.compile(operandsWithExtREString, re.MULTILINE)
1997
1998    def substMungedOpNames(self, code):
1999        '''Munge operand names in code string to make legal C++
2000        variable names.  This means getting rid of the type extension
2001        if any.  Will match base_name attribute of Operand object.)'''
2002        return self.operandsWithExtRE.sub(r'\1', code)
2003
2004    def mungeSnippet(self, s):
2005        '''Fix up code snippets for final substitution in templates.'''
2006        if isinstance(s, str):
2007            return self.substMungedOpNames(substBitOps(s))
2008        else:
2009            return s
2010
2011    def update_if_needed(self, file, contents):
2012        '''Update the output file only if the new contents are
2013        different from the current contents.  Minimizes the files that
2014        need to be rebuilt after minor changes.'''
2015
2016        file = os.path.join(self.output_dir, file)
2017        update = False
2018        if os.access(file, os.R_OK):
2019            f = open(file, 'r')
2020            old_contents = f.read()
2021            f.close()
2022            if contents != old_contents:
2023                os.remove(file) # in case it's write-protected
2024                update = True
2025            else:
2026                print 'File', file, 'is unchanged'
2027        else:
2028            update = True
2029        if update:
2030            f = open(file, 'w')
2031            f.write(contents)
2032            f.close()
2033
    # This regular expression matches '##include' directives: a line
    # (re.MULTILINE makes ^ and $ apply to each line) made up of
    # optional leading whitespace, '##include', whitespace, and a
    # double-quoted file name captured as the named group 'filename'.
    # Anything following the closing quote on that line is part of the
    # match as well.
    includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[^"]*)".*$',
                           re.MULTILINE)
2037
2038    def replace_include(self, matchobj, dirname):
2039        """Function to replace a matched '##include' directive with the
2040        contents of the specified file (with nested ##includes
2041        replaced recursively).  'matchobj' is an re match object
2042        (from a match of includeRE) and 'dirname' is the directory
2043        relative to which the file path should be resolved."""
2044
2045        fname = matchobj.group('filename')
2046        full_fname = os.path.normpath(os.path.join(dirname, fname))
2047        contents = '##newfile "%s"\n%s\n##endfile\n' % \
2048                   (full_fname, self.read_and_flatten(full_fname))
2049        return contents
2050
2051    def read_and_flatten(self, filename):
2052        """Read a file and recursively flatten nested '##include' files."""
2053
2054        current_dir = os.path.dirname(filename)
2055        try:
2056            contents = open(filename).read()
2057        except IOError:
2058            error('Error including file "%s"' % filename)
2059
2060        self.fileNameStack.push((filename, 0))
2061
2062        # Find any includes and include them
2063        def replace(matchobj):
2064            return self.replace_include(matchobj, current_dir)
2065        contents = self.includeRE.sub(replace, contents)
2066
2067        self.fileNameStack.pop()
2068        return contents
2069
    def _parse_isa_desc(self, isa_desc_file):
        '''Read in and parse the ISA description, then write the
        generated files (decoder.hh, decoder.cc, one exec file per CPU
        model and max_inst_regs.hh) through update_if_needed().

        The output templates are filled in with "template % vars()",
        i.e. from this function's local variables by name, so the
        local names assigned below and the order in which they are
        reassigned matter.'''

        # Read file and (recursively) all included files into a string.
        # PLY requires that the input be in a single string so we have to
        # do this up front.
        isa_desc = self.read_and_flatten(isa_desc_file)

        # Initialize filename stack with outer file.
        self.fileNameStack.push((isa_desc_file, 0))

        # Parse it.
        (isa_name, namespace, global_code, namespace_code) = \
                   self.parse_string(isa_desc)

        # grab the last three path components of isa_desc_file to put in
        # the output
        filename = '/'.join(isa_desc_file.split('/')[-3:])

        # generate decoder.hh
        includes = '#include "base/bitfield.hh" // for bitfield support'
        global_output = global_code.header_output
        namespace_output = namespace_code.header_output
        decode_function = ''
        self.update_if_needed('decoder.hh', file_template % vars())

        # generate decoder.cc
        includes = '#include "decoder.hh"'
        global_output = global_code.decoder_output
        namespace_output = namespace_code.decoder_output
        # namespace_output += namespace_code.decode_block
        decode_function = namespace_code.decode_block
        self.update_if_needed('decoder.cc', file_template % vars())

        # generate per-cpu exec files
        for cpu in self.cpuModels:
            includes = '#include "decoder.hh"\n'
            includes += cpu.includes
            global_output = global_code.exec_output[cpu.name]
            namespace_output = namespace_code.exec_output[cpu.name]
            decode_function = ''
            self.update_if_needed(cpu.filename, file_template % vars())

        # The variable names here are hacky, but this will create local
        # variables which will be referenced in vars() which have the
        # value of the globals.
        MaxInstSrcRegs = self.maxInstSrcRegs
        MaxInstDestRegs = self.maxInstDestRegs
        MaxMiscDestRegs = self.maxMiscDestRegs
        # max_inst_regs.hh
        self.update_if_needed('max_inst_regs.hh',
                              max_inst_regs_template % vars())
2122
2123    def parse_isa_desc(self, *args, **kwargs):
2124        try:
2125            self._parse_isa_desc(*args, **kwargs)
2126        except ISAParserError, e:
2127            e.exit(self.fileNameStack)
2128
2129# Called as script: get args from command line.
2130# Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models>
if __name__ == '__main__':
    # Run the file named by the first argument inside this module's
    # namespace; the next line relies on it having defined CpuModel.
    execfile(sys.argv[1])  # read in CpuModel definitions
    # Every argument from the fourth on is a CPU model name, looked up
    # in CpuModel.dict.
    cpu_models = [CpuModel.dict[cpu] for cpu in sys.argv[4:]]
    # The third argument (output dir) and the selected models go to
    # the parser constructor; the second is the ISA description file
    # to parse.
    ISAParser(sys.argv[3], cpu_models).parse_isa_desc(sys.argv[2])
2135