isa_parser.py revision 10170:7e7cd19c9d9e
1# Copyright (c) 2003-2005 The Regents of The University of Michigan
2# Copyright (c) 2013 Advanced Micro Devices, Inc.
3# All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met: redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer;
9# redistributions in binary form must reproduce the above copyright
10# notice, this list of conditions and the following disclaimer in the
11# documentation and/or other materials provided with the distribution;
12# neither the name of the copyright holders nor the names of its
13# contributors may be used to endorse or promote products derived from
14# this software without specific prior written permission.
15#
16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27#
28# Authors: Steve Reinhardt
29
30import os
31import sys
32import re
33import string
34import inspect, traceback
35# get type names
36from types import *
37
38from m5.util.grammar import Grammar
39
# Module-wide debugging switch.  It is the default value of the
# 'print_traceback' argument of ISAParserError.display()/exit(), and when
# it is set Format.defineInst() re-raises the original exception instead
# of wrapping it in a parser error.
debug=False
41
42###################
43# Utility functions
44
45#
46# Indent every line in string 's' by two spaces
47# (except preprocessor directives).
48# Used to make nested code blocks look pretty.
49#
def indent(s):
    """Return 's' with two spaces prepended to every line.

    Lines that start with '#' (preprocessor directives) are left
    untouched.  Lines are delimited by '\\n' only; the (possibly empty)
    text after a trailing newline counts as a line as well.
    """
    shifted = []
    for line in s.split('\n'):
        if line.startswith('#'):
            shifted.append(line)
        else:
            shifted.append('  ' + line)
    return '\n'.join(shifted)
52
53#
54# Munge a somewhat arbitrarily formatted piece of Python code
55# (e.g. from a format 'let' block) into something whose indentation
56# will get by the Python parser.
57#
58# The two keys here are that Python will give a syntax error if
59# there's any whitespace at the beginning of the first line, and that
60# all lines at the same lexical nesting level must have identical
61# indentation.  Unfortunately the way code literals work, an entire
62# let block tends to have some initial indentation.  Rather than
63# trying to figure out what that is and strip it off, we prepend 'if
64# 1:' to make the let code the nested block inside the if (and have
65# the parser automatically deal with the indentation for us).
66#
67# We don't want to do this if (1) the code block is empty or (2) the
68# first line of the block doesn't have any whitespace at the front.
69
def fixPythonIndentation(s):
    """Make an arbitrarily indented Python snippet acceptable to compile().

    Blank lines are removed first.  If the remaining text starts with a
    space or tab, 'if 1:\\n' is prepended so that the snippet becomes the
    nested block of the 'if' and its leading indentation is legal.

    A snippet that contains nothing but whitespace is returned as the
    empty string: wrapping it would produce an 'if 1:' with no body,
    which does not compile.
    """
    # get rid of blank lines first
    s = re.sub(r'(?m)^\s*\n', '', s)
    # A whitespace-only remainder (e.g. spaces with no trailing newline)
    # is an empty code block; do not wrap it.
    if not s.strip():
        return ''
    if s[0] in ' \t':
        s = 'if 1:\n' + s
    return s
76
class ISAParserError(Exception):
    """Exception carrying a parser error message and an optional line.

    Constructed either as ISAParserError(message), in which case the
    line number is recorded as 0, or as ISAParserError(where, message),
    where 'where' is a line number or an object with a 'lexer'
    attribute whose 'lineno' is used.
    """
    def __init__(self, first, second=None):
        if second is not None:
            # Two-argument form: (line number or token-like object, message).
            lineno = first
            if hasattr(first, 'lexer'):
                lineno = first.lexer.lineno
            self.lineno = lineno
            self.string = second
        else:
            # One-argument form: message only, no line information.
            self.lineno = 0
            self.string = first

    def display(self, filename_stack, print_traceback=debug):
        """Return the formatted error line for the innermost file.

        'filename_stack' is a sequence of (filename, line) pairs.  An
        "In file included from" line is printed for every entry but
        the last one, and the returned line is indented to match.  The
        format is meant to work under Emacs compile-mode.  A Python
        stack backtrace is printed when 'print_traceback' is true or
        when no line number is known (handy when debugging the parser
        itself).
        """
        nesting = ""
        for (filename, _line) in filename_stack[:-1]:
            # Single parenthesized argument: prints the same text under
            # both the Python 2 print statement and the print function.
            print("%sIn file included from %s:" % (nesting, filename))
            nesting += "  "

        # Print a Python stack backtrace if requested.
        if print_traceback or not self.lineno:
            traceback.print_exc()

        location = "%s:" % (filename_stack[-1][0], )
        if self.lineno:
            location += "%d:" % (self.lineno, )

        return "%s%s %s" % (nesting, location, self.string)

    def exit(self, filename_stack, print_traceback=debug):
        """Terminate via sys.exit() with the display() text as argument."""
        message = self.display(filename_stack, print_traceback)
        sys.exit(message)
114
def error(*args):
    """Raise an ISAParserError built from the given arguments."""
    parser_error = ISAParserError(*args)
    raise parser_error
117
118####################
119# Template objects.
120#
121# Template objects are format strings that allow substitution from
122# the attribute spaces of other objects (e.g. InstObjParams instances).
123
# Matches a '%(label)s' or '%(label)d' substitution that is not preceded
# by another '%'; the single group captures the label between the
# parentheses.
labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
125
class Template(object):
    """A format string filled in by '%' substitution from a dictionary.

    The substitution dictionary starts from the parser's templateMap
    and is extended with the attributes (or items) of the object passed
    to subst().
    """
    def __init__(self, parser, t):
        self.parser = parser
        self.template = t

    def subst(self, d):
        """Return the template text with its labels substituted from 'd'.

        'd' may be an InstObjParams instance, a dictionary, or any
        object with a __dict__.  For an InstObjParams the operand-related
        entries (op_decl, op_src_decl, op_dest_decl, op_rd, op_wb) are
        generated here as well.

        Raises TypeError if 'd' is none of the accepted kinds.
        """
        # Protect non-Python-dict substitutions (e.g. if there's a printf
        # in the templated C++ code)
        template = self.parser.protectNonSubstPercents(self.template)
        # CPU-model-specific substitutions are handled later (in GenCode).
        template = self.parser.protectCpuSymbols(template)

        # Build a dict ('myDict') to use for the template substitution.
        # Start with the template namespace.  Make a copy since we're
        # going to modify it.
        myDict = self.parser.templateMap.copy()

        if isinstance(d, InstObjParams):
            # If we're dealing with an InstObjParams object, we need
            # to be a little more sophisticated.  The instruction-wide
            # parameters are already formed, but the parameters which
            # are only function wide still need to be generated.
            myDict.update(d.__dict__)
            # The "operands" and "snippets" attributes of the InstObjParams
            # objects are for internal use and not substitution.
            del myDict['operands']
            del myDict['snippets']

            # Only the snippets that this template actually references
            # are munged and substituted.
            snippetLabels = [l for l in labelRE.findall(template)
                             if l in d.snippets]

            snippets = dict([(s, self.parser.mungeSnippet(d.snippets[s]))
                             for s in snippetLabels])

            myDict.update(snippets)

            compositeCode = ' '.join(map(str, snippets.values()))

            # Add in template itself in case it references any
            # operands explicitly (like Mem)
            compositeCode += ' ' + template

            operands = SubOperandList(self.parser, compositeCode, d.operands)

            myDict['op_decl'] = operands.concatAttrStrings('op_decl')
            if operands.readPC or operands.setPC:
                myDict['op_decl'] += 'TheISA::PCState __parserAutoPCState;\n'

            # In case there are predicated register reads and writes,
            # declare the variables for register indices. It is being
            # assumed that all the operands in the OperandList are also in
            # the SubOperandList and in the same order. Otherwise, it is
            # expected that predication would not be used for the operands.
            if operands.predRead:
                myDict['op_decl'] += 'uint8_t _sourceIndex = 0;\n'
            if operands.predWrite:
                myDict['op_decl'] += 'uint8_t M5_VAR_USED _destIndex = 0;\n'

            is_src = lambda op: op.is_src
            is_dest = lambda op: op.is_dest

            myDict['op_src_decl'] = \
                      operands.concatSomeAttrStrings(is_src, 'op_src_decl')
            myDict['op_dest_decl'] = \
                      operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')
            if operands.readPC:
                myDict['op_src_decl'] += \
                    'TheISA::PCState __parserAutoPCState;\n'
            if operands.setPC:
                myDict['op_dest_decl'] += \
                    'TheISA::PCState __parserAutoPCState;\n'

            myDict['op_rd'] = operands.concatAttrStrings('op_rd')
            if operands.readPC:
                myDict['op_rd'] = '__parserAutoPCState = xc->pcState();\n' + \
                                  myDict['op_rd']

            # Compose the op_wb string. If we're going to write back the
            # PC state because we changed some of its elements, we'll need to
            # do that as early as possible. That allows later uncoordinated
            # modifications to the PC to layer appropriately.
            #
            # The operands are walked last-to-first and each writeback is
            # prepended, so the result is in operand order; the PC state
            # writeback is attached to the first PC-part destination met
            # in this reverse walk and emitted only once.
            op_wb_str = ''
            pcWbStr = 'xc->pcState(__parserAutoPCState);\n'
            for op_desc in reversed(list(operands.items)):
                if op_desc.isPCPart() and op_desc.is_dest:
                    op_wb_str = op_desc.op_wb + pcWbStr + op_wb_str
                    pcWbStr = ''
                else:
                    op_wb_str = op_desc.op_wb + op_wb_str
            myDict['op_wb'] = op_wb_str

        elif isinstance(d, dict):
            # if the argument is a dictionary, we just use it.
            myDict.update(d)
        elif hasattr(d, '__dict__'):
            # if the argument is an object, we use its attribute map.
            myDict.update(d.__dict__)
        else:
            raise TypeError("Template.subst() arg must be or have dictionary")
        return template % myDict

    # Convert to string.  This handles the case when a template with a
    # CPU-specific term gets interpolated into another template or into
    # an output block.
    def __str__(self):
        return self.parser.expandCpuSymbolsToString(self.template)
238
239################
240# Format object.
241#
242# A format object encapsulates an instruction format.  It must provide
243# a defineInst() method that generates the code for an instruction
244# definition.
245
class Format(object):
    """An instruction format: user code run to define each instruction.

    'id' is the format name, 'params' the list of parameter
    declarations (strings) of the format, and 'code' the Python source
    of the format body.
    """
    def __init__(self, id, params, code):
        self.id = id
        self.params = params
        label = 'def format ' + id
        self.user_code = compile(fixPythonIndentation(code), label, 'exec')
        param_list = ", ".join(params)
        # Build a wrapper function whose signature carries the format's
        # own parameters, so that argument binding (defaults, *args, ...)
        # is done by Python itself.  The wrapper runs the user code with
        # its bound arguments as the local namespace and hands that
        # namespace back.  exec(code, globals, locals) is accepted by
        # both the Python 2 exec statement and the Python 3 function.
        f = '''def defInst(_code, _context, %s):
                my_locals = vars().copy()
                exec(_code, _context, my_locals)
                return my_locals\n''' % param_list
        c = compile(f, label + ' wrapper', 'exec')
        namespace = {}
        exec(c, namespace)
        self.func = namespace['defInst']

    def defineInst(self, parser, name, args, lineno):
        """Run the format for instruction 'name' and return a GenCode.

        'args' is indexed as args[0] (positional arguments) and args[1]
        (keyword arguments) for the format.  Any exception raised by
        the format code is reported through error() with 'lineno',
        unless the module-level 'debug' flag is set, in which case it
        is re-raised unchanged.
        """
        parser.updateExportContext()
        context = parser.exportContext.copy()
        # Capitalize only the first character.  Slicing keeps this safe
        # for an empty name, which used to leave 'Name' undefined.
        Name = name[:1].upper() + name[1:]
        context.update({ 'name' : name, 'Name' : Name })
        try:
            inst_vars = self.func(self.user_code, context,
                                  *args[0], **args[1])
        except Exception as exc:
            if debug:
                raise
            error(lineno, 'error defining "%s": %s.' % (name, exc))
        # Only the four output variables are passed on to GenCode.
        output_names = ('header_output', 'decoder_output',
                        'exec_output', 'decode_block')
        outputs = dict((k, v) for (k, v) in inst_vars.items()
                       if k in output_names)
        return GenCode(parser, **outputs)
280
281# Special null format to catch an implicit-format instruction
282# definition outside of any format block.
class NoFormat(object):
    """Placeholder format whose defineInst() always reports an error."""
    def __init__(self):
        self.defaultInst = ''

    def defineInst(self, parser, name, args, lineno):
        # There is no format to expand the instruction with: report it
        # at the line of the definition.
        message = 'instruction definition "%s" with no active format!' % name
        error(lineno, message)
290
291###############
292# GenCode class
293#
294# The GenCode class encapsulates generated code destined for various
295# output files.  The header_output and decoder_output attributes are
296# strings containing code destined for decoder.hh and decoder.cc
297# respectively.  The decode_block attribute contains code to be
298# incorporated in the decode function itself (that will also end up in
299# decoder.cc).  The exec_output attribute is a dictionary with a key
300# for each CPU model name; the value associated with a particular key
301# is the string of code for that CPU model's exec.cc file.  The
302# has_decode_default attribute is used in the decode block to allow
303# explicit default clauses to override default default clauses.
304
class GenCode(object):
    """Bundle of generated code strings, one per output destination.

    header_output, decoder_output and decode_block are strings;
    exec_output is a dictionary keyed by CPU model name.
    """
    # Constructor.  CPU-specific symbols are substituted out here: the
    # exec output becomes a per-model dictionary, every other output is
    # collapsed into a single string.
    def __init__(self, parser,
                 header_output = '', decoder_output = '', exec_output = '',
                 decode_block = '', has_decode_default = False):
        to_string = parser.expandCpuSymbolsToString
        self.parser = parser
        self.header_output = to_string(header_output)
        self.decoder_output = to_string(decoder_output)
        if isinstance(exec_output, dict):
            # Already per-model: use it as is.
            self.exec_output = exec_output
        elif isinstance(exec_output, str):
            # A single string is replicated for each of the CPU models,
            # substituting any %(CPU_foo)s params appropriately.
            self.exec_output = parser.expandCpuSymbolsToDict(exec_output)
        self.decode_block = to_string(decode_block)
        self.has_decode_default = has_decode_default

    # '+' operator: build a new GenCode whose strings are the
    # concatenation of the corresponding strings of both operands.
    def __add__(self, other):
        merged_exec = dict(
            (cpu.name, self.exec_output[cpu.name] + other.exec_output[cpu.name])
            for cpu in self.parser.cpuModels)
        default_seen = self.has_decode_default or other.has_decode_default
        return GenCode(self.parser,
                       self.header_output + other.header_output,
                       self.decoder_output + other.decoder_output,
                       merged_exec,
                       self.decode_block + other.decode_block,
                       default_seen)

    # Put a string (typically a comment) in front of every output string.
    def prepend_all(self, pre):
        for attr in ('header_output', 'decoder_output', 'decode_block'):
            setattr(self, attr, pre + getattr(self, attr))
        for cpu in self.parser.cpuModels:
            model = cpu.name
            self.exec_output[model] = pre + self.exec_output[model]

    # Surround the (indented) decode block with a pair of strings (e.g.,
    # 'case foo:' and 'break;').  Used to build the big nested switch
    # statement.
    def wrap_decode_block(self, pre, post = ''):
        body = indent(self.decode_block)
        self.decode_block = pre + body + post
352
353#####################################################################
354#
355#                      Bitfield Operator Support
356#
357#####################################################################
358
# Matches a selector that has a first index and a colon but no second
# index, i.e. '<n:>'.
bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')

# Matches a two-index selector applied directly to a name (which may
# contain dots), e.g. 'foo<a:b>'.
bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
# Matches a two-index selector that immediately follows a ')'.
bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')

def substBitOps(code):
    """Rewrite bitfield selectors in 'code' as bits() calls.

    'foo<a:b>' becomes 'bits(foo, a, b)' and '(expr)<a:b>' becomes
    'bits((expr), a, b)'.  Exits through sys.exit() if a selector
    follows a ')' that has no matching '('.
    """
    # first convert one-index selectors to two-index form
    # i.e., <n:> --> <n:n>
    # NOTE(review): the pattern requires the colon, so a bare '<n>' is
    # left untouched -- confirm that this is the intended input syntax.
    code = bitOp1ArgRE.sub(r'<\1:\1>', code)
    # simple case: selector applied to ID (name)
    # i.e., foo<a:b> --> bits(foo, a, b)
    code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
    # if selector is applied to expression (ending in ')'),
    # we need to search backward for matching '('
    match = bitOpExprRE.search(code)
    while match:
        exprEnd = match.start()
        here = exprEnd - 1
        nestLevel = 1
        while nestLevel > 0:
            # Check the bound before looking at the character, so that an
            # expression whose '(' is the very first character of 'code'
            # is still accepted.
            if here < 0:
                sys.exit("Didn't find '('!")
            if code[here] == '(':
                nestLevel -= 1
            elif code[here] == ')':
                nestLevel += 1
            here -= 1
        # 'here' is now one position before the matching '('.
        exprStart = here+1
        newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
                                         match.group(1), match.group(2))
        code = code[:exprStart] + newExpr + code[match.end():]
        match = bitOpExprRE.search(code)
    return code
392
393
394#####################################################################
395#
396#                             Code Parser
397#
398# The remaining code is the support for automatically extracting
399# instruction characteristics from pseudocode.
400#
401#####################################################################
402
# Force the argument to be a list.  Useful for flags, where a caller
# can specify a singleton flag or a list of flags.  Also useful for
# converting tuples to lists so they can be modified.
def makeList(arg):
    """Return 'arg' as a list.

    A list is returned unchanged (the same object), a tuple is copied
    into a new list, any other false value gives an empty list, and
    anything else is wrapped in a one-element list.
    """
    if isinstance(arg, list):
        return arg
    if isinstance(arg, tuple):
        return list(arg)
    return [ arg ] if arg else []
415
class Operand(object):
    '''Base class for operand descriptors.  An instance of this class
    (or actually a class derived from this one) represents a specific
    operand for a code block (e.g, "Rc.sq" as a dest). Intermediate
    derived classes encapsulates the traits of a particular operand
    type (e.g., "32-bit integer register").

    Several attributes read here (base_name, reg_spec, read_code,
    write_code, read_predicate, write_predicate, flags, dflt_ctype and
    optionally dflt_ext, src_reg_idx, dest_reg_idx) are not set by this
    class; they are presumably provided by derived classes or by the
    code that enumerates the operands.'''

    def buildReadCode(self, func = None):
        '''Expand self.read_code and return it as an assignment to the
        operand variable.  'func' is made available to the read code
        as %(func)s.'''
        subst_dict = {"name": self.base_name,
                      "func": func,
                      "reg_idx": self.reg_spec,
                      "ctype": self.ctype}
        # The operand index is only available once it has been assigned.
        if hasattr(self, 'src_reg_idx'):
            subst_dict['op_idx'] = self.src_reg_idx
        code = self.read_code % subst_dict
        return '%s = %s;\n' % (self.base_name, code)

    def buildWriteCode(self, func = None):
        '''Expand self.write_code and return it wrapped in a block that
        declares 'final_val' and records it in traceData.  'func' is
        made available to the write code as %(func)s.'''
        subst_dict = {"name": self.base_name,
                      "func": func,
                      "reg_idx": self.reg_spec,
                      "ctype": self.ctype,
                      "final_val": self.base_name}
        if hasattr(self, 'dest_reg_idx'):
            subst_dict['op_idx'] = self.dest_reg_idx
        code = self.write_code % subst_dict
        return '''
        {
            %s final_val = %s;
            %s;
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, self.base_name, code)

    def __init__(self, parser, full_name, ext, is_src, is_dest):
        self.full_name = full_name
        self.ext = ext
        self.is_src = is_src
        self.is_dest = is_dest
        # The 'effective extension' (eff_ext) is either the actual
        # extension, if one was explicitly provided, or the default.
        if ext:
            self.eff_ext = ext
        elif hasattr(self, 'dflt_ext'):
            self.eff_ext = self.dflt_ext

        # Without any extension there is no C type to look up.
        if hasattr(self, 'eff_ext'):
            self.ctype = parser.operandTypeMap[self.eff_ext]

    # Finalize additional fields (primarily code fields).  This step
    # is done separately since some of these fields may depend on the
    # register index enumeration that hasn't been performed yet at the
    # time of __init__(). The register index enumeration is affected
    # by predicated register reads/writes. Hence, we forward the flags
    # that indicate whether or not predication is in use.
    def finalize(self, predRead, predWrite):
        self.flags = self.getFlags()
        self.constructor = self.makeConstructor(predRead, predWrite)
        self.op_decl = self.makeDecl()

        if self.is_src:
            self.op_rd = self.makeRead(predRead)
            self.op_src_decl = self.makeDecl()
        else:
            self.op_rd = ''
            self.op_src_decl = ''

        if self.is_dest:
            self.op_wb = self.makeWrite(predWrite)
            self.op_dest_decl = self.makeDecl()
        else:
            self.op_wb = ''
            self.op_dest_decl = ''

    # Operand-kind queries.  The base class answers "no" to all of
    # them; derived classes override the ones that apply.
    def isMem(self):
        return 0

    def isReg(self):
        return 0

    def isFloatReg(self):
        return 0

    def isIntReg(self):
        return 0

    def isCCReg(self):
        return 0

    def isControlReg(self):
        return 0

    def isPCState(self):
        return 0

    def isPCPart(self):
        # True (the reg_spec itself) only for a PC state operand that
        # names a component of the PC state.
        return self.isPCState() and self.reg_spec

    def hasReadPred(self):
        return self.read_predicate is not None

    def hasWritePred(self):
        return self.write_predicate is not None

    def getFlags(self):
        '''Return a new list with the flags that always apply
        (flags[0]) plus those that apply as a source (flags[1]) and/or
        as a destination (flags[2]).'''
        # note the empty slice '[:]' gives us a copy of self.flags[0]
        # instead of a reference to it
        my_flags = self.flags[0][:]
        if self.is_src:
            my_flags += self.flags[1]
        if self.is_dest:
            my_flags += self.flags[2]
        return my_flags

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        return self.ctype + ' ' + self.base_name + ' = 0;\n'
533
class IntRegOperand(Operand):
    '''Operand descriptor for an integer register.'''
    def isReg(self):
        return 1

    def isIntReg(self):
        return 1

    def makeConstructor(self, predRead, predWrite):
        '''Return the constructor code that records this operand in
        _srcRegIdx / _destRegIdx, guarded by the read / write predicate
        when the operand has one.'''
        c_src = ''
        c_dest = ''

        if self.is_src:
            c_src = '\n\t_srcRegIdx[_numSrcRegs++] = %s;' % (self.reg_spec)
            if self.hasReadPred():
                c_src = '\n\tif (%s) {%s\n\t}' % \
                        (self.read_predicate, c_src)

        if self.is_dest:
            c_dest = '\n\t_destRegIdx[_numDestRegs++] = %s;' % \
                    (self.reg_spec)
            c_dest += '\n\t_numIntDestRegs++;'
            if self.hasWritePred():
                c_dest = '\n\tif (%s) {%s\n\t}' % \
                         (self.write_predicate, c_dest)

        return c_src + c_dest

    def makeRead(self, predRead):
        '''Return the code that reads the register into the operand
        variable.  Custom read_code, when present, takes precedence.'''
        if self.ctype in ('float', 'double'):
            error('Attempt to read integer register as FP')
        if self.read_code is not None:
            return self.buildReadCode('readIntRegOperand')

        if predRead:
            # With predication the source index is only known at run
            # time, so it is counted in _sourceIndex.
            int_reg_val = 'xc->readIntRegOperand(this, _sourceIndex++)'
            if self.hasReadPred():
                int_reg_val = '(%s) ? %s : 0' % \
                              (self.read_predicate, int_reg_val)
        else:
            int_reg_val = 'xc->readIntRegOperand(this, %d)' % self.src_reg_idx

        return '%s = %s;\n' % (self.base_name, int_reg_val)

    def makeWrite(self, predWrite):
        '''Return the code that writes the operand variable back to the
        register.  Custom write_code, when present, takes precedence.'''
        if self.ctype in ('float', 'double'):
            error('Attempt to write integer register as FP')
        if self.write_code is not None:
            return self.buildWriteCode('setIntRegOperand')

        if predWrite:
            wp = 'true'
            if self.hasWritePred():
                wp = self.write_predicate

            wcond = 'if (%s)' % (wp)
            windex = '_destIndex++'
        else:
            wcond = ''
            windex = '%d' % self.dest_reg_idx

        wb = '''
        %s
        {
            %s final_val = %s;
            xc->setIntRegOperand(this, %s, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (wcond, self.ctype, self.base_name, windex)

        return wb
604
class FloatRegOperand(Operand):
    '''Operand descriptor for a floating-point register.'''
    def isReg(self):
        return 1

    def isFloatReg(self):
        return 1

    def makeConstructor(self, predRead, predWrite):
        '''Return the constructor code that records this operand in
        _srcRegIdx / _destRegIdx, offset by FP_Reg_Base.'''
        c_src = ''
        c_dest = ''

        if self.is_src:
            c_src = '\n\t_srcRegIdx[_numSrcRegs++] = %s + FP_Reg_Base;' % \
                    (self.reg_spec)

        if self.is_dest:
            c_dest = \
              '\n\t_destRegIdx[_numDestRegs++] = %s + FP_Reg_Base;' % \
              (self.reg_spec)
            c_dest += '\n\t_numFPDestRegs++;'

        return c_src + c_dest

    def makeRead(self, predRead):
        '''Return the code that reads the register into the operand
        variable.  Custom read_code, when present, takes precedence.'''
        # 'float' and 'double' operands are read as values, every other
        # C type is read as raw bits.
        if self.ctype in ('float', 'double'):
            func = 'readFloatRegOperand'
        else:
            func = 'readFloatRegOperandBits'
        if self.read_code is not None:
            return self.buildReadCode(func)

        if predRead:
            rindex = '_sourceIndex++'
        else:
            rindex = '%d' % self.src_reg_idx

        return '%s = xc->%s(this, %s);\n' % \
            (self.base_name, func, rindex)

    def makeWrite(self, predWrite):
        '''Return the code that writes the operand variable back to the
        register.  Custom write_code, when present, takes precedence.'''
        if self.ctype in ('float', 'double'):
            func = 'setFloatRegOperand'
        else:
            func = 'setFloatRegOperandBits'
        if self.write_code is not None:
            return self.buildWriteCode(func)

        if predWrite:
            windex = '_destIndex++'
        else:
            windex = '%d' % self.dest_reg_idx
        write_stmt = 'xc->%s(this, %s, final_val);' % (func, windex)

        wb = '''
        {
            %s final_val = %s;
            %s\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.ctype, self.base_name, write_stmt)
        return wb
666
class CCRegOperand(Operand):
    '''Operand descriptor for a condition-code register.'''
    def isReg(self):
        return 1

    def isCCReg(self):
        return 1

    def makeConstructor(self, predRead, predWrite):
        '''Return the constructor code that records this operand in
        _srcRegIdx / _destRegIdx (offset by CC_Reg_Base), guarded by
        the read / write predicate when the operand has one.'''
        c_src = ''
        c_dest = ''

        if self.is_src:
            c_src = '\n\t_srcRegIdx[_numSrcRegs++] = %s + CC_Reg_Base;' % \
                     (self.reg_spec)
            if self.hasReadPred():
                c_src = '\n\tif (%s) {%s\n\t}' % \
                        (self.read_predicate, c_src)

        if self.is_dest:
            c_dest = \
              '\n\t_destRegIdx[_numDestRegs++] = %s + CC_Reg_Base;' % \
              (self.reg_spec)
            c_dest += '\n\t_numCCDestRegs++;'
            if self.hasWritePred():
                c_dest = '\n\tif (%s) {%s\n\t}' % \
                         (self.write_predicate, c_dest)

        return c_src + c_dest

    def makeRead(self, predRead):
        '''Return the code that reads the register into the operand
        variable.  Custom read_code, when present, takes precedence.'''
        if self.ctype in ('float', 'double'):
            error('Attempt to read condition-code register as FP')
        if self.read_code is not None:
            return self.buildReadCode('readCCRegOperand')

        if predRead:
            # With predication the source index is only known at run
            # time, so it is counted in _sourceIndex.
            int_reg_val = 'xc->readCCRegOperand(this, _sourceIndex++)'
            if self.hasReadPred():
                int_reg_val = '(%s) ? %s : 0' % \
                              (self.read_predicate, int_reg_val)
        else:
            int_reg_val = 'xc->readCCRegOperand(this, %d)' % self.src_reg_idx

        return '%s = %s;\n' % (self.base_name, int_reg_val)

    def makeWrite(self, predWrite):
        '''Return the code that writes the operand variable back to the
        register.  Custom write_code, when present, takes precedence.'''
        if self.ctype in ('float', 'double'):
            error('Attempt to write condition-code register as FP')
        if self.write_code is not None:
            return self.buildWriteCode('setCCRegOperand')

        if predWrite:
            wp = 'true'
            if self.hasWritePred():
                wp = self.write_predicate

            wcond = 'if (%s)' % (wp)
            windex = '_destIndex++'
        else:
            wcond = ''
            windex = '%d' % self.dest_reg_idx

        wb = '''
        %s
        {
            %s final_val = %s;
            xc->setCCRegOperand(this, %s, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (wcond, self.ctype, self.base_name, windex)

        return wb
739
class ControlRegOperand(Operand):
    '''Operand descriptor for a control (misc) register.'''
    def isReg(self):
        return 1

    def isControlReg(self):
        return 1

    def makeConstructor(self, predRead, predWrite):
        '''Return the constructor code that records this operand in
        _srcRegIdx / _destRegIdx, offset by Misc_Reg_Base.'''
        c_src = ''
        c_dest = ''

        if self.is_src:
            c_src = \
              '\n\t_srcRegIdx[_numSrcRegs++] = %s + Misc_Reg_Base;' % \
              (self.reg_spec)

        if self.is_dest:
            c_dest = \
              '\n\t_destRegIdx[_numDestRegs++] = %s + Misc_Reg_Base;' % \
              (self.reg_spec)

        return c_src + c_dest

    def makeRead(self, predRead):
        '''Return the code that reads the register into the operand
        variable.  Custom read_code, when present, takes precedence.'''
        if self.ctype in ('float', 'double'):
            error('Attempt to read control register as FP')
        if self.read_code is not None:
            return self.buildReadCode('readMiscRegOperand')

        if predRead:
            rindex = '_sourceIndex++'
        else:
            rindex = '%d' % self.src_reg_idx

        return '%s = xc->readMiscRegOperand(this, %s);\n' % \
            (self.base_name, rindex)

    def makeWrite(self, predWrite):
        '''Return the code that writes the operand variable back to the
        register.  Custom write_code, when present, takes precedence.'''
        if self.ctype in ('float', 'double'):
            error('Attempt to write control register as FP')
        if self.write_code is not None:
            return self.buildWriteCode('setMiscRegOperand')

        if predWrite:
            windex = '_destIndex++'
        else:
            windex = '%d' % self.dest_reg_idx

        wb = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
             (windex, self.base_name)
        wb += 'if (traceData) { traceData->setData(%s); }' % \
              self.base_name

        return wb
795
class MemOperand(Operand):
    '''Operand descriptor for a memory operand.  No constructor code is
    generated for it, and read / write code is generated only when
    custom read_code / write_code is present.'''
    def isMem(self):
        return 1

    def makeConstructor(self, predRead, predWrite):
        return ''

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        # Declare memory data variable.
        return '%s %s = 0;\n' % (self.ctype, self.base_name)

    def makeRead(self, predRead):
        if self.read_code is not None:
            return self.buildReadCode()
        return ''

    def makeWrite(self, predWrite):
        if self.write_code is not None:
            return self.buildWriteCode()
        return ''
818
class PCStateOperand(Operand):
    '''Operand descriptor for the PC state, either as a whole (empty
    reg_spec) or for one of its components (reg_spec names the
    accessor).'''
    def isPCState(self):
        return 1

    def makeConstructor(self, predRead, predWrite):
        # Nothing to record in the constructor.
        return ''

    def makeDecl(self):
        # A component is declared with the operand's own C type, the
        # whole PC state with the ISA's PCState type.
        if self.isPCPart():
            ctype = self.ctype
        else:
            ctype = 'TheISA::PCState'
        return "%s %s;\n" % (ctype, self.base_name)

    def makeRead(self, predRead):
        if not self.reg_spec:
            # The whole PC state itself.
            return '%s = xc->pcState();\n' % self.base_name
        # A component of the PC state.
        return '%s = __parserAutoPCState.%s();\n' % \
            (self.base_name, self.reg_spec)

    def makeWrite(self, predWrite):
        if not self.reg_spec:
            # The whole PC state itself.
            return 'xc->pcState(%s);\n' % self.base_name
        # A component of the PC state.
        return '__parserAutoPCState.%s(%s);\n' % \
            (self.reg_spec, self.base_name)
849
class OperandList(object):
    '''List of the operand descriptors found in a block of code.

    The constructor scans 'code' with parser.operandsRE, creates one
    descriptor per distinct operand base name (via
    parser.operandNameMap), sorts the descriptors by sort_pri, numbers
    the source and destination registers, and finally calls finalize()
    on every descriptor.  It also raises parser.maxInstSrcRegs,
    parser.maxInstDestRegs and parser.maxMiscDestRegs when this list
    exceeds them.'''
    def __init__(self, parser, code):
        self.items = []
        self.bases = {}
        # delete strings and comments so we don't match on operands inside
        for regEx in (stringRE, commentRE):
            code = regEx.sub('', code)
        # search for operands
        next_pos = 0
        while True:
            match = parser.operandsRE.search(code, next_pos)
            if not match:
                # no more matches: we're done
                break
            # regexp groups are operand full name, base, and extension
            (op_full, op_base, op_ext) = match.groups()
            # if the token following the operand is an assignment, this is
            # a destination (LHS), else it's a source (RHS)
            is_dest = assignRE.match(code, match.end()) is not None
            is_src = not is_dest
            # see if we've already seen this one
            op_desc = self.find_base(op_base)
            if op_desc:
                if op_desc.ext != op_ext:
                    error('Inconsistent extensions for operand %s' % \
                          op_base)
                # an operand used on both sides is both source and dest
                op_desc.is_src = op_desc.is_src or is_src
                op_desc.is_dest = op_desc.is_dest or is_dest
            else:
                # new operand: create new descriptor
                op_desc = parser.operandNameMap[op_base](parser,
                    op_full, op_ext, is_src, is_dest)
                self.append(op_desc)
            # start next search after end of current match
            next_pos = match.end()
        self.sort()
        # enumerate source & dest register operands... used in building
        # constructor later
        self.numSrcRegs = 0
        self.numDestRegs = 0
        self.numFPDestRegs = 0
        self.numIntDestRegs = 0
        self.numCCDestRegs = 0
        self.numMiscDestRegs = 0
        self.memOperand = None

        # Flags to keep track if one or more operands are to be read/written
        # conditionally.
        self.predRead = False
        self.predWrite = False

        for op_desc in self.items:
            if op_desc.isReg():
                if op_desc.is_src:
                    op_desc.src_reg_idx = self.numSrcRegs
                    self.numSrcRegs += 1
                if op_desc.is_dest:
                    op_desc.dest_reg_idx = self.numDestRegs
                    self.numDestRegs += 1
                    # per-class destination counts
                    if op_desc.isFloatReg():
                        self.numFPDestRegs += 1
                    elif op_desc.isIntReg():
                        self.numIntDestRegs += 1
                    elif op_desc.isCCReg():
                        self.numCCDestRegs += 1
                    elif op_desc.isControlReg():
                        self.numMiscDestRegs += 1
            elif op_desc.isMem():
                if self.memOperand:
                    error("Code block has more than one memory operand.")
                self.memOperand = op_desc

            # Check if this operand has read/write predication. If true, then
            # the microop will dynamically index source/dest registers.
            self.predRead = self.predRead or op_desc.hasReadPred()
            self.predWrite = self.predWrite or op_desc.hasWritePred()

        # keep the parser-wide maxima up to date
        if parser.maxInstSrcRegs < self.numSrcRegs:
            parser.maxInstSrcRegs = self.numSrcRegs
        if parser.maxInstDestRegs < self.numDestRegs:
            parser.maxInstDestRegs = self.numDestRegs
        if parser.maxMiscDestRegs < self.numMiscDestRegs:
            parser.maxMiscDestRegs = self.numMiscDestRegs

        # now make a final pass to finalize op_desc fields that may depend
        # on the register enumeration
        for op_desc in self.items:
            op_desc.finalize(self.predRead, self.predWrite)

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        return self.items[index]

    def append(self, op_desc):
        '''Add a descriptor and index it by its base name.'''
        self.items.append(op_desc)
        self.bases[op_desc.base_name] = op_desc

    def find_base(self, base_name):
        # like self.bases[base_name], but returns None if not found
        # (rather than raising exception)
        return self.bases.get(base_name)

    # internal helper function for concat[Some]Attr{Strings|Lists}:
    # accumulates (with +=) the named attribute of every operand accepted
    # by 'filter' onto 'result' and returns the accumulated value
    def __internalConcatAttrs(self, attr_name, filter, result):
        for op_desc in self.items:
            if filter(op_desc):
                result += getattr(op_desc, attr_name)
        return result

    # return a single string that is the concatenation of the (string)
    # values of the specified attribute for all operands
    def concatAttrStrings(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, '')

    # like concatAttrStrings, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrStrings(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, '')

    # return a single list that is the concatenation of the (list)
    # values of the specified attribute for all operands
    def concatAttrLists(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, [])

    # like concatAttrLists, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrLists(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, [])

    def sort(self):
        '''Sort the descriptors in place by ascending sort_pri.

        A key function is used rather than a comparison function: the
        resulting (stable) order is the same, and positional comparison
        functions are not accepted by list.sort() in Python 3.'''
        self.items.sort(key=lambda op_desc: op_desc.sort_pri)
986
class SubOperandList(OperandList):
    '''Operand descriptor list for part of an instruction's code.

    Scans 'code' for operands and collects references to the matching
    descriptors of 'master_list'.  No descriptors are created here: an
    operand that is missing from the master list is reported as an
    internal error.'''
    def __init__(self, parser, code, master_list):
        self.items = []
        self.bases = {}
        # delete strings and comments so we don't match on operands inside
        code = commentRE.sub('', stringRE.sub('', code))
        # search for operands, resuming after the end of each match
        match = parser.operandsRE.search(code, 0)
        while match:
            # regexp groups are operand full name, base, and extension
            (op_full, op_base, op_ext) = match.groups()
            if not master_list.find_base(op_base):
                error('Found operand %s which is not in the master list!'
                      ' This is an internal error' % op_base)
            elif not self.find_base(op_base):
                # first sighting: reference the master list's descriptor
                self.append(master_list.bases[op_base])
            match = parser.operandsRE.search(code, match.end())
        self.sort()

        self.memOperand = None
        # Whether the whole PC needs to be read so parts of it can be accessed
        self.readPC = False
        # Whether the whole PC needs to be written after parts of it were
        # changed
        self.setPC = False
        # Whether this instruction manipulates the whole PC or parts of it.
        # Mixing the two is a bad idea and flagged as an error.
        self.pcPart = None
        # Flags to keep track if one or more operands are to be read/written
        # conditionally.
        self.predRead = False
        self.predWrite = False

        for op_desc in self.items:
            if op_desc.isPCPart():
                self.readPC = True
                if op_desc.is_dest:
                    self.setPC = True

            if op_desc.isPCState():
                is_part = op_desc.isPCPart()
                # complain when this operand's kind (part vs. whole)
                # disagrees with what was recorded so far
                if self.pcPart is not None and \
                        bool(self.pcPart) != bool(is_part):
                    error("Mixed whole and partial PC state operands.")
                self.pcPart = is_part

            if op_desc.isMem():
                if self.memOperand:
                    error("Code block has more than one memory operand.")
                self.memOperand = op_desc

            # Check if this operand has read/write predication. If true, then
            # the microop will dynamically index source/dest registers.
            if not self.predRead:
                self.predRead = op_desc.hasReadPred()
            if not self.predWrite:
                self.predWrite = op_desc.hasWritePred()
1058
# Regular expression object to match C++ strings: a double-quoted run
# in which a backslash escapes the following character.
# (OperandList and SubOperandList delete these matches before scanning
# code for operands.)
stringRE = re.compile(r'"([^"\\]|\\.)*"')

# Regular expression object to match C++ comments, both '/* ... */' and
# '// ...', along with any blanks or tabs in front of them
# (used in findOperands(); OperandList and SubOperandList also delete
# these matches before scanning code for operands)
commentRE = re.compile(r'(^)?[^\S\n]*/(?:\*(.*?)\*/[^\S\n]*|/[^\n]*)($)?',
        re.DOTALL | re.MULTILINE)

# Regular expression object to match assignment statements: optional
# whitespace and an '=' that is not the start of '=='
# (used in findOperands(); OperandList applies it right after an operand
# to decide whether that operand is a destination)
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
1070
def makeFlagConstructor(flag_list):
    '''Return the C++ statements that set every flag in 'flag_list'.

    Each distinct flag produces one "flags[<flag>] = true;" statement,
    in sorted order, each preceded by a newline and a tab.  An empty
    list yields ''.

    Side effect: 'flag_list' is sorted and stripped of duplicates in
    place.'''
    if not flag_list:
        return ''
    # filter out repeated flags; slice assignment keeps the update in
    # place, so the caller's list ends up sorted and duplicate-free
    flag_list[:] = sorted(set(flag_list))
    pre = '\n\tflags['
    post = '] = true;'
    # str.join is the direct replacement for the deprecated
    # string.join() module function
    return pre + (post + pre).join(flag_list) + post
1086
# Assume all instruction flags are of the form 'IsFoo'
# (InstObjParams applies this with match(), i.e. anchored at the start
# of the optional argument only)
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass
# (also applied with match(), and only tried after instFlagRE failed)
opClassRE = re.compile(r'.*Op|No_OpClass')
1092
class InstObjParams(object):
    '''Values describing one instruction class, derived from its code.

    The constructor sets: mnemonic, class_name, base_class, snippets,
    operands (an OperandList built from all snippets), constructor
    (C++ constructor body text), flags, op_class and fp_enable_check.'''
    def __init__(self, parser, mnem, class_name, base_class = '',
                 snippets = None, opt_args = None):
        '''
        parser     -- the parser, passed on to OperandList
        mnem       -- instruction mnemonic
        class_name -- name stored as self.class_name
        base_class -- name stored as self.base_class
        snippets   -- dict of code snippets, or a single snippet (which
                      is stored under the key 'code'); defaults to an
                      empty dict
        opt_args   -- optional strings, each either an instruction flag
                      ('Is...') or an OpClass name; anything else is
                      reported with error()
        '''
        # Create the default containers per call.  Mutable default
        # argument values are shared between calls, and the snippets
        # dict is kept on the instance.
        if snippets is None:
            snippets = {}
        if opt_args is None:
            opt_args = []

        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        if not isinstance(snippets, dict):
            snippets = {'code' : snippets}
        # operands are looked for in all the snippets together
        compositeCode = ' '.join(map(str, snippets.values()))
        self.snippets = snippets

        self.operands = OperandList(parser, compositeCode)

        # The header of the constructor declares the variables to be used
        # in the body of the constructor.
        header = ''
        header += '\n\t_numSrcRegs = 0;'
        header += '\n\t_numDestRegs = 0;'
        header += '\n\t_numFPDestRegs = 0;'
        header += '\n\t_numIntDestRegs = 0;'
        header += '\n\t_numCCDestRegs = 0;'

        self.constructor = header + \
                           self.operands.concatAttrStrings('constructor')

        self.flags = self.operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and can be overridden
        # later otherwise.
        if 'IsStore' in self.flags:
            self.op_class = 'MemWriteOp'
        elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
            self.op_class = 'MemReadOp'
        elif 'IsFloating' in self.flags:
            self.op_class = 'FloatAddOp'
        else:
            self.op_class = 'IntAluOp'

        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error('InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to constructor here to include
        # any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
        else:
            self.fp_enable_check = ''
1155
1156##############
1157# Stack: a simple stack object.  Used for both formats (formatStack)
1158# and default cases (defaultStack).  Simply wraps a list to give more
1159# stack-like syntax and enable initialization with an argument list
1160# (as opposed to an argument that's a list).
1161
class Stack(list):
    '''A list with stack-style push() and top() helpers.

    The initial elements are given as separate positional arguments
    rather than as one iterable.'''

    def __init__(self, *items):
        super(Stack, self).__init__(items)

    def push(self, item):
        '''Put 'item' on top of the stack.'''
        self.append(item)

    def top(self):
        '''Return the top element without removing it.'''
        last = -1
        return self[last]
1171
1172#######################
1173#
1174# Output file template
1175#
1176
# Skeleton of a generated C++ file.  The %(name)s placeholders expect a
# mapping with the keys filename, includes, global_output, namespace,
# namespace_output and decode_function.
file_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s

%(decode_function)s
'''

# Skeleton of the generated file that defines the MaxInst*Regs constants.
# Expects a mapping with the keys filename and namespace (strings) and
# MaxInstSrcRegs, MaxInstDestRegs and MaxMiscDestRegs (formatted with %d).
max_inst_regs_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

namespace %(namespace)s {

    const int MaxInstSrcRegs = %(MaxInstSrcRegs)d;
    const int MaxInstDestRegs = %(MaxInstDestRegs)d;
    const int MaxMiscDestRegs = %(MaxMiscDestRegs)d;

} // namespace %(namespace)s

'''
1213
1214class ISAParser(Grammar):
1215    def __init__(self, output_dir, cpu_models):
1216        super(ISAParser, self).__init__()
1217        self.output_dir = output_dir
1218
1219        self.cpuModels = cpu_models
1220
1221        # variable to hold templates
1222        self.templateMap = {}
1223
1224        # This dictionary maps format name strings to Format objects.
1225        self.formatMap = {}
1226
1227        # The format stack.
1228        self.formatStack = Stack(NoFormat())
1229
1230        # The default case stack.
1231        self.defaultStack = Stack(None)
1232
1233        # Stack that tracks current file and line number.  Each
1234        # element is a tuple (filename, lineno) that records the
1235        # *current* filename and the line number in the *previous*
1236        # file where it was included.
1237        self.fileNameStack = Stack()
1238
1239        symbols = ('makeList', 're', 'string')
1240        self.exportContext = dict([(s, eval(s)) for s in symbols])
1241
1242        self.maxInstSrcRegs = 0
1243        self.maxInstDestRegs = 0
1244        self.maxMiscDestRegs = 0
1245
1246    #####################################################################
1247    #
1248    #                                Lexer
1249    #
1250    # The PLY lexer module takes two things as input:
1251    # - A list of token names (the string list 'tokens')
1252    # - A regular expression describing a match for each token.  The
1253    #   regexp for token FOO can be provided in two ways:
1254    #   - as a string variable named t_FOO
1255    #   - as the doc string for a function named t_FOO.  In this case,
1256    #     the function is also executed, allowing an action to be
1257    #     associated with each token match.
1258    #
1259    #####################################################################
1260
1261    # Reserved words.  These are listed separately as they are matched
1262    # using the same regexp as generic IDs, but distinguished in the
1263    # t_ID() function.  The PLY documentation suggests this approach.
    # Each name below is a token type; its lower-case spelling is the
    # keyword itself (see reserved_map further down).
    reserved = (
        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
        'OUTPUT', 'SIGNED', 'TEMPLATE'
        )

    # List of tokens.  The lex module requires this.
    tokens = reserved + (
        # identifier
        'ID',

        # integer literal
        'INTLIT',

        # string literal
        'STRLIT',

        # code literal
        'CODELIT',

        # ( ) [ ] { } < > = , ; . : :: *
        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'LBRACE', 'RBRACE',
        'LESS', 'GREATER', 'EQUALS',
        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
        'ASTERISK',

        # C preprocessor directives
        'CPPDIRECTIVE'

    # The following are matched but never returned.  They are commented
    # out to suppress a PLY warning.
        # newfile directive
    #    'NEWFILE',

        # endfile directive
    #    'ENDFILE'
    )

    # Regular expressions for token matching (the simple tokens that need
    # no action; each is given as a string named t_<TOKEN>)
    t_LPAREN           = r'\('
    t_RPAREN           = r'\)'
    t_LBRACKET         = r'\['
    t_RBRACKET         = r'\]'
    t_LBRACE           = r'\{'
    t_RBRACE           = r'\}'
    t_LESS             = r'\<'
    t_GREATER          = r'\>'
    t_EQUALS           = r'='
    t_COMMA            = r','
    t_SEMI             = r';'
    t_DOT              = r'\.'
    t_COLON            = r':'
    t_DBLCOLON         = r'::'
    t_ASTERISK         = r'\*'

    # Identifiers and reserved words: reserved_map maps each keyword as
    # written (lower case) to its token type; t_ID() consults it.
    reserved_map = { }
    for r in reserved:
        reserved_map[r.lower()] = r
1325
1326    def t_ID(self, t):
1327        r'[A-Za-z_]\w*'
1328        t.type = self.reserved_map.get(t.value, 'ID')
1329        return t
1330
1331    # Integer literal
1332    def t_INTLIT(self, t):
1333        r'-?(0x[\da-fA-F]+)|\d+'
1334        try:
1335            t.value = int(t.value,0)
1336        except ValueError:
1337            error(t, 'Integer value "%s" too large' % t.value)
1338            t.value = 0
1339        return t
1340
1341    # String literal.  Note that these use only single quotes, and
1342    # can span multiple lines.
1343    def t_STRLIT(self, t):
1344        r"(?m)'([^'])+'"
1345        # strip off quotes
1346        t.value = t.value[1:-1]
1347        t.lexer.lineno += t.value.count('\n')
1348        return t
1349
1350
1351    # "Code literal"... like a string literal, but delimiters are
1352    # '{{' and '}}' so they get formatted nicely under emacs c-mode
1353    def t_CODELIT(self, t):
1354        r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
1355        # strip off {{ & }}
1356        t.value = t.value[2:-2]
1357        t.lexer.lineno += t.value.count('\n')
1358        return t
1359
1360    def t_CPPDIRECTIVE(self, t):
1361        r'^\#[^\#].*\n'
1362        t.lexer.lineno += t.value.count('\n')
1363        return t
1364
1365    def t_NEWFILE(self, t):
1366        r'^\#\#newfile\s+"[^"]*"'
1367        self.fileNameStack.push((t.value[11:-1], t.lexer.lineno))
1368        t.lexer.lineno = 0
1369
1370    def t_ENDFILE(self, t):
1371        r'^\#\#endfile'
1372        (old_filename, t.lexer.lineno) = self.fileNameStack.pop()
1373
1374    #
1375    # The functions t_NEWLINE, t_ignore, and t_error are
1376    # special for the lex module.
1377    #
1378
1379    # Newlines
1380    def t_NEWLINE(self, t):
1381        r'\n+'
1382        t.lexer.lineno += t.value.count('\n')
1383
1384    # Comments
    def t_comment(self, t):
        r'//.*'
        # NOTE: the string above is the token's regular expression.  The
        # function has no other body and returns None, so the matched
        # comment text is simply dropped.
1387
1388    # Completely ignored characters
    # (space, tab and form feed)
    t_ignore = ' \t\x0c'
1390
1391    # Error handler
1392    def t_error(self, t):
1393        error(t, "illegal character '%s'" % t.value[0])
1394        t.skip(1)
1395
1396    #####################################################################
1397    #
1398    #                                Parser
1399    #
1400    # Every function whose name starts with 'p_' defines a grammar
1401    # rule.  The rule is encoded in the function's doc string, while
1402    # the function body provides the action taken when the rule is
1403    # matched.  The argument to each function is a list of the values
1404    # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
1405    # symbols on the RHS.  For tokens, the value is copied from the
1406    # t.value attribute provided by the lexer.  For non-terminals, the
1407    # value is assigned by the producing rule; i.e., the job of the
1408    # grammar rule function is to set the value for the non-terminal
1409    # on the LHS (by assigning to t[0]).
1410    #####################################################################
1411
1412    # The LHS of the first grammar rule is used as the start symbol
1413    # (in this case, 'specification').  Note that this rule enforces
1414    # that there will be exactly one namespace declaration, with 0 or
1415    # more global defs/decls before and after it.  The defs & decls
1416    # before the namespace decl will be outside the namespace; those
1417    # after will be inside.  The decoder function is always inside the
1418    # namespace.
1419    def p_specification(self, t):
1420        'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
1421        global_code = t[1]
1422        isa_name = t[2]
1423        namespace = isa_name + "Inst"
1424        # wrap the decode block as a function definition
1425        t[4].wrap_decode_block('''
1426StaticInstPtr
1427%(isa_name)s::Decoder::decodeInst(%(isa_name)s::ExtMachInst machInst)
1428{
1429    using namespace %(namespace)s;
1430''' % vars(), '}')
1431        # both the latter output blocks and the decode block are in
1432        # the namespace
1433        namespace_code = t[3] + t[4]
1434        # pass it all back to the caller of yacc.parse()
1435        t[0] = (isa_name, namespace, global_code, namespace_code)
1436
1437    # ISA name declaration looks like "namespace <foo>;"
1438    def p_name_decl(self, t):
1439        'name_decl : NAMESPACE ID SEMI'
1440        t[0] = t[2]
1441
1442    # 'opt_defs_and_outputs' is a possibly empty sequence of
1443    # def and/or output statements.
1444    def p_opt_defs_and_outputs_0(self, t):
1445        'opt_defs_and_outputs : empty'
1446        t[0] = GenCode(self)
1447
1448    def p_opt_defs_and_outputs_1(self, t):
1449        'opt_defs_and_outputs : defs_and_outputs'
1450        t[0] = t[1]
1451
1452    def p_defs_and_outputs_0(self, t):
1453        'defs_and_outputs : def_or_output'
1454        t[0] = t[1]
1455
1456    def p_defs_and_outputs_1(self, t):
1457        'defs_and_outputs : defs_and_outputs def_or_output'
1458        t[0] = t[1] + t[2]
1459
1460    # The list of possible definition/output statements.
1461    def p_def_or_output(self, t):
1462        '''def_or_output : def_format
1463                         | def_bitfield
1464                         | def_bitfield_struct
1465                         | def_template
1466                         | def_operand_types
1467                         | def_operands
1468                         | output_header
1469                         | output_decoder
1470                         | output_exec
1471                         | global_let'''
1472        t[0] = t[1]
1473
1474    # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
1475    # directly to the appropriate output section.
1476
1477    # Massage output block by substituting in template definitions and
1478    # bit operators.  We handle '%'s embedded in the string that don't
1479    # indicate template substitutions (or CPU-specific symbols, which
1480    # get handled in GenCode) by doubling them first so that the
1481    # format operation will reduce them back to single '%'s.
1482    def process_output(self, s):
1483        s = self.protectNonSubstPercents(s)
1484        # protects cpu-specific symbols too
1485        s = self.protectCpuSymbols(s)
1486        return substBitOps(s % self.templateMap)
1487
1488    def p_output_header(self, t):
1489        'output_header : OUTPUT HEADER CODELIT SEMI'
1490        t[0] = GenCode(self, header_output = self.process_output(t[3]))
1491
1492    def p_output_decoder(self, t):
1493        'output_decoder : OUTPUT DECODER CODELIT SEMI'
1494        t[0] = GenCode(self, decoder_output = self.process_output(t[3]))
1495
1496    def p_output_exec(self, t):
1497        'output_exec : OUTPUT EXEC CODELIT SEMI'
1498        t[0] = GenCode(self, exec_output = self.process_output(t[3]))
1499
1500    # global let blocks 'let {{...}}' (Python code blocks) are
1501    # executed directly when seen.  Note that these execute in a
1502    # special variable context 'exportContext' to prevent the code
1503    # from polluting this script's namespace.
1504    def p_global_let(self, t):
1505        'global_let : LET CODELIT SEMI'
1506        self.updateExportContext()
1507        self.exportContext["header_output"] = ''
1508        self.exportContext["decoder_output"] = ''
1509        self.exportContext["exec_output"] = ''
1510        self.exportContext["decode_block"] = ''
1511        try:
1512            exec fixPythonIndentation(t[2]) in self.exportContext
1513        except Exception, exc:
1514            if debug:
1515                raise
1516            error(t, 'error: %s in global let block "%s".' % (exc, t[2]))
1517        t[0] = GenCode(self,
1518                       header_output=self.exportContext["header_output"],
1519                       decoder_output=self.exportContext["decoder_output"],
1520                       exec_output=self.exportContext["exec_output"],
1521                       decode_block=self.exportContext["decode_block"])
1522
1523    # Define the mapping from operand type extensions to C++ types and
1524    # bit widths (stored in operandTypeMap).
1525    def p_def_operand_types(self, t):
1526        'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
1527        try:
1528            self.operandTypeMap = eval('{' + t[3] + '}')
1529        except Exception, exc:
1530            if debug:
1531                raise
1532            error(t,
1533                  'error: %s in def operand_types block "%s".' % (exc, t[3]))
1534        t[0] = GenCode(self) # contributes nothing to the output C++ file
1535
1536    # Define the mapping from operand names to operand classes and
1537    # other traits.  Stored in operandNameMap.
1538    def p_def_operands(self, t):
1539        'def_operands : DEF OPERANDS CODELIT SEMI'
1540        if not hasattr(self, 'operandTypeMap'):
1541            error(t, 'error: operand types must be defined before operands')
1542        try:
1543            user_dict = eval('{' + t[3] + '}', self.exportContext)
1544        except Exception, exc:
1545            if debug:
1546                raise
1547            error(t, 'error: %s in def operands block "%s".' % (exc, t[3]))
1548        self.buildOperandNameMap(user_dict, t.lexer.lineno)
1549        t[0] = GenCode(self) # contributes nothing to the output C++ file
1550
1551    # A bitfield definition looks like:
1552    # 'def [signed] bitfield <ID> [<first>:<last>]'
1553    # This generates a preprocessor macro in the output file.
1554    def p_def_bitfield_0(self, t):
1555        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
1556        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
1557        if (t[2] == 'signed'):
1558            expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
1559        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1560        t[0] = GenCode(self, header_output=hash_define)
1561
1562    # alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
1563    def p_def_bitfield_1(self, t):
1564        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
1565        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
1566        if (t[2] == 'signed'):
1567            expr = 'sext<%d>(%s)' % (1, expr)
1568        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1569        t[0] = GenCode(self, header_output=hash_define)
1570
1571    # alternate form for structure member: 'def bitfield <ID> <ID>'
1572    def p_def_bitfield_struct(self, t):
1573        'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
1574        if (t[2] != ''):
1575            error(t, 'error: structure bitfields are always unsigned.')
1576        expr = 'machInst.%s' % t[5]
1577        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1578        t[0] = GenCode(self, header_output=hash_define)
1579
1580    def p_id_with_dot_0(self, t):
1581        'id_with_dot : ID'
1582        t[0] = t[1]
1583
1584    def p_id_with_dot_1(self, t):
1585        'id_with_dot : ID DOT id_with_dot'
1586        t[0] = t[1] + t[2] + t[3]
1587
1588    def p_opt_signed_0(self, t):
1589        'opt_signed : SIGNED'
1590        t[0] = t[1]
1591
    def p_opt_signed_1(self, t):
        'opt_signed : empty'
        # No 'signed' keyword: the value is the empty string, which is
        # what p_def_bitfield_struct compares against.
        t[0] = ''
1595
1596    def p_def_template(self, t):
1597        'def_template : DEF TEMPLATE ID CODELIT SEMI'
1598        self.templateMap[t[3]] = Template(self, t[4])
1599        t[0] = GenCode(self)
1600
1601    # An instruction format definition looks like
1602    # "def format <fmt>(<params>) {{...}};"
1603    def p_def_format(self, t):
1604        'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
1605        (id, params, code) = (t[3], t[5], t[7])
1606        self.defFormat(id, params, code, t.lexer.lineno)
1607        t[0] = GenCode(self)
1608
1609    # The formal parameter list for an instruction format is a
1610    # possibly empty list of comma-separated parameters.  Positional
1611    # (standard, non-keyword) parameters must come first, followed by
1612    # keyword parameters, followed by a '*foo' parameter that gets
1613    # excess positional arguments (as in Python).  Each of these three
1614    # parameter categories is optional.
1615    #
1616    # Note that we do not support the '**foo' parameter for collecting
1617    # otherwise undefined keyword args.  Otherwise the parameter list
1618    # is (I believe) identical to what is supported in Python.
1619    #
    # The param list generates a single flat list of strings: the
    # positional parameter names first, then the 'name = default'
    # keyword entries, then (if present) the '*name' excess-args entry.
1623    def p_param_list_0(self, t):
1624        'param_list : positional_param_list COMMA nonpositional_param_list'
1625        t[0] = t[1] + t[3]
1626
1627    def p_param_list_1(self, t):
1628        '''param_list : positional_param_list
1629                      | nonpositional_param_list'''
1630        t[0] = t[1]
1631
1632    def p_positional_param_list_0(self, t):
1633        'positional_param_list : empty'
1634        t[0] = []
1635
1636    def p_positional_param_list_1(self, t):
1637        'positional_param_list : ID'
1638        t[0] = [t[1]]
1639
1640    def p_positional_param_list_2(self, t):
1641        'positional_param_list : positional_param_list COMMA ID'
1642        t[0] = t[1] + [t[3]]
1643
1644    def p_nonpositional_param_list_0(self, t):
1645        'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
1646        t[0] = t[1] + t[3]
1647
1648    def p_nonpositional_param_list_1(self, t):
1649        '''nonpositional_param_list : keyword_param_list
1650                                    | excess_args_param'''
1651        t[0] = t[1]
1652
1653    def p_keyword_param_list_0(self, t):
1654        'keyword_param_list : keyword_param'
1655        t[0] = [t[1]]
1656
1657    def p_keyword_param_list_1(self, t):
1658        'keyword_param_list : keyword_param_list COMMA keyword_param'
1659        t[0] = t[1] + [t[3]]
1660
1661    def p_keyword_param(self, t):
1662        'keyword_param : ID EQUALS expr'
1663        t[0] = t[1] + ' = ' + t[3].__repr__()
1664
1665    def p_excess_args_param(self, t):
1666        'excess_args_param : ASTERISK ID'
1667        # Just concatenate them: '*ID'.  Wrap in list to be consistent
1668        # with positional_param_list and keyword_param_list.
1669        t[0] = [t[1] + t[2]]
1670
1671    # End of format definition-related rules.
1672    ##############
1673
1674    #
1675    # A decode block looks like:
1676    #       decode <field1> [, <field2>]* [default <inst>] { ... }
1677    #
1678    def p_decode_block(self, t):
1679        'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
1680        default_defaults = self.defaultStack.pop()
1681        codeObj = t[5]
1682        # use the "default defaults" only if there was no explicit
1683        # default statement in decode_stmt_list
1684        if not codeObj.has_decode_default:
1685            codeObj += default_defaults
1686        codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
1687        t[0] = codeObj
1688
1689    # The opt_default statement serves only to push the "default
1690    # defaults" onto defaultStack.  This value will be used by nested
1691    # decode blocks, and used and popped off when the current
1692    # decode_block is processed (in p_decode_block() above).
1693    def p_opt_default_0(self, t):
1694        'opt_default : empty'
1695        # no default specified: reuse the one currently at the top of
1696        # the stack
1697        self.defaultStack.push(self.defaultStack.top())
1698        # no meaningful value returned
1699        t[0] = None
1700
1701    def p_opt_default_1(self, t):
1702        'opt_default : DEFAULT inst'
1703        # push the new default
1704        codeObj = t[2]
1705        codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
1706        self.defaultStack.push(codeObj)
1707        # no meaningful value returned
1708        t[0] = None
1709
1710    def p_decode_stmt_list_0(self, t):
1711        'decode_stmt_list : decode_stmt'
1712        t[0] = t[1]
1713
1714    def p_decode_stmt_list_1(self, t):
1715        'decode_stmt_list : decode_stmt decode_stmt_list'
1716        if (t[1].has_decode_default and t[2].has_decode_default):
1717            error(t, 'Two default cases in decode block')
1718        t[0] = t[1] + t[2]
1719
1720    #
1721    # Decode statement rules
1722    #
1723    # There are four types of statements allowed in a decode block:
1724    # 1. Format blocks 'format <foo> { ... }'
1725    # 2. Nested decode blocks
1726    # 3. Instruction definitions.
1727    # 4. C preprocessor directives.
1728
1729
1730    # Preprocessor directives found in a decode statement list are
1731    # passed through to the output, replicated to all of the output
1732    # code streams.  This works well for ifdefs, so we can ifdef out
1733    # both the declarations and the decode cases generated by an
1734    # instruction definition.  Handling them as part of the grammar
1735    # makes it easy to keep them in the right place with respect to
1736    # the code generated by the other statements.
1737    def p_decode_stmt_cpp(self, t):
1738        'decode_stmt : CPPDIRECTIVE'
1739        t[0] = GenCode(self, t[1], t[1], t[1], t[1])
1740
1741    # A format block 'format <foo> { ... }' sets the default
1742    # instruction format used to handle instruction definitions inside
1743    # the block.  This format can be overridden by using an explicit
1744    # format on the instruction definition or with a nested format
1745    # block.
1746    def p_decode_stmt_format(self, t):
1747        'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
1748        # The format will be pushed on the stack when 'push_format_id'
1749        # is processed (see below).  Once the parser has recognized
1750        # the full production (though the right brace), we're done
1751        # with the format, so now we can pop it.
1752        self.formatStack.pop()
1753        t[0] = t[4]
1754
1755    # This rule exists so we can set the current format (& push the
1756    # stack) when we recognize the format name part of the format
1757    # block.
1758    def p_push_format_id(self, t):
1759        'push_format_id : ID'
1760        try:
1761            self.formatStack.push(self.formatMap[t[1]])
1762            t[0] = ('', '// format %s' % t[1])
1763        except KeyError:
1764            error(t, 'instruction format "%s" not defined.' % t[1])
1765
1766    # Nested decode block: if the value of the current field matches
1767    # the specified constant, do a nested decode on some other field.
1768    def p_decode_stmt_decode(self, t):
1769        'decode_stmt : case_label COLON decode_block'
1770        label = t[1]
1771        codeObj = t[3]
1772        # just wrap the decoding code from the block as a case in the
1773        # outer switch statement.
1774        codeObj.wrap_decode_block('\n%s:\n' % label)
1775        codeObj.has_decode_default = (label == 'default')
1776        t[0] = codeObj
1777
1778    # Instruction definition (finally!).
1779    def p_decode_stmt_inst(self, t):
1780        'decode_stmt : case_label COLON inst SEMI'
1781        label = t[1]
1782        codeObj = t[3]
1783        codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
1784        codeObj.has_decode_default = (label == 'default')
1785        t[0] = codeObj
1786
1787    # The case label is either a list of one or more constants or
1788    # 'default'
1789    def p_case_label_0(self, t):
1790        'case_label : intlit_list'
1791        def make_case(intlit):
1792            if intlit >= 2**32:
1793                return 'case ULL(%#x)' % intlit
1794            else:
1795                return 'case %#x' % intlit
1796        t[0] = ': '.join(map(make_case, t[1]))
1797
1798    def p_case_label_1(self, t):
1799        'case_label : DEFAULT'
1800        t[0] = 'default'
1801
1802    #
1803    # The constant list for a decode case label must be non-empty, but
1804    # may have one or more comma-separated integer literals in it.
1805    #
1806    def p_intlit_list_0(self, t):
1807        'intlit_list : INTLIT'
1808        t[0] = [t[1]]
1809
1810    def p_intlit_list_1(self, t):
1811        'intlit_list : intlit_list COMMA INTLIT'
1812        t[0] = t[1]
1813        t[0].append(t[3])
1814
1815    # Define an instruction using the current instruction format
1816    # (specified by an enclosing format block).
1817    # "<mnemonic>(<args>)"
1818    def p_inst_0(self, t):
1819        'inst : ID LPAREN arg_list RPAREN'
1820        # Pass the ID and arg list to the current format class to deal with.
1821        currentFormat = self.formatStack.top()
1822        codeObj = currentFormat.defineInst(self, t[1], t[3], t.lexer.lineno)
1823        args = ','.join(map(str, t[3]))
1824        args = re.sub('(?m)^', '//', args)
1825        args = re.sub('^//', '', args)
1826        comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
1827        codeObj.prepend_all(comment)
1828        t[0] = codeObj
1829
1830    # Define an instruction using an explicitly specified format:
1831    # "<fmt>::<mnemonic>(<args>)"
1832    def p_inst_1(self, t):
1833        'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
1834        try:
1835            format = self.formatMap[t[1]]
1836        except KeyError:
1837            error(t, 'instruction format "%s" not defined.' % t[1])
1838
1839        codeObj = format.defineInst(self, t[3], t[5], t.lexer.lineno)
1840        comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
1841        codeObj.prepend_all(comment)
1842        t[0] = codeObj
1843
1844    # The arg list generates a tuple, where the first element is a
1845    # list of the positional args and the second element is a dict
1846    # containing the keyword args.
1847    def p_arg_list_0(self, t):
1848        'arg_list : positional_arg_list COMMA keyword_arg_list'
1849        t[0] = ( t[1], t[3] )
1850
1851    def p_arg_list_1(self, t):
1852        'arg_list : positional_arg_list'
1853        t[0] = ( t[1], {} )
1854
1855    def p_arg_list_2(self, t):
1856        'arg_list : keyword_arg_list'
1857        t[0] = ( [], t[1] )
1858
1859    def p_positional_arg_list_0(self, t):
1860        'positional_arg_list : empty'
1861        t[0] = []
1862
1863    def p_positional_arg_list_1(self, t):
1864        'positional_arg_list : expr'
1865        t[0] = [t[1]]
1866
1867    def p_positional_arg_list_2(self, t):
1868        'positional_arg_list : positional_arg_list COMMA expr'
1869        t[0] = t[1] + [t[3]]
1870
1871    def p_keyword_arg_list_0(self, t):
1872        'keyword_arg_list : keyword_arg'
1873        t[0] = t[1]
1874
1875    def p_keyword_arg_list_1(self, t):
1876        'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
1877        t[0] = t[1]
1878        t[0].update(t[3])
1879
1880    def p_keyword_arg(self, t):
1881        'keyword_arg : ID EQUALS expr'
1882        t[0] = { t[1] : t[3] }
1883
1884    #
1885    # Basic expressions.  These constitute the argument values of
1886    # "function calls" (i.e. instruction definitions in the decode
1887    # block) and default values for formal parameters of format
1888    # functions.
1889    #
1890    # Right now, these are either strings, integers, or (recursively)
1891    # lists of exprs (using Python square-bracket list syntax).  Note
    # that bare identifiers are treated as string constants here (since
1893    # there isn't really a variable namespace to refer to).
1894    #
1895    def p_expr_0(self, t):
1896        '''expr : ID
1897                | INTLIT
1898                | STRLIT
1899                | CODELIT'''
1900        t[0] = t[1]
1901
1902    def p_expr_1(self, t):
1903        '''expr : LBRACKET list_expr RBRACKET'''
1904        t[0] = t[2]
1905
1906    def p_list_expr_0(self, t):
1907        'list_expr : expr'
1908        t[0] = [t[1]]
1909
1910    def p_list_expr_1(self, t):
1911        'list_expr : list_expr COMMA expr'
1912        t[0] = t[1] + [t[3]]
1913
1914    def p_list_expr_2(self, t):
1915        'list_expr : empty'
1916        t[0] = []
1917
1918    #
1919    # Empty production... use in other rules for readability.
1920    #
    def p_empty(self, t):
        'empty :'
        # Matches no tokens.  Nothing is assigned to t[0] here.
        pass
1924
1925    # Parse error handler.  Note that the argument here is the
1926    # offending *token*, not a grammar symbol (hence the need to use
1927    # t.value)
1928    def p_error(self, t):
1929        if t:
1930            error(t, "syntax error at '%s'" % t.value)
1931        else:
1932            error("unknown syntax error")
1933
1934    # END OF GRAMMAR RULES
1935
1936    def updateExportContext(self):
1937
1938        # create a continuation that allows us to grab the current parser
1939        def wrapInstObjParams(*args):
1940            return InstObjParams(self, *args)
1941        self.exportContext['InstObjParams'] = wrapInstObjParams
1942        self.exportContext.update(self.templateMap)
1943
1944    def defFormat(self, id, params, code, lineno):
1945        '''Define a new format'''
1946
1947        # make sure we haven't already defined this one
1948        if id in self.formatMap:
1949            error(lineno, 'format %s redefined.' % id)
1950
1951        # create new object and store in global map
1952        self.formatMap[id] = Format(id, params, code)
1953
1954    def expandCpuSymbolsToDict(self, template):
1955        '''Expand template with CPU-specific references into a
1956        dictionary with an entry for each CPU model name.  The entry
1957        key is the model name and the corresponding value is the
1958        template with the CPU-specific refs substituted for that
1959        model.'''
1960
1961        # Protect '%'s that don't go with CPU-specific terms
1962        t = re.sub(r'%(?!\(CPU_)', '%%', template)
1963        result = {}
1964        for cpu in self.cpuModels:
1965            result[cpu.name] = t % cpu.strings
1966        return result
1967
1968    def expandCpuSymbolsToString(self, template):
1969        '''*If* the template has CPU-specific references, return a
1970        single string containing a copy of the template for each CPU
1971        model with the corresponding values substituted in.  If the
1972        template has no CPU-specific references, it is returned
1973        unmodified.'''
1974
1975        if template.find('%(CPU_') != -1:
1976            return reduce(lambda x,y: x+y,
1977                          self.expandCpuSymbolsToDict(template).values())
1978        else:
1979            return template
1980
1981    def protectCpuSymbols(self, template):
1982        '''Protect CPU-specific references by doubling the
1983        corresponding '%'s (in preparation for substituting a different
1984        set of references into the template).'''
1985
1986        return re.sub(r'%(?=\(CPU_)', '%%', template)
1987
1988    def protectNonSubstPercents(self, s):
1989        '''Protect any non-dict-substitution '%'s in a format string
1990        (i.e. those not followed by '(')'''
1991
1992        return re.sub(r'%(?!\()', '%%', s)
1993
1994    def buildOperandNameMap(self, user_dict, lineno):
1995        operand_name = {}
1996        for op_name, val in user_dict.iteritems():
1997
1998            # Check if extra attributes have been specified.
1999            if len(val) > 9:
2000                error(lineno, 'error: too many attributes for operand "%s"' %
2001                      base_cls_name)
2002
2003            # Pad val with None in case optional args are missing
2004            val += (None, None, None, None)
2005            base_cls_name, dflt_ext, reg_spec, flags, sort_pri, \
2006            read_code, write_code, read_predicate, write_predicate = val[:9]
2007
2008            # Canonical flag structure is a triple of lists, where each list
2009            # indicates the set of flags implied by this operand always, when
2010            # used as a source, and when used as a dest, respectively.
2011            # For simplicity this can be initialized using a variety of fairly
2012            # obvious shortcuts; we convert these to canonical form here.
2013            if not flags:
2014                # no flags specified (e.g., 'None')
2015                flags = ( [], [], [] )
2016            elif isinstance(flags, str):
2017                # a single flag: assumed to be unconditional
2018                flags = ( [ flags ], [], [] )
2019            elif isinstance(flags, list):
2020                # a list of flags: also assumed to be unconditional
2021                flags = ( flags, [], [] )
2022            elif isinstance(flags, tuple):
2023                # it's a tuple: it should be a triple,
2024                # but each item could be a single string or a list
2025                (uncond_flags, src_flags, dest_flags) = flags
2026                flags = (makeList(uncond_flags),
2027                         makeList(src_flags), makeList(dest_flags))
2028
2029            # Accumulate attributes of new operand class in tmp_dict
2030            tmp_dict = {}
2031            attrList = ['reg_spec', 'flags', 'sort_pri',
2032                        'read_code', 'write_code',
2033                        'read_predicate', 'write_predicate']
2034            if dflt_ext:
2035                dflt_ctype = self.operandTypeMap[dflt_ext]
2036                attrList.extend(['dflt_ctype', 'dflt_ext'])
2037            for attr in attrList:
2038                tmp_dict[attr] = eval(attr)
2039            tmp_dict['base_name'] = op_name
2040
2041            # New class name will be e.g. "IntReg_Ra"
2042            cls_name = base_cls_name + '_' + op_name
2043            # Evaluate string arg to get class object.  Note that the
2044            # actual base class for "IntReg" is "IntRegOperand", i.e. we
2045            # have to append "Operand".
2046            try:
2047                base_cls = eval(base_cls_name + 'Operand')
2048            except NameError:
2049                error(lineno,
2050                      'error: unknown operand base class "%s"' % base_cls_name)
2051            # The following statement creates a new class called
2052            # <cls_name> as a subclass of <base_cls> with the attributes
2053            # in tmp_dict, just as if we evaluated a class declaration.
2054            operand_name[op_name] = type(cls_name, (base_cls,), tmp_dict)
2055
2056        self.operandNameMap = operand_name
2057
2058        # Define operand variables.
2059        operands = user_dict.keys()
2060        extensions = self.operandTypeMap.keys()
2061
2062        operandsREString = r'''
2063        (?<!\w)      # neg. lookbehind assertion: prevent partial matches
2064        ((%s)(?:_(%s))?)   # match: operand with optional '_' then suffix
2065        (?!\w)       # neg. lookahead assertion: prevent partial matches
2066        ''' % (string.join(operands, '|'), string.join(extensions, '|'))
2067
2068        self.operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)
2069
2070        # Same as operandsREString, but extension is mandatory, and only two
2071        # groups are returned (base and ext, not full name as above).
2072        # Used for subtituting '_' for '.' to make C++ identifiers.
2073        operandsWithExtREString = r'(?<!\w)(%s)_(%s)(?!\w)' \
2074            % (string.join(operands, '|'), string.join(extensions, '|'))
2075
2076        self.operandsWithExtRE = \
2077            re.compile(operandsWithExtREString, re.MULTILINE)
2078
2079    def substMungedOpNames(self, code):
2080        '''Munge operand names in code string to make legal C++
2081        variable names.  This means getting rid of the type extension
2082        if any.  Will match base_name attribute of Operand object.)'''
2083        return self.operandsWithExtRE.sub(r'\1', code)
2084
2085    def mungeSnippet(self, s):
2086        '''Fix up code snippets for final substitution in templates.'''
2087        if isinstance(s, str):
2088            return self.substMungedOpNames(substBitOps(s))
2089        else:
2090            return s
2091
2092    def update(self, file, contents):
2093        '''Update the output file.  If the contents are unchanged,
2094           the scons hash feature will avoid recompilation.'''
2095        file = os.path.join(self.output_dir, file)
2096        f = open(file, 'w')
2097        f.write(contents)
2098        f.close()
2099
    # This regular expression matches '##include "<filename>"'
    # directives.  Leading whitespace on the line is allowed and
    # anything after the closing quote is consumed up to the end of the
    # line.  The path between the quotes is captured in the named group
    # 'filename'.  re.MULTILINE makes '^' and '$' match at every line.
    includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[^"]*)".*$',
                           re.MULTILINE)
2103
2104    def replace_include(self, matchobj, dirname):
2105        """Function to replace a matched '##include' directive with the
2106        contents of the specified file (with nested ##includes
2107        replaced recursively).  'matchobj' is an re match object
2108        (from a match of includeRE) and 'dirname' is the directory
2109        relative to which the file path should be resolved."""
2110
2111        fname = matchobj.group('filename')
2112        full_fname = os.path.normpath(os.path.join(dirname, fname))
2113        contents = '##newfile "%s"\n%s\n##endfile\n' % \
2114                   (full_fname, self.read_and_flatten(full_fname))
2115        return contents
2116
2117    def read_and_flatten(self, filename):
2118        """Read a file and recursively flatten nested '##include' files."""
2119
2120        current_dir = os.path.dirname(filename)
2121        try:
2122            contents = open(filename).read()
2123        except IOError:
2124            error('Error including file "%s"' % filename)
2125
2126        self.fileNameStack.push((filename, 0))
2127
2128        # Find any includes and include them
2129        def replace(matchobj):
2130            return self.replace_include(matchobj, current_dir)
2131        contents = self.includeRE.sub(replace, contents)
2132
2133        self.fileNameStack.pop()
2134        return contents
2135
    def _parse_isa_desc(self, isa_desc_file):
        '''Read in and parse the ISA description, then write the
        generated files.

        The description file (with its includes flattened) is parsed
        with parse_string(), and the results are written through
        update() as decoder.hh, decoder.cc, one file per CPU model in
        self.cpuModels (named by cpu.filename), and max_inst_regs.hh.

        The templates are filled in from vars(), so the names of the
        local variables assigned below are significant.'''

        # Read file and (recursively) all included files into a string.
        # PLY requires that the input be in a single string so we have to
        # do this up front.
        isa_desc = self.read_and_flatten(isa_desc_file)

        # Initialize filename stack with outer file.
        self.fileNameStack.push((isa_desc_file, 0))

        # Parse it.
        (isa_name, namespace, global_code, namespace_code) = \
                   self.parse_string(isa_desc)

        # grab the last three path components of isa_desc_file to put in
        # the output
        filename = '/'.join(isa_desc_file.split('/')[-3:])

        # generate decoder.hh
        includes = '#include "base/bitfield.hh" // for bitfield support'
        global_output = global_code.header_output
        namespace_output = namespace_code.header_output
        decode_function = ''
        self.update('decoder.hh', file_template % vars())

        # generate decoder.cc
        includes = '#include "decoder.hh"'
        global_output = global_code.decoder_output
        namespace_output = namespace_code.decoder_output
        # namespace_output += namespace_code.decode_block
        decode_function = namespace_code.decode_block
        self.update('decoder.cc', file_template % vars())

        # generate per-cpu exec files
        for cpu in self.cpuModels:
            includes = '#include "decoder.hh"\n'
            includes += cpu.includes
            global_output = global_code.exec_output[cpu.name]
            namespace_output = namespace_code.exec_output[cpu.name]
            decode_function = ''
            self.update(cpu.filename, file_template % vars())

        # The variable names here are hacky, but this will create local
        # variables which will be referenced in vars() which have the
        # value of the globals.
        MaxInstSrcRegs = self.maxInstSrcRegs
        MaxInstDestRegs = self.maxInstDestRegs
        MaxMiscDestRegs = self.maxMiscDestRegs
        # max_inst_regs.hh
        self.update('max_inst_regs.hh',
                    max_inst_regs_template % vars())
2188
    def parse_isa_desc(self, *args, **kwargs):
        '''Run _parse_isa_desc() with the given arguments.  If it raises
        an ISAParserError, the error's exit() method is called with the
        current file name stack.'''
        try:
            self._parse_isa_desc(*args, **kwargs)
        except ISAParserError, e:
            e.exit(self.fileNameStack)
2194
2195# Called as script: get args from command line.
2196# Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models>
if __name__ == '__main__':
    # Execute the file named by the first argument in this module's
    # namespace; the next line relies on it having defined CpuModel.
    execfile(sys.argv[1])  # read in CpuModel definitions
    # Look up every CPU model named on the command line (4th arg onward).
    cpu_models = [CpuModel.dict[cpu] for cpu in sys.argv[4:]]
    # Build the parser from <output dir> and the selected models, then
    # process the <isa desc file>.
    ISAParser(sys.argv[3], cpu_models).parse_isa_desc(sys.argv[2])
2201