# isa_parser.py revision 8176
# Copyright (c) 2003-2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Steve Reinhardt
28
29import os
30import sys
31import re
32import string
33import inspect, traceback
34# get type names
35from types import *
36
37from m5.util.grammar import Grammar
38
# Module-wide debug switch.  It is the default value of the
# 'print_traceback' argument of ISAParserError.display()/exit(), and
# Format.defineInst() re-raises the original exception when it is set.
debug=False
40
41###################
42# Utility functions
43
44#
45# Indent every line in string 's' by two spaces
46# (except preprocessor directives).
47# Used to make nested code blocks look pretty.
48#
def indent(s):
    """Return 's' with two spaces inserted at the start of every line.

    Lines whose first character is '#' (preprocessor directives) are
    left untouched.
    """
    line_start = re.compile(r'^(?!#)', re.MULTILINE)
    return line_start.sub('  ', s)
51
52#
53# Munge a somewhat arbitrarily formatted piece of Python code
54# (e.g. from a format 'let' block) into something whose indentation
55# will get by the Python parser.
56#
57# The two keys here are that Python will give a syntax error if
58# there's any whitespace at the beginning of the first line, and that
59# all lines at the same lexical nesting level must have identical
60# indentation.  Unfortunately the way code literals work, an entire
61# let block tends to have some initial indentation.  Rather than
62# trying to figure out what that is and strip it off, we prepend 'if
63# 1:' to make the let code the nested block inside the if (and have
64# the parser automatically deal with the indentation for us).
65#
66# We don't want to do this if (1) the code block is empty or (2) the
67# first line of the block doesn't have any whitespace at the front.
68
def fixPythonIndentation(s):
    """Make an arbitrarily indented chunk of Python acceptable to compile().

    Blank lines are removed.  If the remaining text is non-empty and
    starts with a space or tab, it is wrapped in an 'if 1:' block so the
    leading indentation becomes legal; otherwise it is returned as is.
    """
    # get rid of blank lines first
    body = re.sub(r'(?m)^\s*\n', '', s)
    # body[:1] is '' for an empty block, which never needs the wrapper
    if body[:1] in (' ', '\t'):
        return 'if 1:\n' + body
    return body
75
76class ISAParserError(Exception):
77    """Error handler for parser errors"""
78    def __init__(self, first, second=None):
79        if second is None:
80            self.lineno = 0
81            self.string = first
82        else:
83            if hasattr(first, 'lexer'):
84                first = first.lexer.lineno
85            self.lineno = first
86            self.string = second
87
88    def display(self, filename_stack, print_traceback=debug):
89        # Output formatted to work under Emacs compile-mode.  Optional
90        # 'print_traceback' arg, if set to True, prints a Python stack
91        # backtrace too (can be handy when trying to debug the parser
92        # itself).
93
94        spaces = ""
95        for (filename, line) in filename_stack[:-1]:
96            print "%sIn file included from %s:" % (spaces, filename)
97            spaces += "  "
98
99        # Print a Python stack backtrace if requested.
100        if print_traceback or not self.lineno:
101            traceback.print_exc()
102
103        line_str = "%s:" % (filename_stack[-1][0], )
104        if self.lineno:
105            line_str += "%d:" % (self.lineno, )
106
107        return "%s%s %s" % (spaces, line_str, self.string)
108
109    def exit(self, filename_stack, print_traceback=debug):
110        # Just call exit.
111
112        sys.exit(self.display(filename_stack, print_traceback))
113
def error(*args):
    """Raise an ISAParserError constructed from the given arguments."""
    parser_error = ISAParserError(*args)
    raise parser_error
116
117####################
118# Template objects.
119#
120# Template objects are format strings that allow substitution from
121# the attribute spaces of other objects (e.g. InstObjParams instances).
122
# Matches a '%(label)s' or '%(label)d' substitution that is not
# immediately preceded by another '%'; group 1 captures the label.
labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
124
class Template(object):
    """A format string that allows substitution from the attribute
    space of another object (e.g. an InstObjParams instance)."""

    def __init__(self, parser, t):
        self.parser = parser
        self.template = t

    def subst(self, d):
        """Return the template text with its fields filled in from 'd'.

        'd' may be an InstObjParams instance, a dictionary, or any
        object with a __dict__.  Anything else raises TypeError.
        """
        parser = self.parser

        # Protect non-Python-dict substitutions (e.g. if there's a printf
        # in the templated C++ code)
        template = parser.protectNonSubstPercents(self.template)
        # CPU-model-specific substitutions are handled later (in GenCode).
        template = parser.protectCpuSymbols(template)

        # The substitution dictionary starts as a copy of the template
        # namespace, since it is modified below.
        myDict = parser.templateMap.copy()

        if isinstance(d, InstObjParams):
            # The instruction-wide parameters are already formed, but
            # the parameters which are only function wide still need
            # to be generated.
            myDict.update(d.__dict__)
            # The "operands" and "snippets" attributes of the InstObjParams
            # objects are for internal use and not substitution.
            del myDict['operands']
            del myDict['snippets']

            # Munge only those snippets the template actually refers to.
            used_labels = [label for label in labelRE.findall(template)
                           if d.snippets.has_key(label)]
            snippets = dict([(label, parser.mungeSnippet(d.snippets[label]))
                             for label in used_labels])
            myDict.update(snippets)

            # Add in template itself in case it references any
            # operands explicitly (like Mem)
            snippet_text = [str(text) for text in snippets.values()]
            compositeCode = ' '.join(snippet_text) + ' ' + template

            operands = SubOperandList(parser, compositeCode, d.operands)

            pc_state_decl = 'TheISA::PCState __parserAutoPCState;\n'

            op_decl = operands.concatAttrStrings('op_decl')
            if operands.readPC or operands.setPC:
                op_decl += pc_state_decl
            myDict['op_decl'] = op_decl

            def is_src(op):
                return op.is_src

            def is_dest(op):
                return op.is_dest

            op_src_decl = \
                operands.concatSomeAttrStrings(is_src, 'op_src_decl')
            op_dest_decl = \
                operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')
            if operands.readPC:
                op_src_decl += pc_state_decl
            if operands.setPC:
                op_dest_decl += pc_state_decl
            myDict['op_src_decl'] = op_src_decl
            myDict['op_dest_decl'] = op_dest_decl

            op_rd = operands.concatAttrStrings('op_rd')
            if operands.readPC:
                op_rd = '__parserAutoPCState = xc->pcState();\n' + op_rd
            myDict['op_rd'] = op_rd

            # Compose the op_wb string by walking the operands from last
            # to first and prepending.  The PC state write-back is
            # emitted once, right after the last operand that writes a
            # part of the PC state, so that later uncoordinated
            # modifications to the PC layer appropriately.
            op_wb_str = ''
            pc_writeback = 'xc->pcState(__parserAutoPCState);\n'
            for op_desc in reversed(list(operands.items)):
                if op_desc.isPCPart() and op_desc.is_dest:
                    suffix, pc_writeback = pc_writeback, ''
                else:
                    suffix = ''
                op_wb_str = op_desc.op_wb + suffix + op_wb_str
            myDict['op_wb'] = op_wb_str

            if d.operands.memOperand:
                mem_operand = d.operands.memOperand
                myDict['mem_acc_size'] = mem_operand.mem_acc_size
                myDict['mem_acc_type'] = mem_operand.mem_acc_type

        elif isinstance(d, dict):
            # if the argument is a dictionary, we just use it.
            myDict.update(d)
        elif hasattr(d, '__dict__'):
            # if the argument is an object, we use its attribute map.
            myDict.update(d.__dict__)
        else:
            raise TypeError("Template.subst() arg must be or have dictionary")
        return template % myDict

    # Convert to string.  This handles the case when a template with a
    # CPU-specific term gets interpolated into another template or into
    # an output block.
    def __str__(self):
        return self.parser.expandCpuSymbolsToString(self.template)
231
232################
233# Format object.
234#
235# A format object encapsulates an instruction format.  It must provide
236# a defineInst() method that generates the code for an instruction
237# definition.
238
239class Format(object):
240    def __init__(self, id, params, code):
241        self.id = id
242        self.params = params
243        label = 'def format ' + id
244        self.user_code = compile(fixPythonIndentation(code), label, 'exec')
245        param_list = string.join(params, ", ")
246        f = '''def defInst(_code, _context, %s):
247                my_locals = vars().copy()
248                exec _code in _context, my_locals
249                return my_locals\n''' % param_list
250        c = compile(f, label + ' wrapper', 'exec')
251        exec c
252        self.func = defInst
253
254    def defineInst(self, parser, name, args, lineno):
255        parser.updateExportContext()
256        context = parser.exportContext.copy()
257        if len(name):
258            Name = name[0].upper()
259            if len(name) > 1:
260                Name += name[1:]
261        context.update({ 'name' : name, 'Name' : Name })
262        try:
263            vars = self.func(self.user_code, context, *args[0], **args[1])
264        except Exception, exc:
265            if debug:
266                raise
267            error(lineno, 'error defining "%s": %s.' % (name, exc))
268        for k in vars.keys():
269            if k not in ('header_output', 'decoder_output',
270                         'exec_output', 'decode_block'):
271                del vars[k]
272        return GenCode(parser, **vars)
273
274# Special null format to catch an implicit-format instruction
275# definition outside of any format block.
class NoFormat(object):
    """Null format that rejects any instruction defined outside of a
    format block."""

    def __init__(self):
        self.defaultInst = ''

    def defineInst(self, parser, name, args, lineno):
        """Always report an error: there is no active format."""
        message = 'instruction definition "%s" with no active format!' % name
        error(lineno, message)
283
284###############
285# GenCode class
286#
287# The GenCode class encapsulates generated code destined for various
288# output files.  The header_output and decoder_output attributes are
289# strings containing code destined for decoder.hh and decoder.cc
290# respectively.  The decode_block attribute contains code to be
291# incorporated in the decode function itself (that will also end up in
292# decoder.cc).  The exec_output attribute is a dictionary with a key
293# for each CPU model name; the value associated with a particular key
294# is the string of code for that CPU model's exec.cc file.  The
295# has_decode_default attribute is used in the decode block to allow
296# explicit default clauses to override default default clauses.
297
class GenCode(object):
    """Generated code destined for the various output files.

    header_output, decoder_output and decode_block are strings;
    exec_output is a dictionary keyed by CPU model name.
    """

    # Constructor.  At this point we substitute out all CPU-specific
    # symbols.  For the exec output, these go into the per-model
    # dictionary.  For all other output types they get collapsed into
    # a single string.
    def __init__(self, parser,
                 header_output = '', decoder_output = '', exec_output = '',
                 decode_block = '', has_decode_default = False):
        self.parser = parser
        to_string = parser.expandCpuSymbolsToString
        self.header_output = to_string(header_output)
        self.decoder_output = to_string(decoder_output)
        if isinstance(exec_output, dict):
            # Already a per-model dictionary: use it directly.
            self.exec_output = exec_output
        elif isinstance(exec_output, str):
            # If the exec_output arg is a single string, we replicate
            # it for each of the CPU models, substituting any
            # %(CPU_foo)s params appropriately.
            self.exec_output = parser.expandCpuSymbolsToDict(exec_output)
        self.decode_block = to_string(decode_block)
        self.has_decode_default = has_decode_default

    # Override '+' operator: generate a new GenCode object that
    # concatenates all the individual strings in the operands.
    def __add__(self, other):
        model_names = [cpu.name for cpu in self.parser.cpuModels]
        combined_exec = dict(
            [(n, self.exec_output[n] + other.exec_output[n])
             for n in model_names])
        return GenCode(
            self.parser,
            header_output = self.header_output + other.header_output,
            decoder_output = self.decoder_output + other.decoder_output,
            exec_output = combined_exec,
            decode_block = self.decode_block + other.decode_block,
            has_decode_default = (self.has_decode_default or
                                  other.has_decode_default))

    # Prepend a string (typically a comment) to all the strings.
    def prepend_all(self, pre):
        self.header_output = pre + self.header_output
        self.decoder_output = pre + self.decoder_output
        self.decode_block = pre + self.decode_block
        per_model = self.exec_output
        for cpu in self.parser.cpuModels:
            per_model[cpu.name] = pre + per_model[cpu.name]

    # Wrap the decode block in a pair of strings (e.g., 'case foo:'
    # and 'break;').  Used to build the big nested switch statement.
    def wrap_decode_block(self, pre, post = ''):
        indented = indent(self.decode_block)
        self.decode_block = pre + indented + post
345
346#####################################################################
347#
348#                      Bitfield Operator Support
349#
350#####################################################################
351
# Single-index selector with an empty second index: '<n:>'.
bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')

# Selector applied to a (possibly dotted) name: 'foo<a:b>'.
bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
# Selector applied to a parenthesized expression: ')<a:b>'.
bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')

def substBitOps(code):
    """Rewrite bitfield selectors in 'code' as bits() calls.

    'foo<a:b>' becomes 'bits(foo, a, b)', '(expr)<a:b>' becomes
    'bits((expr), a, b)', and the single-index form '<n:>' is treated
    as '<n:n>'.  Calls sys.exit() if a ')' preceding a selector has no
    matching '('.
    """
    # first convert single-bit selectors to two-index form
    # i.e., <n:> --> <n:n>
    code = bitOp1ArgRE.sub(r'<\1:\1>', code)
    # simple case: selector applied to ID (name)
    # i.e., foo<a:b> --> bits(foo, a, b)
    code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
    # if selector is applied to expression (ending in ')'),
    # we need to search backward for matching '('
    match = bitOpExprRE.search(code)
    while match:
        exprEnd = match.start()
        here = exprEnd - 1
        nestLevel = 1
        while nestLevel > 0:
            # Check the bound before indexing: the matching '(' may
            # legitimately sit at index 0, and a negative index would
            # silently wrap around to the end of the string.
            if here < 0:
                sys.exit("Didn't find '('!")
            if code[here] == '(':
                nestLevel -= 1
            elif code[here] == ')':
                nestLevel += 1
            here -= 1
        # 'here' has stepped one position past the matching '('.
        exprStart = here+1
        newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
                                         match.group(1), match.group(2))
        code = code[:exprStart] + newExpr + code[match.end():]
        match = bitOpExprRE.search(code)
    return code
385
386
387#####################################################################
388#
389#                             Code Parser
390#
391# The remaining code is the support for automatically extracting
392# instruction characteristics from pseudocode.
393#
394#####################################################################
395
# Force the argument to be a list.  Useful for flags, where a caller
# can specify a singleton flag or a list of flags.  Also useful for
# converting tuples to lists so they can be modified.
def makeList(arg):
    """Coerce 'arg' to a list.

    A list is returned unchanged (same object), a tuple is copied into
    a new list, any other false value gives an empty list, and anything
    else is wrapped in a one-element list.
    """
    if isinstance(arg, list):
        return arg
    if isinstance(arg, tuple):
        return list(arg)
    if arg:
        return [ arg ]
    return []
408
class Operand(object):
    '''Base class for operand descriptors.  An instance of this class
    (or actually a class derived from this one) represents a specific
    operand for a code block (e.g, "Rc.sq" as a dest). Intermediate
    derived classes encapsulates the traits of a particular operand
    type (e.g., "32-bit integer register").'''

    def __init__(self, parser, full_name, ext, is_src, is_dest):
        self.full_name = full_name
        self.ext = ext
        self.is_src = is_src
        self.is_dest = is_dest
        # The 'effective extension' (eff_ext) is either the actual
        # extension, if one was explicitly provided, or the default.
        if ext:
            self.eff_ext = ext
        elif hasattr(self, 'dflt_ext'):
            self.eff_ext = self.dflt_ext

        if hasattr(self, 'eff_ext'):
            type_info = parser.operandTypeMap[self.eff_ext]
            self.size, self.ctype, self.is_signed = type_info

        # note that mem_acc_size is undefined for non-mem operands...
        # template must be careful not to use it if it doesn't apply.
        if self.isMem():
            self.mem_acc_size = self.makeAccSize()
            acc_type = 'uint'
            if self.ctype in ('Twin32_t', 'Twin64_t'):
                acc_type = 'Twin'
            self.mem_acc_type = acc_type

    def buildReadCode(self, func = None):
        '''Expand self.read_code and return the statement that assigns
        the result to the operand variable.'''
        fields = dict(name = self.base_name,
                      func = func,
                      reg_idx = self.reg_spec,
                      size = self.size,
                      ctype = self.ctype)
        if hasattr(self, 'src_reg_idx'):
            fields['op_idx'] = self.src_reg_idx
        value = self.read_code % fields
        if self.size == self.dflt_size:
            return '%s = %s;\n' % \
                   (self.base_name, value)
        # Non-default size: keep only the low 'size' bits.
        return '%s = bits(%s, %d, 0);\n' % \
               (self.base_name, value, self.size-1)

    def buildWriteCode(self, func = None):
        '''Expand self.write_code and return the write-back block.'''
        final_val = self.base_name
        if self.size != self.dflt_size and self.is_signed:
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        fields = dict(name = self.base_name,
                      func = func,
                      reg_idx = self.reg_spec,
                      size = self.size,
                      ctype = self.ctype,
                      final_val = final_val)
        if hasattr(self, 'dest_reg_idx'):
            fields['op_idx'] = self.dest_reg_idx
        code = self.write_code % fields
        return '''
        {
            %s final_val = %s;
            %s;
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, code)

    # Finalize additional fields (primarily code fields).  This step
    # is done separately since some of these fields may depend on the
    # register index enumeration that hasn't been performed yet at the
    # time of __init__().
    def finalize(self):
        self.flags = self.getFlags()
        self.constructor = self.makeConstructor()
        self.op_decl = self.makeDecl()

        if not self.is_src:
            self.op_rd = ''
            self.op_src_decl = ''
        else:
            self.op_rd = self.makeRead()
            self.op_src_decl = self.makeDecl()

        if not self.is_dest:
            self.op_wb = ''
            self.op_dest_decl = ''
        else:
            self.op_wb = self.makeWrite()
            self.op_dest_decl = self.makeDecl()

    # Operand-kind predicates.  The base class answers "no" to all of
    # them; derived classes override the ones that apply.
    def isMem(self):
        return 0

    def isReg(self):
        return 0

    def isFloatReg(self):
        return 0

    def isIntReg(self):
        return 0

    def isControlReg(self):
        return 0

    def isPCState(self):
        return 0

    def isPCPart(self):
        # True for a PC state operand that has a reg_spec.
        return self.isPCState() and self.reg_spec

    def getFlags(self):
        # The empty slice '[:]' copies self.flags[0], so the '+='
        # below never modifies the original list.
        combined = self.flags[0][:]
        if self.is_src:
            combined += self.flags[1]
        if self.is_dest:
            combined += self.flags[2]
        return combined

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        return '%s %s = 0;\n' % (self.ctype, self.base_name)
536
class IntRegOperand(Operand):
    '''Operand descriptor for an integer register.'''

    def isReg(self):
        return 1

    def isIntReg(self):
        return 1

    def makeConstructor(self):
        # One register-index assignment per role the operand plays.
        lines = []
        if self.is_src:
            lines.append('\n\t_srcRegIdx[%d] = %s;' % \
                         (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            lines.append('\n\t_destRegIdx[%d] = %s;' % \
                         (self.dest_reg_idx, self.reg_spec))
        return ''.join(lines)

    def makeRead(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to read integer register as FP')
        if self.read_code is not None:
            return self.buildReadCode('readIntRegOperand')
        if self.size < self.dflt_size:
            # Narrower than the register: keep only the low bits.
            return '%s = bits(xc->readIntRegOperand(this, %d), %d, 0);\n' % \
                   (self.base_name, self.src_reg_idx, self.size-1)
        if self.size == self.dflt_size:
            return '%s = xc->readIntRegOperand(this, %d);\n' % \
                   (self.base_name, self.src_reg_idx)
        # Wider than the register: sign-extend if the type is signed.
        int_reg_val = 'xc->readIntRegOperand(this, %d)' % \
                      (self.src_reg_idx)
        if self.is_signed:
            int_reg_val = 'sext<%d>(%s)' % (self.dflt_size, int_reg_val)
        return '%s = %s;\n' % (self.base_name, int_reg_val)

    def makeWrite(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to write integer register as FP')
        if self.write_code is not None:
            return self.buildWriteCode('setIntRegOperand')
        final_val = self.base_name
        if self.size != self.dflt_size and self.is_signed:
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        return '''
        {
            %s final_val = %s;
            xc->setIntRegOperand(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, self.dest_reg_idx)
588
class FloatRegOperand(Operand):
    '''Operand descriptor for a floating-point register.'''

    def isReg(self):
        return 1

    def isFloatReg(self):
        return 1

    def makeConstructor(self):
        # One register-index assignment per role the operand plays.
        lines = []
        if self.is_src:
            lines.append('\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' % \
                         (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            lines.append('\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' % \
                         (self.dest_reg_idx, self.reg_spec))
        return ''.join(lines)

    def makeRead(self):
        is_fp_type = self.ctype in ('float', 'double')
        if is_fp_type:
            func = 'readFloatRegOperand'
        else:
            func = 'readFloatRegOperandBits'
        # Only raw-bits reads of a non-default size are narrowed.
        bit_select = (not is_fp_type) and self.size != self.dflt_size
        base = 'xc->%s(this, %d)' % (func, self.src_reg_idx)
        if self.read_code is not None:
            return self.buildReadCode(func)
        if not bit_select:
            return '%s = %s;\n' % (self.base_name, base)
        return '%s = bits(%s, %d, 0);\n' % \
               (self.base_name, base, self.size-1)

    def makeWrite(self):
        final_val = self.base_name
        final_ctype = self.ctype
        if self.ctype in ('float', 'double'):
            func = 'setFloatRegOperand'
        else:
            func = 'setFloatRegOperandBits'
            if self.ctype not in ('uint32_t', 'uint64_t'):
                # Other integer types are written through an unsigned
                # value of the default size, sign-extended if needed.
                final_ctype = 'uint%d_t' % self.dflt_size
                if self.size != self.dflt_size and self.is_signed:
                    final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        if self.write_code is not None:
            return self.buildWriteCode(func)
        return '''
        {
            %s final_val = %s;
            xc->%s(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (final_ctype, final_val, func, self.dest_reg_idx)
644
class ControlRegOperand(Operand):
    '''Operand descriptor for a control (misc) register.'''

    def isReg(self):
        return 1

    def isControlReg(self):
        return 1

    def makeConstructor(self):
        # One register-index assignment per role the operand plays.
        lines = []
        if self.is_src:
            lines.append('\n\t_srcRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
                         (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            lines.append('\n\t_destRegIdx[%d] = %s + Ctrl_Base_DepTag;' % \
                         (self.dest_reg_idx, self.reg_spec))
        return ''.join(lines)

    def makeRead(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to read control register as FP')
        if self.read_code is not None:
            return self.buildReadCode('readMiscRegOperand')
        base = 'xc->readMiscRegOperand(this, %s)' % self.src_reg_idx
        if self.size != self.dflt_size:
            # Non-default size: keep only the low 'size' bits.
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, base, self.size-1)
        return '%s = %s;\n' % (self.base_name, base)

    def makeWrite(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to write control register as FP')
        if self.write_code is not None:
            return self.buildWriteCode('setMiscRegOperand')
        set_stmt = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
                   (self.dest_reg_idx, self.base_name)
        trace_stmt = 'if (traceData) { traceData->setData(%s); }' % \
                     self.base_name
        return set_stmt + trace_stmt
685
class MemOperand(Operand):
    '''Operand descriptor for a memory operand.'''

    def isMem(self):
        return 1

    def makeConstructor(self):
        return ''

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        # Declare memory data variable.
        name = self.base_name
        if self.ctype not in ('Twin32_t', 'Twin64_t'):
            return '%s %s = 0;\n' % (self.ctype, name)
        # Twin types have their 'a' and 'b' members zeroed separately.
        return "%s %s; %s.a = 0; %s.b = 0;\n" % \
               (self.ctype, name, name, name)

    def makeRead(self):
        # Read code is generated only if read_code is provided.
        if self.read_code is None:
            return ''
        return self.buildReadCode()

    def makeWrite(self):
        # Write code is generated only if write_code is provided.
        if self.write_code is None:
            return ''
        return self.buildWriteCode()

    # Return the memory access size *in bits*, suitable for
    # forming a type via "uint%d_t".  Divide by 8 if you want bytes.
    def makeAccSize(self):
        return self.size
716
class PCStateOperand(Operand):
    '''Operand descriptor for the PC state, or for one component of it
    when a reg_spec is given.'''

    def isPCState(self):
        return 1

    def makeConstructor(self):
        return ''

    def makeRead(self):
        if not self.reg_spec:
            # The whole PC state itself.
            return '%s = xc->pcState();\n' % self.base_name
        # A component of the PC state.
        return '%s = __parserAutoPCState.%s();\n' % \
            (self.base_name, self.reg_spec)

    def makeWrite(self):
        if not self.reg_spec:
            # The whole PC state itself.
            return 'xc->pcState(%s);\n' % self.base_name
        # A component of the PC state.
        return '__parserAutoPCState.%s(%s);\n' % \
            (self.reg_spec, self.base_name)

    def makeDecl(self):
        # A component is declared with its own ctype; the whole state
        # is declared as TheISA::PCState.
        if self.isPCPart():
            ctype = self.ctype
        else:
            ctype = 'TheISA::PCState'
        return "%s %s;\n" % (ctype, self.base_name)
747
class OperandList(object):
    '''Ordered collection of the operand descriptors used by a code block.

    Construction scans the code for operand references (using
    parser.operandsRE), creates one descriptor per operand base name
    (using parser.operandNameMap), sorts the descriptors by their
    sort_pri attribute and numbers the source and destination register
    operands.  Descriptors are reachable both by position (self.items)
    and by base name (self.bases).'''

    def __init__(self, parser, code):
        '''Build the operand list for the code block 'code'.

        parser -- supplies operandsRE and operandNameMap; its
                  maxInstSrcRegs / maxInstDestRegs attributes are raised
                  when this block uses more registers than seen so far.
        code   -- string containing the code block to scan.

        Problems (inconsistent extensions, more than one memory operand)
        are reported through error().'''
        self.items = []
        self.bases = {}
        # delete comments so we don't match on reg specifiers inside
        code = commentRE.sub('', code)
        # search for operands
        next_pos = 0
        while True:
            match = parser.operandsRE.search(code, next_pos)
            if not match:
                # no more matches: we're done
                break
            # regexp groups are operand full name, base, and extension
            (op_full, op_base, op_ext) = match.groups()
            # if the token following the operand is an assignment, this is
            # a destination (LHS), else it's a source (RHS)
            is_dest = assignRE.match(code, match.end()) is not None
            is_src = not is_dest
            # see if we've already seen this one
            op_desc = self.find_base(op_base)
            if op_desc:
                if op_desc.ext != op_ext:
                    error('Inconsistent extensions for operand %s' % \
                          op_base)
                # an operand seen on both sides is both source and dest
                op_desc.is_src = op_desc.is_src or is_src
                op_desc.is_dest = op_desc.is_dest or is_dest
            else:
                # new operand: create new descriptor
                op_desc = parser.operandNameMap[op_base](parser,
                    op_full, op_ext, is_src, is_dest)
                self.append(op_desc)
            # start next search after end of current match
            next_pos = match.end()
        self.sort()
        # enumerate source & dest register operands... used in building
        # constructor later
        self.numSrcRegs = 0
        self.numDestRegs = 0
        self.numFPDestRegs = 0
        self.numIntDestRegs = 0
        self.memOperand = None
        for op_desc in self.items:
            if op_desc.isReg():
                if op_desc.is_src:
                    op_desc.src_reg_idx = self.numSrcRegs
                    self.numSrcRegs += 1
                if op_desc.is_dest:
                    op_desc.dest_reg_idx = self.numDestRegs
                    self.numDestRegs += 1
                    if op_desc.isFloatReg():
                        self.numFPDestRegs += 1
                    elif op_desc.isIntReg():
                        self.numIntDestRegs += 1
            elif op_desc.isMem():
                if self.memOperand:
                    error("Code block has more than one memory operand.")
                self.memOperand = op_desc
        # keep the parser-wide maxima up to date
        if parser.maxInstSrcRegs < self.numSrcRegs:
            parser.maxInstSrcRegs = self.numSrcRegs
        if parser.maxInstDestRegs < self.numDestRegs:
            parser.maxInstDestRegs = self.numDestRegs
        # now make a final pass to finalize op_desc fields that may depend
        # on the register enumeration
        for op_desc in self.items:
            op_desc.finalize()

    def __len__(self):
        '''Return the number of operand descriptors in the list.'''
        return len(self.items)

    def __getitem__(self, index):
        '''Return the operand descriptor at position 'index'.'''
        return self.items[index]

    def append(self, op_desc):
        '''Add op_desc to the list and index it by its base_name.'''
        self.items.append(op_desc)
        self.bases[op_desc.base_name] = op_desc

    def find_base(self, base_name):
        '''Return the descriptor registered under base_name, or None.'''
        # like self.bases[base_name], but returns None if not found
        # (rather than raising exception)
        return self.bases.get(base_name)

    # internal helper function for concat[Some]Attr{Strings|Lists}
    def __internalConcatAttrs(self, attr_name, filter, result):
        # 'result' is the initial accumulator ('' or a fresh list); the
        # attribute value of every accepted operand is added to it.
        for op_desc in self.items:
            if filter(op_desc):
                result += getattr(op_desc, attr_name)
        return result

    # return a single string that is the concatenation of the (string)
    # values of the specified attribute for all operands
    def concatAttrStrings(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, '')

    # like concatAttrStrings, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrStrings(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, '')

    # return a single list that is the concatenation of the (list)
    # values of the specified attribute for all operands
    def concatAttrLists(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, [])

    # like concatAttrLists, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrLists(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, [])

    def sort(self):
        '''Sort the descriptors in place by ascending sort_pri.

        The sort is stable, so operands with equal priority keep their
        relative order.'''
        # key-based sort instead of a cmp function: same ordering, and
        # it does not rely on the positional cmp argument of list.sort()
        self.items.sort(key=lambda op_desc: op_desc.sort_pri)
862
class SubOperandList(OperandList):
    '''Operand list for a piece of code whose operands are already
    described by a master OperandList.

    No new descriptors are created: every operand found in the code is
    looked up in master_list and the existing descriptor is shared.  The
    constructor also records the memory operand (if any) and how the
    code uses the PC state.'''
    def __init__(self, parser, code, master_list):
        self.items = []
        self.bases = {}
        # strip comments first so that register specifiers inside them
        # are not picked up as operands
        code = commentRE.sub('', code)
        # walk over every operand reference in the code
        pos = 0
        match = parser.operandsRE.search(code, pos)
        while match:
            # regexp groups are operand full name, base, and extension
            (op_full, op_base, op_ext) = match.groups()
            if not master_list.find_base(op_base):
                # every operand must already be known to the master list
                error('Found operand %s which is not in the master list!' \
                      ' This is an internal error' % op_base)
            elif not self.find_base(op_base):
                # first time we see it here: share the master's descriptor
                self.append(master_list.bases[op_base])
            # resume the search right after this match
            pos = match.end()
            match = parser.operandsRE.search(code, pos)
        self.sort()
        self.memOperand = None
        # Whether the whole PC needs to be read so parts of it can be accessed
        self.readPC = False
        # Whether the whole PC needs to be written after parts of it were
        # changed
        self.setPC = False
        # Whether this instruction manipulates the whole PC or parts of it.
        # Mixing the two is a bad idea and flagged as an error.
        self.pcPart = None
        for op_desc in self.items:
            if op_desc.isPCPart():
                self.readPC = True
                if op_desc.is_dest:
                    self.setPC = True
            if op_desc.isPCState():
                if self.pcPart is not None:
                    # an earlier PC operand fixed the access style; this
                    # one has to use the same style
                    if bool(self.pcPart) != bool(op_desc.isPCPart()):
                        error("Mixed whole and partial PC state operands.")
                self.pcPart = op_desc.isPCPart()
            if op_desc.isMem():
                if self.memOperand:
                    error("Code block has more than one memory operand.")
                self.memOperand = op_desc
920
# Regular expression object to match C++ '//' comments, from the '//'
# through the newline that ends the line.  Used by OperandList and
# SubOperandList to strip comments before searching for operands.
# NOTE(review): a '//' comment on a last line without a trailing newline
# is not matched by this pattern.
commentRE = re.compile(r'//.*\n')

# Regular expression object to match an assignment operator: optional
# whitespace followed by an '=' that is not the start of '=='.  Used by
# OperandList to decide whether an operand is a destination.
# NOTE(review): the pattern contains no '^' or '$', so re.MULTILINE does
# not change what it matches.
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
928
def makeFlagConstructor(flag_list):
    '''Return C++ statements that set every flag in flag_list to true.

    flag_list -- list of flag name strings; may contain duplicates.

    Returns '' for an empty list, otherwise one
    "\\n\\tflags[<name>] = true;" statement per distinct flag, in sorted
    order.

    NOTE: flag_list is sorted and stripped of duplicates *in place*, so
    the caller's list is modified (unless it is empty).'''
    if not flag_list:
        return ''
    # filter out repeated flags, updating the caller's list in place
    flag_list[:] = sorted(set(flag_list))
    pre = '\n\tflags['
    post = '] = true;'
    # str.join instead of the string.join() module function
    return pre + (post + pre).join(flag_list) + post
944
# Assume all instruction flags are of the form 'IsFoo'.  InstObjParams
# applies this with match(), so any string starting with 'Is' is
# accepted as a flag.
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass
# NOTE(review): InstObjParams applies this with match() and the pattern
# is not anchored at the end, so any string that contains 'Op' is
# accepted, not only strings that end in it.
opClassRE = re.compile(r'.*Op|No_OpClass')
950
class InstObjParams(object):
    '''Bundle of the values describing one instruction object: mnemonic,
    class names, code snippets, operands, constructor code, flags and
    operation class.'''
    def __init__(self, parser, mnem, class_name, base_class = '',
                 snippets = {}, opt_args = []):
        '''parser     -- passed on to OperandList.
        mnem       -- stored as self.mnemonic.
        class_name -- stored as self.class_name.
        base_class -- stored as self.base_class.
        snippets   -- dict of code snippets; any non-dict value is
                      stored as {'code': value}.
        opt_args   -- strings naming extra StaticInst flags or an
                      OpClass override; anything else is reported
                      through error().

        The default dict/list objects are shared between calls; they are
        not modified by this constructor.'''
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        # a bare snippet is shorthand for a dict with a single 'code' entry
        if not isinstance(snippets, dict):
            snippets = {'code' : snippets}
        # operands are collected over all snippets taken together
        compositeCode = ' '.join(map(str, snippets.values()))
        self.snippets = snippets

        operands = OperandList(parser, compositeCode)
        self.operands = operands
        # per-operand constructor code followed by the register counts
        ctor_pieces = [
            operands.concatAttrStrings('constructor'),
            '\n\t_numSrcRegs = %d;' % operands.numSrcRegs,
            '\n\t_numDestRegs = %d;' % operands.numDestRegs,
            '\n\t_numFPDestRegs = %d;' % operands.numFPDestRegs,
            '\n\t_numIntDestRegs = %d;' % operands.numIntDestRegs,
        ]
        self.constructor = ''.join(ctor_pieces)
        self.flags = operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and can be overridden
        # later otherwise.
        flags = self.flags
        if 'IsStore' in flags:
            guess = 'MemWriteOp'
        elif 'IsLoad' in flags or 'IsPrefetch' in flags:
            guess = 'MemReadOp'
        elif 'IsFloating' in flags:
            guess = 'FloatAddOp'
        else:
            guess = 'IntAluOp'
        self.op_class = guess

        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error('InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to the constructor here so that it
        # includes any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        self.fp_enable_check = ''
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
1009
1010##############
1011# Stack: a simple stack object.  Used for both formats (formatStack)
1012# and default cases (defaultStack).  Simply wraps a list to give more
1013# stack-like syntax and enable initialization with an argument list
1014# (as opposed to an argument that's a list).
1015
class Stack(list):
    '''A list with stack-style helpers, created from its initial items
    given as separate arguments (bottom of the stack first).'''
    def __init__(self, *items):
        super(Stack, self).__init__(items)

    def push(self, item):
        '''Put item on top of the stack.'''
        self.append(item)

    def top(self):
        '''Return the item on top of the stack without removing it.'''
        return self[len(self) - 1]
1025
1026#######################
1027#
1028# Output file template
1029#
1030
# Skeleton of a generated output file.  It uses %(name)s placeholders,
# i.e. it is meant to be filled in with the % operator and a mapping
# that provides 'filename', 'includes', 'global_output', 'namespace',
# 'namespace_output' and 'decode_function'.
file_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s

%(decode_function)s
'''
1050
# Skeleton of the generated file that declares the MaxInstSrcRegs and
# MaxInstDestRegs constants.  It expects a mapping with 'filename',
# 'namespace' and the two integer values 'MaxInstSrcRegs' and
# 'MaxInstDestRegs' (note the %d conversions).
max_inst_regs_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

namespace %(namespace)s {

    const int MaxInstSrcRegs = %(MaxInstSrcRegs)d;
    const int MaxInstDestRegs = %(MaxInstDestRegs)d;

} // namespace %(namespace)s

'''
1066
1067class ISAParser(Grammar):
1068    def __init__(self, output_dir, cpu_models):
1069        super(ISAParser, self).__init__()
1070        self.output_dir = output_dir
1071
1072        self.cpuModels = cpu_models
1073
1074        # variable to hold templates
1075        self.templateMap = {}
1076
1077        # This dictionary maps format name strings to Format objects.
1078        self.formatMap = {}
1079
1080        # The format stack.
1081        self.formatStack = Stack(NoFormat())
1082
1083        # The default case stack.
1084        self.defaultStack = Stack(None)
1085
1086        # Stack that tracks current file and line number.  Each
1087        # element is a tuple (filename, lineno) that records the
1088        # *current* filename and the line number in the *previous*
1089        # file where it was included.
1090        self.fileNameStack = Stack()
1091
1092        symbols = ('makeList', 're', 'string')
1093        self.exportContext = dict([(s, eval(s)) for s in symbols])
1094
1095        self.maxInstSrcRegs = 0
1096        self.maxInstDestRegs = 0
1097
1098    #####################################################################
1099    #
1100    #                                Lexer
1101    #
1102    # The PLY lexer module takes two things as input:
1103    # - A list of token names (the string list 'tokens')
1104    # - A regular expression describing a match for each token.  The
1105    #   regexp for token FOO can be provided in two ways:
1106    #   - as a string variable named t_FOO
1107    #   - as the doc string for a function named t_FOO.  In this case,
1108    #     the function is also executed, allowing an action to be
1109    #     associated with each token match.
1110    #
1111    #####################################################################
1112
    # Reserved words.  These are listed separately as they are matched
    # using the same regexp as generic IDs, but distinguished in the
    # t_ID() function.  The PLY documentation suggests this approach.
    # Each entry is a token type name; the keyword that produces it is
    # the lower-case form of the name (see reserved_map).
    reserved = (
        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
        'OUTPUT', 'SIGNED', 'TEMPLATE'
        )
1121
    # List of tokens.  The lex module requires this.  It consists of the
    # reserved words followed by all other token types.
    tokens = reserved + (
        # identifier
        'ID',

        # integer literal
        'INTLIT',

        # string literal
        'STRLIT',

        # code literal
        'CODELIT',

        # ( ) [ ] { } < > = , ; . : :: *
        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'LBRACE', 'RBRACE',
        'LESS', 'GREATER', 'EQUALS',
        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
        'ASTERISK',

        # C preprocessor directives
        'CPPDIRECTIVE'

    # The following are matched (by t_NEWFILE and t_ENDFILE) but never
    # returned to the parser.  They are commented out here to suppress
    # a PLY warning.
        # newfile directive
    #    'NEWFILE',

        # endfile directive
    #    'ENDFILE'
    )
1155
    # Regular expressions for token matching.  These are the simple
    # tokens that need no action: each t_FOO string is the regexp for
    # token FOO.
    # NOTE(review): ':' is a prefix of '::'; this relies on PLY trying
    # string-defined patterns longest-regexp-first -- confirm against
    # the PLY version in use.
    t_LPAREN           = r'\('
    t_RPAREN           = r'\)'
    t_LBRACKET         = r'\['
    t_RBRACKET         = r'\]'
    t_LBRACE           = r'\{'
    t_RBRACE           = r'\}'
    t_LESS             = r'\<'
    t_GREATER          = r'\>'
    t_EQUALS           = r'='
    t_COMMA            = r','
    t_SEMI             = r';'
    t_DOT              = r'\.'
    t_COLON            = r':'
    t_DBLCOLON         = r'::'
    t_ASTERISK         = r'\*'
1172
    # Identifiers and reserved words.  reserved_map maps the lower-case
    # spelling of each reserved word to its token type name; t_ID()
    # uses it to tell reserved words from ordinary identifiers.
    reserved_map = { }
    for r in reserved:
        reserved_map[r.lower()] = r
1177
1178    def t_ID(self, t):
1179        r'[A-Za-z_]\w*'
1180        t.type = self.reserved_map.get(t.value, 'ID')
1181        return t
1182
1183    # Integer literal
1184    def t_INTLIT(self, t):
1185        r'-?(0x[\da-fA-F]+)|\d+'
1186        try:
1187            t.value = int(t.value,0)
1188        except ValueError:
1189            error(t, 'Integer value "%s" too large' % t.value)
1190            t.value = 0
1191        return t
1192
1193    # String literal.  Note that these use only single quotes, and
1194    # can span multiple lines.
1195    def t_STRLIT(self, t):
1196        r"(?m)'([^'])+'"
1197        # strip off quotes
1198        t.value = t.value[1:-1]
1199        t.lexer.lineno += t.value.count('\n')
1200        return t
1201
1202
1203    # "Code literal"... like a string literal, but delimiters are
1204    # '{{' and '}}' so they get formatted nicely under emacs c-mode
1205    def t_CODELIT(self, t):
1206        r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
1207        # strip off {{ & }}
1208        t.value = t.value[2:-2]
1209        t.lexer.lineno += t.value.count('\n')
1210        return t
1211
1212    def t_CPPDIRECTIVE(self, t):
1213        r'^\#[^\#].*\n'
1214        t.lexer.lineno += t.value.count('\n')
1215        return t
1216
1217    def t_NEWFILE(self, t):
1218        r'^\#\#newfile\s+"[\w/.-]*"'
1219        self.fileNameStack.push((t.value[11:-1], t.lexer.lineno))
1220        t.lexer.lineno = 0
1221
1222    def t_ENDFILE(self, t):
1223        r'^\#\#endfile'
1224        (old_filename, t.lexer.lineno) = self.fileNameStack.pop()
1225
1226    #
1227    # The functions t_NEWLINE, t_ignore, and t_error are
1228    # special for the lex module.
1229    #
1230
1231    # Newlines
1232    def t_NEWLINE(self, t):
1233        r'\n+'
1234        t.lexer.lineno += t.value.count('\n')
1235
    # Comments: '//' through the end of the line (the newline itself is
    # left for t_NEWLINE).  The function has no body besides its regexp
    # and returns nothing.
    def t_comment(self, t):
        r'//.*'
1239
    # Completely ignored characters: space, tab and form feed ('\x0c')
    t_ignore = ' \t\x0c'
1242
1243    # Error handler
1244    def t_error(self, t):
1245        error(t, "illegal character '%s'" % t.value[0])
1246        t.skip(1)
1247
1248    #####################################################################
1249    #
1250    #                                Parser
1251    #
1252    # Every function whose name starts with 'p_' defines a grammar
1253    # rule.  The rule is encoded in the function's doc string, while
1254    # the function body provides the action taken when the rule is
1255    # matched.  The argument to each function is a list of the values
1256    # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
1257    # symbols on the RHS.  For tokens, the value is copied from the
1258    # t.value attribute provided by the lexer.  For non-terminals, the
1259    # value is assigned by the producing rule; i.e., the job of the
1260    # grammar rule function is to set the value for the non-terminal
1261    # on the LHS (by assigning to t[0]).
1262    #####################################################################
1263
1264    # The LHS of the first grammar rule is used as the start symbol
1265    # (in this case, 'specification').  Note that this rule enforces
1266    # that there will be exactly one namespace declaration, with 0 or
1267    # more global defs/decls before and after it.  The defs & decls
1268    # before the namespace decl will be outside the namespace; those
1269    # after will be inside.  The decoder function is always inside the
1270    # namespace.
1271    def p_specification(self, t):
1272        'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
1273        global_code = t[1]
1274        isa_name = t[2]
1275        namespace = isa_name + "Inst"
1276        # wrap the decode block as a function definition
1277        t[4].wrap_decode_block('''
1278StaticInstPtr
1279%(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst)
1280{
1281    using namespace %(namespace)s;
1282''' % vars(), '}')
1283        # both the latter output blocks and the decode block are in
1284        # the namespace
1285        namespace_code = t[3] + t[4]
1286        # pass it all back to the caller of yacc.parse()
1287        t[0] = (isa_name, namespace, global_code, namespace_code)
1288
1289    # ISA name declaration looks like "namespace <foo>;"
1290    def p_name_decl(self, t):
1291        'name_decl : NAMESPACE ID SEMI'
1292        t[0] = t[2]
1293
1294    # 'opt_defs_and_outputs' is a possibly empty sequence of
1295    # def and/or output statements.
1296    def p_opt_defs_and_outputs_0(self, t):
1297        'opt_defs_and_outputs : empty'
1298        t[0] = GenCode(self)
1299
1300    def p_opt_defs_and_outputs_1(self, t):
1301        'opt_defs_and_outputs : defs_and_outputs'
1302        t[0] = t[1]
1303
1304    def p_defs_and_outputs_0(self, t):
1305        'defs_and_outputs : def_or_output'
1306        t[0] = t[1]
1307
1308    def p_defs_and_outputs_1(self, t):
1309        'defs_and_outputs : defs_and_outputs def_or_output'
1310        t[0] = t[1] + t[2]
1311
1312    # The list of possible definition/output statements.
1313    def p_def_or_output(self, t):
1314        '''def_or_output : def_format
1315                         | def_bitfield
1316                         | def_bitfield_struct
1317                         | def_template
1318                         | def_operand_types
1319                         | def_operands
1320                         | output_header
1321                         | output_decoder
1322                         | output_exec
1323                         | global_let'''
1324        t[0] = t[1]
1325
1326    # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
1327    # directly to the appropriate output section.
1328
1329    # Massage output block by substituting in template definitions and
1330    # bit operators.  We handle '%'s embedded in the string that don't
1331    # indicate template substitutions (or CPU-specific symbols, which
1332    # get handled in GenCode) by doubling them first so that the
1333    # format operation will reduce them back to single '%'s.
1334    def process_output(self, s):
1335        s = self.protectNonSubstPercents(s)
1336        # protects cpu-specific symbols too
1337        s = self.protectCpuSymbols(s)
1338        return substBitOps(s % self.templateMap)
1339
1340    def p_output_header(self, t):
1341        'output_header : OUTPUT HEADER CODELIT SEMI'
1342        t[0] = GenCode(self, header_output = self.process_output(t[3]))
1343
1344    def p_output_decoder(self, t):
1345        'output_decoder : OUTPUT DECODER CODELIT SEMI'
1346        t[0] = GenCode(self, decoder_output = self.process_output(t[3]))
1347
1348    def p_output_exec(self, t):
1349        'output_exec : OUTPUT EXEC CODELIT SEMI'
1350        t[0] = GenCode(self, exec_output = self.process_output(t[3]))
1351
1352    # global let blocks 'let {{...}}' (Python code blocks) are
1353    # executed directly when seen.  Note that these execute in a
1354    # special variable context 'exportContext' to prevent the code
1355    # from polluting this script's namespace.
1356    def p_global_let(self, t):
1357        'global_let : LET CODELIT SEMI'
1358        self.updateExportContext()
1359        self.exportContext["header_output"] = ''
1360        self.exportContext["decoder_output"] = ''
1361        self.exportContext["exec_output"] = ''
1362        self.exportContext["decode_block"] = ''
1363        try:
1364            exec fixPythonIndentation(t[2]) in self.exportContext
1365        except Exception, exc:
1366            if debug:
1367                raise
1368            error(t, 'error: %s in global let block "%s".' % (exc, t[2]))
1369        t[0] = GenCode(self,
1370                       header_output=self.exportContext["header_output"],
1371                       decoder_output=self.exportContext["decoder_output"],
1372                       exec_output=self.exportContext["exec_output"],
1373                       decode_block=self.exportContext["decode_block"])
1374
1375    # Define the mapping from operand type extensions to C++ types and
1376    # bit widths (stored in operandTypeMap).
    def p_def_operand_types(self, t):
        'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
        # The code literal holds the contents of a Python dict display;
        # wrap it in braces and evaluate it.  No namespace is passed to
        # eval(), so the expression is evaluated in this method's own
        # scope (unlike p_def_operands, which uses exportContext).
        try:
            user_dict = eval('{' + t[3] + '}')
        except Exception, exc:
            # with the 'debug' flag set the original exception is
            # propagated instead of being reported through error()
            if debug:
                raise
            error(t,
                  'error: %s in def operand_types block "%s".' % (exc, t[3]))
        # NOTE(review): user_dict is unbound here if eval() failed and
        # error() returned -- presumably error() does not return; confirm.
        self.buildOperandTypeMap(user_dict, t.lexer.lineno)
        t[0] = GenCode(self) # contributes nothing to the output C++ file
1388
1389    # Define the mapping from operand names to operand classes and
1390    # other traits.  Stored in operandNameMap.
1391    def p_def_operands(self, t):
1392        'def_operands : DEF OPERANDS CODELIT SEMI'
1393        if not hasattr(self, 'operandTypeMap'):
1394            error(t, 'error: operand types must be defined before operands')
1395        try:
1396            user_dict = eval('{' + t[3] + '}', self.exportContext)
1397        except Exception, exc:
1398            if debug:
1399                raise
1400            error(t, 'error: %s in def operands block "%s".' % (exc, t[3]))
1401        self.buildOperandNameMap(user_dict, t.lexer.lineno)
1402        t[0] = GenCode(self) # contributes nothing to the output C++ file
1403
    # A bitfield definition looks like:
    # 'def [signed] bitfield NAME <FIRST:LAST>;'
    # where 'signed' is optional and the angle brackets are literal.
    # This generates a preprocessor macro in the output file.
1407    def p_def_bitfield_0(self, t):
1408        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
1409        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
1410        if (t[2] == 'signed'):
1411            expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
1412        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1413        t[0] = GenCode(self, header_output=hash_define)
1414
    # alternate form for single bit: 'def [signed] bitfield NAME <BIT>;'
1416    def p_def_bitfield_1(self, t):
1417        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
1418        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
1419        if (t[2] == 'signed'):
1420            expr = 'sext<%d>(%s)' % (1, expr)
1421        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1422        t[0] = GenCode(self, header_output=hash_define)
1423
1424    # alternate form for structure member: 'def bitfield <ID> <ID>'
1425    def p_def_bitfield_struct(self, t):
1426        'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
1427        if (t[2] != ''):
1428            error(t, 'error: structure bitfields are always unsigned.')
1429        expr = 'machInst.%s' % t[5]
1430        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1431        t[0] = GenCode(self, header_output=hash_define)
1432
1433    def p_id_with_dot_0(self, t):
1434        'id_with_dot : ID'
1435        t[0] = t[1]
1436
1437    def p_id_with_dot_1(self, t):
1438        'id_with_dot : ID DOT id_with_dot'
1439        t[0] = t[1] + t[2] + t[3]
1440
1441    def p_opt_signed_0(self, t):
1442        'opt_signed : SIGNED'
1443        t[0] = t[1]
1444
1445    def p_opt_signed_1(self, t):
1446        'opt_signed : empty'
1447        t[0] = ''
1448
1449    def p_def_template(self, t):
1450        'def_template : DEF TEMPLATE ID CODELIT SEMI'
1451        self.templateMap[t[3]] = Template(self, t[4])
1452        t[0] = GenCode(self)
1453
1454    # An instruction format definition looks like
1455    # "def format <fmt>(<params>) {{...}};"
1456    def p_def_format(self, t):
1457        'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
1458        (id, params, code) = (t[3], t[5], t[7])
1459        self.defFormat(id, params, code, t.lexer.lineno)
1460        t[0] = GenCode(self)
1461
1462    # The formal parameter list for an instruction format is a
1463    # possibly empty list of comma-separated parameters.  Positional
1464    # (standard, non-keyword) parameters must come first, followed by
1465    # keyword parameters, followed by a '*foo' parameter that gets
1466    # excess positional arguments (as in Python).  Each of these three
1467    # parameter categories is optional.
1468    #
1469    # Note that we do not support the '**foo' parameter for collecting
1470    # otherwise undefined keyword args.  Otherwise the parameter list
1471    # is (I believe) identical to what is supported in Python.
1472    #
    # The param list generates a flat list of strings: the positional
    # parameter names first, then one 'name = value' string per keyword
    # parameter, then '*name' for the excess-arguments parameter.
1476    def p_param_list_0(self, t):
1477        'param_list : positional_param_list COMMA nonpositional_param_list'
1478        t[0] = t[1] + t[3]
1479
1480    def p_param_list_1(self, t):
1481        '''param_list : positional_param_list
1482                      | nonpositional_param_list'''
1483        t[0] = t[1]
1484
1485    def p_positional_param_list_0(self, t):
1486        'positional_param_list : empty'
1487        t[0] = []
1488
1489    def p_positional_param_list_1(self, t):
1490        'positional_param_list : ID'
1491        t[0] = [t[1]]
1492
1493    def p_positional_param_list_2(self, t):
1494        'positional_param_list : positional_param_list COMMA ID'
1495        t[0] = t[1] + [t[3]]
1496
1497    def p_nonpositional_param_list_0(self, t):
1498        'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
1499        t[0] = t[1] + t[3]
1500
1501    def p_nonpositional_param_list_1(self, t):
1502        '''nonpositional_param_list : keyword_param_list
1503                                    | excess_args_param'''
1504        t[0] = t[1]
1505
1506    def p_keyword_param_list_0(self, t):
1507        'keyword_param_list : keyword_param'
1508        t[0] = [t[1]]
1509
1510    def p_keyword_param_list_1(self, t):
1511        'keyword_param_list : keyword_param_list COMMA keyword_param'
1512        t[0] = t[1] + [t[3]]
1513
1514    def p_keyword_param(self, t):
1515        'keyword_param : ID EQUALS expr'
1516        t[0] = t[1] + ' = ' + t[3].__repr__()
1517
1518    def p_excess_args_param(self, t):
1519        'excess_args_param : ASTERISK ID'
1520        # Just concatenate them: '*ID'.  Wrap in list to be consistent
1521        # with positional_param_list and keyword_param_list.
1522        t[0] = [t[1] + t[2]]
1523
1524    # End of format definition-related rules.
1525    ##############
1526
1527    #
1528    # A decode block looks like:
    #       decode <field> [default <inst>] { ... }
1530    #
1531    def p_decode_block(self, t):
1532        'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
1533        default_defaults = self.defaultStack.pop()
1534        codeObj = t[5]
1535        # use the "default defaults" only if there was no explicit
1536        # default statement in decode_stmt_list
1537        if not codeObj.has_decode_default:
1538            codeObj += default_defaults
1539        codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
1540        t[0] = codeObj
1541
1542    # The opt_default statement serves only to push the "default
1543    # defaults" onto defaultStack.  This value will be used by nested
1544    # decode blocks, and used and popped off when the current
1545    # decode_block is processed (in p_decode_block() above).
1546    def p_opt_default_0(self, t):
1547        'opt_default : empty'
1548        # no default specified: reuse the one currently at the top of
1549        # the stack
1550        self.defaultStack.push(self.defaultStack.top())
1551        # no meaningful value returned
1552        t[0] = None
1553
1554    def p_opt_default_1(self, t):
1555        'opt_default : DEFAULT inst'
1556        # push the new default
1557        codeObj = t[2]
1558        codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
1559        self.defaultStack.push(codeObj)
1560        # no meaningful value returned
1561        t[0] = None
1562
1563    def p_decode_stmt_list_0(self, t):
1564        'decode_stmt_list : decode_stmt'
1565        t[0] = t[1]
1566
1567    def p_decode_stmt_list_1(self, t):
1568        'decode_stmt_list : decode_stmt decode_stmt_list'
1569        if (t[1].has_decode_default and t[2].has_decode_default):
1570            error(t, 'Two default cases in decode block')
1571        t[0] = t[1] + t[2]
1572
1573    #
1574    # Decode statement rules
1575    #
1576    # There are four types of statements allowed in a decode block:
1577    # 1. Format blocks 'format <foo> { ... }'
1578    # 2. Nested decode blocks
1579    # 3. Instruction definitions.
1580    # 4. C preprocessor directives.
1581
1582
1583    # Preprocessor directives found in a decode statement list are
1584    # passed through to the output, replicated to all of the output
1585    # code streams.  This works well for ifdefs, so we can ifdef out
1586    # both the declarations and the decode cases generated by an
1587    # instruction definition.  Handling them as part of the grammar
1588    # makes it easy to keep them in the right place with respect to
1589    # the code generated by the other statements.
1590    def p_decode_stmt_cpp(self, t):
1591        'decode_stmt : CPPDIRECTIVE'
1592        t[0] = GenCode(self, t[1], t[1], t[1], t[1])
1593
1594    # A format block 'format <foo> { ... }' sets the default
1595    # instruction format used to handle instruction definitions inside
1596    # the block.  This format can be overridden by using an explicit
1597    # format on the instruction definition or with a nested format
1598    # block.
1599    def p_decode_stmt_format(self, t):
1600        'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
1601        # The format will be pushed on the stack when 'push_format_id'
1602        # is processed (see below).  Once the parser has recognized
1603        # the full production (though the right brace), we're done
1604        # with the format, so now we can pop it.
1605        self.formatStack.pop()
1606        t[0] = t[4]
1607
1608    # This rule exists so we can set the current format (& push the
1609    # stack) when we recognize the format name part of the format
1610    # block.
1611    def p_push_format_id(self, t):
1612        'push_format_id : ID'
1613        try:
1614            self.formatStack.push(self.formatMap[t[1]])
1615            t[0] = ('', '// format %s' % t[1])
1616        except KeyError:
1617            error(t, 'instruction format "%s" not defined.' % t[1])
1618
1619    # Nested decode block: if the value of the current field matches
1620    # the specified constant, do a nested decode on some other field.
1621    def p_decode_stmt_decode(self, t):
1622        'decode_stmt : case_label COLON decode_block'
1623        label = t[1]
1624        codeObj = t[3]
1625        # just wrap the decoding code from the block as a case in the
1626        # outer switch statement.
1627        codeObj.wrap_decode_block('\n%s:\n' % label)
1628        codeObj.has_decode_default = (label == 'default')
1629        t[0] = codeObj
1630
1631    # Instruction definition (finally!).
1632    def p_decode_stmt_inst(self, t):
1633        'decode_stmt : case_label COLON inst SEMI'
1634        label = t[1]
1635        codeObj = t[3]
1636        codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
1637        codeObj.has_decode_default = (label == 'default')
1638        t[0] = codeObj
1639
1640    # The case label is either a list of one or more constants or
1641    # 'default'
1642    def p_case_label_0(self, t):
1643        'case_label : intlit_list'
1644        def make_case(intlit):
1645            if intlit >= 2**32:
1646                return 'case ULL(%#x)' % intlit
1647            else:
1648                return 'case %#x' % intlit
1649        t[0] = ': '.join(map(make_case, t[1]))
1650
1651    def p_case_label_1(self, t):
1652        'case_label : DEFAULT'
1653        t[0] = 'default'
1654
1655    #
1656    # The constant list for a decode case label must be non-empty, but
1657    # may have one or more comma-separated integer literals in it.
1658    #
1659    def p_intlit_list_0(self, t):
1660        'intlit_list : INTLIT'
1661        t[0] = [t[1]]
1662
1663    def p_intlit_list_1(self, t):
1664        'intlit_list : intlit_list COMMA INTLIT'
1665        t[0] = t[1]
1666        t[0].append(t[3])
1667
1668    # Define an instruction using the current instruction format
1669    # (specified by an enclosing format block).
1670    # "<mnemonic>(<args>)"
1671    def p_inst_0(self, t):
1672        'inst : ID LPAREN arg_list RPAREN'
1673        # Pass the ID and arg list to the current format class to deal with.
1674        currentFormat = self.formatStack.top()
1675        codeObj = currentFormat.defineInst(self, t[1], t[3], t.lexer.lineno)
1676        args = ','.join(map(str, t[3]))
1677        args = re.sub('(?m)^', '//', args)
1678        args = re.sub('^//', '', args)
1679        comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
1680        codeObj.prepend_all(comment)
1681        t[0] = codeObj
1682
1683    # Define an instruction using an explicitly specified format:
1684    # "<fmt>::<mnemonic>(<args>)"
1685    def p_inst_1(self, t):
1686        'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
1687        try:
1688            format = self.formatMap[t[1]]
1689        except KeyError:
1690            error(t, 'instruction format "%s" not defined.' % t[1])
1691
1692        codeObj = format.defineInst(self, t[3], t[5], t.lexer.lineno)
1693        comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
1694        codeObj.prepend_all(comment)
1695        t[0] = codeObj
1696
1697    # The arg list generates a tuple, where the first element is a
1698    # list of the positional args and the second element is a dict
1699    # containing the keyword args.
1700    def p_arg_list_0(self, t):
1701        'arg_list : positional_arg_list COMMA keyword_arg_list'
1702        t[0] = ( t[1], t[3] )
1703
1704    def p_arg_list_1(self, t):
1705        'arg_list : positional_arg_list'
1706        t[0] = ( t[1], {} )
1707
1708    def p_arg_list_2(self, t):
1709        'arg_list : keyword_arg_list'
1710        t[0] = ( [], t[1] )
1711
1712    def p_positional_arg_list_0(self, t):
1713        'positional_arg_list : empty'
1714        t[0] = []
1715
1716    def p_positional_arg_list_1(self, t):
1717        'positional_arg_list : expr'
1718        t[0] = [t[1]]
1719
1720    def p_positional_arg_list_2(self, t):
1721        'positional_arg_list : positional_arg_list COMMA expr'
1722        t[0] = t[1] + [t[3]]
1723
1724    def p_keyword_arg_list_0(self, t):
1725        'keyword_arg_list : keyword_arg'
1726        t[0] = t[1]
1727
1728    def p_keyword_arg_list_1(self, t):
1729        'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
1730        t[0] = t[1]
1731        t[0].update(t[3])
1732
1733    def p_keyword_arg(self, t):
1734        'keyword_arg : ID EQUALS expr'
1735        t[0] = { t[1] : t[3] }
1736
1737    #
1738    # Basic expressions.  These constitute the argument values of
1739    # "function calls" (i.e. instruction definitions in the decode
1740    # block) and default values for formal parameters of format
1741    # functions.
1742    #
1743    # Right now, these are either strings, integers, or (recursively)
1744    # lists of exprs (using Python square-bracket list syntax).  Note
    # that bare identifiers are treated as string constants here (since
1746    # there isn't really a variable namespace to refer to).
1747    #
1748    def p_expr_0(self, t):
1749        '''expr : ID
1750                | INTLIT
1751                | STRLIT
1752                | CODELIT'''
1753        t[0] = t[1]
1754
1755    def p_expr_1(self, t):
1756        '''expr : LBRACKET list_expr RBRACKET'''
1757        t[0] = t[2]
1758
1759    def p_list_expr_0(self, t):
1760        'list_expr : expr'
1761        t[0] = [t[1]]
1762
1763    def p_list_expr_1(self, t):
1764        'list_expr : list_expr COMMA expr'
1765        t[0] = t[1] + [t[3]]
1766
1767    def p_list_expr_2(self, t):
1768        'list_expr : empty'
1769        t[0] = []
1770
1771    #
1772    # Empty production... use in other rules for readability.
1773    #
1774    def p_empty(self, t):
1775        'empty :'
1776        pass
1777
1778    # Parse error handler.  Note that the argument here is the
1779    # offending *token*, not a grammar symbol (hence the need to use
1780    # t.value)
1781    def p_error(self, t):
1782        if t:
1783            error(t, "syntax error at '%s'" % t.value)
1784        else:
1785            error("unknown syntax error")
1786
1787    # END OF GRAMMAR RULES
1788
1789    def updateExportContext(self):
1790
1791        # create a continuation that allows us to grab the current parser
1792        def wrapInstObjParams(*args):
1793            return InstObjParams(self, *args)
1794        self.exportContext['InstObjParams'] = wrapInstObjParams
1795        self.exportContext.update(self.templateMap)
1796
1797    def defFormat(self, id, params, code, lineno):
1798        '''Define a new format'''
1799
1800        # make sure we haven't already defined this one
1801        if id in self.formatMap:
1802            error(lineno, 'format %s redefined.' % id)
1803
1804        # create new object and store in global map
1805        self.formatMap[id] = Format(id, params, code)
1806
1807    def expandCpuSymbolsToDict(self, template):
1808        '''Expand template with CPU-specific references into a
1809        dictionary with an entry for each CPU model name.  The entry
1810        key is the model name and the corresponding value is the
1811        template with the CPU-specific refs substituted for that
1812        model.'''
1813
1814        # Protect '%'s that don't go with CPU-specific terms
1815        t = re.sub(r'%(?!\(CPU_)', '%%', template)
1816        result = {}
1817        for cpu in self.cpuModels:
1818            result[cpu.name] = t % cpu.strings
1819        return result
1820
1821    def expandCpuSymbolsToString(self, template):
1822        '''*If* the template has CPU-specific references, return a
1823        single string containing a copy of the template for each CPU
1824        model with the corresponding values substituted in.  If the
1825        template has no CPU-specific references, it is returned
1826        unmodified.'''
1827
1828        if template.find('%(CPU_') != -1:
1829            return reduce(lambda x,y: x+y,
1830                          self.expandCpuSymbolsToDict(template).values())
1831        else:
1832            return template
1833
1834    def protectCpuSymbols(self, template):
1835        '''Protect CPU-specific references by doubling the
1836        corresponding '%'s (in preparation for substituting a different
1837        set of references into the template).'''
1838
1839        return re.sub(r'%(?=\(CPU_)', '%%', template)
1840
1841    def protectNonSubstPercents(self, s):
1842        '''Protect any non-dict-substitution '%'s in a format string
1843        (i.e. those not followed by '(')'''
1844
1845        return re.sub(r'%(?!\()', '%%', s)
1846
1847    def buildOperandTypeMap(self, user_dict, lineno):
1848        """Generate operandTypeMap from the user's 'def operand_types'
1849        statement."""
1850        operand_type = {}
1851        for (ext, (desc, size)) in user_dict.iteritems():
1852            if desc == 'signed int':
1853                ctype = 'int%d_t' % size
1854                is_signed = 1
1855            elif desc == 'unsigned int':
1856                ctype = 'uint%d_t' % size
1857                is_signed = 0
1858            elif desc == 'float':
1859                is_signed = 1       # shouldn't really matter
1860                if size == 32:
1861                    ctype = 'float'
1862                elif size == 64:
1863                    ctype = 'double'
1864            elif desc == 'twin64 int':
1865                is_signed = 0
1866                ctype = 'Twin64_t'
1867            elif desc == 'twin32 int':
1868                is_signed = 0
1869                ctype = 'Twin32_t'
1870            if ctype == '':
1871                error(parser, lineno,
1872                      'Unrecognized type description "%s" in user_dict')
1873            operand_type[ext] = (size, ctype, is_signed)
1874
1875        self.operandTypeMap = operand_type
1876
    def buildOperandNameMap(self, user_dict, lineno):
        """Generate operandNameMap and the operand-matching regular
        expressions from a dictionary of operand definitions.

        user_dict maps each operand name to a sequence whose first five
        items are (base class name, default type extension, register
        spec, flags, sort priority), optionally followed by read code
        and write code; more than seven items is reported as an error
        at lineno.  For every operand a new class named
        '<base class name>_<operand name>' is created and stored in
        self.operandNameMap under the operand name.  self.operandsRE
        and self.operandsWithExtRE are then compiled from the operand
        names.
        """
        operand_name = {}
        for op_name, val in user_dict.iteritems():
            base_cls_name, dflt_ext, reg_spec, flags, sort_pri = val[:5]
            # Items six and seven are optional and default to None.
            if len(val) > 5:
                read_code = val[5]
            else:
                read_code = None
            if len(val) > 6:
                write_code = val[6]
            else:
                write_code = None
            if len(val) > 7:
                error(lineno,
                      'error: too many attributes for operand "%s"' %
                      base_cls_name)

            # Canonical flag structure is a triple of lists, where each list
            # indicates the set of flags implied by this operand always, when
            # used as a source, and when used as a dest, respectively.
            # For simplicity this can be initialized using a variety of fairly
            # obvious shortcuts; we convert these to canonical form here.
            # (A value of any other type falls through unchanged.)
            if not flags:
                # no flags specified (e.g., 'None')
                flags = ( [], [], [] )
            elif isinstance(flags, str):
                # a single flag: assumed to be unconditional
                flags = ( [ flags ], [], [] )
            elif isinstance(flags, list):
                # a list of flags: also assumed to be unconditional
                flags = ( flags, [], [] )
            elif isinstance(flags, tuple):
                # it's a tuple: it should be a triple,
                # but each item could be a single string or a list
                (uncond_flags, src_flags, dest_flags) = flags
                flags = (makeList(uncond_flags),
                         makeList(src_flags), makeList(dest_flags))
            # Accumulate attributes of new operand class in tmp_dict
            tmp_dict = {}
            attrList = ['reg_spec', 'flags', 'sort_pri',
                        'read_code', 'write_code']
            if dflt_ext:
                # The default-type attributes are only added when a
                # default extension is given; it must be a key of
                # operandTypeMap.
                (dflt_size, dflt_ctype, dflt_is_signed) = \
                            self.operandTypeMap[dflt_ext]
                attrList.extend(['dflt_size', 'dflt_ctype',
                                 'dflt_is_signed', 'dflt_ext'])
            # eval(attr) reads the local variable whose name is in attr,
            # so every entry of attrList must match the name of a local
            # variable assigned above.
            for attr in attrList:
                tmp_dict[attr] = eval(attr)
            tmp_dict['base_name'] = op_name
            # New class name will be e.g. "IntReg_Ra"
            cls_name = base_cls_name + '_' + op_name
            # Evaluate string arg to get class object.  Note that the
            # actual base class for "IntReg" is "IntRegOperand", i.e. we
            # have to append "Operand".
            try:
                base_cls = eval(base_cls_name + 'Operand')
            except NameError:
                error(lineno,
                      'error: unknown operand base class "%s"' % base_cls_name)
            # The following statement creates a new class called
            # <cls_name> as a subclass of <base_cls> with the attributes
            # in tmp_dict, just as if we evaluated a class declaration.
            operand_name[op_name] = type(cls_name, (base_cls,), tmp_dict)

        self.operandNameMap = operand_name

        # Define operand variables.
        operands = user_dict.keys()

        # Verbose-mode pattern with three groups: the full reference,
        # the operand name, and the optional extension.  The operand
        # names are inserted as a '|'-separated alternation.
        operandsREString = (r'''
        (?<![\w\.])      # neg. lookbehind assertion: prevent partial matches
        ((%s)(?:\.(\w+))?)   # match: operand with optional '.' then suffix
        (?![\w\.])       # neg. lookahead assertion: prevent partial matches
        '''
                            % string.join(operands, '|'))

        self.operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)

        # Same as operandsREString, but extension is mandatory, and only two
        # groups are returned (base and ext, not full name as above).
        # Used by substMungedOpNames() to replace '<operand>.<ext>' with
        # the bare operand name.
        operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])'
                                   % string.join(operands, '|'))

        self.operandsWithExtRE = \
            re.compile(operandsWithExtREString, re.MULTILINE)
1963
1964    def substMungedOpNames(self, code):
1965        '''Munge operand names in code string to make legal C++
1966        variable names.  This means getting rid of the type extension
1967        if any.  Will match base_name attribute of Operand object.)'''
1968        return self.operandsWithExtRE.sub(r'\1', code)
1969
1970    def mungeSnippet(self, s):
1971        '''Fix up code snippets for final substitution in templates.'''
1972        if isinstance(s, str):
1973            return self.substMungedOpNames(substBitOps(s))
1974        else:
1975            return s
1976
1977    def update_if_needed(self, file, contents):
1978        '''Update the output file only if the new contents are
1979        different from the current contents.  Minimizes the files that
1980        need to be rebuilt after minor changes.'''
1981
1982        file = os.path.join(self.output_dir, file)
1983        update = False
1984        if os.access(file, os.R_OK):
1985            f = open(file, 'r')
1986            old_contents = f.read()
1987            f.close()
1988            if contents != old_contents:
1989                os.remove(file) # in case it's write-protected
1990                update = True
1991            else:
1992                print 'File', file, 'is unchanged'
1993        else:
1994            update = True
1995        if update:
1996            f = open(file, 'w')
1997            f.write(contents)
1998            f.close()
1999
    # This regular expression matches '##include' directives: optional
    # leading whitespace, '##include', whitespace, and a double-quoted
    # file name captured as group 'filename' (word characters, '/', '.'
    # and '-' only).  Whatever follows the closing quote up to the end
    # of the line is part of the match as well.
    includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[\w/.-]*)".*$',
                           re.MULTILINE)
2003
2004    def replace_include(self, matchobj, dirname):
2005        """Function to replace a matched '##include' directive with the
2006        contents of the specified file (with nested ##includes
2007        replaced recursively).  'matchobj' is an re match object
2008        (from a match of includeRE) and 'dirname' is the directory
2009        relative to which the file path should be resolved."""
2010
2011        fname = matchobj.group('filename')
2012        full_fname = os.path.normpath(os.path.join(dirname, fname))
2013        contents = '##newfile "%s"\n%s\n##endfile\n' % \
2014                   (full_fname, self.read_and_flatten(full_fname))
2015        return contents
2016
2017    def read_and_flatten(self, filename):
2018        """Read a file and recursively flatten nested '##include' files."""
2019
2020        current_dir = os.path.dirname(filename)
2021        try:
2022            contents = open(filename).read()
2023        except IOError:
2024            error('Error including file "%s"' % filename)
2025
2026        self.fileNameStack.push((filename, 0))
2027
2028        # Find any includes and include them
2029        def replace(matchobj):
2030            return self.replace_include(matchobj, current_dir)
2031        contents = self.includeRE.sub(replace, contents)
2032
2033        self.fileNameStack.pop()
2034        return contents
2035
    def _parse_isa_desc(self, isa_desc_file):
        '''Read in and parse the ISA description.

        The flattened description is parsed and the results are written
        through update_if_needed() to decoder.hh, decoder.cc, one file
        per CPU model (named by the model's 'filename') and
        max_inst_regs.hh.

        The output templates are filled in from vars(), i.e. from this
        function's local variables, so the local names assigned below
        are presumably the keys the templates refer to -- verify against
        file_template and max_inst_regs_template before renaming any.'''

        # Read file and (recursively) all included files into a string.
        # PLY requires that the input be in a single string so we have to
        # do this up front.
        isa_desc = self.read_and_flatten(isa_desc_file)

        # Initialize filename stack with outer file.
        self.fileNameStack.push((isa_desc_file, 0))

        # Parse it.
        (isa_name, namespace, global_code, namespace_code) = \
                   self.parse(isa_desc)

        # grab the last three path components of isa_desc_file to put in
        # the output
        filename = '/'.join(isa_desc_file.split('/')[-3:])

        # generate decoder.hh
        includes = '#include "base/bitfield.hh" // for bitfield support'
        global_output = global_code.header_output
        namespace_output = namespace_code.header_output
        decode_function = ''
        self.update_if_needed('decoder.hh', file_template % vars())

        # generate decoder.cc
        includes = '#include "decoder.hh"'
        global_output = global_code.decoder_output
        namespace_output = namespace_code.decoder_output
        # namespace_output += namespace_code.decode_block
        decode_function = namespace_code.decode_block
        self.update_if_needed('decoder.cc', file_template % vars())

        # generate per-cpu exec files
        for cpu in self.cpuModels:
            includes = '#include "decoder.hh"\n'
            includes += cpu.includes
            global_output = global_code.exec_output[cpu.name]
            namespace_output = namespace_code.exec_output[cpu.name]
            decode_function = ''
            self.update_if_needed(cpu.filename, file_template % vars())

        # The variable names here are hacky, but this will create local
        # variables which will be referenced in vars() which have the
        # value of the globals.
        MaxInstSrcRegs = self.maxInstSrcRegs
        MaxInstDestRegs = self.maxInstDestRegs
        # max_inst_regs.hh
        self.update_if_needed('max_inst_regs.hh',
                              max_inst_regs_template % vars())
2087
2088    def parse_isa_desc(self, *args, **kwargs):
2089        try:
2090            self._parse_isa_desc(*args, **kwargs)
2091        except ISAParserError, e:
2092            e.exit(self.fileNameStack)
2093
2094# Called as script: get args from command line.
2095# Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models>
if __name__ == '__main__':
    # argv[1] is executed in this namespace; it is expected to define
    # CpuModel, which is used right below.
    execfile(sys.argv[1])  # read in CpuModel definitions
    # Every argument from argv[4] on names a CPU model to generate for.
    cpu_models = [CpuModel.dict[model_name] for model_name in sys.argv[4:]]
    # argv[3] is the output directory, argv[2] the ISA description file.
    isa_parser = ISAParser(sys.argv[3], cpu_models)
    isa_parser.parse_isa_desc(sys.argv[2])
2100