isa_parser.py revision 7816:b5003ac75977
1# Copyright (c) 2003-2005 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26#
27# Authors: Steve Reinhardt
28
29import os
30import sys
31import re
32import string
33import inspect, traceback
34# get type names
35from types import *
36
37from m5.util.grammar import Grammar
38
# Module-wide debug switch.  It is the default for the 'print_traceback'
# argument of ISAParserError.display()/exit(), and when true
# Format.defineInst() re-raises the original exception instead of
# converting it into a parser error.
debug=False
40
41###################
42# Utility functions
43
44#
45# Indent every line in string 's' by two spaces
46# (except preprocessor directives).
47# Used to make nested code blocks look pretty.
48#
def indent(s):
    """Return 's' with two spaces prepended to each line.

    Lines that start with '#' (preprocessor directives) are left as they
    are.  Only newline characters separate lines, and the (possibly
    empty) text following the last newline is treated as a line as well.
    """
    shifted = []
    for line in s.split('\n'):
        if line.startswith('#'):
            shifted.append(line)
        else:
            shifted.append('  ' + line)
    return '\n'.join(shifted)
51
52#
53# Munge a somewhat arbitrarily formatted piece of Python code
54# (e.g. from a format 'let' block) into something whose indentation
55# will get by the Python parser.
56#
57# The two keys here are that Python will give a syntax error if
58# there's any whitespace at the beginning of the first line, and that
59# all lines at the same lexical nesting level must have identical
60# indentation.  Unfortunately the way code literals work, an entire
61# let block tends to have some initial indentation.  Rather than
62# trying to figure out what that is and strip it off, we prepend 'if
63# 1:' to make the let code the nested block inside the if (and have
64# the parser automatically deal with the indentation for us).
65#
66# We don't want to do this if (1) the code block is empty or (2) the
67# first line of the block doesn't have any whitespace at the front.
68
def fixPythonIndentation(s):
    """Make an arbitrarily indented Python snippet acceptable to compile().

    Blank (whitespace-only) lines are dropped.  If what remains starts
    with a space or tab, the snippet is nested under an 'if 1:' header so
    that its leading indentation becomes legal; otherwise it is returned
    without a header.
    """
    # get rid of blank lines first
    stripped = re.sub(r'(?m)^\s*\n', '', s)
    if stripped and stripped[0] in ' \t':
        return 'if 1:\n' + stripped
    return stripped
75
class ISAParserError(Exception):
    """Exception describing an error found while parsing.

    Construct it either with a single message, or with a line indicator
    followed by a message.  The line indicator may be a line number or
    any object with a 'lexer' attribute, in which case 'lexer.lineno' is
    used.  Instances expose 'lineno' (0 when no line was given) and
    'string' (the message).
    """
    def __init__(self, first, second=None):
        if second is None:
            self.lineno = 0
            self.string = first
        else:
            if hasattr(first, 'lexer'):
                first = first.lexer.lineno
            self.lineno = first
            self.string = second

    def __str__(self):
        # Report just the message.  The inherited conversion does not do
        # that (it is derived from the raw constructor arguments), which
        # garbles messages that embed a caught ISAParserError via '%s'.
        return str(self.string)

    def display(self, filename_stack, print_traceback=None):
        """Return the error formatted for Emacs compile-mode.

        'filename_stack' is a sequence of (filename, line) pairs; every
        entry but the last is printed to stdout as an "In file included
        from" line, and the last entry names the file the error is
        reported against.  A Python stack backtrace is printed when
        'print_traceback' is true or when no line number is known (handy
        when debugging the parser itself).  If 'print_traceback' is left
        as None, the current value of the module-level 'debug' flag is
        used.
        """
        # Look the flag up at call time so that changing 'debug' after
        # this class has been defined still takes effect.
        if print_traceback is None:
            print_traceback = debug

        spaces = ""
        for (filename, line) in filename_stack[:-1]:
            sys.stdout.write("%sIn file included from %s:\n"
                             % (spaces, filename))
            spaces += "  "

        # Print a Python stack backtrace if requested.
        if print_traceback or not self.lineno:
            traceback.print_exc()

        line_str = "%s:" % (filename_stack[-1][0], )
        if self.lineno:
            line_str += "%d:" % (self.lineno, )

        return "%s%s %s" % (spaces, line_str, self.string)

    def exit(self, filename_stack, print_traceback=None):
        """Terminate via sys.exit() with the display() text as the status."""
        sys.exit(self.display(filename_stack, print_traceback))
113
def error(*args):
    """Raise an ISAParserError constructed from 'args'.

    The arguments are passed through unchanged to the ISAParserError
    constructor, i.e. either (message,) or (line_indicator, message).
    """
    parser_error = ISAParserError(*args)
    raise parser_error
116
117####################
118# Template objects.
119#
120# Template objects are format strings that allow substitution from
121# the attribute spaces of other objects (e.g. InstObjParams instances).
122
# Matches a '%(label)s' or '%(label)d' substitution that is not
# immediately preceded by another '%'; group 1 captures the label.
labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
124
class Template(object):
    """A format string whose '%(name)s' fields are filled in by subst().

    'parser' supplies the template namespace and the percent/CPU-symbol
    helpers; 't' is the raw template text.
    """
    def __init__(self, parser, t):
        self.parser = parser
        self.template = t

    def subst(self, d):
        """Return the template text with substitutions drawn from 'd'.

        'd' may be an InstObjParams instance, a dictionary, or any object
        with a '__dict__'.  The parser's template namespace is always
        available for substitution as well; values from 'd' take
        precedence.  For an InstObjParams the operand-related strings
        ('op_decl', 'op_src_decl', 'op_dest_decl', 'op_rd', 'op_wb' and,
        when there is a memory operand, 'mem_acc_size'/'mem_acc_type')
        are generated from the snippets the template references.

        Raises TypeError for any other kind of argument.
        """
        # Protect non-Python-dict substitutions (e.g. if there's a printf
        # in the templated C++ code)
        template = self.parser.protectNonSubstPercents(self.template)
        # CPU-model-specific substitutions are handled later (in GenCode).
        template = self.parser.protectCpuSymbols(template)

        # Build a dict ('myDict') to use for the template substitution.
        # Start with the template namespace.  Make a copy since we're
        # going to modify it.
        myDict = self.parser.templateMap.copy()

        if isinstance(d, InstObjParams):
            # If we're dealing with an InstObjParams object, we need
            # to be a little more sophisticated.  The instruction-wide
            # parameters are already formed, but the parameters which
            # are only function wide still need to be generated.
            myDict.update(d.__dict__)
            # The "operands" and "snippets" attributes of the InstObjParams
            # objects are for internal use and not substitution.
            del myDict['operands']
            del myDict['snippets']

            # Only the snippets this template actually refers to matter.
            snippetLabels = [label for label in labelRE.findall(template)
                             if label in d.snippets]

            snippets = dict([(s, self.parser.mungeSnippet(d.snippets[s]))
                             for s in snippetLabels])

            myDict.update(snippets)

            compositeCode = ' '.join(map(str, snippets.values()))

            # Add in template itself in case it references any
            # operands explicitly (like Mem)
            compositeCode += ' ' + template

            operands = SubOperandList(self.parser, compositeCode, d.operands)

            myDict['op_decl'] = operands.concatAttrStrings('op_decl')
            if operands.readPC or operands.setPC:
                myDict['op_decl'] += 'TheISA::PCState __parserAutoPCState;\n'

            is_src = lambda op: op.is_src
            is_dest = lambda op: op.is_dest

            myDict['op_src_decl'] = \
                      operands.concatSomeAttrStrings(is_src, 'op_src_decl')
            myDict['op_dest_decl'] = \
                      operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')

            myDict['op_rd'] = operands.concatAttrStrings('op_rd')
            if operands.readPC:
                myDict['op_rd'] = '__parserAutoPCState = xc->pcState();\n' + \
                                  myDict['op_rd']

            # Compose the op_wb string. If we're going to write back the
            # PC state because we changed some of its elements, we'll need to
            # do that as early as possible. That allows later uncoordinated
            # modifications to the PC to layer appropriately.
            #
            # The operands are walked last-to-first and each write-back is
            # prepended, so the result keeps the original operand order.
            # The PC state write-back is emitted once, attached to the
            # first PC-part destination met during that reverse walk.
            reordered = list(operands.items)
            reordered.reverse()
            op_wb_str = ''
            pcWbStr = 'xc->pcState(__parserAutoPCState);\n'
            for op_desc in reordered:
                if op_desc.isPCPart() and op_desc.is_dest:
                    op_wb_str = op_desc.op_wb + pcWbStr + op_wb_str
                    pcWbStr = ''
                else:
                    op_wb_str = op_desc.op_wb + op_wb_str
            myDict['op_wb'] = op_wb_str

            if d.operands.memOperand:
                myDict['mem_acc_size'] = d.operands.memOperand.mem_acc_size
                myDict['mem_acc_type'] = d.operands.memOperand.mem_acc_type

        elif isinstance(d, dict):
            # if the argument is a dictionary, we just use it.
            myDict.update(d)
        elif hasattr(d, '__dict__'):
            # if the argument is an object, we use its attribute map.
            myDict.update(d.__dict__)
        else:
            raise TypeError("Template.subst() arg must be or have dictionary")
        return template % myDict

    # Convert to string.  This handles the case when a template with a
    # CPU-specific term gets interpolated into another template or into
    # an output block.
    def __str__(self):
        return self.parser.expandCpuSymbolsToString(self.template)
225
226################
227# Format object.
228#
229# A format object encapsulates an instruction format.  It must provide
230# a defineInst() method that generates the code for an instruction
231# definition.
232
233class Format(object):
234    def __init__(self, id, params, code):
235        self.id = id
236        self.params = params
237        label = 'def format ' + id
238        self.user_code = compile(fixPythonIndentation(code), label, 'exec')
239        param_list = string.join(params, ", ")
240        f = '''def defInst(_code, _context, %s):
241                my_locals = vars().copy()
242                exec _code in _context, my_locals
243                return my_locals\n''' % param_list
244        c = compile(f, label + ' wrapper', 'exec')
245        exec c
246        self.func = defInst
247
248    def defineInst(self, parser, name, args, lineno):
249        parser.updateExportContext()
250        context = parser.exportContext.copy()
251        if len(name):
252            Name = name[0].upper()
253            if len(name) > 1:
254                Name += name[1:]
255        context.update({ 'name' : name, 'Name' : Name })
256        try:
257            vars = self.func(self.user_code, context, *args[0], **args[1])
258        except Exception, exc:
259            if debug:
260                raise
261            error(lineno, 'error defining "%s": %s.' % (name, exc))
262        for k in vars.keys():
263            if k not in ('header_output', 'decoder_output',
264                         'exec_output', 'decode_block'):
265                del vars[k]
266        return GenCode(parser, **vars)
267
268# Special null format to catch an implicit-format instruction
269# definition outside of any format block.
class NoFormat(object):
    """Stand-in format used when no real format is active.

    Defining an instruction through it is always reported as an error.
    """
    def __init__(self):
        self.defaultInst = ''

    def defineInst(self, parser, name, args, lineno):
        message = 'instruction definition "%s" with no active format!' % name
        error(lineno, message)
277
278###############
279# GenCode class
280#
281# The GenCode class encapsulates generated code destined for various
282# output files.  The header_output and decoder_output attributes are
283# strings containing code destined for decoder.hh and decoder.cc
284# respectively.  The decode_block attribute contains code to be
285# incorporated in the decode function itself (that will also end up in
286# decoder.cc).  The exec_output attribute is a dictionary with a key
287# for each CPU model name; the value associated with a particular key
288# is the string of code for that CPU model's exec.cc file.  The
289# has_decode_default attribute is used in the decode block to allow
290# explicit default clauses to override default default clauses.
291
class GenCode(object):
    """Bundle of generated code pieces, one per kind of output.

    'header_output', 'decoder_output' and 'decode_block' are strings;
    'exec_output' is a dictionary keyed by CPU model name.
    """

    # Constructor.  CPU-specific symbols are substituted out here: the
    # exec output is expanded into the per-model dictionary, every other
    # output type is collapsed into a single string.
    def __init__(self, parser,
                 header_output = '', decoder_output = '', exec_output = '',
                 decode_block = '', has_decode_default = False):
        to_string = parser.expandCpuSymbolsToString
        self.parser = parser
        self.header_output = to_string(header_output)
        self.decoder_output = to_string(decoder_output)
        if isinstance(exec_output, dict):
            # Already per-model: use the caller's dictionary as is.
            self.exec_output = exec_output
        elif isinstance(exec_output, str):
            # A single string is replicated for each of the CPU models,
            # substituting any %(CPU_foo)s params appropriately.
            self.exec_output = parser.expandCpuSymbolsToDict(exec_output)
        self.decode_block = to_string(decode_block)
        self.has_decode_default = has_decode_default

    # '+' operator: build a new GenCode whose strings are the
    # concatenation of the corresponding strings of both operands.
    def __add__(self, other):
        merged_exec = dict([(cpu.name,
                             self.exec_output[cpu.name] +
                             other.exec_output[cpu.name])
                            for cpu in self.parser.cpuModels])
        has_default = self.has_decode_default or other.has_decode_default
        return GenCode(self.parser,
                       self.header_output + other.header_output,
                       self.decoder_output + other.decoder_output,
                       merged_exec,
                       self.decode_block + other.decode_block,
                       has_default)

    # Put a string (typically a comment) in front of every output string.
    def prepend_all(self, pre):
        self.header_output = pre + self.header_output
        self.decoder_output = pre + self.decoder_output
        self.decode_block = pre + self.decode_block
        per_model = self.exec_output
        for cpu in self.parser.cpuModels:
            model = cpu.name
            per_model[model] = pre + per_model[model]

    # Surround the (indented) decode block with a pair of strings (e.g.,
    # 'case foo:' and 'break;').  Used to build the big nested switch
    # statement.
    def wrap_decode_block(self, pre, post = ''):
        body = indent(self.decode_block)
        self.decode_block = pre + body + post
338        self.decode_block = pre + indent(self.decode_block) + post
339
340#####################################################################
341#
342#                      Bitfield Operator Support
343#
344#####################################################################
345
# Single-bit selector, written with a trailing colon: '<n:>'.
bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')

# Two-index selector applied to a (possibly dotted) name: 'foo<a:b>'.
bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
# Two-index selector applied to an expression ending in ')': '(...)<a:b>'.
bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')

def substBitOps(code):
    """Rewrite bitfield selectors in 'code' as calls to bits().

    'foo<a:b>' becomes 'bits(foo, a, b)', '(expr)<a:b>' becomes
    'bits((expr), a, b)', and the single-bit form '<n:>' is treated as
    '<n:n>'.  Returns the rewritten code.  Calls sys.exit() if a selector
    follows a ')' that has no matching '('.
    """
    # first convert single-bit selectors to two-index form
    # i.e., <n:> --> <n:n>
    code = bitOp1ArgRE.sub(r'<\1:\1>', code)
    # simple case: selector applied to ID (name)
    # i.e., foo<a:b> --> bits(foo, a, b)
    code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
    # if selector is applied to expression (ending in ')'),
    # we need to search backward for matching '('
    match = bitOpExprRE.search(code)
    while match:
        exprEnd = match.start()
        here = exprEnd - 1
        nestLevel = 1
        while here >= 0:
            ch = code[here]
            if ch == '(':
                nestLevel -= 1
                if nestLevel == 0:
                    # Found the opening paren.  Stop before testing the
                    # bounds so that a '(' at index 0 is accepted.
                    break
            elif ch == ')':
                nestLevel += 1
            here -= 1
        else:
            # Ran off the front of the string without a matching '('.
            sys.exit("Didn't find '('!")
        exprStart = here
        newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
                                         match.group(1), match.group(2))
        code = code[:exprStart] + newExpr + code[match.end():]
        match = bitOpExprRE.search(code)
    return code
379
380
381#####################################################################
382#
383#                             Code Parser
384#
385# The remaining code is the support for automatically extracting
386# instruction characteristics from pseudocode.
387#
388#####################################################################
389
# Force the argument to be a list.  Useful for flags, where a caller
# can specify a singleton flag or a list of flags.  Also useful for
# converting tuples to lists so they can be modified.
def makeList(arg):
    """Coerce 'arg' to a list.

    A list is returned as is (not copied), a tuple is converted to a new
    list, any other false value yields an empty list, and anything else
    is wrapped in a one-element list.
    """
    if isinstance(arg, list):
        return arg
    if isinstance(arg, tuple):
        return list(arg)
    if arg:
        return [ arg ]
    return []
402
class Operand(object):
    '''Common base for operand descriptors.

    A descriptor instance (always of some class derived from this one)
    stands for one specific operand of a code block, e.g. "Rc.sq" used
    as a dest.  Intermediate derived classes encapsulate the traits of a
    particular operand type (e.g., "32-bit integer register").'''

    def buildReadCode(self, func = None):
        '''Expand self.read_code into a statement assigning the operand.'''
        fields = {"name": self.base_name,
                  "func": func,
                  "reg_idx": self.reg_spec,
                  "size": self.size,
                  "ctype": self.ctype}
        if hasattr(self, 'src_reg_idx'):
            fields['op_idx'] = self.src_reg_idx
        value = self.read_code % fields
        if self.size == self.dflt_size:
            return '%s = %s;\n' % (self.base_name, value)
        # Non-default size: select bits size-1..0 of the value read.
        return '%s = bits(%s, %d, 0);\n' % \
               (self.base_name, value, self.size-1)

    def buildWriteCode(self, func = None):
        '''Expand self.write_code into a block that writes the operand.'''
        # Signed operands of non-default size are sign-extended first.
        if (self.size != self.dflt_size and self.is_signed):
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        else:
            final_val = self.base_name
        fields = {"name": self.base_name,
                  "func": func,
                  "reg_idx": self.reg_spec,
                  "size": self.size,
                  "ctype": self.ctype,
                  "final_val": final_val}
        if hasattr(self, 'dest_reg_idx'):
            fields['op_idx'] = self.dest_reg_idx
        statement = self.write_code % fields
        return '''
        {
            %s final_val = %s;
            %s;
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, statement)

    def __init__(self, parser, full_name, ext, is_src, is_dest):
        self.full_name = full_name
        self.ext = ext
        self.is_src = is_src
        self.is_dest = is_dest
        # The 'effective extension' (eff_ext) is the explicit extension
        # when there is one, otherwise the class default (if any).
        if ext:
            self.eff_ext = ext
        elif hasattr(self, 'dflt_ext'):
            self.eff_ext = self.dflt_ext

        if hasattr(self, 'eff_ext'):
            type_info = parser.operandTypeMap[self.eff_ext]
            self.size, self.ctype, self.is_signed = type_info

        # mem_acc_size/mem_acc_type exist only for memory operands;
        # templates must not use them for anything else.
        if self.isMem():
            self.mem_acc_size = self.makeAccSize()
            if self.ctype in ('Twin32_t', 'Twin64_t'):
                self.mem_acc_type = 'Twin'
            else:
                self.mem_acc_type = 'uint'

    # Second construction step, filling in the (mostly code) fields.  It
    # is separate from __init__() because some of these fields may depend
    # on the register index enumeration, which has not been performed yet
    # when __init__() runs.
    def finalize(self):
        self.flags = self.getFlags()
        self.constructor = self.makeConstructor()
        self.op_decl = self.makeDecl()

        if not self.is_src:
            self.op_rd = ''
            self.op_src_decl = ''
        else:
            self.op_rd = self.makeRead()
            self.op_src_decl = self.makeDecl()

        if not self.is_dest:
            self.op_wb = ''
            self.op_dest_decl = ''
        else:
            self.op_wb = self.makeWrite()
            self.op_dest_decl = self.makeDecl()

    # Kind predicates; derived classes override the ones that apply.
    def isMem(self):
        return 0

    def isReg(self):
        return 0

    def isFloatReg(self):
        return 0

    def isIntReg(self):
        return 0

    def isControlReg(self):
        return 0

    def isPCState(self):
        return 0

    def isPCPart(self):
        # True (the reg_spec itself) only for a PC state operand that
        # has a reg_spec.
        return self.isPCState() and self.reg_spec

    def getFlags(self):
        # self.flags holds three lists: unconditional flags, flags for
        # source use and flags for dest use.  The slice copies the first
        # list so that extending the result leaves it untouched.
        combined = self.flags[0][:]
        if self.is_src:
            combined += self.flags[1]
        if self.is_dest:
            combined += self.flags[2]
        return combined

    def makeDecl(self):
        # The initialization in the declaration is there solely to
        # avoid 'uninitialized variable' errors from the compiler.
        return self.ctype + ' ' + self.base_name + ' = 0;\n'
530
class IntRegOperand(Operand):
    '''Operand descriptor for an integer register.'''

    def isReg(self):
        return 1

    def isIntReg(self):
        return 1

    def makeConstructor(self):
        # One index-table assignment per role (source and/or dest).
        pieces = []
        if self.is_src:
            pieces.append('\n\t_srcRegIdx[%d] = %s;' %
                          (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            pieces.append('\n\t_destRegIdx[%d] = %s;' %
                          (self.dest_reg_idx, self.reg_spec))
        return ''.join(pieces)

    def makeRead(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to read integer register as FP')
        # An explicit read_code template overrides the built-in forms.
        if self.read_code is not None:
            return self.buildReadCode('readIntRegOperand')
        reg_val = 'xc->readIntRegOperand(this, %d)' % self.src_reg_idx
        if self.size == self.dflt_size:
            return '%s = %s;\n' % (self.base_name, reg_val)
        if self.size > self.dflt_size:
            # Wider than the register: sign-extend from the register
            # width when the operand is signed.
            if self.is_signed:
                reg_val = 'sext<%d>(%s)' % (self.dflt_size, reg_val)
            return '%s = %s;\n' % (self.base_name, reg_val)
        # Narrower than the register: keep the low 'size' bits.
        return '%s = bits(%s, %d, 0);\n' % \
               (self.base_name, reg_val, self.size-1)

    def makeWrite(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to write integer register as FP')
        # An explicit write_code template overrides the built-in form.
        if self.write_code is not None:
            return self.buildWriteCode('setIntRegOperand')
        final_val = self.base_name
        if self.size != self.dflt_size and self.is_signed:
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        return '''
        {
            %s final_val = %s;
            xc->setIntRegOperand(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, self.dest_reg_idx)
581        return wb
582
class FloatRegOperand(Operand):
    '''Operand descriptor for a floating-point register.'''

    def isReg(self):
        return 1

    def isFloatReg(self):
        return 1

    def makeConstructor(self):
        # One index-table assignment per role (source and/or dest).
        pieces = []
        if self.is_src:
            pieces.append('\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' %
                          (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            pieces.append('\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' %
                          (self.dest_reg_idx, self.reg_spec))
        return ''.join(pieces)

    def makeRead(self):
        # float/double operands use the value accessor; every other
        # ctype uses the raw-bits accessor, with a bit select when the
        # size is not the default.
        if self.ctype in ('float', 'double'):
            func = 'readFloatRegOperand'
            select_bits = 0
        else:
            func = 'readFloatRegOperandBits'
            select_bits = (self.size != self.dflt_size)
        reg_val = 'xc->%s(this, %d)' % (func, self.src_reg_idx)
        # An explicit read_code template overrides the built-in forms.
        if self.read_code is not None:
            return self.buildReadCode(func)
        if not select_bits:
            return '%s = %s;\n' % (self.base_name, reg_val)
        return '%s = bits(%s, %d, 0);\n' % \
               (self.base_name, reg_val, self.size-1)

    def makeWrite(self):
        final_val = self.base_name
        final_ctype = self.ctype
        if self.ctype in ('float', 'double'):
            func = 'setFloatRegOperand'
        else:
            func = 'setFloatRegOperandBits'
            if self.ctype not in ('uint32_t', 'uint64_t'):
                # Any other ctype is written through an unsigned
                # temporary of the default size, sign-extended first
                # when the operand is signed and of non-default size.
                final_ctype = 'uint%d_t' % self.dflt_size
                if self.size != self.dflt_size and self.is_signed:
                    final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        # An explicit write_code template overrides the built-in form.
        if self.write_code is not None:
            return self.buildWriteCode(func)
        return '''
        {
            %s final_val = %s;
            xc->%s(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (final_ctype, final_val, func, self.dest_reg_idx)
637        return wb
638
class ControlRegOperand(Operand):
    '''Operand descriptor for a control register.'''

    def isReg(self):
        return 1

    def isControlReg(self):
        return 1

    def makeConstructor(self):
        # One index-table assignment per role (source and/or dest).
        pieces = []
        if self.is_src:
            pieces.append('\n\t_srcRegIdx[%d] = %s + Ctrl_Base_DepTag;' %
                          (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            pieces.append('\n\t_destRegIdx[%d] = %s + Ctrl_Base_DepTag;' %
                          (self.dest_reg_idx, self.reg_spec))
        return ''.join(pieces)

    def makeRead(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to read control register as FP')
        # An explicit read_code template overrides the built-in forms.
        if self.read_code is not None:
            return self.buildReadCode('readMiscRegOperand')
        reg_val = 'xc->readMiscRegOperand(this, %s)' % self.src_reg_idx
        if self.size != self.dflt_size:
            # Non-default size: keep the low 'size' bits.
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, reg_val, self.size-1)
        return '%s = %s;\n' % (self.base_name, reg_val)

    def makeWrite(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to write control register as FP')
        # An explicit write_code template overrides the built-in form.
        if self.write_code is not None:
            return self.buildWriteCode('setMiscRegOperand')
        store = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
                (self.dest_reg_idx, self.base_name)
        trace = 'if (traceData) { traceData->setData(%s); }' % \
                self.base_name
        return store + trace
678        return wb
679
class MemOperand(Operand):
    '''Operand descriptor for the memory operand of a code block.'''

    def isMem(self):
        return 1

    def makeConstructor(self):
        return ''

    def makeDecl(self):
        # Declare the memory data variable.  The initializations are
        # there solely to avoid 'uninitialized variable' errors from the
        # compiler; Twin types get their two halves zeroed separately.
        name = self.base_name
        if self.ctype in ('Twin32_t', 'Twin64_t'):
            return "%s %s; %s.a = 0; %s.b = 0;\n" % \
                   (self.ctype, name, name, name)
        return '%s %s = 0;\n' % (self.ctype, name)

    def makeRead(self):
        # Read code is generated only from an explicit read_code template.
        if self.read_code is None:
            return ''
        return self.buildReadCode()

    def makeWrite(self):
        # Write code is generated only from an explicit write_code
        # template.
        if self.write_code is None:
            return ''
        return self.buildWriteCode()

    # The memory access size *in bits*, suitable for forming a type via
    # "uint%d_t".  Divide by 8 if you want bytes.
    def makeAccSize(self):
        return self.size
709        return self.size
710
class PCStateOperand(Operand):
    '''Operand descriptor for the PC state or one of its components.

    A non-empty reg_spec selects a component, accessed through the
    '__parserAutoPCState' variable; otherwise the operand is the whole
    PC state, accessed through xc->pcState().'''

    def makeConstructor(self):
        return ''

    def makeRead(self):
        if not self.reg_spec:
            # The whole PC state itself.
            return '%s = xc->pcState();\n' % self.base_name
        # A component of the PC state.
        return '%s = __parserAutoPCState.%s();\n' % \
            (self.base_name, self.reg_spec)

    def makeWrite(self):
        if not self.reg_spec:
            # The whole PC state itself.
            return 'xc->pcState(%s);\n' % self.base_name
        # A component of the PC state.
        return '__parserAutoPCState.%s(%s);\n' % \
            (self.reg_spec, self.base_name)

    def makeDecl(self):
        # Components are declared with their own ctype, the whole state
        # as TheISA::PCState.
        if self.isPCPart():
            ctype = self.ctype
        else:
            ctype = 'TheISA::PCState'
        return "%s %s;\n" % (ctype, self.base_name)

    def isPCState(self):
        return 1
740        return 1
741
742class OperandList(object):
743    '''Find all the operands in the given code block.  Returns an operand
744    descriptor list (instance of class OperandList).'''
745    def __init__(self, parser, code):
746        self.items = []
747        self.bases = {}
748        # delete comments so we don't match on reg specifiers inside
749        code = commentRE.sub('', code)
750        # search for operands
751        next_pos = 0
752        while 1:
753            match = parser.operandsRE.search(code, next_pos)
754            if not match:
755                # no more matches: we're done
756                break
757            op = match.groups()
758            # regexp groups are operand full name, base, and extension
759            (op_full, op_base, op_ext) = op
760            # if the token following the operand is an assignment, this is
761            # a destination (LHS), else it's a source (RHS)
762            is_dest = (assignRE.match(code, match.end()) != None)
763            is_src = not is_dest
764            # see if we've already seen this one
765            op_desc = self.find_base(op_base)
766            if op_desc:
767                if op_desc.ext != op_ext:
768                    error('Inconsistent extensions for operand %s' % \
769                          op_base)
770                op_desc.is_src = op_desc.is_src or is_src
771                op_desc.is_dest = op_desc.is_dest or is_dest
772            else:
773                # new operand: create new descriptor
774                op_desc = parser.operandNameMap[op_base](parser,
775                    op_full, op_ext, is_src, is_dest)
776                self.append(op_desc)
777            # start next search after end of current match
778            next_pos = match.end()
779        self.sort()
780        # enumerate source & dest register operands... used in building
781        # constructor later
782        self.numSrcRegs = 0
783        self.numDestRegs = 0
784        self.numFPDestRegs = 0
785        self.numIntDestRegs = 0
786        self.memOperand = None
787        for op_desc in self.items:
788            if op_desc.isReg():
789                if op_desc.is_src:
790                    op_desc.src_reg_idx = self.numSrcRegs
791                    self.numSrcRegs += 1
792                if op_desc.is_dest:
793                    op_desc.dest_reg_idx = self.numDestRegs
794                    self.numDestRegs += 1
795                    if op_desc.isFloatReg():
796                        self.numFPDestRegs += 1
797                    elif op_desc.isIntReg():
798                        self.numIntDestRegs += 1
799            elif op_desc.isMem():
800                if self.memOperand:
801                    error("Code block has more than one memory operand.")
802                self.memOperand = op_desc
803        if parser.maxInstSrcRegs < self.numSrcRegs:
804            parser.maxInstSrcRegs = self.numSrcRegs
805        if parser.maxInstDestRegs < self.numDestRegs:
806            parser.maxInstDestRegs = self.numDestRegs
807        # now make a final pass to finalize op_desc fields that may depend
808        # on the register enumeration
809        for op_desc in self.items:
810            op_desc.finalize()
811
    def __len__(self):
        '''Return the number of operand descriptors in the list.'''
        return len(self.items)
814
    def __getitem__(self, index):
        '''Return self.items[index]; 'index' is handed straight to the
        underlying Python list.'''
        return self.items[index]
817
    def append(self, op_desc):
        '''Add 'op_desc' to the end of the list and index it by its
        base_name so find_base() can locate it.'''
        self.items.append(op_desc)
        # a later descriptor with the same base_name replaces the
        # earlier entry in the index (but not in self.items)
        self.bases[op_desc.base_name] = op_desc
821
    def find_base(self, base_name):
        '''Return the descriptor registered under 'base_name', or None
        if there is none.'''
        # like self.bases[base_name], but returns None if not found
        # (rather than raising exception)
        return self.bases.get(base_name)
826
827    # internal helper function for concat[Some]Attr{Strings|Lists}
    def __internalConcatAttrs(self, attr_name, filter, result):
        '''Append the 'attr_name' attribute of every operand accepted by
        'filter' to 'result' (in list order) and return the total.

        'result' is the starting value ('' or []); the callers in this
        class always pass a fresh literal.'''
        for op_desc in self.items:
            if filter(op_desc):
                # for a list this extends the object passed in; for a
                # string it rebinds the local name to a new string
                result += getattr(op_desc, attr_name)
        return result
833
834    # return a single string that is the concatenation of the (string)
835    # values of the specified attribute for all operands
    def concatAttrStrings(self, attr_name):
        '''Concatenate the string attribute 'attr_name' of all operands.'''
        # the always-true filter selects every operand
        return self.__internalConcatAttrs(attr_name, lambda x: 1, '')
838
839    # like concatAttrStrings, but only include the values for the operands
840    # for which the provided filter function returns true
    def concatSomeAttrStrings(self, filter, attr_name):
        '''Concatenate the string attribute 'attr_name' of the operands
        for which filter(op_desc) is true.'''
        return self.__internalConcatAttrs(attr_name, filter, '')
843
844    # return a single list that is the concatenation of the (list)
845    # values of the specified attribute for all operands
    def concatAttrLists(self, attr_name):
        '''Concatenate the list attribute 'attr_name' of all operands
        into one new list.'''
        # the always-true filter selects every operand
        return self.__internalConcatAttrs(attr_name, lambda x: 1, [])
848
849    # like concatAttrLists, but only include the values for the operands
850    # for which the provided filter function returns true
    def concatSomeAttrLists(self, filter, attr_name):
        '''Concatenate the list attribute 'attr_name' of the operands
        for which filter(op_desc) is true into one new list.'''
        return self.__internalConcatAttrs(attr_name, filter, [])
853
854    def sort(self):
855        self.items.sort(lambda a, b: a.sort_pri - b.sort_pri)
856
class SubOperandList(OperandList):
    '''Operand list for one piece of code whose operands are already
    described by a master OperandList.

    No new descriptors are created: every operand found in 'code' is
    looked up in 'master_list' and the master's descriptor object is
    shared.  The list also records the memory operand (if any) and how
    the code uses the PC state (readPC, setPC, pcPart).'''
    def __init__(self, parser, code, master_list):
        self.items = []
        self.bases = {}
        # Strip C++ line comments up front so that operand names that
        # only appear in comments are not picked up.
        text = commentRE.sub('', code)
        cursor = 0
        while True:
            hit = parser.operandsRE.search(text, cursor)
            if not hit:
                # nothing left to find
                break
            # the next search resumes right behind this match
            cursor = hit.end()
            # the regexp yields (full name, base name, extension);
            # only the base name is needed here
            (_full_name, base_name, _ext) = hit.groups()
            if not master_list.find_base(base_name):
                error('Found operand %s which is not in the master list!' \
                      ' This is an internal error' % base_name)
            elif not self.find_base(base_name):
                # first sighting in this sub list: reference the
                # master's descriptor rather than building a new one
                self.append(master_list.bases[base_name])
        self.sort()
        self.memOperand = None
        # Whether the whole PC needs to be read so parts of it can be
        # accessed
        self.readPC = False
        # Whether the whole PC needs to be written after parts of it
        # were changed
        self.setPC = False
        # Whether this instruction manipulates the whole PC or parts of
        # it (None until a PC state operand has been seen).  Mixing the
        # two is a bad idea and flagged as an error.
        self.pcPart = None
        for desc in self.items:
            if desc.isPCPart():
                self.readPC = True
                if desc.is_dest:
                    self.setPC = True
            if desc.isPCState():
                if self.pcPart is not None:
                    # error if an earlier PC state operand disagrees
                    # with this one about whole vs. partial access
                    if bool(self.pcPart) != bool(desc.isPCPart()):
                        error("Mixed whole and partial PC state operands.")
                self.pcPart = desc.isPCPart()
            if desc.isMem():
                if self.memOperand:
                    error("Code block has more than one memory operand.")
                self.memOperand = desc
914
# Regular expression object to match C++ line comments ('// ...').
# OperandList and SubOperandList use it to strip comments before
# scanning code for operands.  The newline that ends the comment is
# removed along with it; it is optional in the pattern so that a
# comment on the last line of a snippet (no terminating newline) is
# stripped as well.
commentRE = re.compile(r'//.*\n?')
918
# Regular expression object to match the start of an assignment:
# optional whitespace followed by a single '=' ('==' is rejected by
# the negative lookahead).  OperandList applies it with .match() at the
# position right after an operand to decide whether that operand is a
# destination.
# NOTE(review): re.MULTILINE only changes the meaning of '^' and '$',
# neither of which appears in this pattern.
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
922
def makeFlagConstructor(flag_list):
    '''Return C++ statements that set flags[<flag>] = true for each
    distinct flag name in 'flag_list'.

    Each statement is preceded by a newline and a tab.  An empty list
    yields the empty string.  As a side effect 'flag_list' itself is
    sorted and stripped of duplicates in place.'''
    if not flag_list:
        return ''
    # filter out repeated flags: after sorting, duplicates are adjacent
    flag_list.sort()
    unique = [flag_list[0]]
    for flag in flag_list[1:]:
        if flag != unique[-1]:
            unique.append(flag)
    # slice assignment keeps the caller's list object up to date
    flag_list[:] = unique
    pre = '\n\tflags['
    post = '] = true;'
    # str.join replaces the deprecated string.join() function
    return pre + (post + pre).join(flag_list) + post
938
# Assume all instruction flags are of the form 'IsFoo'.
# InstObjParams applies this with .match(), so any string that starts
# with 'Is' is accepted.
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass.
# Also applied with .match(), which anchors only at the start: a string
# passes if some prefix of it ends in 'Op', or if it starts with
# 'No_OpClass'.
opClassRE = re.compile(r'.*Op|No_OpClass')
944
class InstObjParams(object):
    '''Collection of values describing one instruction object.

    Attributes set by the constructor: mnemonic, class_name, base_class,
    snippets (dict of code snippets), operands (OperandList built from
    all snippets together), constructor (C++ constructor body text),
    flags (list of flag names), op_class and fp_enable_check.'''
    def __init__(self, parser, mnem, class_name, base_class = '',
                 snippets = None, opt_args = None):
        '''parser     -- parser object handed to OperandList
        mnem       -- stored as self.mnemonic
        class_name -- stored as self.class_name
        base_class -- stored as self.base_class
        snippets   -- dict of code snippets, or any other value, which
                      is then treated as the single snippet 'code';
                      None (the default) means no snippets
        opt_args   -- sequence of extra strings, each a flag name
                      ('Is...') or an OpClass name; anything else is
                      reported through error()'''
        # None stands for "empty".  Fresh objects are created per call
        # so self.snippets is never a dict shared between instances (a
        # mutable default argument would be).
        if snippets is None:
            snippets = {}
        if opt_args is None:
            opt_args = []
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        if not isinstance(snippets, dict):
            snippets = {'code' : snippets}
        # operands are collected over all snippets at once
        compositeCode = ' '.join(map(str, snippets.values()))
        self.snippets = snippets

        self.operands = OperandList(parser, compositeCode)
        self.constructor = self.operands.concatAttrStrings('constructor')
        self.constructor += \
                 '\n\t_numSrcRegs = %d;' % self.operands.numSrcRegs
        self.constructor += \
                 '\n\t_numDestRegs = %d;' % self.operands.numDestRegs
        self.constructor += \
                 '\n\t_numFPDestRegs = %d;' % self.operands.numFPDestRegs
        self.constructor += \
                 '\n\t_numIntDestRegs = %d;' % self.operands.numIntDestRegs
        self.flags = self.operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and can be overridden
        # later otherwise.
        if 'IsStore' in self.flags:
            self.op_class = 'MemWriteOp'
        elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
            self.op_class = 'MemReadOp'
        elif 'IsFloating' in self.flags:
            self.op_class = 'FloatAddOp'
        else:
            self.op_class = 'IntAluOp'

        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error('InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to constructor here to include
        # any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
        else:
            self.fp_enable_check = ''
1003
1004##############
1005# Stack: a simple stack object.  Used for both formats (formatStack)
1006# and default cases (defaultStack).  Simply wraps a list to give more
1007# stack-like syntax and enable initialization with an argument list
1008# (as opposed to an argument that's a list).
1009
class Stack(list):
    '''A list with stack-style push()/top() helpers.  The constructor
    takes the initial elements as separate arguments, bottom first.'''
    def __init__(self, *items):
        super(Stack, self).__init__(items)

    def push(self, item):
        '''Put 'item' on top of the stack.'''
        self.append(item)

    def top(self):
        '''Return the top element without removing it (IndexError if
        the stack is empty).'''
        return self[len(self) - 1]
1019
1020#######################
1021#
1022# Output file template
1023#
1024
# Skeleton of a generated output file.  The %(name)s placeholders are:
# filename, includes, global_output, namespace (used twice),
# namespace_output and decode_function.
# NOTE(review): presumably filled in with dict-based '%' formatting by
# the code that writes the output files -- that code is outside this
# section.
file_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s

%(decode_function)s
'''
1044
# Skeleton of the generated file that defines the MaxInstSrcRegs and
# MaxInstDestRegs constants.  Placeholders: filename and namespace
# (strings), MaxInstSrcRegs and MaxInstDestRegs (formatted with %d, so
# they must be numbers).
max_inst_regs_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

namespace %(namespace)s {

    const int MaxInstSrcRegs = %(MaxInstSrcRegs)d;
    const int MaxInstDestRegs = %(MaxInstDestRegs)d;

} // namespace %(namespace)s

'''
1060
1061class ISAParser(Grammar):
1062    def __init__(self, output_dir, cpu_models):
1063        super(ISAParser, self).__init__()
1064        self.output_dir = output_dir
1065
1066        self.cpuModels = cpu_models
1067
1068        # variable to hold templates
1069        self.templateMap = {}
1070
1071        # This dictionary maps format name strings to Format objects.
1072        self.formatMap = {}
1073
1074        # The format stack.
1075        self.formatStack = Stack(NoFormat())
1076
1077        # The default case stack.
1078        self.defaultStack = Stack(None)
1079
1080        # Stack that tracks current file and line number.  Each
1081        # element is a tuple (filename, lineno) that records the
1082        # *current* filename and the line number in the *previous*
1083        # file where it was included.
1084        self.fileNameStack = Stack()
1085
1086        symbols = ('makeList', 're', 'string')
1087        self.exportContext = dict([(s, eval(s)) for s in symbols])
1088
1089        self.maxInstSrcRegs = 0
1090        self.maxInstDestRegs = 0
1091
1092    #####################################################################
1093    #
1094    #                                Lexer
1095    #
1096    # The PLY lexer module takes two things as input:
1097    # - A list of token names (the string list 'tokens')
1098    # - A regular expression describing a match for each token.  The
1099    #   regexp for token FOO can be provided in two ways:
1100    #   - as a string variable named t_FOO
1101    #   - as the doc string for a function named t_FOO.  In this case,
1102    #     the function is also executed, allowing an action to be
1103    #     associated with each token match.
1104    #
1105    #####################################################################
1106
1107    # Reserved words.  These are listed separately as they are matched
1108    # using the same regexp as generic IDs, but distinguished in the
1109    # t_ID() function.  The PLY documentation suggests this approach.
    # Token type names of the reserved words.  The keyword as written
    # in an ISA description is the lower-case form of each name (see
    # reserved_map below).
    reserved = (
        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
        'OUTPUT', 'SIGNED', 'TEMPLATE'
        )
1115
1116    # List of tokens.  The lex module requires this.
    tokens = reserved + (
        # identifier
        'ID',

        # integer literal
        'INTLIT',

        # string literal
        'STRLIT',

        # code literal
        'CODELIT',

        # ( ) [ ] { } < > = , ; . : :: *
        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'LBRACE', 'RBRACE',
        'LESS', 'GREATER', 'EQUALS',
        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
        'ASTERISK',

        # C preprocessor directives
        'CPPDIRECTIVE'

    # The following are matched (see t_NEWFILE and t_ENDFILE) but never
    # returned to the parser.  They are commented out to suppress a
    # PLY warning.
        # newfile directive
    #    'NEWFILE',

        # endfile directive
    #    'ENDFILE'
    )
1149
1150    # Regular expressions for token matching
    # Single-regexp tokens with no action, given as t_<TOKEN> strings.
    # NOTE(review): ':' is a prefix of '::'.  Per the PLY documentation
    # string-defined rules are tried longest regexp first, which is
    # what lets DBLCOLON win over COLON -- confirm for the PLY version
    # in use.
    t_LPAREN           = r'\('
    t_RPAREN           = r'\)'
    t_LBRACKET         = r'\['
    t_RBRACKET         = r'\]'
    t_LBRACE           = r'\{'
    t_RBRACE           = r'\}'
    t_LESS             = r'\<'
    t_GREATER          = r'\>'
    t_EQUALS           = r'='
    t_COMMA            = r','
    t_SEMI             = r';'
    t_DOT              = r'\.'
    t_COLON            = r':'
    t_DBLCOLON         = r'::'
    t_ASTERISK         = r'\*'
1166
1167    # Identifiers and reserved words
    # Maps the lower-case keyword text to its token type name, e.g.
    # 'decode' -> 'DECODE'.  Consulted by t_ID().
    reserved_map = { }
    # (this runs in the class body, so the loop variable 'r' is left
    # behind as a class attribute)
    for r in reserved:
        reserved_map[r.lower()] = r
1171
1172    def t_ID(self, t):
1173        r'[A-Za-z_]\w*'
1174        t.type = self.reserved_map.get(t.value, 'ID')
1175        return t
1176
1177    # Integer literal
1178    def t_INTLIT(self, t):
1179        r'-?(0x[\da-fA-F]+)|\d+'
1180        try:
1181            t.value = int(t.value,0)
1182        except ValueError:
1183            error(t, 'Integer value "%s" too large' % t.value)
1184            t.value = 0
1185        return t
1186
1187    # String literal.  Note that these use only single quotes, and
1188    # can span multiple lines.
1189    def t_STRLIT(self, t):
1190        r"(?m)'([^'])+'"
1191        # strip off quotes
1192        t.value = t.value[1:-1]
1193        t.lexer.lineno += t.value.count('\n')
1194        return t
1195
1196
1197    # "Code literal"... like a string literal, but delimiters are
1198    # '{{' and '}}' so they get formatted nicely under emacs c-mode
1199    def t_CODELIT(self, t):
1200        r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
1201        # strip off {{ & }}
1202        t.value = t.value[2:-2]
1203        t.lexer.lineno += t.value.count('\n')
1204        return t
1205
    def t_CPPDIRECTIVE(self, t):
        r'^\#[^\#].*\n'
        # A line starting with a single '#' (lines starting with '##'
        # are left to t_NEWFILE/t_ENDFILE).  The match includes the
        # line's newline, so the line count is advanced here.
        t.lexer.lineno += t.value.count('\n')
        return t
1210
1211    def t_NEWFILE(self, t):
1212        r'^\#\#newfile\s+"[\w/.-]*"'
1213        self.fileNameStack.push((t.value[11:-1], t.lexer.lineno))
1214        t.lexer.lineno = 0
1215
1216    def t_ENDFILE(self, t):
1217        r'^\#\#endfile'
1218        (old_filename, t.lexer.lineno) = self.fileNameStack.pop()
1219
1220    #
1221    # The functions t_NEWLINE, t_ignore, and t_error are
1222    # special for the lex module.
1223    #
1224
1225    # Newlines
    def t_NEWLINE(self, t):
        r'\n+'
        # A run of newlines only advances the line count; nothing is
        # returned, so no token is produced.
        t.lexer.lineno += t.value.count('\n')
1229
1230    # Comments
    def t_comment(self, t):
        r'//.*'
        # No action and no return value: a '//' comment is matched up
        # to the end of its line ('.' does not match the newline) and
        # dropped.
1233
1234    # Completely ignored characters
    # space, tab and form feed (\x0c)
    t_ignore = ' \t\x0c'
1236
1237    # Error handler
    def t_error(self, t):
        # Report the first character of the unmatched input ...
        error(t, "illegal character '%s'" % t.value[0])
        # ... and step over that one character.
        # NOTE(review): this line only runs if error() returns; newer
        # PLY releases spell it t.lexer.skip(1) -- confirm against the
        # PLY version in use.
        t.skip(1)
1241
1242    #####################################################################
1243    #
1244    #                                Parser
1245    #
1246    # Every function whose name starts with 'p_' defines a grammar
1247    # rule.  The rule is encoded in the function's doc string, while
1248    # the function body provides the action taken when the rule is
1249    # matched.  The argument to each function is a list of the values
1250    # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
1251    # symbols on the RHS.  For tokens, the value is copied from the
1252    # t.value attribute provided by the lexer.  For non-terminals, the
1253    # value is assigned by the producing rule; i.e., the job of the
1254    # grammar rule function is to set the value for the non-terminal
1255    # on the LHS (by assigning to t[0]).
1256    #####################################################################
1257
1258    # The LHS of the first grammar rule is used as the start symbol
1259    # (in this case, 'specification').  Note that this rule enforces
1260    # that there will be exactly one namespace declaration, with 0 or
1261    # more global defs/decls before and after it.  The defs & decls
1262    # before the namespace decl will be outside the namespace; those
1263    # after will be inside.  The decoder function is always inside the
1264    # namespace.
1265    def p_specification(self, t):
1266        'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
1267        global_code = t[1]
1268        isa_name = t[2]
1269        namespace = isa_name + "Inst"
1270        # wrap the decode block as a function definition
1271        t[4].wrap_decode_block('''
1272StaticInstPtr
1273%(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst)
1274{
1275    using namespace %(namespace)s;
1276''' % vars(), '}')
1277        # both the latter output blocks and the decode block are in
1278        # the namespace
1279        namespace_code = t[3] + t[4]
1280        # pass it all back to the caller of yacc.parse()
1281        t[0] = (isa_name, namespace, global_code, namespace_code)
1282
1283    # ISA name declaration looks like "namespace <foo>;"
    def p_name_decl(self, t):
        'name_decl : NAMESPACE ID SEMI'
        # the value of the declaration is just the identifier
        t[0] = t[2]
1287
1288    # 'opt_defs_and_outputs' is a possibly empty sequence of
1289    # def and/or output statements.
    def p_opt_defs_and_outputs_0(self, t):
        'opt_defs_and_outputs : empty'
        # no statements: a GenCode constructed with no output arguments
        t[0] = GenCode(self)
1293
    def p_opt_defs_and_outputs_1(self, t):
        'opt_defs_and_outputs : defs_and_outputs'
        # forward the value of the non-empty sequence unchanged
        t[0] = t[1]
1297
    def p_defs_and_outputs_0(self, t):
        'defs_and_outputs : def_or_output'
        # a sequence of one statement has that statement's value
        t[0] = t[1]
1301
    def p_defs_and_outputs_1(self, t):
        'defs_and_outputs : defs_and_outputs def_or_output'
        # left-recursive accumulation: the sequence so far combined
        # with the next statement via '+'
        t[0] = t[1] + t[2]
1305
1306    # The list of possible definition/output statements.
    def p_def_or_output(self, t):
        '''def_or_output : def_format
                         | def_bitfield
                         | def_bitfield_struct
                         | def_template
                         | def_operand_types
                         | def_operands
                         | output_header
                         | output_decoder
                         | output_exec
                         | global_let'''
        # every alternative is a single non-terminal; forward its value
        t[0] = t[1]
1319
1320    # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
1321    # directly to the appropriate output section.
1322
1323    # Massage output block by substituting in template definitions and
1324    # bit operators.  We handle '%'s embedded in the string that don't
1325    # indicate template substitutions (or CPU-specific symbols, which
1326    # get handled in GenCode) by doubling them first so that the
1327    # format operation will reduce them back to single '%'s.
1328    def process_output(self, s):
1329        s = self.protectNonSubstPercents(s)
1330        # protects cpu-specific symbols too
1331        s = self.protectCpuSymbols(s)
1332        return substBitOps(s % self.templateMap)
1333
    def p_output_header(self, t):
        'output_header : OUTPUT HEADER CODELIT SEMI'
        # t[3] is the code literal's text; after process_output() it
        # becomes the header_output of the resulting GenCode
        t[0] = GenCode(self, header_output = self.process_output(t[3]))
1337
    def p_output_decoder(self, t):
        'output_decoder : OUTPUT DECODER CODELIT SEMI'
        # t[3] is the code literal's text; after process_output() it
        # becomes the decoder_output of the resulting GenCode
        t[0] = GenCode(self, decoder_output = self.process_output(t[3]))
1341
    def p_output_exec(self, t):
        'output_exec : OUTPUT EXEC CODELIT SEMI'
        # t[3] is the code literal's text; after process_output() it
        # becomes the exec_output of the resulting GenCode
        t[0] = GenCode(self, exec_output = self.process_output(t[3]))
1345
1346    # global let blocks 'let {{...}}' (Python code blocks) are
1347    # executed directly when seen.  Note that these execute in a
1348    # special variable context 'exportContext' to prevent the code
1349    # from polluting this script's namespace.
1350    def p_global_let(self, t):
1351        'global_let : LET CODELIT SEMI'
1352        self.updateExportContext()
1353        self.exportContext["header_output"] = ''
1354        self.exportContext["decoder_output"] = ''
1355        self.exportContext["exec_output"] = ''
1356        self.exportContext["decode_block"] = ''
1357        try:
1358            exec fixPythonIndentation(t[2]) in self.exportContext
1359        except Exception, exc:
1360            if debug:
1361                raise
1362            error(t, 'error: %s in global let block "%s".' % (exc, t[2]))
1363        t[0] = GenCode(self,
1364                       header_output=self.exportContext["header_output"],
1365                       decoder_output=self.exportContext["decoder_output"],
1366                       exec_output=self.exportContext["exec_output"],
1367                       decode_block=self.exportContext["decode_block"])
1368
1369    # Define the mapping from operand type extensions to C++ types and
1370    # bit widths (stored in operandTypeMap).
    def p_def_operand_types(self, t):
        'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
        try:
            # The code literal is wrapped in braces so that it
            # evaluates as a dict display.  eval() gets no explicit
            # namespace here, so the text is evaluated in the scope of
            # this method (unlike p_def_operands, which passes
            # exportContext).
            user_dict = eval('{' + t[3] + '}')
        except Exception, exc:
            # in debug mode let the original exception propagate
            if debug:
                raise
            error(t,
                  'error: %s in def operand_types block "%s".' % (exc, t[3]))
        # NOTE(review): if error() were to return instead of raising,
        # user_dict would be unbound here -- confirm error() always
        # raises.
        self.buildOperandTypeMap(user_dict, t.lexer.lineno)
        t[0] = GenCode(self) # contributes nothing to the output C++ file
1382
1383    # Define the mapping from operand names to operand classes and
1384    # other traits.  Stored in operandNameMap.
1385    def p_def_operands(self, t):
1386        'def_operands : DEF OPERANDS CODELIT SEMI'
1387        if not hasattr(self, 'operandTypeMap'):
1388            error(t, 'error: operand types must be defined before operands')
1389        try:
1390            user_dict = eval('{' + t[3] + '}', self.exportContext)
1391        except Exception, exc:
1392            if debug:
1393                raise
1394            error(t, 'error: %s in def operands block "%s".' % (exc, t[3]))
1395        self.buildOperandNameMap(user_dict, t.lexer.lineno)
1396        t[0] = GenCode(self) # contributes nothing to the output C++ file
1397
1398    # A bitfield definition looks like:
1399    # 'def [signed] bitfield <ID> [<first>:<last>]'
1400    # This generates a preprocessor macro in the output file.
1401    def p_def_bitfield_0(self, t):
1402        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
1403        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
1404        if (t[2] == 'signed'):
1405            expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
1406        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1407        t[0] = GenCode(self, header_output=hash_define)
1408
1409    # alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
1410    def p_def_bitfield_1(self, t):
1411        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
1412        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
1413        if (t[2] == 'signed'):
1414            expr = 'sext<%d>(%s)' % (1, expr)
1415        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1416        t[0] = GenCode(self, header_output=hash_define)
1417
1418    # alternate form for structure member: 'def bitfield <ID> <ID>'
1419    def p_def_bitfield_struct(self, t):
1420        'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
1421        if (t[2] != ''):
1422            error(t, 'error: structure bitfields are always unsigned.')
1423        expr = 'machInst.%s' % t[5]
1424        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1425        t[0] = GenCode(self, header_output=hash_define)
1426
    def p_id_with_dot_0(self, t):
        'id_with_dot : ID'
        # base case: a lone identifier
        t[0] = t[1]
1430
    def p_id_with_dot_1(self, t):
        'id_with_dot : ID DOT id_with_dot'
        # right-recursive: t[2] is the text of the DOT token, so this
        # glues the pieces back together as 'a.b.c'
        t[0] = t[1] + t[2] + t[3]
1434
    def p_opt_signed_0(self, t):
        'opt_signed : SIGNED'
        # forward the keyword's text; the def_bitfield rules compare it
        # against 'signed'
        t[0] = t[1]
1438
    def p_opt_signed_1(self, t):
        'opt_signed : empty'
        # keyword absent: the empty string stands for "unsigned"
        t[0] = ''
1442
    def p_def_template(self, t):
        'def_template : DEF TEMPLATE ID CODELIT SEMI'
        # Register a Template built from the code literal (t[4]) under
        # the given name (t[3]); a later definition with the same name
        # replaces the earlier one.
        self.templateMap[t[3]] = Template(self, t[4])
        # the definition itself generates no output
        t[0] = GenCode(self)
1447
1448    # An instruction format definition looks like
1449    # "def format <fmt>(<params>) {{...}};"
1450    def p_def_format(self, t):
1451        'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
1452        (id, params, code) = (t[3], t[5], t[7])
1453        self.defFormat(id, params, code, t.lexer.lineno)
1454        t[0] = GenCode(self)
1455
1456    # The formal parameter list for an instruction format is a
1457    # possibly empty list of comma-separated parameters.  Positional
1458    # (standard, non-keyword) parameters must come first, followed by
1459    # keyword parameters, followed by a '*foo' parameter that gets
1460    # excess positional arguments (as in Python).  Each of these three
1461    # parameter categories is optional.
1462    #
1463    # Note that we do not support the '**foo' parameter for collecting
1464    # otherwise undefined keyword args.  Otherwise the parameter list
1465    # is (I believe) identical to what is supported in Python.
1466    #
    # The param list generates a flat Python list of strings: the
    # positional parameter names first, then one 'name = <repr of
    # value>' string per keyword param, then the '*name' entry for the
    # excess-args param (each part only if present).
    def p_param_list_0(self, t):
        'param_list : positional_param_list COMMA nonpositional_param_list'
        # both parts are Python lists; the COMMA token (t[2]) is dropped
        t[0] = t[1] + t[3]
1473
    def p_param_list_1(self, t):
        '''param_list : positional_param_list
                      | nonpositional_param_list'''
        # only one kind of parameter present: forward its list
        t[0] = t[1]
1478
    def p_positional_param_list_0(self, t):
        'positional_param_list : empty'
        # no positional parameters
        t[0] = []
1482
    def p_positional_param_list_1(self, t):
        'positional_param_list : ID'
        # a single name becomes a one-element list
        t[0] = [t[1]]
1486
    def p_positional_param_list_2(self, t):
        'positional_param_list : positional_param_list COMMA ID'
        # left-recursive: a new list with the next name added at the end
        t[0] = t[1] + [t[3]]
1490
    def p_nonpositional_param_list_0(self, t):
        'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
        # keyword params followed by the '*name' entry; both are lists
        t[0] = t[1] + t[3]
1494
    def p_nonpositional_param_list_1(self, t):
        '''nonpositional_param_list : keyword_param_list
                                    | excess_args_param'''
        # only one of the two present: forward its list
        t[0] = t[1]
1499
    def p_keyword_param_list_0(self, t):
        'keyword_param_list : keyword_param'
        # a single keyword param becomes a one-element list
        t[0] = [t[1]]
1503
    def p_keyword_param_list_1(self, t):
        'keyword_param_list : keyword_param_list COMMA keyword_param'
        # left-recursive: a new list with the next keyword param added
        # at the end
        t[0] = t[1] + [t[3]]
1507
1508    def p_keyword_param(self, t):
1509        'keyword_param : ID EQUALS expr'
1510        t[0] = t[1] + ' = ' + t[3].__repr__()
1511
    def p_excess_args_param(self, t):
        'excess_args_param : ASTERISK ID'
        # Just concatenate them: '*ID' (t[1] is the text of the
        # ASTERISK token).  Wrap in list to be consistent
        # with positional_param_list and keyword_param_list.
        t[0] = [t[1] + t[2]]
1517
1518    # End of format definition-related rules.
1519    ##############
1520
1521    #
1522    # A decode block looks like:
1523    #       decode <field1> [, <field2>]* [default <inst>] { ... }
1524    #
1525    def p_decode_block(self, t):
1526        'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
1527        default_defaults = self.defaultStack.pop()
1528        codeObj = t[5]
1529        # use the "default defaults" only if there was no explicit
1530        # default statement in decode_stmt_list
1531        if not codeObj.has_decode_default:
1532            codeObj += default_defaults
1533        codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
1534        t[0] = codeObj
1535
1536    # The opt_default statement serves only to push the "default
1537    # defaults" onto defaultStack.  This value will be used by nested
1538    # decode blocks, and used and popped off when the current
1539    # decode_block is processed (in p_decode_block() above).
    def p_opt_default_0(self, t):
        'opt_default : empty'
        # no default specified: reuse the one currently at the top of
        # the stack (it is pushed a second time so that the pop in
        # p_decode_block() leaves the enclosing block's entry in place)
        self.defaultStack.push(self.defaultStack.top())
        # no meaningful value returned
        t[0] = None
1547
    def p_opt_default_1(self, t):
        'opt_default : DEFAULT inst'
        # push the new default: the instruction's decode code is
        # wrapped as the 'default:' case of a switch, ending in 'break;'
        codeObj = t[2]
        codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
        self.defaultStack.push(codeObj)
        # no meaningful value returned
        t[0] = None
1556
    def p_decode_stmt_list_0(self, t):
        'decode_stmt_list : decode_stmt'
        # a list of one statement has that statement's value
        t[0] = t[1]
1560
    def p_decode_stmt_list_1(self, t):
        'decode_stmt_list : decode_stmt decode_stmt_list'
        # right-recursive: t[1] is the first statement, t[2] the rest
        # of the list.  At most one of the two may carry a default case.
        if (t[1].has_decode_default and t[2].has_decode_default):
            error(t, 'Two default cases in decode block')
        t[0] = t[1] + t[2]
1566
1567    #
1568    # Decode statement rules
1569    #
1570    # There are four types of statements allowed in a decode block:
1571    # 1. Format blocks 'format <foo> { ... }'
1572    # 2. Nested decode blocks
1573    # 3. Instruction definitions.
1574    # 4. C preprocessor directives.
1575
1576
1577    # Preprocessor directives found in a decode statement list are
1578    # passed through to the output, replicated to all of the output
1579    # code streams.  This works well for ifdefs, so we can ifdef out
1580    # both the declarations and the decode cases generated by an
1581    # instruction definition.  Handling them as part of the grammar
1582    # makes it easy to keep them in the right place with respect to
1583    # the code generated by the other statements.
    def p_decode_stmt_cpp(self, t):
        'decode_stmt : CPPDIRECTIVE'
        # the directive text (t[1]) is passed as each of the four
        # positional output arguments of GenCode
        t[0] = GenCode(self, t[1], t[1], t[1], t[1])
1587
1588    # A format block 'format <foo> { ... }' sets the default
1589    # instruction format used to handle instruction definitions inside
1590    # the block.  This format can be overridden by using an explicit
1591    # format on the instruction definition or with a nested format
1592    # block.
1593    def p_decode_stmt_format(self, t):
1594        'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
1595        # The format will be pushed on the stack when 'push_format_id'
1596        # is processed (see below).  Once the parser has recognized
1597        # the full production (though the right brace), we're done
1598        # with the format, so now we can pop it.
1599        self.formatStack.pop()
1600        t[0] = t[4]
1601
1602    # This rule exists so we can set the current format (& push the
1603    # stack) when we recognize the format name part of the format
1604    # block.
1605    def p_push_format_id(self, t):
1606        'push_format_id : ID'
1607        try:
1608            self.formatStack.push(self.formatMap[t[1]])
1609            t[0] = ('', '// format %s' % t[1])
1610        except KeyError:
1611            error(t, 'instruction format "%s" not defined.' % t[1])
1612
1613    # Nested decode block: if the value of the current field matches
1614    # the specified constant, do a nested decode on some other field.
1615    def p_decode_stmt_decode(self, t):
1616        'decode_stmt : case_label COLON decode_block'
1617        label = t[1]
1618        codeObj = t[3]
1619        # just wrap the decoding code from the block as a case in the
1620        # outer switch statement.
1621        codeObj.wrap_decode_block('\n%s:\n' % label)
1622        codeObj.has_decode_default = (label == 'default')
1623        t[0] = codeObj
1624
1625    # Instruction definition (finally!).
1626    def p_decode_stmt_inst(self, t):
1627        'decode_stmt : case_label COLON inst SEMI'
1628        label = t[1]
1629        codeObj = t[3]
1630        codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
1631        codeObj.has_decode_default = (label == 'default')
1632        t[0] = codeObj
1633
1634    # The case label is either a list of one or more constants or
1635    # 'default'
1636    def p_case_label_0(self, t):
1637        'case_label : intlit_list'
1638        def make_case(intlit):
1639            if intlit >= 2**32:
1640                return 'case ULL(%#x)' % intlit
1641            else:
1642                return 'case %#x' % intlit
1643        t[0] = ': '.join(map(make_case, t[1]))
1644
1645    def p_case_label_1(self, t):
1646        'case_label : DEFAULT'
1647        t[0] = 'default'
1648
1649    #
1650    # The constant list for a decode case label must be non-empty, but
1651    # may have one or more comma-separated integer literals in it.
1652    #
1653    def p_intlit_list_0(self, t):
1654        'intlit_list : INTLIT'
1655        t[0] = [t[1]]
1656
1657    def p_intlit_list_1(self, t):
1658        'intlit_list : intlit_list COMMA INTLIT'
1659        t[0] = t[1]
1660        t[0].append(t[3])
1661
1662    # Define an instruction using the current instruction format
1663    # (specified by an enclosing format block).
1664    # "<mnemonic>(<args>)"
1665    def p_inst_0(self, t):
1666        'inst : ID LPAREN arg_list RPAREN'
1667        # Pass the ID and arg list to the current format class to deal with.
1668        currentFormat = self.formatStack.top()
1669        codeObj = currentFormat.defineInst(self, t[1], t[3], t.lexer.lineno)
1670        args = ','.join(map(str, t[3]))
1671        args = re.sub('(?m)^', '//', args)
1672        args = re.sub('^//', '', args)
1673        comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
1674        codeObj.prepend_all(comment)
1675        t[0] = codeObj
1676
1677    # Define an instruction using an explicitly specified format:
1678    # "<fmt>::<mnemonic>(<args>)"
1679    def p_inst_1(self, t):
1680        'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
1681        try:
1682            format = self.formatMap[t[1]]
1683        except KeyError:
1684            error(t, 'instruction format "%s" not defined.' % t[1])
1685
1686        codeObj = format.defineInst(self, t[3], t[5], t.lexer.lineno)
1687        comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
1688        codeObj.prepend_all(comment)
1689        t[0] = codeObj
1690
1691    # The arg list generates a tuple, where the first element is a
1692    # list of the positional args and the second element is a dict
1693    # containing the keyword args.
1694    def p_arg_list_0(self, t):
1695        'arg_list : positional_arg_list COMMA keyword_arg_list'
1696        t[0] = ( t[1], t[3] )
1697
1698    def p_arg_list_1(self, t):
1699        'arg_list : positional_arg_list'
1700        t[0] = ( t[1], {} )
1701
1702    def p_arg_list_2(self, t):
1703        'arg_list : keyword_arg_list'
1704        t[0] = ( [], t[1] )
1705
1706    def p_positional_arg_list_0(self, t):
1707        'positional_arg_list : empty'
1708        t[0] = []
1709
1710    def p_positional_arg_list_1(self, t):
1711        'positional_arg_list : expr'
1712        t[0] = [t[1]]
1713
1714    def p_positional_arg_list_2(self, t):
1715        'positional_arg_list : positional_arg_list COMMA expr'
1716        t[0] = t[1] + [t[3]]
1717
1718    def p_keyword_arg_list_0(self, t):
1719        'keyword_arg_list : keyword_arg'
1720        t[0] = t[1]
1721
1722    def p_keyword_arg_list_1(self, t):
1723        'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
1724        t[0] = t[1]
1725        t[0].update(t[3])
1726
1727    def p_keyword_arg(self, t):
1728        'keyword_arg : ID EQUALS expr'
1729        t[0] = { t[1] : t[3] }
1730
1731    #
1732    # Basic expressions.  These constitute the argument values of
1733    # "function calls" (i.e. instruction definitions in the decode
1734    # block) and default values for formal parameters of format
1735    # functions.
1736    #
1737    # Right now, these are either strings, integers, or (recursively)
1738    # lists of exprs (using Python square-bracket list syntax).  Note
    # that bare identifiers are treated as string constants here (since
1740    # there isn't really a variable namespace to refer to).
1741    #
1742    def p_expr_0(self, t):
1743        '''expr : ID
1744                | INTLIT
1745                | STRLIT
1746                | CODELIT'''
1747        t[0] = t[1]
1748
1749    def p_expr_1(self, t):
1750        '''expr : LBRACKET list_expr RBRACKET'''
1751        t[0] = t[2]
1752
1753    def p_list_expr_0(self, t):
1754        'list_expr : expr'
1755        t[0] = [t[1]]
1756
1757    def p_list_expr_1(self, t):
1758        'list_expr : list_expr COMMA expr'
1759        t[0] = t[1] + [t[3]]
1760
1761    def p_list_expr_2(self, t):
1762        'list_expr : empty'
1763        t[0] = []
1764
1765    #
1766    # Empty production... use in other rules for readability.
1767    #
1768    def p_empty(self, t):
1769        'empty :'
1770        pass
1771
1772    # Parse error handler.  Note that the argument here is the
1773    # offending *token*, not a grammar symbol (hence the need to use
1774    # t.value)
1775    def p_error(self, t):
1776        if t:
1777            error(t, "syntax error at '%s'" % t.value)
1778        else:
1779            error("unknown syntax error")
1780
1781    # END OF GRAMMAR RULES
1782
1783    def updateExportContext(self):
1784
1785        # create a continuation that allows us to grab the current parser
1786        def wrapInstObjParams(*args):
1787            return InstObjParams(self, *args)
1788        self.exportContext['InstObjParams'] = wrapInstObjParams
1789        self.exportContext.update(self.templateMap)
1790
1791    def defFormat(self, id, params, code, lineno):
1792        '''Define a new format'''
1793
1794        # make sure we haven't already defined this one
1795        if id in self.formatMap:
1796            error(lineno, 'format %s redefined.' % id)
1797
1798        # create new object and store in global map
1799        self.formatMap[id] = Format(id, params, code)
1800
1801    def expandCpuSymbolsToDict(self, template):
1802        '''Expand template with CPU-specific references into a
1803        dictionary with an entry for each CPU model name.  The entry
1804        key is the model name and the corresponding value is the
1805        template with the CPU-specific refs substituted for that
1806        model.'''
1807
1808        # Protect '%'s that don't go with CPU-specific terms
1809        t = re.sub(r'%(?!\(CPU_)', '%%', template)
1810        result = {}
1811        for cpu in self.cpuModels:
1812            result[cpu.name] = t % cpu.strings
1813        return result
1814
1815    def expandCpuSymbolsToString(self, template):
1816        '''*If* the template has CPU-specific references, return a
1817        single string containing a copy of the template for each CPU
1818        model with the corresponding values substituted in.  If the
1819        template has no CPU-specific references, it is returned
1820        unmodified.'''
1821
1822        if template.find('%(CPU_') != -1:
1823            return reduce(lambda x,y: x+y,
1824                          self.expandCpuSymbolsToDict(template).values())
1825        else:
1826            return template
1827
1828    def protectCpuSymbols(self, template):
1829        '''Protect CPU-specific references by doubling the
1830        corresponding '%'s (in preparation for substituting a different
1831        set of references into the template).'''
1832
1833        return re.sub(r'%(?=\(CPU_)', '%%', template)
1834
1835    def protectNonSubstPercents(self, s):
1836        '''Protect any non-dict-substitution '%'s in a format string
1837        (i.e. those not followed by '(')'''
1838
1839        return re.sub(r'%(?!\()', '%%', s)
1840
1841    def buildOperandTypeMap(self, user_dict, lineno):
1842        """Generate operandTypeMap from the user's 'def operand_types'
1843        statement."""
1844        operand_type = {}
1845        for (ext, (desc, size)) in user_dict.iteritems():
1846            if desc == 'signed int':
1847                ctype = 'int%d_t' % size
1848                is_signed = 1
1849            elif desc == 'unsigned int':
1850                ctype = 'uint%d_t' % size
1851                is_signed = 0
1852            elif desc == 'float':
1853                is_signed = 1       # shouldn't really matter
1854                if size == 32:
1855                    ctype = 'float'
1856                elif size == 64:
1857                    ctype = 'double'
1858            elif desc == 'twin64 int':
1859                is_signed = 0
1860                ctype = 'Twin64_t'
1861            elif desc == 'twin32 int':
1862                is_signed = 0
1863                ctype = 'Twin32_t'
1864            if ctype == '':
1865                error(parser, lineno,
1866                      'Unrecognized type description "%s" in user_dict')
1867            operand_type[ext] = (size, ctype, is_signed)
1868
1869        self.operandTypeMap = operand_type
1870
1871    def buildOperandNameMap(self, user_dict, lineno):
1872        operand_name = {}
1873        for op_name, val in user_dict.iteritems():
1874            base_cls_name, dflt_ext, reg_spec, flags, sort_pri = val[:5]
1875            if len(val) > 5:
1876                read_code = val[5]
1877            else:
1878                read_code = None
1879            if len(val) > 6:
1880                write_code = val[6]
1881            else:
1882                write_code = None
1883            if len(val) > 7:
1884                error(lineno,
1885                      'error: too many attributes for operand "%s"' %
1886                      base_cls_name)
1887
1888            # Canonical flag structure is a triple of lists, where each list
1889            # indicates the set of flags implied by this operand always, when
1890            # used as a source, and when used as a dest, respectively.
1891            # For simplicity this can be initialized using a variety of fairly
1892            # obvious shortcuts; we convert these to canonical form here.
1893            if not flags:
1894                # no flags specified (e.g., 'None')
1895                flags = ( [], [], [] )
1896            elif isinstance(flags, str):
1897                # a single flag: assumed to be unconditional
1898                flags = ( [ flags ], [], [] )
1899            elif isinstance(flags, list):
1900                # a list of flags: also assumed to be unconditional
1901                flags = ( flags, [], [] )
1902            elif isinstance(flags, tuple):
1903                # it's a tuple: it should be a triple,
1904                # but each item could be a single string or a list
1905                (uncond_flags, src_flags, dest_flags) = flags
1906                flags = (makeList(uncond_flags),
1907                         makeList(src_flags), makeList(dest_flags))
1908            # Accumulate attributes of new operand class in tmp_dict
1909            tmp_dict = {}
1910            attrList = ['reg_spec', 'flags', 'sort_pri',
1911                        'read_code', 'write_code']
1912            if dflt_ext:
1913                (dflt_size, dflt_ctype, dflt_is_signed) = \
1914                            self.operandTypeMap[dflt_ext]
1915                attrList.extend(['dflt_size', 'dflt_ctype',
1916                                 'dflt_is_signed', 'dflt_ext'])
1917            for attr in attrList:
1918                tmp_dict[attr] = eval(attr)
1919            tmp_dict['base_name'] = op_name
1920            # New class name will be e.g. "IntReg_Ra"
1921            cls_name = base_cls_name + '_' + op_name
1922            # Evaluate string arg to get class object.  Note that the
1923            # actual base class for "IntReg" is "IntRegOperand", i.e. we
1924            # have to append "Operand".
1925            try:
1926                base_cls = eval(base_cls_name + 'Operand')
1927            except NameError:
1928                error(lineno,
1929                      'error: unknown operand base class "%s"' % base_cls_name)
1930            # The following statement creates a new class called
1931            # <cls_name> as a subclass of <base_cls> with the attributes
1932            # in tmp_dict, just as if we evaluated a class declaration.
1933            operand_name[op_name] = type(cls_name, (base_cls,), tmp_dict)
1934
1935        self.operandNameMap = operand_name
1936
1937        # Define operand variables.
1938        operands = user_dict.keys()
1939
1940        operandsREString = (r'''
1941        (?<![\w\.])      # neg. lookbehind assertion: prevent partial matches
1942        ((%s)(?:\.(\w+))?)   # match: operand with optional '.' then suffix
1943        (?![\w\.])       # neg. lookahead assertion: prevent partial matches
1944        '''
1945                            % string.join(operands, '|'))
1946
1947        self.operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)
1948
1949        # Same as operandsREString, but extension is mandatory, and only two
1950        # groups are returned (base and ext, not full name as above).
1951        # Used for subtituting '_' for '.' to make C++ identifiers.
1952        operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])'
1953                                   % string.join(operands, '|'))
1954
1955        self.operandsWithExtRE = \
1956            re.compile(operandsWithExtREString, re.MULTILINE)
1957
1958    def substMungedOpNames(self, code):
1959        '''Munge operand names in code string to make legal C++
1960        variable names.  This means getting rid of the type extension
1961        if any.  Will match base_name attribute of Operand object.)'''
1962        return self.operandsWithExtRE.sub(r'\1', code)
1963
1964    def mungeSnippet(self, s):
1965        '''Fix up code snippets for final substitution in templates.'''
1966        if isinstance(s, str):
1967            return self.substMungedOpNames(substBitOps(s))
1968        else:
1969            return s
1970
1971    def update_if_needed(self, file, contents):
1972        '''Update the output file only if the new contents are
1973        different from the current contents.  Minimizes the files that
1974        need to be rebuilt after minor changes.'''
1975
1976        file = os.path.join(self.output_dir, file)
1977        update = False
1978        if os.access(file, os.R_OK):
1979            f = open(file, 'r')
1980            old_contents = f.read()
1981            f.close()
1982            if contents != old_contents:
1983                os.remove(file) # in case it's write-protected
1984                update = True
1985            else:
1986                print 'File', file, 'is unchanged'
1987        else:
1988            update = True
1989        if update:
1990            f = open(file, 'w')
1991            f.write(contents)
1992            f.close()
1993
    # This regular expression matches '##include' directives.  A
    # directive may be preceded by whitespace; the quoted file name is
    # captured in the named group 'filename', and the match extends to
    # the end of the line.
    includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[\w/.-]*)".*$',
                           re.MULTILINE)
1997
1998    def replace_include(self, matchobj, dirname):
1999        """Function to replace a matched '##include' directive with the
2000        contents of the specified file (with nested ##includes
2001        replaced recursively).  'matchobj' is an re match object
2002        (from a match of includeRE) and 'dirname' is the directory
2003        relative to which the file path should be resolved."""
2004
2005        fname = matchobj.group('filename')
2006        full_fname = os.path.normpath(os.path.join(dirname, fname))
2007        contents = '##newfile "%s"\n%s\n##endfile\n' % \
2008                   (full_fname, self.read_and_flatten(full_fname))
2009        return contents
2010
2011    def read_and_flatten(self, filename):
2012        """Read a file and recursively flatten nested '##include' files."""
2013
2014        current_dir = os.path.dirname(filename)
2015        try:
2016            contents = open(filename).read()
2017        except IOError:
2018            error('Error including file "%s"' % filename)
2019
2020        self.fileNameStack.push((filename, 0))
2021
2022        # Find any includes and include them
2023        def replace(matchobj):
2024            return self.replace_include(matchobj, current_dir)
2025        contents = self.includeRE.sub(replace, contents)
2026
2027        self.fileNameStack.pop()
2028        return contents
2029
    def _parse_isa_desc(self, isa_desc_file):
        '''Read in and parse the ISA description, then write the
        generated output files.

        The files are written through update_if_needed(): decoder.hh,
        decoder.cc, one file per CPU model (named by cpu.filename) and
        max_inst_regs.hh.

        The output templates are filled in with "% vars()", i.e. from
        this function's local variables by name, so the local names
        below must not be changed independently of the templates.'''

        # Read file and (recursively) all included files into a string.
        # PLY requires that the input be in a single string so we have to
        # do this up front.
        isa_desc = self.read_and_flatten(isa_desc_file)

        # Initialize filename stack with outer file.
        self.fileNameStack.push((isa_desc_file, 0))

        # Parse it.
        (isa_name, namespace, global_code, namespace_code) = \
                   self.parse(isa_desc)

        # grab the last three path components of isa_desc_file to put in
        # the output
        filename = '/'.join(isa_desc_file.split('/')[-3:])

        # generate decoder.hh
        includes = '#include "base/bitfield.hh" // for bitfield support'
        global_output = global_code.header_output
        namespace_output = namespace_code.header_output
        decode_function = ''
        self.update_if_needed('decoder.hh', file_template % vars())

        # generate decoder.cc
        includes = '#include "decoder.hh"'
        global_output = global_code.decoder_output
        namespace_output = namespace_code.decoder_output
        # namespace_output += namespace_code.decode_block
        decode_function = namespace_code.decode_block
        self.update_if_needed('decoder.cc', file_template % vars())

        # generate per-cpu exec files
        for cpu in self.cpuModels:
            includes = '#include "decoder.hh"\n'
            includes += cpu.includes
            global_output = global_code.exec_output[cpu.name]
            namespace_output = namespace_code.exec_output[cpu.name]
            decode_function = ''
            self.update_if_needed(cpu.filename, file_template % vars())

        # The variable names here are hacky, but this will create local
        # variables which will be referenced in vars() and which hold
        # the values of the corresponding parser attributes.
        MaxInstSrcRegs = self.maxInstSrcRegs
        MaxInstDestRegs = self.maxInstDestRegs
        # max_inst_regs.hh
        self.update_if_needed('max_inst_regs.hh',
                              max_inst_regs_template % vars())
2081
2082    def parse_isa_desc(self, *args, **kwargs):
2083        try:
2084            self._parse_isa_desc(*args, **kwargs)
2085        except ISAParserError, e:
2086            e.exit(self.fileNameStack)
2087
2088# Called as script: get args from command line.
2089# Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models>
if __name__ == '__main__':
    # Executing the first argument (the cpu_models.py path) reads in
    # the CpuModel definitions.
    execfile(sys.argv[1])
    # Look up a CpuModel for every model name given on the command line.
    cpu_models = []
    for cpu in sys.argv[4:]:
        cpu_models.append(CpuModel.dict[cpu])
    # Third argument: output directory.  Second: ISA description file.
    isa_parser = ISAParser(sys.argv[3], cpu_models)
    isa_parser.parse_isa_desc(sys.argv[2])
2094