isa_parser.py revision 7092:fbdf4fca0844
1# Copyright (c) 2003-2005 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26#
27# Authors: Steve Reinhardt
28
29import os
30import sys
31import re
32import string
33import inspect, traceback
34# get type names
35from types import *
36
37from m5.util.grammar import Grammar
38
39debug=False
40
41###################
42# Utility functions
43
44#
45# Indent every line in string 's' by two spaces
46# (except preprocessor directives).
47# Used to make nested code blocks look pretty.
48#
def indent(s):
    """Return 's' with two spaces inserted at the start of every line
    that does not begin with '#' (i.e. preprocessor directives are
    left where they are)."""
    line_start = re.compile(r'^(?!#)', re.MULTILINE)
    return line_start.sub('  ', s)
51
52#
53# Munge a somewhat arbitrarily formatted piece of Python code
54# (e.g. from a format 'let' block) into something whose indentation
55# will get by the Python parser.
56#
57# The two keys here are that Python will give a syntax error if
58# there's any whitespace at the beginning of the first line, and that
59# all lines at the same lexical nesting level must have identical
60# indentation.  Unfortunately the way code literals work, an entire
61# let block tends to have some initial indentation.  Rather than
62# trying to figure out what that is and strip it off, we prepend 'if
63# 1:' to make the let code the nested block inside the if (and have
64# the parser automatically deal with the indentation for us).
65#
66# We don't want to do this if (1) the code block is empty or (2) the
67# first line of the block doesn't have any whitespace at the front.
68
def fixPythonIndentation(s):
    """Make an arbitrarily indented chunk of Python acceptable to the
    Python parser.

    Blank lines are removed; if what is left starts with a space or a
    tab, the text is nested under a leading 'if 1:' line.  Empty input
    and input whose first line is not indented come back unwrapped.
    """
    # get rid of blank (whitespace-only) lines first
    blank_line = re.compile(r'^\s*\n', re.MULTILINE)
    stripped = blank_line.sub('', s)
    if stripped and stripped[0] in ' \t':
        return 'if 1:\n' + stripped
    return stripped
75
class ISAParserError(Exception):
    """Exception raised for errors found while parsing.

    Built either from a message alone (line number recorded as 0) or
    from a line number -- or an object with a 'lexer' attribute, whose
    'lexer.lineno' is used -- followed by a message.
    """
    def __init__(self, first, second=None):
        if second is not None:
            # two-argument form: (line number or lexer holder, message)
            lineno = first
            if hasattr(first, 'lexer'):
                lineno = first.lexer.lineno
            self.lineno = lineno
            self.string = second
        else:
            # one-argument form: message only
            self.lineno = 0
            self.string = first

    def display(self, filename_stack, print_traceback=debug):
        # Output formatted to work under Emacs compile-mode.  Optional
        # 'print_traceback' arg, if set to True, prints a Python stack
        # backtrace too (can be handy when trying to debug the parser
        # itself).
        #
        # Prints one "included from" line per enclosing file, then
        # returns (rather than prints) the final "file:line: message"
        # string.

        pad = ""
        for (filename, line) in filename_stack[:-1]:
            print("%sIn file included from %s:" % (pad, filename))
            pad += "  "

        # Print a Python stack backtrace if requested, or whenever no
        # line number is available.
        if print_traceback or not self.lineno:
            traceback.print_exc()

        location = "%s:" % (filename_stack[-1][0], )
        if self.lineno:
            location += "%d:" % (self.lineno, )

        return "%s%s %s" % (pad, location, self.string)

    def exit(self, filename_stack, print_traceback=debug):
        # Terminate via sys.exit(), passing the display() text as the
        # exit argument.
        message = self.display(filename_stack, print_traceback)
        sys.exit(message)
113
def error(*args):
    """Raise an ISAParserError constructed from the given arguments."""
    exc = ISAParserError(*args)
    raise exc
116
117####################
118# Template objects.
119#
120# Template objects are format strings that allow substitution from
121# the attribute spaces of other objects (e.g. InstObjParams instances).
122
# Matches %(label)s / %(label)d substitution markers (not preceded by
# another '%'); the single group captures the label.
labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')

class Template(object):
    def __init__(self, parser, t):
        self.parser = parser
        self.template = t

    def subst(self, d):
        """Return the template text with its %(name)s markers filled in.

        'd' may be an InstObjParams instance, a dictionary, or any
        object with a __dict__; anything else raises TypeError.  The
        substitution namespace starts from a copy of the parser's
        templateMap.
        """
        parser = self.parser

        # Protect non-Python-dict substitutions (e.g. if there's a printf
        # in the templated C++ code)
        template = parser.protectNonSubstPercents(self.template)
        # CPU-model-specific substitutions are handled later (in GenCode).
        template = parser.protectCpuSymbols(template)

        # Work on a copy of the template namespace since it is
        # modified below.
        myDict = parser.templateMap.copy()

        if isinstance(d, InstObjParams):
            # If we're dealing with an InstObjParams object, we need
            # to be a little more sophisticated.  The instruction-wide
            # parameters are already formed, but the parameters which
            # are only function wide still need to be generated.
            myDict.update(d.__dict__)
            # The "operands" and "snippets" attributes of the InstObjParams
            # objects are for internal use and not substitution.
            del myDict['operands']
            del myDict['snippets']

            # Munge every snippet whose label is referenced by the
            # template.
            snippets = {}
            for label in labelRE.findall(template):
                if label in d.snippets:
                    snippets[label] = parser.mungeSnippet(d.snippets[label])

            myDict.update(snippets)

            # Add in template itself in case it references any
            # operands explicitly (like Mem)
            compositeCode = ' '.join(map(str, snippets.values()))
            compositeCode += ' ' + template

            operands = SubOperandList(parser, compositeCode, d.operands)

            myDict['op_decl'] = operands.concatAttrStrings('op_decl')

            is_src = lambda op: op.is_src
            is_dest = lambda op: op.is_dest

            myDict['op_src_decl'] = \
                      operands.concatSomeAttrStrings(is_src, 'op_src_decl')
            myDict['op_dest_decl'] = \
                      operands.concatSomeAttrStrings(is_dest, 'op_dest_decl')

            myDict['op_rd'] = operands.concatAttrStrings('op_rd')
            myDict['op_wb'] = operands.concatAttrStrings('op_wb')

            mem_op = d.operands.memOperand
            if mem_op:
                myDict['mem_acc_size'] = mem_op.mem_acc_size
                myDict['mem_acc_type'] = mem_op.mem_acc_type

        elif isinstance(d, dict):
            # if the argument is a dictionary, we just use it.
            myDict.update(d)
        elif hasattr(d, '__dict__'):
            # if the argument is an object, we use its attribute map.
            myDict.update(d.__dict__)
        else:
            raise TypeError("Template.subst() arg must be or have dictionary")
        return template % myDict

    # Convert to string.  This handles the case when a template with a
    # CPU-specific term gets interpolated into another template or into
    # an output block.
    def __str__(self):
        return self.parser.expandCpuSymbolsToString(self.template)
205
206################
207# Format object.
208#
209# A format object encapsulates an instruction format.  It must provide
210# a defineInst() method that generates the code for an instruction
211# definition.
212
213class Format(object):
214    def __init__(self, id, params, code):
215        self.id = id
216        self.params = params
217        label = 'def format ' + id
218        self.user_code = compile(fixPythonIndentation(code), label, 'exec')
219        param_list = string.join(params, ", ")
220        f = '''def defInst(_code, _context, %s):
221                my_locals = vars().copy()
222                exec _code in _context, my_locals
223                return my_locals\n''' % param_list
224        c = compile(f, label + ' wrapper', 'exec')
225        exec c
226        self.func = defInst
227
228    def defineInst(self, parser, name, args, lineno):
229        parser.updateExportContext()
230        context = parser.exportContext.copy()
231        if len(name):
232            Name = name[0].upper()
233            if len(name) > 1:
234                Name += name[1:]
235        context.update({ 'name' : name, 'Name' : Name })
236        try:
237            vars = self.func(self.user_code, context, *args[0], **args[1])
238        except Exception, exc:
239            if debug:
240                raise
241            error(lineno, 'error defining "%s": %s.' % (name, exc))
242        for k in vars.keys():
243            if k not in ('header_output', 'decoder_output',
244                         'exec_output', 'decode_block'):
245                del vars[k]
246        return GenCode(parser, **vars)
247
248# Special null format to catch an implicit-format instruction
249# definition outside of any format block.
class NoFormat(object):
    def __init__(self):
        self.defaultInst = ''

    def defineInst(self, parser, name, args, lineno):
        # Defining an instruction with this format is always an error.
        message = 'instruction definition "%s" with no active format!' % name
        error(lineno, message)
257
258###############
259# GenCode class
260#
261# The GenCode class encapsulates generated code destined for various
262# output files.  The header_output and decoder_output attributes are
263# strings containing code destined for decoder.hh and decoder.cc
264# respectively.  The decode_block attribute contains code to be
265# incorporated in the decode function itself (that will also end up in
266# decoder.cc).  The exec_output attribute is a dictionary with a key
267# for each CPU model name; the value associated with a particular key
268# is the string of code for that CPU model's exec.cc file.  The
269# has_decode_default attribute is used in the decode block to allow
270# explicit default clauses to override default default clauses.
271
class GenCode(object):
    # Constructor.  At this point we substitute out all CPU-specific
    # symbols.  For the exec output, these go into the per-model
    # dictionary.  For all other output types they get collapsed into
    # a single string.
    def __init__(self, parser,
                 header_output = '', decoder_output = '', exec_output = '',
                 decode_block = '', has_decode_default = False):
        self.parser = parser
        expand = parser.expandCpuSymbolsToString
        self.header_output = expand(header_output)
        self.decoder_output = expand(decoder_output)
        if isinstance(exec_output, dict):
            # Already a per-model dictionary: use it as is.
            self.exec_output = exec_output
        elif isinstance(exec_output, str):
            # If the exec_output arg is a single string, we replicate
            # it for each of the CPU models, substituting any
            # %(CPU_foo)s params appropriately.
            self.exec_output = parser.expandCpuSymbolsToDict(exec_output)
        self.decode_block = expand(decode_block)
        self.has_decode_default = has_decode_default

    # Override '+' operator: generate a new GenCode object that
    # concatenates all the individual strings in the operands.
    def __add__(self, other):
        merged_exec = dict([(cpu.name,
                             self.exec_output[cpu.name] +
                             other.exec_output[cpu.name])
                            for cpu in self.parser.cpuModels])
        return GenCode(self.parser,
                       self.header_output + other.header_output,
                       self.decoder_output + other.decoder_output,
                       merged_exec,
                       self.decode_block + other.decode_block,
                       self.has_decode_default or other.has_decode_default)

    # Prepend a string (typically a comment) to all the strings.
    def prepend_all(self, pre):
        self.header_output = pre + self.header_output
        self.decoder_output = pre + self.decoder_output
        self.decode_block = pre + self.decode_block
        per_model = self.exec_output
        for cpu in self.parser.cpuModels:
            per_model[cpu.name] = pre + per_model[cpu.name]

    # Wrap the decode block in a pair of strings (e.g., 'case foo:'
    # and 'break;').  Used to build the big nested switch statement.
    def wrap_decode_block(self, pre, post = ''):
        body = indent(self.decode_block)
        self.decode_block = pre + body + post
319
320#####################################################################
321#
322#                      Bitfield Operator Support
323#
324#####################################################################
325
# single-index selector: <n:>
bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')

# selector applied to a (possibly dotted) identifier: foo<a:b>
bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
# selector applied to a parenthesized expression: (...)<a:b>
bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')

def substBitOps(code):
    """Rewrite bitfield selectors in 'code' as bits() calls.

    foo<a:b> becomes bits(foo, a, b), (expr)<a:b> becomes
    bits((expr), a, b), and the single-index form <n:> is treated as
    <n:n>.  Returns the rewritten string.  Calls sys.exit() if a
    selector follows a ')' that has no matching '('.
    """
    # first convert single-bit selectors to two-index form
    # i.e., <n:> --> <n:n>
    code = bitOp1ArgRE.sub(r'<\1:\1>', code)
    # simple case: selector applied to ID (name)
    # i.e., foo<a:b> --> bits(foo, a, b)
    code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
    # if selector is applied to expression (ending in ')'),
    # we need to search backward for matching '('
    match = bitOpExprRE.search(code)
    while match:
        exprEnd = match.start()
        here = exprEnd - 1
        nestLevel = 1
        while nestLevel > 0:
            # Only give up when we run off the front of the string
            # while still looking; a matching '(' at index 0 is valid.
            if here < 0:
                sys.exit("Didn't find '('!")
            if code[here] == '(':
                nestLevel -= 1
            elif code[here] == ')':
                nestLevel += 1
            here -= 1
        # 'here' stopped one position before the matching '('
        exprStart = here + 1
        newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
                                         match.group(1), match.group(2))
        code = code[:exprStart] + newExpr + code[match.end():]
        match = bitOpExprRE.search(code)
    return code
359
360
361#####################################################################
362#
363#                             Code Parser
364#
365# The remaining code is the support for automatically extracting
366# instruction characteristics from pseudocode.
367#
368#####################################################################
369
# Force the argument to be a list.  Useful for flags, where a caller
# can specify a singleton flag or a list of flags.  Also useful for
# converting tuples to lists so they can be modified.
def makeList(arg):
    """Coerce 'arg' to a list.

    A list is returned unchanged (same object), a tuple is copied into
    a new list, any other false value gives an empty list, and any
    other value is wrapped in a one-element list.
    """
    if isinstance(arg, list):
        return arg
    if isinstance(arg, tuple):
        return list(arg)
    if arg:
        return [ arg ]
    return []
382
class Operand(object):
    '''Base class for operand descriptors.  An instance of this class
    (or actually a class derived from this one) represents a specific
    operand for a code block (e.g, "Rc.sq" as a dest). Intermediate
    derived classes encapsulate the traits of a particular operand
    type (e.g., "32-bit integer register").'''

    def buildReadCode(self, func = None):
        # Expand the operand's read_code template and wrap the result
        # in an assignment to the operand variable.
        subst_dict = dict(name=self.base_name,
                          func=func,
                          reg_idx=self.reg_spec,
                          size=self.size,
                          ctype=self.ctype)
        if hasattr(self, 'src_reg_idx'):
            subst_dict['op_idx'] = self.src_reg_idx
        code = self.read_code % subst_dict
        if self.size == self.dflt_size:
            return '%s = %s;\n' % (self.base_name, code)
        # non-default size: keep only the low 'size' bits
        return '%s = bits(%s, %d, 0);\n' % \
               (self.base_name, code, self.size-1)

    def buildWriteCode(self, func = None):
        # Expand the operand's write_code template and wrap it in a
        # block that also records the written value in traceData.
        final_val = self.base_name
        if self.is_signed and self.size != self.dflt_size:
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        subst_dict = dict(name=self.base_name,
                          func=func,
                          reg_idx=self.reg_spec,
                          size=self.size,
                          ctype=self.ctype,
                          final_val=final_val)
        if hasattr(self, 'dest_reg_idx'):
            subst_dict['op_idx'] = self.dest_reg_idx
        code = self.write_code % subst_dict
        return '''
        {
            %s final_val = %s;
            %s;
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, code)

    def __init__(self, parser, full_name, ext, is_src, is_dest):
        self.full_name = full_name
        self.ext = ext
        self.is_src = is_src
        self.is_dest = is_dest
        # The 'effective extension' (eff_ext) is either the actual
        # extension, if one was explicitly provided, or the default.
        self.eff_ext = ext or self.dflt_ext

        (size, ctype, is_signed) = parser.operandTypeMap[self.eff_ext]
        self.size = size
        self.ctype = ctype
        self.is_signed = is_signed

        # note that mem_acc_size is undefined for non-mem operands...
        # template must be careful not to use it if it doesn't apply.
        if self.isMem():
            self.mem_acc_size = self.makeAccSize()
            if self.ctype in ('Twin32_t', 'Twin64_t'):
                self.mem_acc_type = 'Twin'
            else:
                self.mem_acc_type = 'uint'

    # Finalize additional fields (primarily code fields).  This step
    # is done separately since some of these fields may depend on the
    # register index enumeration that hasn't been performed yet at the
    # time of __init__().
    def finalize(self):
        self.flags = self.getFlags()
        self.constructor = self.makeConstructor()
        self.op_decl = self.makeDecl()

        # read code and declaration are only generated for sources
        if not self.is_src:
            self.op_rd = ''
            self.op_src_decl = ''
        else:
            self.op_rd = self.makeRead()
            self.op_src_decl = self.makeDecl()

        # likewise, writeback code is only generated for destinations
        if not self.is_dest:
            self.op_wb = ''
            self.op_dest_decl = ''
        else:
            self.op_wb = self.makeWrite()
            self.op_dest_decl = self.makeDecl()

    # Operand-kind predicates; the base class answers "no" to all of
    # them.
    def isMem(self):
        return 0

    def isReg(self):
        return 0

    def isFloatReg(self):
        return 0

    def isIntReg(self):
        return 0

    def isControlReg(self):
        return 0

    def getFlags(self):
        # Start from a copy (via the empty slice) of self.flags[0] so
        # the original is not modified, then add the source-only and
        # dest-only entries as applicable.
        collected = self.flags[0][:]
        if self.is_src:
            collected += self.flags[1]
        if self.is_dest:
            collected += self.flags[2]
        return collected

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        return '%s %s = 0;\n' % (self.ctype, self.base_name)
503
class IntRegOperand(Operand):
    def isReg(self):
        return 1

    def isIntReg(self):
        return 1

    def makeConstructor(self):
        # Emit the source/dest register index assignments.
        parts = []
        if self.is_src:
            parts.append('\n\t_srcRegIdx[%d] = %s;' %
                         (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            parts.append('\n\t_destRegIdx[%d] = %s;' %
                         (self.dest_reg_idx, self.reg_spec))
        return ''.join(parts)

    def makeRead(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to read integer register as FP')
        # an operand-specific read template takes precedence
        if self.read_code is not None:
            return self.buildReadCode('readIntRegOperand')
        reg_val = 'xc->readIntRegOperand(this, %d)' % (self.src_reg_idx,)
        if self.size == self.dflt_size:
            return '%s = %s;\n' % (self.base_name, reg_val)
        if self.size > self.dflt_size:
            # wider than the default size: sign-extend signed operands
            if self.is_signed:
                reg_val = 'sext<%d>(%s)' % (self.dflt_size, reg_val)
            return '%s = %s;\n' % (self.base_name, reg_val)
        # narrower than the default size: keep the low 'size' bits
        return '%s = bits(%s, %d, 0);\n' % \
               (self.base_name, reg_val, self.size-1)

    def makeWrite(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to write integer register as FP')
        # an operand-specific write template takes precedence
        if self.write_code is not None:
            return self.buildWriteCode('setIntRegOperand')
        final_val = self.base_name
        if self.is_signed and self.size != self.dflt_size:
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        return '''
        {
            %s final_val = %s;
            xc->setIntRegOperand(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, self.dest_reg_idx)
555
class FloatRegOperand(Operand):
    def isReg(self):
        return 1

    def isFloatReg(self):
        return 1

    def makeConstructor(self):
        # Register indices are offset by FP_Base_DepTag.
        parts = []
        if self.is_src:
            parts.append('\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' %
                         (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            parts.append('\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' %
                         (self.dest_reg_idx, self.reg_spec))
        return ''.join(parts)

    def makeRead(self):
        # float/double operands use the value accessor; everything
        # else goes through the raw-bits accessor and may need to be
        # narrowed to the operand size.
        bit_select = 0
        if self.ctype in ('float', 'double'):
            func = 'readFloatRegOperand'
        else:
            func = 'readFloatRegOperandBits'
            if self.size != self.dflt_size:
                bit_select = 1
        base = 'xc->%s(this, %d)' % (func, self.src_reg_idx)
        # an operand-specific read template takes precedence
        if self.read_code is not None:
            return self.buildReadCode(func)
        if not bit_select:
            return '%s = %s;\n' % (self.base_name, base)
        return '%s = bits(%s, %d, 0);\n' % \
               (self.base_name, base, self.size-1)

    def makeWrite(self):
        final_val = self.base_name
        final_ctype = self.ctype
        func = 'setFloatRegOperandBits'
        if self.ctype in ('float', 'double'):
            func = 'setFloatRegOperand'
        elif self.ctype not in ('uint32_t', 'uint64_t'):
            # any other type is written as an unsigned value of the
            # default size, sign-extended first where applicable
            final_ctype = 'uint%d_t' % self.dflt_size
            if self.is_signed and self.size != self.dflt_size:
                final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        # an operand-specific write template takes precedence
        if self.write_code is not None:
            return self.buildWriteCode(func)
        return '''
        {
            %s final_val = %s;
            xc->%s(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (final_ctype, final_val, func, self.dest_reg_idx)
611
class ControlRegOperand(Operand):
    def isReg(self):
        return 1

    def isControlReg(self):
        return 1

    def makeConstructor(self):
        # Register indices are offset by Ctrl_Base_DepTag.
        parts = []
        if self.is_src:
            parts.append('\n\t_srcRegIdx[%d] = %s + Ctrl_Base_DepTag;' %
                         (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            parts.append('\n\t_destRegIdx[%d] = %s + Ctrl_Base_DepTag;' %
                         (self.dest_reg_idx, self.reg_spec))
        return ''.join(parts)

    def makeRead(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to read control register as FP')
        # an operand-specific read template takes precedence
        if self.read_code is not None:
            return self.buildReadCode('readMiscRegOperand')
        base = 'xc->readMiscRegOperand(this, %s)' % self.src_reg_idx
        if self.size != self.dflt_size:
            # non-default size: keep only the low 'size' bits
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, base, self.size-1)
        return '%s = %s;\n' % (self.base_name, base)

    def makeWrite(self):
        if self.ctype in ('float', 'double'):
            error('Attempt to write control register as FP')
        # an operand-specific write template takes precedence
        if self.write_code is not None:
            return self.buildWriteCode('setMiscRegOperand')
        set_stmt = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
                   (self.dest_reg_idx, self.base_name)
        trace_stmt = 'if (traceData) { traceData->setData(%s); }' % \
                     self.base_name
        return set_stmt + trace_stmt
652
class MemOperand(Operand):
    def isMem(self):
        return 1

    def makeConstructor(self):
        return ''

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        # Declare memory data variable.
        name = self.base_name
        if self.ctype in ('Twin32_t', 'Twin64_t'):
            # twin types are cleared member by member
            return "%s %s; %s.a = 0; %s.b = 0;\n" % \
                   (self.ctype, name, name, name)
        return '%s %s = 0;\n' % (self.ctype, name)

    def makeRead(self):
        # Read code is only produced from an explicit read template.
        if self.read_code is None:
            return ''
        return self.buildReadCode()

    def makeWrite(self):
        # Write code is only produced from an explicit write template.
        if self.write_code is None:
            return ''
        return self.buildWriteCode()

    # Return the memory access size *in bits*, suitable for
    # forming a type via "uint%d_t".  Divide by 8 if you want bytes.
    def makeAccSize(self):
        return self.size
683
class PCOperand(Operand):
    def makeConstructor(self):
        return ''

    def makeRead(self):
        # read the operand variable from xc->readPC()
        name = self.base_name
        return '%s = xc->readPC();\n' % name

    def makeWrite(self):
        # pass the operand variable to xc->setPC()
        name = self.base_name
        return 'xc->setPC(%s);\n' % name
693
class UPCOperand(Operand):
    def makeConstructor(self):
        return ''

    def makeRead(self):
        # fall back to the stock accessor when no read template is set
        if self.read_code is None:
            return '%s = xc->readMicroPC();\n' % self.base_name
        return self.buildReadCode('readMicroPC')

    def makeWrite(self):
        # fall back to the stock setter when no write template is set
        if self.write_code is None:
            return 'xc->setMicroPC(%s);\n' % self.base_name
        return self.buildWriteCode('setMicroPC')
707
class NUPCOperand(Operand):
    def makeConstructor(self):
        return ''

    def makeRead(self):
        # fall back to the stock accessor when no read template is set
        if self.read_code is None:
            return '%s = xc->readNextMicroPC();\n' % self.base_name
        return self.buildReadCode('readNextMicroPC')

    def makeWrite(self):
        # fall back to the stock setter when no write template is set
        if self.write_code is None:
            return 'xc->setNextMicroPC(%s);\n' % self.base_name
        return self.buildWriteCode('setNextMicroPC')
721
class NPCOperand(Operand):
    def makeConstructor(self):
        return ''

    def makeRead(self):
        # fall back to the stock accessor when no read template is set
        if self.read_code is None:
            return '%s = xc->readNextPC();\n' % self.base_name
        return self.buildReadCode('readNextPC')

    def makeWrite(self):
        # fall back to the stock setter when no write template is set
        if self.write_code is None:
            return 'xc->setNextPC(%s);\n' % self.base_name
        return self.buildWriteCode('setNextPC')
735
class NNPCOperand(Operand):
    def makeConstructor(self):
        return ''

    def makeRead(self):
        # fall back to the stock accessor when no read template is set
        if self.read_code is None:
            return '%s = xc->readNextNPC();\n' % self.base_name
        return self.buildReadCode('readNextNPC')

    def makeWrite(self):
        # fall back to the stock setter when no write template is set
        if self.write_code is None:
            return 'xc->setNextNPC(%s);\n' % self.base_name
        return self.buildWriteCode('setNextNPC')
749
750class OperandList(object):
751    '''Find all the operands in the given code block.  Returns an operand
752    descriptor list (instance of class OperandList).'''
753    def __init__(self, parser, code):
754        self.items = []
755        self.bases = {}
756        # delete comments so we don't match on reg specifiers inside
757        code = commentRE.sub('', code)
758        # search for operands
759        next_pos = 0
760        while 1:
761            match = parser.operandsRE.search(code, next_pos)
762            if not match:
763                # no more matches: we're done
764                break
765            op = match.groups()
766            # regexp groups are operand full name, base, and extension
767            (op_full, op_base, op_ext) = op
768            # if the token following the operand is an assignment, this is
769            # a destination (LHS), else it's a source (RHS)
770            is_dest = (assignRE.match(code, match.end()) != None)
771            is_src = not is_dest
772            # see if we've already seen this one
773            op_desc = self.find_base(op_base)
774            if op_desc:
775                if op_desc.ext != op_ext:
776                    error('Inconsistent extensions for operand %s' % \
777                          op_base)
778                op_desc.is_src = op_desc.is_src or is_src
779                op_desc.is_dest = op_desc.is_dest or is_dest
780            else:
781                # new operand: create new descriptor
782                op_desc = parser.operandNameMap[op_base](parser,
783                    op_full, op_ext, is_src, is_dest)
784                self.append(op_desc)
785            # start next search after end of current match
786            next_pos = match.end()
787        self.sort()
788        # enumerate source & dest register operands... used in building
789        # constructor later
790        self.numSrcRegs = 0
791        self.numDestRegs = 0
792        self.numFPDestRegs = 0
793        self.numIntDestRegs = 0
794        self.memOperand = None
795        for op_desc in self.items:
796            if op_desc.isReg():
797                if op_desc.is_src:
798                    op_desc.src_reg_idx = self.numSrcRegs
799                    self.numSrcRegs += 1
800                if op_desc.is_dest:
801                    op_desc.dest_reg_idx = self.numDestRegs
802                    self.numDestRegs += 1
803                    if op_desc.isFloatReg():
804                        self.numFPDestRegs += 1
805                    elif op_desc.isIntReg():
806                        self.numIntDestRegs += 1
807            elif op_desc.isMem():
808                if self.memOperand:
809                    error("Code block has more than one memory operand.")
810                self.memOperand = op_desc
811        if parser.maxInstSrcRegs < self.numSrcRegs:
812            parser.maxInstSrcRegs = self.numSrcRegs
813        if parser.maxInstDestRegs < self.numDestRegs:
814            parser.maxInstDestRegs = self.numDestRegs
815        # now make a final pass to finalize op_desc fields that may depend
816        # on the register enumeration
817        for op_desc in self.items:
818            op_desc.finalize()
819
820    def __len__(self):
821        return len(self.items)
822
823    def __getitem__(self, index):
824        return self.items[index]
825
826    def append(self, op_desc):
827        self.items.append(op_desc)
828        self.bases[op_desc.base_name] = op_desc
829
830    def find_base(self, base_name):
831        # like self.bases[base_name], but returns None if not found
832        # (rather than raising exception)
833        return self.bases.get(base_name)
834
835    # internal helper function for concat[Some]Attr{Strings|Lists}
836    def __internalConcatAttrs(self, attr_name, filter, result):
837        for op_desc in self.items:
838            if filter(op_desc):
839                result += getattr(op_desc, attr_name)
840        return result
841
842    # return a single string that is the concatenation of the (string)
843    # values of the specified attribute for all operands
844    def concatAttrStrings(self, attr_name):
845        return self.__internalConcatAttrs(attr_name, lambda x: 1, '')
846
847    # like concatAttrStrings, but only include the values for the operands
848    # for which the provided filter function returns true
849    def concatSomeAttrStrings(self, filter, attr_name):
850        return self.__internalConcatAttrs(attr_name, filter, '')
851
852    # return a single list that is the concatenation of the (list)
853    # values of the specified attribute for all operands
854    def concatAttrLists(self, attr_name):
855        return self.__internalConcatAttrs(attr_name, lambda x: 1, [])
856
857    # like concatAttrLists, but only include the values for the operands
858    # for which the provided filter function returns true
859    def concatSomeAttrLists(self, filter, attr_name):
860        return self.__internalConcatAttrs(attr_name, filter, [])
861
862    def sort(self):
863        self.items.sort(lambda a, b: a.sort_pri - b.sort_pri)
864
class SubOperandList(OperandList):
    '''Operand list for a piece of code whose operands are all expected
    to be present in an existing "master" operand list.

    No new descriptors are created: the sub list holds references to the
    master list's descriptors for just those operands that occur in the
    given code.'''
    def __init__(self, parser, code, master_list):
        # Note: the base class constructor is deliberately not invoked;
        # this constructor sets up items/bases itself.
        self.items = []
        self.bases = {}
        # strip comments first so that register specifiers mentioned
        # inside them are not picked up as operands
        stripped = commentRE.sub('', code)
        # walk through every operand match in the code
        pos = 0
        while True:
            match = parser.operandsRE.search(stripped, pos)
            if not match:
                # ran out of matches
                break
            # the groups are the operand's full name, base name and
            # extension; only the base name is needed here
            (op_full, op_base, op_ext) = match.groups()
            if not master_list.find_base(op_base):
                error('Found operand %s which is not in the master list!' \
                      ' This is an internal error' % op_base)
            elif not self.find_base(op_base):
                # first occurrence in this code: reference the master
                # list's descriptor from this sub list
                self.append(master_list.bases[op_base])
            # resume searching just past this match
            pos = match.end()
        self.sort()
        # at most one memory operand is allowed per code block
        self.memOperand = None
        for desc in self.items:
            if not desc.isMem():
                continue
            if self.memOperand:
                error("Code block has more than one memory operand.")
            self.memOperand = desc
904
# Regular expression object to match C++ '//' comments, up to and
# including the newline that ends them.  The newline is optional so
# that a comment on the last line of a code block (one with no
# trailing newline) is stripped as well; otherwise words inside such a
# comment would be scanned as if they were code.
# (used to delete comments before searching code for operands)
commentRE = re.compile(r'//.*\n?')
908
# Regular expression object to match the start of an assignment:
# optional whitespace followed by a '=' that is not the first
# character of '=='.  It is matched against the text immediately
# following an operand; a match marks the operand as a destination.
# NOTE(review): the pattern has no '^' or '$', so re.MULTILINE appears
# to have no effect here.
# (used in findOperands())
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
912
def makeFlagConstructor(flag_list):
    '''Return C++ code that sets flags[<flag>] = true for each flag.

    flag_list is a list of flag name strings.  As a side effect it is
    sorted and stripped of duplicates in place, so the caller's list is
    left sorted and unique.  Returns the empty string when flag_list is
    empty; otherwise one newline/tab-prefixed statement per flag.'''
    if not flag_list:
        return ''
    # filter out repeated flags; slice assignment updates the caller's
    # list object rather than rebinding the local name
    flag_list[:] = sorted(set(flag_list))
    pre = '\n\tflags['
    post = '] = true;'
    # joining on (post + pre) closes one statement and opens the next
    return pre + (post + pre).join(flag_list) + post
928
# Assume all instruction flags are of the form 'IsFoo'
# (applied with match(), so any string starting with 'Is' qualifies)
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass
# NOTE(review): when applied with match() this accepts any string that
# merely contains 'Op' (e.g. 'FooOpBar'), not only names ending in it.
opClassRE = re.compile(r'.*Op|No_OpClass')
934
class InstObjParams(object):
    '''Parameters describing one instruction class, derived from its
    code snippets.

    Attributes set by the constructor: mnemonic, class_name,
    base_class, snippets, operands (an OperandList built from all the
    snippets), constructor (C++ constructor body text), flags, op_class
    and fp_enable_check.'''
    def __init__(self, parser, mnem, class_name, base_class = '',
                 snippets = None, opt_args = None):
        '''parser      -- the ISA parser, passed on to OperandList
        mnem        -- instruction mnemonic
        class_name  -- name of the instruction class
        base_class  -- name of the base class (default '')
        snippets    -- dict mapping snippet names to code, or a single
                       code value, which is stored under the key 'code'
        opt_args    -- sequence of extra StaticInst flags and/or an
                       OpClass value'''
        # None is used for the defaults instead of {} / []: a default
        # dict would be stored in self.snippets and so be shared (and
        # mutable) across every instance created without snippets.
        if snippets is None:
            snippets = {}
        if opt_args is None:
            opt_args = []
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        if not isinstance(snippets, dict):
            snippets = {'code' : snippets}
        # operands are collected from all of the snippets together
        compositeCode = ' '.join(map(str, snippets.values()))
        self.snippets = snippets

        self.operands = OperandList(parser, compositeCode)
        self.constructor = self.operands.concatAttrStrings('constructor')
        self.constructor += \
                 '\n\t_numSrcRegs = %d;' % self.operands.numSrcRegs
        self.constructor += \
                 '\n\t_numDestRegs = %d;' % self.operands.numDestRegs
        self.constructor += \
                 '\n\t_numFPDestRegs = %d;' % self.operands.numFPDestRegs
        self.constructor += \
                 '\n\t_numIntDestRegs = %d;' % self.operands.numIntDestRegs
        self.flags = self.operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and can be overridden
        # later otherwise.
        if 'IsStore' in self.flags:
            self.op_class = 'MemWriteOp'
        elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
            self.op_class = 'MemReadOp'
        elif 'IsFloating' in self.flags:
            self.op_class = 'FloatAddOp'
        else:
            self.op_class = 'IntAluOp'

        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error('InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to constructor here to include
        # any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
        else:
            self.fp_enable_check = ''
993
994##############
995# Stack: a simple stack object.  Used for both formats (formatStack)
996# and default cases (defaultStack).  Simply wraps a list to give more
997# stack-like syntax and enable initialization with an argument list
998# (as opposed to an argument that's a list).
999
class Stack(list):
    '''A list with stack-style push()/top() accessors that is
    initialized from its argument list: Stack(a, b) holds [a, b].'''
    def __init__(self, *items):
        super(Stack, self).__init__(items)

    def push(self, item):
        '''Place item on top of the stack.'''
        self.append(item)

    def top(self):
        '''Return the item on top of the stack without removing it.'''
        return self[-1]
1009
1010#######################
1011#
1012# Output file template
1013#
1014
# Skeleton of a generated C++ file.  The %(...)s placeholders name the
# values to be supplied: filename, includes, global_output, namespace,
# namespace_output and decode_function.
file_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s

%(decode_function)s
'''
1034
# Skeleton of a generated C++ file declaring the MaxInstSrcRegs and
# MaxInstDestRegs constants.  Placeholders: filename and namespace
# (strings), MaxInstSrcRegs and MaxInstDestRegs (integers, %d).
max_inst_regs_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

namespace %(namespace)s {

    const int MaxInstSrcRegs = %(MaxInstSrcRegs)d;
    const int MaxInstDestRegs = %(MaxInstDestRegs)d;

} // namespace %(namespace)s

'''
1050
1051class ISAParser(Grammar):
1052    def __init__(self, output_dir, cpu_models):
1053        super(ISAParser, self).__init__()
1054        self.output_dir = output_dir
1055
1056        self.cpuModels = cpu_models
1057
1058        # variable to hold templates
1059        self.templateMap = {}
1060
1061        # This dictionary maps format name strings to Format objects.
1062        self.formatMap = {}
1063
1064        # The format stack.
1065        self.formatStack = Stack(NoFormat())
1066
1067        # The default case stack.
1068        self.defaultStack = Stack(None)
1069
1070        # Stack that tracks current file and line number.  Each
1071        # element is a tuple (filename, lineno) that records the
1072        # *current* filename and the line number in the *previous*
1073        # file where it was included.
1074        self.fileNameStack = Stack()
1075
1076        symbols = ('makeList', 're', 'string')
1077        self.exportContext = dict([(s, eval(s)) for s in symbols])
1078
1079        self.maxInstSrcRegs = 0
1080        self.maxInstDestRegs = 0
1081
1082    #####################################################################
1083    #
1084    #                                Lexer
1085    #
1086    # The PLY lexer module takes two things as input:
1087    # - A list of token names (the string list 'tokens')
1088    # - A regular expression describing a match for each token.  The
1089    #   regexp for token FOO can be provided in two ways:
1090    #   - as a string variable named t_FOO
1091    #   - as the doc string for a function named t_FOO.  In this case,
1092    #     the function is also executed, allowing an action to be
1093    #     associated with each token match.
1094    #
1095    #####################################################################
1096
    # Reserved words.  These are listed separately as they are matched
    # using the same regexp as generic IDs, but distinguished in the
    # t_ID() function.  The PLY documentation suggests this approach.
    # Each entry is a token type name; the keyword as written in the
    # input is its lower-case form (see reserved_map below).
    reserved = (
        'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
        'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
        'OUTPUT', 'SIGNED', 'TEMPLATE'
        )
1105
    # List of tokens.  The lex module requires this.
    # It consists of the reserved words followed by the remaining
    # token type names.
    tokens = reserved + (
        # identifier
        'ID',

        # integer literal
        'INTLIT',

        # string literal
        'STRLIT',

        # code literal
        'CODELIT',

        # ( ) [ ] { } < > = , ; . : :: *
        'LPAREN', 'RPAREN',
        'LBRACKET', 'RBRACKET',
        'LBRACE', 'RBRACE',
        'LESS', 'GREATER', 'EQUALS',
        'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
        'ASTERISK',

        # C preprocessor directives
        'CPPDIRECTIVE'

    # The following are matched but never returned. commented out to
    # suppress PLY warning
        # newfile directive
    #    'NEWFILE',

        # endfile directive
    #    'ENDFILE'
    )
1139
    # Regular expressions for token matching
    # (simple tokens defined as strings: no action is run on a match)
    # NOTE(review): ':' is a prefix of '::'; PLY is documented to try
    # string-defined patterns longest-regex-first, which would let
    # DBLCOLON win over COLON -- confirm against the PLY version in use.
    t_LPAREN           = r'\('
    t_RPAREN           = r'\)'
    t_LBRACKET         = r'\['
    t_RBRACKET         = r'\]'
    t_LBRACE           = r'\{'
    t_RBRACE           = r'\}'
    t_LESS             = r'\<'
    t_GREATER          = r'\>'
    t_EQUALS           = r'='
    t_COMMA            = r','
    t_SEMI             = r';'
    t_DOT              = r'\.'
    t_COLON            = r':'
    t_DBLCOLON         = r'::'
    t_ASTERISK         = r'\*'
1156
    # Identifiers and reserved words
    # reserved_map maps each keyword as written in the input (the
    # lower-cased token name, e.g. 'decode') to its token type
    # ('DECODE').  Note that the loop variable 'r' is left behind as a
    # class attribute once the class body has executed.
    reserved_map = { }
    for r in reserved:
        reserved_map[r.lower()] = r
1161
1162    def t_ID(self, t):
1163        r'[A-Za-z_]\w*'
1164        t.type = self.reserved_map.get(t.value, 'ID')
1165        return t
1166
1167    # Integer literal
1168    def t_INTLIT(self, t):
1169        r'-?(0x[\da-fA-F]+)|\d+'
1170        try:
1171            t.value = int(t.value,0)
1172        except ValueError:
1173            error(t, 'Integer value "%s" too large' % t.value)
1174            t.value = 0
1175        return t
1176
1177    # String literal.  Note that these use only single quotes, and
1178    # can span multiple lines.
1179    def t_STRLIT(self, t):
1180        r"(?m)'([^'])+'"
1181        # strip off quotes
1182        t.value = t.value[1:-1]
1183        t.lexer.lineno += t.value.count('\n')
1184        return t
1185
1186
1187    # "Code literal"... like a string literal, but delimiters are
1188    # '{{' and '}}' so they get formatted nicely under emacs c-mode
1189    def t_CODELIT(self, t):
1190        r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
1191        # strip off {{ & }}
1192        t.value = t.value[2:-2]
1193        t.lexer.lineno += t.value.count('\n')
1194        return t
1195
1196    def t_CPPDIRECTIVE(self, t):
1197        r'^\#[^\#].*\n'
1198        t.lexer.lineno += t.value.count('\n')
1199        return t
1200
1201    def t_NEWFILE(self, t):
1202        r'^\#\#newfile\s+"[\w/.-]*"'
1203        self.fileNameStack.push((t.value[11:-1], t.lexer.lineno))
1204        t.lexer.lineno = 0
1205
1206    def t_ENDFILE(self, t):
1207        r'^\#\#endfile'
1208        (old_filename, t.lexer.lineno) = self.fileNameStack.pop()
1209
1210    #
1211    # The functions t_NEWLINE, t_ignore, and t_error are
1212    # special for the lex module.
1213    #
1214
1215    # Newlines
1216    def t_NEWLINE(self, t):
1217        r'\n+'
1218        t.lexer.lineno += t.value.count('\n')
1219
1220    # Comments
    def t_comment(self, t):
        r'//.*'
        # Matches a '//' comment up to (not including) the end of the
        # line.  There is no action and nothing is returned, so no
        # token results from a comment.
1223
    # Completely ignored characters
    # (space, tab and form feed, '\x0c')
    t_ignore = ' \t\x0c'
1226
1227    # Error handler
1228    def t_error(self, t):
1229        error(t, "illegal character '%s'" % t.value[0])
1230        t.skip(1)
1231
1232    #####################################################################
1233    #
1234    #                                Parser
1235    #
1236    # Every function whose name starts with 'p_' defines a grammar
1237    # rule.  The rule is encoded in the function's doc string, while
1238    # the function body provides the action taken when the rule is
1239    # matched.  The argument to each function is a list of the values
1240    # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
1241    # symbols on the RHS.  For tokens, the value is copied from the
1242    # t.value attribute provided by the lexer.  For non-terminals, the
1243    # value is assigned by the producing rule; i.e., the job of the
1244    # grammar rule function is to set the value for the non-terminal
1245    # on the LHS (by assigning to t[0]).
1246    #####################################################################
1247
1248    # The LHS of the first grammar rule is used as the start symbol
1249    # (in this case, 'specification').  Note that this rule enforces
1250    # that there will be exactly one namespace declaration, with 0 or
1251    # more global defs/decls before and after it.  The defs & decls
1252    # before the namespace decl will be outside the namespace; those
1253    # after will be inside.  The decoder function is always inside the
1254    # namespace.
1255    def p_specification(self, t):
1256        'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
1257        global_code = t[1]
1258        isa_name = t[2]
1259        namespace = isa_name + "Inst"
1260        # wrap the decode block as a function definition
1261        t[4].wrap_decode_block('''
1262StaticInstPtr
1263%(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst)
1264{
1265    using namespace %(namespace)s;
1266''' % vars(), '}')
1267        # both the latter output blocks and the decode block are in
1268        # the namespace
1269        namespace_code = t[3] + t[4]
1270        # pass it all back to the caller of yacc.parse()
1271        t[0] = (isa_name, namespace, global_code, namespace_code)
1272
1273    # ISA name declaration looks like "namespace <foo>;"
1274    def p_name_decl(self, t):
1275        'name_decl : NAMESPACE ID SEMI'
1276        t[0] = t[2]
1277
1278    # 'opt_defs_and_outputs' is a possibly empty sequence of
1279    # def and/or output statements.
1280    def p_opt_defs_and_outputs_0(self, t):
1281        'opt_defs_and_outputs : empty'
1282        t[0] = GenCode(self)
1283
1284    def p_opt_defs_and_outputs_1(self, t):
1285        'opt_defs_and_outputs : defs_and_outputs'
1286        t[0] = t[1]
1287
1288    def p_defs_and_outputs_0(self, t):
1289        'defs_and_outputs : def_or_output'
1290        t[0] = t[1]
1291
1292    def p_defs_and_outputs_1(self, t):
1293        'defs_and_outputs : defs_and_outputs def_or_output'
1294        t[0] = t[1] + t[2]
1295
1296    # The list of possible definition/output statements.
1297    def p_def_or_output(self, t):
1298        '''def_or_output : def_format
1299                         | def_bitfield
1300                         | def_bitfield_struct
1301                         | def_template
1302                         | def_operand_types
1303                         | def_operands
1304                         | output_header
1305                         | output_decoder
1306                         | output_exec
1307                         | global_let'''
1308        t[0] = t[1]
1309
1310    # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
1311    # directly to the appropriate output section.
1312
1313    # Massage output block by substituting in template definitions and
1314    # bit operators.  We handle '%'s embedded in the string that don't
1315    # indicate template substitutions (or CPU-specific symbols, which
1316    # get handled in GenCode) by doubling them first so that the
1317    # format operation will reduce them back to single '%'s.
1318    def process_output(self, s):
1319        s = self.protectNonSubstPercents(s)
1320        # protects cpu-specific symbols too
1321        s = self.protectCpuSymbols(s)
1322        return substBitOps(s % self.templateMap)
1323
1324    def p_output_header(self, t):
1325        'output_header : OUTPUT HEADER CODELIT SEMI'
1326        t[0] = GenCode(self, header_output = self.process_output(t[3]))
1327
1328    def p_output_decoder(self, t):
1329        'output_decoder : OUTPUT DECODER CODELIT SEMI'
1330        t[0] = GenCode(self, decoder_output = self.process_output(t[3]))
1331
1332    def p_output_exec(self, t):
1333        'output_exec : OUTPUT EXEC CODELIT SEMI'
1334        t[0] = GenCode(self, exec_output = self.process_output(t[3]))
1335
1336    # global let blocks 'let {{...}}' (Python code blocks) are
1337    # executed directly when seen.  Note that these execute in a
1338    # special variable context 'exportContext' to prevent the code
1339    # from polluting this script's namespace.
1340    def p_global_let(self, t):
1341        'global_let : LET CODELIT SEMI'
1342        self.updateExportContext()
1343        self.exportContext["header_output"] = ''
1344        self.exportContext["decoder_output"] = ''
1345        self.exportContext["exec_output"] = ''
1346        self.exportContext["decode_block"] = ''
1347        try:
1348            exec fixPythonIndentation(t[2]) in self.exportContext
1349        except Exception, exc:
1350            if debug:
1351                raise
1352            error(t, 'error: %s in global let block "%s".' % (exc, t[2]))
1353        t[0] = GenCode(self,
1354                       header_output=self.exportContext["header_output"],
1355                       decoder_output=self.exportContext["decoder_output"],
1356                       exec_output=self.exportContext["exec_output"],
1357                       decode_block=self.exportContext["decode_block"])
1358
1359    # Define the mapping from operand type extensions to C++ types and
1360    # bit widths (stored in operandTypeMap).
    def p_def_operand_types(self, t):
        'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
        # The code literal holds the contents of a Python dict display;
        # wrap it in braces and evaluate it to obtain the dict.  Unlike
        # p_def_operands(), no namespace is passed to eval(), so the
        # expression is evaluated in this function's own scope.
        try:
            user_dict = eval('{' + t[3] + '}')
        except Exception, exc:
            # in debug mode let the original exception propagate
            if debug:
                raise
            error(t,
                  'error: %s in def operand_types block "%s".' % (exc, t[3]))
        self.buildOperandTypeMap(user_dict, t.lexer.lineno)
        t[0] = GenCode(self) # contributes nothing to the output C++ file
1372
1373    # Define the mapping from operand names to operand classes and
1374    # other traits.  Stored in operandNameMap.
1375    def p_def_operands(self, t):
1376        'def_operands : DEF OPERANDS CODELIT SEMI'
1377        if not hasattr(self, 'operandTypeMap'):
1378            error(t, 'error: operand types must be defined before operands')
1379        try:
1380            user_dict = eval('{' + t[3] + '}', self.exportContext)
1381        except Exception, exc:
1382            if debug:
1383                raise
1384            error(t, 'error: %s in def operands block "%s".' % (exc, t[3]))
1385        self.buildOperandNameMap(user_dict, t.lexer.lineno)
1386        t[0] = GenCode(self) # contributes nothing to the output C++ file
1387
    # A bitfield definition looks like:
    # 'def [signed] bitfield <ID> < <first>:<last> >;'
    # (the bit range is written between literal angle brackets).
    # This generates a preprocessor macro in the output file.
1391    def p_def_bitfield_0(self, t):
1392        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
1393        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
1394        if (t[2] == 'signed'):
1395            expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
1396        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1397        t[0] = GenCode(self, header_output=hash_define)
1398
    # alternate form for single bit: 'def [signed] bitfield <ID> < <bit> >;'
1400    def p_def_bitfield_1(self, t):
1401        'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
1402        expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
1403        if (t[2] == 'signed'):
1404            expr = 'sext<%d>(%s)' % (1, expr)
1405        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1406        t[0] = GenCode(self, header_output=hash_define)
1407
1408    # alternate form for structure member: 'def bitfield <ID> <ID>'
1409    def p_def_bitfield_struct(self, t):
1410        'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
1411        if (t[2] != ''):
1412            error(t, 'error: structure bitfields are always unsigned.')
1413        expr = 'machInst.%s' % t[5]
1414        hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
1415        t[0] = GenCode(self, header_output=hash_define)
1416
1417    def p_id_with_dot_0(self, t):
1418        'id_with_dot : ID'
1419        t[0] = t[1]
1420
1421    def p_id_with_dot_1(self, t):
1422        'id_with_dot : ID DOT id_with_dot'
1423        t[0] = t[1] + t[2] + t[3]
1424
1425    def p_opt_signed_0(self, t):
1426        'opt_signed : SIGNED'
1427        t[0] = t[1]
1428
1429    def p_opt_signed_1(self, t):
1430        'opt_signed : empty'
1431        t[0] = ''
1432
1433    def p_def_template(self, t):
1434        'def_template : DEF TEMPLATE ID CODELIT SEMI'
1435        self.templateMap[t[3]] = Template(self, t[4])
1436        t[0] = GenCode(self)
1437
1438    # An instruction format definition looks like
1439    # "def format <fmt>(<params>) {{...}};"
1440    def p_def_format(self, t):
1441        'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
1442        (id, params, code) = (t[3], t[5], t[7])
1443        self.defFormat(id, params, code, t.lexer.lineno)
1444        t[0] = GenCode(self)
1445
1446    # The formal parameter list for an instruction format is a
1447    # possibly empty list of comma-separated parameters.  Positional
1448    # (standard, non-keyword) parameters must come first, followed by
1449    # keyword parameters, followed by a '*foo' parameter that gets
1450    # excess positional arguments (as in Python).  Each of these three
1451    # parameter categories is optional.
1452    #
1453    # Note that we do not support the '**foo' parameter for collecting
1454    # otherwise undefined keyword args.  Otherwise the parameter list
1455    # is (I believe) identical to what is supported in Python.
1456    #
    # The param list generates a flat list of strings: the names of
    # the positional params, then a 'name = value' string for each
    # keyword param, then '*name' for the excess-args param (if any).
1460    def p_param_list_0(self, t):
1461        'param_list : positional_param_list COMMA nonpositional_param_list'
1462        t[0] = t[1] + t[3]
1463
1464    def p_param_list_1(self, t):
1465        '''param_list : positional_param_list
1466                      | nonpositional_param_list'''
1467        t[0] = t[1]
1468
1469    def p_positional_param_list_0(self, t):
1470        'positional_param_list : empty'
1471        t[0] = []
1472
1473    def p_positional_param_list_1(self, t):
1474        'positional_param_list : ID'
1475        t[0] = [t[1]]
1476
1477    def p_positional_param_list_2(self, t):
1478        'positional_param_list : positional_param_list COMMA ID'
1479        t[0] = t[1] + [t[3]]
1480
1481    def p_nonpositional_param_list_0(self, t):
1482        'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
1483        t[0] = t[1] + t[3]
1484
1485    def p_nonpositional_param_list_1(self, t):
1486        '''nonpositional_param_list : keyword_param_list
1487                                    | excess_args_param'''
1488        t[0] = t[1]
1489
1490    def p_keyword_param_list_0(self, t):
1491        'keyword_param_list : keyword_param'
1492        t[0] = [t[1]]
1493
1494    def p_keyword_param_list_1(self, t):
1495        'keyword_param_list : keyword_param_list COMMA keyword_param'
1496        t[0] = t[1] + [t[3]]
1497
1498    def p_keyword_param(self, t):
1499        'keyword_param : ID EQUALS expr'
1500        t[0] = t[1] + ' = ' + t[3].__repr__()
1501
1502    def p_excess_args_param(self, t):
1503        'excess_args_param : ASTERISK ID'
1504        # Just concatenate them: '*ID'.  Wrap in list to be consistent
1505        # with positional_param_list and keyword_param_list.
1506        t[0] = [t[1] + t[2]]
1507
1508    # End of format definition-related rules.
1509    ##############
1510
1511    #
    # A decode block looks like:
    #       decode <field> [default <inst>] { ... }
1514    #
1515    def p_decode_block(self, t):
1516        'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
1517        default_defaults = self.defaultStack.pop()
1518        codeObj = t[5]
1519        # use the "default defaults" only if there was no explicit
1520        # default statement in decode_stmt_list
1521        if not codeObj.has_decode_default:
1522            codeObj += default_defaults
1523        codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
1524        t[0] = codeObj
1525
1526    # The opt_default statement serves only to push the "default
1527    # defaults" onto defaultStack.  This value will be used by nested
1528    # decode blocks, and used and popped off when the current
1529    # decode_block is processed (in p_decode_block() above).
1530    def p_opt_default_0(self, t):
1531        'opt_default : empty'
1532        # no default specified: reuse the one currently at the top of
1533        # the stack
1534        self.defaultStack.push(self.defaultStack.top())
1535        # no meaningful value returned
1536        t[0] = None
1537
1538    def p_opt_default_1(self, t):
1539        'opt_default : DEFAULT inst'
1540        # push the new default
1541        codeObj = t[2]
1542        codeObj.wrap_decode_block('\ndefault:\n', 'break;\n')
1543        self.defaultStack.push(codeObj)
1544        # no meaningful value returned
1545        t[0] = None
1546
1547    def p_decode_stmt_list_0(self, t):
1548        'decode_stmt_list : decode_stmt'
1549        t[0] = t[1]
1550
1551    def p_decode_stmt_list_1(self, t):
1552        'decode_stmt_list : decode_stmt decode_stmt_list'
1553        if (t[1].has_decode_default and t[2].has_decode_default):
1554            error(t, 'Two default cases in decode block')
1555        t[0] = t[1] + t[2]
1556
1557    #
1558    # Decode statement rules
1559    #
1560    # There are four types of statements allowed in a decode block:
1561    # 1. Format blocks 'format <foo> { ... }'
1562    # 2. Nested decode blocks
1563    # 3. Instruction definitions.
1564    # 4. C preprocessor directives.
1565
1566
1567    # Preprocessor directives found in a decode statement list are
1568    # passed through to the output, replicated to all of the output
1569    # code streams.  This works well for ifdefs, so we can ifdef out
1570    # both the declarations and the decode cases generated by an
1571    # instruction definition.  Handling them as part of the grammar
1572    # makes it easy to keep them in the right place with respect to
1573    # the code generated by the other statements.
1574    def p_decode_stmt_cpp(self, t):
1575        'decode_stmt : CPPDIRECTIVE'
1576        t[0] = GenCode(self, t[1], t[1], t[1], t[1])
1577
1578    # A format block 'format <foo> { ... }' sets the default
1579    # instruction format used to handle instruction definitions inside
1580    # the block.  This format can be overridden by using an explicit
1581    # format on the instruction definition or with a nested format
1582    # block.
1583    def p_decode_stmt_format(self, t):
1584        'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
1585        # The format will be pushed on the stack when 'push_format_id'
1586        # is processed (see below).  Once the parser has recognized
1587        # the full production (though the right brace), we're done
1588        # with the format, so now we can pop it.
1589        self.formatStack.pop()
1590        t[0] = t[4]
1591
1592    # This rule exists so we can set the current format (& push the
1593    # stack) when we recognize the format name part of the format
1594    # block.
1595    def p_push_format_id(self, t):
1596        'push_format_id : ID'
1597        try:
1598            self.formatStack.push(self.formatMap[t[1]])
1599            t[0] = ('', '// format %s' % t[1])
1600        except KeyError:
1601            error(t, 'instruction format "%s" not defined.' % t[1])
1602
1603    # Nested decode block: if the value of the current field matches
1604    # the specified constant, do a nested decode on some other field.
1605    def p_decode_stmt_decode(self, t):
1606        'decode_stmt : case_label COLON decode_block'
1607        label = t[1]
1608        codeObj = t[3]
1609        # just wrap the decoding code from the block as a case in the
1610        # outer switch statement.
1611        codeObj.wrap_decode_block('\n%s:\n' % label)
1612        codeObj.has_decode_default = (label == 'default')
1613        t[0] = codeObj
1614
1615    # Instruction definition (finally!).
1616    def p_decode_stmt_inst(self, t):
1617        'decode_stmt : case_label COLON inst SEMI'
1618        label = t[1]
1619        codeObj = t[3]
1620        codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
1621        codeObj.has_decode_default = (label == 'default')
1622        t[0] = codeObj
1623
1624    # The case label is either a list of one or more constants or
1625    # 'default'
1626    def p_case_label_0(self, t):
1627        'case_label : intlit_list'
1628        def make_case(intlit):
1629            if intlit >= 2**32:
1630                return 'case ULL(%#x)' % intlit
1631            else:
1632                return 'case %#x' % intlit
1633        t[0] = ': '.join(map(make_case, t[1]))
1634
1635    def p_case_label_1(self, t):
1636        'case_label : DEFAULT'
1637        t[0] = 'default'
1638
1639    #
1640    # The constant list for a decode case label must be non-empty, but
1641    # may have one or more comma-separated integer literals in it.
1642    #
1643    def p_intlit_list_0(self, t):
1644        'intlit_list : INTLIT'
1645        t[0] = [t[1]]
1646
1647    def p_intlit_list_1(self, t):
1648        'intlit_list : intlit_list COMMA INTLIT'
1649        t[0] = t[1]
1650        t[0].append(t[3])
1651
1652    # Define an instruction using the current instruction format
1653    # (specified by an enclosing format block).
1654    # "<mnemonic>(<args>)"
1655    def p_inst_0(self, t):
1656        'inst : ID LPAREN arg_list RPAREN'
1657        # Pass the ID and arg list to the current format class to deal with.
1658        currentFormat = self.formatStack.top()
1659        codeObj = currentFormat.defineInst(self, t[1], t[3], t.lexer.lineno)
1660        args = ','.join(map(str, t[3]))
1661        args = re.sub('(?m)^', '//', args)
1662        args = re.sub('^//', '', args)
1663        comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
1664        codeObj.prepend_all(comment)
1665        t[0] = codeObj
1666
1667    # Define an instruction using an explicitly specified format:
1668    # "<fmt>::<mnemonic>(<args>)"
1669    def p_inst_1(self, t):
1670        'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
1671        try:
1672            format = self.formatMap[t[1]]
1673        except KeyError:
1674            error(t, 'instruction format "%s" not defined.' % t[1])
1675
1676        codeObj = format.defineInst(self, t[3], t[5], t.lexer.lineno)
1677        comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
1678        codeObj.prepend_all(comment)
1679        t[0] = codeObj
1680
1681    # The arg list generates a tuple, where the first element is a
1682    # list of the positional args and the second element is a dict
1683    # containing the keyword args.
1684    def p_arg_list_0(self, t):
1685        'arg_list : positional_arg_list COMMA keyword_arg_list'
1686        t[0] = ( t[1], t[3] )
1687
1688    def p_arg_list_1(self, t):
1689        'arg_list : positional_arg_list'
1690        t[0] = ( t[1], {} )
1691
1692    def p_arg_list_2(self, t):
1693        'arg_list : keyword_arg_list'
1694        t[0] = ( [], t[1] )
1695
1696    def p_positional_arg_list_0(self, t):
1697        'positional_arg_list : empty'
1698        t[0] = []
1699
1700    def p_positional_arg_list_1(self, t):
1701        'positional_arg_list : expr'
1702        t[0] = [t[1]]
1703
1704    def p_positional_arg_list_2(self, t):
1705        'positional_arg_list : positional_arg_list COMMA expr'
1706        t[0] = t[1] + [t[3]]
1707
1708    def p_keyword_arg_list_0(self, t):
1709        'keyword_arg_list : keyword_arg'
1710        t[0] = t[1]
1711
1712    def p_keyword_arg_list_1(self, t):
1713        'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
1714        t[0] = t[1]
1715        t[0].update(t[3])
1716
1717    def p_keyword_arg(self, t):
1718        'keyword_arg : ID EQUALS expr'
1719        t[0] = { t[1] : t[3] }
1720
1721    #
1722    # Basic expressions.  These constitute the argument values of
1723    # "function calls" (i.e. instruction definitions in the decode
1724    # block) and default values for formal parameters of format
1725    # functions.
1726    #
1727    # Right now, these are either strings, integers, or (recursively)
1728    # lists of exprs (using Python square-bracket list syntax).  Note
    # that bare identifiers are treated as string constants here (since
1730    # there isn't really a variable namespace to refer to).
1731    #
1732    def p_expr_0(self, t):
1733        '''expr : ID
1734                | INTLIT
1735                | STRLIT
1736                | CODELIT'''
1737        t[0] = t[1]
1738
1739    def p_expr_1(self, t):
1740        '''expr : LBRACKET list_expr RBRACKET'''
1741        t[0] = t[2]
1742
1743    def p_list_expr_0(self, t):
1744        'list_expr : expr'
1745        t[0] = [t[1]]
1746
1747    def p_list_expr_1(self, t):
1748        'list_expr : list_expr COMMA expr'
1749        t[0] = t[1] + [t[3]]
1750
1751    def p_list_expr_2(self, t):
1752        'list_expr : empty'
1753        t[0] = []
1754
1755    #
1756    # Empty production... use in other rules for readability.
1757    #
1758    def p_empty(self, t):
1759        'empty :'
1760        pass
1761
1762    # Parse error handler.  Note that the argument here is the
1763    # offending *token*, not a grammar symbol (hence the need to use
1764    # t.value)
1765    def p_error(self, t):
1766        if t:
1767            error(t, "syntax error at '%s'" % t.value)
1768        else:
1769            error("unknown syntax error")
1770
1771    # END OF GRAMMAR RULES
1772
1773    def updateExportContext(self):
1774
1775        # create a continuation that allows us to grab the current parser
1776        def wrapInstObjParams(*args):
1777            return InstObjParams(self, *args)
1778        self.exportContext['InstObjParams'] = wrapInstObjParams
1779        self.exportContext.update(self.templateMap)
1780
1781    def defFormat(self, id, params, code, lineno):
1782        '''Define a new format'''
1783
1784        # make sure we haven't already defined this one
1785        if id in self.formatMap:
1786            error(lineno, 'format %s redefined.' % id)
1787
1788        # create new object and store in global map
1789        self.formatMap[id] = Format(id, params, code)
1790
1791    def expandCpuSymbolsToDict(self, template):
1792        '''Expand template with CPU-specific references into a
1793        dictionary with an entry for each CPU model name.  The entry
1794        key is the model name and the corresponding value is the
1795        template with the CPU-specific refs substituted for that
1796        model.'''
1797
1798        # Protect '%'s that don't go with CPU-specific terms
1799        t = re.sub(r'%(?!\(CPU_)', '%%', template)
1800        result = {}
1801        for cpu in self.cpuModels:
1802            result[cpu.name] = t % cpu.strings
1803        return result
1804
1805    def expandCpuSymbolsToString(self, template):
1806        '''*If* the template has CPU-specific references, return a
1807        single string containing a copy of the template for each CPU
1808        model with the corresponding values substituted in.  If the
1809        template has no CPU-specific references, it is returned
1810        unmodified.'''
1811
1812        if template.find('%(CPU_') != -1:
1813            return reduce(lambda x,y: x+y,
1814                          self.expandCpuSymbolsToDict(template).values())
1815        else:
1816            return template
1817
1818    def protectCpuSymbols(self, template):
1819        '''Protect CPU-specific references by doubling the
1820        corresponding '%'s (in preparation for substituting a different
1821        set of references into the template).'''
1822
1823        return re.sub(r'%(?=\(CPU_)', '%%', template)
1824
1825    def protectNonSubstPercents(self, s):
1826        '''Protect any non-dict-substitution '%'s in a format string
1827        (i.e. those not followed by '(')'''
1828
1829        return re.sub(r'%(?!\()', '%%', s)
1830
1831    def buildOperandTypeMap(self, user_dict, lineno):
1832        """Generate operandTypeMap from the user's 'def operand_types'
1833        statement."""
1834        operand_type = {}
1835        for (ext, (desc, size)) in user_dict.iteritems():
1836            if desc == 'signed int':
1837                ctype = 'int%d_t' % size
1838                is_signed = 1
1839            elif desc == 'unsigned int':
1840                ctype = 'uint%d_t' % size
1841                is_signed = 0
1842            elif desc == 'float':
1843                is_signed = 1       # shouldn't really matter
1844                if size == 32:
1845                    ctype = 'float'
1846                elif size == 64:
1847                    ctype = 'double'
1848            elif desc == 'twin64 int':
1849                is_signed = 0
1850                ctype = 'Twin64_t'
1851            elif desc == 'twin32 int':
1852                is_signed = 0
1853                ctype = 'Twin32_t'
1854            if ctype == '':
1855                error(parser, lineno,
1856                      'Unrecognized type description "%s" in user_dict')
1857            operand_type[ext] = (size, ctype, is_signed)
1858
1859        self.operandTypeMap = operand_type
1860
1861    def buildOperandNameMap(self, user_dict, lineno):
1862        operand_name = {}
1863        for op_name, val in user_dict.iteritems():
1864            base_cls_name, dflt_ext, reg_spec, flags, sort_pri = val[:5]
1865            if len(val) > 5:
1866                read_code = val[5]
1867            else:
1868                read_code = None
1869            if len(val) > 6:
1870                write_code = val[6]
1871            else:
1872                write_code = None
1873            if len(val) > 7:
1874                error(lineno,
1875                      'error: too many attributes for operand "%s"' %
1876                      base_cls_name)
1877
1878            (dflt_size, dflt_ctype, dflt_is_signed) = \
1879                        self.operandTypeMap[dflt_ext]
1880            # Canonical flag structure is a triple of lists, where each list
1881            # indicates the set of flags implied by this operand always, when
1882            # used as a source, and when used as a dest, respectively.
1883            # For simplicity this can be initialized using a variety of fairly
1884            # obvious shortcuts; we convert these to canonical form here.
1885            if not flags:
1886                # no flags specified (e.g., 'None')
1887                flags = ( [], [], [] )
1888            elif isinstance(flags, str):
1889                # a single flag: assumed to be unconditional
1890                flags = ( [ flags ], [], [] )
1891            elif isinstance(flags, list):
1892                # a list of flags: also assumed to be unconditional
1893                flags = ( flags, [], [] )
1894            elif isinstance(flags, tuple):
1895                # it's a tuple: it should be a triple,
1896                # but each item could be a single string or a list
1897                (uncond_flags, src_flags, dest_flags) = flags
1898                flags = (makeList(uncond_flags),
1899                         makeList(src_flags), makeList(dest_flags))
1900            # Accumulate attributes of new operand class in tmp_dict
1901            tmp_dict = {}
1902            for attr in ('dflt_ext', 'reg_spec', 'flags', 'sort_pri',
1903                         'dflt_size', 'dflt_ctype', 'dflt_is_signed',
1904                         'read_code', 'write_code'):
1905                tmp_dict[attr] = eval(attr)
1906            tmp_dict['base_name'] = op_name
1907            # New class name will be e.g. "IntReg_Ra"
1908            cls_name = base_cls_name + '_' + op_name
1909            # Evaluate string arg to get class object.  Note that the
1910            # actual base class for "IntReg" is "IntRegOperand", i.e. we
1911            # have to append "Operand".
1912            try:
1913                base_cls = eval(base_cls_name + 'Operand')
1914            except NameError:
1915                error(lineno,
1916                      'error: unknown operand base class "%s"' % base_cls_name)
1917            # The following statement creates a new class called
1918            # <cls_name> as a subclass of <base_cls> with the attributes
1919            # in tmp_dict, just as if we evaluated a class declaration.
1920            operand_name[op_name] = type(cls_name, (base_cls,), tmp_dict)
1921
1922        self.operandNameMap = operand_name
1923
1924        # Define operand variables.
1925        operands = user_dict.keys()
1926
1927        operandsREString = (r'''
1928        (?<![\w\.])      # neg. lookbehind assertion: prevent partial matches
1929        ((%s)(?:\.(\w+))?)   # match: operand with optional '.' then suffix
1930        (?![\w\.])       # neg. lookahead assertion: prevent partial matches
1931        '''
1932                            % string.join(operands, '|'))
1933
1934        self.operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)
1935
1936        # Same as operandsREString, but extension is mandatory, and only two
1937        # groups are returned (base and ext, not full name as above).
1938        # Used for subtituting '_' for '.' to make C++ identifiers.
1939        operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])'
1940                                   % string.join(operands, '|'))
1941
1942        self.operandsWithExtRE = \
1943            re.compile(operandsWithExtREString, re.MULTILINE)
1944
1945    def substMungedOpNames(self, code):
1946        '''Munge operand names in code string to make legal C++
1947        variable names.  This means getting rid of the type extension
1948        if any.  Will match base_name attribute of Operand object.)'''
1949        return self.operandsWithExtRE.sub(r'\1', code)
1950
1951    def mungeSnippet(self, s):
1952        '''Fix up code snippets for final substitution in templates.'''
1953        if isinstance(s, str):
1954            return self.substMungedOpNames(substBitOps(s))
1955        else:
1956            return s
1957
1958    def update_if_needed(self, file, contents):
1959        '''Update the output file only if the new contents are
1960        different from the current contents.  Minimizes the files that
1961        need to be rebuilt after minor changes.'''
1962
1963        file = os.path.join(self.output_dir, file)
1964        update = False
1965        if os.access(file, os.R_OK):
1966            f = open(file, 'r')
1967            old_contents = f.read()
1968            f.close()
1969            if contents != old_contents:
1970                print 'Updating', file
1971                os.remove(file) # in case it's write-protected
1972                update = True
1973            else:
1974                print 'File', file, 'is unchanged'
1975        else:
1976            print 'Generating', file
1977            update = True
1978        if update:
1979            f = open(file, 'w')
1980            f.write(contents)
1981            f.close()
1982
1983    # This regular expression matches '##include' directives
1984    includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[\w/.-]*)".*$',
1985                           re.MULTILINE)
1986
1987    def replace_include(self, matchobj, dirname):
1988        """Function to replace a matched '##include' directive with the
1989        contents of the specified file (with nested ##includes
1990        replaced recursively).  'matchobj' is an re match object
1991        (from a match of includeRE) and 'dirname' is the directory
1992        relative to which the file path should be resolved."""
1993
1994        fname = matchobj.group('filename')
1995        full_fname = os.path.normpath(os.path.join(dirname, fname))
1996        contents = '##newfile "%s"\n%s\n##endfile\n' % \
1997                   (full_fname, self.read_and_flatten(full_fname))
1998        return contents
1999
2000    def read_and_flatten(self, filename):
2001        """Read a file and recursively flatten nested '##include' files."""
2002
2003        current_dir = os.path.dirname(filename)
2004        try:
2005            contents = open(filename).read()
2006        except IOError:
2007            error('Error including file "%s"' % filename)
2008
2009        self.fileNameStack.push((filename, 0))
2010
2011        # Find any includes and include them
2012        def replace(matchobj):
2013            return self.replace_include(matchobj, current_dir)
2014        contents = self.includeRE.sub(replace, contents)
2015
2016        self.fileNameStack.pop()
2017        return contents
2018
    def _parse_isa_desc(self, isa_desc_file):
        '''Read in and parse the ISA description, then write the
        generated output files.

        'isa_desc_file' is the path of the top-level ISA description.
        Every output file is written through update_if_needed():
        decoder.hh, decoder.cc, one exec file per CPU model (named by
        cpu.filename) and max_inst_regs.hh.'''

        # Read file and (recursively) all included files into a string.
        # PLY requires that the input be in a single string so we have to
        # do this up front.
        isa_desc = self.read_and_flatten(isa_desc_file)

        # Initialize filename stack with outer file.
        self.fileNameStack.push((isa_desc_file, 0))

        # Parse it.
        (isa_name, namespace, global_code, namespace_code) = \
                   self.parse(isa_desc)

        # grab the last three path components of isa_desc_file to put in
        # the output
        filename = '/'.join(isa_desc_file.split('/')[-3:])

        # NOTE: the templates below are filled in from vars(), i.e.
        # from the local variables of this function.  The locals
        # assigned before each update_if_needed() call are therefore
        # part of the template interface (presumably referenced by
        # name in file_template -- confirm against the template before
        # renaming or removing any of them).

        # generate decoder.hh
        includes = '#include "base/bitfield.hh" // for bitfield support'
        global_output = global_code.header_output
        namespace_output = namespace_code.header_output
        decode_function = ''
        self.update_if_needed('decoder.hh', file_template % vars())

        # generate decoder.cc
        includes = '#include "decoder.hh"'
        global_output = global_code.decoder_output
        namespace_output = namespace_code.decoder_output
        # namespace_output += namespace_code.decode_block
        decode_function = namespace_code.decode_block
        self.update_if_needed('decoder.cc', file_template % vars())

        # generate per-cpu exec files
        for cpu in self.cpuModels:
            includes = '#include "decoder.hh"\n'
            includes += cpu.includes
            global_output = global_code.exec_output[cpu.name]
            namespace_output = namespace_code.exec_output[cpu.name]
            decode_function = ''
            self.update_if_needed(cpu.filename, file_template % vars())

        # The variable names here are hacky, but this will create local
        # variables which will be referenced in vars() which have the
        # value of the globals.
        MaxInstSrcRegs = self.maxInstSrcRegs
        MaxInstDestRegs = self.maxInstDestRegs
        # max_inst_regs.hh
        self.update_if_needed('max_inst_regs.hh',
                              max_inst_regs_template % vars())
2070
2071    def parse_isa_desc(self, *args, **kwargs):
2072        try:
2073            self._parse_isa_desc(*args, **kwargs)
2074        except ISAParserError, e:
2075            e.exit(self.fileNameStack)
2076
2077# Called as script: get args from command line.
2078# Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models>
if __name__ == '__main__':
    # The first argument is a Python file that is executed in this
    # namespace to read in the CpuModel definitions.
    execfile(sys.argv[1])
    # Every argument from the fourth on names a CPU model; look each
    # one up in CpuModel.dict.
    cpu_models = []
    for model_name in sys.argv[4:]:
        cpu_models.append(CpuModel.dict[model_name])
    # Third argument: output directory; second: ISA description file.
    ISAParser(sys.argv[3], cpu_models).parse_isa_desc(sys.argv[2])
2083