isa_parser.py revision 4281:ee28ccd38dbf
1# Copyright (c) 2003-2005 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26#
27# Authors: Steve Reinhardt
28#          Korey Sewell
29
30import os
31import sys
32import re
33import string
34import traceback
35# get type names
36from types import *
37
# Prepend the directory where the PLY lex & yacc modules are found
# to the search path.  The directory is taken from the M5_PLY
# environment variable, which must be set before this script runs.
41sys.path[0:0] = [os.environ['M5_PLY']]
42
43import lex
44import yacc
45
46#####################################################################
47#
48#                                Lexer
49#
50# The PLY lexer module takes two things as input:
51# - A list of token names (the string list 'tokens')
52# - A regular expression describing a match for each token.  The
53#   regexp for token FOO can be provided in two ways:
54#   - as a string variable named t_FOO
55#   - as the doc string for a function named t_FOO.  In this case,
56#     the function is also executed, allowing an action to be
57#     associated with each token match.
58#
59#####################################################################
60
61# Reserved words.  These are listed separately as they are matched
62# using the same regexp as generic IDs, but distinguished in the
63# t_ID() function.  The PLY documentation suggests this approach.
reserved = (
    # definition keywords
    'BITFIELD',
    'DECODE',
    'DECODER',
    'DEFAULT',
    'DEF',
    'EXEC',
    'FORMAT',
    'HEADER',
    'LET',
    'NAMESPACE',
    'OPERAND_TYPES',
    'OPERANDS',
    'OUTPUT',
    'SIGNED',
    'TEMPLATE',
)
69
70# List of tokens.  The lex module requires this.
tokens = reserved + (
    # identifier
    'ID',

    # literals: integer, string ('...') and code ({{...}})
    'INTLIT',
    'STRLIT',
    'CODELIT',

    # punctuation: ( ) [ ] { } < > = , ; . : :: *
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'LESS', 'GREATER', 'EQUALS',
    'COMMA', 'SEMI', 'DOT', 'COLON', 'DBLCOLON',
    'ASTERISK',

    # C preprocessor directives
    'CPPDIRECTIVE',

    # The newfile and endfile directives are matched by the lexer but
    # never returned as tokens, so 'NEWFILE' and 'ENDFILE' are
    # deliberately left out of this list to suppress a PLY warning.
)
103
104# Regular expressions for token matching
# Bracketing characters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_LESS = r'\<'
t_GREATER = r'\>'

# Operators and separators
t_EQUALS = r'='
t_COMMA = r','
t_SEMI = r';'
t_DOT = r'\.'
t_COLON = r':'
t_DBLCOLON = r'::'
t_ASTERISK = r'\*'
120
121# Identifiers and reserved words
# Maps the lower-case spelling of each reserved word (as it appears in
# the input) to its upper-case token name.
reserved_map = dict([(r.lower(), r) for r in reserved])
125
def t_ID(t):
    r'[A-Za-z_]\w*'
    # Reserved words match the same regexp as identifiers; retag the
    # token with the reserved word's own type when the text is one.
    if t.value in reserved_map:
        t.type = reserved_map[t.value]
    else:
        t.type = 'ID'
    return t
130
131# Integer literal
def t_INTLIT(t):
    r'(0x[\da-fA-F]+)|\d+'
    # Convert the matched text to an integer.  Base 0 makes int() pick
    # the radix from the literal's prefix (e.g. '0x' for hex).
    text = t.value
    try:
        t.value = int(text, 0)
    except ValueError:
        # int() raises ValueError for a malformed literal (for example
        # digits that are not valid for the radix implied by the
        # prefix), not for a large value, so report it as invalid.
        error(t.lineno, 'Invalid integer literal "%s"' % text)
        t.value = 0
    return t
140
141# String literal.  Note that these use only single quotes, and
142# can span multiple lines.
def t_STRLIT(t):
    r"(?m)'([^'])+'"
    # Drop the surrounding single quotes and keep the line counter in
    # step with any newlines embedded in the literal.
    contents = t.value[1:-1]
    t.value = contents
    t.lineno = t.lineno + contents.count('\n')
    return t
149
150
151# "Code literal"... like a string literal, but delimiters are
152# '{{' and '}}' so they get formatted nicely under emacs c-mode
def t_CODELIT(t):
    r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
    # Drop the '{{' and '}}' delimiters and advance the line counter
    # past every newline inside the code block.
    contents = t.value[2:-2]
    t.value = contents
    t.lineno = t.lineno + contents.count('\n')
    return t
159
def t_CPPDIRECTIVE(t):
    r'^\#[^\#].*\n'
    # The directive text (including its trailing newline) is passed
    # through unchanged; only the line counter is updated.
    newline_count = t.value.count('\n')
    t.lineno = t.lineno + newline_count
    return t
164
def t_NEWFILE(t):
    r'^\#\#newfile\s+"[\w/.-]*"'
    # Record entry into a new file: push (filename, line number in the
    # previous file) on fileNameStack and restart line counting.
    #
    # The regexp allows any amount of whitespace before the opening
    # quote, so locate the quote instead of assuming a fixed offset.
    # The filename itself cannot contain a quote, so the first quote in
    # the match is always the opening one.
    name_start = t.value.index('"') + 1
    fileNameStack.push((t.value[name_start:-1], t.lineno))
    t.lineno = 0
169
def t_ENDFILE(t):
    r'^\#\#endfile'
    # Leaving a file: pop its entry and resume counting at the line
    # number saved for the previous file.  No token is returned.
    finished_file, resume_line = fileNameStack.pop()
    t.lineno = resume_line
173
174#
175# The functions t_NEWLINE, t_ignore, and t_error are
176# special for the lex module.
177#
178
179# Newlines
def t_NEWLINE(t):
    r'\n+'
    # Newlines produce no token; they only advance the line counter.
    t.lineno = t.lineno + t.value.count('\n')
183
184# Comments
def t_comment(t):
    r'//.*'
    # Nothing is returned, so no token is produced for a '//' comment.
    return None
187
188# Completely ignored characters
t_ignore = ' \t\f'  # space, tab and form feed
190
191# Error handler
def t_error(t):
    # Report the first unmatched character, then step over it.
    offending = t.value[0]
    error(t.lineno, "illegal character '%s'" % offending)
    t.skip(1)
195
# Build the lexer from the 'tokens' list and the t_* rules defined
# above.  This runs at module import time.
lex.lex()
198
199#####################################################################
200#
201#                                Parser
202#
203# Every function whose name starts with 'p_' defines a grammar rule.
204# The rule is encoded in the function's doc string, while the
205# function body provides the action taken when the rule is matched.
206# The argument to each function is a list of the values of the
207# rule's symbols: t[0] for the LHS, and t[1..n] for the symbols
208# on the RHS.  For tokens, the value is copied from the t.value
209# attribute provided by the lexer.  For non-terminals, the value
210# is assigned by the producing rule; i.e., the job of the grammar
211# rule function is to set the value for the non-terminal on the LHS
212# (by assigning to t[0]).
213#####################################################################
214
215# The LHS of the first grammar rule is used as the start symbol
216# (in this case, 'specification').  Note that this rule enforces
217# that there will be exactly one namespace declaration, with 0 or more
218# global defs/decls before and after it.  The defs & decls before
219# the namespace decl will be outside the namespace; those after
220# will be inside.  The decoder function is always inside the namespace.
def p_specification(t):
    'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
    isa_name = t[2]
    namespace = isa_name + "Inst"
    decode_block = t[4]
    # Turn the top-level decode block into the body of the ISA's
    # decodeInst() function.
    prologue = '''
StaticInstPtr
%(isa_name)s::decodeInst(%(isa_name)s::ExtMachInst machInst)
{
    using namespace %(namespace)s;
''' % {'isa_name': isa_name, 'namespace': namespace}
    decode_block.wrap_decode_block(prologue, '}')
    # Defs/outputs seen before the name declaration stay global; those
    # after it, plus the decode function, belong to the namespace.
    global_code = t[1]
    namespace_code = t[3] + decode_block
    # This tuple is the value handed back to the caller of yacc.parse().
    t[0] = (isa_name, namespace, global_code, namespace_code)
237
238# ISA name declaration looks like "namespace <foo>;"
def p_name_decl(t):
    'name_decl : NAMESPACE ID SEMI'
    # The rule's value is the identifier between 'namespace' and ';'.
    isa_name = t[2]
    t[0] = isa_name
242
243# 'opt_defs_and_outputs' is a possibly empty sequence of
244# def and/or output statements.
def p_opt_defs_and_outputs_0(t):
    'opt_defs_and_outputs : empty'
    # An empty sequence is represented by a default-constructed GenCode.
    no_code = GenCode()
    t[0] = no_code
248
def p_opt_defs_and_outputs_1(t):
    'opt_defs_and_outputs : defs_and_outputs'
    # Non-empty case: pass the accumulated value straight through.
    accumulated = t[1]
    t[0] = accumulated
252
def p_defs_and_outputs_0(t):
    'defs_and_outputs : def_or_output'
    # A single statement starts the sequence.
    first_stmt = t[1]
    t[0] = first_stmt
256
def p_defs_and_outputs_1(t):
    'defs_and_outputs : defs_and_outputs def_or_output'
    # Append the new statement's value to the sequence so far using '+'.
    so_far, next_stmt = t[1], t[2]
    t[0] = so_far + next_stmt
260
261# The list of possible definition/output statements.
def p_def_or_output(t):
    '''def_or_output : def_format
                     | def_bitfield
                     | def_bitfield_struct
                     | def_template
                     | def_operand_types
                     | def_operands
                     | output_header
                     | output_decoder
                     | output_exec
                     | global_let'''
    # Every alternative has exactly one symbol; forward its value.
    stmt_value = t[1]
    t[0] = stmt_value
274
275# Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
276# directly to the appropriate output section.
277
278
279# Protect any non-dict-substitution '%'s in a format string
280# (i.e. those not followed by '(')
def protect_non_subst_percents(s):
    # Double every '%' that is not immediately followed by '(' so that a
    # later dict-based '%' format turns it back into a single '%'.
    lone_percent = re.compile(r'%(?!\()')
    return lone_percent.sub('%%', s)
283
284# Massage output block by substituting in template definitions and bit
285# operators.  We handle '%'s embedded in the string that don't
286# indicate template substitutions (or CPU-specific symbols, which get
287# handled in GenCode) by doubling them first so that the format
288# operation will reduce them back to single '%'s.
def process_output(s):
    # Escape in two steps: first the '%'s that are not dict
    # substitutions, then the CPU-specific '%(CPU_' references, so that
    # only template names are expanded by the format operation below.
    escaped = protect_cpu_symbols(protect_non_subst_percents(s))
    with_templates = escaped % templateMap
    return substBitOps(with_templates)
294
def p_output_header(t):
    'output_header : OUTPUT HEADER CODELIT SEMI'
    # The processed code block becomes header output.
    processed = process_output(t[3])
    t[0] = GenCode(header_output = processed)
298
def p_output_decoder(t):
    'output_decoder : OUTPUT DECODER CODELIT SEMI'
    # The processed code block becomes decoder output.
    processed = process_output(t[3])
    t[0] = GenCode(decoder_output = processed)
302
def p_output_exec(t):
    'output_exec : OUTPUT EXEC CODELIT SEMI'
    # The processed code block becomes exec output.
    processed = process_output(t[3])
    t[0] = GenCode(exec_output = processed)
306
307# global let blocks 'let {{...}}' (Python code blocks) are executed
308# directly when seen.  Note that these execute in a special variable
309# context 'exportContext' to prevent the code from polluting this
310# script's namespace.
311def p_global_let(t):
312    'global_let : LET CODELIT SEMI'
313    updateExportContext()
314    try:
315        exec fixPythonIndentation(t[2]) in exportContext
316    except Exception, exc:
317        error(t.lineno(1),
318              'error: %s in global let block "%s".' % (exc, t[2]))
319    t[0] = GenCode() # contributes nothing to the output C++ file
320
321# Define the mapping from operand type extensions to C++ types and bit
322# widths (stored in operandTypeMap).
323def p_def_operand_types(t):
324    'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
325    try:
326        userDict = eval('{' + t[3] + '}')
327    except Exception, exc:
328        error(t.lineno(1),
329              'error: %s in def operand_types block "%s".' % (exc, t[3]))
330    buildOperandTypeMap(userDict, t.lineno(1))
331    t[0] = GenCode() # contributes nothing to the output C++ file
332
333# Define the mapping from operand names to operand classes and other
334# traits.  Stored in operandNameMap.
335def p_def_operands(t):
336    'def_operands : DEF OPERANDS CODELIT SEMI'
337    if not globals().has_key('operandTypeMap'):
338        error(t.lineno(1),
339              'error: operand types must be defined before operands')
340    try:
341        userDict = eval('{' + t[3] + '}')
342    except Exception, exc:
343        error(t.lineno(1),
344              'error: %s in def operands block "%s".' % (exc, t[3]))
345    buildOperandNameMap(userDict, t.lineno(1))
346    t[0] = GenCode() # contributes nothing to the output C++ file
347
# A bitfield definition looks like:
# 'def [signed] bitfield <ID> <first:last>;'
# where the angle brackets around the bit range are literal.
# This generates a preprocessor macro in the output file.
def p_def_bitfield_0(t):
    'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
    field_name, first, last = t[4], t[6], t[8]
    expr = 'bits(machInst, %2d, %2d)' % (first, last)
    if t[2] == 'signed':
        # Sign-extend from the width of the field.
        width = first - last + 1
        expr = 'sext<%d>(%s)' % (width, expr)
    # Emit the macro; the #undef precedes the #define for the same name.
    hash_define = '#undef %s\n#define %s\t%s\n' % (field_name, field_name,
                                                   expr)
    t[0] = GenCode(header_output = hash_define)
358
# alternate form for single bit: 'def [signed] bitfield <ID> <bit>;'
# (again with literal angle brackets around the bit number)
def p_def_bitfield_1(t):
    'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
    field_name, bit = t[4], t[6]
    # A single-bit field uses the same bit as both range endpoints.
    expr = 'bits(machInst, %2d, %2d)' % (bit, bit)
    if t[2] == 'signed':
        expr = 'sext<%d>(%s)' % (1, expr)
    hash_define = '#undef %s\n#define %s\t%s\n' % (field_name, field_name,
                                                   expr)
    t[0] = GenCode(header_output = hash_define)
367
368# alternate form for structure member: 'def bitfield <ID> <ID>'
def p_def_bitfield_struct(t):
    'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
    # opt_signed yields '' when 'signed' is absent; anything else is
    # rejected for structure-member bitfields.
    if t[2] != '':
        error(t.lineno(1), 'error: structure bitfields are always unsigned.')
    field_name, member = t[4], t[5]
    expr = 'machInst.%s' % member
    hash_define = '#undef %s\n#define %s\t%s\n' % (field_name, field_name,
                                                   expr)
    t[0] = GenCode(header_output = hash_define)
376
def p_id_with_dot_0(t):
    'id_with_dot : ID'
    # Base case: a lone identifier.
    identifier = t[1]
    t[0] = identifier
380
def p_id_with_dot_1(t):
    'id_with_dot : ID DOT id_with_dot'
    # Rebuild the dotted name by concatenating head, dot and the rest.
    head, dot, rest = t[1], t[2], t[3]
    t[0] = head + dot + rest
384
def p_opt_signed_0(t):
    'opt_signed : SIGNED'
    # Forward the SIGNED token's value.
    signed_token = t[1]
    t[0] = signed_token
388
def p_opt_signed_1(t):
    'opt_signed : empty'
    # No 'signed' keyword: the rule's value is the empty string.
    not_signed = ''
    t[0] = not_signed
392
# Global map variable to hold templates: maps each template name to
# the Template object built from its 'def template' code block.
templateMap = {}
395
def p_def_template(t):
    'def_template : DEF TEMPLATE ID CODELIT SEMI'
    template_name, template_code = t[3], t[4]
    # Register the template under its name; the definition itself
    # produces an empty GenCode.
    templateMap[template_name] = Template(template_code)
    t[0] = GenCode()
400
401# An instruction format definition looks like
402# "def format <fmt>(<params>) {{...}};"
def p_def_format(t):
    'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
    format_name = t[3]
    params = t[5]
    code = t[7]
    # Register the format; the definition itself produces an empty
    # GenCode.
    defFormat(format_name, params, code, t.lineno(1))
    t[0] = GenCode()
408
409# The formal parameter list for an instruction format is a possibly
410# empty list of comma-separated parameters.  Positional (standard,
411# non-keyword) parameters must come first, followed by keyword
412# parameters, followed by a '*foo' parameter that gets excess
413# positional arguments (as in Python).  Each of these three parameter
414# categories is optional.
415#
416# Note that we do not support the '**foo' parameter for collecting
417# otherwise undefined keyword args.  Otherwise the parameter list is
418# (I believe) identical to what is supported in Python.
419#
# The param list generates a flat list of strings: the positional
# parameter names first, then 'name = <default>' strings for the
# keyword params, and finally the '*foo' excess-args param (if any).
def p_param_list_0(t):
    'param_list : positional_param_list COMMA nonpositional_param_list'
    # Positional params come first, followed by the non-positional ones.
    positional, nonpositional = t[1], t[3]
    t[0] = positional + nonpositional
426
def p_param_list_1(t):
    '''param_list : positional_param_list
                  | nonpositional_param_list'''
    # Only one kind of parameter is present; forward its list.
    params = t[1]
    t[0] = params
431
def p_positional_param_list_0(t):
    'positional_param_list : empty'
    # No positional parameters: start from a fresh empty list.
    t[0] = list()
435
def p_positional_param_list_1(t):
    'positional_param_list : ID'
    # A single parameter name becomes a one-element list.
    param_name = t[1]
    t[0] = [param_name]
439
def p_positional_param_list_2(t):
    'positional_param_list : positional_param_list COMMA ID'
    # Build a new list with the extra name at the end; the existing
    # list is not modified.
    earlier, param_name = t[1], t[3]
    t[0] = earlier + [param_name]
443
def p_nonpositional_param_list_0(t):
    'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
    # Keyword params are followed by the '*foo' excess-args param.
    keyword_params, excess_param = t[1], t[3]
    t[0] = keyword_params + excess_param
447
def p_nonpositional_param_list_1(t):
    '''nonpositional_param_list : keyword_param_list
                                | excess_args_param'''
    # Either alternative is already a list; forward it as is.
    params = t[1]
    t[0] = params
452
def p_keyword_param_list_0(t):
    'keyword_param_list : keyword_param'
    # The first keyword parameter starts a one-element list.
    first_param = t[1]
    t[0] = [first_param]
456
def p_keyword_param_list_1(t):
    'keyword_param_list : keyword_param_list COMMA keyword_param'
    # Build a new list with the extra keyword parameter at the end.
    earlier, new_param = t[1], t[3]
    t[0] = earlier + [new_param]
460
def p_keyword_param(t):
    'keyword_param : ID EQUALS expr'
    # Render the parameter as the source text 'name = <repr of default>'.
    param_name, default_value = t[1], t[3]
    t[0] = param_name + ' = ' + repr(default_value)
464
def p_excess_args_param(t):
    'excess_args_param : ASTERISK ID'
    # Glue the asterisk and the name back together ('*ID') and return
    # it in a one-element list, matching the other param-list rules.
    asterisk, param_name = t[1], t[2]
    starred = asterisk + param_name
    t[0] = [starred]
470
471# End of format definition-related rules.
472##############
473
474#
# A decode block looks like:
#   decode <field> [default <inst>] { ... }
477#
def p_decode_block(t):
    'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
    # opt_default pushed an entry on defaultStack for this block; take
    # it off again now that the block is complete.
    inherited_default = defaultStack.pop()
    block_code = t[5]
    # The inherited default applies only when the statement list has
    # no explicit default case of its own.
    if not block_code.has_decode_default:
        block_code += inherited_default
    field_name = t[2]
    block_code.wrap_decode_block('switch (%s) {\n' % field_name, '}\n')
    t[0] = block_code
488
489# The opt_default statement serves only to push the "default defaults"
490# onto defaultStack.  This value will be used by nested decode blocks,
491# and used and popped off when the current decode_block is processed
492# (in p_decode_block() above).
def p_opt_default_0(t):
    'opt_default : empty'
    # No default was given, so the enclosing block's default (the
    # current top of the stack) is pushed again for this block.
    current_default = defaultStack.top()
    defaultStack.push(current_default)
    # This rule has no meaningful value.
    t[0] = None
499
def p_opt_default_1(t):
    'opt_default : DEFAULT inst'
    # Wrap the instruction's decode code as a 'default:' case and make
    # it the default for this decode block.
    default_code = t[2]
    default_code.wrap_decode_block('\ndefault:\n', 'break;\n')
    defaultStack.push(default_code)
    # This rule has no meaningful value.
    t[0] = None
508
def p_decode_stmt_list_0(t):
    'decode_stmt_list : decode_stmt'
    # A single statement: forward its value.
    only_stmt = t[1]
    t[0] = only_stmt
512
def p_decode_stmt_list_1(t):
    'decode_stmt_list : decode_stmt decode_stmt_list'
    first_stmt, remaining = t[1], t[2]
    # A decode block may contain at most one explicit default case.
    if first_stmt.has_decode_default and remaining.has_decode_default:
        error(t.lineno(1), 'Two default cases in decode block')
    t[0] = first_stmt + remaining
518
519#
520# Decode statement rules
521#
522# There are four types of statements allowed in a decode block:
523# 1. Format blocks 'format <foo> { ... }'
524# 2. Nested decode blocks
525# 3. Instruction definitions.
526# 4. C preprocessor directives.
527
528
529# Preprocessor directives found in a decode statement list are passed
530# through to the output, replicated to all of the output code
531# streams.  This works well for ifdefs, so we can ifdef out both the
532# declarations and the decode cases generated by an instruction
533# definition.  Handling them as part of the grammar makes it easy to
534# keep them in the right place with respect to the code generated by
535# the other statements.
def p_decode_stmt_cpp(t):
    'decode_stmt : CPPDIRECTIVE'
    # Pass the same directive text as each of GenCode's first four
    # arguments, so it appears in every output stream.
    directive = t[1]
    t[0] = GenCode(directive, directive, directive, directive)
539
540# A format block 'format <foo> { ... }' sets the default instruction
541# format used to handle instruction definitions inside the block.
542# This format can be overridden by using an explicit format on the
543# instruction definition or with a nested format block.
def p_decode_stmt_format(t):
    'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
    # 'push_format_id' pushed the format on the stack as soon as the
    # format name was recognized.  By the time this rule fires the
    # whole production (through the right brace) has been parsed, so
    # the format is no longer needed and is popped here.
    block_code = t[4]
    formatStack.pop()
    t[0] = block_code
552
553# This rule exists so we can set the current format (& push the stack)
554# when we recognize the format name part of the format block.
def p_push_format_id(t):
    'push_format_id : ID'
    format_name = t[1]
    # Look the format up by name; an unknown name is reported as an
    # error and nothing is pushed.
    try:
        named_format = formatMap[format_name]
    except KeyError:
        error(t.lineno(1),
              'instruction format "%s" not defined.' % format_name)
    else:
        formatStack.push(named_format)
        t[0] = ('', '// format %s' % format_name)
562
563# Nested decode block: if the value of the current field matches the
564# specified constant, do a nested decode on some other field.
def p_decode_stmt_decode(t):
    'decode_stmt : case_label COLON decode_block'
    case_label, nested_code = t[1], t[3]
    # The nested block's decode code becomes one case of the outer
    # switch statement.
    nested_code.wrap_decode_block('\n%s:\n' % case_label)
    # Record whether this case is the explicit default.
    nested_code.has_decode_default = (case_label == 'default')
    t[0] = nested_code
574
575# Instruction definition (finally!).
def p_decode_stmt_inst(t):
    'decode_stmt : case_label COLON inst SEMI'
    case_label, inst_code = t[1], t[3]
    # The instruction's decode code becomes one case of the enclosing
    # switch, terminated by a 'break'.
    inst_code.wrap_decode_block('\n%s:' % case_label, 'break;\n')
    # Record whether this case is the explicit default.
    inst_code.has_decode_default = (case_label == 'default')
    t[0] = inst_code
583
584# The case label is either a list of one or more constants or 'default'
def p_case_label_0(t):
    'case_label : intlit_list'
    # One 'case <hex>' per constant, joined with ': '.  The last label
    # gets no colon here; the decode_stmt rules append it.
    labels = ['case %#x' % value for value in t[1]]
    t[0] = ': '.join(labels)
588
def p_case_label_1(t):
    'case_label : DEFAULT'
    # The label text is always the literal 'default', whatever the
    # token's own value is.
    default_label = 'default'
    t[0] = default_label
592
593#
594# The constant list for a decode case label must be non-empty, but may have
595# one or more comma-separated integer literals in it.
596#
def p_intlit_list_0(t):
    'intlit_list : INTLIT'
    # The first constant starts a one-element list.
    first_value = t[1]
    t[0] = [first_value]
600
def p_intlit_list_1(t):
    'intlit_list : intlit_list COMMA INTLIT'
    # Extend the existing list in place and reuse it as the value.
    values = t[1]
    values.append(t[3])
    t[0] = values
605
606# Define an instruction using the current instruction format (specified
607# by an enclosing format block).
608# "<mnemonic>(<args>)"
def p_inst_0(t):
    'inst : ID LPAREN arg_list RPAREN'
    mnemonic, arg_list = t[1], t[3]
    # The format on top of the stack defines the instruction from its
    # mnemonic and argument list.
    active_format = formatStack.top()
    inst_code = active_format.defineInst(mnemonic, arg_list, t.lineno(1))
    # Build the argument text for the comment.  Every line is prefixed
    # with '//' and the prefix is then removed from the very first
    # line, so only continuation lines carry it.
    arg_text = ','.join([str(arg) for arg in arg_list])
    arg_text = re.sub('(?m)^', '//', arg_text)
    arg_text = re.sub('^//', '', arg_text)
    comment = '\n// %s::%s(%s)\n' % (active_format.id, mnemonic, arg_text)
    inst_code.prepend_all(comment)
    t[0] = inst_code
620
621# Define an instruction using an explicitly specified format:
622# "<fmt>::<mnemonic>(<args>)"
def p_inst_1(t):
    'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
    format_name, mnemonic, arg_list = t[1], t[3], t[5]
    # The format is named explicitly, so look it up directly rather
    # than using the format stack.
    try:
        named_format = formatMap[format_name]
    except KeyError:
        error(t.lineno(1),
              'instruction format "%s" not defined.' % format_name)
    inst_code = named_format.defineInst(mnemonic, arg_list, t.lineno(1))
    comment = '\n// %s::%s(%s)\n' % (format_name, mnemonic, arg_list)
    inst_code.prepend_all(comment)
    t[0] = inst_code
633
634# The arg list generates a tuple, where the first element is a list of
635# the positional args and the second element is a dict containing the
636# keyword args.
def p_arg_list_0(t):
    'arg_list : positional_arg_list COMMA keyword_arg_list'
    # Pair the positional list with the keyword dict.
    positional, keywords = t[1], t[3]
    t[0] = (positional, keywords)
640
def p_arg_list_1(t):
    'arg_list : positional_arg_list'
    # Positional args only: pair them with an empty keyword dict.
    positional = t[1]
    t[0] = (positional, dict())
644
def p_arg_list_2(t):
    'arg_list : keyword_arg_list'
    # Keyword args only: pair an empty positional list with the dict.
    keywords = t[1]
    t[0] = (list(), keywords)
648
def p_positional_arg_list_0(t):
    'positional_arg_list : empty'
    # No positional arguments: start from a fresh empty list.
    t[0] = list()
652
def p_positional_arg_list_1(t):
    'positional_arg_list : expr'
    # A single argument value becomes a one-element list.
    arg_value = t[1]
    t[0] = [arg_value]
656
def p_positional_arg_list_2(t):
    'positional_arg_list : positional_arg_list COMMA expr'
    # Build a new list with the extra argument at the end; the existing
    # list is not modified.
    earlier, arg_value = t[1], t[3]
    t[0] = earlier + [arg_value]
660
def p_keyword_arg_list_0(t):
    'keyword_arg_list : keyword_arg'
    # keyword_arg already yields a one-entry dict; reuse it as the
    # accumulator for the list.
    first_arg = t[1]
    t[0] = first_arg
664
def p_keyword_arg_list_1(t):
    'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
    # Merge the new entry into the existing dict in place; a repeated
    # keyword overwrites the earlier value.
    keywords = t[1]
    keywords.update(t[3])
    t[0] = keywords
669
def p_keyword_arg(t):
    'keyword_arg : ID EQUALS expr'
    # A single 'name = value' pair becomes a one-entry dict.
    arg_name, arg_value = t[1], t[3]
    t[0] = dict([(arg_name, arg_value)])
673
674#
675# Basic expressions.  These constitute the argument values of
676# "function calls" (i.e. instruction definitions in the decode block)
677# and default values for formal parameters of format functions.
678#
# Right now, these are either strings, integers, or (recursively)
# lists of exprs (using Python square-bracket list syntax).  Note that
# bare identifiers are treated as string constants here (since there
# isn't really a variable namespace to refer to).
683#
def p_expr_0(t):
    '''expr : ID
            | INTLIT
            | STRLIT
            | CODELIT'''
    # Every alternative is a single token; its value is the expression.
    token_value = t[1]
    t[0] = token_value
690
def p_expr_1(t):
    '''expr : LBRACKET list_expr RBRACKET'''
    # A bracketed list evaluates to the list built by list_expr.
    elements = t[2]
    t[0] = elements
694
def p_list_expr_0(t):
    'list_expr : expr'
    # The first element starts a one-element list.
    first_element = t[1]
    t[0] = [first_element]
698
def p_list_expr_1(t):
    'list_expr : list_expr COMMA expr'
    # Build a new list with the extra element at the end; the existing
    # list is not modified.
    earlier, element = t[1], t[3]
    t[0] = earlier + [element]
702
def p_list_expr_2(t):
    'list_expr : empty'
    # An empty pair of brackets yields a fresh empty list.
    t[0] = list()
706
707#
708# Empty production... use in other rules for readability.
709#
def p_empty(t):
    'empty :'
    # Matches nothing and assigns no value to t[0].
    return None
713
714# Parse error handler.  Note that the argument here is the offending
715# *token*, not a grammar symbol (hence the need to use t.value)
def p_error(t):
    # Without a token there is no location or text to report, so a
    # generic message is issued for line 0.
    if not t:
        error(0, "unknown syntax error", True)
    else:
        error(t.lineno, "syntax error at '%s'" % t.value)
721
# END OF GRAMMAR RULES
#
# Now build the parser from the p_* rule functions defined above.
# This runs at module import time.
yacc.yacc()
726
727
728#####################################################################
729#
730#                           Support Classes
731#
732#####################################################################
733
734# Expand template with CPU-specific references into a dictionary with
735# an entry for each CPU model name.  The entry key is the model name
736# and the corresponding value is the template with the CPU-specific
737# refs substituted for that model.
def expand_cpu_symbols_to_dict(template):
    # Double every '%' that does not introduce a '%(CPU_...' reference
    # so that only the CPU-specific terms are substituted below.
    protected = re.sub(r'%(?!\(CPU_)', '%%', template)
    # One expansion per CPU model, keyed by the model's name.
    return dict([(cpu.name, protected % cpu.strings) for cpu in cpu_models])
745
746# *If* the template has CPU-specific references, return a single
747# string containing a copy of the template for each CPU model with the
748# corresponding values substituted in.  If the template has no
749# CPU-specific references, it is returned unmodified.
def expand_cpu_symbols_to_string(template):
    # Templates without CPU-specific references are returned untouched.
    if '%(CPU_' not in template:
        return template
    # Concatenate the per-model expansions.  str.join replaces the
    # original reduce() over '+', which raised TypeError when there
    # were no expansions at all; join yields '' in that case.
    return ''.join(expand_cpu_symbols_to_dict(template).values())
756
757# Protect CPU-specific references by doubling the corresponding '%'s
758# (in preparation for substituting a different set of references into
759# the template).
def protect_cpu_symbols(template):
    # Double only the '%' characters that start a '%(CPU_' reference.
    cpu_reference = re.compile(r'%(?=\(CPU_)')
    return cpu_reference.sub('%%', template)
762
763###############
764# GenCode class
765#
766# The GenCode class encapsulates generated code destined for various
767# output files.  The header_output and decoder_output attributes are
768# strings containing code destined for decoder.hh and decoder.cc
769# respectively.  The decode_block attribute contains code to be
770# incorporated in the decode function itself (that will also end up in
771# decoder.cc).  The exec_output attribute is a dictionary with a key
772# for each CPU model name; the value associated with a particular key
773# is the string of code for that CPU model's exec.cc file.  The
774# has_decode_default attribute is used in the decode block to allow
775# explicit default clauses to override default default clauses.
776
class GenCode:
    # Constructor.  CPU-specific symbols are substituted out here: the
    # exec output is expanded into a dictionary keyed by CPU model
    # name, while every other output is collapsed into a single string.
    def __init__(self,
                 header_output = '', decoder_output = '', exec_output = '',
                 decode_block = '', has_decode_default = False):
        expand = expand_cpu_symbols_to_string
        self.header_output = expand(header_output)
        self.decoder_output = expand(decoder_output)
        if isinstance(exec_output, dict):
            # Already a per-model dictionary: stored as is.
            self.exec_output = exec_output
        elif isinstance(exec_output, str):
            # A single string is replicated for each CPU model, with
            # any %(CPU_foo)s params substituted appropriately.
            self.exec_output = expand_cpu_symbols_to_dict(exec_output)
        # NOTE(review): any other exec_output type leaves
        # self.exec_output unset.
        self.decode_block = expand(decode_block)
        self.has_decode_default = has_decode_default

    # '+' operator: returns a new GenCode whose strings are the
    # operands' strings concatenated, left operand first.
    def __add__(self, other):
        combined_exec = dict([(cpu.name,
                               self.exec_output[cpu.name]
                               + other.exec_output[cpu.name])
                              for cpu in cpu_models])
        either_default = self.has_decode_default or other.has_decode_default
        return GenCode(self.header_output + other.header_output,
                       self.decoder_output + other.decoder_output,
                       combined_exec,
                       self.decode_block + other.decode_block,
                       either_default)

    # Put a string (typically a comment) in front of every output
    # string, including each CPU model's exec output.
    def prepend_all(self, pre):
        self.header_output = pre + self.header_output
        self.decoder_output = pre + self.decoder_output
        self.decode_block = pre + self.decode_block
        for model_name in [cpu.name for cpu in cpu_models]:
            self.exec_output[model_name] = pre + self.exec_output[model_name]

    # Surround the (indented) decode block with a pair of strings, e.g.
    # 'case foo:' and 'break;'.  Used to build the nested switch
    # statement.
    def wrap_decode_block(self, pre, post = ''):
        indented = indent(self.decode_block)
        self.decode_block = pre + indented + post
822
823################
824# Format object.
825#
826# A format object encapsulates an instruction format.  It must provide
827# a defineInst() method that generates the code for an instruction
828# definition.
829
# Names of symbols from this module that are copied into
# exportContext by updateExportContext().
exportContextSymbols = ('InstObjParams', 'makeList', 're', 'string')

# The variable context that user-supplied Python code is run in.
exportContext = {}
833
def updateExportContext():
    # Refresh exportContext in place: first the exported module
    # symbols, then the templates, which therefore win on a name clash.
    for symbols in (exportDict(*exportContextSymbols), templateMap):
        exportContext.update(symbols)
837
def exportDict(*symNames):
    # Build a {name: value} dictionary by evaluating each name as an
    # expression.
    exported = {}
    for s in symNames:
        exported[s] = eval(s)
    return exported
840
841
class Format:
    # Build a format from its name ('id'), its parameter list ('params',
    # a sequence of strings forming a Python argument list) and the body
    # of Python code ('code') that defines instructions of this format.
    def __init__(self, id, params, code):
        self.id = id
        self.params = params
        label = 'def format ' + id
        self.user_code = compile(fixPythonIndentation(code), label, 'exec')
        param_list = ", ".join(params)
        # Wrapper function whose signature carries the format's own
        # parameters.  It runs the user code with the bound arguments
        # as local variables and returns the resulting local namespace.
        f = '''def defInst(_code, _context, %s):
                my_locals = vars().copy()
                exec(_code, _context, my_locals)
                return my_locals\n''' % param_list
        c = compile(f, label + ' wrapper', 'exec')
        # Execute the definition into an explicit namespace rather than
        # relying on exec to inject 'defInst' into this method's locals.
        # The module globals are kept as the wrapper's globals.
        namespace = {}
        exec(c, globals(), namespace)
        self.func = namespace['defInst']

    # Run the format's code for instruction 'name'.  'args' is a pair
    # (positional argument list, keyword argument dict) passed on to the
    # format.  Any exception raised by the format code is reported via
    # error() using 'lineno'.  Returns a GenCode object built from the
    # output sections the format code assigned.
    def defineInst(self, name, args, lineno):
        context = {}
        updateExportContext()
        context.update(exportContext)
        context.update({ 'name': name, 'Name': name.capitalize() })
        try:
            inst_vars = self.func(self.user_code, context,
                                  *args[0], **args[1])
        except Exception:
            exc = sys.exc_info()[1]
            error(lineno, 'error defining "%s": %s.' % (name, exc))
        # Keep only the recognized output sections; build a new dict
        # instead of deleting entries from the one being iterated.
        outputs = {}
        for k in ('header_output', 'decoder_output',
                  'exec_output', 'decode_block'):
            if k in inst_vars:
                outputs[k] = inst_vars[k]
        return GenCode(**outputs)
872
# Special null format: any attempt to define an instruction through it
# (i.e. an implicit-format instruction definition outside of any format
# block) is reported as an error.
class NoFormat:
    def __init__(self):
        self.defaultInst = ''

    def defineInst(self, name, args, lineno):
        message = 'instruction definition "%s" with no active format!' % name
        error(lineno, message)
882
# Global map from format name strings to Format objects.
formatMap = {}

# Define a new format named 'id' and store it in formatMap.  Defining
# a name that is already present is reported through error().
def defFormat(id, params, code, lineno):
    already_defined = formatMap.get(id, None) != None
    if already_defined:
        error(lineno, 'format %s redefined.' % id)
    new_format = Format(id, params, code)
    formatMap[id] = new_format
893
894
895##############
896# Stack: a simple stack object.  Used for both formats (formatStack)
897# and default cases (defaultStack).  Simply wraps a list to give more
898# stack-like syntax and enable initialization with an argument list
899# (as opposed to an argument that's a list).
900
class Stack(list):
    def __init__(self, *items):
        # The initial elements are given as separate arguments; the
        # last one ends up on top.
        list.__init__(self, items)

    def push(self, item):
        # The end of the list is the top of the stack.
        self.append(item)

    def top(self):
        # Return the top element without removing it.
        return self[-1]
910
# The global format stack.  Its bottom entry is a NoFormat instance.
formatStack = Stack(NoFormat())

# The global default case stack.  Starts out holding a single None.
defaultStack = Stack(None)

# Global stack that tracks current file and line number.
# Each element is a tuple (filename, lineno) that records the
# *current* filename and the line number in the *previous* file where
# it was included.
fileNameStack = Stack()
922
923###################
924# Utility functions
925
#
# Prefix every line of string 's' with two spaces, leaving lines that
# begin with '#' (preprocessor directives) untouched.
# Used to make nested code blocks look pretty.
#
def indent(s):
    line_start = re.compile(r'(?m)^(?!#)')
    return line_start.sub('  ', s)
933
#
# Massage a loosely formatted piece of Python code (e.g. from a format
# 'let' block) so that its indentation is acceptable to the Python
# parser.
#
# Python rejects code whose first line starts with whitespace, and
# code literals usually carry some initial indentation.  Instead of
# working out and stripping that indentation, 'if 1:' is prepended so
# the code becomes the nested block of the if and the parser deals
# with the indentation itself.
#
# Blank lines are removed first.  Nothing is prepended when the
# remaining code is empty or when its first line does not start with a
# space or tab.

def fixPythonIndentation(s):
    s = re.sub(r'(?m)^\s*\n', '', s)
    starts_indented = s[:1] in (' ', '\t')
    if starts_indented:
        return 'if 1:\n' + s
    return s
957
# Error handler.  Prints the chain of including files (every entry of
# fileNameStack except the last) and then exits with a message of the
# form 'file:line: text', a layout that works under Emacs compile-mode.
# The line number is omitted when 'lineno' is 0.  If the optional
# 'print_traceback' arg is true, a Python stack backtrace is printed
# too (can be handy when trying to debug the parser itself).
def error(lineno, string, print_traceback = False):
    including_files = fileNameStack[0:-1]
    spaces = ""
    for (filename, line) in including_files:
        sys.stdout.write(spaces + "In file included from " + filename
                         + ":\n")
        spaces += "  "
    if print_traceback:
        traceback.print_exc()
    line_str = ""
    if lineno != 0:
        line_str = "%d:" % lineno
    current_file = fileNameStack[-1][0]
    sys.exit(spaces + "%s:%s %s" % (current_file, line_str, string))
975
976
977#####################################################################
978#
979#                      Bitfield Operator Support
980#
981#####################################################################
982
# Single-index selector written with a trailing colon: '<n:>'.
bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')

# Two-index selector applied to a (possibly dotted) name: 'foo<a:b>'.
bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
# Two-index selector applied to an expression ending in ')'.
bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')

# Rewrite the bitfield selectors in 'code' as calls to bits() and
# return the new string.  Exits via sys.exit() if a selector follows a
# ')' that has no matching '('.
def substBitOps(code):
    # first convert single-bit selectors to two-index form
    # i.e., <n:> --> <n:n>
    code = bitOp1ArgRE.sub(r'<\1:\1>', code)
    # simple case: selector applied to ID (name)
    # i.e., foo<a:b> --> bits(foo, a, b)
    code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
    # if selector is applied to expression (ending in ')'),
    # we need to search backward for matching '('
    match = bitOpExprRE.search(code)
    while match:
        exprEnd = match.start()
        here = exprEnd - 1
        nestLevel = 1
        while nestLevel > 0:
            # Only give up when we run off the front of the string
            # while still looking; a '(' at index 0 is a valid match.
            if here < 0:
                sys.exit("Didn't find '('!")
            if code[here] == '(':
                nestLevel -= 1
            elif code[here] == ')':
                nestLevel += 1
            here -= 1
        # 'here' now sits one position before the matching '('
        exprStart = here + 1
        newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
                                         match.group(1), match.group(2))
        code = code[:exprStart] + newExpr + code[match.end():]
        match = bitOpExprRE.search(code)
    return code
1016
1017
1018####################
1019# Template objects.
1020#
1021# Template objects are format strings that allow substitution from
1022# the attribute spaces of other objects (e.g. InstObjParams instances).
1023
# Matches a '%(label)s' or '%(label)d' substitution and captures the
# label.  A lookbehind rejects a '%' that directly follows another
# '%'; because it consumes no character, a label at the very start of
# the template and labels placed back to back are both found.
labelRE = re.compile(r'(?<!%)%\(([^\)]+)\)[sd]')
1025
class Template:
    def __init__(self, t):
        # Raw template text; substitution happens in subst().
        self.template = t

    # Return the template text with its fields substituted from 'd',
    # layered on top of the global templateMap.  'd' may be an
    # InstObjParams object, a dictionary, or any object that has a
    # __dict__; anything else raises TypeError.
    def subst(self, d):
        # Protect non-Python-dict substitutions (e.g. if there's a printf
        # in the templated C++ code), then the CPU-model-specific
        # symbols, which are handled later (in GenCode).
        template = protect_cpu_symbols(
            protect_non_subst_percents(self.template))

        # Work on a copy of the template namespace since it gets modified.
        myDict = templateMap.copy()

        if not isinstance(d, InstObjParams):
            if isinstance(d, dict):
                # a dictionary is used as is
                myDict.update(d)
            elif hasattr(d, '__dict__'):
                # any other object contributes its attribute map
                myDict.update(d.__dict__)
            else:
                raise TypeError(
                    "Template.subst() arg must be or have dictionary")
            return template % myDict

        # InstObjParams: the instruction-wide parameters are already
        # formed, but the parameters which are only function wide still
        # need to be generated.
        myDict.update(d.__dict__)
        # The "operands" and "snippets" attributes of the InstObjParams
        # objects are for internal use and not substitution.
        del myDict['operands']
        del myDict['snippets']

        # Munge every snippet that this template actually references.
        snippets = {}
        for label in labelRE.findall(template):
            if label in d.snippets:
                snippets[label] = mungeSnippet(d.snippets[label])
        myDict.update(snippets)

        # Operands are looked up in the referenced snippets plus the
        # template itself, in case it references any operands
        # explicitly (like Mem).
        snippet_text = ' '.join([str(code) for code in snippets.values()])
        compositeCode = snippet_text + ' ' + template

        operands = SubOperandList(compositeCode, d.operands)

        myDict['op_decl'] = operands.concatAttrStrings('op_decl')
        myDict['op_src_decl'] = operands.concatSomeAttrStrings(
            lambda op: op.is_src, 'op_src_decl')
        myDict['op_dest_decl'] = operands.concatSomeAttrStrings(
            lambda op: op.is_dest, 'op_dest_decl')
        myDict['op_rd'] = operands.concatAttrStrings('op_rd')
        myDict['op_wb'] = operands.concatAttrStrings('op_wb')

        if d.operands.memOperand:
            mem_operand = d.operands.memOperand
            myDict['mem_acc_size'] = mem_operand.mem_acc_size
            myDict['mem_acc_type'] = mem_operand.mem_acc_type

        return template % myDict

    # Convert to string.  This handles the case when a template with a
    # CPU-specific term gets interpolated into another template or into
    # an output block.
    def __str__(self):
        return expand_cpu_symbols_to_string(self.template)
1105
1106#####################################################################
1107#
1108#                             Code Parser
1109#
1110# The remaining code is the support for automatically extracting
1111# instruction characteristics from pseudocode.
1112#
1113#####################################################################
1114
# Force the argument to be a list.  Useful for flags, where a caller
# can specify a singleton flag or a list of flags.  Also useful for
# converting tuples to lists so they can be modified.  A list is
# returned as is, a tuple is copied into a new list, any other false
# value gives an empty list, and anything else is wrapped in a
# one-element list.
def makeList(arg):
    if isinstance(arg, list):
        return arg
    if isinstance(arg, tuple):
        return list(arg)
    if arg:
        return [ arg ]
    return []
1127
# Generate operandTypeMap from the user's 'def operand_types'
# statement.  'userDict' maps each type extension to a pair
# (description, size); the resulting global operandTypeMap maps the
# extension to a triple (size, C type name, is_signed).  A description
# that is not recognized, or a float whose size is neither 32 nor 64,
# is reported through error() using 'lineno'.
def buildOperandTypeMap(userDict, lineno):
    global operandTypeMap
    operandTypeMap = {}
    for (ext, (desc, size)) in userDict.items():
        # Reset per entry so that an unrecognized description is
        # detected below instead of reusing the previous entry's type.
        ctype = ''
        is_signed = 0
        if desc == 'signed int':
            ctype = 'int%d_t' % size
            is_signed = 1
        elif desc == 'unsigned int':
            ctype = 'uint%d_t' % size
            is_signed = 0
        elif desc == 'float':
            is_signed = 1       # shouldn't really matter
            if size == 32:
                ctype = 'float'
            elif size == 64:
                ctype = 'double'
        elif desc == 'twin64 int':
            is_signed = 0
            ctype = 'Twin64_t'
        elif desc == 'twin32 int':
            is_signed = 0
            ctype = 'Twin32_t'
        if ctype == '':
            error(lineno,
                  'Unrecognized type description "%s" in userDict' % desc)
        operandTypeMap[ext] = (size, ctype, is_signed)
1155
1156#
1157#
1158#
1159# Base class for operand descriptors.  An instance of this class (or
1160# actually a class derived from this one) represents a specific
1161# operand for a code block (e.g, "Rc.sq" as a dest). Intermediate
1162# derived classes encapsulates the traits of a particular operand type
1163# (e.g., "32-bit integer register").
1164#
class Operand(object):
    # 'full_name' is the operand as written in the code, 'ext' its
    # explicit type extension (if any); 'is_src' / 'is_dest' tell
    # whether it is used as a source and/or as a destination.
    def __init__(self, full_name, ext, is_src, is_dest):
        self.full_name = full_name
        self.ext = ext
        self.is_src = is_src
        self.is_dest = is_dest
        # The 'effective extension' (eff_ext) is either the actual
        # extension, if one was explicitly provided, or the default.
        self.eff_ext = ext or self.dflt_ext

        type_info = operandTypeMap[self.eff_ext]
        (self.size, self.ctype, self.is_signed) = type_info

        # note that mem_acc_size is undefined for non-mem operands...
        # template must be careful not to use it if it doesn't apply.
        if self.isMem():
            self.mem_acc_size = self.makeAccSize()
            if self.ctype in ['Twin32_t', 'Twin64_t']:
                self.mem_acc_type = 'Twin'
            else:
                self.mem_acc_type = 'uint'

    # Finalize additional fields (primarily code fields).  This step
    # is done separately since some of these fields may depend on the
    # register index enumeration that hasn't been performed yet at the
    # time of __init__().
    def finalize(self):
        self.flags = self.getFlags()
        self.constructor = self.makeConstructor()
        self.op_decl = self.makeDecl()

        # Read code and declaration exist only for source operands.
        op_rd = ''
        op_src_decl = ''
        if self.is_src:
            op_rd = self.makeRead()
            op_src_decl = self.makeDecl()
        self.op_rd = op_rd
        self.op_src_decl = op_src_decl

        # Likewise, write-back code only for destination operands.
        op_wb = ''
        op_dest_decl = ''
        if self.is_dest:
            op_wb = self.makeWrite()
            op_dest_decl = self.makeDecl()
        self.op_wb = op_wb
        self.op_dest_decl = op_dest_decl

    # Type predicates; the base class answers false to all of them.
    def isMem(self):
        return 0

    def isReg(self):
        return 0

    def isFloatReg(self):
        return 0

    def isIntReg(self):
        return 0

    def isControlReg(self):
        return 0

    # Return the flags that apply to this operand: the unconditional
    # ones plus those implied by its use as a source and/or as a dest.
    def getFlags(self):
        # the empty slice '[:]' copies self.flags[0] so that the
        # shared list is not modified below
        my_flags = self.flags[0][:]
        if self.is_src:
            my_flags += self.flags[1]
        if self.is_dest:
            my_flags += self.flags[2]
        return my_flags

    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        return '%s %s = 0;\n' % (self.ctype, self.base_name)
1241
class IntRegOperand(Operand):
    def isReg(self):
        return 1

    def isIntReg(self):
        return 1

    # Constructor code recording the register index of this operand in
    # _srcRegIdx and/or _destRegIdx.
    def makeConstructor(self):
        stmts = []
        if self.is_src:
            stmts.append('\n\t_srcRegIdx[%d] = %s;' %
                         (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            stmts.append('\n\t_destRegIdx[%d] = %s;' %
                         (self.dest_reg_idx, self.reg_spec))
        return ''.join(stmts)

    # Code that reads the register into the operand variable.
    def makeRead(self):
        if self.ctype in ('float', 'double'):
            error(0, 'Attempt to read integer register as FP')
        if self.size == self.dflt_size:
            return '%s = xc->readIntRegOperand(this, %d);\n' % \
                   (self.base_name, self.src_reg_idx)
        if self.size < self.dflt_size:
            # narrower than the default size: keep only the low bits
            return '%s = bits(xc->readIntRegOperand(this, %d), %d, 0);\n' % \
                   (self.base_name, self.src_reg_idx, self.size-1)
        # wider than the default size: sign-extend if signed
        int_reg_val = 'xc->readIntRegOperand(this, %d)' % \
                      (self.src_reg_idx)
        if self.is_signed:
            int_reg_val = 'sext<%d>(%s)' % (self.dflt_size, int_reg_val)
        return '%s = %s;\n' % (self.base_name, int_reg_val)

    # Code that writes the operand variable back to the register.
    def makeWrite(self):
        if self.ctype in ('float', 'double'):
            error(0, 'Attempt to write integer register as FP')
        final_val = self.base_name
        if self.size != self.dflt_size and self.is_signed:
            final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        wb = '''
        {
            %s final_val = %s;
            xc->setIntRegOperand(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_ctype, final_val, self.dest_reg_idx)
        return wb
1289
class FloatRegOperand(Operand):
    def isReg(self):
        return 1

    def isFloatReg(self):
        return 1

    # Constructor code recording the register index (offset by
    # FP_Base_DepTag) in _srcRegIdx and/or _destRegIdx.
    def makeConstructor(self):
        stmts = []
        if self.is_src:
            stmts.append('\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' %
                         (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            stmts.append('\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' %
                         (self.dest_reg_idx, self.reg_spec))
        return ''.join(stmts)

    # Code that reads the register into the operand variable.  float
    # and double use readFloatRegOperand; every other type uses
    # readFloatRegOperandBits.
    def makeRead(self):
        bit_select = 0
        if self.ctype in ('float', 'double'):
            func = 'readFloatRegOperand'
            width = {'float': 32, 'double': 64}[self.ctype]
        else:
            func = 'readFloatRegOperandBits'
            # only these two types pass an explicit width
            width = {'uint32_t': 32, 'uint64_t': 64}.get(self.ctype, 0)
            if self.size != self.dflt_size:
                bit_select = 1
        if width:
            base = 'xc->%s(this, %d, %d)' % \
                   (func, self.src_reg_idx, width)
        else:
            base = 'xc->%s(this, %d)' % \
                   (func, self.src_reg_idx)
        if bit_select:
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, base, self.size-1)
        return '%s = %s;\n' % (self.base_name, base)

    # Code that writes the operand variable back to the register.
    def makeWrite(self):
        final_val = self.base_name
        final_ctype = self.ctype
        width = 0
        if self.ctype in ('float', 'double'):
            func = 'setFloatRegOperand'
            width = {'float': 32, 'double': 64}[self.ctype]
        else:
            func = 'setFloatRegOperandBits'
            if self.ctype == 'uint32_t':
                width = 32
            elif self.ctype == 'uint64_t':
                width = 64
            else:
                # any other type is written as an unsigned integer of
                # the default size, sign-extended first if needed
                final_ctype = 'uint%d_t' % self.dflt_size
                if self.size != self.dflt_size and self.is_signed:
                    final_val = 'sext<%d>(%s)' % (self.size, self.base_name)
        widthSpecifier = ''
        if width:
            widthSpecifier = ', %d' % width
        wb = '''
        {
            %s final_val = %s;
            xc->%s(this, %d, final_val%s);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (final_ctype, final_val, func, self.dest_reg_idx,
                widthSpecifier)
        return wb
1368
class ControlRegOperand(Operand):
    def isReg(self):
        return 1

    def isControlReg(self):
        return 1

    # Constructor code recording the register index (offset by
    # Ctrl_Base_DepTag) in _srcRegIdx and/or _destRegIdx.
    def makeConstructor(self):
        stmts = []
        if self.is_src:
            stmts.append('\n\t_srcRegIdx[%d] = %s + Ctrl_Base_DepTag;' %
                         (self.src_reg_idx, self.reg_spec))
        if self.is_dest:
            stmts.append('\n\t_destRegIdx[%d] = %s + Ctrl_Base_DepTag;' %
                         (self.dest_reg_idx, self.reg_spec))
        return ''.join(stmts)

    # Code that reads the register into the operand variable; only the
    # low bits are kept when the operand size differs from the default.
    def makeRead(self):
        if self.ctype in ('float', 'double'):
            error(0, 'Attempt to read control register as FP')
        base = 'xc->readMiscRegOperand(this, %s)' % self.src_reg_idx
        if self.size != self.dflt_size:
            return '%s = bits(%s, %d, 0);\n' % \
                   (self.base_name, base, self.size-1)
        return '%s = %s;\n' % (self.base_name, base)

    # Code that writes the operand variable back to the register.
    def makeWrite(self):
        if self.ctype in ('float', 'double'):
            error(0, 'Attempt to write control register as FP')
        set_stmt = 'xc->setMiscRegOperand(this, %s, %s);\n' % \
                   (self.dest_reg_idx, self.base_name)
        trace_stmt = 'if (traceData) { traceData->setData(%s); }' % \
                     self.base_name
        return set_stmt + trace_stmt
1405
class MemOperand(Operand):
    def isMem(self):
        return 1

    # Memory operands contribute no constructor, read or write code.
    def makeConstructor(self):
        return ''

    # Declare memory data variable.
    def makeDecl(self):
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        name = self.base_name
        if self.ctype in ['Twin32_t','Twin64_t']:
            # the twin types are initialized member by member
            return "%s %s; %s.a = 0; %s.b = 0;\n" % (self.ctype, name,
                    name, name)
        return '%s %s = 0;\n' % (self.ctype, name)

    def makeRead(self):
        return ''

    def makeWrite(self):
        return ''

    # Return the memory access size *in bits*, suitable for
    # forming a type via "uint%d_t".  Divide by 8 if you want bytes.
    def makeAccSize(self):
        return self.size
1433
1434
class NPCOperand(Operand):
    # No constructor code is generated for this operand.
    def makeConstructor(self):
        return ''

    # Read the operand variable through xc->readNextPC().
    def makeRead(self):
        read_stmt = '%s = xc->readNextPC();\n'
        return read_stmt % self.base_name

    # Write the operand variable back through xc->setNextPC().
    def makeWrite(self):
        write_stmt = 'xc->setNextPC(%s);\n'
        return write_stmt % self.base_name
1444
class NNPCOperand(Operand):
    # No constructor code is generated for this operand.
    def makeConstructor(self):
        return ''

    # Read the operand variable through xc->readNextNPC().
    def makeRead(self):
        read_stmt = '%s = xc->readNextNPC();\n'
        return read_stmt % self.base_name

    # Write the operand variable back through xc->setNextNPC().
    def makeWrite(self):
        write_stmt = 'xc->setNextNPC(%s);\n'
        return write_stmt % self.base_name
1454
# Generate operandNameMap and the operand-matching regular expressions
# (operandsRE, operandsWithExtRE) from 'userDict'.  Each entry of
# 'userDict' maps an operand name to a tuple (base class name, default
# extension, register specifier, flags, sort priority).  An unknown
# base class name is reported through error() using 'lineno'.
def buildOperandNameMap(userDict, lineno):
    global operandNameMap, operandsRE, operandsWithExtRE
    operandNameMap = {}
    for (op_name, val) in userDict.items():
        (base_cls_name, dflt_ext, reg_spec, flags, sort_pri) = val
        (dflt_size, dflt_ctype, dflt_is_signed) = operandTypeMap[dflt_ext]
        # Canonical flag structure is a triple of lists, where each list
        # indicates the set of flags implied by this operand always, when
        # used as a source, and when used as a dest, respectively.
        # For simplicity this can be initialized using a variety of fairly
        # obvious shortcuts; we convert these to canonical form here.
        if not flags:
            # no flags specified (e.g., 'None')
            flags = ( [], [], [] )
        elif isinstance(flags, str):
            # a single flag: assumed to be unconditional
            flags = ( [ flags ], [], [] )
        elif isinstance(flags, list):
            # a list of flags: also assumed to be unconditional
            flags = ( flags, [], [] )
        elif isinstance(flags, tuple):
            # it's a tuple: it should be a triple,
            # but each item could be a single string or a list
            (uncond_flags, src_flags, dest_flags) = flags
            flags = (makeList(uncond_flags),
                     makeList(src_flags), makeList(dest_flags))
        # Attributes of the new operand class
        tmp_dict = {
            'dflt_ext': dflt_ext,
            'reg_spec': reg_spec,
            'flags': flags,
            'sort_pri': sort_pri,
            'dflt_size': dflt_size,
            'dflt_ctype': dflt_ctype,
            'dflt_is_signed': dflt_is_signed,
            'base_name': op_name,
        }
        # New class name will be e.g. "IntReg_Ra"
        cls_name = base_cls_name + '_' + op_name
        # Evaluate string arg to get class object.  Note that the
        # actual base class for "IntReg" is "IntRegOperand", i.e. we
        # have to append "Operand".
        try:
            base_cls = eval(base_cls_name + 'Operand')
        except NameError:
            error(lineno,
                  'error: unknown operand base class "%s"' % base_cls_name)
        # The following statement creates a new class called
        # <cls_name> as a subclass of <base_cls> with the attributes
        # in tmp_dict, just as if we evaluated a class declaration.
        operandNameMap[op_name] = type(cls_name, (base_cls,), tmp_dict)

    # Alternation of all operand names, shared by both expressions.
    operand_alternatives = '|'.join(userDict.keys())

    operandsREString = (r'''
    (?<![\w\.])	     # neg. lookbehind assertion: prevent partial matches
    ((%s)(?:\.(\w+))?)   # match: operand with optional '.' then suffix
    (?![\w\.])	     # neg. lookahead assertion: prevent partial matches
    '''
                        % operand_alternatives)

    operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)

    # Same as operandsREString, but extension is mandatory, and only two
    # groups are returned (base and ext, not full name as above).
    # Used for substituting '_' for '.' to make C++ identifiers.
    operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])'
                               % operand_alternatives)

    operandsWithExtRE = re.compile(operandsWithExtREString, re.MULTILINE)
1523
1524
class OperandList:

    # Find all the operands in the given code block and build the
    # list of operand descriptors from them.
    def __init__(self, code):
        self.items = []
        self.bases = {}
        # delete comments so we don't match on reg specifiers inside
        code = commentRE.sub('', code)
        # walk over the operand matches, each search starting where
        # the previous match ended
        match = operandsRE.search(code)
        while match:
            # regexp groups are operand full name, base, and extension
            (op_full, op_base, op_ext) = match.groups()
            # if the token following the operand is an assignment, this is
            # a destination (LHS), else it's a source (RHS)
            is_dest = assignRE.match(code, match.end()) is not None
            is_src = not is_dest
            op_desc = self.find_base(op_base)
            if not op_desc:
                # new operand: create new descriptor
                op_desc = operandNameMap[op_base](op_full, op_ext,
                                                  is_src, is_dest)
                self.append(op_desc)
            else:
                # seen before: the extension must agree, and the
                # source/dest usage accumulates
                if op_desc.ext != op_ext:
                    error(0, 'Inconsistent extensions for operand %s' % \
                          op_base)
                op_desc.is_src = op_desc.is_src or is_src
                op_desc.is_dest = op_desc.is_dest or is_dest
            match = operandsRE.search(code, match.end())
        self.sort()
        # enumerate source & dest register operands... used in building
        # constructor later
        self.numSrcRegs = 0
        self.numDestRegs = 0
        self.numFPDestRegs = 0
        self.numIntDestRegs = 0
        self.memOperand = None
        for op_desc in self.items:
            if op_desc.isReg():
                if op_desc.is_src:
                    op_desc.src_reg_idx = self.numSrcRegs
                    self.numSrcRegs += 1
                if op_desc.is_dest:
                    op_desc.dest_reg_idx = self.numDestRegs
                    self.numDestRegs += 1
                    if op_desc.isFloatReg():
                        self.numFPDestRegs += 1
                    elif op_desc.isIntReg():
                        self.numIntDestRegs += 1
            elif op_desc.isMem():
                if self.memOperand:
                    error(0, "Code block has more than one memory operand.")
                self.memOperand = op_desc
        # now make a final pass to finalize op_desc fields that may depend
        # on the register enumeration
        for op_desc in self.items:
            op_desc.finalize()

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        return self.items[index]

    # Add a descriptor to the list and index it by its base name.
    def append(self, op_desc):
        self.items.append(op_desc)
        self.bases[op_desc.base_name] = op_desc

    def find_base(self, base_name):
        # like self.bases[base_name], but returns None if not found
        # (rather than raising exception)
        return self.bases.get(base_name)

    # internal helper function for concat[Some]Attr{Strings|Lists}:
    # append the named attribute of every operand accepted by 'filter'
    # to 'result' and return it
    def __internalConcatAttrs(self, attr_name, filter, result):
        for op_desc in self.items:
            if not filter(op_desc):
                continue
            result += getattr(op_desc, attr_name)
        return result

    # return a single string that is the concatenation of the (string)
    # values of the specified attribute for all operands
    def concatAttrStrings(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda op: 1, '')

    # like concatAttrStrings, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrStrings(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, '')

    # return a single list that is the concatenation of the (list)
    # values of the specified attribute for all operands
    def concatAttrLists(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda op: 1, [])

    # like concatAttrLists, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrLists(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, [])

    # Order the operands by ascending sort priority.
    def sort(self):
        self.items.sort(key=lambda op_desc: op_desc.sort_pri)
1636
class SubOperandList(OperandList):

    # Find all the operands in the given code block and collect the
    # matching descriptors from 'master_list'.  The descriptors are
    # shared with the master list, not copied.
    def __init__(self, code, master_list):
        self.items = []
        self.bases = {}
        # delete comments so we don't match on reg specifiers inside
        code = commentRE.sub('', code)
        # walk over the operand matches, each search starting where
        # the previous match ended
        match = operandsRE.search(code)
        while match:
            # regexp groups are operand full name, base, and extension
            (op_full, op_base, op_ext) = match.groups()
            if not master_list.find_base(op_base):
                # every operand must already be in the master list
                error(0, 'Found operand %s which is not in the master list!' \
                        ' This is an internal error' % \
                          op_base)
            elif not self.find_base(op_base):
                # first time we see it here: add a reference to it to
                # this sub list
                self.append(master_list.bases[op_base])
            match = operandsRE.search(code, match.end())
        self.sort()
        self.memOperand = None
        for op_desc in self.items:
            if not op_desc.isMem():
                continue
            if self.memOperand:
                error(0, "Code block has more than one memory operand.")
            self.memOperand = op_desc
1678
# Regular expression object to match C++ '//' comments (used to strip
# comments from a code block before it is scanned for operands).
# The match runs from the '//' up to, but not including, the end of
# the line: the newline itself is kept so that removing a comment can
# never fuse the code in front of it with the following line, and a
# comment on a final line that lacks a trailing newline is matched too.
commentRE = re.compile(r'//.*')

# Regular expression object to match the '=' of an assignment
# statement (optionally preceded by whitespace) while rejecting the
# '==' comparison operator.
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
1686
1687# Munge operand names in code string to make legal C++ variable names.
1688# This means getting rid of the type extension if any.
1689# (Will match base_name attribute of Operand object.)
def substMungedOpNames(code):
    """Return code with each operandsWithExtRE match replaced by the
    text of its first group (the operand name without its type
    extension, per the description that precedes this function)."""
    munged = operandsWithExtRE.sub(r'\1', code)
    return munged
1692
1693# Fix up code snippets for final substitution in templates.
def mungeSnippet(s):
    """Prepare a snippet for final substitution in templates.

    Anything that is not a str is handed back untouched.  A str is
    first run through substBitOps and the result is then run through
    substMungedOpNames.
    """
    if not isinstance(s, str):
        return s
    without_bit_ops = substBitOps(s)
    return substMungedOpNames(without_bit_ops)
1699
def makeFlagConstructor(flag_list):
    """Return the C++ statements that set every flag in flag_list.

    One '\\n\\tflags[<flag>] = true;' statement is produced per distinct
    flag, in sorted order.  An empty list gives the empty string.

    Side effect: a non-empty flag_list is sorted and stripped of
    repeated entries in place, so the caller's list is modified.
    """
    if not flag_list:
        return ''
    # Sort and filter out repeated flags.  The slice assignment keeps
    # updating the caller's own list object rather than a copy.
    flag_list[:] = sorted(set(flag_list))
    pre = '\n\tflags['
    post = '] = true;'
    return pre + (post + pre).join(flag_list) + post
1715
# Assume all instruction flags are of the form 'IsFoo'
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass.  The pattern is
# anchored at the end of the string: without the anchor, match() would
# accept any name that merely contains 'Op' somewhere (e.g. a
# misspelled 'IntAluOpp') as an OpClass.
opClassRE = re.compile(r'(?:.*Op|No_OpClass)$')
1721
class InstObjParams:
    """Values describing one instruction class.

    Attributes set by the constructor:
      mnemonic, class_name, base_class -- copied from the arguments
      snippets        -- dictionary of code snippets
      operands        -- OperandList built from all snippets together
      constructor     -- C++ constructor code: the operands' own
                         'constructor' strings, the register counts and
                         the flag initializations
      flags           -- operand flags plus any flags from opt_args
      op_class        -- guessed from the flags unless given in opt_args
      fp_enable_check -- FP enable check call if 'IsFloating' is set,
                         otherwise ''
    """
    def __init__(self, mnem, class_name, base_class = '',
                 snippets = None, opt_args = None):
        """Build the parameter set for one instruction.

        mnem, class_name, base_class -- stored as given
        snippets -- dictionary of code snippets; any other value is
                    treated as the single snippet 'code'.  Defaults to
                    no snippets.
        opt_args -- sequence of StaticInst flag names and/or one
                    OpClass name.  Defaults to none.

        An unrecognized optional argument is reported through error().
        """
        # None stands for "nothing given".  A fresh container is made
        # on every call: the snippets dictionary is kept on the
        # instance, so a shared default object could be modified
        # through one instance and leak into all later ones.
        if snippets is None:
            snippets = {}
        if opt_args is None:
            opt_args = []
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        if not isinstance(snippets, dict):
            snippets = {'code' : snippets}
        compositeCode = ' '.join(map(str, snippets.values()))
        self.snippets = snippets

        # The operands are looked up in all snippets taken together.
        self.operands = OperandList(compositeCode)
        self.constructor = self.operands.concatAttrStrings('constructor')
        self.constructor += \
                 '\n\t_numSrcRegs = %d;' % self.operands.numSrcRegs
        self.constructor += \
                 '\n\t_numDestRegs = %d;' % self.operands.numDestRegs
        self.constructor += \
                 '\n\t_numFPDestRegs = %d;' % self.operands.numFPDestRegs
        self.constructor += \
                 '\n\t_numIntDestRegs = %d;' % self.operands.numIntDestRegs
        self.flags = self.operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and can be overridden
        # later otherwise.
        if 'IsStore' in self.flags:
            self.op_class = 'MemWriteOp'
        elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
            self.op_class = 'MemReadOp'
        elif 'IsFloating' in self.flags:
            self.op_class = 'FloatAddOp'
        else:
            self.op_class = 'IntAluOp'

        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error(0, 'InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to constructor here to include
        # any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
        else:
            self.fp_enable_check = ''
1780
1781#######################
1782#
1783# Output file template
1784#
1785
# Skeleton shared by the generated C++ files.  It is filled in with the
# '%' operator from a dictionary that must supply these keys:
#   filename         - name of the ISA description, shown in the banner
#   includes         - text placed right after the banner
#   global_output    - text emitted before the namespace is opened
#   namespace        - name of the C++ namespace (used on the opening
#                      line and in the closing comment)
#   namespace_output - text emitted inside the namespace
#   decode_function  - text emitted after the namespace is closed
file_template = '''
/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s

%(decode_function)s
'''
1805
1806
1807# Update the output file only if the new contents are different from
1808# the current contents.  Minimizes the files that need to be rebuilt
1809# after minor changes.
def update_if_needed(file, contents):
    """Write contents to file unless the file already holds exactly that.

    file     -- path of the output file (the parameter name shadows the
                builtin of the same name; it is kept for the callers)
    contents -- complete new text of the file

    A readable file whose text equals contents is left alone.  A
    readable file with different text is removed and written anew.  A
    file that cannot be read (for instance because it does not exist)
    is simply written.  One status line is printed in each case.
    """
    update = False
    if os.access(file, os.R_OK):
        f = open(file, 'r')
        # try/finally so that the handle is released even if reading
        # fails part way through
        try:
            old_contents = f.read()
        finally:
            f.close()
        if contents != old_contents:
            print('Updating %s' % (file,))
            os.remove(file) # in case it's write-protected
            update = True
        else:
            print('File %s is unchanged' % (file,))
    else:
        print('Generating %s' % (file,))
        update = True
    if update:
        f = open(file, 'w')
        try:
            f.write(contents)
        finally:
            f.close()
1829
# Matches a '##include "<file>"' directive that starts a line (leading
# whitespace is allowed).  The quoted path is captured in the named
# group 'filename'; whatever follows the closing quote, up to the end
# of the line, belongs to the match as well.
includeRE = re.compile(
    r'^\s*##include\s+"(?P<filename>[\w/.-]*)".*$', re.M)
1833
1834# Function to replace a matched '##include' directive with the
1835# contents of the specified file (with nested ##includes replaced
1836# recursively).  'matchobj' is an re match object (from a match of
1837# includeRE) and 'dirname' is the directory relative to which the file
1838# path should be resolved.
def replace_include(matchobj, dirname):
    """Return the replacement text for one matched '##include'.

    matchobj -- match object carrying a 'filename' group
    dirname  -- directory against which the file name is resolved

    The result is the flattened text of the named file (obtained from
    read_and_flatten), wrapped between a '##newfile "<path>"' line and
    a '##endfile' line.
    """
    included = matchobj.group('filename')
    path = os.path.normpath(os.path.join(dirname, included))
    flattened = read_and_flatten(path)
    return '##newfile "%s"\n%s\n##endfile\n' % (path, flattened)
1845
1846# Read a file and recursively flatten nested '##include' files.
def read_and_flatten(filename):
    """Return the text of filename with every '##include' expanded.

    Each directive matched by includeRE is replaced through
    replace_include, which resolves the included name relative to the
    directory of filename and calls back into this function, so nested
    includes are expanded recursively.

    A file that cannot be opened or read is reported through error().
    While the includes of a file are being expanded, the pair
    (filename, 0) sits on top of fileNameStack.
    """
    current_dir = os.path.dirname(filename)
    try:
        f = open(filename)
        # close the handle explicitly instead of leaving it to the
        # garbage collector
        try:
            contents = f.read()
        finally:
            f.close()
    except IOError:
        # NOTE(review): error() is expected not to return here;
        # 'contents' would be unbound below if it did.
        error(0, 'Error including file "%s"' % filename)
    fileNameStack.push((filename, 0))
    # Find any includes and include them
    contents = includeRE.sub(lambda m: replace_include(m, current_dir),
                             contents)
    fileNameStack.pop()
    return contents
1859
1860#
1861# Read in and parse the ISA description.
1862#
def parse_isa_desc(isa_desc_file, output_dir):
    """Parse an ISA description and write the generated C++ files.

    isa_desc_file -- path of the top-level ISA description
    output_dir    -- directory receiving the generated files

    Files written (each through update_if_needed, so unchanged files
    are left alone): decoder.hh, decoder.cc and one file per entry of
    the global cpu_models list, named by that entry's 'filename'.
    """
    # PLY wants its whole input as one string, so the description and
    # everything it includes are read and flattened up front.
    isa_desc = read_and_flatten(isa_desc_file)

    # The outer file goes on the filename stack before parsing starts.
    fileNameStack.push((isa_desc_file, 0))

    (isa_name, namespace, global_code, namespace_code) = yacc.parse(isa_desc)

    # Only the last three path components of the description's name
    # are shown in the generated files.
    path_parts = isa_desc_file.split('/')
    filename = '/'.join(path_parts[-3:])

    def emit(includes, global_output, namespace_output, decode_function,
             out_name):
        # Fill in the file template and hand the result to
        # update_if_needed for the named file in output_dir.
        fields = {
            'filename': filename,
            'includes': includes,
            'global_output': global_output,
            'namespace': namespace,
            'namespace_output': namespace_output,
            'decode_function': decode_function,
        }
        update_if_needed(output_dir + '/' + out_name, file_template % fields)

    # decoder.hh: header output only, no decode function
    emit('#include "base/bitfield.hh" // for bitfield support',
         global_code.header_output,
         namespace_code.header_output,
         '',
         'decoder.hh')

    # decoder.cc: decoder output; the decode block is emitted after the
    # namespace rather than inside it
    emit('#include "decoder.hh"',
         global_code.decoder_output,
         namespace_code.decoder_output,
         namespace_code.decode_block,
         'decoder.cc')

    # one exec file per cpu model
    for cpu in cpu_models:
        emit('#include "decoder.hh"\n' + cpu.includes,
             global_code.exec_output[cpu.name],
             namespace_code.exec_output[cpu.name],
             '',
             cpu.filename)
1903
# global list of CpuModel objects (see cpu_models.py)
cpu_models = []

# Called as script: get args from command line.
# Args are: <path to cpu_models.py> <isa desc file> <output dir> <cpu models>
if __name__ == '__main__':
    # The first three arguments are mandatory; the list of cpu models
    # may be empty.  Exit with a usage message rather than an
    # IndexError traceback when something is missing.
    if len(sys.argv) < 4:
        sys.exit('usage: %s <path to cpu_models.py> <isa desc file> '
                 '<output dir> [<cpu model> ...]' % sys.argv[0])
    execfile(sys.argv[1])  # read in CpuModel definitions
    cpu_models = [CpuModel.dict[cpu] for cpu in sys.argv[4:]]
    parse_isa_desc(sys.argv[2], sys.argv[3])
1913