isa_parser.py revision 742
1#! /usr/bin/env python 2 3# $Id$ 4 5# Copyright (c) 2003 The Regents of The University of Michigan 6# All rights reserved. 7# 8# Redistribution and use in source and binary forms, with or without 9# modification, are permitted provided that the following conditions are 10# met: redistributions of source code must retain the above copyright 11# notice, this list of conditions and the following disclaimer; 12# redistributions in binary form must reproduce the above copyright 13# notice, this list of conditions and the following disclaimer in the 14# documentation and/or other materials provided with the distribution; 15# neither the name of the copyright holders nor the names of its 16# contributors may be used to endorse or promote products derived from 17# this software without specific prior written permission. 18# 19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31import os 32import sys 33import re 34import string 35import traceback 36# get type names 37from types import * 38 39# Prepend the directory where the PLY lex & yacc modules are found 40# to the search path. Assumes we're compiling in a subdirectory 41# of 'build' in the current tree. 
sys.path.insert(0, os.environ['M5_EXT'] + '/ply')

import lex
import yacc

#####################################################################
#
#                                Lexer
#
# PLY's lex module is driven by two things found in this module:
#   - 'tokens': the list of token names
#   - one regular expression per token FOO, supplied either
#       - as a string variable named t_FOO, or
#       - as the doc string of a function named t_FOO, in which case
#         the function also runs as the action for each match.
#
#####################################################################

# Reserved words.  They are matched by the same regexp as generic
# identifiers and told apart inside t_ID(), which is the approach the
# PLY documentation suggests.
reserved = (
    'BITFIELD',
    'DECODE',
    'DECODER',
    'DEFAULT',
    'DEF',
    'EXEC',
    'FORMAT',
    'HEADER',
    'LET',
    'NAMESPACE',
    'OPERAND_TYPES',
    'OPERANDS',
    'OUTPUT',
    'SIGNED',
    'TEMPLATE',
)

# Complete token list (required by the lex module).
tokens = reserved + (
    'ID',            # identifier
    'INTLIT',        # integer literal
    'STRLIT',        # string literal
    'CODELIT',       # code literal
    # punctuation: ( ) { } < > , ; : :: *
    'LPAREN',
    'RPAREN',
    # 'LBRACKET' / 'RBRACKET' are not used any more; they stay out of
    # the list to suppress a PLY warning.
    'LBRACE',
    'RBRACE',
    'LESS',
    'GREATER',
    'COMMA',
    'SEMI',
    'COLON',
    'DBLCOLON',
    'ASTERISK',
    'CPPDIRECTIVE',  # C preprocessor directives
)

# Regular expressions for token matching
t_LPAREN = r'\('
t_RPAREN = r'\)'
# not used any more...
# commented out to suppress PLY warning
# t_LBRACKET = r'\['
# t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_LESS = r'\<'
t_GREATER = r'\>'
t_COMMA = r','
t_SEMI = r';'
t_COLON = r':'
t_DBLCOLON = r'::'
t_ASTERISK = r'\*'

# Identifiers and reserved words
# reserved_map maps the lower-case spelling of each reserved word to
# its (upper-case) token name.
reserved_map = { }
for r in reserved:
    reserved_map[r.lower()] = r

# NOTE: for every t_* function below the doc string is the token's
# regular expression (read by PLY), so it must stay the first statement
# and must not be turned into ordinary documentation.

def t_ID(t):
    r'[A-Za-z_]\w*'
    # a reserved word gets its own token type; anything else is an ID
    t.type = reserved_map.get(t.value,'ID')
    return t

# Integer literal
def t_INTLIT(t):
    r'(0x[\da-fA-F]+)|\d+'
    try:
        # base 0: int() infers the radix from the literal's prefix
        t.value = int(t.value,0)
    except ValueError:
        error(t.lineno, 'Integer value "%s" too large' % t.value)
        t.value = 0
    return t

# String literal.  Note that these use only single quotes, and
# can span multiple lines.
def t_STRLIT(t):
    r"(?m)'([^'])+'"
    # strip off quotes
    t.value = t.value[1:-1]
    # keep the line counter in step with newlines inside the literal
    t.lineno += t.value.count('\n')
    return t


# "Code literal"... like a string literal, but delimiters are
# '{{' and '}}' so they get formatted nicely under emacs c-mode
def t_CODELIT(t):
    r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
    # strip off {{ & }}
    t.value = t.value[2:-2]
    # keep the line counter in step with newlines inside the literal
    t.lineno += t.value.count('\n')
    return t

# A preprocessor directive: a '#' at the start of the input/line,
# through and including the terminating newline.
def t_CPPDIRECTIVE(t):
    r'^\#.*\n'
    t.lineno += t.value.count('\n')
    return t

#
# The functions t_NEWLINE, t_ignore, and t_error are
# special for the lex module.
#

# Newlines
# Only the line counter is updated; nothing is returned.
def t_NEWLINE(t):
    r'\n+'
    t.lineno += t.value.count('\n')

# Comments
# '//' to end of line; the body is just the regexp, nothing is returned.
def t_comment(t):
    r'//.*'

# Completely ignored characters
t_ignore = ' \t\x0c'

# Error handler
def t_error(t):
    error(t.lineno, "illegal character '%s'" % t.value[0])
    t.skip(1)

# Build the lexer
lex.lex()

#####################################################################
#
#                                Parser
#
# Every function whose name starts with 'p_' defines a grammar rule.
# The rule is encoded in the function's doc string, while the
# function body provides the action taken when the rule is matched.
# The argument to each function is a list of the values of the
# rule's symbols: t[0] for the LHS, and t[1..n] for the symbols
# on the RHS.  For tokens, the value is copied from the t.value
# attribute provided by the lexer.  For non-terminals, the value
# is assigned by the producing rule; i.e., the job of the grammar
# rule function is to set the value for the non-terminal on the LHS
# (by assigning to t[0]).
#####################################################################

# Start symbol: the LHS of the first grammar rule ('specification').
# The rule requires exactly one namespace declaration with zero or more
# global defs/decls on either side of it.  Whatever precedes the
# namespace declaration is emitted outside the namespace; whatever
# follows it, and the decoder function itself, is emitted inside.
def p_specification(t):
    'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
    isa_name = t[2]
    namespace = isa_name + "Inst"
    names = {'isa_name': isa_name, 'namespace': namespace}
    # opening lines of the C++ decode function wrapped around the
    # top-level decode block
    prologue = '''
StaticInstPtr<%(isa_name)s>
%(isa_name)s::decodeInst(%(isa_name)s::MachInst machInst)
{
    using namespace %(namespace)s;
''' % names
    decode_fn = t[4]
    decode_fn.wrap_decode_block(prologue, '}')
    # result handed back to the caller of yacc.parse():
    # (isa name, namespace name, code outside namespace, code inside)
    t[0] = (isa_name, namespace, t[1], t[3] + decode_fn)

# The ISA name is declared as "namespace <foo>;"
def p_name_decl(t):
    'name_decl : NAMESPACE ID SEMI'
    isa_name = t[2]
    t[0] = isa_name

# 'opt_defs_and_outputs' is a possibly empty sequence of
# def and/or output statements.
def p_opt_defs_and_outputs_0(t):
    'opt_defs_and_outputs : empty'
    # an empty sequence contributes an empty code object
    t[0] = GenCode()

def p_opt_defs_and_outputs_1(t):
    'opt_defs_and_outputs : defs_and_outputs'
    sequence = t[1]
    t[0] = sequence

def p_defs_and_outputs_0(t):
    'defs_and_outputs : def_or_output'
    first = t[1]
    t[0] = first

def p_defs_and_outputs_1(t):
    'defs_and_outputs : defs_and_outputs def_or_output'
    so_far, latest = t[1], t[2]
    t[0] = so_far + latest

# Every kind of definition/output statement that may appear.
def p_def_or_output(t):
    '''def_or_output : def_format
                     | def_bitfield
                     | def_template
                     | def_operand_types
                     | def_operands
                     | output_header
                     | output_decoder
                     | output_exec
                     | global_let'''
    statement = t[1]
    t[0] = statement

# Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
# directly to the appropriate output section.

# Prepare an output block: substitute template definitions and rewrite
# the bit operators.  A '%' that is not the start of a '%(' reference is
# doubled first, as is the '%' of a CPU-specific symbol (those are
# handled later, in GenCode), so that the format operation turns each
# of them back into a single '%'.
def process_output(s):
    # double every '%' that is not followed by '('
    escaped = re.sub(r'%(?!\()', '%%', s)
    # double the '%' of CPU-specific symbols as well
    escaped = protect_cpu_symbols(escaped)
    substituted = escaped % templateMap
    return substBitOps(substituted)

def p_output_header(t):
    'output_header : OUTPUT HEADER CODELIT SEMI'
    code = process_output(t[3])
    t[0] = GenCode(header_output = code)

def p_output_decoder(t):
    'output_decoder : OUTPUT DECODER CODELIT SEMI'
    code = process_output(t[3])
    t[0] = GenCode(decoder_output = code)

def p_output_exec(t):
    'output_exec : OUTPUT EXEC CODELIT SEMI'
    code = process_output(t[3])
    t[0] = GenCode(exec_output = code)

# global let blocks 'let {{...}}' (Python code blocks) are executed
# directly when seen.
# Note that these execute in a special variable
# context 'exportContext' to prevent the code from polluting this
# script's namespace.
def p_global_let(t):
    'global_let : LET CODELIT SEMI'
    updateExportContext()
    try:
        # exec(code, globals) is accepted by both Python 2 and Python 3,
        # unlike the statement form 'exec code in globals'.
        exec(fixPythonIndentation(t[2]), exportContext)
    except Exception as exc:
        error(t.lineno(1),
              'error: %s in global let block "%s".' % (exc, t[2]))
    t[0] = GenCode() # contributes nothing to the output C++ file

# Define the mapping from operand type extensions to C++ types and bit
# widths (stored in operandTypeMap).
def p_def_operand_types(t):
    'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
    # The code literal is the body of a dict display; the 'global'
    # statement in the generated source makes the assignment land in
    # this module's globals.
    s = 'global operandTypeMap; operandTypeMap = {' + t[3] + '}'
    try:
        exec(s, globals())
    except Exception as exc:
        error(t.lineno(1),
              'error: %s in def operand_types block "%s".' % (exc, t[3]))
    t[0] = GenCode() # contributes nothing to the output C++ file

# Define the mapping from operand names to operand classes and other
# traits.  Stored in operandTraitsMap.
def p_def_operands(t):
    'def_operands : DEF OPERANDS CODELIT SEMI'
    # Same scheme as p_def_operand_types(): evaluate the literal as a
    # dict display and bind it to a module-level global.
    s = 'global operandTraitsMap; operandTraitsMap = {' + t[3] + '}'
    try:
        exec(s, globals())
    except Exception as exc:
        error(t.lineno(1),
              'error: %s in def operands block "%s".' % (exc, t[3]))
    defineDerivedOperandVars()
    t[0] = GenCode() # contributes nothing to the output C++ file

# A bitfield definition looks like:
# 'def [signed] bitfield <ID> [<first>:<last>]'
# This generates a preprocessor macro in the output file.
def p_def_bitfield_0(t):
    'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
    name, first, last = t[4], t[6], t[8]
    macro_body = 'bits(machInst, %2d, %2d)' % (first, last)
    if t[2] == 'signed':
        # sign-extend from the width of the field
        macro_body = 'sext<%d>(%s)' % (first - last + 1, macro_body)
    t[0] = GenCode(header_output =
                   '#undef %s\n#define %s\t%s\n' % (name, name, macro_body))

# Single-bit form: 'def [signed] bitfield <ID> [<bit>]'
def p_def_bitfield_1(t):
    'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
    name, bit = t[4], t[6]
    macro_body = 'bits(machInst, %2d, %2d)' % (bit, bit)
    if t[2] == 'signed':
        macro_body = 'sext<%d>(%s)' % (1, macro_body)
    t[0] = GenCode(header_output =
                   '#undef %s\n#define %s\t%s\n' % (name, name, macro_body))

def p_opt_signed_0(t):
    'opt_signed : SIGNED'
    keyword = t[1]
    t[0] = keyword

def p_opt_signed_1(t):
    'opt_signed : empty'
    t[0] = ''

# Module-level map from template name to Template object
templateMap = {}

def p_def_template(t):
    'def_template : DEF TEMPLATE ID CODELIT SEMI'
    name, body = t[3], t[4]
    templateMap[name] = Template(body)
    t[0] = GenCode()

# An instruction format definition has the form
# "def format <fmt>(<params>) {{...}};"
def p_def_format(t):
    'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
    defFormat(t[3], t[5], t[7], t.lineno(1))
    t[0] = GenCode()

# The formal parameter list of an instruction format: zero or more
# comma-separated parameters.
def p_param_list_0(t):
    'param_list : empty'
    t[0] = []

def p_param_list_1(t):
    'param_list : param'
    only = t[1]
    t[0] = [only]

def p_param_list_2(t):
    'param_list : param_list COMMA param'
    # extend the list built so far in place and pass it along
    params = t[1]
    params.append(t[3])
    t[0] = params

# Each formal parameter is either an identifier or an identifier
# preceded by an asterisk.
# As in Python, the latter (if present) gets
# a tuple containing all the excess positional arguments, allowing
# varargs functions.
def p_param_0(t):
    'param : ID'
    t[0] = t[1]

def p_param_1(t):
    'param : ASTERISK ID'
    # just concatenate them: '*ID'
    t[0] = t[1] + t[2]

# End of format definition-related rules.
##############

#
# A decode block looks like:
#       decode <field1> [, <field2>]* [default <inst>] { ... }
#
def p_decode_block(t):
    'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
    # opt_default pushed an entry for this block; take it back off
    default_defaults = defaultStack.pop()
    codeObj = t[5]
    # use the "default defaults" only if there was no explicit
    # default statement in decode_stmt_list.  The inherited entry is
    # None when no enclosing block declared a default either (None is
    # the stack's initial item); there is then nothing to append, and
    # adding None to the code object would fail.
    if not codeObj.has_decode_default and default_defaults is not None:
        codeObj += default_defaults
    # the decode field becomes the subject of a C++ switch statement
    codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
    t[0] = codeObj

# The opt_default statement serves only to push the "default defaults"
# onto defaultStack.  This value will be used by nested decode blocks,
# and used and popped off when the current decode_block is processed
# (in p_decode_block() above).
def p_opt_default_0(t):
    'opt_default : empty'
    # nothing specified: duplicate whatever is on top of the stack
    inherited = defaultStack.top()
    defaultStack.push(inherited)
    # the rule itself produces no meaningful value
    t[0] = None

def p_opt_default_1(t):
    'opt_default : DEFAULT inst'
    # an explicit default: wrap it as the switch's default case and
    # make it the current entry
    default_code = t[2]
    default_code.wrap_decode_block('\ndefault:\n', 'break;\n')
    defaultStack.push(default_code)
    # the rule itself produces no meaningful value
    t[0] = None

def p_decode_stmt_list_0(t):
    'decode_stmt_list : decode_stmt'
    single = t[1]
    t[0] = single

def p_decode_stmt_list_1(t):
    'decode_stmt_list : decode_stmt decode_stmt_list'
    head, tail = t[1], t[2]
    if head.has_decode_default and tail.has_decode_default:
        error(t.lineno(1), 'Two default cases in decode block')
    t[0] = head + tail

#
# Decode statement rules
#
# A decode block may contain four kinds of statement:
# 1. Format blocks 'format <foo> { ... }'
# 2. Nested decode blocks
# 3. Instruction definitions.
# 4. C preprocessor directives.


# A preprocessor directive inside a decode statement list is passed
# through to the output, replicated to all of the output code streams.
# This suits ifdefs: both the declarations and the decode cases
# generated by an instruction definition can be ifdef'd out together.
# Treating directives as grammar symbols keeps them in the right place
# relative to the code generated by the neighbouring statements.
def p_decode_stmt_cpp(t):
    'decode_stmt : CPPDIRECTIVE'
    directive = t[1]
    t[0] = GenCode(directive, directive, directive, directive)

# A format block 'format <foo> { ... }' sets the default instruction
# format used to handle instruction definitions inside the block.
# This format can be overridden by using an explicit format on the
# instruction definition or with a nested format block.
def p_decode_stmt_format(t):
    'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
    # 'push_format_id' (below) pushed the format when its name was
    # recognized.  Reaching this action means the whole production,
    # through the right brace, has been seen, so the format is no
    # longer current and comes off the stack.
    formatStack.pop()
    body = t[4]
    t[0] = body

# A separate rule for the format name, so that the current format is
# set (and the stack pushed) as soon as the name of a format block has
# been recognized.
def p_push_format_id(t):
    'push_format_id : ID'
    name = t[1]
    try:
        fmt = formatMap[name]
    except KeyError:
        error(t.lineno(1), 'instruction format "%s" not defined.' % name)
    else:
        formatStack.push(fmt)
        t[0] = ('', '// format %s' % name)

# Nested decode block: when the current field equals the given
# constant, decode further on some other field.
def p_decode_stmt_decode(t):
    'decode_stmt : case_label COLON decode_block'
    label, nested = t[1], t[3]
    # the nested block's decoding code simply becomes a case of the
    # enclosing switch statement
    nested.wrap_decode_block('\n%s:\n' % label)
    nested.has_decode_default = (label == 'default')
    t[0] = nested

# Instruction definition (finally!).
def p_decode_stmt_inst(t):
    'decode_stmt : case_label COLON inst SEMI'
    label, inst_code = t[1], t[3]
    inst_code.wrap_decode_block('\n%s:' % label, 'break;\n')
    inst_code.has_decode_default = (label == 'default')
    t[0] = inst_code

# A case label is 'default' or a list of one or more constants
def p_case_label_0(t):
    'case_label : intlit_list'
    cases = ['case %#x' % value for value in t[1]]
    t[0] = ': '.join(cases)

def p_case_label_1(t):
    'case_label : DEFAULT'
    t[0] = 'default'

#
# The constant list for a decode case label must be non-empty, but may have
# one or more comma-separated integer literals in it.
#
def p_intlit_list_0(t):
    'intlit_list : INTLIT'
    first = t[1]
    t[0] = [first]

def p_intlit_list_1(t):
    'intlit_list : intlit_list COMMA INTLIT'
    # extend the list built so far in place and pass it along
    values = t[1]
    values.append(t[3])
    t[0] = values

# An instruction defined with the current instruction format (the one
# set by an enclosing format block).
# "<mnemonic>(<args>)"
def p_inst_0(t):
    'inst : ID LPAREN arg_list RPAREN'
    mnemonic, arg_list = t[1], t[3]
    # The format on top of the stack does the actual work.
    fmt = formatStack.top()
    codeObj = fmt.defineInst(mnemonic, arg_list, t.lineno(1))
    # Render the arguments for the comment: prefix every line with
    # '//' and then take the prefix back off the very first line.
    arg_text = ','.join([str(a) for a in arg_list])
    arg_text = re.sub('(?m)^', '//', arg_text)
    arg_text = re.sub('^//', '', arg_text)
    codeObj.prepend_all('\n// %s::%s(%s)\n' % (fmt.id, mnemonic, arg_text))
    t[0] = codeObj

# An instruction defined with an explicitly named format:
# "<fmt>::<mnemonic>(<args>)"
def p_inst_1(t):
    'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
    fmt_name, mnemonic, arg_list = t[1], t[3], t[5]
    try:
        fmt = formatMap[fmt_name]
    except KeyError:
        error(t.lineno(1),
              'instruction format "%s" not defined.' % fmt_name)
    codeObj = fmt.defineInst(mnemonic, arg_list, t.lineno(1))
    codeObj.prepend_all('\n// %s::%s(%s)\n' % (fmt_name, mnemonic, arg_list))
    t[0] = codeObj

def p_arg_list_0(t):
    'arg_list : empty'
    t[0] = []

def p_arg_list_1(t):
    'arg_list : arg'
    only = t[1]
    t[0] = [only]

def p_arg_list_2(t):
    'arg_list : arg_list COMMA arg'
    args = t[1]
    args.append(t[3])
    t[0] = args

def p_arg(t):
    '''arg : ID
           | INTLIT
           | STRLIT
           | CODELIT'''
    value = t[1]
    t[0] = value

#
# Empty production... use in other rules for readability.
#
def p_empty(t):
    'empty :'
    return None

# Parse error handler.
# Note that the argument here is the offending
# *token*, not a grammar symbol (hence the need to use t.value)
def p_error(t):
    if not t:
        error_bt(0, "unknown syntax error")
    else:
        error(t.lineno, "syntax error at '%s'" % t.value)

# END OF GRAMMAR RULES
#
# Now build the parser.
yacc.yacc()


#####################################################################
#
#                           Support Classes
#
#####################################################################

################
# CpuModel class
#
# A CpuModel object holds everything this script needs to know about
# one particular CPU model.

class CpuModel:
    # Every CpuModel ever constructed, in construction order.
    # Accessible as CpuModel.list.
    list = []

    # Constructor.  Registers the new model in CpuModel.list.
    def __init__(self, name, filename, includes, strings):
        # 'strings' is the dict of per-CPU symbols that can be
        # substituted into templates etc.
        (self.name,
         self.filename,    # filename for output exec code
         self.includes,    # include files needed in exec file
         self.strings) = (name, filename, includes, strings)
        CpuModel.list.append(self)

# The CPU models.  These definitions should be the only
# CPU-model-specific information in this file, and the ISA description
# itself should contain *no* CPU-model-specific content.
CpuModel(name='SimpleCPU',
         filename='simple_cpu_exec.cc',
         includes='#include "cpu/simple_cpu/simple_cpu.hh"',
         strings={ 'CPU_exec_context': 'SimpleCPU' })
CpuModel(name='FastCPU',
         filename='fast_cpu_exec.cc',
         includes='#include "cpu/fast_cpu/fast_cpu.hh"',
         strings={ 'CPU_exec_context': 'FastCPU' })
CpuModel(name='FullCPU',
         filename='full_cpu_exec.cc',
         includes='#include "cpu/full_cpu/dyn_inst.hh"',
         strings={ 'CPU_exec_context': 'DynInst' })

# Expand template with CPU-specific references into a dictionary with
# an entry for each CPU model name.
# The entry key is the model name
# and the corresponding value is the template with the CPU-specific
# refs substituted for that model.
def expand_cpu_symbols_to_dict(template):
    """Return {model name: template with that model's CPU_* symbols filled in}."""
    # Protect '%'s that don't go with CPU-specific terms
    t = re.sub(r'%(?!\(CPU_)', '%%', template)
    result = {}
    for cpu in CpuModel.list:
        result[cpu.name] = t % cpu.strings
    return result

# *If* the template has CPU-specific references, return a single
# string containing a copy of the template for each CPU model with the
# corresponding values substituted in.  If the template has no
# CPU-specific references, it is returned unmodified.
def expand_cpu_symbols_to_string(template):
    """Return the per-model expansions of 'template' concatenated together.

    A template without any '%(CPU_' reference is returned unchanged.
    """
    if template.find('%(CPU_') == -1:
        return template
    # str.join (rather than reduce) also copes with an empty model list
    # and does not depend on 'reduce' being a builtin.
    return ''.join(expand_cpu_symbols_to_dict(template).values())

# Protect CPU-specific references by doubling the corresponding '%'s
# (in preparation for substituting a different set of references into
# the template).
def protect_cpu_symbols(template):
    """Double the '%' of every '%(CPU_...' reference in 'template'."""
    return re.sub(r'%(?=\(CPU_)', '%%', template)

###############
# GenCode class
#
# The GenCode class encapsulates generated code destined for various
# output files.  The header_output and decoder_output attributes are
# strings containing code destined for decoder.hh and decoder.cc
# respectively.  The decode_block attribute contains code to be
# incorporated in the decode function itself (that will also end up in
# decoder.cc).  The exec_output attribute is a dictionary with a key
# for each CPU model name; the value associated with a particular key
# is the string of code for that CPU model's exec.cc file.  The
# has_decode_default attribute is used in the decode block to allow
# explicit default clauses to override default default clauses.

class GenCode:
    # Constructor.  At this point we substitute out all CPU-specific
    # symbols.  For the exec output, these go into the per-model
    # dictionary.  For all other output types they get collapsed into
    # a single string.
    def __init__(self,
                 header_output = '', decoder_output = '', exec_output = '',
                 decode_block = '', has_decode_default = False):
        """Build a code object.

        exec_output may be a ready-made {model name: code} dict or a
        single string, which is expanded once per CPU model.

        Raises TypeError if exec_output is neither a dict nor a str.
        """
        self.header_output = expand_cpu_symbols_to_string(header_output)
        self.decoder_output = expand_cpu_symbols_to_string(decoder_output)
        if isinstance(exec_output, dict):
            self.exec_output = exec_output
        elif isinstance(exec_output, str):
            # If the exec_output arg is a single string, we replicate
            # it for each of the CPU models, substituting any
            # %(CPU_foo)s params appropriately.
            self.exec_output = expand_cpu_symbols_to_dict(exec_output)
        else:
            # Fail here rather than leave the attribute unset, which
            # would only surface later in __add__() or prepend_all().
            raise TypeError("GenCode exec_output must be a dict or a string")
        self.decode_block = expand_cpu_symbols_to_string(decode_block)
        self.has_decode_default = has_decode_default

    # Override '+' operator: generate a new GenCode object that
    # concatenates all the individual strings in the operands.
    def __add__(self, other):
        """Return a new GenCode with every stream of self followed by other's."""
        exec_output = {}
        for cpu in CpuModel.list:
            n = cpu.name
            exec_output[n] = self.exec_output[n] + other.exec_output[n]
        return GenCode(self.header_output + other.header_output,
                       self.decoder_output + other.decoder_output,
                       exec_output,
                       self.decode_block + other.decode_block,
                       self.has_decode_default or other.has_decode_default)

    # Prepend a string (typically a comment) to all the strings.
    def prepend_all(self, pre):
        """Prepend 'pre' to every output stream, in place."""
        self.header_output = pre + self.header_output
        self.decoder_output = pre + self.decoder_output
        self.decode_block = pre + self.decode_block
        for cpu in CpuModel.list:
            self.exec_output[cpu.name] = pre + self.exec_output[cpu.name]

    # Wrap the decode block in a pair of strings (e.g., 'case foo:'
    # and 'break;').  Used to build the big nested switch statement.
    def wrap_decode_block(self, pre, post = ''):
        """Indent the decode block and surround it with 'pre' and 'post'."""
        self.decode_block = pre + indent(self.decode_block) + post

################
# Format object.
#
# A format object encapsulates an instruction format.  It must provide
# a defineInst() method that generates the code for an instruction
# definition.

class Format:
    def __init__(self, id, params, code):
        """Compile the format's Python code and build its call wrapper.

        id     -- format name
        params -- list of formal parameter names (may include '*name')
        code   -- Python source of the format body
        """
        # constructor: just save away arguments
        self.id = id
        self.params = params
        label = 'def format ' + id
        self.user_code = compile(fixPythonIndentation(code), label, 'exec')
        # The wrapper gives the format its declared signature: calling
        # it binds the instruction's arguments to the formal parameters,
        # runs the user code with those bindings as its locals, and
        # returns the resulting local namespace.
        wrapper_src = ('def defInst(_code, _context, %s):\n'
                       '    my_locals = vars().copy()\n'
                       '    exec(_code, _context, my_locals)\n'
                       '    return my_locals\n') % ', '.join(params)
        # Execute the definition in an explicit namespace and fetch the
        # function from it; a bare exec cannot be relied on to create a
        # local name.
        namespace = {}
        exec(compile(wrapper_src, label + ' wrapper', 'exec'), namespace)
        self.func = namespace['defInst']

    def defineInst(self, name, args, lineno):
        """Run the format for instruction 'name' and return its GenCode.

        Only the variables header_output, decoder_output, exec_output
        and decode_block set by the format code are passed on.
        """
        context = {}
        updateExportContext()
        context.update(exportContext)
        context.update({ 'name': name, 'Name': name.capitalize() })
        try:
            results = self.func(self.user_code, context, *args)
        except Exception as exc:
            error(lineno, 'error defining "%s": %s.' % (name, exc))
        # Build a filtered copy instead of deleting keys from the dict
        # while iterating over it.
        outputs = {}
        for k in ('header_output', 'decoder_output',
                  'exec_output', 'decode_block'):
            if k in results:
                outputs[k] = results[k]
        return GenCode(**outputs)

# Special null format to catch an implicit-format instruction
# definition outside of any format block.
class NoFormat:
    def __init__(self):
        self.defaultInst = ''

    def defineInst(self, name, args, lineno):
        """Report the instruction definition as an error."""
        error(lineno,
              'instruction definition "%s" with no active format!' % name)

# This dictionary maps format name strings to Format objects.
formatMap = {}

# Define a new format
def defFormat(id, params, code, lineno):
    """Create Format 'id' and register it in formatMap."""
    # make sure we haven't already defined this one
    if id in formatMap:
        error(lineno, 'format %s redefined.' % id)
    # create new object and store in global map
    formatMap[id] = Format(id, params, code)


##############
# Stack: a simple stack object.  Used for both formats (formatStack)
# and default cases (defaultStack).

class Stack:
    def __init__(self, initItem):
        """Create a stack holding the single item 'initItem'."""
        self.stack = [ initItem ]

    def push(self, item):
        """Put 'item' on top of the stack."""
        self.stack.append(item)

    def pop(self):
        """Remove and return the top item."""
        return self.stack.pop()

    def top(self):
        """Return the top item without removing it."""
        return self.stack[-1]

# The global format stack.
formatStack = Stack(NoFormat())

# The global default case stack.
defaultStack = Stack( None )

###################
# Utility functions

#
# Indent every line in string 's' by two spaces
# (except preprocessor directives).
# Used to make nested code blocks look pretty.
#
def indent(s):
    """Return 's' with two spaces inserted before each line not starting with '#'."""
    return re.sub(r'(?m)^(?!\#)', '  ', s)

#
# Munge a somewhat arbitrarily formatted piece of Python code
# (e.g. from a format 'let' block) into something whose indentation
# will get by the Python parser.
#
# The two keys here are that Python will give a syntax error if
# there's any whitespace at the beginning of the first line, and that
# all lines at the same lexical nesting level must have identical
# indentation.  Unfortunately the way code literals work, an entire
# let block tends to have some initial indentation.  Rather than
# trying to figure out what that is and strip it off, we prepend 'if
# 1:' to make the let code the nested block inside the if (and have
# the parser automatically deal with the indentation for us).
#
# We don't want to do this if (1) the code block is empty or (2) the
# first line of the block doesn't have any whitespace at the front.

def fixPythonIndentation(s):
    """Return 's' without blank lines, nested under 'if 1:' if it starts indented."""
    # get rid of blank lines first
    s = re.sub(r'(?m)^\s*\n', '', s)
    if s and s[0] in ' \t':
        s = 'if 1:\n' + s
    return s

# Error handler.
# Just call exit.  Output formatted to work under
# Emacs compile-mode.
def error(lineno, string):
    """Exit with the message '<input file>:<lineno>: <string>'."""
    sys.exit("%s:%d: %s" % (input_filename, lineno, string))

# Like error(), but include a Python stack backtrace (for processing
# Python exceptions).
def error_bt(lineno, string):
    """Print the current traceback and the message to stderr, then exit(1)."""
    traceback.print_exc()
    sys.stderr.write("%s:%d: %s\n" % (input_filename, lineno, string))
    sys.exit(1)


#####################################################################
#
#                      Bitfield Operator Support
#
#####################################################################

# '<n:>' -- single-index selector
bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>')

# 'name<a:b>' -- selector applied to a (possibly dotted) name
bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>')
# ')<a:b>' -- selector applied to a parenthesized expression
bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>')

def substBitOps(code):
    """Rewrite bit-selector operators in 'code' as bits(...) calls.

    'foo<a:b>' becomes 'bits(foo, a, b)', '(expr)<a:b>' becomes
    'bits((expr), a, b)', and a single-index selector '<n:>' is first
    widened to '<n:n>'.

    Exits the program if the ')' before a selector has no matching '('.
    """
    # first convert single-bit selectors to two-index form
    # i.e., <n:> --> <n:n>
    code = bitOp1ArgRE.sub(r'<\1:\1>', code)
    # simple case: selector applied to ID (name)
    # i.e., foo<a:b> --> bits(foo, a, b)
    code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code)
    # if selector is applied to expression (ending in ')'),
    # we need to search backward for matching '('
    match = bitOpExprRE.search(code)
    while match:
        exprEnd = match.start()     # index of the closing ')'
        here = exprEnd - 1
        nestLevel = 1
        while nestLevel > 0:
            # Test for running off the front *before* looking at the
            # character, so that a matching '(' at index 0 is still
            # found instead of being reported as missing.
            if here < 0:
                sys.exit("Didn't find '('!")
            if code[here] == '(':
                nestLevel -= 1
            elif code[here] == ')':
                nestLevel += 1
            here -= 1
        # 'here' was moved one position past the matching '('
        exprStart = here + 1
        newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1],
                                         match.group(1), match.group(2))
        code = code[:exprStart] + newExpr + code[match.end():]
        match = bitOpExprRE.search(code)
    return code


####################
# Template objects.
#
# Template objects are format strings that allow substitution from
# the attribute spaces of other objects (e.g. InstObjParams instances).

class Template:
    """A format string substituted from a dict or an object's attributes."""

    def __init__(self, t):
        """Store the template text 't'."""
        self.template = t

    def subst(self, d):
        """Return the template with '%(name)s' references filled in.

        d -- a dictionary, or any object with a __dict__, supplying the
             values; entries of the global templateMap are available
             too, with 'd' taking precedence.

        Raises TypeError if 'd' is neither.
        """
        # Start with the template namespace.  Make a copy since we're
        # going to modify it.
        myDict = templateMap.copy()
        # if the argument is a dictionary, we just use it.
        if isinstance(d, dict):
            myDict.update(d)
        # if the argument is an object, we use its attribute map.
        elif hasattr(d, '__dict__'):
            myDict.update(d.__dict__)
        else:
            raise TypeError("Template.subst() arg must be or have dictionary")
        # CPU-model-specific substitutions are handled later (in GenCode).
        return protect_cpu_symbols(self.template) % myDict

    # Convert to string.  This handles the case when a template with a
    # CPU-specific term gets interpolated into another template or into
    # an output block.
    def __str__(self):
        return expand_cpu_symbols_to_string(self.template)

#####################################################################
#
#                              Code Parser
#
# The remaining code is the support for automatically extracting
# instruction characteristics from pseudocode.
#
#####################################################################

# Force the argument to be a list
def makeList(list_or_item):
    """Return 'list_or_item' as a list.

    Any false value (None, '', 0, an empty list, ...) gives a new empty
    list, a list is returned as is (not copied), and anything else is
    wrapped in a one-element list.
    """
    if not list_or_item:
        return []
    if isinstance(list_or_item, list):
        return list_or_item
    return [ list_or_item ]

# generate operandSizeMap based on provided operandTypeMap:
# basically generate equiv.
# C++ type and make is_signed flag
def buildOperandSizeMap():
    """Build the global operandSizeMap from the global operandTypeMap.

    operandTypeMap maps an operand extension to a (description, size)
    pair; operandSizeMap maps the same extension to a
    (size, C++ type name, is_signed) triple.  Calls error() when a
    description/size pair has no C++ equivalent.
    """
    global operandSizeMap
    operandSizeMap = {}
    for ext in operandTypeMap.keys():
        (desc, size) = operandTypeMap[ext]
        # Reset per entry so an unrecognized description is detected
        # instead of silently reusing the previous entry's values.
        ctype = ''
        is_signed = 0
        if desc == 'signed int':
            ctype = 'int%d_t' % size
            is_signed = 1
        elif desc == 'unsigned int':
            ctype = 'uint%d_t' % size
        elif desc == 'float':
            is_signed = 1       # shouldn't really matter
            if size == 32:
                ctype = 'float'
            elif size == 64:
                ctype = 'double'
        if ctype == '':
            error(0, 'Unrecognized type description "%s" in operandTypeMap'
                  % desc)
        operandSizeMap[ext] = (size, ctype, is_signed)

#
# Base class for operand traits.  An instance of this class (or actually
# a class derived from this one) encapsulates the traits of a particular
# operand type (e.g., "32-bit integer register").
#
class OperandTraits:
    def __init__(self, dflt_ext, reg_spec, flags, sort_pri):
        # Force construction of operandSizeMap from operandTypeMap
        # if it hasn't happened yet
        if 'operandSizeMap' not in globals():
            buildOperandSizeMap()
        self.dflt_ext = dflt_ext
        (self.dflt_size, self.dflt_type, self.dflt_is_signed) = \
            operandSizeMap[dflt_ext]
        self.reg_spec = reg_spec
        # Canonical flag structure is a triple of lists, where each list
        # indicates the set of flags implied by this operand always, when
        # used as a source, and when used as a dest, respectively.
        # For simplicity this can be initialized using a variety of fairly
        # obvious shortcuts; we convert these to canonical form here.
        if not flags:
            # no flags specified (e.g., 'None')
            self.flags = ( [], [], [] )
        elif isinstance(flags, str):
            # a single flag: assumed to be unconditional
            self.flags = ( [ flags ], [], [] )
        elif isinstance(flags, list):
            # a list of flags: also assumed to be unconditional
            self.flags = ( flags, [], [] )
        elif isinstance(flags, tuple):
            # it's a tuple: it should be a triple,
            # but each item could be a single string or a list
            (uncond_flags, src_flags, dest_flags) = flags
            self.flags = (makeList(uncond_flags),
                          makeList(src_flags), makeList(dest_flags))
        else:
            # Report the problem here rather than leaving self.flags
            # unset and failing later in getFlags().
            error(0, 'OperandTraits: unrecognized flags specification %r'
                  % (flags,))
        self.sort_pri = sort_pri

    def isMem(self):
        return 0

    def isReg(self):
        return 0

    def isFloatReg(self):
        return 0

    def isIntReg(self):
        return 0

    def isControlReg(self):
        return 0

    def getFlags(self, op_desc):
        """Return a new list of the flags implied by this use of the
        operand (unconditional, plus source and/or dest flags)."""
        # note the empty slice '[:]' gives us a copy of self.flags[0]
        # instead of a reference to it
        my_flags = self.flags[0][:]
        if op_desc.is_src:
            my_flags += self.flags[1]
        if op_desc.is_dest:
            my_flags += self.flags[2]
        return my_flags

    def makeDecl(self, op_desc):
        """Return the C++ declaration of the operand's local variable."""
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        return '%s %s = 0;\n' % (ctype, op_desc.munged_name)

class IntRegOperandTraits(OperandTraits):
    def isReg(self):
        return 1

    def isIntReg(self):
        return 1

    def makeConstructor(self, op_desc):
        c = ''
        if op_desc.is_src:
            c += '\n\t_srcRegIdx[%d] = %s;' % \
                 (op_desc.src_reg_idx, self.reg_spec)
        if op_desc.is_dest:
            c += '\n\t_destRegIdx[%d] = %s;' % \
                 (op_desc.dest_reg_idx, self.reg_spec)
        return c

    def makeRead(self, op_desc):
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        if ctype == 'float' or ctype == 'double':
            error(0, 'Attempt to read integer register as FP')
        if size == self.dflt_size:
            return '%s = xc->readIntReg(this, %d);\n' % \
                   (op_desc.munged_name, op_desc.src_reg_idx)
        # narrower than the register: keep only the low 'size' bits
        return '%s = bits(xc->readIntReg(this, %d), %d, 0);\n' % \
               (op_desc.munged_name, op_desc.src_reg_idx, size-1)

    def makeWrite(self, op_desc):
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        if ctype == 'float' or ctype == 'double':
            error(0, 'Attempt to write integer register as FP')
        if size != self.dflt_size and is_signed:
            final_val = 'sext<%d>(%s)' % (size, op_desc.munged_name)
        else:
            final_val = op_desc.munged_name
        wb = '''
    {
        %s final_val = %s;
        xc->setIntReg(this, %d, final_val);\n
        if (traceData) { traceData->setData(final_val); }
    }''' % (self.dflt_type, final_val, op_desc.dest_reg_idx)
        return wb

class FloatRegOperandTraits(OperandTraits):
    def isReg(self):
        return 1

    def isFloatReg(self):
        return 1

    def makeConstructor(self, op_desc):
        c = ''
        if op_desc.is_src:
            c += '\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' % \
                 (op_desc.src_reg_idx, self.reg_spec)
        if op_desc.is_dest:
            c += '\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' % \
                 (op_desc.dest_reg_idx, self.reg_spec)
        return c

    def makeRead(self, op_desc):
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        bit_select = 0
        if ctype == 'float':
            func = 'readFloatRegSingle'
        elif ctype == 'double':
            func = 'readFloatRegDouble'
        else:
            # integer view of the FP register
            func = 'readFloatRegInt'
            if size != self.dflt_size:
                bit_select = 1
        base = 'xc->%s(this, %d)' % \
               (func, op_desc.src_reg_idx)
        if bit_select:
            return '%s = bits(%s, %d, 0);\n' % \
                   (op_desc.munged_name, base, size-1)
        return '%s = %s;\n' % (op_desc.munged_name, base)

    def makeWrite(self, op_desc):
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        final_val = op_desc.munged_name
        if ctype == 'float':
            func = 'setFloatRegSingle'
        elif ctype == 'double':
            func = 'setFloatRegDouble'
        else:
            # integer view of the FP register
            func = 'setFloatRegInt'
            ctype = 'uint%d_t' % self.dflt_size
            if size != self.dflt_size and is_signed:
                final_val = 'sext<%d>(%s)' % (size, op_desc.munged_name)
        wb = '''
    {
        %s final_val = %s;
        xc->%s(this, %d, final_val);\n
        if (traceData) { traceData->setData(final_val); }
    }''' % (ctype, final_val, func, op_desc.dest_reg_idx)
        return wb

class ControlRegOperandTraits(OperandTraits):
    def isReg(self):
        return 1

    def isControlReg(self):
        return 1

    def makeConstructor(self, op_desc):
        c = ''
        if op_desc.is_src:
            c += '\n\t_srcRegIdx[%d] = %s_DepTag;' % \
                 (op_desc.src_reg_idx, self.reg_spec)
        if op_desc.is_dest:
            c += '\n\t_destRegIdx[%d] = %s_DepTag;' % \
                 (op_desc.dest_reg_idx, self.reg_spec)
        return c

    def makeRead(self, op_desc):
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        if ctype == 'float' or ctype == 'double':
            error(0, 'Attempt to read control register as FP')
        base = 'xc->read%s()' % self.reg_spec
        if size == self.dflt_size:
            return '%s = %s;\n' % (op_desc.munged_name, base)
        return '%s = bits(%s, %d, 0);\n' % \
               (op_desc.munged_name, base, size-1)

    def makeWrite(self, op_desc):
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        if ctype == 'float' or ctype == 'double':
            error(0, 'Attempt to write control register as FP')
        wb = 'xc->set%s(%s);\n' % (self.reg_spec, op_desc.munged_name)
        wb += 'if (traceData) { traceData->setData(%s); }' % \
              op_desc.munged_name
        return wb

class MemOperandTraits(OperandTraits):
    def isMem(self):
        return 1

    def makeConstructor(self, op_desc):
        return ''

    def makeDecl(self, op_desc):
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        # Declare memory data variable.
        c = '%s %s = 0;\n' % (ctype, op_desc.munged_name)
        # Declare var to hold memory access flags.
        c += 'unsigned %s_flags = memAccessFlags;\n' % op_desc.base_name
        # If this operand is a dest (i.e., it's a store operation),
        # then we need to declare a variable for the write result code
        # as well.
        if op_desc.is_dest:
            c += 'uint64_t %s_write_result = 0;\n' % op_desc.base_name
        return c

    def makeRead(self, op_desc):
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        eff_type = 'uint%d_t' % size
        return 'fault = xc->read(EA, (%s&)%s, %s_flags);\n' \
               % (eff_type, op_desc.munged_name, op_desc.base_name)

    def makeWrite(self, op_desc):
        (size, ctype, is_signed) = operandSizeMap[op_desc.eff_ext]
        eff_type = 'uint%d_t' % size
        return 'fault = xc->write((%s&)%s, EA, %s_flags,' \
               ' &%s_write_result);\n' \
               % (eff_type, op_desc.munged_name, op_desc.base_name,
                  op_desc.base_name)

class NPCOperandTraits(OperandTraits):
    def makeConstructor(self, op_desc):
        return ''

    def makeRead(self, op_desc):
        return '%s = xc->readPC() + 4;\n' % op_desc.munged_name

    def makeWrite(self, op_desc):
        return 'xc->setNextPC(%s);\n' % op_desc.munged_name


exportContextSymbols = ('IntRegOperandTraits', 'FloatRegOperandTraits',
                        'ControlRegOperandTraits', 'MemOperandTraits',
                        'NPCOperandTraits', 'InstObjParams', 'CodeBlock',
                        're', 'string')

exportContext = {}

def updateExportContext():
    """Refresh exportContext with the exported symbols and templateMap."""
    exportContext.update(exportDict(*exportContextSymbols))
    exportContext.update(templateMap)


def exportDict(*symNames):
    """Return a dict mapping each given name to the object it denotes
    in this module (names are evaluated with eval())."""
    return dict([(s, eval(s)) for s in symNames])


#
# Define operand variables that get derived from the basic declaration
# of ISA-specific operands in operandTraitsMap.  This function must be
# called by the ISA description file explicitly after defining
# operandTraitsMap (in a 'let' block).
#
def defineDerivedOperandVars():
    global operands
    operands = list(operandTraitsMap.keys())

    operandsREString = (r'''
    (?<![\w\.])             # neg. lookbehind assertion: prevent partial matches
    ((%s)(?:\.(\w+))?)   # match: operand with optional '.' then suffix
    (?![\w\.])              # neg. lookahead assertion: prevent partial matches
    '''
                        % '|'.join(operands))

    global operandsRE
    operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)

    # Same as operandsREString, but extension is mandatory, and only two
    # groups are returned (base and ext, not full name as above).
    # Used for substituting '_' for '.' to make C++ identifiers.
    operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])'
                               % '|'.join(operands))

    global operandsWithExtRE
    operandsWithExtRE = re.compile(operandsWithExtREString, re.MULTILINE)


#
# Operand descriptor class.  An instance of this class represents
# a specific operand for a code block.
#
class OperandDescriptor:
    def __init__(self, full_name, base_name, ext, is_src, is_dest):
        self.full_name = full_name
        self.base_name = base_name
        self.ext = ext
        self.is_src = is_src
        self.is_dest = is_dest
        self.traits = operandTraitsMap[base_name]
        # The 'effective extension' (eff_ext) is either the actual
        # extension, if one was explicitly provided, or the default.
        # The 'munged name' replaces the '.' between the base and
        # extension (if any) with a '_' to make a legal C++ variable name.
        if ext:
            self.eff_ext = ext
            self.munged_name = base_name + '_' + ext
        else:
            self.eff_ext = self.traits.dflt_ext
            self.munged_name = base_name

    # Finalize additional fields (primarily code fields).  This step
    # is done separately since some of these fields may depend on the
    # register index enumeration that hasn't been performed yet at the
    # time of __init__().
    def finalize(self):
        self.flags = self.traits.getFlags(self)
        self.constructor = self.traits.makeConstructor(self)
        self.op_decl = self.traits.makeDecl(self)

        if self.is_src:
            self.op_rd = self.traits.makeRead(self)
        else:
            self.op_rd = ''

        if self.is_dest:
            self.op_wb = self.traits.makeWrite(self)
        else:
            self.op_wb = ''

class OperandDescriptorList:
    def __init__(self):
        self.items = []
        self.bases = {}

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        return self.items[index]

    def append(self, op_desc):
        self.items.append(op_desc)
        self.bases[op_desc.base_name] = op_desc

    def find_base(self, base_name):
        # like self.bases[base_name], but returns None if not found
        # (rather than raising exception)
        return self.bases.get(base_name)

    # internal helper function for concat[Some]Attr{Strings|Lists}
    def __internalConcatAttrs(self, attr_name, filter, result):
        for op_desc in self.items:
            if filter(op_desc):
                result += getattr(op_desc, attr_name)
        return result

    # return a single string that is the concatenation of the (string)
    # values of the specified attribute for all operands
    def concatAttrStrings(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, '')

    # like concatAttrStrings, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrStrings(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, '')

    # return a single list that is the concatenation of the (list)
    # values of the specified attribute for all operands
    def concatAttrLists(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, [])

    # like concatAttrLists, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrLists(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, [])

    def sort(self):
        """Sort the operands in place by their traits' sort_pri."""
        self.items.sort(key=lambda op_desc: op_desc.traits.sort_pri)

# Regular expression object to match C++ comments
# (used in findOperands())
commentRE = re.compile(r'//.*\n')

# Regular expression object to match assignment statements
# (used in findOperands())
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)

#
# Find all the operands in the given code block.  Returns an operand
# descriptor list (instance of class OperandDescriptorList).
#
def findOperands(code):
    operands = OperandDescriptorList()
    # delete comments so we don't accidentally match on reg specifiers inside
    code = commentRE.sub('', code)
    # search for operands
    next_pos = 0
    while 1:
        match = operandsRE.search(code, next_pos)
        if not match:
            # no more matches: we're done
            break
        # regexp groups are operand full name, base, and extension
        (op_full, op_base, op_ext) = match.groups()
        # if the token following the operand is an assignment, this is
        # a destination (LHS), else it's a source (RHS)
        is_dest = (assignRE.match(code, match.end()) is not None)
        is_src = not is_dest
        # see if we've already seen this one
        op_desc = operands.find_base(op_base)
        if op_desc:
            if op_desc.ext != op_ext:
                error(0, 'Inconsistent extensions for operand %s' % op_base)
            op_desc.is_src = op_desc.is_src or is_src
            op_desc.is_dest = op_desc.is_dest or is_dest
        else:
            # new operand: create new descriptor
            op_desc = OperandDescriptor(op_full, op_base, op_ext,
                                        is_src, is_dest)
            operands.append(op_desc)
        # start next search after end of current match
        next_pos = match.end()
    operands.sort()
    # enumerate source & dest register operands... used in building
    # constructor later
    srcRegs = 0
    destRegs = 0
    operands.numFPDestRegs = 0
    operands.numIntDestRegs = 0
    for op_desc in operands:
        if op_desc.traits.isReg():
            if op_desc.is_src:
                op_desc.src_reg_idx = srcRegs
                srcRegs += 1
            if op_desc.is_dest:
                op_desc.dest_reg_idx = destRegs
                destRegs += 1
                if op_desc.traits.isFloatReg():
                    operands.numFPDestRegs += 1
                elif op_desc.traits.isIntReg():
                    operands.numIntDestRegs += 1
    operands.numSrcRegs = srcRegs
    operands.numDestRegs = destRegs
    # now make a final pass to finalize op_desc fields that may depend
    # on the register enumeration
    for op_desc in operands:
        op_desc.finalize()
    return operands

# Munge operand names in code string to make legal C++ variable names.
# (Will match munged_name attribute of OperandDescriptor object.)
def substMungedOpNames(code):
    return operandsWithExtRE.sub(r'\1_\2', code)

def joinLists(t):
    """Return a list holding each sequence of strings in 't' joined
    with single spaces."""
    return [' '.join(words) for words in t]

def makeFlagConstructor(flag_list):
    """Return C++ statements setting flags[<flag>] for each flag.

    NOTE: flag_list is sorted and stripped of duplicates in place.
    """
    if len(flag_list) == 0:
        return ''
    # filter out repeated flags (duplicates are adjacent once sorted)
    unique_flags = []
    for flag in sorted(flag_list):
        if not unique_flags or unique_flags[-1] != flag:
            unique_flags.append(flag)
    flag_list[:] = unique_flags
    pre = '\n\tflags['
    post = '] = true;'
    return pre + (post + pre).join(flag_list) + post

class CodeBlock:
    def __init__(self, code):
        self.orig_code = code
        self.operands = findOperands(code)
        self.code = substMungedOpNames(substBitOps(code))
        self.constructor = self.operands.concatAttrStrings('constructor')
        self.constructor += \
                 '\n\t_numSrcRegs = %d;' % self.operands.numSrcRegs
        self.constructor += \
                 '\n\t_numDestRegs = %d;' % self.operands.numDestRegs
        self.constructor += \
                 '\n\t_numFPDestRegs = %d;' % self.operands.numFPDestRegs
        self.constructor += \
                 '\n\t_numIntDestRegs = %d;' % self.operands.numIntDestRegs

        self.op_decl = self.operands.concatAttrStrings('op_decl')

        is_mem = lambda op: op.traits.isMem()
        not_mem = lambda op: not op.traits.isMem()

        self.op_rd = self.operands.concatAttrStrings('op_rd')
        self.op_wb = self.operands.concatAttrStrings('op_wb')
        self.op_mem_rd = \
                 self.operands.concatSomeAttrStrings(is_mem, 'op_rd')
        self.op_mem_wb = \
                 self.operands.concatSomeAttrStrings(is_mem, 'op_wb')
        self.op_nonmem_rd = \
                 self.operands.concatSomeAttrStrings(not_mem, 'op_rd')
        self.op_nonmem_wb = \
                 self.operands.concatSomeAttrStrings(not_mem, 'op_wb')

        self.flags = self.operands.concatAttrLists('flags')

        # Make a basic guess on the operand class (function unit type).
        # These are good enough for most cases, and will be overridden
        # later otherwise.
        if 'IsStore' in self.flags:
            self.op_class = 'MemWriteOp'
        elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags:
            self.op_class = 'MemReadOp'
        elif 'IsFloating' in self.flags:
            self.op_class = 'FloatAddOp'
        else:
            self.op_class = 'IntAluOp'

# Assume all instruction flags are of the form 'IsFoo'
instFlagRE = re.compile(r'Is.*')

# OpClass constants end in 'Op' except No_OpClass
opClassRE = re.compile(r'.*Op|No_OpClass')

class InstObjParams:
    def __init__(self, mnem, class_name, base_class = '',
                 code_block = None, opt_args = ()):
        self.mnemonic = mnem
        self.class_name = class_name
        self.base_class = base_class
        if code_block:
            for code_attr in code_block.__dict__.keys():
                setattr(self, code_attr, getattr(code_block, code_attr))
            # Work on a private copy of the flag list: flags added from
            # opt_args (and the in-place clean-up done by
            # makeFlagConstructor) must not leak back into code_block.
            self.flags = list(self.flags)
        else:
            self.constructor = ''
            self.flags = []
        # Optional arguments are assumed to be either StaticInst flags
        # or an OpClass value.  To avoid having to import a complete
        # list of these values to match against, we do it ad-hoc
        # with regexps.
        for oa in opt_args:
            if instFlagRE.match(oa):
                self.flags.append(oa)
            elif opClassRE.match(oa):
                self.op_class = oa
            else:
                error(0, 'InstObjParams: optional arg "%s" not recognized '
                      'as StaticInst::Flag or OpClass.' % oa)

        # add flag initialization to constructor here to include
        # any flags added via opt_args
        self.constructor += makeFlagConstructor(self.flags)

        # if 'IsFloating' is set, add call to the FP enable check
        # function (which should be provided by isa_desc via a declare)
        if 'IsFloating' in self.flags:
            self.fp_enable_check = 'fault = checkFpEnableFault(xc);'
        else:
            self.fp_enable_check = ''

#######################
#
# Output file template
#

file_template = '''
/*
 * Copyright (c) 2003
 * The Regents of The University of Michigan
 * All Rights Reserved
 *
 * This code is part of the M5 simulator, developed by Nathan Binkert,
 * Erik Hallnor, Steve Raasch, and Steve Reinhardt, with contributions
 * from Ron Dreslinski, Dave Greene, and Lisa Hsu.
 *
 * Permission is granted to use, copy, create derivative works and
 * redistribute this software and such derivative works for any
 * purpose, so long as the copyright notice above, this grant of
 * permission, and the disclaimer below appear in all copies made; and
 * so long as the name of The University of Michigan is not used in
 * any advertising or publicity pertaining to the use or distribution
 * of this software without specific, written prior authorization.
 *
 * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION FROM THE
 * UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY PURPOSE, AND
 * WITHOUT WARRANTY BY THE UNIVERSITY OF MICHIGAN OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE. THE REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE
 * LIABLE FOR ANY DAMAGES, INCLUDING DIRECT, SPECIAL, INDIRECT,
 * INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
 * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGES.
 */

/*
 * DO NOT EDIT THIS FILE!!!
 *
 * It was automatically generated from the ISA description in %(filename)s
 */

%(includes)s

%(global_output)s

namespace %(namespace)s {

%(namespace_output)s

} // namespace %(namespace)s
'''


# Update the output file only if the new contents are different from
# the current contents.  Minimizes the files that need to be rebuilt
# after minor changes.
def update_if_needed(file, contents):
    if os.access(file, os.R_OK):
        f = open(file, 'r')
        try:
            old_contents = f.read()
        finally:
            f.close()
        if contents == old_contents:
            sys.stdout.write('File %s is unchanged\n' % file)
            return
        sys.stdout.write('Updating %s\n' % file)
        os.remove(file)         # in case it's write-protected
    else:
        sys.stdout.write('Generating %s\n' % file)
    f = open(file, 'w')
    try:
        f.write(contents)
    finally:
        f.close()

#
# Read in and parse the ISA description.
#
def parse_isa_desc(isa_desc_file, output_dir, include_path):
    # set a global var for the input filename... used in error messages
    global input_filename
    input_filename = isa_desc_file

    # Suck the ISA description file in.
    desc_file = open(isa_desc_file)
    try:
        isa_desc = desc_file.read()
    finally:
        desc_file.close()

    # Parse it.
    (isa_name, namespace, global_code, namespace_code) = yacc.parse(isa_desc)

    # grab the last three path components of isa_desc_file to put in
    # the output
    filename = '/'.join(isa_desc_file.split('/')[-3:])

    # Fill in file_template and write the result to output_dir/out_name
    # (only if it differs from what is already there).
    def emit(out_name, includes, global_output, namespace_output):
        fields = {'filename': filename,
                  'includes': includes,
                  'global_output': global_output,
                  'namespace': namespace,
                  'namespace_output': namespace_output}
        update_if_needed(output_dir + '/' + out_name, file_template % fields)

    # generate decoder.hh
    emit('decoder.hh',
         '#include "base/bitfield.hh" // for bitfield support',
         global_code.header_output,
         namespace_code.header_output)

    # generate decoder.cc
    emit('decoder.cc',
         '#include "%s/decoder.hh"' % include_path,
         global_code.decoder_output,
         namespace_code.decoder_output + namespace_code.decode_block)

    # generate per-cpu exec files
    for cpu in CpuModel.list:
        emit(cpu.filename,
             '#include "%s/decoder.hh"\n' % include_path + cpu.includes,
             global_code.exec_output[cpu.name],
             namespace_code.exec_output[cpu.name])

# Called as script: get args from command line.
if __name__ == '__main__':
    parse_isa_desc(sys.argv[1], sys.argv[2], sys.argv[3])