isa_parser.py revision 2075
1#! /usr/bin/env python 2 3# Copyright (c) 2003-2005 The Regents of The University of Michigan 4# All rights reserved. 5# 6# Redistribution and use in source and binary forms, with or without 7# modification, are permitted provided that the following conditions are 8# met: redistributions of source code must retain the above copyright 9# notice, this list of conditions and the following disclaimer; 10# redistributions in binary form must reproduce the above copyright 11# notice, this list of conditions and the following disclaimer in the 12# documentation and/or other materials provided with the distribution; 13# neither the name of the copyright holders nor the names of its 14# contributors may be used to endorse or promote products derived from 15# this software without specific prior written permission. 16# 17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29import os 30import sys 31import re 32import string 33import traceback 34# get type names 35from types import * 36 37# Prepend the directory where the PLY lex & yacc modules are found 38# to the search path. Assumes we're compiling in a subdirectory 39# of 'build' in the current tree. 
# Assumes we're compiling in a subdirectory of 'build'; M5_EXT points
# at the external tools tree containing PLY.
sys.path[0:0] = [os.environ['M5_EXT'] + '/ply']

import lex
import yacc

#####################################################################
#
# Lexer
#
# The PLY lexer module takes two things as input:
# - A list of token names (the string list 'tokens')
# - A regular expression describing a match for each token.  The
#   regexp for token FOO can be provided in two ways:
#   - as a string variable named t_FOO
#   - as the doc string for a function named t_FOO.  In this case,
#     the function is also executed, allowing an action to be
#     associated with each token match.
#
#####################################################################

# Reserved words.  These are listed separately as they are matched
# using the same regexp as generic IDs, but distinguished in the
# t_ID() function.  The PLY documentation suggests this approach.
reserved = (
    'BITFIELD', 'DECODE', 'DECODER', 'DEFAULT', 'DEF', 'EXEC', 'FORMAT',
    'HEADER', 'LET', 'NAMESPACE', 'OPERAND_TYPES', 'OPERANDS',
    'OUTPUT', 'SIGNED', 'TEMPLATE'
    )

# List of tokens.  The lex module requires this.
tokens = reserved + (
    # identifier
    'ID',

    # integer literal
    'INTLIT',

    # string literal
    'STRLIT',

    # code literal
    'CODELIT',

    # ( ) [ ] { } < > , ; : :: *
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'LESS', 'GREATER', 'EQUALS',
    'COMMA', 'SEMI', 'COLON', 'DBLCOLON',
    'ASTERISK',

    # C preprocessor directives
    'CPPDIRECTIVE'

# The following are matched but never returned. commented out to
# suppress PLY warning
    # newfile directive
#    'NEWFILE',

    # endfile directive
#    'ENDFILE'
)

# Regular expressions for token matching
t_LPAREN   = r'\('
t_RPAREN   = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE   = r'\{'
t_RBRACE   = r'\}'
t_LESS     = r'\<'
t_GREATER  = r'\>'
t_EQUALS   = r'='
t_COMMA    = r','
t_SEMI     = r';'
t_COLON    = r':'
t_DBLCOLON = r'::'
t_ASTERISK = r'\*'

# Identifiers and reserved words.  Build a lowercase-keyword -> token
# map so t_ID can promote reserved words to their own token types.
reserved_map = { }
for r in reserved:
    reserved_map[r.lower()] = r

def t_ID(t):
    r'[A-Za-z_]\w*'
    # Reserved words get their dedicated token type; everything else is ID.
    t.type = reserved_map.get(t.value,'ID')
    return t

# Integer literal: hex (0x...) or decimal.  int(x, 0) infers the base
# from the prefix.
def t_INTLIT(t):
    r'(0x[\da-fA-F]+)|\d+'
    try:
        t.value = int(t.value,0)
    except ValueError:
        error(t.lineno, 'Integer value "%s" too large' % t.value)
        t.value = 0
    return t

# String literal.  Note that these use only single quotes, and
# can span multiple lines.
def t_STRLIT(t):
    r"(?m)'([^'])+'"
    # strip off quotes
    t.value = t.value[1:-1]
    # keep the line counter in sync across multi-line literals
    t.lineno += t.value.count('\n')
    return t


# "Code literal"... like a string literal, but delimiters are
# '{{' and '}}' so they get formatted nicely under emacs c-mode
def t_CODELIT(t):
    r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
    # strip off {{ & }}
    t.value = t.value[2:-2]
    t.lineno += t.value.count('\n')
    return t

# C preprocessor directive: a line starting with a single '#'
# ('##' introduces our own newfile/endfile directives below).
def t_CPPDIRECTIVE(t):
    r'^\#[^\#].*\n'
    t.lineno += t.value.count('\n')
    return t

# '##newfile "name"' directive: push the current file/line onto the
# include stack and restart line numbering.  Returns None, so no
# token is emitted.
def t_NEWFILE(t):
    r'^\#\#newfile\s+"[\w/.-]*"'
    global fileNameStack
    # NOTE(review): the [11:-1] slice assumes exactly one space between
    # '##newfile' and the quoted name, though the regexp allows '\s+'
    # -- confirm the preprocessor always emits a single space.
    fileNameStack.append((t.value[11:-1], t.lineno))
    t.lineno = 0

# '##endfile' directive: pop back to the including file.  The popped
# filename itself is unused; only the saved line number is restored.
def t_ENDFILE(t):
    r'^\#\#endfile'
    (filename, t.lineno) = fileNameStack.pop()

#
# The functions t_NEWLINE, t_ignore, and t_error are
# special for the lex module.
176# 177 178# Newlines 179def t_NEWLINE(t): 180 r'\n+' 181 t.lineno += t.value.count('\n') 182 183# Comments 184def t_comment(t): 185 r'//.*' 186 187# Completely ignored characters 188t_ignore = ' \t\x0c' 189 190# Error handler 191def t_error(t): 192 error(t.lineno, "illegal character '%s'" % t.value[0]) 193 t.skip(1) 194 195# Build the lexer 196lex.lex() 197 198##################################################################### 199# 200# Parser 201# 202# Every function whose name starts with 'p_' defines a grammar rule. 203# The rule is encoded in the function's doc string, while the 204# function body provides the action taken when the rule is matched. 205# The argument to each function is a list of the values of the 206# rule's symbols: t[0] for the LHS, and t[1..n] for the symbols 207# on the RHS. For tokens, the value is copied from the t.value 208# attribute provided by the lexer. For non-terminals, the value 209# is assigned by the producing rule; i.e., the job of the grammar 210# rule function is to set the value for the non-terminal on the LHS 211# (by assigning to t[0]). 212##################################################################### 213 214# The LHS of the first grammar rule is used as the start symbol 215# (in this case, 'specification'). Note that this rule enforces 216# that there will be exactly one namespace declaration, with 0 or more 217# global defs/decls before and after it. The defs & decls before 218# the namespace decl will be outside the namespace; those after 219# will be inside. The decoder function is always inside the namespace. 
def p_specification(p):
    'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
    name = p[2]
    ns = name + "Inst"
    # Turn the decode block into the body of the decodeInst() function.
    p[4].wrap_decode_block('''
StaticInstPtr<%(isa_name)s>
%(isa_name)s::decodeInst(%(isa_name)s::MachInst machInst)
{
    using namespace %(namespace)s;
''' % {'isa_name': name, 'namespace': ns}, '}')
    # Output appearing after the namespace declaration -- including the
    # decode block -- belongs inside the namespace; output before it
    # does not.  Hand everything back to the caller of yacc.parse().
    p[0] = (name, ns, p[1], p[3] + p[4])

# ISA name declaration looks like "namespace <foo>;"
def p_name_decl(p):
    'name_decl : NAMESPACE ID SEMI'
    p[0] = p[2]

# 'opt_defs_and_outputs' is a possibly empty sequence of
# def and/or output statements.
def p_opt_defs_and_outputs_0(p):
    'opt_defs_and_outputs : empty'
    # Empty sequence: contribute an empty code object.
    p[0] = GenCode()

def p_opt_defs_and_outputs_1(p):
    'opt_defs_and_outputs : defs_and_outputs'
    p[0] = p[1]

def p_defs_and_outputs_0(p):
    'defs_and_outputs : def_or_output'
    p[0] = p[1]

def p_defs_and_outputs_1(p):
    'defs_and_outputs : defs_and_outputs def_or_output'
    # GenCode defines '+' as section-wise concatenation.
    p[0] = p[1] + p[2]

# The list of possible definition/output statements.
def p_def_or_output(p):
    '''def_or_output : def_format
                     | def_bitfield
                     | def_template
                     | def_operand_types
                     | def_operands
                     | output_header
                     | output_decoder
                     | output_exec
                     | global_let'''
    p[0] = p[1]

# Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
# directly to the appropriate output section.


# Protect any non-dict-substitution '%'s in a format string
# (i.e. those not followed by '(') by doubling them.
def protect_non_subst_percents(s):
    return re.sub(r'%(?!\()', '%%', s)

# Massage output block by substituting in template definitions and bit
# operators.  We handle '%'s embedded in the string that don't
# indicate template substitutions (or CPU-specific symbols, which get
# handled in GenCode) by doubling them first so that the format
# operation will reduce them back to single '%'s.
def process_output(s):
    s = protect_non_subst_percents(s)
    # protects cpu-specific symbols too
    s = protect_cpu_symbols(s)
    # Substitute templates from templateMap, then expand bitfield
    # operators (foo<a:b>) into bits() calls.
    return substBitOps(s % templateMap)

def p_output_header(t):
    'output_header : OUTPUT HEADER CODELIT SEMI'
    t[0] = GenCode(header_output = process_output(t[3]))

def p_output_decoder(t):
    'output_decoder : OUTPUT DECODER CODELIT SEMI'
    t[0] = GenCode(decoder_output = process_output(t[3]))

def p_output_exec(t):
    'output_exec : OUTPUT EXEC CODELIT SEMI'
    t[0] = GenCode(exec_output = process_output(t[3]))

# global let blocks 'let {{...}}' (Python code blocks) are executed
# directly when seen.  Note that these execute in a special variable
# context 'exportContext' to prevent the code from polluting this
# script's namespace.
def p_global_let(t):
    'global_let : LET CODELIT SEMI'
    updateExportContext()
    try:
        exec fixPythonIndentation(t[2]) in exportContext
    except Exception, exc:
        error(t.lineno(1),
              'error: %s in global let block "%s".' % (exc, t[2]))
    t[0] = GenCode() # contributes nothing to the output C++ file

# Define the mapping from operand type extensions to C++ types and bit
# widths (stored in operandTypeMap).
def p_def_operand_types(t):
    'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
    # The code literal is the *body* of a dict literal; wrap it in
    # braces and exec to build the global map.
    s = 'global operandTypeMap; operandTypeMap = {' + t[3] + '}'
    try:
        exec s
    except Exception, exc:
        error(t.lineno(1),
              'error: %s in def operand_types block "%s".' % (exc, t[3]))
    t[0] = GenCode() # contributes nothing to the output C++ file

# Define the mapping from operand names to operand classes and other
# traits.  Stored in operandTraitsMap.
def p_def_operands(t):
    'def_operands : DEF OPERANDS CODELIT SEMI'
    # As with operand_types, the code literal is a dict-literal body.
    s = 'global operandTraitsMap; operandTraitsMap = {' + t[3] + '}'
    try:
        exec s
    except Exception, exc:
        error(t.lineno(1),
              'error: %s in def operands block "%s".' % (exc, t[3]))
    defineDerivedOperandVars()
    t[0] = GenCode() # contributes nothing to the output C++ file

# A bitfield definition looks like:
# 'def [signed] bitfield <ID> [<first>:<last>]'
# This generates a preprocessor macro in the output file.
def p_def_bitfield_0(t):
    'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
    expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
    if (t[2] == 'signed'):
        # sign-extend from the field width (first - last + 1 bits)
        expr = 'sext<%d>(%s)' % (t[6] - t[8] + 1, expr)
    hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
    t[0] = GenCode(header_output = hash_define)

# alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
def p_def_bitfield_1(t):
    'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
    # single bit: first == last
    expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
    if (t[2] == 'signed'):
        expr = 'sext<%d>(%s)' % (1, expr)
    hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
    t[0] = GenCode(header_output = hash_define)

def p_opt_signed_0(t):
    'opt_signed : SIGNED'
    t[0] = t[1]

def p_opt_signed_1(t):
    'opt_signed : empty'
    t[0] = ''

# Global map variable to hold templates
templateMap = {}

def p_def_template(t):
    'def_template : DEF TEMPLATE ID CODELIT SEMI'
    templateMap[t[3]] = Template(t[4])
    t[0] = GenCode()

# An instruction format definition looks like
# "def format <fmt>(<params>) {{...}};"
def p_def_format(t):
    'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
    (id, params, code) = (t[3], t[5], t[7])
    defFormat(id, params, code, t.lineno(1))
    t[0] = GenCode()
# An instruction format's formal parameter list: zero or more
# positional parameter names, optionally followed by keyword
# parameters with default values, optionally followed by a single
# '*foo' catch-all for excess positional arguments (Python-style).
# A '**foo' catch-all for undefined keyword args is NOT supported;
# otherwise the parameter list mirrors Python's.
#
# Each rule below builds a flat list of parameter-declaration
# strings, later joined into the generated wrapper's signature.

def p_param_list_0(p):
    'param_list : positional_param_list COMMA nonpositional_param_list'
    p[0] = p[1] + p[3]

def p_param_list_1(p):
    '''param_list : positional_param_list
                  | nonpositional_param_list'''
    p[0] = p[1]

def p_positional_param_list_0(p):
    'positional_param_list : empty'
    p[0] = []

def p_positional_param_list_1(p):
    'positional_param_list : ID'
    p[0] = [p[1]]

def p_positional_param_list_2(p):
    'positional_param_list : positional_param_list COMMA ID'
    p[0] = p[1] + [p[3]]

def p_nonpositional_param_list_0(p):
    'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
    p[0] = p[1] + p[3]

def p_nonpositional_param_list_1(p):
    '''nonpositional_param_list : keyword_param_list
                                | excess_args_param'''
    p[0] = p[1]

def p_keyword_param_list_0(p):
    'keyword_param_list : keyword_param'
    p[0] = [p[1]]

def p_keyword_param_list_1(p):
    'keyword_param_list : keyword_param_list COMMA keyword_param'
    p[0] = p[1] + [p[3]]

def p_keyword_param(p):
    'keyword_param : ID EQUALS expr'
    # Render as a Python-style default: "name = <repr of default>".
    p[0] = '%s = %r' % (p[1], p[3])

def p_excess_args_param(p):
    'excess_args_param : ASTERISK ID'
    # A single '*name' string, wrapped in a list so this rule composes
    # uniformly with the other parameter-list rules.
    p[0] = ['%s%s' % (p[1], p[2])]

# End of format definition-related rules.
##############

#
# A decode block looks like:
#	decode <field1> [, <field2>]* [default <inst>] { ... }
#
def p_decode_block(p):
    'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
    # opt_default pushed the inherited defaults; pop them here.
    inherited_default = defaultStack.pop()
    block = p[5]
    # An explicit 'default:' inside the block overrides the inherited
    # "default defaults".
    if not block.has_decode_default:
        block += inherited_default
    block.wrap_decode_block('switch (%s) {\n' % p[2], '}\n')
    p[0] = block

# The opt_default statement serves only to push the "default defaults"
# onto defaultStack.  This value will be used by nested decode blocks,
# and used and popped off when the current decode_block is processed
# (in p_decode_block() above).
def p_opt_default_0(p):
    'opt_default : empty'
    # No default given: propagate whatever default is already active.
    defaultStack.push(defaultStack.top())
    # no meaningful value returned
    p[0] = None

def p_opt_default_1(p):
    'opt_default : DEFAULT inst'
    # Wrap the instruction as a switch 'default:' case and make it the
    # active default for this (and any nested) decode block.
    block = p[2]
    block.wrap_decode_block('\ndefault:\n', 'break;\n')
    defaultStack.push(block)
    # no meaningful value returned
    p[0] = None

def p_decode_stmt_list_0(p):
    'decode_stmt_list : decode_stmt'
    p[0] = p[1]

def p_decode_stmt_list_1(p):
    'decode_stmt_list : decode_stmt decode_stmt_list'
    # At most one 'default' case is allowed per decode block.
    if p[1].has_decode_default and p[2].has_decode_default:
        error(p.lineno(1), 'Two default cases in decode block')
    p[0] = p[1] + p[2]

#
# Decode statement rules
#
# There are four types of statements allowed in a decode block:
# 1. Format blocks 'format <foo> { ... }'
# 2. Nested decode blocks
# 3. Instruction definitions.
# 4. C preprocessor directives.
# Preprocessor directives found in a decode statement list are passed
# through to the output, replicated to all of the output code
# streams.  This works well for ifdefs, so we can ifdef out both the
# declarations and the decode cases generated by an instruction
# definition.  Handling them as part of the grammar makes it easy to
# keep them in the right place with respect to the code generated by
# the other statements.
def p_decode_stmt_cpp(t):
    'decode_stmt : CPPDIRECTIVE'
    # Replicate the directive into all four output streams.
    t[0] = GenCode(t[1], t[1], t[1], t[1])

# A format block 'format <foo> { ... }' sets the default instruction
# format used to handle instruction definitions inside the block.
# This format can be overridden by using an explicit format on the
# instruction definition or with a nested format block.
def p_decode_stmt_format(t):
    'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
    # The format will be pushed on the stack when 'push_format_id' is
    # processed (see below).  Once the parser has recognized the full
    # production (though the right brace), we're done with the format,
    # so now we can pop it.
    formatStack.pop()
    t[0] = t[4]

# This rule exists so we can set the current format (& push the stack)
# when we recognize the format name part of the format block.
def p_push_format_id(t):
    'push_format_id : ID'
    try:
        formatStack.push(formatMap[t[1]])
        t[0] = ('', '// format %s' % t[1])
    except KeyError:
        error(t.lineno(1), 'instruction format "%s" not defined.' % t[1])

# Nested decode block: if the value of the current field matches the
# specified constant, do a nested decode on some other field.
def p_decode_stmt_decode(t):
    'decode_stmt : case_label COLON decode_block'
    label = t[1]
    codeObj = t[3]
    # just wrap the decoding code from the block as a case in the
    # outer switch statement.
    codeObj.wrap_decode_block('\n%s:\n' % label)
    codeObj.has_decode_default = (label == 'default')
    t[0] = codeObj

# Instruction definition (finally!).
def p_decode_stmt_inst(t):
    'decode_stmt : case_label COLON inst SEMI'
    label = t[1]
    codeObj = t[3]
    codeObj.wrap_decode_block('\n%s:' % label, 'break;\n')
    codeObj.has_decode_default = (label == 'default')
    t[0] = codeObj

# The case label is either a list of one or more constants or 'default'
def p_case_label_0(t):
    'case_label : intlit_list'
    # e.g. [1, 2] --> 'case 0x1: case 0x2'
    t[0] = ': '.join(map(lambda a: 'case %#x' % a, t[1]))

def p_case_label_1(t):
    'case_label : DEFAULT'
    t[0] = 'default'

#
# The constant list for a decode case label must be non-empty, but may have
# one or more comma-separated integer literals in it.
#
def p_intlit_list_0(t):
    'intlit_list : INTLIT'
    t[0] = [t[1]]

def p_intlit_list_1(t):
    'intlit_list : intlit_list COMMA INTLIT'
    t[0] = t[1]
    t[0].append(t[3])

# Define an instruction using the current instruction format (specified
# by an enclosing format block).
# "<mnemonic>(<args>)"
def p_inst_0(t):
    'inst : ID LPAREN arg_list RPAREN'
    # Pass the ID and arg list to the current format class to deal with.
    currentFormat = formatStack.top()
    codeObj = currentFormat.defineInst(t[1], t[3], t.lineno(1))
    # Echo the definition as a comment: '//'-prefix every arg line
    # except the first (which follows the '// fmt::mnemonic(' prefix).
    args = ','.join(map(str, t[3]))
    args = re.sub('(?m)^', '//', args)
    args = re.sub('^//', '', args)
    comment = '\n// %s::%s(%s)\n' % (currentFormat.id, t[1], args)
    codeObj.prepend_all(comment)
    t[0] = codeObj

# Define an instruction using an explicitly specified format:
# "<fmt>::<mnemonic>(<args>)"
def p_inst_1(t):
    'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
    try:
        format = formatMap[t[1]]
    except KeyError:
        error(t.lineno(1), 'instruction format "%s" not defined.' % t[1])
    codeObj = format.defineInst(t[3], t[5], t.lineno(1))
    comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
    codeObj.prepend_all(comment)
    t[0] = codeObj

# The arg list generates a tuple, where the first element is a list of
# the positional args and the second element is a dict containing the
# keyword args.
def p_arg_list_0(t):
    'arg_list : positional_arg_list COMMA keyword_arg_list'
    t[0] = ( t[1], t[3] )

def p_arg_list_1(t):
    'arg_list : positional_arg_list'
    t[0] = ( t[1], {} )

def p_arg_list_2(t):
    'arg_list : keyword_arg_list'
    t[0] = ( [], t[1] )

def p_positional_arg_list_0(t):
    'positional_arg_list : empty'
    t[0] = []

def p_positional_arg_list_1(t):
    'positional_arg_list : expr'
    t[0] = [t[1]]

def p_positional_arg_list_2(t):
    'positional_arg_list : positional_arg_list COMMA expr'
    t[0] = t[1] + [t[3]]

def p_keyword_arg_list_0(t):
    'keyword_arg_list : keyword_arg'
    t[0] = t[1]

def p_keyword_arg_list_1(t):
    'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
    t[0] = t[1]
    t[0].update(t[3])

def p_keyword_arg(t):
    'keyword_arg : ID EQUALS expr'
    t[0] = { t[1] : t[3] }

#
# Basic expressions.  These constitute the argument values of
# "function calls" (i.e. instruction definitions in the decode block)
# and default values for formal parameters of format functions.
#
# Right now, these are either strings, integers, or (recursively)
# lists of exprs (using Python square-bracket list syntax).  Note that
# bare identifiers are treated as string constants here (since there
# isn't really a variable namespace to refer to).
662# 663def p_expr_0(t): 664 '''expr : ID 665 | INTLIT 666 | STRLIT 667 | CODELIT''' 668 t[0] = t[1] 669 670def p_expr_1(t): 671 '''expr : LBRACKET list_expr RBRACKET''' 672 t[0] = t[2] 673 674def p_list_expr_0(t): 675 'list_expr : expr' 676 t[0] = [t[1]] 677 678def p_list_expr_1(t): 679 'list_expr : list_expr COMMA expr' 680 t[0] = t[1] + [t[3]] 681 682def p_list_expr_2(t): 683 'list_expr : empty' 684 t[0] = [] 685 686# 687# Empty production... use in other rules for readability. 688# 689def p_empty(t): 690 'empty :' 691 pass 692 693# Parse error handler. Note that the argument here is the offending 694# *token*, not a grammar symbol (hence the need to use t.value) 695def p_error(t): 696 if t: 697 error(t.lineno, "syntax error at '%s'" % t.value) 698 else: 699 error_bt(0, "unknown syntax error") 700 701# END OF GRAMMAR RULES 702# 703# Now build the parser. 704yacc.yacc() 705 706 707##################################################################### 708# 709# Support Classes 710# 711##################################################################### 712 713################ 714# CpuModel class 715# 716# The CpuModel class encapsulates everything we need to know about a 717# particular CPU model. 718 719class CpuModel: 720 # List of all CPU models. Accessible as CpuModel.list. 721 list = [] 722 723 # Constructor. Automatically adds models to CpuModel.list. 724 def __init__(self, name, filename, includes, strings): 725 self.name = name 726 self.filename = filename # filename for output exec code 727 self.includes = includes # include files needed in exec file 728 # The 'strings' dict holds all the per-CPU symbols we can 729 # substitute into templates etc. 730 self.strings = strings 731 # Add self to list. 732 CpuModel.list.append(self) 733 734# Define CPU models. The following lines should contain the only 735# CPU-model-specific information in this file. Note that the ISA 736# description itself should have *no* CPU-model-specific content. 
CpuModel('SimpleCPU', 'simple_cpu_exec.cc',
         '#include "cpu/simple/cpu.hh"',
         { 'CPU_exec_context': 'SimpleCPU' })
CpuModel('FastCPU', 'fast_cpu_exec.cc',
         '#include "cpu/fast/cpu.hh"',
         { 'CPU_exec_context': 'FastCPU' })
CpuModel('FullCPU', 'full_cpu_exec.cc',
         '#include "encumbered/cpu/full/dyn_inst.hh"',
         { 'CPU_exec_context': 'DynInst' })
CpuModel('AlphaFullCPU', 'alpha_o3_exec.cc',
         '#include "cpu/o3/alpha_dyn_inst.hh"',
         { 'CPU_exec_context': 'AlphaDynInst<AlphaSimpleImpl>' })

# Expand template with CPU-specific references into a dictionary with
# an entry for each CPU model name.  The entry key is the model name
# and the corresponding value is the template with the CPU-specific
# refs substituted for that model.
def expand_cpu_symbols_to_dict(template):
    # Protect '%'s that don't go with CPU-specific terms
    t = re.sub(r'%(?!\(CPU_)', '%%', template)
    result = {}
    for cpu in CpuModel.list:
        result[cpu.name] = t % cpu.strings
    return result

# *If* the template has CPU-specific references, return a single
# string containing a copy of the template for each CPU model with the
# corresponding values substituted in.  If the template has no
# CPU-specific references, it is returned unmodified.
def expand_cpu_symbols_to_string(template):
    if template.find('%(CPU_') != -1:
        # NOTE(review): uses the Python 2 'reduce' builtin to
        # concatenate the per-model strings (''.join would be the
        # modern equivalent).
        return reduce(lambda x,y: x+y,
                      expand_cpu_symbols_to_dict(template).values())
    else:
        return template

# Protect CPU-specific references by doubling the corresponding '%'s
# (in preparation for substituting a different set of references into
# the template).
def protect_cpu_symbols(template):
    return re.sub(r'%(?=\(CPU_)', '%%', template)

###############
# GenCode class
#
# The GenCode class encapsulates generated code destined for various
# output files.
# The header_output and decoder_output attributes are
# strings containing code destined for decoder.hh and decoder.cc
# respectively.  The decode_block attribute contains code to be
# incorporated in the decode function itself (that will also end up in
# decoder.cc).  The exec_output attribute is a dictionary with a key
# for each CPU model name; the value associated with a particular key
# is the string of code for that CPU model's exec.cc file.  The
# has_decode_default attribute is used in the decode block to allow
# explicit default clauses to override default default clauses.

class GenCode:
    # Constructor.  At this point we substitute out all CPU-specific
    # symbols.  For the exec output, these go into the per-model
    # dictionary.  For all other output types they get collapsed into
    # a single string.
    def __init__(self,
                 header_output = '', decoder_output = '', exec_output = '',
                 decode_block = '', has_decode_default = False):
        self.header_output = expand_cpu_symbols_to_string(header_output)
        self.decoder_output = expand_cpu_symbols_to_string(decoder_output)
        if isinstance(exec_output, dict):
            self.exec_output = exec_output
        elif isinstance(exec_output, str):
            # If the exec_output arg is a single string, we replicate
            # it for each of the CPU models, substituting and
            # %(CPU_foo)s params appropriately.
            self.exec_output = expand_cpu_symbols_to_dict(exec_output)
        # NOTE(review): if exec_output is neither a dict nor a str,
        # self.exec_output is never set and later accesses will raise
        # AttributeError -- confirm callers only pass those two types.
        self.decode_block = expand_cpu_symbols_to_string(decode_block)
        self.has_decode_default = has_decode_default

    # Override '+' operator: generate a new GenCode object that
    # concatenates all the individual strings in the operands.
    def __add__(self, other):
        exec_output = {}
        for cpu in CpuModel.list:
            n = cpu.name
            exec_output[n] = self.exec_output[n] + other.exec_output[n]
        return GenCode(self.header_output + other.header_output,
                       self.decoder_output + other.decoder_output,
                       exec_output,
                       self.decode_block + other.decode_block,
                       self.has_decode_default or other.has_decode_default)

    # Prepend a string (typically a comment) to all the strings.
    def prepend_all(self, pre):
        self.header_output = pre + self.header_output
        self.decoder_output = pre + self.decoder_output
        self.decode_block = pre + self.decode_block
        for cpu in CpuModel.list:
            self.exec_output[cpu.name] = pre + self.exec_output[cpu.name]

    # Wrap the decode block in a pair of strings (e.g., 'case foo:'
    # and 'break;').  Used to build the big nested switch statement.
    def wrap_decode_block(self, pre, post = ''):
        self.decode_block = pre + indent(self.decode_block) + post

################
# Format object.
#
# A format object encapsulates an instruction format.  It must provide
# a defineInst() method that generates the code for an instruction
# definition.
class Format:
    def __init__(self, id, params, code):
        # constructor: just save away arguments
        self.id = id
        self.params = params
        label = 'def format ' + id
        # Pre-compile the user code once; the label makes tracebacks
        # point back at the format definition.
        self.user_code = compile(fixPythonIndentation(code), label, 'exec')
        param_list = string.join(params, ", ")
        # Build a wrapper function whose signature matches the format's
        # declared parameter list, so argument binding/checking is done
        # by Python itself when an instruction is defined.
        f = '''def defInst(_code, _context, %s):
    my_locals = vars().copy()
    exec _code in _context, my_locals
    return my_locals\n''' % param_list
        c = compile(f, label + ' wrapper', 'exec')
        exec c
        self.func = defInst

    def defineInst(self, name, args, lineno):
        context = {}
        updateExportContext()
        context.update(exportContext)
        # make 'name' and capitalized 'Name' available to the user code
        context.update({ 'name': name, 'Name': string.capitalize(name) })
        try:
            vars = self.func(self.user_code, context, *args[0], **args[1])
        except Exception, exc:
            error(lineno, 'error defining "%s": %s.' % (name, exc))
        # Keep only the four recognized output sections; everything else
        # is just a local of the user code.
        for k in vars.keys():
            if k not in ('header_output', 'decoder_output',
                         'exec_output', 'decode_block'):
                del vars[k]
        return GenCode(**vars)

# Special null format to catch an implicit-format instruction
# definition outside of any format block.
class NoFormat:
    def __init__(self):
        self.defaultInst = ''

    def defineInst(self, name, args, lineno):
        error(lineno,
              'instruction definition "%s" with no active format!' % name)

# This dictionary maps format name strings to Format objects.
formatMap = {}

# Define a new format
def defFormat(id, params, code, lineno):
    # make sure we haven't already defined this one
    if formatMap.get(id, None) != None:
        error(lineno, 'format %s redefined.' % id)
    # create new object and store in global map
    formatMap[id] = Format(id, params, code)


##############
# Stack: a simple stack object.  Used for both formats (formatStack)
# and default cases (defaultStack).
# Simply wraps a list to give more
# stack-like syntax and enable initialization with an argument list
# (as opposed to an argument that's a list).

class Stack(list):
    """A list with push()/top() so stack manipulation reads naturally."""

    def __init__(self, *items):
        list.__init__(self, items)

    def push(self, item):
        self.append(item)

    def top(self):
        return self[-1]

# The global format stack.
formatStack = Stack(NoFormat())

# The global default case stack.
defaultStack = Stack( None )

###################
# Utility functions

#
# Indent every line in string 's' by two spaces
# (except preprocessor directives).
# Used to make nested code blocks look pretty.
#
def indent(s):
    return re.sub(r'(?m)^(?!#)', '  ', s)

#
# Munge a somewhat arbitrarily formatted piece of Python code
# (e.g. from a format 'let' block) into something whose indentation
# will get by the Python parser.
#
# Python rejects leading whitespace on the first line, and requires
# consistent indentation within a nesting level; code literals tend
# to arrive with some initial indentation.  Rather than computing and
# stripping that indent, prepend 'if 1:' so the whole block becomes
# the nested body of the 'if' and Python sorts out the indentation
# itself.
#
# Skip the wrapping when the block is empty or its first line has no
# leading whitespace.

def fixPythonIndentation(s):
    # get rid of blank lines first
    s = re.sub(r'(?m)^\s*\n', '', s)
    if s and re.match(r'[ \t]', s[0]):
        s = 'if 1:\n' + s
    return s

# Error handler.  Just call exit.  Output formatted to work under
# Emacs compile-mode.
This function should be called when errors due 958# to user input are detected (as opposed to parser bugs). 959def error(lineno, string): 960 spaces = "" 961 for (filename, line) in fileNameStack[0:-1]: 962 print spaces + "In file included from " + filename 963 spaces += " " 964 # Uncomment the following line to get a Python stack backtrace for 965 # these errors too. Can be handy when trying to debug the parser. 966 # traceback.print_exc() 967 sys.exit(spaces + "%s:%d: %s" % (fileNameStack[-1][0], lineno, string)) 968 969# Like error(), but include a Python stack backtrace (for processing 970# Python exceptions). This function should be called for errors that 971# appear to be bugs in the parser itself. 972def error_bt(lineno, string): 973 traceback.print_exc() 974 print >> sys.stderr, "%s:%d: %s" % (input_filename, lineno, string) 975 sys.exit(1) 976 977 978##################################################################### 979# 980# Bitfield Operator Support 981# 982##################################################################### 983 984bitOp1ArgRE = re.compile(r'<\s*(\w+)\s*:\s*>') 985 986bitOpWordRE = re.compile(r'(?<![\w\.])([\w\.]+)<\s*(\w+)\s*:\s*(\w+)\s*>') 987bitOpExprRE = re.compile(r'\)<\s*(\w+)\s*:\s*(\w+)\s*>') 988 989def substBitOps(code): 990 # first convert single-bit selectors to two-index form 991 # i.e., <n> --> <n:n> 992 code = bitOp1ArgRE.sub(r'<\1:\1>', code) 993 # simple case: selector applied to ID (name) 994 # i.e., foo<a:b> --> bits(foo, a, b) 995 code = bitOpWordRE.sub(r'bits(\1, \2, \3)', code) 996 # if selector is applied to expression (ending in ')'), 997 # we need to search backward for matching '(' 998 match = bitOpExprRE.search(code) 999 while match: 1000 exprEnd = match.start() 1001 here = exprEnd - 1 1002 nestLevel = 1 1003 while nestLevel > 0: 1004 if code[here] == '(': 1005 nestLevel -= 1 1006 elif code[here] == ')': 1007 nestLevel += 1 1008 here -= 1 1009 if here < 0: 1010 sys.exit("Didn't find '('!") 1011 exprStart = 
here+1 1012 newExpr = r'bits(%s, %s, %s)' % (code[exprStart:exprEnd+1], 1013 match.group(1), match.group(2)) 1014 code = code[:exprStart] + newExpr + code[match.end():] 1015 match = bitOpExprRE.search(code) 1016 return code 1017 1018 1019#################### 1020# Template objects. 1021# 1022# Template objects are format strings that allow substitution from 1023# the attribute spaces of other objects (e.g. InstObjParams instances). 1024 1025class Template: 1026 def __init__(self, t): 1027 self.template = t 1028 1029 def subst(self, d): 1030 # Start with the template namespace. Make a copy since we're 1031 # going to modify it. 1032 myDict = templateMap.copy() 1033 # if the argument is a dictionary, we just use it. 1034 if isinstance(d, dict): 1035 myDict.update(d) 1036 # if the argument is an object, we use its attribute map. 1037 elif hasattr(d, '__dict__'): 1038 myDict.update(d.__dict__) 1039 else: 1040 raise TypeError, "Template.subst() arg must be or have dictionary" 1041 # Protect non-Python-dict substitutions (e.g. if there's a printf 1042 # in the templated C++ code) 1043 template = protect_non_subst_percents(self.template) 1044 # CPU-model-specific substitutions are handled later (in GenCode). 1045 template = protect_cpu_symbols(template) 1046 return template % myDict 1047 1048 # Convert to string. This handles the case when a template with a 1049 # CPU-specific term gets interpolated into another template or into 1050 # an output block. 1051 def __str__(self): 1052 return expand_cpu_symbols_to_string(self.template) 1053 1054##################################################################### 1055# 1056# Code Parser 1057# 1058# The remaining code is the support for automatically extracting 1059# instruction characteristics from pseudocode. 1060# 1061##################################################################### 1062 1063# Force the argument to be a list. Useful for flags, where a caller 1064# can specify a singleton flag or a list of flags. 
# Force the argument to be a list.  Useful for flags, where a caller
# can specify a singleton flag or a list of flags.  Also useful for
# converting tuples to lists so they can be modified.
def makeList(arg):
    if isinstance(arg, list):
        return arg
    elif isinstance(arg, tuple):
        return list(arg)
    elif not arg:
        # None, '', 0, etc.: no items at all
        return []
    else:
        # any other single value becomes a singleton list
        return [ arg ]

# generate operandSizeMap based on provided operandTypeMap:
# basically generate equiv. C++ type and make is_signed flag
def buildOperandSizeMap():
    global operandSizeMap
    operandSizeMap = {}
    for ext in operandTypeMap.keys():
        (desc, size) = operandTypeMap[ext]
        # BUG FIX: reset on every iteration.  Previously 'type' was
        # never initialized, so an unrecognized description either
        # raised NameError (first entry) or silently inherited the
        # previous entry's C++ type, and the error check below could
        # never fire.
        type = ''
        is_signed = 0
        if desc == 'signed int':
            type = 'int%d_t' % size
            is_signed = 1
        elif desc == 'unsigned int':
            type = 'uint%d_t' % size
            is_signed = 0
        elif desc == 'float':
            is_signed = 1       # shouldn't really matter
            if size == 32:
                type = 'float'
            elif size == 64:
                type = 'double'
        if type == '':
            # BUG FIX: interpolate desc so the message names the offender
            error(0, 'Unrecognized type description "%s" in operandTypeMap'
                  % desc)
        operandSizeMap[ext] = (size, type, is_signed)

#
# Base class for operand traits.  An instance of this class (or actually
# a class derived from this one) encapsulates the traits of a particular
# operand type (e.g., "32-bit integer register").
#
class OperandTraits:
    def __init__(self, dflt_ext, reg_spec, flags, sort_pri):
        # Force construction of operandSizeMap from operandTypeMap
        # if it hasn't happened yet
        if 'operandSizeMap' not in globals():
            buildOperandSizeMap()
        self.dflt_ext = dflt_ext
        (self.dflt_size, self.dflt_type, self.dflt_is_signed) = \
                         operandSizeMap[dflt_ext]
        self.reg_spec = reg_spec
        # Canonical flag structure is a triple of lists, where each list
        # indicates the set of flags implied by this operand always, when
        # used as a source, and when used as a dest, respectively.
        # For simplicity this can be initialized using a variety of fairly
        # obvious shortcuts; we convert these to canonical form here.
        if not flags:
            # no flags specified (e.g., 'None')
            self.flags = ( [], [], [] )
        elif isinstance(flags, str):
            # a single flag: assumed to be unconditional
            self.flags = ( [ flags ], [], [] )
        elif isinstance(flags, list):
            # a list of flags: also assumed to be unconditional
            self.flags = ( flags, [], [] )
        elif isinstance(flags, tuple):
            # it's a tuple: it should be a triple,
            # but each item could be a single string or a list
            (uncond_flags, src_flags, dest_flags) = flags
            self.flags = (makeList(uncond_flags),
                          makeList(src_flags), makeList(dest_flags))
        else:
            # anything else would previously leave self.flags unset and
            # fail obscurely later; report it as a user error instead
            error(0, 'Invalid operand flags specifier "%s"' % (flags,))
        self.sort_pri = sort_pri

    # Predicates overridden by subclasses; the base class is "none of
    # the above".
    def isMem(self):
        return 0

    def isReg(self):
        return 0

    def isFloatReg(self):
        return 0

    def isIntReg(self):
        return 0

    def isControlReg(self):
        return 0

    def getFlags(self, op_desc):
        # note the empty slice '[:]' gives us a copy of self.flags[0]
        # instead of a reference to it
        my_flags = self.flags[0][:]
        if op_desc.is_src:
            my_flags += self.flags[1]
        if op_desc.is_dest:
            my_flags += self.flags[2]
        return my_flags

    def makeDecl(self, op_desc):
        (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        return type + ' ' + op_desc.base_name + ' = 0;\n'

class IntRegOperandTraits(OperandTraits):
    def isReg(self):
        return 1

    def isIntReg(self):
        return 1

    def makeConstructor(self, op_desc):
        # emit register-index array initialization for this operand
        c = ''
        if op_desc.is_src:
            c += '\n\t_srcRegIdx[%d] = %s;' % \
                 (op_desc.src_reg_idx, self.reg_spec)
        if op_desc.is_dest:
            c += '\n\t_destRegIdx[%d] = %s;' % \
                 (op_desc.dest_reg_idx, self.reg_spec)
        return c

    def makeRead(self, op_desc):
        (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
        if (type == 'float' or type == 'double'):
            error(0, 'Attempt to read integer register as FP')
        if (size == self.dflt_size):
            return '%s = xc->readIntReg(this, %d);\n' % \
                   (op_desc.base_name, op_desc.src_reg_idx)
        else:
            # narrower than the physical register: mask down
            return '%s = bits(xc->readIntReg(this, %d), %d, 0);\n' % \
                   (op_desc.base_name, op_desc.src_reg_idx, size-1)

    def makeWrite(self, op_desc):
        (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
        if (type == 'float' or type == 'double'):
            error(0, 'Attempt to write integer register as FP')
        if (size != self.dflt_size and is_signed):
            # sign-extend narrow signed values up to register width
            final_val = 'sext<%d>(%s)' % (size, op_desc.base_name)
        else:
            final_val = op_desc.base_name
        wb = '''
        {
            %s final_val = %s;
            xc->setIntReg(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (self.dflt_type, final_val, op_desc.dest_reg_idx)
        return wb

class FloatRegOperandTraits(OperandTraits):
    def isReg(self):
        return 1

    def isFloatReg(self):
        return 1

    def makeConstructor(self, op_desc):
        # FP registers sit above the integer registers in the unified
        # dependence-tag space, hence the FP_Base_DepTag offset
        c = ''
        if op_desc.is_src:
            c += '\n\t_srcRegIdx[%d] = %s + FP_Base_DepTag;' % \
                 (op_desc.src_reg_idx, self.reg_spec)
        if op_desc.is_dest:
            c += '\n\t_destRegIdx[%d] = %s + FP_Base_DepTag;' % \
                 (op_desc.dest_reg_idx, self.reg_spec)
        return c

    def makeRead(self, op_desc):
        (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
        bit_select = 0
        # pick the accessor matching the requested view of the register
        if (type == 'float'):
            func = 'readFloatRegSingle'
        elif (type == 'double'):
            func = 'readFloatRegDouble'
        else:
            func = 'readFloatRegInt'
            if (size != self.dflt_size):
                bit_select = 1
        base = 'xc->%s(this, %d)' % \
               (func, op_desc.src_reg_idx)
        if bit_select:
            return '%s = bits(%s, %d, 0);\n' % \
                   (op_desc.base_name, base, size-1)
        else:
            return '%s = %s;\n' % (op_desc.base_name, base)

    def makeWrite(self, op_desc):
        (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
        final_val = op_desc.base_name
        if (type == 'float'):
            func = 'setFloatRegSingle'
        elif (type == 'double'):
            func = 'setFloatRegDouble'
        else:
            func = 'setFloatRegInt'
            type = 'uint%d_t' % self.dflt_size
            if (size != self.dflt_size and is_signed):
                final_val = 'sext<%d>(%s)' % (size, op_desc.base_name)
        wb = '''
        {
            %s final_val = %s;
            xc->%s(this, %d, final_val);\n
            if (traceData) { traceData->setData(final_val); }
        }''' % (type, final_val, func, op_desc.dest_reg_idx)
        return wb

class ControlRegOperandTraits(OperandTraits):
    def isReg(self):
        return 1

    def isControlReg(self):
        return 1

    def makeConstructor(self, op_desc):
        # control registers are identified by their dependence tag
        c = ''
        if op_desc.is_src:
            c += '\n\t_srcRegIdx[%d] = %s_DepTag;' % \
                 (op_desc.src_reg_idx, self.reg_spec)
        if op_desc.is_dest:
            c += '\n\t_destRegIdx[%d] = %s_DepTag;' % \
                 (op_desc.dest_reg_idx, self.reg_spec)
        return c

    def makeRead(self, op_desc):
        (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
        bit_select = 0
        if (type == 'float' or type == 'double'):
            error(0, 'Attempt to read control register as FP')
        base = 'xc->read%s()' % self.reg_spec
        if size == self.dflt_size:
            return '%s = %s;\n' % (op_desc.base_name, base)
        else:
            return '%s = bits(%s, %d, 0);\n' % \
                   (op_desc.base_name, base, size-1)

    def makeWrite(self, op_desc):
        (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
        if (type == 'float' or type == 'double'):
            error(0, 'Attempt to write control register as FP')
        wb = 'xc->set%s(%s);\n' % (self.reg_spec, op_desc.base_name)
        wb += 'if (traceData) { traceData->setData(%s); }' % \
              op_desc.base_name
        return wb

class MemOperandTraits(OperandTraits):
    def isMem(self):
        return 1

    def makeConstructor(self, op_desc):
        # memory operands occupy no register-index slots
        return ''

    def makeDecl(self, op_desc):
        (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
        # Note that initializations in the declarations are solely
        # to avoid 'uninitialized variable' errors from the compiler.
        # Declare memory data variable.
        c = '%s %s = 0;\n' % (type, op_desc.base_name)
        return c

    # Actual load/store code is generated elsewhere; nothing to
    # read or write here.
    def makeRead(self, op_desc):
        return ''

    def makeWrite(self, op_desc):
        return ''

    # Return the memory access size *in bits*, suitable for
    # forming a type via "uint%d_t".  Divide by 8 if you want bytes.
    def makeAccSize(self, op_desc):
        (size, type, is_signed) = operandSizeMap[op_desc.eff_ext]
        return size

class NPCOperandTraits(OperandTraits):
    def makeConstructor(self, op_desc):
        return ''

    def makeRead(self, op_desc):
        # NPC reads as PC + 4 (fixed 4-byte instruction words)
        return '%s = xc->readPC() + 4;\n' % op_desc.base_name

    def makeWrite(self, op_desc):
        return 'xc->setNextPC(%s);\n' % op_desc.base_name


# Names (re-)exported into the namespace that user code from the ISA
# description executes in.
exportContextSymbols = ('IntRegOperandTraits', 'FloatRegOperandTraits',
                        'ControlRegOperandTraits', 'MemOperandTraits',
                        'NPCOperandTraits', 'InstObjParams', 'CodeBlock',
                        'makeList', 're', 'string')

exportContext = {}

def updateExportContext():
    # Refresh the exported namespace with the current symbol values
    # and the current template map.
    exportContext.update(exportDict(*exportContextSymbols))
    exportContext.update(templateMap)


def exportDict(*symNames):
    # Map each name to its current module-level value.
    return dict([(s, eval(s)) for s in symNames])


#
# Define operand variables that get derived from the basic declaration
# of ISA-specific operands in operandTraitsMap.  This function must be
# called by the ISA description file explicitly after defining
# operandTraitsMap (in a 'let' block).
#
def defineDerivedOperandVars():
    global operands
    operands = operandTraitsMap.keys()

    operandsREString = (r'''
    (?<![\w\.])	     # neg. lookbehind assertion: prevent partial matches
    ((%s)(?:\.(\w+))?) # match: operand with optional '.' then suffix
    (?![\w\.])	     # neg. lookahead assertion: prevent partial matches
    '''
                        % string.join(operands, '|'))

    global operandsRE
    operandsRE = re.compile(operandsREString, re.MULTILINE|re.VERBOSE)

    # Same as operandsREString, but extension is mandatory, and only two
    # groups are returned (base and ext, not full name as above).
    # Used for substituting '_' for '.' to make C++ identifiers.
    operandsWithExtREString = (r'(?<![\w\.])(%s)\.(\w+)(?![\w\.])'
                               % string.join(operands, '|'))

    global operandsWithExtRE
    operandsWithExtRE = re.compile(operandsWithExtREString, re.MULTILINE)


#
# Operand descriptor class.  An instance of this class represents
# a specific operand for a code block.
#
class OperandDescriptor:
    def __init__(self, full_name, base_name, ext, is_src, is_dest):
        self.full_name = full_name
        self.base_name = base_name
        self.ext = ext
        self.is_src = is_src
        self.is_dest = is_dest
        self.traits = operandTraitsMap[base_name]
        # The 'effective extension' (eff_ext) is either the actual
        # extension, if one was explicitly provided, or the default.
        if ext:
            self.eff_ext = ext
        else:
            self.eff_ext = self.traits.dflt_ext

        # note that mem_acc_size is undefined for non-mem operands...
        # template must be careful not to use it if it doesn't apply.
        if self.traits.isMem():
            self.mem_acc_size = self.traits.makeAccSize(self)

    # Finalize additional fields (primarily code fields).  This step
    # is done separately since some of these fields may depend on the
    # register index enumeration that hasn't been performed yet at the
    # time of __init__().
    def finalize(self):
        self.flags = self.traits.getFlags(self)
        self.constructor = self.traits.makeConstructor(self)
        self.op_decl = self.traits.makeDecl(self)

        if self.is_src:
            self.op_rd = self.traits.makeRead(self)
        else:
            self.op_rd = ''

        if self.is_dest:
            self.op_wb = self.traits.makeWrite(self)
        else:
            self.op_wb = ''


class OperandDescriptorList:
    """The set of operands found in a code block, with derived
    register-index assignments and per-operand code fragments."""

    # Find all the operands in the given code block.  Returns an operand
    # descriptor list (instance of class OperandDescriptorList).
    def __init__(self, code):
        self.items = []
        self.bases = {}
        # delete comments so we don't match on reg specifiers inside
        code = commentRE.sub('', code)
        # search for operands
        next_pos = 0
        while 1:
            match = operandsRE.search(code, next_pos)
            if not match:
                # no more matches: we're done
                break
            op = match.groups()
            # regexp groups are operand full name, base, and extension
            (op_full, op_base, op_ext) = op
            # if the token following the operand is an assignment, this is
            # a destination (LHS), else it's a source (RHS)
            is_dest = (assignRE.match(code, match.end()) != None)
            is_src = not is_dest
            # see if we've already seen this one
            op_desc = self.find_base(op_base)
            if op_desc:
                if op_desc.ext != op_ext:
                    error(0, 'Inconsistent extensions for operand %s' % \
                          op_base)
                op_desc.is_src = op_desc.is_src or is_src
                op_desc.is_dest = op_desc.is_dest or is_dest
            else:
                # new operand: create new descriptor
                op_desc = OperandDescriptor(op_full, op_base, op_ext,
                                            is_src, is_dest)
                self.append(op_desc)
            # start next search after end of current match
            next_pos = match.end()
        self.sort()
        # enumerate source & dest register operands... used in building
        # constructor later
        self.numSrcRegs = 0
        self.numDestRegs = 0
        self.numFPDestRegs = 0
        self.numIntDestRegs = 0
        self.memOperand = None
        for op_desc in self.items:
            if op_desc.traits.isReg():
                if op_desc.is_src:
                    op_desc.src_reg_idx = self.numSrcRegs
                    self.numSrcRegs += 1
                if op_desc.is_dest:
                    op_desc.dest_reg_idx = self.numDestRegs
                    self.numDestRegs += 1
                    if op_desc.traits.isFloatReg():
                        self.numFPDestRegs += 1
                    elif op_desc.traits.isIntReg():
                        self.numIntDestRegs += 1
            elif op_desc.traits.isMem():
                if self.memOperand:
                    error(0, "Code block has more than one memory operand.")
                self.memOperand = op_desc
        # now make a final pass to finalize op_desc fields that may depend
        # on the register enumeration
        for op_desc in self.items:
            op_desc.finalize()

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        return self.items[index]

    def append(self, op_desc):
        self.items.append(op_desc)
        self.bases[op_desc.base_name] = op_desc

    def find_base(self, base_name):
        # like self.bases[base_name], but returns None if not found
        # (rather than raising exception)
        return self.bases.get(base_name)

    # internal helper function for concat[Some]Attr{Strings|Lists}
    def __internalConcatAttrs(self, attr_name, filter, result):
        for op_desc in self.items:
            if filter(op_desc):
                result += getattr(op_desc, attr_name)
        return result

    # return a single string that is the concatenation of the (string)
    # values of the specified attribute for all operands
    def concatAttrStrings(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, '')

    # like concatAttrStrings, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrStrings(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, '')

    # return a single list that is the concatenation of the (list)
    # values of the specified attribute for all operands
    def concatAttrLists(self, attr_name):
        return self.__internalConcatAttrs(attr_name, lambda x: 1, [])

    # like concatAttrLists, but only include the values for the operands
    # for which the provided filter function returns true
    def concatSomeAttrLists(self, filter, attr_name):
        return self.__internalConcatAttrs(attr_name, filter, [])

    def sort(self):
        # order operands by their traits' sort priority
        self.items.sort(lambda a, b: a.traits.sort_pri - b.traits.sort_pri)

# Regular expression object to match C++ comments
# (used in findOperands())
# NOTE(review): matches only '//' line comments; '/* ... */' block
# comments are not stripped -- confirm block comments never appear in
# operand code.
commentRE = re.compile(r'//.*\n')

# Regular expression object to match assignment statements
# (used in findOperands())
# NOTE(review): compound assignments like '+=' are not matched, so
# such operands are classified as sources only -- presumably the ISA
# descriptions avoid them; verify.
assignRE = re.compile(r'\s*=(?!=)', re.MULTILINE)
1556def substMungedOpNames(code): 1557 return operandsWithExtRE.sub(r'\1', code) 1558 1559def joinLists(t): 1560 return map(string.join, t) 1561 1562def makeFlagConstructor(flag_list): 1563 if len(flag_list) == 0: 1564 return '' 1565 # filter out repeated flags 1566 flag_list.sort() 1567 i = 1 1568 while i < len(flag_list): 1569 if flag_list[i] == flag_list[i-1]: 1570 del flag_list[i] 1571 else: 1572 i += 1 1573 pre = '\n\tflags[' 1574 post = '] = true;' 1575 code = pre + string.join(flag_list, post + pre) + post 1576 return code 1577 1578class CodeBlock: 1579 def __init__(self, code): 1580 self.orig_code = code 1581 self.operands = OperandDescriptorList(code) 1582 self.code = substMungedOpNames(substBitOps(code)) 1583 self.constructor = self.operands.concatAttrStrings('constructor') 1584 self.constructor += \ 1585 '\n\t_numSrcRegs = %d;' % self.operands.numSrcRegs 1586 self.constructor += \ 1587 '\n\t_numDestRegs = %d;' % self.operands.numDestRegs 1588 self.constructor += \ 1589 '\n\t_numFPDestRegs = %d;' % self.operands.numFPDestRegs 1590 self.constructor += \ 1591 '\n\t_numIntDestRegs = %d;' % self.operands.numIntDestRegs 1592 1593 self.op_decl = self.operands.concatAttrStrings('op_decl') 1594 1595 self.op_rd = self.operands.concatAttrStrings('op_rd') 1596 self.op_wb = self.operands.concatAttrStrings('op_wb') 1597 1598 self.flags = self.operands.concatAttrLists('flags') 1599 1600 if self.operands.memOperand: 1601 self.mem_acc_size = self.operands.memOperand.mem_acc_size 1602 1603 # Make a basic guess on the operand class (function unit type). 1604 # These are good enough for most cases, and will be overridden 1605 # later otherwise. 
1606 if 'IsStore' in self.flags: 1607 self.op_class = 'MemWriteOp' 1608 elif 'IsLoad' in self.flags or 'IsPrefetch' in self.flags: 1609 self.op_class = 'MemReadOp' 1610 elif 'IsFloating' in self.flags: 1611 self.op_class = 'FloatAddOp' 1612 else: 1613 self.op_class = 'IntAluOp' 1614 1615# Assume all instruction flags are of the form 'IsFoo' 1616instFlagRE = re.compile(r'Is.*') 1617 1618# OpClass constants end in 'Op' except No_OpClass 1619opClassRE = re.compile(r'.*Op|No_OpClass') 1620 1621class InstObjParams: 1622 def __init__(self, mnem, class_name, base_class = '', 1623 code_block = None, opt_args = []): 1624 self.mnemonic = mnem 1625 self.class_name = class_name 1626 self.base_class = base_class 1627 if code_block: 1628 for code_attr in code_block.__dict__.keys(): 1629 setattr(self, code_attr, getattr(code_block, code_attr)) 1630 else: 1631 self.constructor = '' 1632 self.flags = [] 1633 # Optional arguments are assumed to be either StaticInst flags 1634 # or an OpClass value. To avoid having to import a complete 1635 # list of these values to match against, we do it ad-hoc 1636 # with regexps. 1637 for oa in opt_args: 1638 if instFlagRE.match(oa): 1639 self.flags.append(oa) 1640 elif opClassRE.match(oa): 1641 self.op_class = oa 1642 else: 1643 error(0, 'InstObjParams: optional arg "%s" not recognized ' 1644 'as StaticInst::Flag or OpClass.' % oa) 1645 1646 # add flag initialization to contructor here to include 1647 # any flags added via opt_args 1648 self.constructor += makeFlagConstructor(self.flags) 1649 1650 # if 'IsFloating' is set, add call to the FP enable check 1651 # function (which should be provided by isa_desc via a declare) 1652 if 'IsFloating' in self.flags: 1653 self.fp_enable_check = 'fault = checkFpEnableFault(xc);' 1654 else: 1655 self.fp_enable_check = '' 1656 1657####################### 1658# 1659# Output file template 1660# 1661 1662file_template = ''' 1663/* 1664 * DO NOT EDIT THIS FILE!!! 
1665 * 1666 * It was automatically generated from the ISA description in %(filename)s 1667 */ 1668 1669%(includes)s 1670 1671%(global_output)s 1672 1673namespace %(namespace)s { 1674 1675%(namespace_output)s 1676 1677} // namespace %(namespace)s 1678''' 1679 1680 1681# Update the output file only if the new contents are different from 1682# the current contents. Minimizes the files that need to be rebuilt 1683# after minor changes. 1684def update_if_needed(file, contents): 1685 update = False 1686 if os.access(file, os.R_OK): 1687 f = open(file, 'r') 1688 old_contents = f.read() 1689 f.close() 1690 if contents != old_contents: 1691 print 'Updating', file 1692 os.remove(file) # in case it's write-protected 1693 update = True 1694 else: 1695 print 'File', file, 'is unchanged' 1696 else: 1697 print 'Generating', file 1698 update = True 1699 if update: 1700 f = open(file, 'w') 1701 f.write(contents) 1702 f.close() 1703 1704# This regular expression matches include directives 1705includeRE = re.compile(r'^\s*##include\s+"(?P<filename>[\w/.-]*)".*$', 1706 re.MULTILINE) 1707 1708def preprocess_isa_desc(isa_desc): 1709 # Find any includes and include them 1710 pos = 0 1711 while 1: 1712 m = includeRE.search(isa_desc, pos) 1713 if not m: 1714 break 1715 filename = m.group('filename') 1716 print 'Including file "%s"' % filename 1717 try: 1718 isa_desc = isa_desc[:m.start()] + \ 1719 '##newfile "' + filename + '"\n' + \ 1720 open(filename).read() + \ 1721 '##endfile\n' + \ 1722 isa_desc[m.end():] 1723 except IOError: 1724 error(0, 'Error including file "%s"' % (filename)) 1725 pos = m.start() 1726 return isa_desc 1727 1728# 1729# Read in and parse the ISA description. 1730# 1731def parse_isa_desc(isa_desc_file, output_dir, include_path): 1732 # set a global var for the input filename... 
used in error messages 1733 global input_filename 1734 input_filename = isa_desc_file 1735 global fileNameStack 1736 fileNameStack = [(input_filename, 1)] 1737 1738 # Suck the ISA description file in. 1739 input = open(isa_desc_file) 1740 isa_desc = input.read() 1741 input.close() 1742 1743 # Perform Preprocessing 1744 isa_desc = preprocess_isa_desc(isa_desc) 1745 1746 # Parse it. 1747 (isa_name, namespace, global_code, namespace_code) = yacc.parse(isa_desc) 1748 1749 # grab the last three path components of isa_desc_file to put in 1750 # the output 1751 filename = '/'.join(isa_desc_file.split('/')[-3:]) 1752 1753 # generate decoder.hh 1754 includes = '#include "base/bitfield.hh" // for bitfield support' 1755 global_output = global_code.header_output 1756 namespace_output = namespace_code.header_output 1757 update_if_needed(output_dir + '/decoder.hh', file_template % vars()) 1758 1759 # generate decoder.cc 1760 includes = '#include "%s/decoder.hh"' % include_path 1761 global_output = global_code.decoder_output 1762 namespace_output = namespace_code.decoder_output 1763 namespace_output += namespace_code.decode_block 1764 update_if_needed(output_dir + '/decoder.cc', file_template % vars()) 1765 1766 # generate per-cpu exec files 1767 for cpu in CpuModel.list: 1768 includes = '#include "%s/decoder.hh"\n' % include_path 1769 includes += cpu.includes 1770 global_output = global_code.exec_output[cpu.name] 1771 namespace_output = namespace_code.exec_output[cpu.name] 1772 update_if_needed(output_dir + '/' + cpu.filename, 1773 file_template % vars()) 1774 1775# Called as script: get args from command line. 1776if __name__ == '__main__': 1777 parse_isa_desc(sys.argv[1], sys.argv[2], sys.argv[3]) 1778