# micro_asm.py revision 6655:380a32b43336
# Copyright (c) 2003-2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Gabe Black

import os
import sys
import re
import string
import traceback
# get type names
from types import *

from ply import lex
from ply import yacc

##########################################################################
#
# Base classes for use outside of the assembler
#
##########################################################################

class Micro_Container(object):
    """A named, ordered collection of microops.

    Besides the microop list, every container carries three lookup
    tables that start out empty: ``directives``, ``micro_classes`` and
    ``labels``.
    """

    def __init__(self, name):
        self.microops = []
        self.name = name
        self.directives = {}
        self.micro_classes = {}
        self.labels = {}

    def add_microop(self, mnemonic, microop):
        """Append ``microop`` to the container.

        ``mnemonic`` is accepted but not used by this base implementation.
        """
        self.microops.append(microop)

    def __str__(self):
        # Built with join() rather than through a local called "string",
        # which used to shadow the imported ``string`` module.
        pieces = ["%s:\n" % self.name]
        pieces.extend(" %s\n" % microop for microop in self.microops)
        return "".join(pieces)

class Combinational_Macroop(Micro_Container):
    """A Micro_Container with no additional behavior of its own."""
    pass

class Rom_Macroop(object):
    """A macroop described only by its name and a target."""

    def __init__(self, name, target):
        self.name = name
        self.target = target

    def __str__(self):
        return "%s: %s\n" % (self.name, self.target)

class Rom(Micro_Container):
    """A Micro_Container that additionally keeps an ``externs`` table."""

    def __init__(self, name):
        super(Rom, self).__init__(name)
        self.externs = {}

##########################################################################
#
# Support classes
#
##########################################################################

class Label(object):
    """A label attached to a microop."""

    def __init__(self):
        # These are the attribute names the parser rules assign and that
        # handle_statement() reads; give them defaults so that a freshly
        # built Label is usable as-is.
        self.is_extern = False
        self.text = ""
        # Original attribute names, kept for backward compatibility.
        self.extern = False
        self.name = ""

class Block(object):
    """A list of statements."""

    def __init__(self):
        self.statements = []

class Statement(object):
    """Base class for statements; the two flags identify the subclass."""

    def __init__(self):
        self.is_microop = False
        self.is_directive = False
        # Raw, unparsed argument text of the statement.
        self.params = ""

class Microop(Statement):
    """A microop statement: mnemonic, optional labels and parameters."""

    def __init__(self):
        super(Microop, self).__init__()
        self.mnemonic = ""
        self.labels = []
        self.is_microop = True

class Directive(Statement):
    """A directive statement: name and optional parameters."""

    def __init__(self):
        super(Directive, self).__init__()
        self.name = ""
        self.is_directive = True

##########################################################################
#
# Functions that handle common tasks
#
##########################################################################

def print_error(message):
    """Print ``message`` prefixed with "*** " and framed by blank lines."""
    # Single-argument, parenthesised print behaves the same whether print
    # is a statement or a function.
    print("")
    print("*** %s" % message)
    print("")

def handle_statement(parser, container, statement):
    """Apply one parsed statement to ``container``.

    Microop statements: the class registered for the mnemonic in
    ``parser.microops`` is instantiated with the statement's parameter
    text as its argument list, every label of the statement is recorded
    in ``container.labels`` (and in ``container.externs`` for extern
    labels), and the new object is handed to ``container.add_microop``.

    Directive statements: the callable registered under the directive's
    name in ``container.directives`` is called with the statement's
    parameter text as its argument list.

    Raises:
        Exception: for an unknown mnemonic, an unknown directive, or a
            statement that is neither a microop nor a directive.
        Anything raised while building or adding the microop, or while
        running the directive, is re-raised after a diagnostic is printed.
    """
    # NOTE(review): statement.params comes straight from the assembly
    # text and is evaluated with eval(); only assemble trusted input.
    if statement.is_microop:
        if statement.mnemonic not in parser.microops:
            raise Exception("Unrecognized mnemonic: %s" % statement.mnemonic)
        # The class is published in the symbol table under a reserved
        # name so that the evaluated expression can call it.
        parser.symbols["__microopClassFromInsideTheAssembler"] = \
            parser.microops[statement.mnemonic]
        try:
            microop = eval('__microopClassFromInsideTheAssembler(%s)' %
                           statement.params, {}, parser.symbols)
        except:
            # Bare except is deliberate: report context, then re-raise
            # whatever it was unchanged.
            print_error("Error creating microop object with mnemonic %s." % \
                        statement.mnemonic)
            raise
        try:
            for label in statement.labels:
                container.labels[label.text] = microop
                if label.is_extern:
                    container.externs[label.text] = microop
            container.add_microop(statement.mnemonic, microop)
        except:
            print_error("Error adding microop.")
            raise
    elif statement.is_directive:
        if statement.name not in container.directives:
            raise Exception("Unrecognized directive: %s" % statement.name)
        parser.symbols["__directiveFunctionFromInsideTheAssembler"] = \
            container.directives[statement.name]
        try:
            eval('__directiveFunctionFromInsideTheAssembler(%s)' %
                 statement.params, {}, parser.symbols)
        except:
            print_error("Error executing directive.")
            print(container.directives)
            raise
    else:
        # The statement used to be passed as the third operand of a
        # Python 2 raise statement, which must be a traceback; that
        # produced an unrelated TypeError instead of this message.
        raise Exception("Didn't recognize the type of statement: %s" %
                        (statement,))

##########################################################################
#
# Lexer specification
#
##########################################################################

# Error handler.  Just call exit.  Output formatted to work under
# Emacs compile-mode.
# Optional 'print_traceback' arg, if set to True,
# prints a Python stack backtrace too (can be handy when trying to
# debug the parser itself).
def error(lineno, string, print_traceback = False):
    """Terminate via sys.exit() with the message "<lineno>: <string>".

    When ``lineno`` is 0 the line prefix is left empty.  When
    ``print_traceback`` is true, traceback.print_exc() is called first.
    """
    # Print a Python stack backtrace if requested.
    if print_traceback:
        traceback.print_exc()
    if lineno != 0:
        line_str = "%d:" % lineno
    else:
        line_str = ""
    sys.exit("%s %s" % (line_str, string))

reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')

tokens = reserved + (
        # identifier
        'ID',
        # arguments for microops and directives
        'PARAMS',

        'LPAREN', 'RPAREN',
        'LBRACE', 'RBRACE',
        'COLON', 'SEMI', 'DOT',
        'NEWLINE'
        )

# New lines are ignored at the top level, but they end statements in the
# assembler
states = (
    ('asm', 'exclusive'),
    ('params', 'exclusive'),
)

# Maps the lower-case spelling of each reserved word to its token type.
reserved_map = { }
for r in reserved:
    reserved_map[r.lower()] = r

# NOTE: for every t_* function below, the docstring is the token's regular
# expression (PLY convention), so it has to remain the first statement.

# Ignore comments
def t_ANY_COMMENT(t):
    r'\#[^\n]*(?=\n)'

def t_ANY_MULTILINECOMMENT(t):
    r'/\*([^/]|((?<!\*)/))*\*/'
    # The pattern can match across newlines, so the line counter has to
    # be advanced here or every later line number would be too small.
    # The count is stored on the lexer, where current PLY keeps it.
    t.lineno += t.value.count('\n')
    t.lexer.lineno = t.lineno

# A colon marks the end of a label. It should follow an ID which will
# put the lexer in the "params" state. Seeing the colon will put it back
# in the "asm" state since it knows it saw a label and not a mnemonic.
def t_params_COLON(t):
    r':'
    t.lexer.begin('asm')
    return t

# Parameters are a string of text which doesn't contain an unescaped
# statement terminator, i.e. a newline or semicolon.
def t_params_PARAMS(t):
    r'([^\n;\\]|(\\[\n;\\]))+'
    # Escaped newlines are part of the match, so count them.  The total
    # is written to the lexer as well, which is where current PLY keeps
    # the running line number; updating the token alone loses the count.
    t.lineno += t.value.count('\n')
    t.lexer.lineno = t.lineno
    # Drop the backslash from every escape, keeping the escaped character.
    t.value = _UNESCAPE_PARAMS_RE.sub(lambda mo: mo.group(0)[1], t.value)
    t.lexer.begin('asm')
    return t

# Matches one backslash escape inside a PARAMS token.  Compiled once here
# instead of on every token; the name is only looked up when the rule runs.
_UNESCAPE_PARAMS_RE = re.compile(r'(\\[\n;\\])')

# An "ID" in the micro assembler is either a label, directive, or mnemonic
# If it's either a directive or a mnemonic, it will be optionally followed by
# parameters. If it's a label, the following colon will make the lexer stop
# looking for parameters.
def t_asm_ID(t):
    r'[A-Za-z_]\w*'
    t.type = reserved_map.get(t.value, 'ID')
    # If the ID is really "extern", we shouldn't start looking for parameters
    # yet. The real ID, the label itself, is coming up.
    if t.type != 'EXTERN':
        t.lexer.begin('params')
    return t

# If there is a label and you're -not- in the assembler (which would be caught
# above), don't start looking for parameters.
def t_ANY_ID(t):
    r'[A-Za-z_]\w*'
    t.type = reserved_map.get(t.value, 'ID')
    return t

# Braces enter and exit micro assembly
def t_INITIAL_LBRACE(t):
    r'\{'
    t.lexer.begin('asm')
    return t

def t_asm_RBRACE(t):
    r'\}'
    t.lexer.begin('INITIAL')
    return t

# At the top level, keep track of newlines only for line counting.
def t_INITIAL_NEWLINE(t):
    r'\n+'
    # No token is returned, so the count must be stored on the lexer to
    # have any effect.
    t.lineno += t.value.count('\n')
    t.lexer.lineno = t.lineno

# In the micro assembler, do line counting but also return a token. The
# token is needed by the parser to detect the end of a statement.
def t_asm_NEWLINE(t):
    r'\n+'
    t.lineno += t.value.count('\n')
    t.lexer.lineno = t.lineno
    return t

# A newline or semi colon when looking for params signals that the statement
# is over and the lexer should go back to looking for regular assembly.
def t_params_NEWLINE(t):
    r'\n+'
    # Store the count on the lexer too; that is where current PLY keeps
    # the running line number.
    t.lineno += t.value.count('\n')
    t.lexer.lineno = t.lineno
    t.lexer.begin('asm')
    return t

def t_params_SEMI(t):
    r';'
    t.lexer.begin('asm')
    return t

# Basic regular expressions to pick out simple tokens
t_ANY_LPAREN = r'\('
t_ANY_RPAREN = r'\)'
t_ANY_SEMI   = r';'
t_ANY_DOT    = r'\.'

t_ANY_ignore = ' \t\x0c'

def t_ANY_error(t):
    error(t.lineno, "illegal character '%s'" % t.value[0])
    # Only reached if error() returns.
    t.skip(1)

##########################################################################
#
# Parser specification
#
##########################################################################

# NOTE: for every p_* function below, the docstring is the grammar
# production (PLY convention), so it has to remain the first statement.
# The helpers deliberately do not start with "p_".

def _new_microop(mnemonic, labels=None, params=None):
    # Build a Microop, overriding its default labels/params only if given.
    microop = Microop()
    microop.mnemonic = mnemonic
    if labels is not None:
        microop.labels = labels
    if params is not None:
        microop.params = params
    return microop

def _new_label(text, is_extern):
    # Build a Label carrying the attributes handle_statement() reads.
    label = Label()
    label.is_extern = is_extern
    label.text = text
    return label

def _new_directive(name, params=None):
    # Build a Directive, overriding its default params only if given.
    directive = Directive()
    directive.name = name
    if params is not None:
        directive.params = params
    return directive

# Start symbol for a file which may have more than one macroop or rom
# specification.
def p_file(t):
    'file : opt_rom_or_macros'

def p_opt_rom_or_macros_0(t):
    'opt_rom_or_macros : '

def p_opt_rom_or_macros_1(t):
    'opt_rom_or_macros : rom_or_macros'

def p_rom_or_macros_0(t):
    'rom_or_macros : rom_or_macro'

def p_rom_or_macros_1(t):
    'rom_or_macros : rom_or_macros rom_or_macro'

def p_rom_or_macro_0(t):
    '''rom_or_macro : rom_block
                    | macroop_def'''

# Defines a section of microcode that should go in the current ROM
def p_rom_block(t):
    'rom_block : DEF ROM block SEMI'
    if not t.parser.rom:
        print_error("Rom block found, but no Rom object specified.")
        raise TypeError("Rom block found, but no Rom object was specified.")
    for statement in t[3].statements:
        handle_statement(t.parser, t.parser.rom, statement)
    t[0] = t.parser.rom

# Defines a macroop that jumps to an external label in the ROM
def p_macroop_def_0(t):
    'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
    if not t.parser.rom_macroop_type:
        print_error("ROM based macroop found, but no ROM macroop class was specified.")
        raise TypeError("ROM based macroop found, but no ROM macroop class was specified.")
    # t[3] is the macroop's name, t[5] the target it refers to.
    macroop = t.parser.rom_macroop_type(t[3], t[5])
    t.parser.macroops[t[3]] = macroop


# Defines a macroop that is combinationally generated
def p_macroop_def_1(t):
    'macroop_def : DEF MACROOP ID block SEMI'
    try:
        curop = t.parser.macro_type(t[3])
    except TypeError:
        print_error("Error creating macroop object.")
        raise
    for statement in t[4].statements:
        handle_statement(t.parser, curop, statement)
    t.parser.macroops[t[3]] = curop

# A block of statements
def p_block(t):
    'block : LBRACE statements RBRACE'
    block = Block()
    block.statements = t[2]
    t[0] = block

def p_statements_0(t):
    'statements : statement'
    # Empty statements have no value and are left out of the list.
    if t[1]:
        t[0] = [t[1]]
    else:
        t[0] = []

def p_statements_1(t):
    'statements : statements statement'
    if t[2]:
        t[1].append(t[2])
    t[0] = t[1]

def p_statement(t):
    'statement : content_of_statement end_of_statement'
    t[0] = t[1]

# A statement can be a microop or an assembler directive
def p_content_of_statement_0(t):
    '''content_of_statement : microop
                            | directive'''
    t[0] = t[1]

# Ignore empty statements
def p_content_of_statement_1(t):
    'content_of_statement : '
    pass

# Statements are ended by newlines or a semi colon
def p_end_of_statement(t):
    '''end_of_statement : NEWLINE
                        | SEMI'''
    pass

# Different flavors of microop to avoid shift/reduce errors
def p_microop_0(t):
    'microop : labels ID'
    t[0] = _new_microop(t[2], labels=t[1])

def p_microop_1(t):
    'microop : ID'
    t[0] = _new_microop(t[1])

def p_microop_2(t):
    'microop : labels ID PARAMS'
    t[0] = _new_microop(t[2], labels=t[1], params=t[3])

def p_microop_3(t):
    'microop : ID PARAMS'
    t[0] = _new_microop(t[1], params=t[2])

# Labels in the microcode
def p_labels_0(t):
    'labels : label'
    t[0] = [t[1]]

def p_labels_1(t):
    'labels : labels label'
    t[1].append(t[2])
    t[0] = t[1]

# labels on lines by themselves are attached to the following instruction.
def p_labels_2(t):
    'labels : labels NEWLINE'
    t[0] = t[1]

def p_label_0(t):
    'label : ID COLON'
    t[0] = _new_label(t[1], False)

def p_label_1(t):
    'label : EXTERN ID COLON'
    t[0] = _new_label(t[2], True)

# Directives for the macroop
def p_directive_0(t):
    'directive : DOT ID'
    t[0] = _new_directive(t[2])

def p_directive_1(t):
    'directive : DOT ID PARAMS'
    t[0] = _new_directive(t[2], params=t[3])

# Parse error handler.  Note that the argument here is the offending
# *token*, not a grammar symbol (hence the need to use t.value)
def p_error(t):
    if t:
        error(t.lineno, "syntax error at '%s'" % t.value)
    else:
        error(0, "unknown syntax error", True)

class MicroAssembler(object):
    """Front end that owns the lexer and parser built from this module.

    The constructor arguments are stored on the parser object, where the
    grammar actions read them:
      macro_type        callable invoked with a macroop's name
      microops          mapping from mnemonic to microop class
      rom               object that receives the statements of rom blocks
      rom_macroop_type  callable invoked with (name, target)
    """

    def __init__(self, macro_type, microops,
            rom = None, rom_macroop_type = None):
        self.lexer = lex.lex()
        self.parser = yacc.yacc()
        self.parser.macro_type = macro_type
        self.parser.macroops = {}
        self.parser.microops = microops
        self.parser.rom = rom
        self.parser.rom_macroop_type = rom_macroop_type
        self.parser.symbols = {}
        # Same dict object as the parser's, exposed on the assembler.
        self.symbols = self.parser.symbols

    def assemble(self, asm):
        """Parse ``asm`` and return the macroops it defined, keyed by name.

        The parser's macroop table is reset afterwards, so each call
        returns only the macroops from its own input.
        """
        self.parser.parse(asm, lexer=self.lexer)
        macroops = self.parser.macroops
        self.parser.macroops = {}
        return macroops