# Copyright (c) 2003-2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Gabe Black

from __future__ import print_function

import os
import sys
import re
import string
import traceback
# get type names
from types import *

from ply import lex
from ply import yacc

##########################################################################
#
# Base classes for use outside of the assembler
#
##########################################################################

class Micro_Container(object):
    """A named, ordered collection of microops.

    Attributes set up by the constructor:
      microops      -- list of microop objects, in the order they were added
      name          -- the name passed to the constructor
      directives    -- dict, initially empty
      micro_classes -- dict, initially empty
      labels        -- dict, initially empty
    """

    def __init__(self, name):
        self.microops = []
        self.name = name
        self.directives = {}
        self.micro_classes = {}
        self.labels = {}

    def add_microop(self, mnemonic, microop):
        """Append 'microop' to the container; 'mnemonic' is not used here."""
        self.microops.append(microop)

    def __str__(self):
        # Header line with the container name, then one line per microop.
        # Built with join so no local shadows the imported 'string' module.
        pieces = ["%s:\n" % self.name]
        pieces.extend(" %s\n" % microop for microop in self.microops)
        return "".join(pieces)

class Combinational_Macroop(Micro_Container):
    """A Micro_Container with no additional behavior."""
    pass

class Rom_Macroop(object):
    """A macroop described only by its name and a target."""

    def __init__(self, name, target):
        self.name = name
        self.target = target

    def __str__(self):
        return "%s: %s\n" % (self.name, self.target)

class Rom(Micro_Container):
    """A Micro_Container that additionally carries an 'externs' dict."""

    def __init__(self, name):
        super(Rom, self).__init__(name)
        self.externs = {}

##########################################################################
#
# Support classes
#
##########################################################################

class Label(object):
    """A label attached to a microop."""

    def __init__(self):
        # Original attribute names, kept so existing users keep working.
        self.extern = False
        self.name = ""
        # The rest of this file reads and writes labels through 'is_extern'
        # and 'text', so give those attributes defaults as well instead of
        # relying on every creator to assign them.
        self.is_extern = False
        self.text = ""

class Block(object):
    """A list of statements."""

    def __init__(self):
        self.statements = []

class Statement(object):
    """Common base for microops and directives.

    'is_microop' and 'is_directive' are type flags that the subclasses
    below switch on; 'params' holds the statement's parameter text.
    """

    def __init__(self):
        self.is_microop = False
        self.is_directive = False
        self.params = ""

class Microop(Statement):
    """A microop statement: a mnemonic plus the labels attached to it."""

    def __init__(self):
        super(Microop, self).__init__()
        self.mnemonic = ""
        self.labels = []
        self.is_microop = True

class Directive(Statement):
    """A directive statement, identified by 'name'."""

    def __init__(self):
        super(Directive, self).__init__()
        self.name = ""
        self.is_directive = True
##########################################################################
#
# Functions that handle common tasks
#
##########################################################################

def print_error(message):
    """Print 'message' prefixed with '*** ', with a blank line on each side."""
    print()
    print("*** %s" % message)
    print()

def handle_statement(parser, container, statement):
    """Turn one parsed statement into an effect on 'container'.

    parser    -- object providing 'microops' (mnemonic -> callable) and
                 'symbols' (dict used as the local namespace for eval)
    container -- object providing 'labels', 'directives', add_microop() and,
                 when extern labels are used, 'externs'
    statement -- object whose 'is_microop' / 'is_directive' flags select the
                 handling below

    A microop statement is instantiated by evaluating
    "<microop class>(<params>)", registered under each of its labels and
    appended to the container.  A directive statement evaluates
    "<directive function>(<params>)" for its side effects.

    Raises Exception for an unknown mnemonic, an unknown directive or a
    statement that is neither kind.  Any exception raised while building or
    adding a microop, or while running a directive, is reported with
    print_error() and then re-raised unchanged.
    """
    if statement.is_microop:
        if statement.mnemonic not in parser.microops:
            raise Exception("Unrecognized mnemonic: %s" % statement.mnemonic)
        parser.symbols["__microopClassFromInsideTheAssembler"] = \
            parser.microops[statement.mnemonic]
        try:
            # NOTE(review): the parameter text is evaluated as Python code,
            # so only trusted microcode sources should be assembled.
            microop = eval('__microopClassFromInsideTheAssembler(%s)' %
                           statement.params, {}, parser.symbols)
        except:
            # Bare except is deliberate: the error is only reported here and
            # always re-raised.
            print_error("Error creating microop object with mnemonic %s." % \
                        statement.mnemonic)
            raise
        try:
            for label in statement.labels:
                container.labels[label.text] = microop
                if label.is_extern:
                    container.externs[label.text] = microop
            container.add_microop(statement.mnemonic, microop)
        except:
            print_error("Error adding microop.")
            raise
    elif statement.is_directive:
        if statement.name not in container.directives:
            raise Exception("Unrecognized directive: %s" % statement.name)
        parser.symbols["__directiveFunctionFromInsideTheAssembler"] = \
            container.directives[statement.name]
        try:
            # NOTE(review): evaluated as Python code, same caveat as above.
            eval('__directiveFunctionFromInsideTheAssembler(%s)' %
                 statement.params, {}, parser.symbols)
        except:
            print_error("Error executing directive.")
            print(container.directives)
            raise
    else:
        # The statement goes into the message; it must not be passed as a
        # third argument to raise, which is reserved for a traceback.
        raise Exception("Didn't recognize the type of statement: %r" %
                        (statement,))

##########################################################################
#
# Lexer specification
#
##########################################################################

# Error handler. Just call exit. Output formatted to work under
# Emacs compile-mode.
# error(): the optional 'print_traceback' argument, if set to True, also
# prints a Python stack backtrace (can be handy when trying to debug the
# parser itself).
def error(lineno, string, print_traceback = False):
    """Exit via sys.exit() with the message "<lineno>: <string>".

    lineno          -- line number to report; 0 means no line number, in
                       which case the "<lineno>:" prefix is left empty
    string          -- the error text
    print_traceback -- when true, call traceback.print_exc() first

    Never returns normally: sys.exit() raises SystemExit.
    """
    # Print a Python stack backtrace if requested.
    if print_traceback:
        traceback.print_exc()
    if lineno != 0:
        line_str = "%d:" % lineno
    else:
        line_str = ""
    sys.exit("%s %s" % (line_str, string))

reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')

tokens = reserved + (
        # identifier
        'ID',
        # arguments for microops and directives
        'PARAMS',

        'LPAREN', 'RPAREN',
        'LBRACE', 'RBRACE',
        'COLON', 'SEMI', 'DOT',
        'NEWLINE'
        )

# New lines are ignored at the top level, but they end statements in the
# assembler
states = (
    ('asm', 'exclusive'),
    ('params', 'exclusive'),
)

# Maps the lower-case spelling of each reserved word to its token type.
reserved_map = { }
for r in reserved:
    reserved_map[r.lower()] = r

# Ignore comments. The trailing lookahead leaves the newline itself in the
# input so the newline rules still see it.
def t_ANY_COMMENT(t):
    r'\#[^\n]*(?=\n)'

# Ignore /* ... */ comments. They may span several lines, so the newlines
# they swallow are added to the lexer's line count; otherwise every line
# number reported after such a comment would be too small.
def t_ANY_MULTILINECOMMENT(t):
    r'/\*([^/]|((?<!\*)/))*\*/'
    t.lexer.lineno += t.value.count('\n')

# A colon marks the end of a label. It should follow an ID which will
# put the lexer in the "params" state. Seeing the colon will put it back
# in the "asm" state since it knows it saw a label and not a mnemonic.
def t_params_COLON(t):
    r':'
    t.lexer.begin('asm')
    return t

# Parameters are a string of text which doesn't contain an unescaped
# statement terminator, i.e. a newline or semicolon.
# Matches one escaped character inside a parameter string: a backslash
# followed by a newline, a semicolon or another backslash. Compiled once
# here instead of on every PARAMS token.
_unescape_params_re = re.compile(r'(\\[\n;\\])')

# Returns the parameter text with its escapes removed and puts the lexer
# back in the "asm" state.
def t_params_PARAMS(t):
    r'([^\n;\\]|(\\[\n;\\]))+'
    # Escaped newlines are part of the match, so count them. The lexer's
    # own counter is advanced as well; changing only the token would not
    # carry over to the tokens that follow.
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines
    # Drop the backslash of every escape, keeping the escaped character.
    t.value = _unescape_params_re.sub(lambda mo: mo.group(0)[1], t.value)
    t.lexer.begin('asm')
    return t

# An "ID" in the micro assembler is either a label, directive, or mnemonic
# If it's either a directive or a mnemonic, it will be optionally followed by
# parameters. If it's a label, the following colon will make the lexer stop
# looking for parameters.
def t_asm_ID(t):
    r'[A-Za-z_]\w*'
    t.type = reserved_map.get(t.value, 'ID')
    # If the ID is really "extern", we shouldn't start looking for parameters
    # yet. The real ID, the label itself, is coming up.
    if t.type != 'EXTERN':
        t.lexer.begin('params')
    return t

# If there is a label and you're -not- in the assembler (which would be caught
# above), don't start looking for parameters.
def t_ANY_ID(t):
    r'[A-Za-z_]\w*'
    t.type = reserved_map.get(t.value, 'ID')
    return t

# Braces enter and exit micro assembly
def t_INITIAL_LBRACE(t):
    r'\{'
    t.lexer.begin('asm')
    return t

def t_asm_RBRACE(t):
    r'\}'
    t.lexer.begin('INITIAL')
    return t

# At the top level, keep track of newlines only for line counting.
def t_INITIAL_NEWLINE(t):
    r'\n+'
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines

# In the micro assembler, do line counting but also return a token. The
# token is needed by the parser to detect the end of a statement.
def t_asm_NEWLINE(t):
    r'\n+'
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines
    return t

# A newline or semi colon when looking for params signals that the statement
# is over and the lexer should go back to looking for regular assembly.
def t_params_NEWLINE(t):
    r'\n+'
    # Advance the lexer's own line counter too; changing only the token
    # would not carry over to the tokens that follow.
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines
    t.lexer.begin('asm')
    return t

def t_params_SEMI(t):
    r';'
    t.lexer.begin('asm')
    return t

# Basic regular expressions to pick out simple tokens
t_ANY_LPAREN = r'\('
t_ANY_RPAREN = r'\)'
t_ANY_SEMI   = r';'
t_ANY_DOT    = r'\.'

t_ANY_ignore = ' \t\x0c'

def t_ANY_error(t):
    error(t.lineno, "illegal character '%s'" % t.value[0])
    # Skipping is a method of the lexer, not of the token.
    t.lexer.skip(1)

##########################################################################
#
# Parser specification
#
##########################################################################

# Start symbol for a file which may have more than one macroop or rom
# specification.
def p_file(t):
    'file : opt_rom_or_macros'

def p_opt_rom_or_macros_0(t):
    'opt_rom_or_macros : '

def p_opt_rom_or_macros_1(t):
    'opt_rom_or_macros : rom_or_macros'

def p_rom_or_macros_0(t):
    'rom_or_macros : rom_or_macro'

def p_rom_or_macros_1(t):
    'rom_or_macros : rom_or_macros rom_or_macro'

def p_rom_or_macro_0(t):
    '''rom_or_macro : rom_block
                    | macroop_def'''

# Defines a section of microcode that should go in the current ROM
def p_rom_block(t):
    'rom_block : DEF ROM block SEMI'
    if not t.parser.rom:
        print_error("Rom block found, but no Rom object specified.")
        raise TypeError("Rom block found, but no Rom object was specified.")
    for statement in t[3].statements:
        handle_statement(t.parser, t.parser.rom, statement)
    t[0] = t.parser.rom

# Defines a macroop that jumps to an external label in the ROM
def p_macroop_def_0(t):
    'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
    if not t.parser.rom_macroop_type:
        print_error("ROM based macroop found, but no ROM macroop class was specified.")
        raise TypeError("ROM based macroop found, but no ROM macroop class was specified.")
    # t[3] is the macroop name, t[5] the ID given in parentheses.
    macroop = t.parser.rom_macroop_type(t[3], t[5])
    t.parser.macroops[t[3]] = macroop


# Defines a macroop that is combinationally generated
def p_macroop_def_1(t):
    'macroop_def : DEF MACROOP ID block SEMI'
    try:
        curop = t.parser.macro_type(t[3])
    except TypeError:
        print_error("Error creating macroop object.")
        raise
    for statement in t[4].statements:
        handle_statement(t.parser, curop, statement)
    t.parser.macroops[t[3]] = curop

# A block of statements
def p_block(t):
    'block : LBRACE statements RBRACE'
    block = Block()
    block.statements = t[2]
    t[0] = block

def p_statements_0(t):
    'statements : statement'
    # Empty statements have no value and are dropped.
    if t[1]:
        t[0] = [t[1]]
    else:
        t[0] = []

def p_statements_1(t):
    'statements : statements statement'
    if t[2]:
        t[1].append(t[2])
    t[0] = t[1]

def p_statement(t):
    'statement : content_of_statement end_of_statement'
    t[0] = t[1]

# A statement can be a microop or an assembler directive
def p_content_of_statement_0(t):
    '''content_of_statement : microop
                            | directive'''
    t[0] = t[1]

# Ignore empty statements
def p_content_of_statement_1(t):
    'content_of_statement : '
    pass

# Statements are ended by newlines or a semi colon
def p_end_of_statement(t):
    '''end_of_statement : NEWLINE
                        | SEMI'''
    pass

# Different flavors of microop to avoid shift/reduce errors
def p_microop_0(t):
    'microop : labels ID'
    microop = Microop()
    microop.labels = t[1]
    microop.mnemonic = t[2]
    t[0] = microop

def p_microop_1(t):
    'microop : ID'
    microop = Microop()
    microop.mnemonic = t[1]
    t[0] = microop

def p_microop_2(t):
    'microop : labels ID PARAMS'
    microop = Microop()
    microop.labels = t[1]
    microop.mnemonic = t[2]
    microop.params = t[3]
    t[0] = microop

def p_microop_3(t):
    'microop : ID PARAMS'
    microop = Microop()
    microop.mnemonic = t[1]
    microop.params = t[2]
    t[0] = microop

# Labels in the microcode
def p_labels_0(t):
    'labels : label'
    t[0] = [t[1]]

def p_labels_1(t):
    'labels : labels label'
    t[1].append(t[2])
    t[0] = t[1]

# labels on lines by themselves are attached to the following instruction.
def p_labels_2(t):
    'labels : labels NEWLINE'
    t[0] = t[1]

def p_label_0(t):
    'label : ID COLON'
    label = Label()
    label.is_extern = False
    label.text = t[1]
    t[0] = label

def p_label_1(t):
    'label : EXTERN ID COLON'
    label = Label()
    label.is_extern = True
    label.text = t[2]
    t[0] = label

# Directives for the macroop
def p_directive_0(t):
    'directive : DOT ID'
    directive = Directive()
    directive.name = t[2]
    t[0] = directive

def p_directive_1(t):
    'directive : DOT ID PARAMS'
    directive = Directive()
    directive.name = t[2]
    directive.params = t[3]
    t[0] = directive

# Parse error handler.  Note that the argument here is the offending
# *token*, not a grammar symbol (hence the need to use t.value)
def p_error(t):
    if t:
        error(t.lineno, "syntax error at '%s'" % t.value)
    else:
        error(0, "unknown syntax error", True)

class MicroAssembler(object):
    """Front end that builds the lexer and parser and assembles source text.

    macro_type       -- called with a macroop name to create the container
                        for a macroop defined with a block
    microops         -- mapping from mnemonic to the callable that builds
                        the microop
    rom              -- object that receives the statements of
                        "def rom" blocks; None if there is none
    rom_macroop_type -- called with (name, target) for macroops defined
                        with a parenthesized target; None if there is none

    'symbols' is the dict used as the namespace in which statement
    parameters are evaluated; it is shared with the parser.
    """

    def __init__(self, macro_type, microops,
            rom = None, rom_macroop_type = None):
        self.lexer = lex.lex()
        self.parser = yacc.yacc()
        # The grammar actions read their configuration from the parser.
        self.parser.macro_type = macro_type
        self.parser.macroops = {}
        self.parser.microops = microops
        self.parser.rom = rom
        self.parser.rom_macroop_type = rom_macroop_type
        self.parser.symbols = {}
        self.symbols = self.parser.symbols

    def assemble(self, asm):
        """Parse 'asm' and return a dict of the macroops it defined.

        The parser's macroop table is reset afterwards, so each call only
        returns the macroops from its own input.
        """
        self.parser.parse(asm, lexer=self.lexer)
        macroops = self.parser.macroops
        self.parser.macroops = {}
        return macroops