# micro_asm.py revision 6655:380a32b43336
1# Copyright (c) 2003-2005 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26#
27# Authors: Gabe Black
28
29import os
30import sys
31import re
32import string
33import traceback
34# get type names
35from types import *
36
37from ply import lex
38from ply import yacc
39
40##########################################################################
41#
42# Base classes for use outside of the assembler
43#
44##########################################################################
45
class Micro_Container(object):
    """A named, ordered list of microops plus its associated lookup tables."""

    def __init__(self, name):
        self.name = name
        # Microops in the order they were added.
        self.microops = []
        # Tables that start out empty; nothing in this class fills them in.
        self.labels = {}
        self.directives = {}
        self.micro_classes = {}

    def add_microop(self, mnemonic, microop):
        """Append microop to the container; mnemonic is not used here."""
        self.microops.append(microop)

    def __str__(self):
        """Return the name line followed by one indented line per microop."""
        pieces = ["%s:\n" % self.name]
        pieces.extend(["  %s\n" % op for op in self.microops])
        return "".join(pieces)
62
class Combinational_Macroop(Micro_Container):
    """A Micro_Container subclass that adds no behavior of its own."""
65
class Rom_Macroop(object):
    """Pairs a macroop name with the target it refers to."""

    def __init__(self, name, target):
        self.name, self.target = name, target

    def __str__(self):
        """Return "<name>: <target>" followed by a newline."""
        fields = (self.name, self.target)
        return "%s: %s\n" % fields
73
class Rom(Micro_Container):
    """A Micro_Container that additionally keeps a table of extern labels."""

    def __init__(self, name):
        Micro_Container.__init__(self, name)
        # Filled by handle_statement for labels flagged as extern.
        self.externs = {}
78
79##########################################################################
80#
81# Support classes
82#
83##########################################################################
84
class Label(object):
    """A label attached to a microop.

    The grammar actions (p_label_0 / p_label_1) assign ``text`` and
    ``is_extern`` and handle_statement reads them, so both are initialized
    here.  ``name`` and ``extern`` are the attribute names this class
    originally defined; they are kept so existing users keep working.
    """
    def __init__(self):
        # Attributes actually used by the assembler.
        self.is_extern = False
        self.text = ""
        # Original attribute names, retained for backward compatibility.
        self.extern = False
        self.name = ""
89
class Block(object):
    """Holds the list of statements found between a pair of braces."""

    def __init__(self):
        self.statements = list()
93
class Statement(object):
    """Common base for parsed statements; both kind flags start out False."""

    def __init__(self):
        # Raw, unparsed parameter text.
        self.params = ""
        self.is_microop = self.is_directive = False
99
class Microop(Statement):
    """A statement naming a microop mnemonic, with any labels attached to it."""

    def __init__(self):
        Statement.__init__(self)
        self.is_microop = True
        self.mnemonic = ""
        self.labels = []
106
class Directive(Statement):
    """A statement naming an assembler directive."""

    def __init__(self):
        Statement.__init__(self)
        self.is_directive = True
        self.name = ""
112
113##########################################################################
114#
115# Functions that handle common tasks
116#
117##########################################################################
118
def print_error(message):
    """Print message on stdout, prefixed with "*** " and framed by blank lines.

    The call form of print with a single argument produces the same output
    on Python 2 and Python 3, unlike the bare print statement.
    """
    print("")
    print("*** %s" % message)
    print("")
123
def handle_statement(parser, container, statement):
    """Turn one parsed statement into a microop or a directive call.

    parser    -- provides ``microops`` (mnemonic -> callable) and ``symbols``,
                 the namespace the parameter text is evaluated in.
    container -- provides ``labels``, ``directives`` and ``add_microop``;
                 ``externs`` is also required when a label is flagged extern.
    statement -- carries the ``is_microop`` / ``is_directive`` flags,
                 ``params``, and either ``mnemonic`` + ``labels`` or ``name``.

    Raises Exception for an unknown mnemonic, an unknown directive or a
    statement that is neither kind.  Errors raised while evaluating the
    parameters or adding the microop are reported and re-raised unchanged.

    NOTE: the parameter text is passed to eval(), so the assembly source
    must come from a trusted origin.
    """
    if statement.is_microop:
        if statement.mnemonic not in parser.microops:
            raise Exception("Unrecognized mnemonic: %s" % statement.mnemonic)
        # Expose the class under a fixed name so the evaluated text can call it.
        parser.symbols["__microopClassFromInsideTheAssembler"] = \
            parser.microops[statement.mnemonic]
        # The bare excepts below always re-raise; they only add a message.
        try:
            microop = eval('__microopClassFromInsideTheAssembler(%s)' %
                    statement.params, {}, parser.symbols)
        except:
            print_error("Error creating microop object with mnemonic %s." % \
                    statement.mnemonic)
            raise
        try:
            for label in statement.labels:
                container.labels[label.text] = microop
                if label.is_extern:
                    container.externs[label.text] = microop
            container.add_microop(statement.mnemonic, microop)
        except:
            print_error("Error adding microop.")
            raise
    elif statement.is_directive:
        if statement.name not in container.directives:
            raise Exception("Unrecognized directive: %s" % statement.name)
        parser.symbols["__directiveFunctionFromInsideTheAssembler"] = \
            container.directives[statement.name]
        try:
            eval('__directiveFunctionFromInsideTheAssembler(%s)' %
                    statement.params, {}, parser.symbols)
        except:
            print_error("Error executing directive.")
            print(container.directives)
            raise
    else:
        # The statement object travels as a second exception argument; the
        # old three-operand raise treated it as a traceback and failed with
        # a TypeError instead.
        raise Exception("Didn't recognize the type of statement", statement)
160
161##########################################################################
162#
163# Lexer specification
164#
165##########################################################################
166
167# Error handler.  Just call exit.  Output formatted to work under
168# Emacs compile-mode.  Optional 'print_traceback' arg, if set to True,
169# prints a Python stack backtrace too (can be handy when trying to
170# debug the parser itself).
def error(lineno, string, print_traceback = False):
    """Exit the process with "<lineno>: <string>" as the exit message.

    A lineno of 0 means "no line known" and leaves the prefix empty.  When
    print_traceback is true the current exception's traceback is printed
    before exiting.
    """
    if print_traceback:
        traceback.print_exc()
    prefix = ""
    if lineno != 0:
        prefix = "%d:" % lineno
    sys.exit("%s %s" % (prefix, string))
180
# Keyword token names; reserved_map below maps their lower-case spellings
# back to these names.
reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')

tokens = reserved + (
    'ID',                   # identifier
    'PARAMS',               # arguments for microops and directives
    'LPAREN', 'RPAREN',
    'LBRACE', 'RBRACE',
    'COLON', 'SEMI', 'DOT',
    'NEWLINE',
)

# New lines are ignored at the top level, but they end statements in the
# assembler
states = (('asm', 'exclusive'), ('params', 'exclusive'))

# Lower-case keyword text -> token name.
reserved_map = dict((word.lower(), word) for word in reserved)
205
206# Ignore comments
def t_ANY_COMMENT(t):
    r'\#[^\n]*'
    # Nothing is returned, so the comment text is dropped.  The pattern stops
    # in front of the newline by itself; it no longer requires one to follow,
    # so a comment on the very last line of the input is matched as well.
209
def t_ANY_MULTILINECOMMENT(t):
    r'/\*([^/]|((?<!\*)/))*\*/'
    # The comment is dropped (nothing is returned), but the newlines it spans
    # still have to be counted so later line numbers stay correct.
    t.lexer.lineno += t.value.count('\n')
212
213# A colon marks the end of a label. It should follow an ID which will
214# put the lexer in the "params" state. Seeing the colon will put it back
215# in the "asm" state since it knows it saw a label and not a mnemonic.
def t_params_COLON(t):
    r':'
    # The ID before the colon was a label, so leave the "params" state.
    lexer = t.lexer
    lexer.begin('asm')
    return t
220
# Parameters are a string of text which doesn't contain an unescaped
# statement terminator, i.e. a newline or semicolon.
def t_params_PARAMS(t):
    r'([^\n;\\]|(\\[\n;\\]))+'
    # Escaped newlines are part of the match.  Count them on the lexer, whose
    # line counter is the one that carries over to the following tokens.
    t.lexer.lineno += t.value.count('\n')
    # Drop the backslash in front of each escaped newline, semicolon or
    # backslash, keeping the escaped character itself.
    t.value = re.sub(r'\\([\n;\\])', r'\1', t.value)
    t.lexer.begin('asm')
    return t
233
234# An "ID" in the micro assembler is either a label, directive, or mnemonic
235# If it's either a directive or a mnemonic, it will be optionally followed by
236# parameters. If it's a label, the following colon will make the lexer stop
237# looking for parameters.
def t_asm_ID(t):
    r'[A-Za-z_]\w*'
    kind = reserved_map.get(t.value, 'ID')
    t.type = kind
    # "extern" only introduces a label; the label's own ID is still to come,
    # so parameter scanning must not start yet.
    if kind != 'EXTERN':
        t.lexer.begin('params')
    return t
246
247# If there is a label and you're -not- in the assembler (which would be caught
248# above), don't start looking for parameters.
def t_ANY_ID(t):
    r'[A-Za-z_]\w*'
    # Keywords get their own token type, everything else is a plain ID.
    kind = reserved_map.get(t.value, 'ID')
    t.type = kind
    return t
253
254# Braces enter and exit micro assembly
def t_INITIAL_LBRACE(t):
    r'\{'
    # An opening brace at the top level starts a stretch of micro assembly.
    lexer = t.lexer
    lexer.begin('asm')
    return t
259
def t_asm_RBRACE(t):
    r'\}'
    # A closing brace ends the micro assembly and returns to the top level.
    lexer = t.lexer
    lexer.begin('INITIAL')
    return t
264
265# At the top level, keep track of newlines only for line counting.
def t_INITIAL_NEWLINE(t):
    r'\n+'
    # No token is returned.  The count goes on the lexer, because each new
    # token takes its line number from there; adding it to this token alone
    # would be lost as soon as the token is discarded.
    t.lexer.lineno += t.value.count('\n')
269
270# In the micro assembler, do line counting but also return a token. The
271# token is needed by the parser to detect the end of a statement.
def t_asm_NEWLINE(t):
    r'\n+'
    # Count on the lexer so the following tokens get the right line number;
    # the returned token keeps the line on which the run of newlines began.
    t.lexer.lineno += t.value.count('\n')
    return t
276
277# A newline or semi colon when looking for params signals that the statement
278# is over and the lexer should go back to looking for regular assembly.
def t_params_NEWLINE(t):
    r'\n+'
    # Count on the lexer so the following tokens get the right line number.
    t.lexer.lineno += t.value.count('\n')
    # The statement is over; go back to scanning regular assembly.
    t.lexer.begin('asm')
    return t
284
def t_params_SEMI(t):
    r';'
    # A semicolon ends the statement just like a newline does.
    lexer = t.lexer
    lexer.begin('asm')
    return t
289
# Basic regular expressions to pick out simple tokens.  These are plain
# string rules rather than functions, so they carry no state changes.
# NOTE(review): the ANY prefix presumably registers a rule in every lexer
# state -- confirm against the PLY documentation.
t_ANY_LPAREN = r'\('
t_ANY_RPAREN = r'\)'
t_ANY_SEMI   = r';'
t_ANY_DOT    = r'\.'

# Space, tab and form feed (\x0c).
t_ANY_ignore = ' \t\x0c'
297
def t_ANY_error(t):
    # Report the first character that no rule could match.  error() ends the
    # process through sys.exit(), so the skip below is only reached if that
    # ever changes; it is spelled with the lexer method that replaced the
    # deprecated per-token t.skip().
    error(t.lineno, "illegal character '%s'" % t.value[0])
    t.lexer.skip(1)
301
302##########################################################################
303#
304# Parser specification
305#
306##########################################################################
307
# Start symbol for a file which may have more than one macroop or rom
# specification.  The rule has no action; the work is done by the actions
# of the rom_block and macroop_def rules.
def p_file(t):
    'file : opt_rom_or_macros'
312
# Empty alternative: a file may contain no definitions at all.
def p_opt_rom_or_macros_0(t):
    'opt_rom_or_macros : '
315
# Non-empty alternative: one or more rom blocks or macroop definitions.
def p_opt_rom_or_macros_1(t):
    'opt_rom_or_macros : rom_or_macros'
318
# Base case of the list: a single rom block or macroop definition.
def p_rom_or_macros_0(t):
    'rom_or_macros : rom_or_macro'
321
# Recursive case of the list: an existing list followed by one more item.
def p_rom_or_macros_1(t):
    'rom_or_macros : rom_or_macros rom_or_macro'
324
# A top-level item is either a rom block or a macroop definition.
def p_rom_or_macro_0(t):
    '''rom_or_macro : rom_block
                    | macroop_def'''
328
329# Defines a section of microcode that should go in the current ROM
def p_rom_block(t):
    'rom_block : DEF ROM block SEMI'
    # The Rom object is supplied through the parser; without one there is
    # nowhere to put the block's microops.
    if not t.parser.rom:
        print_error("Rom block found, but no Rom object specified.")
        # Call form of raise: accepted by both Python 2 and Python 3.
        raise TypeError("Rom block found, but no Rom object was specified.")
    for statement in t[3].statements:
        handle_statement(t.parser, t.parser.rom, statement)
    t[0] = t.parser.rom
338
339# Defines a macroop that jumps to an external label in the ROM
def p_macroop_def_0(t):
    'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
    # The class used for ROM based macroops is supplied through the parser.
    if not t.parser.rom_macroop_type:
        print_error("ROM based macroop found, but no ROM macroop class was specified.")
        # Call form of raise: accepted by both Python 2 and Python 3.
        raise TypeError("ROM based macroop found, but no ROM macroop class was specified.")
    # t[3] is the macroop's name, t[5] the ID given inside the parentheses.
    macroop = t.parser.rom_macroop_type(t[3], t[5])
    t.parser.macroops[t[3]] = macroop
346    t.parser.macroops[t[3]] = macroop
347
348
349# Defines a macroop that is combinationally generated
def p_macroop_def_1(t):
    'macroop_def : DEF MACROOP ID block SEMI'
    name, body = t[3], t[4]
    # A TypeError raised by the macroop class is reported before it is
    # passed on.
    try:
        curop = t.parser.macro_type(name)
    except TypeError:
        print_error("Error creating macroop object.")
        raise
    for stmt in body.statements:
        handle_statement(t.parser, curop, stmt)
    t.parser.macroops[name] = curop
360
361# A block of statements
def p_block(t):
    'block : LBRACE statements RBRACE'
    # Wrap the statement list found between the braces.
    result = Block()
    result.statements = t[2]
    t[0] = result
367
def p_statements_0(t):
    'statements : statement'
    # Start the list; a statement with a false value contributes nothing.
    collected = []
    first = t[1]
    if first:
        collected.append(first)
    t[0] = collected
374
def p_statements_1(t):
    'statements : statements statement'
    # Grow the existing list in place, skipping statements with a false value.
    collected, latest = t[1], t[2]
    if latest:
        collected.append(latest)
    t[0] = collected
380
def p_statement(t):
    'statement : content_of_statement end_of_statement'
    # Only the content is passed upward; the terminator's value is not used.
    content = t[1]
    t[0] = content
384
385# A statement can be a microop or an assembler directive
def p_content_of_statement_0(t):
    '''content_of_statement : microop
                            | directive'''
    # Either alternative is handed upward unchanged.
    content = t[1]
    t[0] = content
390
391# Ignore empty statements
def p_content_of_statement_1(t):
    'content_of_statement : '
    # Empty alternative: t[0] is deliberately left unassigned here.
    pass
395
396# Statements are ended by newlines or a semi colon
def p_end_of_statement(t):
    '''end_of_statement : NEWLINE
                        | SEMI'''
    # The terminator only delimits statements; no value is produced for it.
    pass
401
402# Different flavors of microop to avoid shift/reduce errors
def p_microop_0(t):
    'microop : labels ID'
    # Labelled microop without parameters.
    op = Microop()
    op.labels, op.mnemonic = t[1], t[2]
    t[0] = op
409
def p_microop_1(t):
    'microop : ID'
    # Bare mnemonic: no labels, no parameters.
    op = Microop()
    op.mnemonic = t[1]
    t[0] = op
415
def p_microop_2(t):
    'microop : labels ID PARAMS'
    # Labelled microop with parameters.
    op = Microop()
    op.labels, op.mnemonic, op.params = t[1], t[2], t[3]
    t[0] = op
423
def p_microop_3(t):
    'microop : ID PARAMS'
    # Unlabelled microop with parameters.
    op = Microop()
    op.mnemonic, op.params = t[1], t[2]
    t[0] = op
430
431# Labels in the microcode
def p_labels_0(t):
    'labels : label'
    # Start a new label list with the single label seen so far.
    found = []
    found.append(t[1])
    t[0] = found
435
def p_labels_1(t):
    'labels : labels label'
    # Extend the existing list in place with the newest label.
    found, newest = t[1], t[2]
    found.append(newest)
    t[0] = found
440
441# labels on lines by themselves are attached to the following instruction.
def p_labels_2(t):
    'labels : labels NEWLINE'
    # The newline is dropped; the label list passes through unchanged.
    found = t[1]
    t[0] = found
445
def p_label_0(t):
    'label : ID COLON'
    # Plain (non-extern) label named by the ID.
    lbl = Label()
    lbl.text, lbl.is_extern = t[1], False
    t[0] = lbl
452
def p_label_1(t):
    'label : EXTERN ID COLON'
    # Extern label; the name is the ID following the keyword.
    lbl = Label()
    lbl.text, lbl.is_extern = t[2], True
    t[0] = lbl
459
460# Directives for the macroop
def p_directive_0(t):
    'directive : DOT ID'
    # Directive without parameters; the name is the ID after the dot.
    result = Directive()
    result.name = t[2]
    t[0] = result
466
def p_directive_1(t):
    'directive : DOT ID PARAMS'
    # Directive with parameters.
    result = Directive()
    result.name, result.params = t[2], t[3]
    t[0] = result
473
474# Parse error handler.  Note that the argument here is the offending
475# *token*, not a grammar symbol (hence the need to use t.value)
def p_error(t):
    # With no token there is no line or text to report, so a traceback is
    # requested instead.
    if not t:
        error(0, "unknown syntax error", True)
    else:
        error(t.lineno, "syntax error at '%s'" % t.value)
481
class MicroAssembler(object):
    """Builds the lexer and parser and assembles text into macroops."""

    def __init__(self, macro_type, microops,
            rom = None, rom_macroop_type = None):
        self.lexer = lex.lex()
        parser = yacc.yacc()
        # The grammar actions read their configuration and working state
        # from attributes of the parser object.
        parser.macro_type = macro_type
        parser.macroops = {}
        parser.microops = microops
        parser.rom = rom
        parser.rom_macroop_type = rom_macroop_type
        parser.symbols = {}
        self.parser = parser
        # Same dictionary object as parser.symbols, not a copy.
        self.symbols = parser.symbols

    def assemble(self, asm):
        """Parse asm and return the macroops it defined, keyed by name.

        The parser's macroop table is replaced by an empty one afterwards,
        so each call returns only its own results.
        """
        parser = self.parser
        parser.parse(asm, lexer=self.lexer)
        assembled, parser.macroops = parser.macroops, {}
        return assembled
501