micro_asm.py revision 5594:7ef21c6c76bb
1# Copyright (c) 2003-2005 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26#
27# Authors: Gabe Black
28
29import os
30import sys
31import re
32import string
33import traceback
34# get type names
35from types import *
36
37# Prepend the directory where the PLY lex & yacc modules are found
38# to the search path.
39sys.path[0:0] = [os.environ['M5_PLY']]
40
41from ply import lex
42from ply import yacc
43
44##########################################################################
45#
46# Base classes for use outside of the assembler
47#
48##########################################################################
49
class Micro_Container(object):
    """A named, ordered collection of microops.

    Besides the microop list, every container starts with three empty
    dictionaries: ``directives`` (directive name -> callable, consulted
    when a directive statement is handled), ``micro_classes`` and
    ``labels`` (label text -> microop).
    """

    def __init__(self, name):
        self.name = name
        self.microops = []
        self.directives = {}
        self.micro_classes = {}
        self.labels = {}

    def add_microop(self, mnemonic, microop):
        """Append ``microop``; ``mnemonic`` is accepted but not used here."""
        self.microops.append(microop)

    def __str__(self):
        """Return the name on one line, then one indented line per microop."""
        header = "%s:\n" % self.name
        return header + "".join("  %s\n" % op for op in self.microops)
66
class Combinational_Macroop(Micro_Container):
    """Micro_Container subclass that adds no behavior of its own."""
    pass
69
class Rom_Macroop(object):
    """A macroop described only by its name and a target.

    The grammar builds these from ``def macroop NAME (TARGET);`` when a
    ROM macroop class is supplied to the assembler.
    """

    def __init__(self, name, target):
        self.target = target
        self.name = name

    def __str__(self):
        """Return ``"<name>: <target>"`` followed by a newline."""
        fields = (self.name, self.target)
        return "%s: %s\n" % fields
77
class Rom(Micro_Container):
    """Microop container that additionally records extern labels."""
    def __init__(self, name):
        super(Rom, self).__init__(name)
        # Extern label text -> microop; filled in by handle_statement.
        self.externs = {}
82
83##########################################################################
84#
85# Support classes
86#
87##########################################################################
88
class Label(object):
    """A label that precedes a microop in the assembly text.

    Attributes:
        is_extern: True when the label was written with ``extern``.
        text: the label's identifier.
        extern, name: the names this class originally initialised; kept
            so code that reads them keeps working.
    """
    def __init__(self):
        # These are the attributes the label grammar rules assign and
        # handle_statement reads, so give them defaults as well.
        self.is_extern = False
        self.text = ""
        # Original attribute names, retained for backward compatibility.
        self.extern = False
        self.name = ""
93
class Block(object):
    """The statements found between one pair of braces."""
    def __init__(self):
        # Parsed statements in source order; the p_block rule replaces
        # this with the list built by the 'statements' rules.
        self.statements = []
97
class Statement(object):
    """Common base for parsed statements.

    ``is_microop`` and ``is_directive`` both start out False and are
    switched on by the subclasses; ``params`` holds the raw parameter
    text and defaults to the empty string.
    """
    def __init__(self):
        self.params = ""
        self.is_microop = self.is_directive = False
103
class Microop(Statement):
    """A statement that names a microop, optionally preceded by labels."""
    def __init__(self):
        super(Microop, self).__init__()
        # Name looked up in the assembler's microop table.
        self.mnemonic = ""
        # Label objects attached to this microop.
        self.labels = []
        self.is_microop = True
110
class Directive(Statement):
    """A statement of the form ``.name [params]``."""
    def __init__(self):
        super(Directive, self).__init__()
        # Key looked up in the container's 'directives' dictionary.
        self.name = ""
        self.is_directive = True
116
117##########################################################################
118#
119# Functions that handle common tasks
120#
121##########################################################################
122
def print_error(message):
    """Print ``message`` prefixed with ``*** `` between two blank lines.

    A single parenthesised, one-argument print is used so the output is
    byte-for-byte the same whether ``print`` is a statement (Python 2) or
    a function (Python 3).
    """
    print("\n*** %s\n" % message)
127
def handle_statement(parser, container, statement):
    """Apply one parsed statement to ``container``.

    For a microop statement the mnemonic is looked up in
    ``parser.microops``, the resulting class is called with the
    statement's parameter text, the new object is registered under each
    of the statement's labels (and in ``container.externs`` for extern
    labels) and finally handed to ``container.add_microop``.

    For a directive statement the function stored under the directive's
    name in ``container.directives`` is called with the parameter text.

    Raises:
        Exception: for an unknown mnemonic or directive, or a statement
            that is neither a microop nor a directive. Errors raised
            while building or adding a microop, or while running a
            directive, are reported with print_error and re-raised.
    """
    # SECURITY NOTE: the parameter text is evaluated with eval(), with
    # parser.symbols as the local namespace. Only assemble trusted input.
    if statement.is_microop:
        if statement.mnemonic not in parser.microops:
            raise Exception("Unrecognized mnemonic: %s" % statement.mnemonic)
        # Expose the class under a fixed name so the parameter text can be
        # spliced into a call expression.
        parser.symbols["__microopClassFromInsideTheAssembler"] = \
            parser.microops[statement.mnemonic]
        try:
            microop = eval('__microopClassFromInsideTheAssembler(%s)' %
                    statement.params, {}, parser.symbols)
        except Exception:
            print_error("Error creating microop object with mnemonic %s." % \
                    statement.mnemonic)
            raise
        try:
            for label in statement.labels:
                container.labels[label.text] = microop
                if label.is_extern:
                    container.externs[label.text] = microop
            container.add_microop(statement.mnemonic, microop)
        except Exception:
            print_error("Error adding microop.")
            raise
    elif statement.is_directive:
        if statement.name not in container.directives:
            raise Exception("Unrecognized directive: %s" % statement.name)
        parser.symbols["__directiveFunctionFromInsideTheAssembler"] = \
            container.directives[statement.name]
        try:
            eval('__directiveFunctionFromInsideTheAssembler(%s)' %
                    statement.params, {}, parser.symbols)
        except Exception:
            print_error("Error executing directive.")
            print(container.directives)
            raise
    else:
        # The old three-argument raise passed the statement where a
        # traceback object is required; put it in the message instead.
        raise Exception("Didn't recognize the type of statement: %r" %
                (statement,))
164
165##########################################################################
166#
167# Lexer specification
168#
169##########################################################################
170
171# Error handler.  Just call exit.  Output formatted to work under
172# Emacs compile-mode.  Optional 'print_traceback' arg, if set to True,
173# prints a Python stack backtrace too (can be handy when trying to
174# debug the parser itself).
def error(lineno, string, print_traceback = False):
    """Exit the program with the message ``"<lineno>: <string>"``.

    When ``lineno`` is 0 the line-number prefix is left empty (the
    separating space is still emitted). If ``print_traceback`` is true,
    the current exception's traceback is printed first.
    """
    if print_traceback:
        traceback.print_exc()
    prefix = ""
    if lineno != 0:
        prefix = "%d:" % lineno
    sys.exit("%s %s" % (prefix, string))
184
# Keywords of the language; each keyword is also a token type.
reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')

# Every token type the lexer can produce: the keywords plus the ones below.
tokens = reserved + (
        # identifier
        'ID',
        # arguments for microops and directives
        'PARAMS',

        'LPAREN', 'RPAREN',
        'LBRACE', 'RBRACE',
        'COLON', 'SEMI', 'DOT',
        'NEWLINE'
        )
198
199# New lines are ignored at the top level, but they end statements in the
200# assembler
# Extra lexer states beside the default 'INITIAL'. 'asm' is entered on '{'
# and left on '}'; 'params' is entered after a non-extern identifier seen
# in 'asm' and left again on a colon, parameter text, newline or semicolon.
states = (
    ('asm', 'exclusive'),
    ('params', 'exclusive'),
)
205
# Maps each keyword as written in the source (lower case) to its token type.
reserved_map = { }
for r in reserved:
    reserved_map[r.lower()] = r
209
210# Ignore comments
def t_ANY_COMMENT(t):
    r'\#[^\n]*(?=\n)'
    # Nothing is returned, so the comment text produces no token. The
    # lookahead leaves the newline in the input for the NEWLINE rules, and
    # it means a comment only matches when a newline follows it.
213
def t_ANY_MULTILINECOMMENT(t):
    r'/\*([^/]|((?<!\*)/))*\*/'
    # Nothing is returned, so the comment text produces no token.
    # A block comment may span several lines; account for them the same
    # way the NEWLINE and PARAMS rules do.
    # NOTE(review): like those rules this updates t.lineno - confirm the
    # PLY version in use carries that over to the lexer's line count.
    t.lineno += t.value.count('\n')
216
217# A colon marks the end of a label. It should follow an ID which will
218# put the lexer in the "params" state. Seeing the colon will put it back
219# in the "asm" state since it knows it saw a label and not a mnemonic.
def t_params_COLON(t):
    r':'
    # The preceding ID turned out to be a label, so go back to 'asm'
    # instead of collecting parameter text.
    t.lexer.begin('asm')
    return t
224
# Parameters are a string of text which doesn't contain an unescaped
# statement terminator, i.e. a newline or semicolon.
def t_params_PARAMS(t):
    r'([^\n;\\]|(\\[\n;\\]))+'
    # Count lines on the raw text, while escaped newlines are still there.
    raw = t.value
    t.lineno += raw.count('\n')
    # Strip the backslash from each escaped newline, semicolon or backslash,
    # keeping the character that followed it.
    t.value = re.sub(r'(\\[\n;\\])', lambda mo: mo.group(0)[1], raw)
    # The parameter text ends the statement's operand part; resume normal
    # assembly lexing.
    t.lexer.begin('asm')
    return t
237
238# An "ID" in the micro assembler is either a label, directive, or mnemonic
239# If it's either a directive or a mnemonic, it will be optionally followed by
240# parameters. If it's a label, the following colon will make the lexer stop
241# looking for parameters.
def t_asm_ID(t):
    r'[A-Za-z_]\w*'
    # Keywords get their own token type; everything else is a plain ID.
    kind = reserved_map.get(t.value, 'ID')
    t.type = kind
    # "extern" only introduces a label: the identifier that matters comes
    # next, so stay in 'asm' for it.
    if kind == 'EXTERN':
        return t
    # Anything else may be followed by parameter text.
    t.lexer.begin('params')
    return t
250
251# If there is a label and you're -not- in the assembler (which would be caught
252# above), don't start looking for parameters.
def t_ANY_ID(t):
    r'[A-Za-z_]\w*'
    # Keywords get their own token type; everything else is a plain ID.
    # Unlike t_asm_ID, the lexer state is left unchanged.
    t.type = reserved_map.get(t.value, 'ID')
    return t
257
258# Braces enter and exit micro assembly
def t_INITIAL_LBRACE(t):
    r'\{'
    # Defined for the INITIAL state only: an opening brace switches the
    # lexer into the 'asm' state.
    t.lexer.begin('asm')
    return t
263
def t_asm_RBRACE(t):
    r'\}'
    # Defined for the 'asm' state only: a closing brace returns the lexer
    # to the top-level INITIAL state.
    t.lexer.begin('INITIAL')
    return t
268
269# At the top level, keep track of newlines only for line counting.
def t_INITIAL_NEWLINE(t):
    r'\n+'
    # One match may cover several blank lines; count each of them. Nothing
    # is returned, so no NEWLINE token is produced at the top level.
    t.lineno += t.value.count('\n')
273
274# In the micro assembler, do line counting but also return a token. The
275# token is needed by the parser to detect the end of a statement.
def t_asm_NEWLINE(t):
    r'\n+'
    # One match may cover several blank lines; count each of them, then
    # hand a single NEWLINE token to the parser.
    t.lineno += t.value.count('\n')
    return t
280
281# A newline or semi colon when looking for params signals that the statement
282# is over and the lexer should go back to looking for regular assembly.
def t_params_NEWLINE(t):
    r'\n+'
    # Count the lines, leave the 'params' state and return the NEWLINE
    # token that ends the statement.
    t.lineno += t.value.count('\n')
    t.lexer.begin('asm')
    return t
288
def t_params_SEMI(t):
    r';'
    # A semicolon right after an ID (no parameters) ends the statement:
    # leave the 'params' state and return the SEMI token.
    t.lexer.begin('asm')
    return t
293
294# Basic regular expressions to pick out simple tokens
# String rules declared with the "ANY" prefix, i.e. for every lexer state.
t_ANY_LPAREN = r'\('
t_ANY_RPAREN = r'\)'
t_ANY_SEMI   = r';'
t_ANY_DOT    = r'\.'

# Characters skipped between tokens in every state: space, tab, form feed.
t_ANY_ignore = ' \t\x0c'
301
def t_ANY_error(t):
    # Reports the first character of the unmatched input. error() finishes
    # with sys.exit(), so the skip below only runs if that exit is
    # intercepted.
    error(t.lineno, "illegal character '%s'" % t.value[0])
    t.skip(1)
305
306##########################################################################
307#
308# Parser specification
309#
310##########################################################################
311
312# Start symbol for a file which may have more than one macroop or rom
313# specification.
def p_file(t):
    'file : opt_rom_or_macros'
    # No action: the macroop_def rules store their results in
    # t.parser.macroops rather than passing them up the parse tree.
316
def p_opt_rom_or_macros_0(t):
    'opt_rom_or_macros : '
    # Empty production: a file may contain no definitions at all.
319
def p_opt_rom_or_macros_1(t):
    'opt_rom_or_macros : rom_or_macros'
    # No action; this rule only provides structure.
322
def p_rom_or_macros_0(t):
    'rom_or_macros : rom_or_macro'
    # Base case of the definition list; no action.
325
def p_rom_or_macros_1(t):
    'rom_or_macros : rom_or_macros rom_or_macro'
    # Left-recursive case of the definition list; no action.
328
def p_rom_or_macro_0(t):
    '''rom_or_macro : rom_block
                    | macroop_def'''
    # A top-level definition is either a ROM block or a macroop; no action.
332
333# Defines a section of microcode that should go in the current ROM
def p_rom_block(t):
    'rom_block : DEF ROM block SEMI'
    # Feeds every statement of the block to the Rom object given to the
    # assembler and yields that object as the rule's value.
    # Raises TypeError when the assembler was created without a Rom object.
    if not t.parser.rom:
        print_error("Rom block found, but no Rom object specified.")
        raise TypeError("Rom block found, but no Rom object was specified.")
    for statement in t[3].statements:
        handle_statement(t.parser, t.parser.rom, statement)
    t[0] = t.parser.rom
342
343# Defines a macroop that jumps to an external label in the ROM
def p_macroop_def_0(t):
    'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
    # Builds a ROM macroop from the macroop name (t[3]) and the target
    # identifier in parentheses (t[5]) and records it under its name.
    # Raises TypeError when no ROM macroop class was given to the assembler.
    if not t.parser.rom_macroop_type:
        print_error("ROM based macroop found, but no ROM macroop class was specified.")
        raise TypeError("ROM based macroop found, but no ROM macroop class was specified.")
    macroop = t.parser.rom_macroop_type(t[3], t[5])
    t.parser.macroops[t[3]] = macroop
351
352
353# Defines a macroop that is combinationally generated
def p_macroop_def_1(t):
    'macroop_def : DEF MACROOP ID block SEMI'
    # Create the macroop container from its name, run every statement of
    # the block against it, then record it under that name.
    name = t[3]
    try:
        macroop = t.parser.macro_type(name)
    except TypeError:
        print_error("Error creating macroop object.")
        raise
    body = t[4]
    for stmt in body.statements:
        handle_statement(t.parser, macroop, stmt)
    t.parser.macroops[name] = macroop
364
365# A block of statements
def p_block(t):
    'block : LBRACE statements RBRACE'
    # Wrap the statement list collected between the braces in a Block.
    block = Block()
    block.statements = t[2]
    t[0] = block
371
def p_statements_0(t):
    'statements : statement'
    # Start the statement list; an empty statement (falsy value)
    # contributes nothing.
    collected = []
    if t[1]:
        collected.append(t[1])
    t[0] = collected
378
def p_statements_1(t):
    'statements : statements statement'
    # Extend the existing list in place; empty statements are skipped.
    so_far = t[1]
    latest = t[2]
    if latest:
        so_far.append(latest)
    t[0] = so_far
384
def p_statement(t):
    'statement : content_of_statement end_of_statement'
    # The terminator carries no information; pass the content through.
    t[0] = t[1]
388
389# A statement can be a microop or an assembler directive
def p_content_of_statement_0(t):
    '''content_of_statement : microop
                            | directive'''
    # Pass the Microop or Directive object through unchanged.
    t[0] = t[1]
394
395# Ignore empty statements
def p_content_of_statement_1(t):
    'content_of_statement : '
    # t[0] is never assigned, so the statement's value stays falsy and the
    # 'statements' rules leave it out of the list.
    pass
399
400# Statements are ended by newlines or a semi colon
def p_end_of_statement(t):
    '''end_of_statement : NEWLINE
                        | SEMI'''
    # Terminators only delimit statements; there is nothing to record.
    pass
405
406# Different flavors of microop to avoid shift/reduce errors
def p_microop_0(t):
    'microop : labels ID'
    # Labelled microop without parameters.
    microop = Microop()
    microop.labels = t[1]
    microop.mnemonic = t[2]
    t[0] = microop
413
def p_microop_1(t):
    'microop : ID'
    # Bare mnemonic: no labels, no parameters.
    microop = Microop()
    microop.mnemonic = t[1]
    t[0] = microop
419
def p_microop_2(t):
    'microop : labels ID PARAMS'
    # Labelled microop with parameter text.
    microop = Microop()
    microop.labels = t[1]
    microop.mnemonic = t[2]
    microop.params = t[3]
    t[0] = microop
427
def p_microop_3(t):
    'microop : ID PARAMS'
    # Unlabelled microop with parameter text.
    microop = Microop()
    microop.mnemonic = t[1]
    microop.params = t[2]
    t[0] = microop
434
435# Labels in the microcode
def p_labels_0(t):
    'labels : label'
    # Start a new list holding the first label.
    t[0] = [t[1]]
439
def p_labels_1(t):
    'labels : labels label'
    # Extend the existing label list in place with the new label.
    so_far = t[1]
    so_far.append(t[2])
    t[0] = so_far
444
445# labels on lines by themselves are attached to the following instruction.
def p_labels_2(t):
    'labels : labels NEWLINE'
    # Swallow the newline so the labels carry over to the next line.
    t[0] = t[1]
449
def p_label_0(t):
    'label : ID COLON'
    # Ordinary label. is_extern and text are the attributes that
    # handle_statement reads.
    label = Label()
    label.is_extern = False
    label.text = t[1]
    t[0] = label
456
def p_label_1(t):
    'label : EXTERN ID COLON'
    # Extern label: handle_statement also records these in the
    # container's 'externs' dictionary.
    label = Label()
    label.is_extern = True
    label.text = t[2]
    t[0] = label
463
464# Directives for the macroop
def p_directive_0(t):
    'directive : DOT ID'
    # Directive without parameters; params keeps its default of "".
    directive = Directive()
    directive.name = t[2]
    t[0] = directive
470
def p_directive_1(t):
    'directive : DOT ID PARAMS'
    # Directive with parameter text.
    directive = Directive()
    directive.name = t[2]
    directive.params = t[3]
    t[0] = directive
477
478# Parse error handler.  Note that the argument here is the offending
479# *token*, not a grammar symbol (hence the need to use t.value)
def p_error(t):
    # Without an offending token there is no line number or text to
    # report; ask error() for a traceback instead.
    if not t:
        error(0, "unknown syntax error", True)
        return
    error(t.lineno, "syntax error at '%s'" % t.value)
485
class MicroAssembler(object):
    """Front end that lexes and parses microcode assembly text.

    The constructor builds the PLY lexer and parser from this module and
    attaches the caller's configuration to the parser object, where the
    grammar actions read it:

        macro_type: called with a macroop name to create its container.
        microops: mapping from mnemonic to the callable that builds the
            microop.
        rom: container that receives the statements of ``def rom`` blocks
            (optional).
        rom_macroop_type: called with (name, target) for ROM based
            macroops (optional).

    ``symbols`` is the namespace in which microop and directive
    parameters are evaluated; it is shared with ``parser.symbols``.
    """

    def __init__(self, macro_type, microops,
            rom = None, rom_macroop_type = None):
        self.lexer = lex.lex()
        parser = yacc.yacc()
        parser.macro_type = macro_type
        parser.macroops = {}
        parser.microops = microops
        parser.rom = rom
        parser.rom_macroop_type = rom_macroop_type
        # One dictionary, reachable from both the parser and the assembler.
        symbols = {}
        parser.symbols = symbols
        self.parser = parser
        self.symbols = symbols

    def assemble(self, asm):
        """Parse ``asm`` and return the macroops it defined, keyed by name.

        The parser's macroop table is replaced by a fresh dictionary
        afterwards, so each call returns only its own macroops.
        """
        parser = self.parser
        parser.parse(asm, lexer=self.lexer)
        assembled, parser.macroops = parser.macroops, {}
        return assembled
505