1# Copyright (c) 2003-2005 The Regents of The University of Michigan
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26#
27# Authors: Gabe Black
28
29from __future__ import print_function
30
31import os
32import sys
33import re
34import string
35import traceback
36# get type names
37from types import *
38
39from ply import lex
40from ply import yacc
41
42##########################################################################
43#
44# Base classes for use outside of the assembler
45#
46##########################################################################
47
class Micro_Container(object):
    """Named, ordered collection of microops.

    Besides the microop list, every container carries three dictionaries,
    ``directives``, ``micro_classes`` and ``labels``, which all start out
    empty.
    """

    def __init__(self, name):
        self.name = name
        self.microops = []
        self.labels = {}
        self.directives = {}
        self.micro_classes = {}

    def add_microop(self, mnemonic, microop):
        """Append ``microop`` to the container; ``mnemonic`` is not used."""
        self.microops.append(microop)

    def __str__(self):
        """Return the name followed by one indented line per microop."""
        pieces = ["%s:\n" % self.name]
        pieces.extend("  %s\n" % microop for microop in self.microops)
        return "".join(pieces)
64
class Combinational_Macroop(Micro_Container):
    """Micro_Container subclass that adds no state or behaviour of its own."""
67
class Rom_Macroop(object):
    """Pairs a macroop name with a target."""

    def __init__(self, name, target):
        self.name, self.target = name, target

    def __str__(self):
        """Return ``"<name>: <target>"`` followed by a newline."""
        fields = (self.name, self.target)
        return "%s: %s\n" % fields
75
class Rom(Micro_Container):
    """Micro_Container that additionally keeps an ``externs`` dictionary."""

    def __init__(self, name):
        super(Rom, self).__init__(name)
        # Filled by handle_statement() for labels marked extern.
        self.externs = dict()
80
81##########################################################################
82#
83# Support classes
84#
85##########################################################################
86
class Label(object):
    """A label attached to a microop.

    The grammar actions p_label_0/p_label_1 store the label's identifier in
    ``text`` and its extern flag in ``is_extern``, and handle_statement()
    reads those two attributes, so both are given defaults here.  The older
    ``extern`` and ``name`` attributes are kept for backward compatibility.
    """

    def __init__(self):
        # Attributes actually used by the parser and handle_statement().
        self.is_extern = False
        self.text = ""
        # Original attribute names, kept so existing users keep working.
        self.extern = False
        self.name = ""
91
class Block(object):
    """Container for the list of statements making up one block."""

    def __init__(self):
        self.statements = list()
95
class Statement(object):
    """Base statement: empty parameter text and both kind flags cleared."""

    def __init__(self):
        self.params = ""
        self.is_microop = self.is_directive = False
101
class Microop(Statement):
    """Statement flagged as a microop, with a mnemonic and a label list."""

    def __init__(self):
        super(Microop, self).__init__()
        self.is_microop = True
        self.labels = []
        self.mnemonic = ""
108
class Directive(Statement):
    """Statement flagged as a directive, identified by its name."""

    def __init__(self):
        super(Directive, self).__init__()
        self.is_directive = True
        self.name = ""
114
115##########################################################################
116#
117# Functions that handle common tasks
118#
119##########################################################################
120
def print_error(message):
    """Print ``message`` prefixed with ``***`` between two blank lines."""
    # One print call: leading newline, the banner line, and a trailing
    # newline that combines with print's own to give the closing blank line.
    print("\n*** %s\n" % message)
125
def handle_statement(parser, container, statement):
    """Apply one parsed statement to ``container``.

    A microop statement is looked up by mnemonic in ``parser.microops``,
    instantiated with the statement's parameter text, recorded under each of
    its labels in ``container.labels`` (and in ``container.externs`` for
    labels marked extern) and finally passed to ``container.add_microop``.
    A directive statement is looked up by name in ``container.directives``
    and called with the statement's parameter text.

    In both cases the parameter text is evaluated as a Python argument list
    with ``parser.symbols`` as the local namespace.

    Raises:
        Exception: if the mnemonic or directive name is not known, or if the
            statement is neither a microop nor a directive.  Any error
            raised while creating/adding a microop or running a directive
            is reported through print_error() and re-raised unchanged.
    """
    if statement.is_microop:
        if statement.mnemonic not in parser.microops:
            raise Exception("Unrecognized mnemonic: %s" % statement.mnemonic)
        # Publish the class under a reserved name so the evaluated
        # expression below can call it.
        parser.symbols["__microopClassFromInsideTheAssembler"] = \
            parser.microops[statement.mnemonic]
        try:
            # NOTE(review): eval() executes the parameter text as Python
            # code, so the assembly source must come from a trusted place.
            microop = eval('__microopClassFromInsideTheAssembler(%s)' %
                    statement.params, {}, parser.symbols)
        except:
            # Bare except is deliberate: report, then re-raise whatever
            # was thrown.
            print_error("Error creating microop object with mnemonic %s." % \
                    statement.mnemonic)
            raise
        try:
            for label in statement.labels:
                container.labels[label.text] = microop
                if label.is_extern:
                    container.externs[label.text] = microop
            container.add_microop(statement.mnemonic, microop)
        except:
            print_error("Error adding microop.")
            raise
    elif statement.is_directive:
        if statement.name not in container.directives:
            raise Exception("Unrecognized directive: %s" % statement.name)
        parser.symbols["__directiveFunctionFromInsideTheAssembler"] = \
            container.directives[statement.name]
        try:
            # NOTE(review): same trusted-input caveat as for microops.
            eval('__directiveFunctionFromInsideTheAssembler(%s)' %
                    statement.params, {}, parser.symbols)
        except:
            print_error("Error executing directive.")
            print(container.directives)
            raise
    else:
        # The statement goes into the message; the old three-expression
        # raise form would have treated it as a traceback object.
        raise Exception("Didn't recognize the type of statement: %r" %
                (statement,))
162
163##########################################################################
164#
165# Lexer specification
166#
167##########################################################################
168
def error(lineno, string, print_traceback = False):
    """Abort via sys.exit() with a ``"<lineno>: <string>"`` style message.

    The output is formatted to work under Emacs compile-mode.  When
    ``lineno`` is 0 the line-number prefix is left empty.  If
    ``print_traceback`` is true, traceback.print_exc() is called first,
    which can be handy when debugging the parser itself.
    """
    if print_traceback:
        traceback.print_exc()
    line_str = "%d:" % lineno if lineno != 0 else ""
    sys.exit("%s %s" % (line_str, string))
182
# Keywords; their lower-case spellings are what appears in the source text.
reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')

# All token types: the keywords, then identifiers, the argument text of
# microops and directives, punctuation and the statement-ending newline.
tokens = reserved + (
    'ID',
    'PARAMS',
    'LPAREN', 'RPAREN',
    'LBRACE', 'RBRACE',
    'COLON', 'SEMI', 'DOT',
    'NEWLINE',
)

# New lines are ignored at the top level, but they end statements in the
# assembler.
states = (
    ('asm', 'exclusive'),
    ('params', 'exclusive'),
)

# Maps the lower-case spelling of each keyword to its token type.
reserved_map = dict((word.lower(), word) for word in reserved)
207
# Ignore '#' comments in every lexer state. The pattern stops before the
# newline, which is left for the NEWLINE rules, and no longer requires one to
# follow, so a comment on the last line of input without a trailing newline
# is accepted too. Nothing is returned, so no token is produced.
def t_ANY_COMMENT(t):
    r'\#[^\n]*'
211
# Ignore /* ... */ comments in every lexer state. The comment produces no
# token, but any newlines inside it still have to be counted so that line
# numbers reported after the comment stay correct.
def t_ANY_MULTILINECOMMENT(t):
    r'/\*([^/]|((?<!\*)/))*\*/'
    t.lexer.lineno += t.value.count('\n')
214
# A colon marks the end of a label. It should follow an ID, which will have
# put the lexer in the "params" state. Seeing the colon puts it back in the
# "asm" state, since the ID turned out to be a label and not a mnemonic.
def t_params_COLON(t):
    r':'
    lexer = t.lexer
    lexer.begin('asm')
    return t
222
# Parameters are a string of text which doesn't contain an unescaped
# statement terminator, i.e. a newline or semicolon. A backslash escapes a
# newline, a semicolon or another backslash; the backslash itself is dropped
# from the token's value. Afterwards the lexer returns to the "asm" state.
def t_params_PARAMS(t):
    r'([^\n;\\]|(\\[\n;\\]))+'
    # Escaped newlines are part of the match, so they must be counted. The
    # lexer's own counter is advanced as well as the token's: PLY's documented
    # way to track lines is t.lexer.lineno, and updating only the token would
    # leave later tokens with stale line numbers.
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines
    # Replace every backslash escape with the character it protects.
    t.value = re.sub(r'\\([\n;\\])', r'\1', t.value)
    t.lexer.begin('asm')
    return t
235
# An "ID" in the micro assembler is either a label, directive, or mnemonic.
# If it's a directive or a mnemonic, it will be optionally followed by
# parameters. If it's a label, the following colon will make the lexer stop
# looking for parameters.
def t_asm_ID(t):
    r'[A-Za-z_]\w*'
    token_type = reserved_map.get(t.value, 'ID')
    t.type = token_type
    # If the ID is really "extern", don't start looking for parameters yet:
    # the real ID, the label itself, is coming up.
    if token_type == 'EXTERN':
        return t
    t.lexer.begin('params')
    return t
248
# Identifiers seen when -not- in the assembler state (which is handled by
# t_asm_ID above) are only classified as keyword or plain ID; the lexer does
# not start looking for parameters.
def t_ANY_ID(t):
    r'[A-Za-z_]\w*'
    t.type = reserved_map[t.value] if t.value in reserved_map else 'ID'
    return t
255
# An opening brace at the top level enters micro assembly: the lexer is
# switched to the "asm" state.
def t_INITIAL_LBRACE(t):
    r'\{'
    lexer = t.lexer
    lexer.begin('asm')
    return t
261
# A closing brace in the "asm" state exits micro assembly: the lexer goes
# back to the "INITIAL" state.
def t_asm_RBRACE(t):
    r'\}'
    lexer = t.lexer
    lexer.begin('INITIAL')
    return t
266
# At the top level, keep track of newlines only for line counting; no token
# is returned. The lexer's own counter is advanced (PLY's documented way to
# track lines) in addition to the token's, so later tokens carry the right
# line number.
def t_INITIAL_NEWLINE(t):
    r'\n+'
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines
271
# In the micro assembler, do line counting but also return a token. The
# token is needed by the parser to detect the end of a statement. The
# lexer's own counter is advanced (PLY's documented way to track lines) in
# addition to the token's, so later tokens carry the right line number.
def t_asm_NEWLINE(t):
    r'\n+'
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines
    return t
278
# A newline when looking for params signals that the statement is over and
# the lexer should go back to looking for regular assembly. The lexer's own
# line counter is advanced (PLY's documented way to track lines) in addition
# to the token's, so later tokens carry the right line number.
def t_params_NEWLINE(t):
    r'\n+'
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines
    t.lexer.begin('asm')
    return t
286
# A semicolon when looking for params ends the statement, so the lexer goes
# back to the "asm" state.
def t_params_SEMI(t):
    r';'
    lexer = t.lexer
    lexer.begin('asm')
    return t
291
# Simple punctuation tokens, given as plain regular expressions and valid in
# every lexer state.
t_ANY_LPAREN = r"\("
t_ANY_RPAREN = r"\)"
t_ANY_SEMI = r";"
t_ANY_DOT = r"\."

# Characters skipped in every state: space, tab and form feed.
t_ANY_ignore = " \t\f"
299
# Lexing error handler for every state: report the offending character
# through error(), which exits the program.
def t_ANY_error(t):
    error(t.lineno, "illegal character '%s'" % t.value[0])
    # Only reached if error() ever returns. Skipping is done through the
    # lexer, which is the documented PLY interface for error rules.
    t.lexer.skip(1)
303
304##########################################################################
305#
306# Parser specification
307#
308##########################################################################
309
# Start symbol for a file which may have more than one macroop or rom
# specification. The rule has no action of its own.
def p_file(t):
    'file : opt_rom_or_macros'
    pass
314
# A file may be empty. No action.
def p_opt_rom_or_macros_0(t):
    'opt_rom_or_macros : '
    pass
317
# Otherwise a file is a sequence of rom blocks and macroop definitions.
# No action.
def p_opt_rom_or_macros_1(t):
    'opt_rom_or_macros : rom_or_macros'
    pass
320
# Base case of the sequence: a single item. No action.
def p_rom_or_macros_0(t):
    'rom_or_macros : rom_or_macro'
    pass
323
# Recursive case of the sequence: more items followed by one item. No action.
def p_rom_or_macros_1(t):
    'rom_or_macros : rom_or_macros rom_or_macro'
    pass
326
# Each item is either a rom block or a macroop definition. No action here;
# the work is done by the rules for those two.
def p_rom_or_macro_0(t):
    '''rom_or_macro : rom_block
                    | macroop_def'''
    pass
330
# Defines a section of microcode that should go in the current ROM. Every
# statement of the block is applied to t.parser.rom through
# handle_statement(), and the ROM object becomes the value of the rule.
# Raises TypeError when the parser was set up without a ROM object.
def p_rom_block(t):
    'rom_block : DEF ROM block SEMI'
    if not t.parser.rom:
        print_error("Rom block found, but no Rom object specified.")
        raise TypeError("Rom block found, but no Rom object was specified.")
    for statement in t[3].statements:
        handle_statement(t.parser, t.parser.rom, statement)
    t[0] = t.parser.rom
340
# Defines a macroop that jumps to an external label in the ROM. The object
# is built by calling t.parser.rom_macroop_type with the macroop's name and
# the label, and is stored in t.parser.macroops under the macroop's name.
# Raises TypeError when the parser was set up without a ROM macroop class.
def p_macroop_def_0(t):
    'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
    if not t.parser.rom_macroop_type:
        print_error("ROM based macroop found, but no ROM macroop class was specified.")
        raise TypeError("ROM based macroop found, but no ROM macroop class was specified.")
    macroop = t.parser.rom_macroop_type(t[3], t[5])
    t.parser.macroops[t[3]] = macroop
349
350
# Defines a macroop that is combinationally generated. A new object is made
# by calling t.parser.macro_type with the macroop's name, every statement of
# the block is applied to it through handle_statement(), and the result is
# stored in t.parser.macroops under the macroop's name.
def p_macroop_def_1(t):
    'macroop_def : DEF MACROOP ID block SEMI'
    try:
        macroop = t.parser.macro_type(t[3])
    except TypeError:
        print_error("Error creating macroop object.")
        raise
    parser = t.parser
    for stmt in t[4].statements:
        handle_statement(parser, macroop, stmt)
    parser.macroops[t[3]] = macroop
362
# A block of statements: the statement list between the braces is wrapped in
# a Block object.
def p_block(t):
    'block : LBRACE statements RBRACE'
    result = Block()
    result.statements = t[2]
    t[0] = result
369
# Start of a statement list. An empty statement (a falsy value) gives an
# empty list instead of a one-element list.
def p_statements_0(t):
    'statements : statement'
    first = t[1]
    t[0] = [first] if first else []
376
# Extend an existing statement list in place, skipping empty (falsy)
# statements.
def p_statements_1(t):
    'statements : statements statement'
    collected, latest = t[1], t[2]
    if latest:
        collected.append(latest)
    t[0] = collected
382
# A statement is its content followed by a terminator; only the content is
# kept as the value.
def p_statement(t):
    'statement : content_of_statement end_of_statement'
    content = t[1]
    t[0] = content
386
# A statement can be a microop or an assembler directive; either one is
# passed through unchanged.
def p_content_of_statement_0(t):
    '''content_of_statement : microop
                            | directive'''
    content = t[1]
    t[0] = content
392
# Ignore empty statements: t[0] is never assigned here, and the statement
# list rules skip falsy statement values.
def p_content_of_statement_1(t):
    'content_of_statement : '
    return None
397
# Statements are ended by newlines or a semicolon. The terminator carries no
# value.
def p_end_of_statement(t):
    '''end_of_statement : NEWLINE
                        | SEMI'''
    return None
403
# Different flavors of microop to avoid shift/reduce errors.
# This one: labelled microop without parameters.
def p_microop_0(t):
    'microop : labels ID'
    labels, mnemonic = t[1], t[2]
    op = Microop()
    op.mnemonic = mnemonic
    op.labels = labels
    t[0] = op
411
# Microop with neither labels nor parameters.
def p_microop_1(t):
    'microop : ID'
    op = Microop()
    op.mnemonic = t[1]
    t[0] = op
417
# Labelled microop with parameters.
def p_microop_2(t):
    'microop : labels ID PARAMS'
    labels, mnemonic, params = t[1], t[2], t[3]
    op = Microop()
    op.params = params
    op.mnemonic = mnemonic
    op.labels = labels
    t[0] = op
425
# Unlabelled microop with parameters.
def p_microop_3(t):
    'microop : ID PARAMS'
    mnemonic, params = t[1], t[2]
    op = Microop()
    op.params = params
    op.mnemonic = mnemonic
    t[0] = op
432
# Labels in the microcode: a single label starts a new list.
def p_labels_0(t):
    'labels : label'
    only = t[1]
    t[0] = [only]
437
# A further label is appended in place to the list collected so far.
def p_labels_1(t):
    'labels : labels label'
    collected = t[1]
    collected.append(t[2])
    t[0] = collected
442
# Labels on lines by themselves are attached to the following instruction:
# the newline is dropped and the label list passes through unchanged.
def p_labels_2(t):
    'labels : labels NEWLINE'
    collected = t[1]
    t[0] = collected
447
# A plain label: the identifier is stored as its text and it is not extern.
def p_label_0(t):
    'label : ID COLON'
    plain = Label()
    plain.text = t[1]
    plain.is_extern = False
    t[0] = plain
454
# A label introduced by the extern keyword: same as a plain label but
# flagged as extern.
def p_label_1(t):
    'label : EXTERN ID COLON'
    external = Label()
    external.text = t[2]
    external.is_extern = True
    t[0] = external
461
# Directives for the macroop: a dot followed by the directive's name, here
# without parameters.
def p_directive_0(t):
    'directive : DOT ID'
    result = Directive()
    result.name = t[2]
    t[0] = result
468
# Directive with parameters.
def p_directive_1(t):
    'directive : DOT ID PARAMS'
    name, params = t[2], t[3]
    result = Directive()
    result.params = params
    result.name = name
    t[0] = result
475
# Parse error handler.  Note that the argument here is the offending
# *token*, not a grammar symbol (hence the need to use t.value). When no
# token is given, a generic message is reported with line number 0 and a
# traceback is requested.
def p_error(t):
    if not t:
        error(0, "unknown syntax error", True)
    else:
        error(t.lineno, "syntax error at '%s'" % t.value)
483
class MicroAssembler(object):
    """Ties the lexer and parser of this module together.

    ``macro_type``, ``microops``, ``rom`` and ``rom_macroop_type`` are
    stored on the parser object, where the grammar actions read them.
    ``symbols`` is the dictionary used as the namespace when statement
    parameters are evaluated; the assembler and its parser share the same
    dictionary object.
    """

    def __init__(self, macro_type, microops,
            rom = None, rom_macroop_type = None):
        self.lexer = lex.lex()
        parser = yacc.yacc()
        parser.macro_type = macro_type
        parser.macroops = {}
        parser.microops = microops
        parser.rom = rom
        parser.rom_macroop_type = rom_macroop_type
        self.parser = parser
        # One dictionary, reachable from both the parser and the assembler.
        self.symbols = parser.symbols = {}

    def assemble(self, asm):
        """Parse ``asm`` and return the macroops it defined.

        The parser's macroop dictionary is replaced with a fresh empty one
        afterwards, so each call returns only its own definitions.
        """
        self.parser.parse(asm, lexer=self.lexer)
        macroops, self.parser.macroops = self.parser.macroops, {}
        return macroops
503