# micro_asm.py revision 4503
# Copyright (c) 2003-2005 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Gabe Black

import os
import sys
import re
import string
import traceback
# get type names
from types import *

# Prepend the directory where the PLY lex & yacc modules are found
# to the search path.  M5_PLY is optional: when it is unset, fall back
# to whatever 'ply' package is already importable instead of dying with
# a KeyError before the import is even attempted.
_ply_dir = os.environ.get('M5_PLY')
if _ply_dir:
    sys.path[0:0] = [_ply_dir]

from ply import lex
from ply import yacc

##########################################################################
#
# Base classes for use outside of the assembler
#
##########################################################################

class Micro_Container(object):
    """A named, ordered list of microops together with its lookup tables."""

    def __init__(self, name):
        self.microops = []
        self.name = name
        # Directive name -> callable; handle_statement() looks directives
        # up here and calls them with the statement's parameters.
        self.directives = {}
        self.micro_classes = {}
        # Label name -> the microop the label is attached to.
        self.labels = {}

    def add_microop(self, microop):
        """Append ``microop`` to the end of this container."""
        self.microops.append(microop)

    def __str__(self):
        """Return the name on one line, then one indented line per microop."""
        # Collect the pieces and join once; this also avoids a local
        # called 'string', which would shadow the imported module.
        pieces = ["%s:\n" % self.name]
        pieces.extend("  %s\n" % microop for microop in self.microops)
        return "".join(pieces)

class Macroop(Micro_Container):
    """Micro_Container specialisation for macroops; adds nothing of its own."""
    pass

class Rom(Micro_Container):
    """Micro_Container that additionally records extern labels."""

    def __init__(self, name):
        super(Rom, self).__init__(name)
        # Label name -> microop, filled in by handle_statement() for
        # labels flagged as extern.
        self.externs = {}

##########################################################################
#
# Support classes
#
##########################################################################

class Label(object):
    """A label attached to a microop.

    ``name`` and ``extern`` are the attributes handle_statement() reads.
    The label grammar actions assign ``text`` and ``is_extern``; those are
    provided here as read/write aliases so both spellings always agree.
    """

    def __init__(self):
        self.extern = False
        self.name = ""

    def _get_text(self):
        return self.name

    def _set_text(self, value):
        self.name = value

    def _get_is_extern(self):
        return self.extern

    def _set_is_extern(self, value):
        self.extern = value

    # Aliases: writing either spelling updates the single underlying value.
    text = property(_get_text, _set_text)
    is_extern = property(_get_is_extern, _set_is_extern)

class Block(object):
    """A brace-delimited block; holds the list of statements parsed from it."""

    def __init__(self):
        self.statements = []

class Statement(object):
    """Base class for parsed statements.

    The two flags tell handle_statement() which kind of statement this is;
    ``params`` is the raw, unparsed parameter text (empty when absent).
    """

    def __init__(self):
        self.is_microop = False
        self.is_directive = False
        self.params = ""

class Microop(Statement):
    """A statement that names a microop: mnemonic plus any attached labels."""

    def __init__(self):
        super(Microop, self).__init__()
        self.mnemonic = ""
        self.labels = []
        self.is_microop = True

class Directive(Statement):
    """A statement that invokes an assembler directive by name."""

    def __init__(self):
        super(Directive, self).__init__()
        self.name = ""
        self.is_directive = True

##########################################################################
#
# Functions that handle common tasks
#
##########################################################################

def print_error(message):
    """Write ``message`` to stdout as a '*** ' banner.

    The banner line is preceded and followed by one blank line.
    """
    # One write call produces the same three lines on Python 2 and 3.
    sys.stdout.write("\n*** %s\n\n" % message)

def handle_statement(parser, container, statement):
    """Apply one parsed statement to ``container``.

    A microop statement is instantiated through ``parser.microops`` (keyed
    by mnemonic), registered under each of its labels and appended to the
    container.  A directive statement calls the matching entry of
    ``container.directives``.

    Any exception raised while doing so is reported with print_error() and
    then re-raised.  A statement that is neither a microop nor a directive
    raises Exception.

    NOTE(security): the parameter text is spliced into an expression and
    passed to eval(), so it is executed as Python.  Only assemble trusted
    input.
    """
    # The eval() strings refer to the locals 'parser', 'container' and
    # 'statement' by name, so those names must not be changed.
    if statement.is_microop:
        try:
            microop = eval('parser.microops[statement.mnemonic](%s)' %
                    statement.params)
        except Exception:
            print_error("Error creating microop object.")
            raise
        try:
            for label in statement.labels:
                container.labels[label.name] = microop
                if label.extern:
                    # Only containers with an 'externs' table (Rom) can
                    # take extern labels; others fail here and are
                    # reported below.
                    container.externs[label.name] = microop
            container.add_microop(microop)
        except Exception:
            print_error("Error adding microop.")
            raise
    elif statement.is_directive:
        try:
            eval('container.directives[statement.name](%s)' % statement.params)
        except Exception:
            print_error("Error executing directive.")
            print(container.directives)
            raise
    else:
        # The old three-argument raise passed the statement where a
        # traceback is required; fold it into the message instead.
        raise Exception("Didn't recognize the type of statement: %r" %
                (statement,))

##########################################################################
#
# Lexer specification
#
##########################################################################

# Error handler.  Just call exit.  Output formatted to work under
# Emacs compile-mode.  Optional 'print_traceback' arg, if set to True,
# prints a Python stack backtrace too (can be handy when trying to
# debug the parser itself).
def error(lineno, string, print_traceback = False):
    """Exit the process with the message '<lineno>: <string>'.

    A ``lineno`` of 0 means "no line number": the prefix is then empty,
    leaving a single leading space before the message.  Always raises
    SystemExit (via sys.exit).
    """
    # Print a Python stack backtrace if requested.
    if print_traceback:
        traceback.print_exc()
    if lineno == 0:
        line_str = ""
    else:
        line_str = "%d:" % lineno
    sys.exit("%s %s" % (line_str, string))

# Keywords.  The token name is the upper-case form of the keyword text.
reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')

tokens = reserved + (
        # identifier
        'ID',
        # arguments for microops and directives
        'PARAMS',

        'LPAREN', 'RPAREN',
        'LBRACE', 'RBRACE',
        'COLON', 'SEMI', 'DOT',
        'NEWLINE'
        )

# New lines are ignored at the top level, but they end statements in the
# assembler
states = (
    ('asm', 'exclusive'),
    ('params', 'exclusive'),
)

# Lower-case keyword text -> token name, used by the ID rules to tell
# keywords from ordinary identifiers.
reserved_map = { }
for r in reserved:
    reserved_map[r.lower()] = r

# '#' comments run to the end of the line.  The pattern stops before the
# newline by itself, so no lookahead is needed; requiring one made a
# comment on the last line of input (no trailing newline) fail to match.
# Nothing is returned, so the comment is dropped.
def t_ANY_COMMENT(t):
    r'\#[^\n]*'

# C-style /* ... */ comments.  Nothing is returned, so the comment is
# dropped, but the newlines inside it still have to be counted or every
# later line number would be too small.
def t_ANY_MULTILINECOMMENT(t):
    r'/\*([^/]|((?<!\*)/))*\*/'
    t.lineno += t.value.count('\n')

# A ':' seen while in the 'params' state; the lexer goes back to the
# 'asm' state afterwards.
def t_params_COLON(t):
    r':'
    t.lexer.begin('asm')
    return t

# An identifier inside an assembler block.  Keywords are retyped through
# reserved_map; either way the lexer switches to the 'params' state for
# whatever follows the identifier.
def t_asm_ID(t):
    r'[A-Za-z_]\w*'
    t.type = reserved_map.get(t.value, 'ID')
    t.lexer.begin('params')
    return t

# An identifier in any state; keywords are retyped through reserved_map
# and the lexer state is left alone.
# NOTE(review): being an ANY rule, this is also active in the 'params'
# state and is defined ahead of t_params_PARAMS.  If PLY tries function
# rules in definition order, parameter text that starts with an
# identifier would presumably come out as ID followed by PARAMS -- confirm.
def t_ANY_ID(t):
    r'[A-Za-z_]\w*'
    t.type = reserved_map.get(t.value, 'ID')
    return t

# Parameter text: everything up to a newline or ';', where a newline or
# ';' directly preceded by a backslash is taken as part of the text.
# Newlines swallowed that way are counted, then the lexer returns to the
# 'asm' state.
def t_params_PARAMS(t):
    r'([^\n;]|((?<=\\)[\n;]))+'
    t.lineno += t.value.count('\n')
    t.lexer.begin('asm')
    return t

# '{' at the top level opens a block: switch to the 'asm' state.
def t_INITIAL_LBRACE(t):
    r'\{'
    t.lexer.begin('asm')
    return t

# '}' inside a block closes it: switch back to the 'INITIAL' state.
def t_asm_RBRACE(t):
    r'\}'
    t.lexer.begin('INITIAL')
    return t

# Newlines at the top level only advance the line count; nothing is
# returned, so no NEWLINE token is produced there.
def t_INITIAL_NEWLINE(t):
    r'\n+'
    t.lineno += t.value.count('\n')

# Newlines inside a block advance the line count and are returned as a
# NEWLINE token, which the grammar accepts as a statement terminator.
def t_asm_NEWLINE(t):
    r'\n+'
    t.lineno += t.value.count('\n')
    return t

# A newline while in the 'params' state: count it, go back to the 'asm'
# state and return the NEWLINE token.
def t_params_NEWLINE(t):
    r'\n+'
    t.lineno += t.value.count('\n')
    t.lexer.begin('asm')
    return t

# A ';' while in the 'params' state: go back to the 'asm' state and
# return the SEMI token.
def t_params_SEMI(t):
    r';'
    t.lexer.begin('asm')
    return t

# Basic regular expressions to pick out simple tokens
t_ANY_LPAREN = r'\('
t_ANY_RPAREN = r'\)'
t_ANY_SEMI   = r';'
t_ANY_DOT    = r'\.'

# Characters skipped between tokens in every state: space, tab, form feed.
t_ANY_ignore = ' \t\x0c'

# Lexer error rule for every state: report the offending character.
def t_ANY_error(t):
    # error() ends in sys.exit(), so the skip below only runs if error()
    # is ever changed to return.
    error(t.lineno, "illegal character '%s'" % t.value[0])
    t.skip(1)

##########################################################################
#
# Parser specification
#
##########################################################################

# Start symbol for a file which may have more than one macroop or rom
# specification.
#
# The productions below only describe structure: each function's string
# is the grammar rule itself and there is no action code, so none of them
# produces a value.
def p_file(t):
    'file : opt_rom_or_macros'

def p_opt_rom_or_macros_0(t):
    'opt_rom_or_macros : '

def p_opt_rom_or_macros_1(t):
    'opt_rom_or_macros : rom_or_macros'

def p_rom_or_macros_0(t):
    'rom_or_macros : rom_or_macro'

def p_rom_or_macros_1(t):
    'rom_or_macros : rom_or_macros rom_or_macro'

def p_rom_or_macro_0(t):
    '''rom_or_macro : rom_block'''

def p_rom_or_macro_1(t):
    '''rom_or_macro : macroop_def'''

# A block of statements
def p_block(t):
    'block : LBRACE statements RBRACE'
    # Wrap the statement list built by the 'statements' rule in a Block.
    block = Block()
    block.statements = t[2]
    t[0] = block

# Defines a section of microcode that should go in the current ROM
def p_rom_block(t):
    'rom_block : DEF ROM block SEMI'
    # Every statement of the block is applied to the parser's single rom
    # object, which is also the value of this rule.
    rom = t.parser.rom
    for statement in t[3].statements:
        handle_statement(t.parser, rom, statement)
    t[0] = t.parser.rom

# Defines a macroop that jumps to an external label in the ROM
def p_macroop_def_0(t):
    'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
    # NOTE(review): t[4] is the LPAREN token, so the value of this rule is
    # the parenthesis text rather than either ID (t[3] or t[5]).  No rule
    # in this file consumes the value, and nothing is recorded in
    # t.parser.macroops here -- confirm the intended behaviour.
    t[0] = t[4]

# Defines a macroop that is combinationally generated
def p_macroop_def_1(t):
    'macroop_def : DEF MACROOP ID block SEMI'
    name = t[3]
    # macro_type is the factory handed to MicroAssembler; it is called
    # with the macroop's name.
    try:
        curop = t.parser.macro_type(name)
    except TypeError:
        print_error("Error creating macroop object.")
        raise
    for statement in t[4].statements:
        handle_statement(t.parser, curop, statement)
    # The finished macroop is stored by name; this rule yields no value.
    t.parser.macroops[name] = curop

# Starts the statement list.  An empty statement has a false value and is
# left out.
def p_statements_0(t):
    'statements : statement'
    collected = []
    if t[1]:
        collected.append(t[1])
    t[0] = collected

# Extends the statement list in place.  An empty statement has a false
# value and is left out.
def p_statements_1(t):
    'statements : statements statement'
    collected = t[1]
    if t[2]:
        collected.append(t[2])
    t[0] = collected

# A statement is its content followed by a terminator; only the content
# is kept.
def p_statement(t):
    'statement : content_of_statement end_of_statement'
    t[0] = t[1]

# A statement can be a microop or an assembler directive
def p_content_of_statement_0(t):
    '''content_of_statement : microop
                            | directive'''
    # Pass the Microop/Directive object through unchanged.
    t[0] = t[1]

# An empty statement (a terminator with nothing before it).  No value is
# assigned, so the statement list rules skip it.
def p_content_of_statement_1(t):
    'content_of_statement : '
    pass

# Statements are ended by newlines or a semi colon
def p_end_of_statement(t):
    '''end_of_statement : NEWLINE
                        | SEMI'''
    # The terminator carries no information.
    pass

# Build a Microop statement; the optional parts keep the Microop defaults
# (no labels, empty parameter text) when they are not supplied.
def _make_microop(mnemonic, labels=None, params=None):
    microop = Microop()
    if labels is not None:
        microop.labels = labels
    microop.mnemonic = mnemonic
    if params is not None:
        microop.params = params
    return microop

# The four microop forms: with or without leading labels, with or without
# parameter text.
def p_microop_0(t):
    'microop : labels ID'
    t[0] = _make_microop(t[2], labels=t[1])

def p_microop_1(t):
    'microop : ID'
    t[0] = _make_microop(t[1])

def p_microop_2(t):
    'microop : labels ID PARAMS'
    t[0] = _make_microop(t[2], labels=t[1], params=t[3])

def p_microop_3(t):
    'microop : ID PARAMS'
    t[0] = _make_microop(t[1], params=t[2])

# Starts the list of labels attached to one microop.
def p_labels_0(t):
    'labels : label'
    t[0] = [t[1]]

# Extends the label list in place.
def p_labels_1(t):
    'labels : labels label'
    collected = t[1]
    collected.append(t[2])
    t[0] = collected

# Build a Label.  'name' and 'extern' are the attributes Label defines and
# handle_statement() reads; previously only 'text' and 'is_extern' were
# set, so every label was registered under the empty name and never as
# extern.  The old spellings are still assigned for any code that reads
# them.
def _make_label(name, extern):
    label = Label()
    label.name = name
    label.extern = extern
    label.text = name
    label.is_extern = extern
    return label

# A plain label local to its container.
def p_label_0(t):
    'label : ID COLON'
    t[0] = _make_label(t[1], False)

# A label marked with the 'extern' keyword.
def p_label_1(t):
    'label : EXTERN ID COLON'
    t[0] = _make_label(t[2], True)

# A directive is a '.' followed by its name; without parameter text the
# Directive keeps its default empty params.
def p_directive_0(t):
    'directive : DOT ID'
    directive = Directive()
    directive.name = t[2]
    t[0] = directive

# Same, with the raw parameter text attached.
def p_directive_1(t):
    'directive : DOT ID PARAMS'
    directive = Directive()
    directive.name = t[2]
    directive.params = t[3]
    t[0] = directive

# Parse error handler.  Note that the argument here is the offending
# *token*, not a grammar symbol (hence the need to use t.value)
def p_error(t):
    if t:
        error(t.lineno, "syntax error at '%s'" % t.value)
    else:
        # No token to point at: report without a line number and ask
        # error() for a traceback.
        error(0, "unknown syntax error", True)

class MicroAssembler(object):
    """Front end tying the lexer and parser to caller-supplied types."""

    def __init__(self, macro_type, microops, rom):
        """Build the lexer and parser and attach the assembler's inputs.

        macro_type -- called with a macroop's name to create its container
        microops   -- mapping from mnemonic to a microop factory
        rom        -- container that receives 'def rom' blocks
        """
        self.lexer = lex.lex()
        self.parser = yacc.yacc()
        # The grammar actions reach these through t.parser.
        self.parser.macro_type = macro_type
        self.parser.macroops = {}
        self.parser.microops = microops
        self.parser.rom = rom

    def assemble(self, asm):
        """Parse ``asm`` and return the macroops it defined, keyed by name.

        The parser's macroop table is reset afterwards, so each call
        returns only the macroops from its own input.  The rom object is
        not reset.
        """
        self.parser.parse(asm, lexer=self.lexer)
        # Begin debug printing
        for macroop in self.parser.macroops.values():
            print(macroop)
        print(self.parser.rom)
        # End debug printing
        macroops = self.parser.macroops
        self.parser.macroops = {}
        return macroops
