# ----------------------------------------------------------------------
# clex.py
#
# A lexer for ANSI C.
# ----------------------------------------------------------------------

import sys
sys.path.insert(0, "../..")

import ply.lex as lex

# Reserved words
#
# Upper-case token names for the C keywords recognised by this lexer.
# The lower-cased form of each entry is the keyword's source spelling.
reserved = (
    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
    'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
    'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
    'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
    )

# Complete list of token names: the reserved words first, followed by
# literals, operators and punctuation.
tokens = reserved + (
    # Literals (identifier, integer constant, float constant, string constant, char const)
    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'PLUSPLUS', 'MINUSMINUS',

    # Structure dereference (->)
    'ARROW',

    # Conditional operator (?)
    'CONDOP',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
    )

# Completely ignored characters: space, horizontal tab and form feed (\x0c).
t_ignore           = ' \t\x0c'

# Newlines
#
# Consumes a run of one or more newlines and advances the lexer's line
# counter by the number of newlines seen.  Nothing is returned, so the
# matched text produces no token.  The string literal below is the rule's
# regular expression and must remain the first statement of the function.
def t_NEWLINE(t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")

# Operators
#
# Each t_NAME string is the regular expression for token NAME.  Characters
# that are special in a regular expression (+ * | ^ ? . ( ) [ ] { }) are
# backslash-escaped so that they match literally.
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_TIMES            = r'\*'
t_DIVIDE           = r'/'
t_MOD              = r'%'
t_OR               = r'\|'
t_AND              = r'&'
t_NOT              = r'~'
t_XOR              = r'\^'
t_LSHIFT           = r'<<'
t_RSHIFT           = r'>>'
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_LNOT             = r'!'
t_LT               = r'<'
t_GT               = r'>'
t_LE               = r'<='
t_GE               = r'>='
t_EQ               = r'=='
t_NE               = r'!='

# Assignment operators

t_EQUALS           = r'='
t_TIMESEQUAL       = r'\*='
t_DIVEQUAL         = r'/='
t_MODEQUAL         = r'%='
t_PLUSEQUAL        = r'\+='
t_MINUSEQUAL       = r'-='
t_LSHIFTEQUAL      = r'<<='
t_RSHIFTEQUAL      = r'>>='
t_ANDEQUAL         = r'&='
t_OREQUAL          = r'\|='
# The caret must be escaped: a bare '^' is the start-of-input anchor, so
# r'^=' would never match the two-character operator "^=".
t_XOREQUAL         = r'\^='

# Increment/decrement
t_PLUSPLUS         = r'\+\+'
t_MINUSMINUS       = r'--'

# ->
t_ARROW            = r'->'

# ?
t_CONDOP           = r'\?'

# Delimiters
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_COMMA            = r','
t_PERIOD           = r'\.'
t_SEMI             = r';'
t_COLON            = r':'
t_ELLIPSIS         = r'\.\.\.'

# Identifiers and reserved words

# Maps the lower-cased spelling of every entry in `reserved` to the entry
# itself (its token name), e.g. 'while' -> 'WHILE'.  A comprehension is used
# so that no loop variable is left behind in the module namespace.
reserved_map = {name.lower(): name for name in reserved}

# Identifier or reserved word.
#
# Matches a letter or underscore followed by any number of word characters.
# The matched text is looked up in reserved_map: when present, the token
# type becomes the mapped reserved-word name; otherwise it is "ID".
# Returns the token `t` with its type set.  The string literal below is the
# rule's regular expression and must remain the first statement.
def t_ID(t):
    r'[A-Za-z_][\w_]*'
    t.type = reserved_map.get(t.value, "ID")
    return t

# Integer literal
#
# Decimal digits with an optional unsigned/long suffix.  The two-letter
# suffixes are listed before the one-letter ones because regex alternation
# takes the first alternative that matches: with [uU] first, "10UL" would be
# consumed only as "10U", leaving a stray "L" behind.
t_ICONST = r'\d+([uU][lL]|[lL][uU]|[uU]|[lL])?'

# Floating literal
#
# Either digits '.' digits with an optional exponent, or digits with a
# mandatory exponent; an optional l/L/f/F suffix follows.  The spaces around
# '|' are only insignificant when the pattern is compiled in verbose mode.
t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''

# Comments
#
# Matches a /* ... */ comment, which may span several lines; the non-greedy
# quantifier stops at the first closing "*/".  The lexer's line counter is
# advanced by the number of newlines inside the comment, and nothing is
# returned, so the comment produces no token.  The string literal below is
# the rule's regular expression and must remain the first statement.
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    t.lexer.lineno += t.value.count('\n')

# Preprocessor directive (ignored)
#
# Matches from '#' through the end of that line, newline included.  The
# lexer's line counter is advanced by one for the consumed newline, and
# nothing is returned, so the directive produces no token.  The '#' is
# escaped because it would start a comment in a verbose-mode pattern.  The
# string literal below is the rule's regular expression and must remain the
# first statement.
def t_preprocessor(t):
    r'\#(.)*?\n'
    t.lexer.lineno += 1

# Error handler: reports the first character of the unmatched input
# (t.value[0]) on standard output, then tells the lexer to skip one
# character so scanning can continue.
def t_error(t):
    # %r yields the same text as formatting repr(...) with %s.
    print("Illegal character %r" % (t.value[0],))
    t.lexer.skip(1)

# Build the lexer from the rules defined in this module, with PLY's
# optimize option enabled.
lexer = lex.lex(optimize=1)

# When executed as a script, hand the lexer to PLY's runmain() driver.
if __name__ == "__main__":
    lex.runmain(lexer)