# clex.py revision 6498
# ----------------------------------------------------------------------
# clex.py
#
# A lexer for ANSI C.
#
# Token rules follow the PLY convention: every module-level name that
# starts with ``t_`` is a rule.  A string value is the token's regular
# expression; a function carries its regular expression in its docstring
# (so those docstrings must not be replaced by prose documentation).
# ----------------------------------------------------------------------

import sys
# Put the directory two levels above the working directory first on the
# module search path so the ``ply`` package imported below is found there.
sys.path.insert(0,"../..")

import ply.lex as lex

# Reserved words
reserved = (
    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
    'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
    'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
    'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
    )

# Complete list of token types: the reserved words plus everything below.
tokens = reserved + (
    # Literals (identifier, integer constant, float constant, string constant, char const)
    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'PLUSPLUS', 'MINUSMINUS',

    # Structure dereference (->)
    'ARROW',

    # Conditional operator (?)
    'CONDOP',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
    )

# Completely ignored characters (space, tab, form feed)
t_ignore = ' \t\x0c'

# Newlines: produce no token (nothing is returned), only advance the
# lexer's line counter by the number of newlines consumed.
def t_NEWLINE(t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")

# Operators
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_TIMES            = r'\*'
t_DIVIDE           = r'/'
t_MOD              = r'%'
t_OR               = r'\|'
t_AND              = r'&'
t_NOT              = r'~'
t_XOR              = r'\^'
t_LSHIFT           = r'<<'
t_RSHIFT           = r'>>'
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_LNOT             = r'!'
t_LT               = r'<'
t_GT               = r'>'
t_LE               = r'<='
t_GE               = r'>='
t_EQ               = r'=='
t_NE               = r'!='

# Assignment operators

t_EQUALS           = r'='
t_TIMESEQUAL       = r'\*='
t_DIVEQUAL         = r'/='
t_MODEQUAL         = r'%='
t_PLUSEQUAL        = r'\+='
t_MINUSEQUAL       = r'-='
t_LSHIFTEQUAL      = r'<<='
t_RSHIFTEQUAL      = r'>>='
t_ANDEQUAL         = r'&='
t_OREQUAL          = r'\|='
# The caret must be escaped: a bare '^' is a regex anchor, not a literal
# character, so an unescaped r'^=' can never match the C '^=' operator.
t_XOREQUAL         = r'\^='

# Increment/decrement
t_PLUSPLUS         = r'\+\+'
t_MINUSMINUS       = r'--'

# ->
t_ARROW            = r'->'

# ?
t_CONDOP           = r'\?'

# Delimiters
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_COMMA            = r','
t_PERIOD           = r'\.'
t_SEMI             = r';'
t_COLON            = r':'
t_ELLIPSIS         = r'\.\.\.'

# Identifiers and reserved words

# Maps the lower-case spelling of each reserved word (as it appears in C
# source) to its upper-case token type.
reserved_map = { }
for r in reserved:
    reserved_map[r.lower()] = r

# Identifier rule: a match whose text is a reserved word is retyped to
# that keyword's token; anything else stays an ID.
def t_ID(t):
    r'[A-Za-z_][\w_]*'
    t.type = reserved_map.get(t.value,"ID")
    return t

# Integer literal
t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal
# NOTE: the spaces around '|' are insignificant because PLY compiles rule
# patterns in verbose mode (see the PLY documentation).
t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''

# Comments: /* ... */ blocks are discarded (nothing is returned); newlines
# inside the comment are still counted so line numbers stay accurate.
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    t.lexer.lineno += t.value.count('\n')

# Preprocessor directive (ignored): consumes from '#' through the end of
# the line and counts that single newline.
def t_preprocessor(t):
    r'\#(.)*?\n'
    t.lexer.lineno += 1

# Error handler: report the offending character, then skip exactly one
# character so lexing resumes right after it.
def t_error(t):
    print("Illegal character %s" % repr(t.value[0]))
    t.lexer.skip(1)

# Build the lexer from the rules defined in this module.
lexer = lex.lex(optimize=1)
if __name__ == "__main__":
    lex.runmain(lexer)