# ----------------------------------------------------------------------
# clex.py
#
# A lexer for ANSI C.
#
# Token rules follow the PLY convention: a module-level name `t_<TOKEN>`
# is either a regex string or a function whose docstring is the regex.
# Because the docstring of a rule function IS its pattern, rule functions
# are documented with `#` comments only.
# ----------------------------------------------------------------------

import sys
sys.path.insert(0, "../..")

import ply.lex as lex

# Reserved words
reserved = (
    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
    'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
    'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
    'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
    )

tokens = reserved + (
    # Literals (identifier, integer constant, float constant, string constant, char const)
    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'PLUSPLUS', 'MINUSMINUS',

    # Structure dereference (->)
    'ARROW',

    # Conditional operator (?)
    'CONDOP',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
    )

# Completely ignored characters (space, tab, form feed)
t_ignore = ' \t\x0c'

# Newlines: produce no token, only advance the lexer's line counter.
def t_NEWLINE(t):
    r'\n+'
    t.lexer.lineno += t.value.count("\n")

# Operators
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_TIMES            = r'\*'
t_DIVIDE           = r'/'
t_MOD              = r'%'
t_OR               = r'\|'
t_AND              = r'&'
t_NOT              = r'~'
t_XOR              = r'\^'
t_LSHIFT           = r'<<'
t_RSHIFT           = r'>>'
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_LNOT             = r'!'
t_LT               = r'<'
t_GT               = r'>'
t_LE               = r'<='
t_GE               = r'>='
t_EQ               = r'=='
t_NE               = r'!='

# Assignment operators

t_EQUALS           = r'='
t_TIMESEQUAL       = r'\*='
t_DIVEQUAL         = r'/='
t_MODEQUAL         = r'%='
t_PLUSEQUAL        = r'\+='
t_MINUSEQUAL       = r'-='
t_LSHIFTEQUAL      = r'<<='
t_RSHIFTEQUAL      = r'>>='
t_ANDEQUAL         = r'&='
t_OREQUAL          = r'\|='
# The caret must be escaped: a bare '^' is a regex anchor, not the literal
# character, so the unescaped pattern could never match the C '^=' operator.
t_XOREQUAL         = r'\^='

# Increment/decrement
t_PLUSPLUS         = r'\+\+'
t_MINUSMINUS       = r'--'

# ->
t_ARROW            = r'->'

# ?
t_CONDOP           = r'\?'

# Delimiters
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_COMMA            = r','
t_PERIOD           = r'\.'
t_SEMI             = r';'
t_COLON            = r':'
t_ELLIPSIS         = r'\.\.\.'

# Identifiers and reserved words

# Maps each lowercase keyword spelling (e.g. 'while') to its token name
# (e.g. 'WHILE').
reserved_map = { }
for r in reserved:
    reserved_map[r.lower()] = r

# Identifiers: a match whose text is a key of reserved_map is retyped to the
# corresponding reserved-word token; everything else stays an ID.
def t_ID(t):
    r'[A-Za-z_][\w_]*'
    t.type = reserved_map.get(t.value, "ID")
    return t

# Integer literal
t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal
# NOTE(review): the spaces around '|' are only harmless if the pattern is
# compiled in verbose mode (PLY's documented default) - confirm if the
# lexer is ever built with custom reflags.
t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''

# Comments: discarded, but embedded newlines still advance the line counter.
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    t.lexer.lineno += t.value.count('\n')

# Preprocessor directive (ignored): the pattern consumes through the
# terminating newline, hence the single line-count increment.
def t_preprocessor(t):
    r'\#(.)*?\n'
    t.lexer.lineno += 1

# Error handler: report the offending character and skip past it so
# lexing can continue.
def t_error(t):
    print("Illegal character %s" % repr(t.value[0]))
    t.lexer.skip(1)

lexer = lex.lex(optimize=1)
if __name__ == "__main__":
    lex.runmain(lexer)