# clex.py revision 2632:1bb2f91485ea
# ----------------------------------------------------------------------
# clex.py
#
# A lexer for ANSI C.
#
# This module is a token specification consumed by the `lex` module:
# every module-level name starting with `t_` is a rule.  For function
# rules the docstring *is* the regular expression, so those docstrings
# must contain nothing but the pattern; explanations go in comments.
# ----------------------------------------------------------------------

import lex

# Reserved words
reserved = (
    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
    'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
    'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
    'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
    )

tokens = reserved + (
    # Literals (identifier, integer constant, float constant, string constant, char const)
    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'PLUSPLUS', 'MINUSMINUS',

    # Structure dereference (->)
    'ARROW',

    # Conditional operator (?)
    'CONDOP',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
    )

# Completely ignored characters (space, tab, form feed)
t_ignore = ' \t\x0c'


# Newlines: produce no token, only advance the line counter.
# NOTE(review): newer PLY releases track this on `t.lexer.lineno`; kept on
# the token because the `lex` module this file is paired with is not
# visible here -- confirm against the bundled lex.py.
def t_NEWLINE(t):
    r'\n+'
    t.lineno += t.value.count("\n")


# Operators
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_TIMES            = r'\*'
t_DIVIDE           = r'/'
t_MOD              = r'%'
t_OR               = r'\|'
t_AND              = r'&'
t_NOT              = r'~'
# The caret must be escaped: a bare '^' is the start-of-string anchor and
# would never match a literal '^' in the input.
t_XOR              = r'\^'
t_LSHIFT           = r'<<'
t_RSHIFT           = r'>>'
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_LNOT             = r'!'
t_LT               = r'<'
t_GT               = r'>'
t_LE               = r'<='
t_GE               = r'>='
t_EQ               = r'=='
t_NE               = r'!='

# Assignment operators

t_EQUALS           = r'='
t_TIMESEQUAL       = r'\*='
t_DIVEQUAL         = r'/='
t_MODEQUAL         = r'%='
t_PLUSEQUAL        = r'\+='
t_MINUSEQUAL       = r'-='
t_LSHIFTEQUAL      = r'<<='
t_RSHIFTEQUAL      = r'>>='
t_ANDEQUAL         = r'&='
t_OREQUAL          = r'\|='
# Escaped caret for the same reason as t_XOR; unescaped, this rule could
# only fire on an '=' at the very start of the input.
t_XOREQUAL         = r'\^='

# Increment/decrement
t_PLUSPLUS         = r'\+\+'
t_MINUSMINUS       = r'--'

# ->
t_ARROW            = r'->'

# ?
t_CONDOP           = r'\?'

# Delimiters
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_COMMA            = r','
t_PERIOD           = r'\.'
t_SEMI             = r';'
t_COLON            = r':'
t_ELLIPSIS         = r'\.\.\.'

# Identifiers and reserved words

# Maps the lower-case spelling found in C source ('while') to its token
# type ('WHILE').  Built in one expression so no loop variable is left
# behind in the module namespace.
reserved_map = dict((word.lower(), word) for word in reserved)


# An identifier is first matched generically, then re-typed to the
# keyword token when its text is a reserved word; otherwise it stays ID.
def t_ID(t):
    r'[A-Za-z_][\w_]*'
    t.type = reserved_map.get(t.value, "ID")
    return t


# Integer literal (decimal digits with an optional u/l/ul/lu suffix)
t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal
# The blanks around '|' are only insignificant if the pattern is compiled
# in verbose mode (PLY documents that it does so) -- verify for this lex.
t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''


# Comments: discarded, but embedded newlines still count towards lineno.
# The pattern carries no leading blank: under verbose compilation it was
# ignored anyway, and without it the rule would demand a space that
# t_ignore has already consumed.
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    t.lineno += t.value.count('\n')


# Preprocessor directive (ignored): everything from '#' through the end
# of that line is dropped.  '#' is escaped so it is not read as a
# verbose-mode regex comment.
def t_preprocessor(t):
    r'\#(.)*?\n'
    t.lineno += 1


# Error hook: report the offending character and skip past it so lexing
# can continue.
def t_error(t):
    # Single-argument parenthesised form prints identically on Python 2
    # and Python 3.
    print("Illegal character %s" % repr(t.value[0]))
    # NOTE(review): newer PLY releases spell this `t.lexer.skip(1)`;
    # kept as-is to match the `lex` module this file is paired with.
    t.skip(1)


# Build the lexer at import time so other modules can use `clex.lexer`.
lexer = lex.lex(optimize=1)
if __name__ == "__main__":
    lex.runmain(lexer)