# ----------------------------------------------------------------------
# ctokens.py
#
# Token specifications for symbols in ANSI C and C++.  This file is
# meant to be used as a library in other tokenizers.
# ----------------------------------------------------------------------

# Token names.
#
# Each entry matches the name of a t_<NAME> rule defined in this file, so
# the list and the rules stay in agreement.  The one exception is TYPEID:
# no rule for it is defined here (presumably it is supplied by the
# importing tokenizer -- TODO confirm).
#
# NOTE(review): t_COMMENT and t_CPPCOMMENT return their tokens, but no
# COMMENT / CPPCOMMENT names are listed here -- confirm whether importers
# are expected to add them or to override those rules.
tokens = [
    # Literals (identifier, integer constant, float constant, string constant, char const)
    'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'INCREMENT', 'DECREMENT',

    # Structure dereference (->)
    'ARROW',

    # Ternary operator (?)
    'TERNARY',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
]

# Operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MODULO = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='

# Assignment operators

t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
# The caret must be escaped: a bare '^' is the start-of-input anchor and
# would never match the literal text "^=".
t_XOREQUAL = r'\^='

# Increment/decrement
t_INCREMENT = r'\+\+'
t_DECREMENT = r'--'

# ->
t_ARROW = r'->'

# ?
t_TERNARY = r'\?'

# Delimiters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'
t_ELLIPSIS = r'\.\.\.'

# Identifiers
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'

# Integer literal: decimal digits with an optional u/l suffix combination
t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal: digits '.' digits with optional exponent, or digits with
# a mandatory exponent, followed by an optional l/f suffix.
# NOTE(review): the pattern contains literal spaces around '|', so it only
# behaves as intended when compiled in verbose mode (re.VERBOSE) -- confirm
# that the lexer building these rules does so.
t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal
t_STRING = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''


# Comment (C-Style)
# The string literal that opens the function body is the rule's regular
# expression, not a descriptive docstring; it must stay the first statement.
# The line counter is advanced by the number of newlines inside the comment
# and the token is returned unchanged.
def t_COMMENT(t):
    r'/\*(.|\n)*?\*/'
    t.lexer.lineno += t.value.count('\n')
    return t


# Comment (C++-Style)
# As above, the leading string literal is the rule's regular expression.
# The trailing newline is optional so that a '//' comment on the final line
# of the input (with no terminating newline) is still recognised; the line
# counter advances only by the newlines actually consumed (0 or 1).
def t_CPPCOMMENT(t):
    r'//.*\n?'
    t.lexer.lineno += t.value.count('\n')
    return t