# ----------------------------------------------------------------------
# ctokens.py
#
# Token specifications for symbols in ANSI C and C++.  This file is
# meant to be used as a library in other tokenizers.
# ----------------------------------------------------------------------

# Token names
#
# Each name here has to agree with the t_<NAME> rule that produces it; the
# rules in this module are matched to tokens purely by name.
# NOTE(review): no t_TYPEID rule is visible in this module -- presumably the
# importing tokenizer reclassifies ID tokens as TYPEID; confirm with callers.

tokens = [
    # Literals (identifier, type name, integer constant, float constant,
    # string constant, character constant)
    'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'INCREMENT', 'DECREMENT',

    # Structure dereference (->)
    'ARROW',

    # Ternary operator (?)
    'TERNARY',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',

    # Comments (/* ... */ and // ...); t_COMMENT and t_CPPCOMMENT return
    # their tokens, so both types have to be declared.
    'COMMENT', 'CPPCOMMENT',
]

# Operators
#
# NOTE(review): several of these patterns are prefixes of longer ones
# ('<' of '<<' and '<=', '|' of '||', '&' of '&&', ...).  PLY's lex tries
# string rules in order of decreasing regex length, which keeps the longer
# operators intact; this module presumably relies on that -- confirm if it
# is ever fed to a different lexer generator.
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_TIMES            = r'\*'
t_DIVIDE           = r'/'
t_MODULO           = r'%'
t_OR               = r'\|'
t_AND              = r'&'
t_NOT              = r'~'
t_XOR              = r'\^'
t_LSHIFT           = r'<<'
t_RSHIFT           = r'>>'
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_LNOT             = r'!'
t_LT               = r'<'
t_GT               = r'>'
t_LE               = r'<='
t_GE               = r'>='
t_EQ               = r'=='
t_NE               = r'!='

# Assignment operators

t_EQUALS           = r'='
t_TIMESEQUAL       = r'\*='
t_DIVEQUAL         = r'/='
t_MODEQUAL         = r'%='
t_PLUSEQUAL        = r'\+='
t_MINUSEQUAL       = r'-='
t_LSHIFTEQUAL      = r'<<='
t_RSHIFTEQUAL      = r'>>='
t_ANDEQUAL         = r'&='
t_OREQUAL          = r'\|='
# '^' must be escaped: a bare '^' is the start-of-line anchor, so r'^=' would
# never match the two characters "^=" and would instead match a lone '='
# sitting at the beginning of a line.
t_XOREQUAL         = r'\^='

# Increment/decrement
t_INCREMENT        = r'\+\+'
t_DECREMENT        = r'--'

# Structure dereference (->)
t_ARROW            = r'->'

# Ternary operator (?)
t_TERNARY          = r'\?'

# Delimiters
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_COMMA            = r','
t_PERIOD           = r'\.'
t_SEMI             = r';'
t_COLON            = r':'
t_ELLIPSIS         = r'\.\.\.'

# Identifiers
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'

# Integer literal: decimal digits with an optional unsigned/long suffix.
# The two-letter suffixes come first because regex alternation stops at the
# first alternative that matches; with [uU] listed first, "10UL" would be
# cut short at "10U" and the trailing "L" lexed as an identifier.
t_INTEGER = r'\d+([uU][lL]|[lL][uU]|[uU]|[lL])?'

# Floating literal: digits '.' digits with an optional exponent, or digits
# followed by a mandatory exponent; optional l/L/f/F suffix.  The exponent
# marker may be 'e' or 'E'.  The pattern contains no literal whitespace, so
# it means the same thing with or without re.VERBOSE.
t_FLOAT = r'((\d+)(\.\d+)([eE](\+|-)?(\d+))?|(\d+)[eE](\+|-)?(\d+))([lL]|[fF])?'

# String literal
t_STRING = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''

# Comment (C-Style)
#
# The raw-string docstring is the token's regular expression (PLY reads the
# pattern of a function rule from its docstring), so it has to stay the first
# statement of the function and cannot be replaced by descriptive text.
def t_COMMENT(t):
    r'/\*(.|\n)*?\*/'
    # A block comment can span several lines; advance the lexer's line
    # counter by the number of newlines it swallowed.
    t.lexer.lineno += t.value.count('\n')
    # The token is returned rather than discarded, so comments show up in the
    # token stream.
    return t

# Comment (C++-Style)
#
# The raw-string docstring is the token's regular expression (PLY reads the
# pattern of a function rule from its docstring), so it has to stay the first
# statement of the function.
def t_CPPCOMMENT(t):
    r'//.*\n?'
    # The trailing newline is optional so that a '//' comment on the very last
    # line of the input (no final newline) is still recognised as a comment
    # instead of falling through to two DIVIDE tokens.
    # Count the newlines actually consumed: one normally, zero at end of input.
    t.lexer.lineno += t.value.count('\n')
    return t
