# clex.py revision 2632:1bb2f91485ea
# ----------------------------------------------------------------------
# clex.py
#
# A lexer for ANSI C.
# ----------------------------------------------------------------------

import lex

# Reserved words.  Each entry is the token name of one C keyword; the
# keyword itself is the lower-case spelling of the name.
reserved = (
    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
    'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
    'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
    'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
    )

# Token names: the reserved words followed by the literal, operator and
# punctuation names listed below.
tokens = reserved + (
    # Literals (identifier, integer constant, float constant, string constant, char const)
    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'PLUSPLUS', 'MINUSMINUS',

    # Structure dereference (->)
    'ARROW',

    # Conditional operator (?)
    'CONDOP',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
    )

# Completely ignored characters: space, tab and form feed (\x0c).
t_ignore           = ' \t\x0c'

# Newlines.
# The raw string that opens the function is the rule's regular expression
# (one or more newline characters), so it must stay the first statement.
# The line counter on the token is advanced by the number of newlines in
# the matched text; nothing is returned.
def t_NEWLINE(t):
    r'\n+'
    t.lineno += t.value.count("\n")

# Operators
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_TIMES            = r'\*'
t_DIVIDE           = r'/'
t_MOD              = r'%'
t_OR               = r'\|'
t_AND              = r'&'
t_NOT              = r'~'
# The caret must be escaped: a bare '^' is the start-of-string anchor and
# would never match a literal '^' character.
t_XOR              = r'\^'
t_LSHIFT           = r'<<'
t_RSHIFT           = r'>>'
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_LNOT             = r'!'
t_LT               = r'<'
t_GT               = r'>'
t_LE               = r'<='
t_GE               = r'>='
t_EQ               = r'=='
t_NE               = r'!='

# Assignment operators

t_EQUALS           = r'='
t_TIMESEQUAL       = r'\*='
t_DIVEQUAL         = r'/='
t_MODEQUAL         = r'%='
t_PLUSEQUAL        = r'\+='
t_MINUSEQUAL       = r'-='
t_LSHIFTEQUAL      = r'<<='
t_RSHIFTEQUAL      = r'>>='
t_ANDEQUAL         = r'&='
t_OREQUAL          = r'\|='
# Escaped caret: an unescaped '^' would anchor at the start of the string
# instead of matching the '^' of '^='.
t_XOREQUAL         = r'\^='

# Increment/decrement
t_PLUSPLUS         = r'\+\+'
t_MINUSMINUS       = r'--'

# Structure dereference (->)
t_ARROW            = r'->'

# Conditional operator (?)
t_CONDOP           = r'\?'

# Delimiters
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_COMMA            = r','
t_PERIOD           = r'\.'
t_SEMI             = r';'
t_COLON            = r':'
t_ELLIPSIS         = r'\.\.\.'

# Identifiers and reserved words

# Lookup table from a keyword's source spelling (the lower-cased token
# name) to its token name, e.g. 'while' -> 'WHILE'.
reserved_map = {}
for r in reserved:
    reserved_map[r.lower()] = r

# Identifier rule.  The raw string that opens the function is the rule's
# regular expression and must stay the first statement.  Text found in
# reserved_map gets that keyword's token name as its type; anything else
# is typed "ID".  The token is returned with its type updated.
def t_ID(t):
    r'[A-Za-z_][\w_]*'
    t.type = reserved_map.get(t.value,"ID")
    return t

128# Integer literal
129t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
130
131# Floating literal
132t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
133
134# String literal
135t_SCONST = r'\"([^\\\n]|(\\.))*?\"'
136
137# Character constant 'c' or L'c'
138t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
139
# Comments.
# The raw string that opens the function is the rule's regular expression
# (a /* ... */ block, matched non-greedily across lines) and must stay the
# first statement.  The line counter is advanced by the number of newlines
# inside the comment; nothing is returned.
def t_comment(t):
    r' /\*(.|\n)*?\*/'
    t.lineno += t.value.count('\n')

# Preprocessor directive (ignored).
# The raw string that opens the function is the rule's regular expression:
# a '#' followed by the rest of the line, including its newline.  The line
# counter is bumped by one for that newline; nothing is returned.
def t_preprocessor(t):
    r'\#(.)*?\n'
    t.lineno += 1

# Error rule: report the first character of the remaining text and ask the
# token to skip one character.
# The single-argument parenthesised print produces the same output under
# Python 2 and also parses under Python 3.
def t_error(t):
    print("Illegal character %s" % repr(t.value[0]))
    t.skip(1)

# Build the lexer with lex.lex(optimize=1); when this file is executed as a
# script, hand the lexer to lex.runmain.
lexer = lex.lex(optimize=1)
if __name__ == "__main__":
    lex.runmain(lexer)