# clex.py revision 6498
1# ----------------------------------------------------------------------
2# clex.py
3#
4# A lexer for ANSI C.
5# ----------------------------------------------------------------------
6
7import sys
8sys.path.insert(0,"../..")
9
10import ply.lex as lex
11
# Reserved words
# The C keywords, spelled as upper-case token names.  They are prepended to
# the token list and used as the values of the keyword lookup table.
reserved = (
    'AUTO', 'BREAK', 'CASE', 'CHAR',
    'CONST', 'CONTINUE', 'DEFAULT', 'DO',
    'DOUBLE', 'ELSE', 'ENUM', 'EXTERN',
    'FLOAT', 'FOR', 'GOTO', 'IF',
    'INT', 'LONG', 'REGISTER', 'RETURN',
    'SHORT', 'SIGNED', 'SIZEOF', 'STATIC',
    'STRUCT', 'SWITCH', 'TYPEDEF', 'UNION',
    'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
)
19
# Full token list: the reserved words followed by every literal, operator
# and punctuation token name, in the order given below.
tokens = reserved + (
    # Literals
    'ID',            # identifier
    'TYPEID',        # type identifier (no rule visible in this file emits it)
    'ICONST',        # integer constant
    'FCONST',        # float constant
    'SCONST',        # string constant
    'CCONST',        # character constant

    # Arithmetic operators
    'PLUS',          # +
    'MINUS',         # -
    'TIMES',         # *
    'DIVIDE',        # /
    'MOD',           # %

    # Bitwise operators
    'OR',            # |
    'AND',           # &
    'NOT',           # ~
    'XOR',           # ^
    'LSHIFT',        # <<
    'RSHIFT',        # >>

    # Logical operators
    'LOR',           # ||
    'LAND',          # &&
    'LNOT',          # !

    # Comparison operators
    'LT',            # <
    'LE',            # <=
    'GT',            # >
    'GE',            # >=
    'EQ',            # ==
    'NE',            # !=

    # Assignment
    'EQUALS',        # =
    'TIMESEQUAL',    # *=
    'DIVEQUAL',      # /=
    'MODEQUAL',      # %=
    'PLUSEQUAL',     # +=
    'MINUSEQUAL',    # -=
    'LSHIFTEQUAL',   # <<=
    'RSHIFTEQUAL',   # >>=
    'ANDEQUAL',      # &=
    'XOREQUAL',      # ^=
    'OREQUAL',       # |=

    # Increment/decrement
    'PLUSPLUS',      # ++
    'MINUSMINUS',    # --

    # Structure dereference
    'ARROW',         # ->

    # Conditional operator
    'CONDOP',        # ?

    # Delimiters
    'LPAREN',        # (
    'RPAREN',        # )
    'LBRACKET',      # [
    'RBRACKET',      # ]
    'LBRACE',        # {
    'RBRACE',        # }
    'COMMA',         # ,
    'PERIOD',        # .
    'SEMI',          # ;
    'COLON',         # :

    # Ellipsis
    'ELLIPSIS',      # ...
)
52
# Completely ignored characters: space, tab and form feed (\x0c).
# Newlines are not listed here; they are handled by the t_NEWLINE rule.
t_ignore           = ' \t\x0c'
55
56# Newlines
def t_NEWLINE(t):
    r'\n+'
    # The docstring above is the rule's regular expression (a run of
    # newlines), so it must stay exactly as written.
    # Nothing is returned, so no token is produced; the rule only keeps
    # the lexer's line counter in step with the input.
    newline_total = t.value.count("\n")
    t.lexer.lineno = t.lexer.lineno + newline_total
60
# Operators
# Each t_NAME string is the regular expression for token NAME.  Characters
# that are regex metacharacters (+ * | ^) are backslash-escaped.
# NOTE(review): PLY is documented to try string rules longest-pattern-first,
# which is what lets '<<' win over '<' and '||' over '|' — confirm against
# the PLY version in use.

# Arithmetic
t_PLUS             = r'\+'
t_MINUS            = r'-'
t_TIMES            = r'\*'
t_DIVIDE           = r'/'
t_MOD              = r'%'
# Bitwise
t_OR               = r'\|'
t_AND              = r'&'
t_NOT              = r'~'
t_XOR              = r'\^'
t_LSHIFT           = r'<<'
t_RSHIFT           = r'>>'
# Logical
t_LOR              = r'\|\|'
t_LAND             = r'&&'
t_LNOT             = r'!'
# Comparison
t_LT               = r'<'
t_GT               = r'>'
t_LE               = r'<='
t_GE               = r'>='
t_EQ               = r'=='
t_NE               = r'!='
82
# Assignment operators
# Each t_NAME string is the regular expression for token NAME.  Regex
# metacharacters (* + | ^) must be backslash-escaped to match literally.

t_EQUALS           = r'='
t_TIMESEQUAL       = r'\*='
t_DIVEQUAL         = r'/='
t_MODEQUAL         = r'%='
t_PLUSEQUAL        = r'\+='
t_MINUSEQUAL       = r'-='
t_LSHIFTEQUAL      = r'<<='
t_RSHIFTEQUAL      = r'>>='
t_ANDEQUAL         = r'&='
t_OREQUAL          = r'\|='
# The caret is escaped: a bare '^' is the start-of-input anchor, so the
# unescaped pattern could never match the two characters "^=" and the
# operator was split into XOR followed by EQUALS.
t_XOREQUAL         = r'\^='
96
# Increment/decrement
t_PLUSPLUS         = r'\+\+'
t_MINUSMINUS       = r'--'

# Structure dereference: ->
t_ARROW            = r'->'

# Conditional operator: ?  (escaped because '?' is a regex quantifier)
t_CONDOP           = r'\?'

# Delimiters
# Brackets, braces, parentheses and the period are regex metacharacters,
# so they are backslash-escaped to match literally.
t_LPAREN           = r'\('
t_RPAREN           = r'\)'
t_LBRACKET         = r'\['
t_RBRACKET         = r'\]'
t_LBRACE           = r'\{'
t_RBRACE           = r'\}'
t_COMMA            = r','
t_PERIOD           = r'\.'
t_SEMI             = r';'
t_COLON            = r':'
# Ellipsis: three literal periods
t_ELLIPSIS         = r'\.\.\.'
119
120# Identifiers and reserved words
121
# Lookup table from a keyword's lower-case spelling to its token name,
# e.g. 'while' -> 'WHILE'.  A comprehension is used instead of a
# module-level loop so that no loop variable is left behind as a global.
reserved_map = {keyword.lower(): keyword for keyword in reserved}
125
def t_ID(t):
    r'[A-Za-z_][\w_]*'
    # The docstring above is the rule's regular expression and must stay
    # exactly as written.
    # One rule matches both identifiers and keywords; the keyword table
    # decides which token type the matched text receives.
    keyword_type = reserved_map.get(t.value)
    if keyword_type is None:
        t.type = "ID"
    else:
        t.type = keyword_type
    return t
130
# Integer literal
# Decimal digits with an optional unsigned/long suffix.  The two-letter
# suffixes come first because regex alternation takes the first branch
# that matches: with [uU] leading, "10UL" was lexed as the constant "10U"
# followed by a stray identifier "L".
t_ICONST = r'\d+([uU][lL]|[lL][uU]|[uU]|[lL])?'
133
# Floating literal
# Two forms: digits '.' digits with an optional exponent, or digits with a
# mandatory exponent; either may carry an l/L/f/F suffix.  The spaces around
# '|' are insignificant only when the pattern is compiled in verbose mode
# (PLY's documented default).  The exponent marker accepts both cases: with
# a lowercase-only 'e', "1.5E10" was lexed as "1.5" followed by the
# identifier "E10".
t_FCONST = r'((\d+)(\.\d+)([eE](\+|-)?(\d+))? | (\d+)[eE](\+|-)?(\d+))([lL]|[fF])?'
136
# String literal
# A double-quoted run in which each element is either a backslash escape
# (backslash plus one character) or any character other than a backslash
# or a newline.  The repetition is non-greedy, so the match stops at the
# first closing quote that is not part of an escape.
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'
139
# Character constant 'c' or L'c'
# Same body as the string rule but delimited by single quotes, with an
# optional leading L.
# NOTE(review): t_ID is a function rule and PLY is documented to try
# function rules before string rules, so in L'c' the leading L is
# presumably lexed as an ID first and the (L)? branch never fires — confirm.
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
142
143# Comments
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    # The docstring above is the rule's regular expression: a /* ... */
    # block matched non-greedily, newlines included.
    # Nothing is returned, so the comment yields no token; only the line
    # counter is advanced by the newlines the comment spans.
    spanned_newlines = t.value.count('\n')
    t.lexer.lineno = t.lexer.lineno + spanned_newlines
147
148# Preprocessor directive (ignored)
def t_preprocessor(t):
    r'\#(.)*?\n'
    # The docstring above is the rule's regular expression: from '#' up to
    # and including the first newline.
    # Nothing is returned, so the directive yields no token.  The match
    # always contains exactly one newline, hence the fixed increment.
    line_before = t.lexer.lineno
    t.lexer.lineno = line_before + 1
152
def t_error(t):
    # Called with the unmatched remainder of the input in t.value: report
    # the first offending character, then step over that one character.
    offending = t.value[0]
    message = "Illegal character %s" % repr(offending)
    print(message)
    t.lexer.skip(1)
156
# Build the lexer at import time so other modules can use `lexer` directly.
# NOTE(review): optimize=1 presumably switches PLY into its optimized mode
# (reduced validation, cached lexer tables) — confirm against the PLY docs.
lexer = lex.lex(optimize=1)
# When run as a script, hand the lexer to PLY's runmain driver.
if __name__ == "__main__":
    lex.runmain(lexer)
160
161
162
163
164
165