# grammar.py revision 6660
1# Copyright (c) 2006-2009 Nathan Binkert <nate@binkert.org>
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met: redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer;
8# redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution;
11# neither the name of the copyright holders nor the names of its
12# contributors may be used to endorse or promote products derived from
13# this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27from ply import lex, yacc
28
class TokenError(lex.LexError):
    """Lexing error that remembers the token it was raised for.

    Attributes:
        token: the token object handed to the constructor (may be None).
        text:  set by lex.LexError; the token's ``value`` when the token
               has one, otherwise None.
    """

    def __init__(self, msg, t):
        # lex.LexError.__init__ takes (message, s) where ``s`` is the
        # offending input text.  Passing only the message raised a
        # TypeError, so forward the token's value as that text.
        super(TokenError, self).__init__(msg, getattr(t, 'value', None))
        self.token = t
33
class ParseError(yacc.YaccError):
    """Syntax error reported by a grammar, optionally tied to a token.

    Attributes:
        token: the token supplied by the raiser, or None when no token
               was given.
    """

    def __init__(self, message, token=None):
        yacc.YaccError.__init__(self, message)
        self.token = token
38
class Tokenizer(object):
    """Iterator that feeds a lexer its input one chunk at a time.

    The wrapped lexer must provide ``input(text)`` and ``token()``, and
    its ``lineno`` attribute is overwritten for every chunk.  Attributes
    not defined on the Tokenizer itself are looked up on the wrapped
    lexer, so the object can be used where the lexer is expected.
    """

    def __init__(self, lexer, data):
        """Prepare to tokenize ``data`` with ``lexer``.

        Args:
            lexer: object providing ``input(text)``, ``token()`` and a
                   writable ``lineno`` attribute.
            data:  either a single string, which is lexed as one chunk,
                   or any iterable of strings (a list of lines, an open
                   file, ...), which is lexed chunk by chunk.
        """
        try:
            string_types = basestring  # Python 2: str and unicode
        except NameError:
            string_types = str  # Python 3
        if isinstance(data, string_types):
            chunks = [data]
        else:
            # File objects already iterate line by line, so they need no
            # special casing (and no deprecated xreadlines() call).
            chunks = data

        def _input():
            for index, chunk in enumerate(chunks):
                # lineno is set to the 1-based index of the current chunk.
                lexer.lineno = index + 1
                lexer.input(chunk)
                while True:
                    tok = lexer.token()
                    if not tok:
                        # The lexer ran out of tokens for this chunk.
                        break
                    yield tok

        self.input = _input()
        self.lexer = lexer

    def next(self):
        """Return the next token; raise StopIteration when exhausted."""
        return next(self.input)

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    def __iter__(self):
        return self

    def token(self):
        """Return the next token, or None once all input is consumed."""
        try:
            return self.next()
        except StopIteration:
            return None

    def __getattr__(self, attr):
        # Only reached when normal lookup fails.  If 'lexer' itself is
        # missing (instance not initialised yet), fail cleanly instead of
        # recursing back into this method forever.
        if attr == 'lexer':
            raise AttributeError(attr)
        return getattr(self.lexer, attr)
74
class Grammar(object):
    """Base class bundling a PLY lexer and parser built from the instance.

    The instance itself is handed to PLY as the ``module``.  The ``lexer``
    and ``parser`` attributes are created on first access and then cached
    on the instance.
    """

    def __init__(self, output=None, debug=False):
        """Collect the keyword arguments later passed to yacc.

        Args:
            output: optional path of the parser table file; its file name
                    must end with '.py'.  The directory part becomes
                    yacc's 'outputdir' and the file name without the
                    extension becomes 'tabmodule'.
            debug:  stored as yacc's 'debug' argument.

        Raises:
            AttributeError: if the file name in ``output`` does not end
                            with '.py'.
        """
        self.yacc_args = {'debug': debug}

        if output:
            import os

            outdir, tabfile = os.path.split(output)
            if not tabfile.endswith('.py'):
                raise AttributeError('The output file must end with .py')
            self.yacc_args['outputdir'] = outdir
            self.yacc_args['tabmodule'] = tabfile[:-3]

    def t_error(self, t):
        """Lexer error hook: raise lex.LexError for the offending character."""
        char = repr(t.value[0])
        raise lex.LexError("Illegal character %s @ %d:%d" %
                           (char, t.lineno, t.lexpos), char)

    def p_error(self, t):
        """Parser error hook: raise ParseError describing where parsing failed.

        ``t`` is the offending token, or a false value when the input ended.
        """
        if t:
            msg = "Syntax error at %d:%d\n>>%s<<" % \
                  (t.lineno, t.lexpos + 1, t.value)
        else:
            msg = "Syntax error at end of input"
        raise ParseError(msg, t)

    def __getattr__(self, attr):
        # Only called when normal lookup fails, i.e. before the parser or
        # lexer has been built.  Build it once and store it on the
        # instance so later accesses bypass this method.
        if attr == 'parser':
            parser = yacc.yacc(module=self, **self.yacc_args)
            self.parser = parser
            return parser

        if attr == 'lexer':
            lexer = lex.lex(module=self)
            self.lexer = lexer
            return lexer

        raise AttributeError("'%s' object has no attribute '%s'" %
                             (self.__class__.__name__, attr))

    def parse(self, stmt, **kwargs):
        """Parse ``stmt`` and return the parser's result.

        Args:
            stmt:     the input, wrapped in a Tokenizer before parsing.
            **kwargs: passed through to the parser's ``parse`` method.
        """
        self.lexer.lineno = 1
        tokenizer = Tokenizer(self.lexer, stmt)
        result = self.parser.parse(lexer=tokenizer, **kwargs)
        self.parser.restart()

        return result
123
124