# grammar.py revision 6501
# Copyright (c) 2006-2009 Nathan Binkert <nate@binkert.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os

from ply import lex, yacc

class TokenError(lex.LexError):
    """Lexing error that remembers the token it was raised for.

    msg -- human-readable description of the problem.
    t   -- the offending token; kept on the instance as ``token``.
    """

    def __init__(self, msg, t):
        # lex.LexError is constructed with (message, text) -- see the
        # two-argument call in Grammar.t_error -- so forwarding the
        # message alone fails with TypeError.  Use the token's value as
        # the text when the token has one.
        super(TokenError, self).__init__(msg, getattr(t, 'value', None))
        self.token = t

class ParseError(yacc.YaccError):
    """Parser error that optionally records the token being processed.

    message -- text describing the error.
    token   -- the token at which parsing failed, or None when no token
               is available; kept on the instance as ``token``.
    """

    def __init__(self, message, token=None):
        yacc.YaccError.__init__(self, message)
        self.token = token

class Tokenizer(object):
    """Iterator that feeds input to a lexer chunk by chunk.

    lexer -- object providing input(text), token() and a writable
             ``lineno`` attribute.
    data  -- either a single string, which is lexed as one chunk, or any
             iterable of strings (an open file, a list of lines, ...),
             which is lexed one item at a time.
    """

    def __init__(self, lexer, data):
        try:
            string_types = basestring       # Python 2: str and unicode
        except NameError:
            string_types = (str, bytes)     # Python 3

        # A bare string is a single chunk.  Everything else, open files
        # included, is iterated directly; for a file that yields the same
        # lines the deprecated file.xreadlines() used to return.
        if isinstance(data, string_types):
            chunks = [data]
        else:
            chunks = data

        def _input():
            for index, chunk in enumerate(chunks):
                # Hand the lexer a fresh chunk and number chunks from 1.
                lexer.lineno = index + 1
                lexer.input(chunk)
                while True:
                    tok = lexer.token()
                    if not tok:
                        break
                    yield tok
        self.input = _input()

    def next(self):
        """Return the next token; raise StopIteration when exhausted."""
        return next(self.input)

    # Python 3 looks up __next__ for the iterator protocol.
    __next__ = next

    def __iter__(self):
        return self

    def token(self):
        """Return the next token, or None once the input is exhausted."""
        try:
            return self.next()
        except StopIteration:
            return None

class Grammar(object):
    """Base for PLY grammars whose lexer and parser are built on demand.

    The instance itself is handed to lex.lex() and yacc.yacc() as the
    ``module``.  Presumably subclasses supply the token and production
    rules -- confirm against the users of this class.

    output -- optional path of the parser table file.  It must end in
              '.py'; its directory and base name are passed to yacc as
              ``outputdir`` and ``tabmodule``.
    debug  -- forwarded to yacc as its ``debug`` argument.
    """

    def __init__(self, output=None, debug=False):
        self.yacc_args = {'debug': debug}

        if output:
            outdir, tabfile = os.path.split(output)
            if not tabfile.endswith('.py'):
                raise AttributeError('The output file must end with .py')
            self.yacc_args['outputdir'] = outdir
            self.yacc_args['tabmodule'] = tabfile[:-3]   # drop '.py'

    def t_error(self, t):
        """Lexer error hook: raise lex.LexError for the offending character."""
        char = repr(t.value[0])
        raise lex.LexError("Illegal character %s @ %d:%d" %
                           (char, t.lineno, t.lexpos), char)

    def p_error(self, t):
        """Parser error hook: raise ParseError; t is None at end of input."""
        if t:
            msg = "Syntax error at %d:%d\n>>%s<<" % \
                  (t.lineno, t.lexpos + 1, t.value)
        else:
            msg = "Syntax error at end of input"
        raise ParseError(msg, t)

    def __getattr__(self, attr):
        # __getattr__ only runs when normal lookup fails.  Storing the
        # freshly built object on the instance means later accesses find
        # it directly and never come back here.
        if attr == 'parser':
            parser = yacc.yacc(module=self, **self.yacc_args)
            self.parser = parser
            return parser

        if attr == 'lexer':
            lexer = lex.lex(module=self)
            self.lexer = lexer
            return lexer

        raise AttributeError("'%s' object has no attribute '%s'" %
                             (self.__class__.__name__, attr))

    def parse(self, stmt, **kwargs):
        """Parse stmt and return the parser's result.

        stmt is wrapped in a Tokenizer, so it may be a string or an
        iterable of strings.  Extra keyword arguments are passed through
        to the parser's parse() call.
        """
        self.lexer.lineno = 1
        tokens = Tokenizer(self.lexer, stmt)
        result = self.parser.parse(lexer=tokens, **kwargs)
        self.parser.restart()

        return result