1# parser for Unix yacc-based grammars
2#
3# Author: David Beazley (dave@dabeaz.com)
4# Date  : October 2, 2006
5
6import ylex
7tokens = ylex.tokens
8
9from ply import *
10
# Names gathered from the token/precedence declarations while parsing
# (filled in by p_definition_token, printed by p_defsection).
tokenlist = list()

# Precedence entries of the form ('left'|'right'|'nonassoc', name, ...).
preclist = list()

# When true, print_code() echoes the original code fragments as comments.
emit_code = 1
15
def p_yacc(p):
    '''yacc : defsection rulesection'''
    # Top-level rule: a definitions section followed by a rules section.
    # No semantic action is needed here; the sub-rules print their own output.
18
def p_defsection(p):
    '''defsection : definitions SECTION
                  | SECTION'''
    # Runs once the SECTION token that ends the (possibly empty) definitions
    # part has been seen.  Prints the collected token and precedence tables
    # and the banner that precedes the generated rule functions.
    #
    # The print calls take a single pre-formatted string so the same bytes
    # are produced by the Python 2 print statement and the Python 3 print
    # function (note the two spaces after '=', matching `print "x = ", y`).

    # Flag stored on the lexer object.
    # NOTE(review): presumably tells ylex that the definitions section is
    # over -- confirm against ylex.
    p.lexer.lastsection = 1

    print("tokens =  %s" % repr(tokenlist))
    print("")
    print("precedence =  %s" % repr(preclist))
    print("")
    print("# -------------- RULES ----------------")
    print("")
29
def p_rulesection(p):
    '''rulesection : rules SECTION'''
    # Runs after all rules and the closing SECTION token have been parsed.
    # Prints the end-of-rules banner, then echoes the value of the SECTION
    # token through print_code() (i.e. as comment lines at column 0).
    # NOTE(review): presumably ylex attaches the trailing user code to this
    # final SECTION token -- confirm against ylex.
    #
    # print(...) with one string argument behaves identically under the
    # Python 2 print statement and the Python 3 print function.
    print("# -------------- RULES END ----------------")
    print_code(p[2], 0)
35
def p_definitions(p):
    '''definitions : definitions definition
                   | definition'''
    # One or more definitions (left-recursive list).  Nothing is collected
    # here; each `definition` rule performs its own side effects.
39
def p_definition_literal(p):
    '''definition : LITERAL'''
    # Echo the text of a LITERAL token as top-level comment lines
    # (indent 0) via print_code().
    literal_text = p[1]
    print_code(literal_text, 0)
43
def p_definition_start(p):
    '''definition : START ID'''
    # Translate a START declaration into a `start = '<name>'` assignment in
    # the generated output.  p[2] is the ID naming the start symbol.
    #
    # Written as print(<one string>) so it is valid, and prints the same
    # line, under both Python 2 and Python 3.
    print("start = '%s'" % p[2])
47
def p_definition_token(p):
    '''definition : toktype opttype idlist optsemi '''
    # Record a token/precedence declaration.
    #   p[1] -- the declaration keyword as delivered by the lexer
    #   p[3] -- list of token names / quoted literals from idlist
    # Every entry that does not start with a quote character is added to
    # the global tokenlist; for the three associativity keywords a
    # precedence tuple (assoc, name, ...) is added to the global preclist.
    names = p[3]

    for name in names:
        if name[0] in "'\"":
            # quoted literal: not a named token
            continue
        tokenlist.append(name)

    associativity = {
        '%left': 'left',
        '%right': 'right',
        '%nonassoc': 'nonassoc',
    }.get(p[1])
    if associativity is not None:
        preclist.append((associativity,) + tuple(names))
59
def p_toktype(p):
    '''toktype : TOKEN
               | LEFT
               | RIGHT
               | NONASSOC'''
    # Pass the declaration keyword's token value straight through so that
    # p_definition_token can decide what to record.
    keyword = p[1]
    p[0] = keyword
66
def p_opttype(p):
    '''opttype : '<' ID '>'
               | empty'''
    # Optional '<' ID '>' type tag on a token declaration.  It is parsed
    # but not used: no value is assigned to p[0].
70
def p_idlist(p):
    '''idlist  : idlist optcomma tokenid
               | tokenid'''
    # Build the list of declared names.  A single tokenid starts a new
    # list; otherwise the existing list (p[1]) is extended in place with
    # the new tokenid (p[3]) and reused as the result.
    if len(p) != 2:
        names = p[1]
        p[0] = names
        names.append(p[3])
        return
    p[0] = [p[1]]
79
def p_tokenid(p):
    '''tokenid : ID
               | ID NUMBER
               | QLITERAL
               | QLITERAL NUMBER'''
    # Only the name (or quoted literal) is kept; an optional trailing
    # NUMBER is accepted by the grammar but dropped.
    name = p[1]
    p[0] = name
86
def p_optsemi(p):
    '''optsemi : ';'
               | empty'''
    # Optional terminating semicolon; carries no value.
90
def p_optcomma(p):
    '''optcomma : ','
                | empty'''
    # Optional comma between list entries; carries no value.
94
def p_definition_type(p):
    '''definition : TYPE '<' ID '>' namelist optsemi'''
    # Type declarations are accepted by the grammar but ignored:
    # nothing is recorded and nothing is printed for them.
98
def p_namelist(p):
    '''namelist : namelist optcomma ID
                | ID'''
    # List of IDs, optionally comma-separated.  The names are not
    # collected (no value is assigned to p[0]).
102
def p_definition_union(p):
    '''definition : UNION CODE optsemi'''
    # Union declarations are accepted by the grammar but ignored:
    # the CODE block is discarded and nothing is printed.
106
def p_rules(p):
    '''rules   : rules rule
               | rule'''
    # Print a Python (PLY-style) equivalent of the rule that was just
    # reduced.  The rule is the last symbol of either alternative and has
    # the shape (name, alternatives), where each alternative is a list of
    # symbol strings as built by the rule/morerules/rulelist productions.
    #
    # For every alternative one function `p_<name>_<n>` is printed, with the
    # production in its docstring.  A code block (an item starting with '{')
    # in final position becomes the commented-out body of that function; a
    # code block anywhere else is an embedded action and is replaced by a
    # generated empty rule `_embed<k>_<name>`, whose function is printed
    # after all alternatives of the rule.
    #
    # All print calls take exactly one string so the output is the same
    # under the Python 2 print statement and the Python 3 print function.
    if len(p) == 2:
        rule = p[1]
    else:
        rule = p[2]

    rulename = rule[0]
    embedded = []       # (generated rule name, code) for embedded actions

    for index, symbols in enumerate(rule[1]):
        # symbols holds one of the rule possibilities; numbering starts at 1
        print("def p_%s_%d(p):" % (rulename, index + 1))
        prod = []
        prodcode = ""
        last = len(symbols) - 1
        for pos, item in enumerate(symbols):
            if item[0] != '{':
                # ordinary grammar symbol
                prod.append(item)
            elif pos == last:
                # trailing code block: the action of this alternative
                prodcode = item
            else:
                # embedded action; len(embedded) is the running counter
                embed_name = "_embed%d_%s" % (len(embedded), rulename)
                prod.append(embed_name)
                embedded.append((embed_name, item))
        print("    '''%s : %s'''" % (rulename, " ".join(prod)))
        # Emit the action code (as comments, indented into the function)
        print_code(prodcode, 4)
        print("")

    for embed_name, code in embedded:
        print("def p_%s(p):" % embed_name)
        print("    '''%s : '''" % embed_name)
        print_code(code, 4)
        print("")
152
def p_rule(p):
    '''rule : ID ':' rulelist ';' '''
    # A rule with exactly one alternative: result is
    # (rule name, [list of that alternative's items]).
    name = p[1]
    only_alternative = p[3]
    p[0] = (name, [only_alternative])
156
def p_rule2(p):
    '''rule : ID ':' rulelist morerules ';' '''
    # A rule with several alternatives: the first alternative (p[3]) is
    # put in front of the list built by morerules (p[4]), which is
    # modified in place and reused in the result.
    alternatives = p[4]
    alternatives.insert(0, p[3])
    p[0] = (p[1], alternatives)
161
def p_rule_empty(p):
    '''rule : ID ':' ';' '''
    # A rule whose single alternative is empty: one empty item list.
    name = p[1]
    p[0] = (name, [[]])
165
def p_rule_empty2(p):
    '''rule : ID ':' morerules ';' '''
    # A rule whose first alternative is empty: an empty item list is put
    # in front of the alternatives from morerules (p[3], changed in place).
    alternatives = p[3]
    alternatives.insert(0, [])
    p[0] = (p[1], alternatives)
171
def p_morerules(p):
    '''morerules : morerules '|' rulelist
                 | '|' rulelist
                 | '|'  '''
    # Collect the alternatives that follow a '|' into a list of item lists.
    size = len(p)
    if size not in (2, 3):
        # morerules '|' rulelist: extend the existing list in place
        alternatives = p[1]
        p[0] = alternatives
        alternatives.append(p[3])
    elif size == 3:
        # '|' rulelist: first additional alternative
        p[0] = [p[2]]
    else:
        # lone '|': a single empty alternative
        p[0] = [[]]
184
185#   print "morerules", len(p), p[0]
186
def p_rulelist(p):
    '''rulelist : rulelist ruleitem
               | ruleitem'''
    # Build the item list of one alternative.  A single ruleitem starts a
    # new list; otherwise the existing list (p[1]) is extended in place
    # with the new item (p[2]) and reused as the result.
    if len(p) != 2:
        items = p[1]
        p[0] = items
        items.append(p[2])
        return
    p[0] = [p[1]]
196
def p_ruleitem(p):
    '''ruleitem : ID
               | QLITERAL
               | CODE
               | PREC'''
    # Any single item of a production; its token value is passed through.
    item = p[1]
    p[0] = item
203
def p_empty(p):
    '''empty : '''
    # Empty production, used by the optional rules (opttype, optsemi,
    # optcomma).  Carries no value.
206
def p_error(p):
    # Syntax-error hook.  PLY passes the offending token, or None when the
    # error is detected at end of input.
    #
    # Previously this silently did nothing, so a malformed grammar produced
    # truncated or garbled output with no indication why.  A diagnostic is
    # now written to stderr; stdout (the generated code) is untouched and
    # nothing is raised, so the parser's normal error recovery is unchanged.
    import sys

    if p is None:
        sys.stderr.write("syntax error: unexpected end of input\n")
        return

    # getattr with defaults: do not fail while reporting a failure.
    sys.stderr.write("syntax error at line %s: unexpected %s %r\n" % (
        getattr(p, 'lineno', '?'),
        getattr(p, 'type', '?'),
        getattr(p, 'value', None),
    ))
209
# Build the parser from the p_* grammar functions defined above.
# debug=0 is passed to PLY's yacc.yacc(); per the PLY documentation this
# turns off generation of its debugging output.
yacc.yacc(debug=0)
211
def print_code(code,indent):
    # Print `code` as comment lines: each line of the text is written as
    # "<indent spaces># <line>".  Nothing is printed when the module-level
    # emit_code flag is false or when `code` has no lines (e.g. "").
    #
    #   code   -- string holding the original code fragment
    #   indent -- number of spaces to put in front of each '#'
    #
    # print(<one string>) is used so the function is valid, and prints the
    # same bytes, under both Python 2 and Python 3.
    if not emit_code:
        return
    prefix = " " * indent
    for line in code.splitlines():
        print("%s# %s" % (prefix, line))
217
218