# parser for Unix yacc-based grammars
#
# Author: David Beazley (dave@dabeaz.com)
# Date  : October 2, 2006
#
# NOTE: the docstring of every p_* function is a grammar specification that
# PLY reads at table-construction time, so explanatory text for those
# functions lives in '#' comments, never in the docstring itself.
#
# All output uses single-argument print(...) calls, which produce identical
# text under both Python 2 and Python 3.

import ylex
tokens = ylex.tokens

from ply import *

# Token names collected from the definitions section, in first-seen order.
tokenlist = []
# Precedence entries: (associativity, name, name, ...) tuples in declaration order.
preclist = []

# When false, print_code() emits nothing.
emit_code = 1


# Top-level rule: a definitions section followed by a rules section.
def p_yacc(p):
    '''yacc : defsection rulesection'''


# End of the definitions section: flag the lexer and emit the collected
# token and precedence tables ahead of the translated rules.
def p_defsection(p):
    '''defsection : definitions SECTION
                  | SECTION'''
    p.lexer.lastsection = 1
    # "%s %s" reproduces the single separating space that the original
    # two-operand print statement inserted between its operands.
    print("%s %s" % ("tokens = ", repr(tokenlist)))
    print("")
    print("%s %s" % ("precedence = ", repr(preclist)))
    print("")
    print("# -------------- RULES ----------------")
    print("")


# End of the rules section: emit the closing banner, then pass the text of
# the trailing SECTION token through print_code().
def p_rulesection(p):
    '''rulesection : rules SECTION'''

    print("# -------------- RULES END ----------------")
    print_code(p[2], 0)


def p_definitions(p):
    '''definitions : definitions definition
                   | definition'''


# A literal block in the definitions section is passed through print_code().
def p_definition_literal(p):
    '''definition : LITERAL'''
    print_code(p[1], 0)


# %start declaration: emit the start-symbol assignment.
def p_definition_start(p):
    '''definition : START ID'''
    print("start = '%s'" % p[2])


# %token / %left / %right / %nonassoc declaration.
def p_definition_token(p):
    '''definition : toktype opttype idlist optsemi '''
    for name in p[3]:
        # Quoted literals are not named tokens.  A name may be declared more
        # than once (e.g. under both %token and %left); record it only once
        # so the emitted token list has no duplicates.
        if name[0] not in ("'", '"') and name not in tokenlist:
            tokenlist.append(name)
    # Precedence-bearing keywords map to their associativity by dropping the
    # leading '%'; any other keyword contributes no precedence entry.
    if p[1] in ('%left', '%right', '%nonassoc'):
        preclist.append((p[1][1:],) + tuple(p[3]))


# Propagate the declaration keyword's text.
def p_toktype(p):
    '''toktype : TOKEN
               | LEFT
               | RIGHT
               | NONASSOC'''
    p[0] = p[1]


# Optional <type> tag; its value is discarded.
def p_opttype(p):
    '''opttype : '<' ID '>'
               | empty'''


# Build the list of declared names left to right.
def p_idlist(p):
    '''idlist : idlist optcomma tokenid
              | tokenid'''
    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        p[1].append(p[3])


# A declared name; a trailing NUMBER, if present, is dropped.
def p_tokenid(p):
    '''tokenid : ID
               | ID NUMBER
               | QLITERAL
               | QLITERAL NUMBER'''
    p[0] = p[1]


def p_optsemi(p):
    '''optsemi : ';'
               | empty'''


def p_optcomma(p):
    '''optcomma : ','
                | empty'''


def p_definition_type(p):
    '''definition : TYPE '<' ID '>' namelist optsemi'''
    # type declarations are ignored
# NOTE: the docstring of every p_* function below is a grammar specification
# read by PLY, so explanatory text for those functions is kept in '#' comments.
# Output uses single-argument print(...) calls, which produce identical text
# under both Python 2 and Python 3.

def p_namelist(p):
    '''namelist : namelist optcomma ID
                | ID'''


def p_definition_union(p):
    '''definition : UNION CODE optsemi'''
    # Union declarations are ignored


# Each time a rule is reduced, print its Python equivalent: one p_<name>_<n>
# function per alternative, followed by one function per embedded action.
def p_rules(p):
    '''rules : rules rule
             | rule'''
    # In both alternatives the newly reduced rule is the last symbol.
    rule = p[1] if len(p) == 2 else p[2]

    # A rule is a (name, alternatives) pair; each alternative is a list of items.
    rulename, alternatives = rule

    embedded = []       # (name, code) pairs for embedded actions (a mess)

    for rulecount, r in enumerate(alternatives, 1):
        # r contains one of the rule possibilities
        print("def p_%s_%d(p):" % (rulename, rulecount))
        prod = []
        prodcode = ""
        last = len(r) - 1
        for i, item in enumerate(r):
            if item[0] == '{':              # A code block
                if i == last:
                    # A trailing code block is the action for this alternative.
                    prodcode = item
                    break
                # A code block anywhere else is an embedded action: replace
                # it with a synthetic empty nonterminal, emitted afterwards.
                embed_name = "_embed%d_%s" % (len(embedded), rulename)
                prod.append(embed_name)
                embedded.append((embed_name, item))
            else:
                prod.append(item)
        # The docstring goes at the same 4-column indent print_code() uses
        # for the body, so the generated function stays valid if the
        # commented-out action code is later uncommented.
        print("    '''%s : %s'''" % (rulename, " ".join(prod)))
        # Emit code
        print_code(prodcode, 4)
        print("")

    for e, code in embedded:
        print("def p_%s(p):" % e)
        print("    '''%s : '''" % e)
        print_code(code, 4)
        print("")


# Rule with a single alternative.
def p_rule(p):
    '''rule : ID ':' rulelist ';' '''
    p[0] = (p[1], [p[3]])


# Rule whose first alternative is followed by '|'-separated ones.
def p_rule2(p):
    '''rule : ID ':' rulelist morerules ';' '''
    p[4].insert(0, p[3])
    p[0] = (p[1], p[4])


# Rule with a single, empty alternative.
def p_rule_empty(p):
    '''rule : ID ':' ';' '''
    p[0] = (p[1], [[]])


# Rule whose first alternative is empty, followed by '|'-separated ones.
def p_rule_empty2(p):
    '''rule : ID ':' morerules ';' '''

    p[3].insert(0, [])
    p[0] = (p[1], p[3])


# Collect the alternatives introduced by '|'; a bare '|' is an empty alternative.
def p_morerules(p):
    '''morerules : morerules '|' rulelist
                 | '|' rulelist
                 | '|'  '''

    if len(p) == 2:
        p[0] = [[]]
    elif len(p) == 3:
        p[0] = [p[2]]
    else:
        p[0] = p[1]
        p[0].append(p[3])


# Build the list of items making up one alternative, left to right.
def p_rulelist(p):
    '''rulelist : rulelist ruleitem
                | ruleitem'''

    if len(p) == 2:
        p[0] = [p[1]]
    else:
        p[0] = p[1]
        p[1].append(p[2])


def p_ruleitem(p):
    '''ruleitem : ID
                | QLITERAL
                | CODE
                | PREC'''
    p[0] = p[1]


def p_empty(p):
    '''empty : '''


# Syntax-error hook.  Report the problem on stderr (stdout carries the
# generated code) instead of swallowing it silently; returns None exactly as
# the previous no-op did, so the parser's recovery behaviour is unchanged.
def p_error(p):
    import sys
    if p is None:
        # PLY passes None when the error is detected at end of input.
        sys.stderr.write("yparse: syntax error at end of input\n")
    else:
        sys.stderr.write("yparse: syntax error at line %s near %r\n"
                         % (getattr(p, 'lineno', '?'),
                            getattr(p, 'value', None)))


yacc.yacc(debug=0)


def print_code(code, indent):
    """Print each line of *code* as a '#' comment indented by *indent* spaces.

    Prints nothing when the module-level ``emit_code`` flag is false.
    """
    if not emit_code:
        return
    prefix = " " * indent
    for c in code.splitlines():
        print("%s# %s" % (prefix, c))