#-----------------------------------------------------------------------------
# ply: yacc.py
#
# Author: David M. Beazley (beazley@cs.uchicago.edu)
#         Department of Computer Science
#         University of Chicago
#         Chicago, IL 60637
#
# Copyright (C) 2001, David M. Beazley
#
# $Header: /home/stever/bk/newmem2/ext/ply/yacc.py 1.3 03/06/06 14:59:28-00:00 stever@ $
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# See the file COPYING for a complete copy of the LGPL.
#
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions.  Roughly speaking, this module is a cross between
# John Aycock's Spark system and the GNU bison utility.
#
# Disclaimer:  This is a work in progress. SLR parsing seems to work fairly
# well and there is extensive error checking. LALR(1) is in progress.  The
# rest of this file is a bit of a mess.  Please pardon the dust.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist).  However, most of the variables used during table
# construction are defined in terms of global variables.  Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#-----------------------------------------------------------------------------
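
# A rough usage sketch (illustration only -- the grammar, token names, and
# rule bodies below are placeholders, and the token definitions are assumed
# to come from the companion lex module).  Grammar rules are ordinary Python
# functions whose docstrings hold the productions; yacc() builds the tables
# and returns a Parser object whose parse() method runs the LR engine:
#
#     import yacc
#
#     def p_expr_plus(p):
#         'expr : expr PLUS NUMBER'
#         p[0] = p[1] + p[3]
#
#     def p_expr_num(p):
#         'expr : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         print "Syntax error near", p
#
#     parser = yacc.yacc()          # build the grammar and the LR tables
#     result = parser.parse("1+2")  # run the parser (uses lex by default)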

__version__ = "1.3"

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'SLR'            # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

import re, types, sys, cStringIO, md5, os.path

# Exception raised for yacc-related errors
class YaccError(Exception):   pass

#-----------------------------------------------------------------------------
#                        ===  LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)

class YaccSymbol:
    def __str__(self):    return self.type
    def __repr__(self):   return str(self)

# This class is a wrapper around the objects actually passed to each
# grammar rule.   Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).   The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.

class YaccSlice:
    def __init__(self,s):
        self.slice = s
        self.pbstack = []

    def __getitem__(self,n):
        return self.slice[n].value

    def __setitem__(self,n,v):
        self.slice[n].value = v

    def __len__(self):
        return len(self.slice)

    def lineno(self,n):
        return getattr(self.slice[n],"lineno",0)

    def linespan(self,n):
        startline = getattr(self.slice[n],"lineno",0)
        endline = getattr(self.slice[n],"endlineno",startline)
        return startline,endline

    def pushback(self,n):
        if n <= 0:
            raise ValueError, "Expected a positive value"
        if n > (len(self.slice)-1):
            raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1)
        for i in range(0,n):
            self.pbstack.append(self.slice[-i-1])
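
# For example, inside a rule function for 'expr : expr PLUS term' (an
# illustrative rule, not something defined by this module), the slice object
# p behaves roughly as follows:
#
#     p[0] = p[1] + p[3]       # read/assign the .value of each grammar symbol
#     line = p.lineno(1)       # starting line number of the first 'expr'
#     span = p.linespan(1)     # (startline, endline) tuple for that symbol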

# The LR Parsing engine.   This is defined as a class so that multiple parsers
# can exist in the same process.  A user never instantiates this directly.
# Instead, the global yacc() function should be used to create a suitable Parser
# object.

class Parser:
    def __init__(self,magic=None):

        # This is a hack to keep users from trying to instantiate a Parser
        # object directly.

        if magic != "xyzzy":
            raise YaccError, "Can't instantiate Parser. Use yacc() instead."

        # Reset internal state
        self.productions = None          # List of productions
        self.errorfunc   = None          # Error handling function
        self.action      = { }           # LR Action table
        self.goto        = { }           # LR goto table
        self.require     = { }           # Attribute require table
        self.method      = "Unknown LR"  # Table construction method used

    def errok(self):
        self.errorcount = 0

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self,input=None,lexer=None,debug=0):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table
        goto    = self.goto              # Local reference to goto table
        prod    = self.productions       # Local reference to production list
        pslice  = YaccSlice(None)        # Slice object passed to grammar rules
        pslice.parser = self             # Parser object
        self.errorcount = 0              # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            import lex as lexer

        pslice.lexer = lexer

        # If input was supplied, pass to lexer
        if input:
            lexer.input(input)

        # Tokenize function
        get_token = lexer.token

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        errtoken   = None               # Err token

        # The start state is assumed to be (0,$)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$'
        symstack.append(sym)

        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer
            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$'
            if debug:
                print "%-20s : %s" % (lookahead, [xx.type for xx in symstack])

            # Check the action table
            s = statestack[-1]
            ltype = lookahead.type
            t = actions.get((s,ltype),None)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    if ltype == '$':
                        # Error, end of input
                        print "yacc: Parse error. EOF"
                        return
                    statestack.append(t)
                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if self.errorcount > 0:
                        self.errorcount -= 1

                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym
                        try:
                            sym.lineno = targ[1].lineno
                            sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
                        except AttributeError:
                            sym.lineno = 0
                        del symstack[-plen:]
                        del statestack[-plen:]
                    else:
                        sym.lineno = 0
                        targ = [ sym ]
                    pslice.slice = targ
                    pslice.pbstack = []
                    # Call the grammar rule with our special slice object
                    p.func(pslice)

                    # Validate attributes of the resulting value attribute
#                  if require:
#                      try:
#                          t0 = targ[0]
#                          r = Requires.get(t0.type,None)
#                          t0d = t0.__dict__
#                          if r:
#                              for field in r:
#                                  tn = t0
#                                  for fname in field:
#                                      try:
#                                          tf = tn.__dict__
#                                          tn = tf.get(fname)
#                                      except StandardError:
#                                          tn = None
#                                      if not tn:
#                                          print "%s:%d: Rule %s doesn't set required attribute '%s'" % \
#                                                (p.file,p.line,p.name,".".join(field))
#                      except TypeError,LookupError:
#                          print "Bad requires directive " % r
#                          pass


                    # If there was a pushback, put that on the stack
                    if pslice.pbstack:
                        lookaheadstack.append(lookahead)
                        for _t in pslice.pbstack:
                            lookaheadstack.append(_t)
                        lookahead = None

                    symstack.append(sym)
                    statestack.append(goto[statestack[-1],pname])
                    continue

                if t == 0:
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:
                # We have some kind of parsing error here.  To handle this,
                # we are going to push the current token onto the tokenstack
                # and replace it with an 'error' token.  If there are any synchronization
                # rules, they may catch it.
                #
                # In addition to pushing the error token, we call the user defined p_error()
                # function if this is the first syntax error.   This function is only called
                # if errorcount == 0.

                if not self.errorcount:
                    self.errorcount = error_count
                    errtoken = lookahead
                    if errtoken.type == '$':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if not self.errorcount:
                            # User must have done some kind of panic mode recovery on their own.  The returned token
                            # is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                print "yacc: Syntax error at line %d, token=%s" % (lineno, errtoken.type)
                            else:
                                print "yacc: Syntax error, token=%s" % errtoken.type
                        else:
                            print "yacc: Parse error in input. EOF"
                            return

                else:
                    self.errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$':
                    lookahead = None
                    errtoken = None
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()

                continue

            # Call an error function here
            raise RuntimeError, "yacc: internal parser error!!!\n"

# -----------------------------------------------------------------------------
#                          === Parser Construction ===
#
# The following functions and variables are used to implement the yacc() function
# itself.   This is pretty hairy stuff involving lots of error checking,
# construction of LR items, kernels, and so forth.   Although a lot of
# this work is done using global variables, the resulting Parser object
# is completely self contained--meaning that it is safe to repeatedly
# call yacc() with different grammars in the same application.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# validate_file()
#
# This function checks to see if there are duplicated p_rulename() functions
# in the parser module file.  Without this function, it is really easy for
# users to make mistakes by cutting and pasting code fragments (and it's a real
# bugger to try and figure out why the resulting parser doesn't work).  Therefore,
# we just do a little regular expression pattern matching of def statements
# to try and detect duplicates.
# -----------------------------------------------------------------------------

def validate_file(filename):
    base,ext = os.path.splitext(filename)
    if ext != '.py': return 1          # No idea. Assume it's okay.

    try:
        f = open(filename)
        lines = f.readlines()
        f.close()
    except IOError:
        return 1                       # Oh well

    # Match def p_funcname(
    fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
    counthash = { }
    linen = 1
    noerror = 1
    for l in lines:
        m = fre.match(l)
        if m:
            name = m.group(1)
            prev = counthash.get(name)
            if not prev:
                counthash[name] = linen
            else:
                print "%s:%d: Function %s redefined. Previously defined on line %d" % (filename,linen,name,prev)
                noerror = 0
        linen += 1
    return noerror

# This function looks for functions that might be grammar rules, but which don't have the proper p_ prefix.
def validate_dict(d):
    for n,v in d.items():
        if n[0:2] == 'p_' and isinstance(v,types.FunctionType): continue
        if n[0:2] == 't_': continue

        if n[0:2] == 'p_':
            print "yacc: Warning. '%s' not defined as a function" % n
        if isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1:
            try:
                doc = v.__doc__.split(" ")
                if doc[1] == ':':
                    print "%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix." % (v.func_code.co_filename, v.func_code.co_firstlineno,n)
            except StandardError:
                pass

# -----------------------------------------------------------------------------
#                           === GRAMMAR FUNCTIONS ===
#
# The following global variables and functions are used to store, manipulate,
# and verify the grammar rules specified by the user.
# -----------------------------------------------------------------------------

# Initialize all of the global variables used during grammar construction
def initialize_vars():
    global Productions, Prodnames, Prodmap, Terminals
    global Nonterminals, First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires

    Productions  = [None]  # A list of all of the productions.  The first
                           # entry is always reserved for the purpose of
                           # building an augmented grammar

    Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
                           # productions of that nonterminal.

    Prodmap      = { }     # A dictionary that is only used to detect duplicate
                           # productions.

    Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
                           # list of the rules where they are used.

    Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                           # of rule numbers where they are used.

    First        = { }     # A dictionary of precomputed FIRST(x) symbols

    Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols

    Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
                           # form ('right',level) or ('nonassoc', level) or ('left',level)

    LRitems      = [ ]     # A list of all LR items for the grammar.  These are the
                           # productions with the "dot" like E -> E . PLUS E

    Errorfunc    = None    # User defined error handler

    Signature    = md5.new()   # Digital signature of the grammar rules, precedence
                               # and other information.  Used to determine when a
                               # parsing table needs to be regenerated.

    Requires     = { }     # Requires list

    # File objects used when creating the parser.out debugging file
    global _vf, _vfc
    _vf           = cStringIO.StringIO()
    _vfc          = cStringIO.StringIO()

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# It has a few required attributes:
#
#       name     - Name of the production (nonterminal)
#       prod     - A list of symbols making up its production
#       number   - Production number.
#
# In addition, a few additional attributes are used to help with debugging or
# optimization of table generation.
#
#       file     - File where production action is defined.
#       lineno   - Line number where action is defined
#       func     - Action function
#       prec     - Precedence level
#       lr_next  - Next LR item. Example, if we are ' E -> E . PLUS E'
#                  then lr_next refers to 'E -> E PLUS . E'
#       lr_index - LR item index (location of the ".") in the prod list.
#       len      - Length of the production (number of symbols on right hand side)
# -----------------------------------------------------------------------------
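#
# For example (illustration only), a rule 'expression : expression PLUS term'
# is stored with name = 'expression', prod = ('expression','PLUS','term') and
# len = 3, and str() of the production prints
# "expression -> expression PLUS term".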

class Production:
    def __init__(self,**kw):
        for k,v in kw.items():
            setattr(self,k,v)
        self.lr_index = -1
        self.lr0_added = 0    # Flag indicating whether or not added to LR0 closure
        self.usyms = [ ]

    def __str__(self):
        if self.prod:
            s = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            s = "%s -> <empty>" % self.name
        return s

    def __repr__(self):
        return str(self)

    # Compute lr_items from the production
    def lr_item(self,n):
        if n > len(self.prod): return None
        p = Production()
        p.name = self.name
        p.prod = list(self.prod)
        p.number = self.number
        p.lr_index = n
        p.prod.insert(n,".")
        p.prod = tuple(p.prod)
        p.len = len(p.prod)
        p.usyms = self.usyms

        # Precompute list of productions immediately following
        try:
            p.lrafter = Prodnames[p.prod[n+1]]
        except (IndexError,KeyError),e:
            p.lrafter = []
        try:
            p.lrbefore = p.prod[n-1]
        except IndexError:
            p.lrbefore = None

        return p

class MiniProduction:
    pass

# Utility function
def is_identifier(s):
    for c in s:
        if not (c.isalnum() or c == '_'): return 0
    return 1

# -----------------------------------------------------------------------------
# add_production()
#
# Given an action function, this function assembles a production rule.
# The production rule is assumed to be found in the function's docstring.
# This rule has the general syntax:
#
#              name1 ::= production1
#                     |  production2
#                     |  production3
#                    ...
#                     |  productionn
#              name2 ::= production1
#                     |  production2
#                    ...
# -----------------------------------------------------------------------------
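#
# For example, a user-supplied rule function (hypothetical) might read:
#
#     def p_statement(p):
#         '''statement : NAME EQUALS expression
#                      | expression'''
#         ...
#
# Either ':' or '::=' may separate the rule name from its symbols, and '|'
# continues the previous rule.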

def add_production(f,file,line,prodname,syms):

    if Terminals.has_key(prodname):
        print "%s:%d: Illegal rule name '%s'. Already defined as a token." % (file,line,prodname)
        return -1
    if prodname == 'error':
        print "%s:%d: Illegal rule name '%s'. error is a reserved word." % (file,line,prodname)
        return -1

    if not is_identifier(prodname):
        print "%s:%d: Illegal rule name '%s'" % (file,line,prodname)
        return -1

    for s in syms:
        if not is_identifier(s) and s != '%prec':
            print "%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)
            return -1

    # See if the rule is already in the rulemap
    map = "%s -> %s" % (prodname,syms)
    if Prodmap.has_key(map):
        m = Prodmap[map]
        print "%s:%d: Duplicate rule %s." % (file,line, m)
        print "%s:%d: Previous definition at %s:%d" % (file,line, m.file, m.line)
        return -1

    p = Production()
    p.name = prodname
    p.prod = syms
    p.file = file
    p.line = line
    p.func = f
    p.number = len(Productions)


    Productions.append(p)
    Prodmap[map] = p
    if not Nonterminals.has_key(prodname):
        Nonterminals[prodname] = [ ]

    # Add all terminals to Terminals
    i = 0
    while i < len(p.prod):
        t = p.prod[i]
        if t == '%prec':
            try:
                precname = p.prod[i+1]
            except IndexError:
                print "%s:%d: Syntax error. Nothing follows %%prec." % (p.file,p.line)
                return -1

            prec = Precedence.get(precname,None)
            if not prec:
                print "%s:%d: Nothing known about the precedence of '%s'" % (p.file,p.line,precname)
                return -1
            else:
                p.prec = prec
            del p.prod[i]
            del p.prod[i]
            continue

        if Terminals.has_key(t):
            Terminals[t].append(p.number)
            # Is a terminal.  We'll assign a precedence to p based on this
            if not hasattr(p,"prec"):
                p.prec = Precedence.get(t,('right',0))
        else:
            if not Nonterminals.has_key(t):
                Nonterminals[t] = [ ]
            Nonterminals[t].append(p.number)
        i += 1

    if not hasattr(p,"prec"):
        p.prec = ('right',0)

    # Set final length of productions
    p.len  = len(p.prod)
    p.prod = tuple(p.prod)

    # Calculate unique syms in the production
    p.usyms = [ ]
    for s in p.prod:
        if s not in p.usyms:
            p.usyms.append(s)

    # Add to the global productions list
    try:
        Prodnames[p.name].append(p)
    except KeyError:
        Prodnames[p.name] = [ p ]
    return 0

# Given a raw rule function, this function rips out its doc string
# and adds rules to the grammar

def add_function(f):
    line = f.func_code.co_firstlineno
    file = f.func_code.co_filename
    error = 0

    if f.func_code.co_argcount > 1:
        print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__)
        return -1

    if f.func_code.co_argcount < 1:
        print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__)
        return -1

    if f.__doc__:
        # Split the doc string into lines
        pstrings = f.__doc__.splitlines()
        lastp = None
        dline = line
        for ps in pstrings:
            dline += 1
            p = ps.split()
            if not p: continue
            try:
                if p[0] == '|':
                    # This is a continuation of a previous rule
                    if not lastp:
                        print "%s:%d: Misplaced '|'." % (file,dline)
                        return -1
                    prodname = lastp
                    if len(p) > 1:
                        syms = p[1:]
                    else:
                        syms = [ ]
                else:
                    prodname = p[0]
                    lastp = prodname
                    assign = p[1]
                    if len(p) > 2:
                        syms = p[2:]
                    else:
                        syms = [ ]
                    if assign != ':' and assign != '::=':
                        print "%s:%d: Syntax error. Expected ':'" % (file,dline)
                        return -1
                e = add_production(f,file,dline,prodname,syms)
                error += e
            except StandardError:
                print "%s:%d: Syntax error in rule '%s'" % (file,dline,ps)
                error -= 1
    else:
        print "%s:%d: No documentation string specified in function '%s'" % (file,line,f.__name__)
    return error


# Cycle checking code (Michael Dyck)

def compute_reachable():
    '''
    Find each symbol that can be reached from the start symbol.
    Print a warning for any nonterminals that can't be reached.
    (Unused terminals have already had their warning.)
    '''
    Reachable = { }
    for s in Terminals.keys() + Nonterminals.keys():
        Reachable[s] = 0

    mark_reachable_from( Productions[0].prod[0], Reachable )

    for s in Nonterminals.keys():
        if not Reachable[s]:
            print "yacc: Symbol '%s' is unreachable." % s

def mark_reachable_from(s, Reachable):
    '''
    Mark all symbols that are reachable from symbol s.
    '''
    if Reachable[s]:
        # We've already reached symbol s.
        return
    Reachable[s] = 1
    for p in Prodnames.get(s,[]):
        for r in p.prod:
            mark_reachable_from(r, Reachable)

# -----------------------------------------------------------------------------
# compute_terminates()
#
# This function looks at the various parsing rules and tries to detect
# infinite recursion cycles (grammar rules where there is no possible way
# to derive a string of only terminals).
# -----------------------------------------------------------------------------
def compute_terminates():
    '''
    Raise an error for any symbols that don't terminate.
    '''
    Terminates = {}

    # Terminals:
    for t in Terminals.keys():
        Terminates[t] = 1

    Terminates['$'] = 1

    # Nonterminals:

    # Initialize to false:
    for n in Nonterminals.keys():
        Terminates[n] = 0

    # Then propagate termination until no change:
    while 1:
        some_change = 0
        for (n,pl) in Prodnames.items():
            # Nonterminal n terminates iff any of its productions terminates.
            for p in pl:
                # Production p terminates iff all of its rhs symbols terminate.
                for s in p.prod:
                    if not Terminates[s]:
                        # The symbol s does not terminate,
                        # so production p does not terminate.
                        p_terminates = 0
                        break
                else:
                    # didn't break from the loop,
                    # so every symbol s terminates
                    # so production p terminates.
                    p_terminates = 1

                if p_terminates:
                    # symbol n terminates!
                    if not Terminates[n]:
                        Terminates[n] = 1
                        some_change = 1
                    # Don't need to consider any more productions for this n.
                    break

        if not some_change:
            break

    some_error = 0
    for (s,terminates) in Terminates.items():
        if not terminates:
            if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
                # s is used-but-not-defined, and we've already warned of that,
                # so it would be overkill to say that it's also non-terminating.
                pass
            else:
                print "yacc: Infinite recursion detected for symbol '%s'." % s
                some_error = 1

    return some_error

# -----------------------------------------------------------------------------
# verify_productions()
#
# This function examines all of the supplied rules to see if they seem valid.
# -----------------------------------------------------------------------------
def verify_productions(cycle_check=1):
    error = 0
    for p in Productions:
        if not p: continue

        for s in p.prod:
            if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
                print "%s:%d: Symbol '%s' used, but not defined as a token or a rule." % (p.file,p.line,s)
                error = 1
                continue

    unused_tok = 0
    # Now verify all of the tokens
    if yaccdebug:
        _vf.write("Unused terminals:\n\n")
    for s,v in Terminals.items():
        if s != 'error' and not v:
            print "yacc: Warning. Token '%s' defined, but not used." % s
            if yaccdebug: _vf.write("   %s\n"% s)
            unused_tok += 1

    # Print out all of the productions
    if yaccdebug:
        _vf.write("\nGrammar\n\n")
        for i in range(1,len(Productions)):
            _vf.write("Rule %-5d %s\n" % (i, Productions[i]))

    unused_prod = 0
    # Verify the use of all productions
    for s,v in Nonterminals.items():
        if not v:
            p = Prodnames[s][0]
            print "%s:%d: Warning. Rule '%s' defined, but not used." % (p.file,p.line, s)
            unused_prod += 1


    if unused_tok == 1:
        print "yacc: Warning. There is 1 unused token."
    if unused_tok > 1:
        print "yacc: Warning. There are %d unused tokens." % unused_tok

    if unused_prod == 1:
        print "yacc: Warning. There is 1 unused rule."
    if unused_prod > 1:
        print "yacc: Warning. There are %d unused rules." % unused_prod

    if yaccdebug:
        _vf.write("\nTerminals, with rules where they appear\n\n")
        ks = Terminals.keys()
        ks.sort()
        for k in ks:
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]])))
        _vf.write("\nNonterminals, with rules where they appear\n\n")
        ks = Nonterminals.keys()
        ks.sort()
        for k in ks:
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]])))

    if (cycle_check):
        compute_reachable()
        error += compute_terminates()
#        error += check_cycles()
    return error

# -----------------------------------------------------------------------------
# build_lritems()
#
# This function walks the list of productions and builds a complete set of the
# LR items.  The LR items are stored in two ways:  First, they are uniquely
# numbered and placed in the list _lritems.  Second, a linked list of LR items
# is built for each production.  For example:
#
#   E -> E PLUS E
#
# Creates the list
#
#  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
# -----------------------------------------------------------------------------

def build_lritems():
    for p in Productions:
        lastlri = p
        lri = p.lr_item(0)
        i = 0
        while 1:
            lri = p.lr_item(i)
            lastlri.lr_next = lri
            if not lri: break
            lri.lr_num = len(LRitems)
            LRitems.append(lri)
            lastlri = lri
            i += 1

    # In order for the rest of the parser generator to work, we need to
    # guarantee that no more lritems are generated.  Therefore, we nuke
    # the p.lr_item method.  (Only used in debugging)
    # Production.lr_item = None

# -----------------------------------------------------------------------------
# add_precedence()
#
# Given a list of precedence rules, add to the precedence table.
# -----------------------------------------------------------------------------
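#
# For example (illustrative; the terminal names are placeholders), the list
#
#     [ ('left', 'PLUS', 'MINUS'),
#       ('left', 'TIMES', 'DIVIDE'),
#       ('right', 'UMINUS') ]
#
# assigns level 1 to PLUS/MINUS, level 2 to TIMES/DIVIDE and level 3 to
# UMINUS, with higher levels winning conflicts during table construction.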

def add_precedence(plist):
    plevel = 0
    error = 0
    for p in plist:
        plevel += 1
        try:
            prec = p[0]
            terms = p[1:]
            if prec != 'left' and prec != 'right' and prec != 'nonassoc':
                print "yacc: Invalid precedence '%s'" % prec
                return -1
            for t in terms:
                if Precedence.has_key(t):
                    print "yacc: Precedence already specified for terminal '%s'" % t
                    error += 1
                    continue
                Precedence[t] = (prec,plevel)
        except:
            print "yacc: Invalid precedence table."
            error += 1

    return error

# -----------------------------------------------------------------------------
# augment_grammar()
#
# Compute the augmented grammar.  This is just a rule S' -> start where start
# is the starting symbol.
# -----------------------------------------------------------------------------

def augment_grammar(start=None):
    if not start:
        start = Productions[1].name
    Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None)
    Productions[0].usyms = [ start ]
    Nonterminals[start].append(0)


# -------------------------------------------------------------------------
# first()
#
# Compute the value of FIRST1(beta) where beta is a tuple of symbols.
#
# During execution of compute_first1, the result may be incomplete.
# Afterward (e.g., when called from compute_follow()), it will be complete.
# -------------------------------------------------------------------------
def first(beta):

    # We are computing First(x1,x2,x3,...,xn)
    result = [ ]
    for x in beta:
        x_produces_empty = 0

        # Add all the non-<empty> symbols of First[x] to the result.
        for f in First[x]:
            if f == '<empty>':
                x_produces_empty = 1
            else:
                if f not in result: result.append(f)

        if x_produces_empty:
            # We have to consider the next x in beta,
            # i.e. stay in the loop.
            pass
        else:
            # We don't have to consider any further symbols in beta.
            break
    else:
        # There was no 'break' from the loop,
        # so x_produces_empty was true for all x in beta,
        # so beta produces empty as well.
        result.append('<empty>')

    return result

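# For example (illustration only), with the rules A -> LPAREN A RPAREN and
# A -> <empty>, First[A] contains 'LPAREN' and '<empty>', and a call such as
# first(('A','NUM')) yields ['LPAREN','NUM']:  A contributes LPAREN, and since
# A can produce <empty> the following NUM is considered as well.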

# FOLLOW(x)
# Given a non-terminal, this function computes the set of all symbols
# that might follow it.  Dragon book, p. 189.

def compute_follow(start=None):
    # Add '$' to the follow list of the start symbol
    for k in Nonterminals.keys():
        Follow[k] = [ ]

    if not start:
        start = Productions[1].name

    Follow[start] = [ '$' ]

    while 1:
        didadd = 0
        for p in Productions[1:]:
            # Here is the production set
            for i in range(len(p.prod)):
                B = p.prod[i]
                if Nonterminals.has_key(B):
                    # Okay. We got a non-terminal in a production
                    fst = first(p.prod[i+1:])
                    hasempty = 0
                    for f in fst:
                        if f != '<empty>' and f not in Follow[B]:
                            Follow[B].append(f)
                            didadd = 1
                        if f == '<empty>':
                            hasempty = 1
                    if hasempty or i == (len(p.prod)-1):
                        # Add elements of follow(a) to follow(b)
                        for f in Follow[p.name]:
                            if f not in Follow[B]:
                                Follow[B].append(f)
                                didadd = 1
        if not didadd: break

    if 0 and yaccdebug:
        _vf.write('\nFollow:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]])))

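# For example (illustration only), with start symbol B and the rules
# B -> A NUM, A -> LPAREN A RPAREN and A -> <empty>, this computation gives
# Follow[B] = ['$'] and makes Follow[A] contain NUM (which follows A in the
# first rule) and RPAREN (which follows A in the second).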
# -------------------------------------------------------------------------
# compute_first1()
#
# Compute the value of FIRST1(X) for all symbols
# -------------------------------------------------------------------------
def compute_first1():

    # Terminals:
    for t in Terminals.keys():
        First[t] = [t]

    First['$'] = ['$']
    First['#'] = ['#'] # what's this for?

    # Nonterminals:

    # Initialize to the empty set:
    for n in Nonterminals.keys():
        First[n] = []

    # Then propagate symbols until no change:
    while 1:
        some_change = 0
        for n in Nonterminals.keys():
            for p in Prodnames[n]:
                for f in first(p.prod):
                    if f not in First[n]:
                        First[n].append( f )
                        some_change = 1
        if not some_change:
            break

    if 0 and yaccdebug:
        _vf.write('\nFirst:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" %
                (k, " ".join([str(s) for s in First[k]])))

# -----------------------------------------------------------------------------
#                           === SLR Generation ===
#
# The following functions are used to construct SLR (Simple LR) parsing tables
# as described on p.221-229 of the dragon book.
# -----------------------------------------------------------------------------

# Global variables for the LR parsing engine
def lr_init_vars():
    global _lr_action, _lr_goto, _lr_method
    global _lr_goto_cache

    _lr_action       = { }        # Action table
    _lr_goto         = { }        # Goto table
    _lr_method       = "Unknown"  # LR method used
    _lr_goto_cache   = { }

# Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
# prodlist is a list of productions.

_add_count = 0       # Counter used to detect cycles

def lr0_closure(I):
    global _add_count

    _add_count += 1
    prodlist = Productions

    # Add everything in I to J
    J = I[:]
    didadd = 1
    while didadd:
        didadd = 0
        for j in J:
            for x in j.lrafter:
                if x.lr0_added == _add_count: continue
                # Add B --> .G to J
                J.append(x.lr_next)
                x.lr0_added = _add_count
                didadd = 1

    return J

# Compute the LR(0) goto function goto(I,X) where I is a set
# of LR(0) items and X is a grammar symbol.   This function is written
# in a way that guarantees uniqueness of the generated goto sets
# (i.e. the same goto set will never be returned as two different Python
# objects).  With uniqueness, we can later do fast set comparisons using
# id(obj) instead of element-wise comparison.

def lr0_goto(I,x):
    # First we look for a previously cached entry
    g = _lr_goto_cache.get((id(I),x),None)
    if g: return g

    # Now we generate the goto set in a way that guarantees uniqueness
    # of the result

    s = _lr_goto_cache.get(x,None)
    if not s:
        s = { }
        _lr_goto_cache[x] = s

    gs = [ ]
    for p in I:
        n = p.lr_next
        if n and n.lrbefore == x:
            s1 = s.get(id(n),None)
            if not s1:
                s1 = { }
                s[id(n)] = s1
            gs.append(n)
            s = s1
    g = s.get('$',None)
    if not g:
        if gs:
            g = lr0_closure(gs)
            s['$'] = g
        else:
            s['$'] = gs
    _lr_goto_cache[(id(I),x)] = g
    return g

# Compute the kernel of a set of LR(0) items
def lr0_kernel(I):
    KI = [ ]
    for p in I:
        if p.name == "S'" or p.lr_index > 0 or p.len == 0:
            KI.append(p)

    return KI

_lr0_cidhash = { }

# Compute the LR(0) sets of item function
def lr0_items():

    C = [ lr0_closure([Productions[0].lr_next]) ]
    i = 0
    for I in C:
        _lr0_cidhash[id(I)] = i
        i += 1

    # Loop over the items in C and each grammar symbol
    i = 0
    while i < len(C):
        I = C[i]
        i += 1

        # Collect all of the symbols that could possibly be in the goto(I,X) sets
        asyms = { }
        for ii in I:
            for s in ii.usyms:
                asyms[s] = None

        for x in asyms.keys():
            g = lr0_goto(I,x)
            if not g:  continue
            if _lr0_cidhash.has_key(id(g)): continue
            _lr0_cidhash[id(g)] = len(C)
            C.append(g)

    return C

# -----------------------------------------------------------------------------
# slr_parse_table()
#
# This function constructs an SLR table.
# -----------------------------------------------------------------------------
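#
# A small illustration (not code from this module):  for the ambiguous grammar
#
#     E : E PLUS E
#       | NUM
#
# the state containing the items 'E -> E . PLUS E' and 'E -> E PLUS E .' has a
# shift/reduce conflict on PLUS.  With no precedence declared both levels
# default to 0, so the shift is kept and the conflict is reported; declaring
# ('left','PLUS') in the precedence list makes it resolve as a reduce instead
# (and it is then no longer counted as a conflict).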
def slr_parse_table():
    global _lr_method
    goto = _lr_goto           # Goto array
    action = _lr_action       # Action array
    actionp = { }             # Action production array (temporary)

    _lr_method = "SLR"

    n_srconflict = 0
    n_rrconflict = 0

    if yaccdebug:
        _vf.write("\n\nParsing method: SLR\n\n")

    # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
    # This determines the number of states

    C = lr0_items()

    # Build the parser table, state by state
    st = 0
    for I in C:
        # Loop over each production in I
        actlist = [ ]              # List of actions

        if yaccdebug:
            _vf.write("\nstate %d\n\n" % st)
            for p in I:
                _vf.write("    (%d) %s\n" % (p.number, str(p)))
            _vf.write("\n")

        for p in I:
            try:
                if p.prod[-1] == ".":
                    if p.name == "S'":
                        # Start symbol. Accept!
                        action[st,"$"] = 0
                        actionp[st,"$"] = p
                    else:
                        # We are at the end of a production.  Reduce!
                        for a in Follow[p.name]:
                            actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                            r = action.get((st,a),None)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.
                                    sprec,slevel = Productions[actionp[st,a].number].prec
                                    rprec,rlevel = Precedence.get(a,('right',0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        action[st,a] = -p.number
                                        actionp[st,a] = p
                                        if not slevel and not rlevel:
                                            _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
                                            _vf.write("  ! shift/reduce conflict for %s resolved as reduce.\n" % a)
                                            n_srconflict += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        action[st,a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not slevel and not rlevel:
                                            _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
                                            _vf.write("  ! shift/reduce conflict for %s resolved as shift.\n" % a)
                                            n_srconflict +=1
                                elif r < 0:
                                    # Reduce/reduce conflict.   In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        action[st,a] = -p.number
                                        actionp[st,a] = p
                                    # print "Reduce/reduce conflict in state %d" % st
                                    n_rrconflict += 1
                                    _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
                                    _vf.write("  ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
                                else:
                                    print "Unknown conflict in state %d" % st
                            else:
                                action[st,a] = -p.number
                                actionp[st,a] = p
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if Terminals.has_key(a):
                        g = lr0_goto(I,a)
                        j = _lr0_cidhash.get(id(g),-1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a,p,"shift and go to state %d" % j))
                            r = action.get((st,a),None)
                            if r is not None:
                                # Whoa have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        print "Shift/shift conflict in state %d" % st
                                elif r < 0:
                                    # Do a precedence check.
                                    #   -  if precedence of reduce rule is higher, we reduce.
                                    #   -  if precedence of reduce is same and left assoc, we reduce.
                                    #   -  otherwise we shift
                                    rprec,rlevel = Productions[actionp[st,a].number].prec
                                    sprec,slevel = Precedence.get(a,('right',0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
                                        # We decide to shift here... highest precedence to shift
                                        action[st,a] = j
                                        actionp[st,a] = p
                                        if not slevel and not rlevel:
                                            n_srconflict += 1
                                            _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
                                            _vf.write("  ! shift/reduce conflict for %s resolved as shift.\n" % a)
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        action[st,a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            n_srconflict +=1
                                            _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
                                            _vf.write("  ! shift/reduce conflict for %s resolved as reduce.\n" % a)

                                else:
                                    print "Unknown conflict in state %d" % st
                            else:
                                action[st,a] = j
                                actionp[st,a] = p

1366            except StandardError,e:
                raise YaccError, "Hosed in slr_parse_table: %s" % e, sys.exc_info()[2]

        # Print the actions associated with each terminal
        if yaccdebug:
          for a,p,m in actlist:
            if action.has_key((st,a)):
                if p is actionp[st,a]:
                    _vf.write("    %-15s %s\n" % (a,m))
          _vf.write("\n")
          for a,p,m in actlist:
            if action.has_key((st,a)):
                if p is not actionp[st,a]:
                    _vf.write("  ! %-15s [ %s ]\n" % (a,m))

        # Construct the goto table for this state
        if yaccdebug:
            _vf.write("\n")
        nkeys = { }
        for ii in I:
            for s in ii.usyms:
                if Nonterminals.has_key(s):
                    nkeys[s] = None
        for n in nkeys.keys():
            g = lr0_goto(I,n)
            j = _lr0_cidhash.get(id(g),-1)
            if j >= 0:
                goto[st,n] = j
                if yaccdebug:
                    _vf.write("    %-15s shift and go to state %d\n" % (n,j))

        st += 1

    if n_srconflict == 1:
        print "yacc: %d shift/reduce conflict" % n_srconflict
    if n_srconflict > 1:
        print "yacc: %d shift/reduce conflicts" % n_srconflict
    if n_rrconflict == 1:
        print "yacc: %d reduce/reduce conflict" % n_rrconflict
    if n_rrconflict > 1:
        print "yacc: %d reduce/reduce conflicts" % n_rrconflict


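# -----------------------------------------------------------------------------
# Note on conflict resolution (illustrative example, not part of the algorithm):
#
# The precedence checks above follow the usual yacc conventions.  Given a
# hypothetical declaration in a grammar module such as
#
#     precedence = (
#         ('left', 'PLUS', 'MINUS'),
#         ('left', 'TIMES', 'DIVIDE'),
#     )
#
# a state containing the item  expr -> expr PLUS expr .  resolves a conflict
# on lookahead TIMES as a shift (TIMES is declared later, so it has the higher
# precedence level), while on lookahead PLUS the levels are equal and 'left'
# associativity forces a reduce.  When neither the rule nor the lookahead
# token has a declared precedence, both default to level 0, the conflict is
# resolved as a shift, and it is counted in the totals printed above.
# -----------------------------------------------------------------------------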
# -----------------------------------------------------------------------------
#                       ==== LALR(1) Parsing ====
# **** UNFINISHED!  6/16/01
# -----------------------------------------------------------------------------


# Compute the lr1_closure of a set I.  I is a list of tuples (p,a) where
# p is a LR0 item and a is a terminal

_lr1_add_count = 0

def lr1_closure(I):
    global _lr1_add_count

    _lr1_add_count += 1

    J = I[:]

    # Loop over items (p,a) in I.
    ji = 0
    while ji < len(J):
        p,a = J[ji]
        #  p = [ A -> alpha . B beta]

        #  For each production B -> gamma
        for B in p.lr1_after:
            f = tuple(p.lr1_beta + (a,))

            # For each terminal b in first(Beta a)
            for b in first(f):
                # Check whether (B -> . gamma, b) is already in J.  The item
                # is missing only if its lr_added count does not match the
                # count for the current closure pass.
                pn = B.lr_next
                if pn.lr_added.get(b,0) == _lr1_add_count: continue
                pn.lr_added[b] = _lr1_add_count
                J.append((pn,b))
        ji += 1

    return J

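# A small worked example of lr1_closure() (for illustration only), using the
# textbook grammar  S' -> S ;  S -> C C ;  C -> c C | d :
#
#   closing the single item (S' -> . S, $) first adds (S -> . C C, $) and
#   then, since first(C $) = { c, d }, the items (C -> . c C, c),
#   (C -> . c C, d), (C -> . d, c) and (C -> . d, d).
#
# The _lr1_add_count / lr_added bookkeeping above is just a cheap way of
# asking "was this (item, lookahead) pair already added during the current
# call?" without searching the list J.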
def lalr_parse_table():

    # Compute some lr1 information about all of the productions
    for p in LRitems:
        try:
            after = p.prod[p.lr_index + 1]
            p.lr1_after = Prodnames[after]
            p.lr1_beta = p.prod[p.lr_index + 2:]
        except LookupError:
            p.lr1_after = [ ]
            p.lr1_beta = [ ]
        p.lr_added = { }

    # Compute the LR(0) items
    C = lr0_items()
    CK = []
    for I in C:
        CK.append(lr0_kernel(I))

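    # Debugging output for the unfinished LALR(1) construction: dump the
    # LR(0) kernels computed so far.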
    print CK

# -----------------------------------------------------------------------------
#                          ==== LR Utility functions ====
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# lr_write_tables()
#
# This function writes the LR parsing tables to a file
# -----------------------------------------------------------------------------

def lr_write_tables(modulename=tab_module):
    filename = modulename + ".py"
    try:
        f = open(filename,"w")

        f.write("""
# %s
# This file is automatically generated. Do not edit.

_lr_method = %s

_lr_signature = %s
""" % (filename, repr(_lr_method), repr(Signature.digest())))

        # Change smaller to 0 to go back to original tables
        smaller = 1

        # Factor out names to try and make the file smaller
        if smaller:
            items = { }

            for k,v in _lr_action.items():
                i = items.get(k[1])
                if not i:
                    i = ([],[])
                    items[k[1]] = i
                i[0].append(k[0])
                i[1].append(v)

            f.write("\n_lr_action_items = {")
            for k,v in items.items():
                f.write("%r:([" % k)
                for i in v[0]:
                    f.write("%r," % i)
                f.write("],[")
                for i in v[1]:
                    f.write("%r," % i)

                f.write("]),")
            f.write("}\n")

            f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       _lr_action[(_x,_k)] = _y
del _lr_action_items
""")

        else:
            f.write("\n_lr_action = { ")
            for k,v in _lr_action.items():
                f.write("(%r,%r):%r," % (k[0],k[1],v))
            f.write("}\n")

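        # When 'smaller' is enabled, the action table is factored by grammar
        # symbol so that the generated file is more compact.  For example
        # (hypothetical values), instead of writing out
        #
        #     _lr_action = { (0,'NUMBER'):4, (1,'NUMBER'):4, ... }
        #
        # the file contains
        #
        #     _lr_action_items = { 'NUMBER':([0,1,],[4,4,]), ... }
        #
        # together with the small loop written above that rebuilds _lr_action
        # when the table module is imported.  The goto table below is factored
        # in exactly the same way.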
        if smaller:
            # Factor out names to try and make the file smaller
            items = { }

            for k,v in _lr_goto.items():
                i = items.get(k[1])
                if not i:
                    i = ([],[])
                    items[k[1]] = i
                i[0].append(k[0])
                i[1].append(v)

            f.write("\n_lr_goto_items = {")
            for k,v in items.items():
                f.write("%r:([" % k)
                for i in v[0]:
                    f.write("%r," % i)
                f.write("],[")
                for i in v[1]:
                    f.write("%r," % i)

                f.write("]),")
            f.write("}\n")

            f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       _lr_goto[(_x,_k)] = _y
del _lr_goto_items
""")
        else:
            f.write("\n_lr_goto = { ")
            for k,v in _lr_goto.items():
                f.write("(%r,%r):%r," % (k[0],k[1],v))
            f.write("}\n")

        # Write production table
        f.write("_lr_productions = [\n")
        for p in Productions:
            if p:
                if (p.func):
                    f.write("  (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line))
                else:
                    f.write("  (%r,%d,None,None,None),\n" % (p.name, p.len))
            else:
                f.write("  None,\n")
        f.write("]\n")
        f.close()

    except IOError,e:
        print "Unable to create '%s'" % filename
        print e
        return

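# The generated table file (named <modulename>.py, where modulename defaults
# to whatever tab_module is set to earlier in this file) looks roughly like
# the following sketch; the concrete values are of course grammar dependent:
#
#     _lr_method = 'SLR'
#     _lr_signature = '...digest of the grammar...'
#     _lr_action_items = { ... }    # or a plain _lr_action dictionary
#     _lr_goto_items = { ... }      # or a plain _lr_goto dictionary
#     _lr_productions = [ ('expression',3,'p_expression','calc.py',12), ... ]
#
# (the 'expression'/'calc.py' entries above are made-up names used only for
# illustration).  lr_read_tables() below only accepts such a file when its
# stored signature matches the current grammar, or when optimize is set.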
def lr_read_tables(module=tab_module,optimize=0):
    global _lr_action, _lr_goto, _lr_productions, _lr_method
    try:
        exec "import %s as parsetab" % module

        if (optimize) or (Signature.digest() == parsetab._lr_signature):
            _lr_action = parsetab._lr_action
            _lr_goto   = parsetab._lr_goto
            _lr_productions = parsetab._lr_productions
            _lr_method = parsetab._lr_method
            return 1
        else:
            return 0

    except (ImportError,AttributeError):
        return 0
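# Note: lr_read_tables() deliberately returns 0 (forcing the caller to rebuild
# the tables) when the table module cannot be imported, when it predates this
# format (AttributeError), or when its _lr_signature no longer matches the
# current grammar.  Passing optimize=1 skips the signature comparison.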

# -----------------------------------------------------------------------------
# yacc(module)
#
# Build the parser module
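#
# Typical use from a grammar module looks something like the sketch below.
# The token and rule names are made up for illustration, and the lexer is
# assumed to have been built separately with the companion lex module:
#
#     tokens = ('NUMBER','PLUS')
#
#     def p_expression_plus(t):
#         'expression : expression PLUS NUMBER'
#         t[0] = t[1] + t[3]
#
#     def p_expression_number(t):
#         'expression : NUMBER'
#         t[0] = t[1]
#
#     def p_error(t):
#         print "Syntax error at", t
#
#     yacc.yacc()                  # build (or reuse) the parsing tables
#     result = yacc.parse(data)    # parse a string using the lex-built lexer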
# -----------------------------------------------------------------------------

def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0):
    global yaccdebug
    yaccdebug = debug

    initialize_vars()
    files = { }
    error = 0

    # Add starting symbol to signature
    if start:
        Signature.update(start)

    # Try to figure out what module we are working with
    if module:
        # User supplied a module object.
        if not isinstance(module, types.ModuleType):
            raise ValueError,"Expected a module"

        ldict = module.__dict__

    else:
        # No module given.  We might be able to get information from the caller.
        # Throw an exception and unwind the traceback to get the globals

        try:
            raise RuntimeError
        except RuntimeError:
            e,b,t = sys.exc_info()
            f = t.tb_frame
            f = f.f_back           # Walk out to our calling function
            ldict = f.f_globals    # Grab its globals dictionary

    # If running in optimized mode, try to load pre-computed parse tables
    # and skip the full grammar analysis below.

    if (optimize and lr_read_tables(tabmodule,1)):
        # Read parse table
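        # Each entry of _lr_productions is either None or a tuple of the
        # form (name, length, function name, file, line), exactly as written
        # by lr_write_tables() above.  Rebuild lightweight MiniProduction
        # objects from it so the parser can run without re-analyzing the
        # grammar.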
        del Productions[:]
        for p in _lr_productions:
            if not p:
                Productions.append(None)
            else:
                m = MiniProduction()
                m.name = p[0]
                m.len  = p[1]
                m.file = p[3]
                m.line = p[4]
                if p[2]:
                    m.func = ldict[p[2]]
                Productions.append(m)

    else:
        # Get the tokens map
        tokens = ldict.get("tokens",None)

        if not tokens:
            raise YaccError,"module does not define a list 'tokens'"
        if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
            raise YaccError,"tokens must be a list or tuple."

        # Check to see if a requires dictionary is defined.
        requires = ldict.get("require",None)
        if requires:
            if not (isinstance(requires,types.DictType)):
                raise YaccError,"require must be a dictionary."

            for r,v in requires.items():
                try:
                    if not (isinstance(v,types.ListType)):
                        raise TypeError
                    v1 = [x.split(".") for x in v]
                    Requires[r] = v1
                except StandardError:
                    print "Invalid specification for rule '%s' in require. Expected a list of strings" % r


        # Build the dictionary of terminals.  We record an empty list in the
        # dictionary to track whether or not a terminal is actually
        # used in the grammar

        if 'error' in tokens:
            print "yacc: Illegal token 'error'.  It is a reserved word."
            raise YaccError,"Illegal token name"

        for n in tokens:
            if Terminals.has_key(n):
                print "yacc: Warning. Token '%s' multiply defined." % n
            Terminals[n] = [ ]

        Terminals['error'] = [ ]

        # Get the precedence map (if any)
        prec = ldict.get("precedence",None)
        if prec:
            if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)):
                raise YaccError,"precedence must be a list or tuple."
            add_precedence(prec)
            Signature.update(repr(prec))

        for n in tokens:
            if not Precedence.has_key(n):
                Precedence[n] = ('right',0)         # Default, right associative, 0 precedence
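
        # At this point Precedence maps every terminal to an (assoc, level)
        # pair -- hypothetically something like {'PLUS': ('left',1),
        # 'TIMES': ('left',2), 'NUMBER': ('right',0)}.  Tokens that were not
        # mentioned in the precedence declaration get the ('right',0) default
        # above, which is why unannotated shift/reduce conflicts default to
        # a shift.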

        # Look for error handler
        ef = ldict.get('p_error',None)
        if ef:
            if not isinstance(ef,types.FunctionType):
                raise YaccError,"'p_error' defined, but is not a function."
            eline = ef.func_code.co_firstlineno
            efile = ef.func_code.co_filename
            files[efile] = None

            if (ef.func_code.co_argcount != 1):
                raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline)
            global Errorfunc
            Errorfunc = ef
        else:
            print "yacc: Warning. No p_error() function is defined."

        # Get the list of grammar rule functions defined with a p_ prefix
        symbols = [ldict[f] for f in ldict.keys()
               if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] == 'p_'
                   and ldict[f].__name__ != 'p_error')]

        # Check for non-empty symbols
        if len(symbols) == 0:
            raise YaccError,"no rules of the form p_rulename are defined."

        # Sort the symbols by line number
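        # (Sorting keeps the rules in the order they appear in the source
        # file; among other things, this is what lets the first p_ rule act
        # as the default start symbol when no explicit start is given.)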
        symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))

        # Add all of the symbols to the grammar
        for f in symbols:
            if (add_function(f)) < 0:
                error += 1
            else:
                files[f.func_code.co_filename] = None

        # Make a signature of the docstrings
        for f in symbols:
            if f.__doc__:
                Signature.update(f.__doc__)

        lr_init_vars()

        if error:
            raise YaccError,"Unable to construct parser."

        if not lr_read_tables(tabmodule):

            # Validate files
            for filename in files.keys():
                if not validate_file(filename):
                    error = 1

            # Validate dictionary
            validate_dict(ldict)

            if start and not Prodnames.has_key(start):
                raise YaccError,"Bad starting symbol '%s'" % start

            augment_grammar(start)
            error = verify_productions(cycle_check=check_recursion)
            otherfunc = [ldict[f] for f in ldict.keys()
               if (isinstance(ldict[f],types.FunctionType) and ldict[f].__name__[:2] != 'p_')]

            if error:
                raise YaccError,"Unable to construct parser."

            build_lritems()
            compute_first1()
            compute_follow(start)

            if method == 'SLR':
                slr_parse_table()
            elif method == 'LALR1':
                lalr_parse_table()
                return
            else:
                raise YaccError, "Unknown parsing method '%s'" % method

            lr_write_tables(tabmodule)

            if yaccdebug:
                try:
                    f = open(debug_file,"w")
                    f.write(_vfc.getvalue())
                    f.write("\n\n")
                    f.write(_vf.getvalue())
                    f.close()
                except IOError,e:
                    print "yacc: can't create '%s'" % debug_file,e

    # Made it here.  Create a parser object and set up its internal state.
    # Set global parse() method to bound method of parser object.

    p = Parser("xyzzy")
    p.productions = Productions
    p.errorfunc = Errorfunc
    p.action = _lr_action
    p.goto   = _lr_goto
    p.method = _lr_method
    p.require = Requires

    global parse
    parse = p.parse

    # Clean up all of the globals we created
    if (not optimize):
        yacc_cleanup()
    return p

# yacc_cleanup function.  Delete all of the global variables
# used during table construction

def yacc_cleanup():
    global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
    del _lr_action, _lr_goto, _lr_method, _lr_goto_cache

    global Productions, Prodnames, Prodmap, Terminals
    global Nonterminals, First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires

    del Productions, Prodnames, Prodmap, Terminals
    del Nonterminals, First, Follow, Precedence, LRitems
    del Errorfunc, Signature, Requires

    global _vf, _vfc
    del _vf, _vfc


# Stub that raises an error if parsing is attempted without first calling yacc()
def parse(*args,**kwargs):
    raise YaccError, "yacc: No parser built with yacc()"

