cscg24-lolpython

CSCG 2024 Challenge 'Can I Haz Lolpython?'
git clone https://git.sinitax.com/sinitax/cscg24-lolpython

yacc.py (82064B)


      1#-----------------------------------------------------------------------------
      2# ply: yacc.py
      3#
      4# Author(s): David M. Beazley (dave@dabeaz.com)
      5#
      6# Copyright (C) 2001-2006, David M. Beazley
      7#
      8# This library is free software; you can redistribute it and/or
      9# modify it under the terms of the GNU Lesser General Public
     10# License as published by the Free Software Foundation; either
     11# version 2.1 of the License, or (at your option) any later version.
     12# 
     13# This library is distributed in the hope that it will be useful,
     14# but WITHOUT ANY WARRANTY; without even the implied warranty of
     15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16# Lesser General Public License for more details.
     17# 
     18# You should have received a copy of the GNU Lesser General Public
     19# License along with this library; if not, write to the Free Software
     20# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     21# 
     22# See the file COPYING for a complete copy of the LGPL.
     23#
     24#
     25# This implements an LR parser that is constructed from grammar rules defined
      26# as Python functions. The grammar is specified by supplying the BNF inside
     27# Python documentation strings.  The inspiration for this technique was borrowed
      28# from John Aycock's Spark parsing system.  PLY might be viewed as a cross between
     29# Spark and the GNU bison utility.
     30#
     31# The current implementation is only somewhat object-oriented. The
     32# LR parser itself is defined in terms of an object (which allows multiple
     33# parsers to co-exist).  However, most of the variables used during table
     34# construction are defined in terms of global variables.  Users shouldn't
     35# notice unless they are trying to define multiple parsers at the same
     36# time using threads (in which case they should have their head examined).
     37#
     38# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
     39# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
     40# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
     41# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
     42# by the more efficient DeRemer and Pennello algorithm.
     43#
     44# :::::::: WARNING :::::::
     45#
     46# Construction of LR parsing tables is fairly complicated and expensive.
     47# To make this module run fast, a *LOT* of work has been put into
      48# optimization---often at the expense of readability and what some might
     49# consider to be good Python "coding style."   Modify the code at your
     50# own risk!
     51# ----------------------------------------------------------------------------
     52
     53__version__ = "2.2"
     54
     55#-----------------------------------------------------------------------------
     56#                     === User configurable parameters ===
     57#
     58# Change these to modify the default behavior of yacc (if you wish)
     59#-----------------------------------------------------------------------------
     60
     61yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
      62                               # 'parser.out' file in the current directory
     63
     64debug_file  = 'parser.out'     # Default name of the debugging file
     65tab_module  = 'parsetab'       # Default name of the table module
     66default_lr  = 'LALR'           # Default LR table generation method
     67
     68error_count = 3                # Number of symbols that must be shifted to leave recovery mode
     69
     70import re, types, sys, cStringIO, md5, os.path
     71
     72# Exception raised for yacc-related errors
     73class YaccError(Exception):   pass
     74
     75#-----------------------------------------------------------------------------
     76#                        ===  LR Parsing Engine ===
     77#
     78# The following classes are used for the LR parser itself.  These are not
     79# used during table construction and are independent of the actual LR
     80# table generation algorithm
     81#-----------------------------------------------------------------------------
     82
     83# This class is used to hold non-terminal grammar symbols during parsing.
     84# It normally has the following attributes set:
     85#        .type       = Grammar symbol type
     86#        .value      = Symbol value
     87#        .lineno     = Starting line number
     88#        .endlineno  = Ending line number (optional, set automatically)
     89#        .lexpos     = Starting lex position
     90#        .endlexpos  = Ending lex position (optional, set automatically)
     91
     92class YaccSymbol:
     93    def __str__(self):    return self.type
     94    def __repr__(self):   return str(self)
     95
     96# This class is a wrapper around the objects actually passed to each
      97# grammar rule.   Index lookup and assignment actually read and write the
     98# .value attribute of the underlying YaccSymbol object.
     99# The lineno() method returns the line number of a given
    100# item (or 0 if not defined).   The linespan() method returns
    101# a tuple of (startline,endline) representing the range of lines
    102# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
    103# representing the range of positional information for a symbol.
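# A minimal sketch (hypothetical rule and token names) of how a grammar rule
# function sees this object:
#
#     def p_expression_plus(p):
#         'expression : expression PLUS term'
#         p[0] = p[1] + p[3]          # read/write the .value of each symbol
#         line = p.lineno(2)          # line number of the PLUS token
#         start, end = p.lexspan(1)   # lex positions spanned by 'expression'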
    104
    105class YaccProduction:
    106    def __init__(self,s,stack=None):
    107        self.slice = s
    108        self.pbstack = []
    109        self.stack = stack
    110
    111    def __getitem__(self,n):
    112        if type(n) == types.IntType:
    113             if n >= 0: return self.slice[n].value
    114             else: return self.stack[n].value
    115        else:
    116             return [s.value for s in self.slice[n.start:n.stop:n.step]]
    117
    118    def __setitem__(self,n,v):
    119        self.slice[n].value = v
    120
    121    def __len__(self):
    122        return len(self.slice)
    123    
    124    def lineno(self,n):
    125        return getattr(self.slice[n],"lineno",0)
    126
    127    def linespan(self,n):
    128        startline = getattr(self.slice[n],"lineno",0)
    129        endline = getattr(self.slice[n],"endlineno",startline)
    130        return startline,endline
    131
    132    def lexpos(self,n):
    133        return getattr(self.slice[n],"lexpos",0)
    134
    135    def lexspan(self,n):
    136        startpos = getattr(self.slice[n],"lexpos",0)
    137        endpos = getattr(self.slice[n],"endlexpos",startpos)
    138        return startpos,endpos
    139
    140    def pushback(self,n):
    141        if n <= 0:
    142            raise ValueError, "Expected a positive value"
    143        if n > (len(self.slice)-1):
    144            raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1)
    145        for i in range(0,n):
    146            self.pbstack.append(self.slice[-i-1])
    147
    148# The LR Parsing engine.   This is defined as a class so that multiple parsers
    149# can exist in the same process.  A user never instantiates this directly.
    150# Instead, the global yacc() function should be used to create a suitable Parser
    151# object. 
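#
# Typical use, sketched with hypothetical names (the parse() signature below
# is parse(input=None, lexer=None, debug=0)):
#
#     import yacc
#     parser = yacc.yacc()            # build a Parser from the p_* rules
#     result = parser.parse(data)     # data is the raw input string
#     # result is the .value of the start symbol, or None on a parse error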
    152
    153class Parser:
    154    def __init__(self,magic=None):
    155
    156        # This is a hack to keep users from trying to instantiate a Parser
    157        # object directly.
    158
    159        if magic != "xyzzy":
    160            raise YaccError, "Can't instantiate Parser. Use yacc() instead."
    161
    162        # Reset internal state
    163        self.productions = None          # List of productions
    164        self.errorfunc   = None          # Error handling function
    165        self.action      = { }           # LR Action table
    166        self.goto        = { }           # LR goto table
    167        self.require     = { }           # Attribute require table
    168        self.method      = "Unknown LR"  # Table construction method used
    169
    170    def errok(self):
    171        self.errorcount = 0
    172
    173    def restart(self):
    174        del self.statestack[:]
    175        del self.symstack[:]
    176        sym = YaccSymbol()
    177        sym.type = '$end'
    178        self.symstack.append(sym)
    179        self.statestack.append(0)
    180        
    181    def parse(self,input=None,lexer=None,debug=0):
    182        lookahead = None                 # Current lookahead symbol
    183        lookaheadstack = [ ]             # Stack of lookahead symbols
    184        actions = self.action            # Local reference to action table
    185        goto    = self.goto              # Local reference to goto table
    186        prod    = self.productions       # Local reference to production list
    187        pslice  = YaccProduction(None)   # Production object passed to grammar rules
    188        pslice.parser = self             # Parser object
    189        self.errorcount = 0              # Used during error recovery
    190
    191        # If no lexer was given, we will try to use the lex module
    192        if not lexer:
    193            import lex
    194            lexer = lex.lexer
    195
    196        pslice.lexer = lexer
    197        
    198        # If input was supplied, pass to lexer
    199        if input:
    200            lexer.input(input)
    201
    202        # Tokenize function
    203        get_token = lexer.token
    204
    205        statestack = [ ]                # Stack of parsing states
    206        self.statestack = statestack
    207        symstack   = [ ]                # Stack of grammar symbols
    208        self.symstack = symstack
    209
    210        pslice.stack = symstack         # Put in the production
    211        errtoken   = None               # Err token
    212
    213        # The start state is assumed to be (0,$end)
    214        statestack.append(0)
    215        sym = YaccSymbol()
    216        sym.type = '$end'
    217        symstack.append(sym)
    218        
    219        while 1:
    220            # Get the next symbol on the input.  If a lookahead symbol
    221            # is already set, we just use that. Otherwise, we'll pull
    222            # the next token off of the lookaheadstack or from the lexer
    223            if debug > 1:
    224                print 'state', statestack[-1]
    225            if not lookahead:
    226                if not lookaheadstack:
    227                    lookahead = get_token()     # Get the next token
    228                else:
    229                    lookahead = lookaheadstack.pop()
    230                if not lookahead:
    231                    lookahead = YaccSymbol()
    232                    lookahead.type = '$end'
    233            if debug:
    234                errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()
    235
    236            # Check the action table
    237            s = statestack[-1]
    238            ltype = lookahead.type
    239            t = actions.get((s,ltype),None)
    240
    241            if debug > 1:
    242                print 'action', t
    243            if t is not None:
    244                if t > 0:
    245                    # shift a symbol on the stack
    246                    if ltype == '$end':
    247                        # Error, end of input
    248                        sys.stderr.write("yacc: Parse error. EOF\n")
    249                        return
    250                    statestack.append(t)
    251                    if debug > 1:
    252                        sys.stderr.write("%-60s shift state %s\n" % (errorlead, t))
    253                    symstack.append(lookahead)
    254                    lookahead = None
    255
    256                    # Decrease error count on successful shift
    257                    if self.errorcount > 0:
    258                        self.errorcount -= 1
    259                        
    260                    continue
    261                
    262                if t < 0:
    263                    # reduce a symbol on the stack, emit a production
    264                    p = prod[-t]
    265                    pname = p.name
    266                    plen  = p.len
    267
    268                    # Get production function
    269                    sym = YaccSymbol()
    270                    sym.type = pname       # Production name
    271                    sym.value = None
    272                    if debug > 1:
    273                        sys.stderr.write("%-60s reduce %d\n" % (errorlead, -t))
    274
    275                    if plen:
    276                        targ = symstack[-plen-1:]
    277                        targ[0] = sym
    278                        try:
    279                            sym.lineno = targ[1].lineno
    280                            sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
    281                            sym.lexpos = targ[1].lexpos
    282                            sym.endlexpos = getattr(targ[-1],"endlexpos",targ[-1].lexpos)
    283                        except AttributeError:
    284                            sym.lineno = 0
    285                        del symstack[-plen:]
    286                        del statestack[-plen:]
    287                    else:
    288                        sym.lineno = 0
    289                        targ = [ sym ]
    290                    pslice.slice = targ
    291                    pslice.pbstack = []
    292                    # Call the grammar rule with our special slice object
    293                    p.func(pslice)
    294
    295                    # If there was a pushback, put that on the stack
    296                    if pslice.pbstack:
    297                        lookaheadstack.append(lookahead)
    298                        for _t in pslice.pbstack:
    299                            lookaheadstack.append(_t)
    300                        lookahead = None
    301
    302                    symstack.append(sym)
    303                    statestack.append(goto[statestack[-1],pname])
    304                    continue
    305
    306                if t == 0:
    307                    n = symstack[-1]
    308                    return getattr(n,"value",None)
    309                    sys.stderr.write(errorlead, "\n")
    310
    311            if t == None:
    312                if debug:
    313                    sys.stderr.write(errorlead + "\n")
    314                # We have some kind of parsing error here.  To handle
    315                # this, we are going to push the current token onto
    316                # the tokenstack and replace it with an 'error' token.
    317                # If there are any synchronization rules, they may
    318                # catch it.
    319                #
     320                # In addition to pushing the error token, we call
    321                # the user defined p_error() function if this is the
    322                # first syntax error.  This function is only called if
    323                # errorcount == 0.
    324                if not self.errorcount:
    325                    self.errorcount = error_count
    326                    errtoken = lookahead
    327                    if errtoken.type == '$end':
    328                        errtoken = None               # End of file!
    329                    if self.errorfunc:
    330                        global errok,token,restart
    331                        errok = self.errok        # Set some special functions available in error recovery
    332                        token = get_token
    333                        restart = self.restart
    334                        tok = self.errorfunc(errtoken)
    335                        del errok, token, restart   # Delete special functions
    336                        
    337                        if not self.errorcount:
    338                            # User must have done some kind of panic
    339                            # mode recovery on their own.  The
    340                            # returned token is the next lookahead
    341                            lookahead = tok
    342                            errtoken = None
    343                            continue
    344                    else:
    345                        if errtoken:
    346                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
    347                            else: lineno = 0
    348                            if lineno:
    349                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
    350                            else:
     351                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
    352                        else:
    353                            sys.stderr.write("yacc: Parse error in input. EOF\n")
    354                            return
    355
    356                else:
    357                    self.errorcount = error_count
    358                
    359                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
    360                # entire parse has been rolled back and we're completely hosed.   The token is
    361                # discarded and we just keep going.
    362
    363                if len(statestack) <= 1 and lookahead.type != '$end':
    364                    lookahead = None
    365                    errtoken = None
    366                    # Nuke the pushback stack
    367                    del lookaheadstack[:]
    368                    continue
    369
    370                # case 2: the statestack has a couple of entries on it, but we're
    371                # at the end of the file. nuke the top entry and generate an error token
    372
    373                # Start nuking entries on the stack
    374                if lookahead.type == '$end':
    375                    # Whoa. We're really hosed here. Bail out
    376                    return 
    377
    378                if lookahead.type != 'error':
    379                    sym = symstack[-1]
    380                    if sym.type == 'error':
    381                        # Hmmm. Error is on top of stack, we'll just nuke input
    382                        # symbol and continue
    383                        lookahead = None
    384                        continue
    385                    t = YaccSymbol()
    386                    t.type = 'error'
    387                    if hasattr(lookahead,"lineno"):
    388                        t.lineno = lookahead.lineno
    389                    t.value = lookahead
    390                    lookaheadstack.append(lookahead)
    391                    lookahead = t
    392                else:
    393                    symstack.pop()
    394                    statestack.pop()
    395
    396                continue
    397
    398            # Call an error function here
    399            raise RuntimeError, "yacc: internal parser error!!!\n"
    400
    401# -----------------------------------------------------------------------------
    402#                          === Parser Construction ===
    403#
    404# The following functions and variables are used to implement the yacc() function
    405# itself.   This is pretty hairy stuff involving lots of error checking,
    406# construction of LR items, kernels, and so forth.   Although a lot of
    407# this work is done using global variables, the resulting Parser object
     408# is completely self-contained--meaning that it is safe to repeatedly
    409# call yacc() with different grammars in the same application.
    410# -----------------------------------------------------------------------------
    411        
    412# -----------------------------------------------------------------------------
    413# validate_file()
    414#
    415# This function checks to see if there are duplicated p_rulename() functions
    416# in the parser module file.  Without this function, it is really easy for
    417# users to make mistakes by cutting and pasting code fragments (and it's a real
    418# bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    419# we just do a little regular expression pattern matching of def statements
    420# to try and detect duplicates.
    421# -----------------------------------------------------------------------------
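#
# For example (hypothetical rule names), a module containing both of the
# following definitions triggers the "Function p_statement redefined" warning,
# since the second def silently shadows the first:
#
#     def p_statement(p):
#         'statement : NAME EQUALS expression'
#         ...
#
#     def p_statement(p):
#         'statement : expression'
#         ...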
    422
    423def validate_file(filename):
    424    base,ext = os.path.splitext(filename)
    425    if ext != '.py': return 1          # No idea. Assume it's okay.
    426
    427    try:
    428        f = open(filename)
    429        lines = f.readlines()
    430        f.close()
    431    except IOError:
    432        return 1                       # Oh well
    433
    434    # Match def p_funcname(
    435    fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
    436    counthash = { }
    437    linen = 1
    438    noerror = 1
    439    for l in lines:
    440        m = fre.match(l)
    441        if m:
    442            name = m.group(1)
    443            prev = counthash.get(name)
    444            if not prev:
    445                counthash[name] = linen
    446            else:
    447                sys.stderr.write("%s:%d: Function %s redefined. Previously defined on line %d\n" % (filename,linen,name,prev))
    448                noerror = 0
    449        linen += 1
    450    return noerror
    451
     452# This function looks for functions that might be grammar rules, but which don't have the proper p_ prefix.
    453def validate_dict(d):
    454    for n,v in d.items(): 
    455        if n[0:2] == 'p_' and type(v) in (types.FunctionType, types.MethodType): continue
    456        if n[0:2] == 't_': continue
    457
    458        if n[0:2] == 'p_':
    459            sys.stderr.write("yacc: Warning. '%s' not defined as a function\n" % n)
    460        if 1 and isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1:
    461            try:
    462                doc = v.__doc__.split(" ")
    463                if doc[1] == ':':
    464                    sys.stderr.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v.func_code.co_filename, v.func_code.co_firstlineno,n))
    465            except StandardError:
    466                pass
    467
    468# -----------------------------------------------------------------------------
    469#                           === GRAMMAR FUNCTIONS ===
    470#
    471# The following global variables and functions are used to store, manipulate,
    472# and verify the grammar rules specified by the user.
    473# -----------------------------------------------------------------------------
    474
    475# Initialize all of the global variables used during grammar construction
    476def initialize_vars():
    477    global Productions, Prodnames, Prodmap, Terminals 
    478    global Nonterminals, First, Follow, Precedence, LRitems
    479    global Errorfunc, Signature, Requires
    480
    481    Productions  = [None]  # A list of all of the productions.  The first
    482                           # entry is always reserved for the purpose of
    483                           # building an augmented grammar
    484                        
    485    Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
    486                           # productions of that nonterminal.
    487                        
    488    Prodmap      = { }     # A dictionary that is only used to detect duplicate
    489                           # productions.
    490
    491    Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
    492                           # list of the rules where they are used.
    493
    494    Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
    495                           # of rule numbers where they are used.
    496
    497    First        = { }     # A dictionary of precomputed FIRST(x) symbols
    498    
    499    Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols
    500
    501    Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
    502                           # form ('right',level) or ('nonassoc', level) or ('left',level)
    503
    504    LRitems      = [ ]     # A list of all LR items for the grammar.  These are the
    505                           # productions with the "dot" like E -> E . PLUS E
    506
    507    Errorfunc    = None    # User defined error handler
    508
    509    Signature    = md5.new()   # Digital signature of the grammar rules, precedence
     510                               # and other information.  Used to determine when a
    511                               # parsing table needs to be regenerated.
    512
    513    Requires     = { }     # Requires list
    514
    515    # File objects used when creating the parser.out debugging file
    516    global _vf, _vfc
    517    _vf           = cStringIO.StringIO()
    518    _vfc          = cStringIO.StringIO()
    519
    520# -----------------------------------------------------------------------------
    521# class Production:
    522#
    523# This class stores the raw information about a single production or grammar rule.
    524# It has a few required attributes:
    525#
    526#       name     - Name of the production (nonterminal)
    527#       prod     - A list of symbols making up its production
    528#       number   - Production number.
    529#
    530# In addition, a few additional attributes are used to help with debugging or
    531# optimization of table generation.
    532#
    533#       file     - File where production action is defined.
    534#       lineno   - Line number where action is defined
    535#       func     - Action function
    536#       prec     - Precedence level
     537#       lr_next  - Next LR item. For example, if we are 'E -> E . PLUS E'
    538#                  then lr_next refers to 'E -> E PLUS . E'   
    539#       lr_index - LR item index (location of the ".") in the prod list.
    540#       lookaheads - LALR lookahead symbols for this item
    541#       len      - Length of the production (number of symbols on right hand side)
    542# -----------------------------------------------------------------------------
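#
# Illustrative values (hypothetical grammar) for the rule E -> E PLUS E:
#
#     p.name       == 'E'
#     p.prod       == ('E', 'PLUS', 'E')
#     p.len        == 3
#     str(p)       == 'E -> E PLUS E'
#     p.lr_item(1) is the LR item 'E -> E . PLUS E', with lr_index == 1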
    543
    544class Production:
    545    def __init__(self,**kw):
    546        for k,v in kw.items():
    547            setattr(self,k,v)
    548        self.lr_index = -1
    549        self.lr0_added = 0    # Flag indicating whether or not added to LR0 closure
    550        self.lr1_added = 0    # Flag indicating whether or not added to LR1
    551        self.usyms = [ ]
    552        self.lookaheads = { }
    553        self.lk_added = { }
    554        self.setnumbers = [ ]
    555        
    556    def __str__(self):
    557        if self.prod:
    558            s = "%s -> %s" % (self.name," ".join(self.prod))
    559        else:
    560            s = "%s -> <empty>" % self.name
    561        return s
    562
    563    def __repr__(self):
    564        return str(self)
    565
    566    # Compute lr_items from the production
    567    def lr_item(self,n):
    568        if n > len(self.prod): return None
    569        p = Production()
    570        p.name = self.name
    571        p.prod = list(self.prod)
    572        p.number = self.number
    573        p.lr_index = n
    574        p.lookaheads = { }
    575        p.setnumbers = self.setnumbers
    576        p.prod.insert(n,".")
    577        p.prod = tuple(p.prod)
    578        p.len = len(p.prod)
    579        p.usyms = self.usyms
    580
    581        # Precompute list of productions immediately following
    582        try:
    583            p.lrafter = Prodnames[p.prod[n+1]]
    584        except (IndexError,KeyError),e:
    585            p.lrafter = []
    586        try:
    587            p.lrbefore = p.prod[n-1]
    588        except IndexError:
    589            p.lrbefore = None
    590
    591        return p
    592
    593class MiniProduction:
    594    pass
    595
    596# regex matching identifiers
    597_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
    598
    599# -----------------------------------------------------------------------------
    600# add_production()
    601#
    602# Given an action function, this function assembles a production rule.
    603# The production rule is assumed to be found in the function's docstring.
    604# This rule has the general syntax:
    605#
    606#              name1 ::= production1
    607#                     |  production2
    608#                     |  production3
    609#                    ...
    610#                     |  productionn
    611#              name2 ::= production1
    612#                     |  production2
    613#                    ... 
    614# -----------------------------------------------------------------------------
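#
# A sketch of what this looks like in user code (hypothetical names); both the
# ':' and '::=' forms of the separator are accepted:
#
#     def p_statement(p):
#         '''statement : NAME EQUALS expression
#                      | PRINT expression
#                      | expression'''
#         ...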
    615
    616def add_production(f,file,line,prodname,syms):
    617    
    618    if Terminals.has_key(prodname):
    619        sys.stderr.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line,prodname))
    620        return -1
    621    if prodname == 'error':
    622        sys.stderr.write("%s:%d: Illegal rule name '%s'. error is a reserved word.\n" % (file,line,prodname))
    623        return -1
    624                
    625    if not _is_identifier.match(prodname):
    626        sys.stderr.write("%s:%d: Illegal rule name '%s'\n" % (file,line,prodname))
    627        return -1
    628
    629    for x in range(len(syms)):
    630        s = syms[x]
    631        if s[0] in "'\"":
    632             try:
    633                 c = eval(s)
    634                 if (len(c) > 1):
    635                      sys.stderr.write("%s:%d: Literal token %s in rule '%s' may only be a single character\n" % (file,line,s, prodname)) 
    636                      return -1
    637                 if not Terminals.has_key(c):
    638                      Terminals[c] = []
    639                 syms[x] = c
    640                 continue
    641             except SyntaxError:
    642                 pass
    643        if not _is_identifier.match(s) and s != '%prec':
    644            sys.stderr.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line,s, prodname))
    645            return -1
    646
    647    # See if the rule is already in the rulemap
    648    map = "%s -> %s" % (prodname,syms)
    649    if Prodmap.has_key(map):
    650        m = Prodmap[map]
    651        sys.stderr.write("%s:%d: Duplicate rule %s.\n" % (file,line, m))
    652        sys.stderr.write("%s:%d: Previous definition at %s:%d\n" % (file,line, m.file, m.line))
    653        return -1
    654
    655    p = Production()
    656    p.name = prodname
    657    p.prod = syms
    658    p.file = file
    659    p.line = line
    660    p.func = f
    661    p.number = len(Productions)
    662
    663            
    664    Productions.append(p)
    665    Prodmap[map] = p
    666    if not Nonterminals.has_key(prodname):
    667        Nonterminals[prodname] = [ ]
    668    
    669    # Add all terminals to Terminals
    670    i = 0
    671    while i < len(p.prod):
    672        t = p.prod[i]
    673        if t == '%prec':
    674            try:
    675                precname = p.prod[i+1]
    676            except IndexError:
    677                sys.stderr.write("%s:%d: Syntax error. Nothing follows %%prec.\n" % (p.file,p.line))
    678                return -1
    679
    680            prec = Precedence.get(precname,None)
    681            if not prec:
    682                sys.stderr.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p.file,p.line,precname))
    683                return -1
    684            else:
    685                p.prec = prec
    686            del p.prod[i]
    687            del p.prod[i]
    688            continue
    689
    690        if Terminals.has_key(t):
    691            Terminals[t].append(p.number)
    692            # Is a terminal.  We'll assign a precedence to p based on this
    693            if not hasattr(p,"prec"):
    694                p.prec = Precedence.get(t,('right',0))
    695        else:
    696            if not Nonterminals.has_key(t):
    697                Nonterminals[t] = [ ]
    698            Nonterminals[t].append(p.number)
    699        i += 1
    700
    701    if not hasattr(p,"prec"):
    702        p.prec = ('right',0)
    703        
    704    # Set final length of productions
    705    p.len  = len(p.prod)
    706    p.prod = tuple(p.prod)
    707
    708    # Calculate unique syms in the production
    709    p.usyms = [ ]
    710    for s in p.prod:
    711        if s not in p.usyms:
    712            p.usyms.append(s)
    713    
    714    # Add to the global productions list
    715    try:
    716        Prodnames[p.name].append(p)
    717    except KeyError:
    718        Prodnames[p.name] = [ p ]
    719    return 0
    720
    721# Given a raw rule function, this function rips out its doc string
    722# and adds rules to the grammar
    723
    724def add_function(f):
    725    line = f.func_code.co_firstlineno
    726    file = f.func_code.co_filename
    727    error = 0
    728
    729    if isinstance(f,types.MethodType):
    730        reqdargs = 2
    731    else:
    732        reqdargs = 1
    733        
    734    if f.func_code.co_argcount > reqdargs:
    735        sys.stderr.write("%s:%d: Rule '%s' has too many arguments.\n" % (file,line,f.__name__))
    736        return -1
    737
    738    if f.func_code.co_argcount < reqdargs:
    739        sys.stderr.write("%s:%d: Rule '%s' requires an argument.\n" % (file,line,f.__name__))
    740        return -1
    741          
    742    if f.__doc__:
    743        # Split the doc string into lines
    744        pstrings = f.__doc__.splitlines()
    745        lastp = None
    746        dline = line
    747        for ps in pstrings:
    748            dline += 1
    749            p = ps.split()
    750            if not p: continue
    751            try:
    752                if p[0] == '|':
    753                    # This is a continuation of a previous rule
    754                    if not lastp:
    755                        sys.stderr.write("%s:%d: Misplaced '|'.\n" % (file,dline))
    756                        return -1
    757                    prodname = lastp
    758                    if len(p) > 1:
    759                        syms = p[1:]
    760                    else:
    761                        syms = [ ]
    762                else:
    763                    prodname = p[0]
    764                    lastp = prodname
    765                    assign = p[1]
    766                    if len(p) > 2:
    767                        syms = p[2:]
    768                    else:
    769                        syms = [ ]
    770                    if assign != ':' and assign != '::=':
    771                        sys.stderr.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline))
    772                        return -1
    773                         
    774 
    775                e = add_production(f,file,dline,prodname,syms)
    776                error += e
    777
    778                
    779            except StandardError:
    780                sys.stderr.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline,ps))
    781                error -= 1
    782    else:
    783        sys.stderr.write("%s:%d: No documentation string specified in function '%s'\n" % (file,line,f.__name__))
    784    return error
    785
    786
    787# Cycle checking code (Michael Dyck)
    788
    789def compute_reachable():
    790    '''
    791    Find each symbol that can be reached from the start symbol.
    792    Print a warning for any nonterminals that can't be reached.
    793    (Unused terminals have already had their warning.)
    794    '''
    795    Reachable = { }
    796    for s in Terminals.keys() + Nonterminals.keys():
    797        Reachable[s] = 0
    798
    799    mark_reachable_from( Productions[0].prod[0], Reachable )
    800
    801    for s in Nonterminals.keys():
    802        if not Reachable[s]:
    803            sys.stderr.write("yacc: Symbol '%s' is unreachable.\n" % s)
    804
    805def mark_reachable_from(s, Reachable):
    806    '''
    807    Mark all symbols that are reachable from symbol s.
    808    '''
    809    if Reachable[s]:
    810        # We've already reached symbol s.
    811        return
    812    Reachable[s] = 1
    813    for p in Prodnames.get(s,[]):
    814        for r in p.prod:
    815            mark_reachable_from(r, Reachable)
    816
    817# -----------------------------------------------------------------------------
    818# compute_terminates()
    819#
    820# This function looks at the various parsing rules and tries to detect
    821# infinite recursion cycles (grammar rules where there is no possible way
    822# to derive a string of only terminals).
    823# -----------------------------------------------------------------------------
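#
# For example (hypothetical grammar), a rule set such as
#
#     args : args COMMA arg
#
# with no terminating alternative (e.g. no "args : arg" rule) can never derive
# a string of only terminals, and is reported here as
# "Infinite recursion detected for symbol 'args'".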
    824def compute_terminates():
    825    '''
    826    Raise an error for any symbols that don't terminate.
    827    '''
    828    Terminates = {}
    829
    830    # Terminals:
    831    for t in Terminals.keys():
    832        Terminates[t] = 1
    833
    834    Terminates['$end'] = 1
    835
    836    # Nonterminals:
    837
    838    # Initialize to false:
    839    for n in Nonterminals.keys():
    840        Terminates[n] = 0
    841
    842    # Then propagate termination until no change:
    843    while 1:
    844        some_change = 0
    845        for (n,pl) in Prodnames.items():
    846            # Nonterminal n terminates iff any of its productions terminates.
    847            for p in pl:
    848                # Production p terminates iff all of its rhs symbols terminate.
    849                for s in p.prod:
    850                    if not Terminates[s]:
    851                        # The symbol s does not terminate,
    852                        # so production p does not terminate.
    853                        p_terminates = 0
    854                        break
    855                else:
    856                    # didn't break from the loop,
    857                    # so every symbol s terminates
    858                    # so production p terminates.
    859                    p_terminates = 1
    860
    861                if p_terminates:
    862                    # symbol n terminates!
    863                    if not Terminates[n]:
    864                        Terminates[n] = 1
    865                        some_change = 1
    866                    # Don't need to consider any more productions for this n.
    867                    break
    868
    869        if not some_change:
    870            break
    871
    872    some_error = 0
    873    for (s,terminates) in Terminates.items():
    874        if not terminates:
    875            if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
    876                # s is used-but-not-defined, and we've already warned of that,
    877                # so it would be overkill to say that it's also non-terminating.
    878                pass
    879            else:
    880                sys.stderr.write("yacc: Infinite recursion detected for symbol '%s'.\n" % s)
    881                some_error = 1
    882
    883    return some_error
    884
    885# -----------------------------------------------------------------------------
    886# verify_productions()
    887#
    888# This function examines all of the supplied rules to see if they seem valid.
    889# -----------------------------------------------------------------------------
    890def verify_productions(cycle_check=1):
    891    error = 0
    892    for p in Productions:
    893        if not p: continue
    894
    895        for s in p.prod:
    896            if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
    897                sys.stderr.write("%s:%d: Symbol '%s' used, but not defined as a token or a rule.\n" % (p.file,p.line,s))
    898                error = 1
    899                continue
    900
    901    unused_tok = 0 
    902    # Now verify all of the tokens
    903    if yaccdebug:
    904        _vf.write("Unused terminals:\n\n")
    905    for s,v in Terminals.items():
    906        if s != 'error' and not v:
    907            sys.stderr.write("yacc: Warning. Token '%s' defined, but not used.\n" % s)
    908            if yaccdebug: _vf.write("   %s\n"% s)
    909            unused_tok += 1
    910
    911    # Print out all of the productions
    912    if yaccdebug:
    913        _vf.write("\nGrammar\n\n")
    914        for i in range(1,len(Productions)):
    915            _vf.write("Rule %-5d %s\n" % (i, Productions[i]))
    916        
    917    unused_prod = 0
    918    # Verify the use of all productions
    919    for s,v in Nonterminals.items():
    920        if not v:
    921            p = Prodnames[s][0]
    922            sys.stderr.write("%s:%d: Warning. Rule '%s' defined, but not used.\n" % (p.file,p.line, s))
    923            unused_prod += 1
    924
    925    
    926    if unused_tok == 1:
    927        sys.stderr.write("yacc: Warning. There is 1 unused token.\n")
    928    if unused_tok > 1:
    929        sys.stderr.write("yacc: Warning. There are %d unused tokens.\n" % unused_tok)
    930
    931    if unused_prod == 1:
    932        sys.stderr.write("yacc: Warning. There is 1 unused rule.\n")
    933    if unused_prod > 1:
    934        sys.stderr.write("yacc: Warning. There are %d unused rules.\n" % unused_prod)
    935
    936    if yaccdebug:
    937        _vf.write("\nTerminals, with rules where they appear\n\n")
    938        ks = Terminals.keys()
    939        ks.sort()
    940        for k in ks:
    941            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]])))
    942        _vf.write("\nNonterminals, with rules where they appear\n\n")
    943        ks = Nonterminals.keys()
    944        ks.sort()
    945        for k in ks:
    946            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]])))
    947
    948    if (cycle_check):
    949        compute_reachable()
    950        error += compute_terminates()
    951#        error += check_cycles()
    952    return error
    953
    954# -----------------------------------------------------------------------------
    955# build_lritems()
    956#
    957# This function walks the list of productions and builds a complete set of the
    958# LR items.  The LR items are stored in two ways:  First, they are uniquely
    959# numbered and placed in the list _lritems.  Second, a linked list of LR items
    960# is built for each production.  For example:
    961#
    962#   E -> E PLUS E
    963#
    964# Creates the list
    965#
    966#  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] 
    967# -----------------------------------------------------------------------------
    968
    969def build_lritems():
    970    for p in Productions:
    971        lastlri = p
    972        lri = p.lr_item(0)
    973        i = 0
    974        while 1:
    975            lri = p.lr_item(i)
    976            lastlri.lr_next = lri
    977            if not lri: break
    978            lri.lr_num = len(LRitems)
    979            LRitems.append(lri)
    980            lastlri = lri
    981            i += 1
    982
    983    # In order for the rest of the parser generator to work, we need to
    984    # guarantee that no more lritems are generated.  Therefore, we nuke
    985    # the p.lr_item method.  (Only used in debugging)
    986    # Production.lr_item = None
    987
    988# -----------------------------------------------------------------------------
    989# add_precedence()
    990#
    991# Given a list of precedence rules, add to the precedence table.
    992# -----------------------------------------------------------------------------
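#
# A sketch with hypothetical token names: each entry is a sequence whose first
# element is 'left', 'right' or 'nonassoc' and whose remaining elements are
# terminals; the level is the entry's 1-based position, so later entries bind
# more tightly:
#
#     add_precedence([('left', 'PLUS', 'MINUS'),
#                     ('left', 'TIMES', 'DIVIDE')])
#     # Precedence['PLUS']  == ('left', 1)
#     # Precedence['TIMES'] == ('left', 2)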
    993
    994def add_precedence(plist):
    995    plevel = 0
    996    error = 0
    997    for p in plist:
    998        plevel += 1
    999        try:
   1000            prec = p[0]
   1001            terms = p[1:]
   1002            if prec != 'left' and prec != 'right' and prec != 'nonassoc':
   1003                sys.stderr.write("yacc: Invalid precedence '%s'\n" % prec)
   1004                return -1
   1005            for t in terms:
   1006                if Precedence.has_key(t):
   1007                    sys.stderr.write("yacc: Precedence already specified for terminal '%s'\n" % t)
   1008                    error += 1
   1009                    continue
   1010                Precedence[t] = (prec,plevel)
   1011        except:
   1012            sys.stderr.write("yacc: Invalid precedence table.\n")
   1013            error += 1
   1014
   1015    return error
   1016
   1017# -----------------------------------------------------------------------------
   1018# augment_grammar()
   1019#
   1020# Compute the augmented grammar.  This is just a rule S' -> start where start
   1021# is the starting symbol.
   1022# -----------------------------------------------------------------------------
   1023
   1024def augment_grammar(start=None):
   1025    if not start:
   1026        start = Productions[1].name
   1027    Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None)
   1028    Productions[0].usyms = [ start ]
   1029    Nonterminals[start].append(0)
   1030
   1031
   1032# -------------------------------------------------------------------------
   1033# first()
   1034#
   1035# Compute the value of FIRST1(beta) where beta is a tuple of symbols.
   1036#
   1037# During execution of compute_first1, the result may be incomplete.
   1038# Afterward (e.g., when called from compute_follow()), it will be complete.
   1039# -------------------------------------------------------------------------
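#
# Worked example (hypothetical grammar): with the productions
#     E -> T E2,   E2 -> PLUS T E2,   E2 -> <empty>,   T -> NUM
# compute_first1() gives First['T'] == ['NUM'] and
# First['E2'] == ['PLUS', '<empty>'], so
#     first(('E2',))       == ['PLUS', '<empty>']
#     first(('E2', 'NUM')) == ['PLUS', 'NUM']
# because E2 can vanish and let NUM contribute to the second result.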
   1040def first(beta):
   1041
   1042    # We are computing First(x1,x2,x3,...,xn)
   1043    result = [ ]
   1044    for x in beta:
   1045        x_produces_empty = 0
   1046
   1047        # Add all the non-<empty> symbols of First[x] to the result.
   1048        for f in First[x]:
   1049            if f == '<empty>':
   1050                x_produces_empty = 1
   1051            else:
   1052                if f not in result: result.append(f)
   1053
   1054        if x_produces_empty:
   1055            # We have to consider the next x in beta,
   1056            # i.e. stay in the loop.
   1057            pass
   1058        else:
   1059            # We don't have to consider any further symbols in beta.
   1060            break
   1061    else:
   1062        # There was no 'break' from the loop,
   1063        # so x_produces_empty was true for all x in beta,
   1064        # so beta produces empty as well.
   1065        result.append('<empty>')
   1066
   1067    return result
   1068
   1069
   1070# FOLLOW(x)
    1071# Given a non-terminal, this function computes the set of all symbols
   1072# that might follow it.  Dragon book, p. 189.
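#
# With the same toy grammar as above (E -> T E2, E2 -> PLUS T E2 | <empty>,
# T -> NUM, start symbol E), the fixed point computed below is:
#
#     Follow['E']  == ['$end']
#     Follow['E2'] == ['$end']           # E2 ends both the E and E2 productions
#     Follow['T']  == ['PLUS', '$end']   # First(E2) minus <empty>, plus Follow(E2)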
   1073
   1074def compute_follow(start=None):
   1075    # Add '$end' to the follow list of the start symbol
   1076    for k in Nonterminals.keys():
   1077        Follow[k] = [ ]
   1078
   1079    if not start:
   1080        start = Productions[1].name
   1081        
   1082    Follow[start] = [ '$end' ]
   1083        
   1084    while 1:
   1085        didadd = 0
   1086        for p in Productions[1:]:
   1087            # Here is the production set
   1088            for i in range(len(p.prod)):
   1089                B = p.prod[i]
   1090                if Nonterminals.has_key(B):
   1091                    # Okay. We got a non-terminal in a production
   1092                    fst = first(p.prod[i+1:])
   1093                    hasempty = 0
   1094                    for f in fst:
   1095                        if f != '<empty>' and f not in Follow[B]:
   1096                            Follow[B].append(f)
   1097                            didadd = 1
   1098                        if f == '<empty>':
   1099                            hasempty = 1
   1100                    if hasempty or i == (len(p.prod)-1):
    1101                        # Add elements of Follow(p.name) to Follow(B)
   1102                        for f in Follow[p.name]:
   1103                            if f not in Follow[B]:
   1104                                Follow[B].append(f)
   1105                                didadd = 1
   1106        if not didadd: break
   1107
   1108    if 0 and yaccdebug:
   1109        _vf.write('\nFollow:\n')
   1110        for k in Nonterminals.keys():
   1111            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]])))
   1112
   1113# -------------------------------------------------------------------------
   1114# compute_first1()
   1115#
   1116# Compute the value of FIRST1(X) for all symbols
   1117# -------------------------------------------------------------------------
   1118def compute_first1():
   1119
   1120    # Terminals:
   1121    for t in Terminals.keys():
   1122        First[t] = [t]
   1123
   1124    First['$end'] = ['$end']
   1125    First['#'] = ['#'] # what's this for?
   1126
   1127    # Nonterminals:
   1128
   1129    # Initialize to the empty set:
   1130    for n in Nonterminals.keys():
   1131        First[n] = []
   1132
   1133    # Then propagate symbols until no change:
   1134    while 1:
   1135        some_change = 0
   1136        for n in Nonterminals.keys():
   1137            for p in Prodnames[n]:
   1138                for f in first(p.prod):
   1139                    if f not in First[n]:
   1140                        First[n].append( f )
   1141                        some_change = 1
   1142        if not some_change:
   1143            break
   1144
   1145    if 0 and yaccdebug:
   1146        _vf.write('\nFirst:\n')
   1147        for k in Nonterminals.keys():
   1148            _vf.write("%-20s : %s\n" %
   1149                (k, " ".join([str(s) for s in First[k]])))
   1150
   1151# -----------------------------------------------------------------------------
   1152#                           === SLR Generation ===
   1153#
   1154# The following functions are used to construct SLR (Simple LR) parsing tables
   1155# as described on p.221-229 of the dragon book.
   1156# -----------------------------------------------------------------------------
   1157
   1158# Global variables for the LR parsing engine
   1159def lr_init_vars():
   1160    global _lr_action, _lr_goto, _lr_method
   1161    global _lr_goto_cache, _lr0_cidhash
   1162    
   1163    _lr_action       = { }        # Action table
   1164    _lr_goto         = { }        # Goto table
   1165    _lr_method       = "Unknown"  # LR method used
   1166    _lr_goto_cache   = { }
   1167    _lr0_cidhash     = { }
   1168
   1169
   1170# Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
   1171# prodlist is a list of productions.
   1172
   1173_add_count = 0       # Counter used to detect cycles
   1174
   1175def lr0_closure(I):
   1176    global _add_count
   1177    
   1178    _add_count += 1
   1179    prodlist = Productions
   1180    
   1181    # Add everything in I to J        
   1182    J = I[:]
   1183    didadd = 1
   1184    while didadd:
   1185        didadd = 0
   1186        for j in J:
   1187            for x in j.lrafter:
   1188                if x.lr0_added == _add_count: continue
   1189                # Add B --> .G to J
   1190                J.append(x.lr_next)
   1191                x.lr0_added = _add_count
   1192                didadd = 1
   1193               
   1194    return J
   1195
   1196# Compute the LR(0) goto function goto(I,X) where I is a set
   1197# of LR(0) items and X is a grammar symbol.   This function is written
   1198# in a way that guarantees uniqueness of the generated goto sets
   1199# (i.e. the same goto set will never be returned as two different Python
   1200# objects).  With uniqueness, we can later do fast set comparisons using
   1201# id(obj) instead of element-wise comparison.
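#
# In practical terms (illustrative only): repeated calls with the same item
# set and symbol hand back the very same list object, so later code can
# compare and hash goto sets cheaply by identity:
#
#     g1 = lr0_goto(I, 'PLUS')
#     g2 = lr0_goto(I, 'PLUS')
#     assert g1 is g2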
   1202
   1203def lr0_goto(I,x):
   1204    # First we look for a previously cached entry
   1205    g = _lr_goto_cache.get((id(I),x),None)
   1206    if g: return g
   1207
   1208    # Now we generate the goto set in a way that guarantees uniqueness
   1209    # of the result
   1210    
   1211    s = _lr_goto_cache.get(x,None)
   1212    if not s:
   1213        s = { }
   1214        _lr_goto_cache[x] = s
   1215
   1216    gs = [ ]
   1217    for p in I:
   1218        n = p.lr_next
   1219        if n and n.lrbefore == x:
   1220            s1 = s.get(id(n),None)
   1221            if not s1:
   1222                s1 = { }
   1223                s[id(n)] = s1
   1224            gs.append(n)
   1225            s = s1
   1226    g = s.get('$end',None)
   1227    if not g:
   1228        if gs:
   1229            g = lr0_closure(gs)
   1230            s['$end'] = g
   1231        else:
   1232            s['$end'] = gs
   1233    _lr_goto_cache[(id(I),x)] = g
   1234    return g
   1235
   1236_lr0_cidhash = { }
   1237
    1238# Compute the LR(0) sets-of-items function
   1239def lr0_items():
   1240    
   1241    C = [ lr0_closure([Productions[0].lr_next]) ]
   1242    i = 0
   1243    for I in C:
   1244        _lr0_cidhash[id(I)] = i
   1245        i += 1
   1246
    1247    # Loop over the items in C and each grammar symbol
   1248    i = 0
   1249    while i < len(C):
   1250        I = C[i]
   1251        i += 1
   1252
   1253        # Collect all of the symbols that could possibly be in the goto(I,X) sets
   1254        asyms = { }
   1255        for ii in I:
   1256            for s in ii.usyms:
   1257                asyms[s] = None
   1258
   1259        for x in asyms.keys():
   1260            g = lr0_goto(I,x)
   1261            if not g:  continue
   1262            if _lr0_cidhash.has_key(id(g)): continue
   1263            _lr0_cidhash[id(g)] = len(C)            
   1264            C.append(g)
   1265            
   1266    return C
   1267
   1268# -----------------------------------------------------------------------------
   1269#                       ==== LALR(1) Parsing ====
   1270#
   1271# LALR(1) parsing is almost exactly the same as SLR except that instead of
   1272# relying upon Follow() sets when performing reductions, a more selective
   1273# lookahead set that incorporates the state of the LR(0) machine is utilized.
   1274# Thus, we mainly just have to focus on calculating the lookahead sets.
   1275#
    1276# The method used here is due to DeRemer and Pennello (1982).
   1277#
    1278# DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
   1279#     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
   1280#     Vol. 4, No. 4, Oct. 1982, pp. 615-649
   1281#
   1282# Further details can also be found in:
   1283#
   1284#  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
   1285#      McGraw-Hill Book Company, (1985).
   1286#
   1287# Note:  This implementation is a complete replacement of the LALR(1) 
   1288#        implementation in PLY-1.x releases.   That version was based on
   1289#        a less efficient algorithm and it had bugs in its implementation.
   1290# -----------------------------------------------------------------------------
   1291
   1292# -----------------------------------------------------------------------------
   1293# compute_nullable_nonterminals()
   1294#
   1295# Creates a dictionary containing all of the non-terminals that can derive
   1296# the empty string (i.e. the nullable non-terminals).
   1297# -----------------------------------------------------------------------------
   1298
   1299def compute_nullable_nonterminals():
   1300    nullable = {}
   1301    num_nullable = 0
   1302    while 1:
   1303       for p in Productions[1:]:
   1304           if p.len == 0:
   1305                nullable[p.name] = 1
   1306                continue
   1307           for t in p.prod:
   1308                if not nullable.has_key(t): break
   1309           else:
   1310                nullable[p.name] = 1
   1311       if len(nullable) == num_nullable: break
   1312       num_nullable = len(nullable)
   1313    return nullable
   1314
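# -----------------------------------------------------------------------------
# A self-contained illustration (hypothetical toy grammar, not from this file)
# of the fixed point computed by compute_nullable_nonterminals() above: a
# nonterminal is nullable if one of its productions is empty or consists only
# of nullable nonterminals, and the loop repeats until nothing new is added.

def nullable_set(productions):
    # productions is a list of (name, right_hand_side_tuple) pairs
    nullable = set()
    changed = 1
    while changed:
        changed = 0
        for name, rhs in productions:
            if name not in nullable and all(s in nullable for s in rhs):
                nullable.add(name)
                changed = 1
    return nullable

# Toy grammar:   S -> A B     A -> 'a' | <empty>     B -> <empty>
toy = [('S', ('A','B')), ('A', ('a',)), ('A', ()), ('B', ())]
assert nullable_set(toy) == set(['S','A','B'])
# -----------------------------------------------------------------------------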
   1315# -----------------------------------------------------------------------------
   1316# find_nonterminal_transitions(C)
   1317#
   1318# Given a set of LR(0) items, this function finds all of the non-terminal
   1319# transitions.    These are transitions in which a dot appears immediately before
   1320# a non-terminal.   Returns a list of tuples of the form (state,N) where state
   1321# is the state number and N is the nonterminal symbol.
   1322#
   1323# The input C is the set of LR(0) items.
   1324# -----------------------------------------------------------------------------
   1325
   1326def find_nonterminal_transitions(C):
   1327     trans = []
   1328     for state in range(len(C)):
   1329         for p in C[state]:
   1330             if p.lr_index < p.len - 1:
   1331                  t = (state,p.prod[p.lr_index+1])
   1332                  if Nonterminals.has_key(t[1]):
   1333                        if t not in trans: trans.append(t)
   1334         state = state + 1
   1335     return trans
   1336
   1337# -----------------------------------------------------------------------------
   1338# dr_relation()
   1339#
   1340# Computes the DR(p,A) relationships for non-terminal transitions.  The input
   1341# is a tuple (state,N) where state is a number and N is a nonterminal symbol.
   1342#
   1343# Returns a list of terminals.
   1344# -----------------------------------------------------------------------------
   1345
   1346def dr_relation(C,trans,nullable):
   1347    dr_set = { }
   1348    state,N = trans
   1349    terms = []
   1350
   1351    g = lr0_goto(C[state],N)
   1352    for p in g:
   1353       if p.lr_index < p.len - 1:
   1354           a = p.prod[p.lr_index+1]
   1355           if Terminals.has_key(a):
   1356               if a not in terms: terms.append(a)
   1357
   1358    # This extra bit is to handle the start state
   1359    if state == 0 and N == Productions[0].prod[0]:
   1360       terms.append('$end')
   1361 
   1362    return terms
   1363
   1364# -----------------------------------------------------------------------------
   1365# reads_relation()
   1366#
   1367# Computes the READS() relation (p,A) READS (t,C).
   1368# -----------------------------------------------------------------------------
   1369
   1370def reads_relation(C, trans, empty):
   1371    # Look for empty transitions
   1372    rel = []
   1373    state, N = trans
   1374
   1375    g = lr0_goto(C[state],N)
   1376    j = _lr0_cidhash.get(id(g),-1)
   1377    for p in g:
   1378        if p.lr_index < p.len - 1:
   1379             a = p.prod[p.lr_index + 1]
   1380             if empty.has_key(a):
   1381                  rel.append((j,a))
   1382
   1383    return rel
   1384
   1385# -----------------------------------------------------------------------------
   1386# compute_lookback_includes()
   1387#
   1388# Determines the lookback and includes relations
   1389#
   1390# LOOKBACK:
   1391# 
   1392# This relation is determined by running the LR(0) state machine forward.
   1393# For example, starting with a production "N : . A B C", we run it forward
   1394# to obtain "N : A B C ."   We then build a relationship between this final
   1395# state and the starting state.   These relationships are stored in a dictionary
   1396# lookdict.   
   1397#
   1398# INCLUDES:
   1399#
   1400# Computes the INCLUDE() relation (p,A) INCLUDES (p',B).   
   1401#
   1402# This relation is used to determine non-terminal transitions that occur
   1403# inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
   1404# if the following holds:
   1405#
   1406#       B -> LAT, where T -> epsilon and p' -L-> p 
   1407#
   1408# L is essentially a prefix (which may be empty), T is a suffix that must be
   1409# able to derive an empty string.  State p' must lead to state p with the string L.
   1410# 
   1411# -----------------------------------------------------------------------------
   1412
   1413def compute_lookback_includes(C,trans,nullable):
   1414    
   1415    lookdict = {}          # Dictionary of lookback relations
   1416    includedict = {}       # Dictionary of include relations
   1417
   1418    # Make a dictionary of non-terminal transitions
   1419    dtrans = {}
   1420    for t in trans:
   1421        dtrans[t] = 1
   1422    
   1423    # Loop over all transitions and compute lookbacks and includes
   1424    for state,N in trans:
   1425        lookb = []
   1426        includes = []
   1427        for p in C[state]:
   1428            if p.name != N: continue
   1429        
   1430            # Okay, we have a name match.  We now follow the production all the way
   1431            # through the state machine until we get the . on the right hand side
   1432
   1433            lr_index = p.lr_index
   1434            j = state
   1435            while lr_index < p.len - 1:
   1436                 lr_index = lr_index + 1
   1437                 t = p.prod[lr_index]
   1438
   1439                 # Check to see if this symbol and state are a non-terminal transition
   1440                 if dtrans.has_key((j,t)):
   1441                       # Yes.  Okay, there is some chance that this is an includes relation;
   1442                       # the only way to know for certain is whether the rest of the
   1443                       # production derives empty
   1444
   1445                       li = lr_index + 1
   1446                       while li < p.len:
   1447                            if Terminals.has_key(p.prod[li]): break      # No forget it
   1448                            if not nullable.has_key(p.prod[li]): break
   1449                            li = li + 1
   1450                       else:
   1451                            # Appears to be a relation between (j,t) and (state,N)
   1452                            includes.append((j,t))
   1453
   1454                 g = lr0_goto(C[j],t)               # Go to next set             
   1455                 j = _lr0_cidhash.get(id(g),-1)     # Go to next state
   1456             
   1457            # When we get here, j is the final state, now we have to locate the production
   1458            for r in C[j]:
   1459                 if r.name != p.name: continue
   1460                 if r.len != p.len:   continue
   1461                 i = 0
   1462                 # This loop is comparing a production ". A B C" with "A B C ."
   1463                 while i < r.lr_index:
   1464                      if r.prod[i] != p.prod[i+1]: break
   1465                      i = i + 1
   1466                 else:
   1467                      lookb.append((j,r))
   1468        for i in includes:
   1469             if not includedict.has_key(i): includedict[i] = []
   1470             includedict[i].append((state,N))
   1471        lookdict[(state,N)] = lookb
   1472
   1473    return lookdict,includedict
   1474
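# -----------------------------------------------------------------------------
# A self-contained sketch (illustrative only, not part of the PLY sources) of
# the test applied inside compute_lookback_includes() above: (j,t) INCLUDES
# (state,N) only when every symbol that follows t in the production can derive
# the empty string, since a terminal or a non-nullable nonterminal in the
# remaining suffix rules the relation out.

def rest_derives_empty(rhs, pos, terminals, nullable):
    # rhs: right hand side of a production, pos: index of the symbol just seen
    for sym in rhs[pos+1:]:
        if sym in terminals:      # a terminal can never derive empty
            return 0
        if sym not in nullable:   # nor can a non-nullable nonterminal
            return 0
    return 1

# Production  N -> A B C  with only C nullable
assert rest_derives_empty(('A','B','C'), 1, set(), set(['C']))
assert not rest_derives_empty(('A','B','C'), 0, set(), set(['C']))
# -----------------------------------------------------------------------------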
   1475# -----------------------------------------------------------------------------
   1476# digraph()
   1477# traverse()
   1478#
   1479# The following two functions are used to compute set valued functions
   1480# of the form:
   1481#
   1482#     F(x) = F'(x) U U{F(y) | x R y}
   1483#
   1484# This is used to compute the values of Read() sets as well as FOLLOW sets
   1485# in LALR(1) generation.
   1486#
   1487# Inputs:  X    - An input set
   1488#          R    - A relation
   1489#          FP   - Set-valued function
   1490# ------------------------------------------------------------------------------
   1491
   1492def digraph(X,R,FP):
   1493    N = { }
   1494    for x in X:
   1495       N[x] = 0
   1496    stack = []
   1497    F = { }
   1498    for x in X:
   1499        if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
   1500    return F
   1501
   1502def traverse(x,N,stack,F,X,R,FP):
   1503    stack.append(x)
   1504    d = len(stack)
   1505    N[x] = d
   1506    F[x] = FP(x)             # F(X) <- F'(x)
   1507    
   1508    rel = R(x)               # Get y's related to x
   1509    for y in rel:
   1510        if N[y] == 0:
   1511             traverse(y,N,stack,F,X,R,FP)
   1512        N[x] = min(N[x],N[y])
   1513        for a in F.get(y,[]):
   1514            if a not in F[x]: F[x].append(a)
   1515    if N[x] == d:
   1516       N[stack[-1]] = sys.maxint
   1517       F[stack[-1]] = F[x]
   1518       element = stack.pop()
   1519       while element != x:
   1520           N[stack[-1]] = sys.maxint
   1521           F[stack[-1]] = F[x]
   1522           element = stack.pop()
   1523
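# -----------------------------------------------------------------------------
# A self-contained, brute-force illustration (not part of the original file) of
# the set-valued function that digraph()/traverse() above compute:
#
#     F(x) = FP(x) U union of F(y) for every y with x R y
#
# The real implementation gets the same answer in a single pass using a
# Tarjan-style stack; the naive fixed point below is only meant to make the
# specification concrete on a tiny input.

def naive_digraph(X, R, FP):
    F = dict([(x, set(FP(x))) for x in X])
    changed = 1
    while changed:
        changed = 0
        for x in X:
            for y in R(x):
                if not F[y] <= F[x]:
                    F[x] |= F[y]
                    changed = 1
    return F

# 1 "reads" 2, 2 "reads" 3; FP contributes just the element itself
xs   = [1, 2, 3]
rel  = lambda x: [x+1] if x < 3 else []
base = lambda x: [x]
assert naive_digraph(xs, rel, base) == {1: set([1,2,3]), 2: set([2,3]), 3: set([3])}
# -----------------------------------------------------------------------------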
   1524# -----------------------------------------------------------------------------
   1525# compute_read_sets()
   1526#
   1527# Given a set of LR(0) items, this function computes the read sets.
   1528#
   1529# Inputs:  C        =  Set of LR(0) items
   1530#          ntrans   = Set of nonterminal transitions
   1531#          nullable = Set of nullable non-terminals
   1532#
   1533# Returns a set containing the read sets
   1534# -----------------------------------------------------------------------------
   1535
   1536def compute_read_sets(C, ntrans, nullable):
   1537    FP = lambda x: dr_relation(C,x,nullable)
   1538    R =  lambda x: reads_relation(C,x,nullable)
   1539    F = digraph(ntrans,R,FP)
   1540    return F
   1541
   1542# -----------------------------------------------------------------------------
   1543# compute_follow_sets()
   1544#
   1545# Given a set of non-terminal transitions, a readset, and an include set,
   1546# this function computes the follow sets
   1547#
   1548# Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
   1549#
   1550# Inputs:    
   1551#            ntrans     = Set of nonterminal transitions
   1552#            readsets   = Readset (previously computed)
   1553#            inclsets   = Include sets (previously computed)
   1554#
   1555# Returns a set containing the follow sets      
   1556# -----------------------------------------------------------------------------
   1557
   1558def compute_follow_sets(ntrans,readsets,inclsets):
   1559     FP = lambda x: readsets[x]
   1560     R  = lambda x: inclsets.get(x,[])
   1561     F = digraph(ntrans,R,FP)
   1562     return F
   1563
   1564# -----------------------------------------------------------------------------
   1565# add_lookaheads()
   1566#
   1567# Attaches the lookahead symbols to grammar rules. 
   1568#
   1569# Inputs:    lookbacks         -  Set of lookback relations
   1570#            followset         -  Computed follow set
   1571#
   1572# This function directly attaches the lookaheads to productions contained
   1573# in the lookbacks set
   1574# -----------------------------------------------------------------------------
   1575
   1576def add_lookaheads(lookbacks,followset):
   1577    for trans,lb in lookbacks.items():
   1578        # Loop over productions in lookback
   1579        for state,p in lb:
   1580             if not p.lookaheads.has_key(state):
   1581                  p.lookaheads[state] = []
   1582             f = followset.get(trans,[])
   1583             for a in f:
   1584                  if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
   1585
   1586# -----------------------------------------------------------------------------
   1587# add_lalr_lookaheads()
   1588#
   1589# This function does all of the work of adding lookahead information for use
   1590# with LALR parsing
   1591# -----------------------------------------------------------------------------
   1592
   1593def add_lalr_lookaheads(C):
   1594    # Determine all of the nullable nonterminals
   1595    nullable = compute_nullable_nonterminals()
   1596
   1597    # Find all non-terminal transitions
   1598    trans = find_nonterminal_transitions(C)
   1599
   1600    # Compute read sets
   1601    readsets = compute_read_sets(C,trans,nullable)
   1602
   1603    # Compute lookback/includes relations
   1604    lookd, included = compute_lookback_includes(C,trans,nullable)
   1605
   1606    # Compute LALR FOLLOW sets
   1607    followsets = compute_follow_sets(trans,readsets,included)
   1608    
   1609    # Add all of the lookaheads
   1610    add_lookaheads(lookd,followsets)
   1611
   1612# -----------------------------------------------------------------------------
   1613# lr_parse_table()
   1614#
   1615# This function constructs the parse tables for SLR or LALR
   1616# -----------------------------------------------------------------------------
   1617def lr_parse_table(method):
   1618    global _lr_method
   1619    goto = _lr_goto           # Goto array
   1620    action = _lr_action       # Action array
   1621    actionp = { }             # Action production array (temporary)
   1622
   1623    _lr_method = method
   1624    
   1625    n_srconflict = 0
   1626    n_rrconflict = 0
   1627
   1628    if yaccdebug:
   1629        sys.stderr.write("yacc: Generating %s parsing table...\n" % method)        
   1630        _vf.write("\n\nParsing method: %s\n\n" % method)
   1631        
   1632    # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
   1633    # This determines the number of states
   1634    
   1635    C = lr0_items()
   1636
   1637    if method == 'LALR':
   1638        add_lalr_lookaheads(C)
   1639
   1640    # Build the parser table, state by state
   1641    st = 0
   1642    for I in C:
   1643        # Loop over each production in I
   1644        actlist = [ ]              # List of actions
   1645        
   1646        if yaccdebug:
   1647            _vf.write("\nstate %d\n\n" % st)
   1648            for p in I:
   1649                _vf.write("    (%d) %s\n" % (p.number, str(p)))
   1650            _vf.write("\n")
   1651
   1652        for p in I:
   1653            try:
   1654                if p.prod[-1] == ".":
   1655                    if p.name == "S'":
   1656                        # Start symbol. Accept!
   1657                        action[st,"$end"] = 0
   1658                        actionp[st,"$end"] = p
   1659                    else:
   1660                        # We are at the end of a production.  Reduce!
   1661                        if method == 'LALR':
   1662                            laheads = p.lookaheads[st]
   1663                        else:
   1664                            laheads = Follow[p.name]
   1665                        for a in laheads:
   1666                            actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
   1667                            r = action.get((st,a),None)
   1668                            if r is not None:
   1669                                # Whoa. Have a shift/reduce or reduce/reduce conflict
   1670                                if r > 0:
   1671                                    # Need to decide on shift or reduce here
   1672                                    # By default we favor shifting. Need to add
   1673                                    # some precedence rules here.
   1674                                    sprec,slevel = Productions[actionp[st,a].number].prec                                    
   1675                                    rprec,rlevel = Precedence.get(a,('right',0))
   1676                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
   1677                                        # We really need to reduce here.  
   1678                                        action[st,a] = -p.number
   1679                                        actionp[st,a] = p
   1680                                        if not slevel and not rlevel:
   1681                                            _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
   1682                                            _vf.write("  ! shift/reduce conflict for %s resolved as reduce.\n" % a)
   1683                                            n_srconflict += 1
   1684                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
   1685                                        action[st,a] = None
   1686                                    else:
   1687                                        # Hmmm. Guess we'll keep the shift
   1688                                        if not rlevel:
   1689                                            _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
   1690                                            _vf.write("  ! shift/reduce conflict for %s resolved as shift.\n" % a)
   1691                                            n_srconflict +=1                                    
   1692                                elif r < 0:
   1693                                    # Reduce/reduce conflict.   In this case, we favor the rule
   1694                                    # that was defined first in the grammar file
   1695                                    oldp = Productions[-r]
   1696                                    pp = Productions[p.number]
   1697                                    if oldp.line > pp.line:
   1698                                        action[st,a] = -p.number
   1699                                        actionp[st,a] = p
   1700                                    # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st)
   1701                                    n_rrconflict += 1
   1702                                    _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
   1703                                    _vf.write("  ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
   1704                                else:
   1705                                    sys.stderr.write("Unknown conflict in state %d\n" % st)
   1706                            else:
   1707                                action[st,a] = -p.number
   1708                                actionp[st,a] = p
   1709                else:
   1710                    i = p.lr_index
   1711                    a = p.prod[i+1]       # Get symbol right after the "."
   1712                    if Terminals.has_key(a):
   1713                        g = lr0_goto(I,a)
   1714                        j = _lr0_cidhash.get(id(g),-1)
   1715                        if j >= 0:
   1716                            # We are in a shift state
   1717                            actlist.append((a,p,"shift and go to state %d" % j))
   1718                            r = action.get((st,a),None)
   1719                            if r is not None:
   1720                                # Whoa have a shift/reduce or shift/shift conflict
   1721                                if r > 0:
   1722                                    if r != j:
   1723                                        sys.stderr.write("Shift/shift conflict in state %d\n" % st)
   1724                                elif r < 0:
   1725                                    # Do a precedence check.
   1726                                    #   -  if precedence of reduce rule is higher, we reduce.
   1727                                    #   -  if precedence of reduce is same and left assoc, we reduce.
   1728                                    #   -  otherwise we shift
   1729                                    rprec,rlevel = Productions[actionp[st,a].number].prec
   1730                                    sprec,slevel = Precedence.get(a,('right',0))
   1731                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
   1732                                        # We decide to shift here... highest precedence to shift
   1733                                        action[st,a] = j
   1734                                        actionp[st,a] = p
   1735                                        if not rlevel:
   1736                                            n_srconflict += 1
   1737                                            _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
   1738                                            _vf.write("  ! shift/reduce conflict for %s resolved as shift.\n" % a)
   1739                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
   1740                                        action[st,a] = None
   1741                                    else:                                            
   1742                                        # Hmmm. Guess we'll keep the reduce
   1743                                        if not slevel and not rlevel:
   1744                                            n_srconflict +=1
   1745                                            _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
   1746                                            _vf.write("  ! shift/reduce conflict for %s resolved as reduce.\n" % a)
   1747                                            
   1748                                else:
   1749                                    sys.stderr.write("Unknown conflict in state %d\n" % st)
   1750                            else:
   1751                                action[st,a] = j
   1752                                actionp[st,a] = p
   1753                                
   1754            except StandardError,e:
   1755                raise YaccError, "Hosed in lr_parse_table: %s" % e
   1756
   1757        # Print the actions associated with each terminal
   1758        if yaccdebug:
   1759          _actprint = { }
   1760          for a,p,m in actlist:
   1761            if action.has_key((st,a)):
   1762                if p is actionp[st,a]:
   1763                    _vf.write("    %-15s %s\n" % (a,m))
   1764                    _actprint[(a,m)] = 1
   1765          _vf.write("\n")
   1766          for a,p,m in actlist:
   1767            if action.has_key((st,a)):
   1768                if p is not actionp[st,a]:
   1769                    if not _actprint.has_key((a,m)):
   1770                        _vf.write("  ! %-15s [ %s ]\n" % (a,m))
   1771                        _actprint[(a,m)] = 1
   1772            
   1773        # Construct the goto table for this state
   1774        if yaccdebug:
   1775            _vf.write("\n")
   1776        nkeys = { }
   1777        for ii in I:
   1778            for s in ii.usyms:
   1779                if Nonterminals.has_key(s):
   1780                    nkeys[s] = None
   1781        for n in nkeys.keys():
   1782            g = lr0_goto(I,n)
   1783            j = _lr0_cidhash.get(id(g),-1)            
   1784            if j >= 0:
   1785                goto[st,n] = j
   1786                if yaccdebug:
   1787                    _vf.write("    %-30s shift and go to state %d\n" % (n,j))
   1788
   1789        st += 1
   1790
   1791    if yaccdebug:
   1792        if n_srconflict == 1:
   1793            sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict)
   1794        if n_srconflict > 1:
   1795            sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict)
   1796        if n_rrconflict == 1:
   1797            sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict)
   1798        if n_rrconflict > 1:
   1799            sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict)
   1800
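# -----------------------------------------------------------------------------
# A simplified, self-contained sketch (illustrative only) of the precedence
# policy the table construction above applies to shift/reduce conflicts: the
# higher precedence level wins; on a tie, 'left' associativity reduces,
# 'nonassoc' turns the entry into an error, and anything else shifts.  Tokens
# with no declared precedence default to ('right',0), so an undeclared
# conflict is resolved in favour of shifting.  The real code additionally
# updates actionp and the conflict counters.

def resolve_shift_reduce(token_prec, rule_prec):
    sassoc, slevel = token_prec   # precedence of the lookahead token
    rassoc, rlevel = rule_prec    # precedence of the rule being reduced
    if slevel > rlevel:
        return 'shift'
    if slevel < rlevel:
        return 'reduce'
    if sassoc == 'nonassoc':
        return 'error'
    if sassoc == 'left':
        return 'reduce'
    return 'shift'

# With PLUS at ('left',1) and TIMES at ('left',2), "a + b * c" shifts on '*'
assert resolve_shift_reduce(('left',2), ('left',1)) == 'shift'
# ... but reduces on a second '+', which is what makes '+' left-associative
assert resolve_shift_reduce(('left',1), ('left',1)) == 'reduce'
# -----------------------------------------------------------------------------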
   1801# -----------------------------------------------------------------------------
   1802#                          ==== LR Utility functions ====
   1803# -----------------------------------------------------------------------------
   1804
   1805# -----------------------------------------------------------------------------
   1806# lr_write_tables()
   1807#
   1808# This function writes the LR parsing tables to a file
   1809# -----------------------------------------------------------------------------
   1810
   1811def lr_write_tables(modulename=tab_module,outputdir=''):
   1812    filename = os.path.join(outputdir,modulename) + ".py"
   1813    try:
   1814        f = open(filename,"w")
   1815
   1816        f.write("""
   1817# %s
   1818# This file is automatically generated. Do not edit.
   1819
   1820_lr_method = %s
   1821
   1822_lr_signature = %s
   1823""" % (filename, repr(_lr_method), repr(Signature.digest())))
   1824
   1825        # Change smaller to 0 to go back to original tables
   1826        smaller = 1
   1827                
   1828        # Factor out names to try and make smaller
   1829        if smaller:
   1830            items = { }
   1831        
   1832            for k,v in _lr_action.items():
   1833                i = items.get(k[1])
   1834                if not i:
   1835                    i = ([],[])
   1836                    items[k[1]] = i
   1837                i[0].append(k[0])
   1838                i[1].append(v)
   1839
   1840            f.write("\n_lr_action_items = {")
   1841            for k,v in items.items():
   1842                f.write("%r:([" % k)
   1843                for i in v[0]:
   1844                    f.write("%r," % i)
   1845                f.write("],[")
   1846                for i in v[1]:
   1847                    f.write("%r," % i)
   1848                           
   1849                f.write("]),")
   1850            f.write("}\n")
   1851
   1852            f.write("""
   1853_lr_action = { }
   1854for _k, _v in _lr_action_items.items():
   1855   for _x,_y in zip(_v[0],_v[1]):
   1856       _lr_action[(_x,_k)] = _y
   1857del _lr_action_items
   1858""")
   1859            
   1860        else:
   1861            f.write("\n_lr_action = { ");
   1862            for k,v in _lr_action.items():
   1863                f.write("(%r,%r):%r," % (k[0],k[1],v))
   1864            f.write("}\n");
   1865
   1866        if smaller:
   1867            # Factor out names to try and make smaller
   1868            items = { }
   1869        
   1870            for k,v in _lr_goto.items():
   1871                i = items.get(k[1])
   1872                if not i:
   1873                    i = ([],[])
   1874                    items[k[1]] = i
   1875                i[0].append(k[0])
   1876                i[1].append(v)
   1877
   1878            f.write("\n_lr_goto_items = {")
   1879            for k,v in items.items():
   1880                f.write("%r:([" % k)
   1881                for i in v[0]:
   1882                    f.write("%r," % i)
   1883                f.write("],[")
   1884                for i in v[1]:
   1885                    f.write("%r," % i)
   1886                           
   1887                f.write("]),")
   1888            f.write("}\n")
   1889
   1890            f.write("""
   1891_lr_goto = { }
   1892for _k, _v in _lr_goto_items.items():
   1893   for _x,_y in zip(_v[0],_v[1]):
   1894       _lr_goto[(_x,_k)] = _y
   1895del _lr_goto_items
   1896""")
   1897        else:
   1898            f.write("\n_lr_goto = { ");
   1899            for k,v in _lr_goto.items():
   1900                f.write("(%r,%r):%r," % (k[0],k[1],v))                    
   1901            f.write("}\n");
   1902
   1903        # Write production table
   1904        f.write("_lr_productions = [\n")
   1905        for p in Productions:
   1906            if p:
   1907                if (p.func):
   1908                    f.write("  (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line))
   1909                else:
   1910                    f.write("  (%r,%d,None,None,None),\n" % (p.name, p.len))
   1911            else:
   1912                f.write("  None,\n")
   1913        f.write("]\n")
   1914        
   1915        f.close()
   1916
   1917    except IOError,e:
   1918        print "Unable to create '%s'" % filename
   1919        print e
   1920        return
   1921
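# -----------------------------------------------------------------------------
# A self-contained illustration (hypothetical data, not from a real grammar) of
# the "smaller" encoding emitted above: the flat (state,symbol) -> value table
# is grouped per symbol into parallel state/value lists, and the generated
# parsetab module rebuilds the flat dictionary with the small loop written out
# by lr_write_tables().

flat = { (0,'NUMBER'): 5, (1,'NUMBER'): 5, (0,'PLUS'): -2 }

grouped = { }
for (state, sym), val in flat.items():
    states, vals = grouped.setdefault(sym, ([],[]))
    states.append(state)
    vals.append(val)

rebuilt = { }
for sym, (states, vals) in grouped.items():
    for state, val in zip(states, vals):
        rebuilt[(state, sym)] = val

assert rebuilt == flat        # the factored round trip is lossless
# -----------------------------------------------------------------------------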
   1922def lr_read_tables(module=tab_module,optimize=0):
   1923    global _lr_action, _lr_goto, _lr_productions, _lr_method
   1924    try:
   1925        exec "import %s as parsetab" % module
   1926        
   1927        if (optimize) or (Signature.digest() == parsetab._lr_signature):
   1928            _lr_action = parsetab._lr_action
   1929            _lr_goto   = parsetab._lr_goto
   1930            _lr_productions = parsetab._lr_productions
   1931            _lr_method = parsetab._lr_method
   1932            return 1
   1933        else:
   1934            return 0
   1935        
   1936    except (ImportError,AttributeError):
   1937        return 0
   1938
   1939
   1940# Available instance types.  This is used when parsers are defined by a class.
   1941# It's a little funky because I want to preserve backwards compatibility
   1942# with Python 2.0 where types.ObjectType is undefined.
   1943
   1944try:
   1945   _INSTANCETYPE = (types.InstanceType, types.ObjectType)
   1946except AttributeError:
   1947   _INSTANCETYPE = types.InstanceType
   1948
   1949# -----------------------------------------------------------------------------
   1950# yacc(module)
   1951#
   1952# Build the parser module
   1953# -----------------------------------------------------------------------------
   1954
   1955def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0,write_tables=1,debugfile=debug_file,outputdir=''):
   1956    global yaccdebug
   1957    yaccdebug = debug
   1958    
   1959    initialize_vars()
   1960    files = { }
   1961    error = 0
   1962
   1963
   1964    # Add parsing method to signature
   1965    Signature.update(method)
   1966    
   1967    # If a "module" parameter was supplied, extract its dictionary.
   1968    # Note: a module may in fact be an instance as well.
   1969    
   1970    if module:
   1971        # User supplied a module object.
   1972        if isinstance(module, types.ModuleType):
   1973            ldict = module.__dict__
   1974        elif isinstance(module, _INSTANCETYPE):
   1975            _items = [(k,getattr(module,k)) for k in dir(module)]
   1976            ldict = { }
   1977            for i in _items:
   1978                ldict[i[0]] = i[1]
   1979        else:
   1980            raise ValueError,"Expected a module"
   1981        
   1982    else:
   1983        # No module given.  We might be able to get information from the caller.
   1984        # Throw an exception and unwind the traceback to get the globals
   1985        
   1986        try:
   1987            raise RuntimeError
   1988        except RuntimeError:
   1989            e,b,t = sys.exc_info()
   1990            f = t.tb_frame
   1991            f = f.f_back           # Walk out to our calling function
   1992            ldict = f.f_globals    # Grab its globals dictionary
   1993
   1994    # Add starting symbol to signature
   1995    if not start:
   1996        start = ldict.get("start",None)
   1997    if start:
   1998        Signature.update(start)
   1999
   2000    # If running in optimized mode, try to read the parse tables directly
   2001
   2002    if (optimize and lr_read_tables(tabmodule,1)):
   2003        # Read parse table
   2004        del Productions[:]
   2005        for p in _lr_productions:
   2006            if not p:
   2007                Productions.append(None)
   2008            else:
   2009                m = MiniProduction()
   2010                m.name = p[0]
   2011                m.len  = p[1]
   2012                m.file = p[3]
   2013                m.line = p[4]
   2014                if p[2]:
   2015                    m.func = ldict[p[2]]
   2016                Productions.append(m)
   2017        
   2018    else:
   2019        # Get the tokens map
   2020        if (module and isinstance(module,_INSTANCETYPE)):
   2021            tokens = getattr(module,"tokens",None)
   2022        else:
   2023            tokens = ldict.get("tokens",None)
   2024    
   2025        if not tokens:
   2026            raise YaccError,"module does not define a list 'tokens'"
   2027        if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
   2028            raise YaccError,"tokens must be a list or tuple."
   2029
   2030        # Check to see if a requires dictionary is defined.
   2031        requires = ldict.get("require",None)
   2032        if requires:
   2033            if not (isinstance(requires,types.DictType)):
   2034                raise YaccError,"require must be a dictionary."
   2035
   2036            for r,v in requires.items():
   2037                try:
   2038                    if not (isinstance(v,types.ListType)):
   2039                        raise TypeError
   2040                    v1 = [x.split(".") for x in v]
   2041                    Requires[r] = v1
   2042                except StandardError:
   2043                    print "Invalid specification for rule '%s' in require. Expected a list of strings" % r            
   2044
   2045        
   2046        # Build the dictionary of terminals.  We record an empty list in the
   2047        # dictionary to track whether or not a terminal is actually
   2048        # used in the grammar
   2049
   2050        if 'error' in tokens:
   2051            print "yacc: Illegal token 'error'.  Is a reserved word."
   2052            raise YaccError,"Illegal token name"
   2053
   2054        for n in tokens:
   2055            if Terminals.has_key(n):
   2056                print "yacc: Warning. Token '%s' multiply defined." % n
   2057            Terminals[n] = [ ]
   2058
   2059        Terminals['error'] = [ ]
   2060
   2061        # Get the precedence map (if any)
   2062        prec = ldict.get("precedence",None)
   2063        if prec:
   2064            if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)):
   2065                raise YaccError,"precedence must be a list or tuple."
   2066            add_precedence(prec)
   2067            Signature.update(repr(prec))
   2068
   2069        for n in tokens:
   2070            if not Precedence.has_key(n):
   2071                Precedence[n] = ('right',0)         # Default, right associative, 0 precedence
   2072
   2073        # Look for error handler
   2074        ef = ldict.get('p_error',None)
   2075        if ef:
   2076            if isinstance(ef,types.FunctionType):
   2077                ismethod = 0
   2078            elif isinstance(ef, types.MethodType):
   2079                ismethod = 1
   2080            else:
   2081                raise YaccError,"'p_error' defined, but is not a function or method."                
   2082            eline = ef.func_code.co_firstlineno
   2083            efile = ef.func_code.co_filename
   2084            files[efile] = None
   2085
   2086            if (ef.func_code.co_argcount != 1+ismethod):
   2087                raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline)
   2088            global Errorfunc
   2089            Errorfunc = ef
   2090        else:
   2091            print "yacc: Warning. no p_error() function is defined."
   2092            
   2093        # Get the list of built-in functions with p_ prefix
   2094        symbols = [ldict[f] for f in ldict.keys()
   2095               if (type(ldict[f]) in (types.FunctionType, types.MethodType) and ldict[f].__name__[:2] == 'p_'
   2096                   and ldict[f].__name__ != 'p_error')]
   2097
   2098        # Check for non-empty symbols
   2099        if len(symbols) == 0:
   2100            raise YaccError,"no rules of the form p_rulename are defined."
   2101    
   2102        # Sort the symbols by line number
   2103        symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))
   2104
   2105        # Add all of the symbols to the grammar
   2106        for f in symbols:
   2107            if (add_function(f)) < 0:
   2108                error += 1
   2109            else:
   2110                files[f.func_code.co_filename] = None
   2111
   2112        # Make a signature of the docstrings
   2113        for f in symbols:
   2114            if f.__doc__:
   2115                Signature.update(f.__doc__)
   2116    
   2117        lr_init_vars()
   2118
   2119        if error:
   2120            raise YaccError,"Unable to construct parser."
   2121
   2122        if not lr_read_tables(tabmodule):
   2123
   2124            # Validate files
   2125            for filename in files.keys():
   2126                if not validate_file(filename):
   2127                    error = 1
   2128
   2129            # Validate dictionary
   2130            validate_dict(ldict)
   2131
   2132            if start and not Prodnames.has_key(start):
   2133                raise YaccError,"Bad starting symbol '%s'" % start
   2134        
   2135            augment_grammar(start)    
   2136            error = verify_productions(cycle_check=check_recursion)
   2137            otherfunc = [ldict[f] for f in ldict.keys()
   2138               if (type(ldict[f]) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')]
   2139
   2140            if error:
   2141                raise YaccError,"Unable to construct parser."
   2142            
   2143            build_lritems()
   2144            compute_first1()
   2145            compute_follow(start)
   2146        
   2147            if method in ['SLR','LALR']:
   2148                lr_parse_table(method)
   2149            else:
   2150                raise YaccError, "Unknown parsing method '%s'" % method
   2151
   2152            if write_tables:
   2153                lr_write_tables(tabmodule,outputdir)        
   2154    
   2155            if yaccdebug:
   2156                try:
   2157                    f = open(os.path.join(outputdir,debugfile),"w")
   2158                    f.write(_vfc.getvalue())
   2159                    f.write("\n\n")
   2160                    f.write(_vf.getvalue())
   2161                    f.close()
   2162                except IOError,e:
   2163                    print "yacc: can't create '%s'" % debugfile,e
   2164        
   2165    # Made it here.   Create a parser object and set up its internal state.
   2166    # Set global parse() method to bound method of parser object.
   2167
   2168    p = Parser("xyzzy")
   2169    p.productions = Productions
   2170    p.errorfunc = Errorfunc
   2171    p.action = _lr_action
   2172    p.goto   = _lr_goto
   2173    p.method = _lr_method
   2174    p.require = Requires
   2175
   2176    global parse
   2177    parse = p.parse
   2178
   2179    global parser
   2180    parser = p
   2181
   2182    # Clean up all of the globals we created
   2183    if (not optimize):
   2184        yacc_cleanup()
   2185    return p
   2186
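# -----------------------------------------------------------------------------
# A minimal, hypothetical grammar module sketching how the yacc() entry point
# above is normally driven: a 'tokens' list, an optional 'precedence' table,
# p_* rule functions whose docstrings hold the BNF, and a p_error() handler.
# The names below are made up for illustration, and a matching lexer (built
# with ply.lex) is assumed to supply the NUMBER, PLUS and TIMES tokens.
#
#     tokens = ('NUMBER', 'PLUS', 'TIMES')
#
#     precedence = (
#         ('left', 'PLUS'),
#         ('left', 'TIMES'),
#     )
#
#     def p_expr_binop(p):
#         '''expr : expr PLUS expr
#                 | expr TIMES expr'''
#         if p[2] == '+': p[0] = p[1] + p[3]
#         else:           p[0] = p[1] * p[3]
#
#     def p_expr_number(p):
#         'expr : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         print "Syntax error at %r" % (p,)
#
#     parser = yacc()        # builds the LR tables and returns a Parser object
#     # result = parser.parse(data, lexer=mylexer)
# -----------------------------------------------------------------------------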
   2187# yacc_cleanup function.  Delete all of the global variables
   2188# used during table construction
   2189
   2190def yacc_cleanup():
   2191    global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
   2192    del _lr_action, _lr_goto, _lr_method, _lr_goto_cache
   2193
   2194    global Productions, Prodnames, Prodmap, Terminals 
   2195    global Nonterminals, First, Follow, Precedence, LRitems
   2196    global Errorfunc, Signature, Requires
   2197    
   2198    del Productions, Prodnames, Prodmap, Terminals
   2199    del Nonterminals, First, Follow, Precedence, LRitems
   2200    del Errorfunc, Signature, Requires
   2201    
   2202    global _vf, _vfc
   2203    del _vf, _vfc
   2204    
   2205    
   2206# Stub that raises an error if parsing is attempted without first calling yacc()
   2207def parse(*args,**kwargs):
   2208    raise YaccError, "yacc: No parser built with yacc()"
   2209