Diffstat (limited to 'chall/ply-2.2')
162 files changed, 0 insertions, 17724 deletions
diff --git a/chall/ply-2.2/ANNOUNCE b/chall/ply-2.2/ANNOUNCE deleted file mode 100644 index ba96215..0000000 --- a/chall/ply-2.2/ANNOUNCE +++ /dev/null @@ -1,48 +0,0 @@ -November 1, 2006 - - Announcing : PLY-2.2 (Python Lex-Yacc) - - http://www.dabeaz.com/ply - -I'm pleased to announce a significant new update to PLY---a 100% Python -implementation of the common parsing tools lex and yacc. PLY-2.2 is -a minor update that fixes a few bugs and adds some new capabilities. - -If you are new to PLY, here are a few highlights: - -- PLY is closely modeled after traditional lex/yacc. If you know how - to use these or similar tools in other languages, you will find - PLY to be comparable. - -- PLY provides very extensive error reporting and diagnostic - information to assist in parser construction. The original - implementation was developed for instructional purposes. As - a result, the system tries to identify the most common types - of errors made by novice users. - -- PLY provides full support for empty productions, error recovery, - precedence rules, and ambiguous grammars. - -- Parsing is based on LR-parsing, which is fast, memory efficient, - better suited to large grammars, and which has a number of nice - properties when dealing with syntax errors and other parsing - problems. Currently, PLY can build its parsing tables using - either the SLR or LALR(1) algorithm. - -- PLY can be used to build parsers for large programming languages. - Although it is not ultra-fast due to its Python implementation, - PLY can be used to parse grammars consisting of several hundred - rules (as might be found for a language like C). The lexer and LR - parser are also reasonably efficient when parsing normal-sized - programs. - -More information about PLY can be obtained on the PLY webpage at: - - http://www.dabeaz.com/ply - -PLY is freely available and is licensed under the terms of the GNU -Lesser General Public License (LGPL). - -Cheers, - -David Beazley (http://www.dabeaz.com)
\ No newline at end of file diff --git a/chall/ply-2.2/CHANGES b/chall/ply-2.2/CHANGES deleted file mode 100644 index bd0894d..0000000 --- a/chall/ply-2.2/CHANGES +++ /dev/null @@ -1,680 +0,0 @@ -Version 2.2 ------------------------------- -11/01/06: beazley - Added lexpos() and lexspan() methods to grammar symbols. These - mirror the functionality of lineno() and linespan(). For - example: - - def p_expr(p): - 'expr : expr PLUS expr' - p.lexpos(1) # Lexing position of left-hand-expression - p.lexpos(2) # Lexing position of PLUS - start,end = p.lexspan(3) # Lexing range of right hand expression - -11/01/06: beazley - Minor change to error handling. The recommended way to skip characters - in the input is to use t.lexer.skip() as shown here: - - def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - - The old approach of just using t.skip(1) will still work, but won't - be documented. - -10/31/06: beazley - Discarded tokens can now be specified as simple strings instead of - functions. To do this, simply include the text "ignore_" in the - token declaration. For example: - - t_ignore_cppcomment = r'//.*' - - Previously, this had to be done with a function. For example: - - def t_ignore_cppcomment(t): - r'//.*' - pass - - If start conditions/states are being used, state names should appear - before the "ignore_" text. - -10/19/06: beazley - The Lex module now provides support for flex-style start conditions - as described at http://www.gnu.org/software/flex/manual/html_chapter/flex_11.html. - Please refer to this document to understand this change note. Refer to - the PLY documentation for a PLY-specific explanation of how this works. - - To use start conditions, you first need to declare a set of states in - your lexer file: - - states = ( - ('foo','exclusive'), - ('bar','inclusive') - ) - - This serves the same role as the %s and %x specifiers in flex. - - Once a state has been declared, tokens for that state can be - declared by defining rules of the form t_state_TOK. For example: - - t_PLUS = '\+' # Rule defined in INITIAL state - t_foo_NUM = '\d+' # Rule defined in foo state - t_bar_NUM = '\d+' # Rule defined in bar state - - t_foo_bar_NUM = '\d+' # Rule defined in both foo and bar - t_ANY_NUM = '\d+' # Rule defined in all states - - In addition to defining tokens for each state, the t_ignore and t_error - specifications can be customized for specific states. For example: - - t_foo_ignore = " " # Ignored characters for foo state - def t_bar_error(t): - # Handle errors in bar state - - Within token rules, the following methods can be used to change states: - - def t_TOKNAME(t): - t.lexer.begin('foo') # Begin state 'foo' - t.lexer.push_state('foo') # Begin state 'foo', push old state - # onto a stack - t.lexer.pop_state() # Restore previous state - t.lexer.current_state() # Returns name of current state - - These methods mirror the BEGIN(), yy_push_state(), yy_pop_state(), and - yy_top_state() functions in flex. - - Start states can be used as one way to write sub-lexers. - For example, the parser (or the lexer itself) might instruct the lexer - to start generating a different set of tokens depending on the context. - - example/yply/ylex.py shows the use of start states to grab C/C++ - code fragments out of traditional yacc specification files. - - *** NEW FEATURE *** Suggested by Daniel Larraz with whom I also - discussed various aspects of the design. - -10/19/06: beazley - Minor change to the way in which yacc.py was reporting shift/reduce - conflicts.
Although the underlying LALR(1) algorithm was correct, - PLY was under-reporting the number of conflicts compared to yacc/bison - when precedence rules were in effect. This change should make PLY - report the same number of conflicts as yacc. - -10/19/06: beazley - Modified yacc so that grammar rules could also include the '-' - character. For example: - - def p_expr_list(p): - 'expression-list : expression-list expression' - - Suggested by Oldrich Jedlicka. - -10/18/06: beazley - Attribute lexer.lexmatch added so that token rules can access the re - match object that was generated. For example: - - def t_FOO(t): - r'some regex' - m = t.lexer.lexmatch - # Do something with m - - - This may be useful if you want to access named groups specified within - the regex for a specific token. Suggested by Oldrich Jedlicka. - -10/16/06: beazley - Changed the error message that results if an illegal character - is encountered and no default error function is defined in lex. - The exception is now more informative about the actual cause of - the error. - -Version 2.1 ------------------------------- -10/02/06: beazley - The last Lexer object built by lex() can be found in lex.lexer. - The last Parser object built by yacc() can be found in yacc.parser. - -10/02/06: beazley - New example added: examples/yply - - This example uses PLY to convert Unix-yacc specification files to - PLY programs with the same grammar. This may be useful if you - want to convert a grammar from bison/yacc to use with PLY. - -10/02/06: beazley - Added support for a start symbol to be specified in the yacc - input file itself. Just do this: - - start = 'name' - - where 'name' matches some grammar rule. For example: - - def p_name(p): - 'name : A B C' - ... - - This mirrors the functionality of the yacc %start specifier. - -09/30/06: beazley - Some new examples added: - - examples/GardenSnake : A simple indentation-based language similar - to Python. Shows how you might handle - whitespace. Contributed by Andrew Dalke. - - examples/BASIC : An implementation of 1964 Dartmouth BASIC. - Contributed by Dave against his better - judgement. - -09/28/06: beazley - Minor patch to allow named groups to be used in lex regular - expression rules. For example: - - t_QSTRING = r'''(?P<quote>['"]).*?(?P=quote)''' - - Patch submitted by Adam Ring. - -09/28/06: beazley - LALR(1) is now the default parsing method. To use SLR, use - yacc.yacc(method="SLR"). Note: there is no performance impact - on parsing when using LALR(1) instead of SLR. However, constructing - the parsing tables will take a little longer. - -09/26/06: beazley - Change to line number tracking. To modify line numbers, modify - the line number of the lexer itself. For example: - - def t_NEWLINE(t): - r'\n' - t.lexer.lineno += 1 - - This modification is both cleanup and a performance optimization. - In past versions, lex was monitoring every token for changes in - the line number. This extra processing is unnecessary for the vast - majority of tokens. Thus, this new approach cleans it up a bit. - - *** POTENTIAL INCOMPATIBILITY *** - You will need to change code in your lexer that updates the line - number. For example, "t.lineno += 1" becomes "t.lexer.lineno += 1". - -09/26/06: beazley - Added the lexing position to tokens as an attribute lexpos. This - is the raw index into the input text at which a token appears. - This information can be used to compute column numbers and other - details (e.g., scan backwards from lexpos to the first newline - to get a column position).
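For example, a small helper along these lines (the helper name and arguments are illustrative, not part of the distribution) recovers a 1-based column number from a token; input is the string that was given to lexer.input():

    def find_column(input, token):
        # Find the most recent newline before the token. rfind() returns -1
        # when there is none, which makes the arithmetic below yield a
        # 1-based column on the first line as well.
        last_cr = input.rfind('\n', 0, token.lexpos)
        return token.lexpos - last_cr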
- -09/25/06: beazley - Changed the name of the __copy__() method on the Lexer class - to clone(). This is used to clone a Lexer object (e.g., if - you're running different lexers at the same time). - -09/21/06: beazley - Limitations related to the use of the re module have been eliminated. - Several users reported problems with regular expressions exceeding - 100 named groups. To solve this, lex.py is now capable - of automatically splitting its master regular expression into - smaller expressions as needed. This should, in theory, make it - possible to specify an arbitrarily large number of tokens. - -09/21/06: beazley - Improved error checking in lex.py. Rules that match the empty string - are now rejected (otherwise they cause the lexer to enter an infinite - loop). An extra check for rules containing '#' has also been added. - Since lex compiles regular expressions in verbose mode, '#' is interpreted - as a regex comment; it is critical to use '\#' instead. - -09/18/06: beazley - Added a @TOKEN decorator function to lex.py that can be used to - define token rules where the documentation string might be computed - in some way. - - digit = r'([0-9])' - nondigit = r'([_A-Za-z])' - identifier = r'(' + nondigit + r'(' + digit + r'|' + nondigit + r')*)' - - from ply.lex import TOKEN - - @TOKEN(identifier) - def t_ID(t): - # Do whatever - - The @TOKEN decorator merely sets the documentation string of the - associated token function as needed for lex to work. - - Note: An alternative solution is the following: - - def t_ID(t): - # Do whatever - - t_ID.__doc__ = identifier - - Note: Decorators require the use of Python 2.4 or later. If compatibility - with old versions is needed, use the latter solution. - - The need for this feature was suggested by Cem Karan. - -09/14/06: beazley - Support for single-character literal tokens has been added to yacc. - These literals must be enclosed in quotes. For example: - - def p_expr(p): - "expr : expr '+' expr" - ... - - def p_expr(p): - 'expr : expr "-" expr' - ... - - In addition to this, it is necessary to tell the lexer module about - literal characters. This is done by defining the variable 'literals' - as a list of characters. This should be defined in the module that - invokes the lex.lex() function. For example: - - literals = ['+','-','*','/','(',')','='] - - or simply - - literals = '+=*/()=' - - It is important to note that literals can only be a single character. - When the lexer fails to match a token using its normal regular expression - rules, it will check the current character against the literal list. - If found, it will be returned with a token type set to match the literal - character. Otherwise, an illegal character will be signalled. - - -09/14/06: beazley - Modified PLY to install itself as a proper Python package called 'ply'. - This will make it a little more friendly to other modules. This - changes the usage of PLY only slightly. Just do this to import the - modules: - - import ply.lex as lex - import ply.yacc as yacc - - Alternatively, you can do this: - - from ply import * - - which imports both the lex and yacc modules. - Change suggested by Lee June. - -09/13/06: beazley - Changed the handling of negative indices when used in production rules. - A negative production index now accesses already parsed symbols on the - parsing stack.
For example, - - def p_foo(p): - "foo: A B C D" - print p[1] # Value of 'A' symbol - print p[2] # Value of 'B' symbol - print p[-1] # Value of whatever symbol appears before A - # on the parsing stack. - - p[0] = some_val # Sets the value of the 'foo' grammar symbol - - This behavior makes it easier to work with embedded actions within the - parsing rules. For example, in C-yacc, it is possible to write code like - this: - - bar: A { printf("seen an A = %d\n", $1); } B { do_stuff; } - - In this example, the printf() code executes immediately after A has been - parsed. Within the embedded action code, $1 refers to the A symbol on - the stack. - - To perform the equivalent action in PLY, you need to write a pair - of rules like this: - - def p_bar(p): - "bar : A seen_A B" - do_stuff - - def p_seen_A(p): - "seen_A :" - print "seen an A =", p[-1] - - The second rule "seen_A" is merely an empty production which should be - reduced as soon as A is parsed in the "bar" rule above. The - negative index p[-1] is used to access whatever symbol appeared - before the seen_A symbol. - - This feature also makes it possible to support inherited attributes. - For example: - - def p_decl(p): - "decl : scope name" - - def p_scope(p): - """scope : GLOBAL - | LOCAL""" - p[0] = p[1] - - def p_name(p): - "name : ID" - if p[-1] == "GLOBAL": - # ... - elif p[-1] == "LOCAL": - #... - - In this case, the name rule is inheriting an attribute from the - scope declaration that precedes it. - - *** POTENTIAL INCOMPATIBILITY *** - If you are currently using negative indices within existing grammar rules, - your code will break. This should be extremely rare, if not non-existent, in - most cases. The argument to various grammar rules is not usually - processed in the same way as a list of items. - -Version 2.0 ------------------------------- -09/07/06: beazley - Major cleanup and refactoring of the LR table generation code. Both SLR - and LALR(1) table generation is now performed by the same code base with - only minor extensions for extra LALR(1) processing. - -09/07/06: beazley - Completely reimplemented the entire LALR(1) parsing engine to use the - DeRemer and Pennello algorithm for calculating lookahead sets. This - significantly improves the performance of generating LALR(1) tables - and has the added feature of actually working correctly! If you - experienced weird behavior with LALR(1) in prior releases, this should - hopefully resolve all of those problems. Many thanks to - Andrew Waters and Markus Schoepflin for submitting bug reports - and helping me test out the revised LALR(1) support. - -Version 1.8 ------------------------------- -08/02/06: beazley - Fixed a problem related to the handling of default actions in LALR(1) - parsing. If you experienced subtle and/or bizarre behavior when trying - to use the LALR(1) engine, this may correct those problems. Patch - contributed by Russ Cox. Note: This patch has been superseded by - revisions for LALR(1) parsing in Ply-2.0. - -08/02/06: beazley - Added support for slicing of productions in yacc. - Patch contributed by Patrick Mezard. - -Version 1.7 ------------------------------- -03/02/06: beazley - Fixed an infinite recursion problem in the ReduceToTerminals() function that - would sometimes come up in LALR(1) table generation. Reported by - Markus Schoepflin. - -03/01/06: beazley - Added "reflags" argument to lex(). For example: - - lex.lex(reflags=re.UNICODE) - - This can be used to specify optional flags to the re.compile() function - used inside the lexer.
This may be necessary for special situations such - as processing Unicode (e.g., if you want escapes like \w and \b to consult - the Unicode character property database). The need for this was suggested by - Andreas Jung. - -03/01/06: beazley - Fixed a bug with an uninitialized variable on repeated instantiations of parser - objects when the write_tables=0 argument was used. Reported by Michael Brown. - -03/01/06: beazley - Modified lex.py to accept Unicode strings both as the regular expressions for - tokens and as input. Hopefully this is the only change needed for Unicode support. - Patch contributed by Johan Dahl. - -03/01/06: beazley - Modified the class-based interface to work with new-style or old-style classes. - Patch contributed by Michael Brown (although I tweaked it slightly so it would work - with older versions of Python). - -Version 1.6 ------------------------------- -05/27/05: beazley - Incorporated patch contributed by Christopher Stawarz to fix an extremely - devious bug in LALR(1) parser generation. This patch should fix problems - numerous people reported with LALR parsing. - -05/27/05: beazley - Fixed problem with lex.py copy constructor. Reported by Dave Aitel, Aaron Lav, - and Thad Austin. - -05/27/05: beazley - Added outputdir option to yacc() to control output directory. Contributed - by Christopher Stawarz. - -05/27/05: beazley - Added rununit.py test script to run tests using the Python unittest module. - Contributed by Miki Tebeka. - -Version 1.5 ------------------------------- -05/26/04: beazley - Major enhancement. LALR(1) parsing support is now working. - This feature was implemented by Elias Ioup (ezioup@alumni.uchicago.edu) - and optimized by David Beazley. To use LALR(1) parsing do - the following: - - yacc.yacc(method="LALR") - - Computing LALR(1) parsing tables takes about twice as long as - the default SLR method. However, LALR(1) allows you to handle - more complex grammars. For example, the ANSI C grammar - (in example/ansic) has 13 shift-reduce conflicts with SLR, but - only has 1 shift-reduce conflict with LALR(1). - -05/20/04: beazley - Added a __len__ method to parser production lists. Can - be used in parser rules like this: - - def p_somerule(p): - """a : B C D - | E F""" - if (len(p) == 3): - # Must have been first rule - elif (len(p) == 2): - # Must be second rule - - Suggested by Joshua Gerth and others. - -Version 1.4 ------------------------------- -04/23/04: beazley - Incorporated a variety of patches contributed by Eric Raymond. - These include: - - 0. Cleans up some comments so they don't wrap on an 80-column display. - 1. Directs compiler errors to stderr where they belong. - 2. Implements and documents automatic line counting when \n is ignored. - 3. Changes the way progress messages are dumped when debugging is on. - The new format is both less verbose and conveys more information than - the old, including shift and reduce actions. - -04/23/04: beazley - Added a Python setup.py file to simplify installation. Contributed - by Adam Kerrison. - -04/23/04: beazley - Added patches contributed by Adam Kerrison. - - - Some output is now only shown when debugging is enabled. This - means that PLY will be completely silent when not in debugging mode. - - - An optional parameter "write_tables" can be passed to yacc() to - control whether or not parsing tables are written. By default, - it is true, but it can be turned off if you don't want the yacc - table file. Note: disabling this will cause yacc() to regenerate - the parsing table each time.
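For example (a minimal sketch of the call, using the keyword argument described above):

    import ply.yacc as yacc
    yacc.yacc(write_tables=0)   # don't write the table file; the parsing
                                # tables are rebuilt in memory on every run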
- -04/23/04: beazley - Added patches contributed by David McNab. These patches add two - features: - - - The parser can be supplied as a class instead of a module. - For an example of this, see the example/classcalc directory. - - - Debugging output can be directed to a filename of the user's - choice. Use - - yacc(debugfile="somefile.out") - - -Version 1.3 ------------------------------- -12/10/02: jmdyck - Various minor adjustments to the code that Dave checked in today. - Updated test/yacc_{inf,unused}.exp to reflect today's changes. - -12/10/02: beazley - Incorporated a variety of minor bug fixes to empty production - handling and infinite recursion checking. Contributed by - Michael Dyck. - -12/10/02: beazley - Removed bogus recover() method call in yacc.restart(). - -Version 1.2 ------------------------------- -11/27/02: beazley - Lexer and parser objects are now available as attributes - of tokens and slices, respectively. For example: - - def t_NUMBER(t): - r'\d+' - print t.lexer - - def p_expr_plus(t): - 'expr: expr PLUS expr' - print t.lexer - print t.parser - - This can be used for state management (if needed). - -10/31/02: beazley - Modified yacc.py to work with Python optimize mode. To make - this work, you need to use - - yacc.yacc(optimize=1) - - Furthermore, you need to first run Python in normal mode - to generate the necessary parsetab.py files. After that, - you can use python -O or python -OO. - - Note: optimized mode turns off a lot of error checking. - Only use when you are sure that your grammar is working. - Make sure parsetab.py is up to date! - -10/30/02: beazley - Added cloning of Lexer objects. For example: - - import copy - l = lex.lex() - lc = copy.copy(l) - - l.input("Some text") - lc.input("Some other text") - ... - - This might be useful if the same "lexer" is meant to - be used in different contexts---or if multiple lexers - are running concurrently. - -10/30/02: beazley - Fixed subtle bug with first set computation and empty productions. - Patch submitted by Michael Dyck. - -10/30/02: beazley - Fixed error messages to use "filename:line: message" instead - of "filename:line. message". This makes error reporting more - friendly to emacs. Patch submitted by François Pinard. - -10/30/02: beazley - Improvements to parser.out file. Terminals and nonterminals - are sorted instead of being printed in random order. - Patch submitted by François Pinard. - -10/30/02: beazley - Improvements to parser.out file output. Rules are now printed - in a way that's easier to understand. Contributed by Russ Cox. - -10/30/02: beazley - Added 'nonassoc' associativity support. This can be used - to disable the chaining of operators like a < b < c. - To use, simply specify 'nonassoc' in the precedence table - - precedence = ( - ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators - ('left', 'PLUS', 'MINUS'), - ('left', 'TIMES', 'DIVIDE'), - ('right', 'UMINUS'), # Unary minus operator - ) - - Patch contributed by Russ Cox. - -10/30/02: beazley - Modified the lexer to provide optional support for Python -O and -OO - modes. To make this work, Python *first* needs to be run in - unoptimized mode. This reads the lexing information and creates a - file "lextab.py". Then, run lex like this: - - # module foo.py - ... - ... - lex.lex(optimize=1) - - Once the lextab file has been created, subsequent calls to - lex.lex() will read data from the lextab file instead of using - introspection.
In optimized mode (-O, -OO) everything should - work normally despite the loss of doc strings. - - To change the name of the file 'lextab.py' use the following: - - lex.lex(lextab="footab") - - (this creates a file footab.py) - - -Version 1.1 October 25, 2001 ------------------------------- - -10/25/01: beazley - Modified the table generator to produce much more compact data. - This should greatly reduce the size of the parsetab.py[c] file. - Caveat: the tables still need to be constructed so a little more - work is done in parsetab on import. - -10/25/01: beazley - There may be a possible bug in the cycle detector that reports errors - about infinite recursion. I'm having a little trouble tracking it - down, but if you get this problem, you can disable the cycle - detector as follows: - - yacc.yacc(check_recursion = 0) - -10/25/01: beazley - Fixed a bug in lex.py that sometimes caused illegal characters to be - reported incorrectly. Reported by Sverre Jørgensen. - -7/8/01 : beazley - Added a reference to the underlying lexer object when tokens are handled by - functions. The lexer is available as the 'lexer' attribute. This - was added to provide better lexing support for languages such as Fortran - where certain types of tokens can't be conveniently expressed as regular - expressions (and where the tokenizing function may want to perform a - little backtracking). Suggested by Pearu Peterson. - -6/20/01 : beazley - Modified yacc() function so that an optional starting symbol can be specified. - For example: - - yacc.yacc(start="statement") - - Normally yacc always treats the first production rule as the starting symbol. - However, if you are debugging your grammar it may be useful to specify - an alternative starting symbol. Idea suggested by Rich Salz. - -Version 1.0 June 18, 2001 --------------------------- -Initial public offering - diff --git a/chall/ply-2.2/COPYING b/chall/ply-2.2/COPYING deleted file mode 100644 index b1e3f5a..0000000 --- a/chall/ply-2.2/COPYING +++ /dev/null @@ -1,504 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1, February 1999 - - Copyright (C) 1991, 1999 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -[This is the first released version of the Lesser GPL. It also counts - as the successor of the GNU Library Public License, version 2, hence - the version number 2.1.] - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -Licenses are intended to guarantee your freedom to share and change -free software--to make sure the software is free for all its users. - - This license, the Lesser General Public License, applies to some -specially designated software packages--typically libraries--of the -Free Software Foundation and other authors who decide to use it. You -can use it too, but we suggest you first think carefully about whether -this license or the ordinary General Public License is the better -strategy to use in any particular case, based on the explanations below. - - When we speak of free software, we are referring to freedom of use, -not price. 
Our General Public Licenses are designed to make sure that -you have the freedom to distribute copies of free software (and charge -for this service if you wish); that you receive source code or can get -it if you want it; that you can change the software and use pieces of -it in new free programs; and that you are informed that you can do -these things. - - To protect your rights, we need to make restrictions that forbid -distributors to deny you these rights or to ask you to surrender these -rights. These restrictions translate to certain responsibilities for -you if you distribute copies of the library or if you modify it. - - For example, if you distribute copies of the library, whether gratis -or for a fee, you must give the recipients all the rights that we gave -you. You must make sure that they, too, receive or can get the source -code. If you link other code with the library, you must provide -complete object files to the recipients, so that they can relink them -with the library after making changes to the library and recompiling -it. And you must show them these terms so they know their rights. - - We protect your rights with a two-step method: (1) we copyright the -library, and (2) we offer you this license, which gives you legal -permission to copy, distribute and/or modify the library. - - To protect each distributor, we want to make it very clear that -there is no warranty for the free library. Also, if the library is -modified by someone else and passed on, the recipients should know -that what they have is not the original version, so that the original -author's reputation will not be affected by problems that might be -introduced by others. - - Finally, software patents pose a constant threat to the existence of -any free program. We wish to make sure that a company cannot -effectively restrict the users of a free program by obtaining a -restrictive license from a patent holder. Therefore, we insist that -any patent license obtained for a version of the library must be -consistent with the full freedom of use specified in this license. - - Most GNU software, including some libraries, is covered by the -ordinary GNU General Public License. This license, the GNU Lesser -General Public License, applies to certain designated libraries, and -is quite different from the ordinary General Public License. We use -this license for certain libraries in order to permit linking those -libraries into non-free programs. - - When a program is linked with a library, whether statically or using -a shared library, the combination of the two is legally speaking a -combined work, a derivative of the original library. The ordinary -General Public License therefore permits such linking only if the -entire combination fits its criteria of freedom. The Lesser General -Public License permits more lax criteria for linking other code with -the library. - - We call this license the "Lesser" General Public License because it -does Less to protect the user's freedom than the ordinary General -Public License. It also provides other free software developers Less -of an advantage over competing non-free programs. These disadvantages -are the reason we use the ordinary General Public License for many -libraries. However, the Lesser license provides advantages in certain -special circumstances. - - For example, on rare occasions, there may be a special need to -encourage the widest possible use of a certain library, so that it becomes -a de-facto standard. 
To achieve this, non-free programs must be -allowed to use the library. A more frequent case is that a free -library does the same job as widely used non-free libraries. In this -case, there is little to gain by limiting the free library to free -software only, so we use the Lesser General Public License. - - In other cases, permission to use a particular library in non-free -programs enables a greater number of people to use a large body of -free software. For example, permission to use the GNU C Library in -non-free programs enables many more people to use the whole GNU -operating system, as well as its variant, the GNU/Linux operating -system. - - Although the Lesser General Public License is Less protective of the -users' freedom, it does ensure that the user of a program that is -linked with the Library has the freedom and the wherewithal to run -that program using a modified version of the Library. - - The precise terms and conditions for copying, distribution and -modification follow. Pay close attention to the difference between a -"work based on the library" and a "work that uses the library". The -former contains code derived from the library, whereas the latter must -be combined with the library in order to run. - - GNU LESSER GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License Agreement applies to any software library or other -program which contains a notice placed by the copyright holder or -other authorized party saying it may be distributed under the terms of -this Lesser General Public License (also called "this License"). -Each licensee is addressed as "you". - - A "library" means a collection of software functions and/or data -prepared so as to be conveniently linked with application programs -(which use some of those functions and data) to form executables. - - The "Library", below, refers to any such software library or work -which has been distributed under these terms. A "work based on the -Library" means either the Library or any derivative work under -copyright law: that is to say, a work containing the Library or a -portion of it, either verbatim or with modifications and/or translated -straightforwardly into another language. (Hereinafter, translation is -included without limitation in the term "modification".) - - "Source code" for a work means the preferred form of the work for -making modifications to it. For a library, complete source code means -all the source code for all modules it contains, plus any associated -interface definition files, plus the scripts used to control compilation -and installation of the library. - - Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running a program using the Library is not restricted, and output from -such a program is covered only if its contents constitute a work based -on the Library (independent of the use of the Library in a tool for -writing it). Whether that is true depends on what the Library does -and what the program that uses the Library does. - - 1. You may copy and distribute verbatim copies of the Library's -complete source code as you receive it, in any medium, provided that -you conspicuously and appropriately publish on each copy an -appropriate copyright notice and disclaimer of warranty; keep intact -all the notices that refer to this License and to the absence of any -warranty; and distribute a copy of this License along with the -Library. 
- - You may charge a fee for the physical act of transferring a copy, -and you may at your option offer warranty protection in exchange for a -fee. - - 2. You may modify your copy or copies of the Library or any portion -of it, thus forming a work based on the Library, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) The modified work must itself be a software library. - - b) You must cause the files modified to carry prominent notices - stating that you changed the files and the date of any change. - - c) You must cause the whole of the work to be licensed at no - charge to all third parties under the terms of this License. - - d) If a facility in the modified Library refers to a function or a - table of data to be supplied by an application program that uses - the facility, other than as an argument passed when the facility - is invoked, then you must make a good faith effort to ensure that, - in the event an application does not supply such function or - table, the facility still operates, and performs whatever part of - its purpose remains meaningful. - - (For example, a function in a library to compute square roots has - a purpose that is entirely well-defined independent of the - application. Therefore, Subsection 2d requires that any - application-supplied function or table used by this function must - be optional: if the application does not supply it, the square - root function must still compute square roots.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Library, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Library, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote -it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Library. - -In addition, mere aggregation of another work not based on the Library -with the Library (or with a work based on the Library) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may opt to apply the terms of the ordinary GNU General Public -License instead of this License to a given copy of the Library. To do -this, you must alter all the notices that refer to this License, so -that they refer to the ordinary GNU General Public License, version 2, -instead of to this License. (If a newer version than version 2 of the -ordinary GNU General Public License has appeared, then you can specify -that version instead if you wish.) Do not make any other change in -these notices. - - Once this change is made in a given copy, it is irreversible for -that copy, so the ordinary GNU General Public License applies to all -subsequent copies and derivative works made from that copy. - - This option is useful when you wish to copy part of the code of -the Library into a program that is not a library. - - 4. 
You may copy and distribute the Library (or a portion or -derivative of it, under Section 2) in object code or executable form -under the terms of Sections 1 and 2 above provided that you accompany -it with the complete corresponding machine-readable source code, which -must be distributed under the terms of Sections 1 and 2 above on a -medium customarily used for software interchange. - - If distribution of object code is made by offering access to copy -from a designated place, then offering equivalent access to copy the -source code from the same place satisfies the requirement to -distribute the source code, even though third parties are not -compelled to copy the source along with the object code. - - 5. A program that contains no derivative of any portion of the -Library, but is designed to work with the Library by being compiled or -linked with it, is called a "work that uses the Library". Such a -work, in isolation, is not a derivative work of the Library, and -therefore falls outside the scope of this License. - - However, linking a "work that uses the Library" with the Library -creates an executable that is a derivative of the Library (because it -contains portions of the Library), rather than a "work that uses the -library". The executable is therefore covered by this License. -Section 6 states terms for distribution of such executables. - - When a "work that uses the Library" uses material from a header file -that is part of the Library, the object code for the work may be a -derivative work of the Library even though the source code is not. -Whether this is true is especially significant if the work can be -linked without the Library, or if the work is itself a library. The -threshold for this to be true is not precisely defined by law. - - If such an object file uses only numerical parameters, data -structure layouts and accessors, and small macros and small inline -functions (ten lines or less in length), then the use of the object -file is unrestricted, regardless of whether it is legally a derivative -work. (Executables containing this object code plus portions of the -Library will still fall under Section 6.) - - Otherwise, if the work is a derivative of the Library, you may -distribute the object code for the work under the terms of Section 6. -Any executables containing that work also fall under Section 6, -whether or not they are linked directly with the Library itself. - - 6. As an exception to the Sections above, you may also combine or -link a "work that uses the Library" with the Library to produce a -work containing portions of the Library, and distribute that work -under terms of your choice, provided that the terms permit -modification of the work for the customer's own use and reverse -engineering for debugging such modifications. - - You must give prominent notice with each copy of the work that the -Library is used in it and that the Library and its use are covered by -this License. You must supply a copy of this License. If the work -during execution displays copyright notices, you must include the -copyright notice for the Library among them, as well as a reference -directing the user to the copy of this License. 
Also, you must do one -of these things: - - a) Accompany the work with the complete corresponding - machine-readable source code for the Library including whatever - changes were used in the work (which must be distributed under - Sections 1 and 2 above); and, if the work is an executable linked - with the Library, with the complete machine-readable "work that - uses the Library", as object code and/or source code, so that the - user can modify the Library and then relink to produce a modified - executable containing the modified Library. (It is understood - that the user who changes the contents of definitions files in the - Library will not necessarily be able to recompile the application - to use the modified definitions.) - - b) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (1) uses at run time a - copy of the library already present on the user's computer system, - rather than copying library functions into the executable, and (2) - will operate properly with a modified version of the library, if - the user installs one, as long as the modified version is - interface-compatible with the version that the work was made with. - - c) Accompany the work with a written offer, valid for at - least three years, to give the same user the materials - specified in Subsection 6a, above, for a charge no more - than the cost of performing this distribution. - - d) If distribution of the work is made by offering access to copy - from a designated place, offer equivalent access to copy the above - specified materials from the same place. - - e) Verify that the user has already received a copy of these - materials or that you have already sent this user a copy. - - For an executable, the required form of the "work that uses the -Library" must include any data and utility programs needed for -reproducing the executable from it. However, as a special exception, -the materials to be distributed need not include anything that is -normally distributed (in either source or binary form) with the major -components (compiler, kernel, and so on) of the operating system on -which the executable runs, unless that component itself accompanies -the executable. - - It may happen that this requirement contradicts the license -restrictions of other proprietary libraries that do not normally -accompany the operating system. Such a contradiction means you cannot -use both them and the Library together in an executable that you -distribute. - - 7. You may place library facilities that are a work based on the -Library side-by-side in a single library together with other library -facilities not covered by this License, and distribute such a combined -library, provided that the separate distribution of the work based on -the Library and of the other library facilities is otherwise -permitted, and provided that you do these two things: - - a) Accompany the combined library with a copy of the same work - based on the Library, uncombined with any other library - facilities. This must be distributed under the terms of the - Sections above. - - b) Give prominent notice with the combined library of the fact - that part of it is a work based on the Library, and explaining - where to find the accompanying uncombined form of the same work. - - 8. You may not copy, modify, sublicense, link with, or distribute -the Library except as expressly provided under this License. 
Any -attempt otherwise to copy, modify, sublicense, link with, or -distribute the Library is void, and will automatically terminate your -rights under this License. However, parties who have received copies, -or rights, from you under this License will not have their licenses -terminated so long as such parties remain in full compliance. - - 9. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Library or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Library (or any work based on the -Library), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Library or works based on it. - - 10. Each time you redistribute the Library (or any work based on the -Library), the recipient automatically receives a license from the -original licensor to copy, distribute, link with or modify the Library -subject to these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties with -this License. - - 11. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Library at all. For example, if a patent -license would not permit royalty-free redistribution of the Library by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Library. - -If any portion of this section is held invalid or unenforceable under any -particular circumstance, the balance of the section is intended to apply, -and the section as a whole is intended to apply in other circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 12. If the distribution and/or use of the Library is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Library under this License may add -an explicit geographical distribution limitation excluding those countries, -so that distribution is permitted only in or among countries not thus -excluded. 
In such case, this License incorporates the limitation as if -written in the body of this License. - - 13. The Free Software Foundation may publish revised and/or new -versions of the Lesser General Public License from time to time. -Such new versions will be similar in spirit to the present version, -but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. If the Library -specifies a version number of this License which applies to it and -"any later version", you have the option of following the terms and -conditions either of that version or of any later version published by -the Free Software Foundation. If the Library does not specify a -license version number, you may choose any version ever published by -the Free Software Foundation. - - 14. If you wish to incorporate parts of the Library into other free -programs whose distribution conditions are incompatible with these, -write to the author to ask for permission. For software which is -copyrighted by the Free Software Foundation, write to the Free -Software Foundation; we sometimes make exceptions for this. Our -decision will be guided by the two goals of preserving the free status -of all derivatives of our free software and of promoting the sharing -and reuse of software generally. - - NO WARRANTY - - 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO -WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. -EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY -KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE -LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME -THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN -WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY -AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU -FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR -CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE -LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING -RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A -FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF -SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Libraries - - If you develop a new library, and you want it to be of the greatest -possible use to the public, we recommend making it free software that -everyone can redistribute and change. You can do so by permitting -redistribution under these terms (or, alternatively, under the terms of the -ordinary General Public License). - - To apply these terms, attach the following notices to the library. It is -safest to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least the -"copyright" line and a pointer to where the full notice is found. 
- - <one line to give the library's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -Also add information on how to contact you by electronic and paper mail. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the library, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the - library `Frob' (a library for tweaking knobs) written by James Random Hacker. - - <signature of Ty Coon>, 1 April 1990 - Ty Coon, President of Vice - -That's all there is to it! - - diff --git a/chall/ply-2.2/README b/chall/ply-2.2/README deleted file mode 100644 index d91e26c..0000000 --- a/chall/ply-2.2/README +++ /dev/null @@ -1,277 +0,0 @@ -PLY (Python Lex-Yacc) Version 2.2 (November 1, 2006) - -David M. Beazley (dave@dabeaz.com) - -Copyright (C) 2001-2006 David M. Beazley - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -See the file COPYING for a complete copy of the LGPL. - -Introduction -============ - -PLY is a 100% Python implementation of the common parsing tools lex -and yacc. Although several other parsing tools are available for -Python, there are several reasons why you might want to consider PLY: - - - The tools are very closely modeled after traditional lex/yacc. - If you know how to use these tools in C, you will find PLY - to be similar. - - - PLY provides *very* extensive error reporting and diagnostic - information to assist in parser construction. The original - implementation was developed for instructional purposes. As - a result, the system tries to identify the most common types - of errors made by novice users. - - - PLY provides full support for empty productions, error recovery, - precedence specifiers, and moderately ambiguous grammars. - - - Parsing is based on LR-parsing which is fast, memory efficient, - better suited to large grammars, and which has a number of nice - properties when dealing with syntax errors and other parsing problems. 
- Currently, PLY can build its parsing tables using either the SLR or - LALR(1) algorithm, with LALR(1) (as used in traditional yacc) being - the default. - - - PLY uses Python introspection features to build lexers and parsers. - This greatly simplifies the task of parser construction since it reduces - the number of files and eliminates the need to run a separate lex/yacc - tool before running your program. - - - PLY can be used to build parsers for "real" programming languages. - Although it is not ultra-fast due to its Python implementation, - PLY can be used to parse grammars consisting of several hundred - rules (as might be found for a language like C). The lexer and LR - parser are also reasonably efficient when parsing typically - sized programs. - -The original version of PLY was developed for an Introduction to -Compilers course where students used it to build a compiler for a -simple Pascal-like language. Their compiler had to include lexical -analysis, parsing, type checking, type inference, and generation of -assembly code for the SPARC processor. Because of this, the current -implementation has been extensively tested and debugged. In addition, -most of the API and error checking steps have been adapted to address -common usability problems. - -How to Use -========== - -PLY consists of two files: lex.py and yacc.py. These are contained -within the 'ply' directory, which may also be used as a Python package. -To use PLY, simply copy the 'ply' directory to your project and import -lex and yacc from the associated 'ply' package. For example: - - import ply.lex as lex - import ply.yacc as yacc - -Alternatively, you can copy just the files lex.py and yacc.py -individually and use them as modules. For example: - - import lex - import yacc - -The file setup.py can be used to install ply using distutils. - -The file doc/ply.html contains complete documentation on how to use -the system. - -The example directory contains several different examples, including a -PLY specification for ANSI C as given in K&R 2nd Ed. - -A simple example is found at the end of this document. - -Requirements -============ -PLY requires the use of Python 2.0 or greater. It should work on -just about any platform. PLY has been tested with both CPython and -Jython. However, it does not work with IronPython. - -Resources -========= -More information about PLY can be obtained on the PLY webpage at: - - http://www.dabeaz.com/ply - -For a detailed overview of parsing theory, consult the excellent -book "Compilers: Principles, Techniques, and Tools" by Aho, Sethi, and -Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown -may also be useful. - -A Google group for PLY can be found at - - http://groups.google.com/group/ply-hack - -Acknowledgments -=============== -A special thanks is in order for all of the students in CS326 who -suffered through about 25 different versions of these tools :-). - -The CHANGES file acknowledges those who have contributed patches. - -Elias Ioup did the first implementation of LALR(1) parsing in PLY-1.x. -Andrew Waters and Markus Schoepflin were instrumental in reporting bugs -and testing a revised LALR(1) implementation for PLY-2.0. - -Special Note for PLY-2.x -======================== -PLY-2.0 is the first in a series of PLY releases that will be adding a -variety of significant new features. The first release in this series -(Ply-2.0) should be 100% compatible with all previous Ply-1.x releases -except for the fact that Ply-2.0 features a correct implementation of -LALR(1) table generation.
- -If you have suggestions for improving PLY in future 2.x releases, please -contact me. - Dave - -Example -======= - -Here is a simple example showing a PLY implementation of a calculator -with variables. - -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. -# ----------------------------------------------------------------------------- - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print "Integer value too large", t.value - t.value = 0 - return t - -# Ignored characters -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Precedence rules for the arithmetic operators -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names (for storing variables) -names = { } - -def p_statement_assign(p): - 'statement : NAME EQUALS expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print p[1] - -def p_expression_binop(p): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - -def p_expression_group(p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - -def p_expression_number(p): - 'expression : NUMBER' - p[0] = p[1] - -def p_expression_name(p): - 'expression : NAME' - try: - p[0] = names[p[1]] - except LookupError: - print "Undefined name '%s'" % p[1] - p[0] = 0 - -def p_error(p): - print "Syntax error at '%s'" % p.value - -import ply.yacc as yacc -yacc.yacc() - -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - yacc.parse(s) - - -Bug Reports and Patches -======================= -Because of the extremely specialized and advanced nature of PLY, I -rarely spend much time working on it unless I receive very specific -bug-reports and/or patches to fix problems. I also try to incorporate -submitted feature requests and enhancements into each new version. To -contact me about bugs and/or new features, please send email to -dave@dabeaz.com. - -In addition there is a Google group for discussing PLY related issues at - - http://groups.google.com/group/ply-hack - --- Dave - - - - - - - - - diff --git a/chall/ply-2.2/TODO b/chall/ply-2.2/TODO deleted file mode 100644 index 7139d53..0000000 --- a/chall/ply-2.2/TODO +++ /dev/null @@ -1,14 +0,0 @@ -The PLY to-do list: - -1. More interesting parsing examples. - -2. Work on the ANSI C grammar so that it can actually parse C programs. To do this, - some extra code needs to be added to the lexer to deal with typedef names and enumeration - constants. - -3. More tests in the test directory. - -4. Performance improvements and cleanup in yacc.py. - -5. More documentation (?). 
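-
-Regarding item 2 above: the usual approach is lexer feedback, in which
-the parser records typedef names as it encounters them and the lexer
-consults that table to reclassify identifiers.  A minimal sketch under
-that assumption (the names 'typedefs' and 'TYPEID' are hypothetical and
-not part of this distribution):
-
-    typedefs = { }       # filled in by the parser at typedef declarations
-
-    def t_ID(t):
-        r'[A-Za-z_][A-Za-z0-9_]*'
-        if typedefs.has_key(t.value):
-            t.type = 'TYPEID'          # this identifier names a type
-        return t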
- diff --git a/chall/ply-2.2/build/lib.linux-x86_64-2.7/ply/__init__.py b/chall/ply-2.2/build/lib.linux-x86_64-2.7/ply/__init__.py deleted file mode 100644 index 853a985..0000000 --- a/chall/ply-2.2/build/lib.linux-x86_64-2.7/ply/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# PLY package -# Author: David Beazley (dave@dabeaz.com) - -__all__ = ['lex','yacc'] diff --git a/chall/ply-2.2/build/lib.linux-x86_64-2.7/ply/lex.py b/chall/ply-2.2/build/lib.linux-x86_64-2.7/ply/lex.py deleted file mode 100644 index c149366..0000000 --- a/chall/ply-2.2/build/lib.linux-x86_64-2.7/ply/lex.py +++ /dev/null @@ -1,866 +0,0 @@ -#----------------------------------------------------------------------------- -# ply: lex.py -# -# Author: David M. Beazley (dave@dabeaz.com) -# -# Copyright (C) 2001-2006, David M. Beazley -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# See the file COPYING for a complete copy of the LGPL. -#----------------------------------------------------------------------------- - -__version__ = "2.2" - -import re, sys, types - -# Regular expression used to match valid token names -_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') - -# Available instance types. This is used when lexers are defined by a class. -# It's a little funky because I want to preserve backwards compatibility -# with Python 2.0 where types.ObjectType is undefined. - -try: - _INSTANCETYPE = (types.InstanceType, types.ObjectType) -except AttributeError: - _INSTANCETYPE = types.InstanceType - class object: pass # Note: needed if no new-style classes present - -# Exception thrown when invalid token encountered and no default error -# handler is defined. -class LexError(Exception): - def __init__(self,message,s): - self.args = (message,) - self.text = s - -# Token class -class LexToken(object): - def __str__(self): - return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) - def __repr__(self): - return str(self) - def skip(self,n): - self.lexer.skip(n) - -# ----------------------------------------------------------------------------- -# Lexer class -# -# This class encapsulates all of the methods and data associated with a lexer. -# -# input() - Store a new string in the lexer -# token() - Get the next token -# ----------------------------------------------------------------------------- - -class Lexer: - def __init__(self): - self.lexre = None # Master regular expression. 
This is a list of - # tuples (re,findex) where re is a compiled - # regular expression and findex is a list - # mapping regex group numbers to rules - self.lexretext = None # Current regular expression strings - self.lexstatere = {} # Dictionary mapping lexer states to master regexs - self.lexstateretext = {} # Dictionary mapping lexer states to regex strings - self.lexstate = "INITIAL" # Current lexer state - self.lexstatestack = [] # Stack of lexer states - self.lexstateinfo = None # State information - self.lexstateignore = {} # Dictionary of ignored characters for each state - self.lexstateerrorf = {} # Dictionary of error functions for each state - self.lexreflags = 0 # Optional re compile flags - self.lexdata = None # Actual input data (as a string) - self.lexpos = 0 # Current position in input text - self.lexlen = 0 # Length of the input text - self.lexerrorf = None # Error rule (if any) - self.lextokens = None # List of valid tokens - self.lexignore = "" # Ignored characters - self.lexliterals = "" # Literal characters that can be passed through - self.lexmodule = None # Module - self.lineno = 1 # Current line number - self.lexdebug = 0 # Debugging mode - self.lexoptimize = 0 # Optimized mode - - def clone(self,object=None): - c = Lexer() - c.lexstatere = self.lexstatere - c.lexstateinfo = self.lexstateinfo - c.lexstateretext = self.lexstateretext - c.lexstate = self.lexstate - c.lexstatestack = self.lexstatestack - c.lexstateignore = self.lexstateignore - c.lexstateerrorf = self.lexstateerrorf - c.lexreflags = self.lexreflags - c.lexdata = self.lexdata - c.lexpos = self.lexpos - c.lexlen = self.lexlen - c.lextokens = self.lextokens - c.lexdebug = self.lexdebug - c.lineno = self.lineno - c.lexoptimize = self.lexoptimize - c.lexliterals = self.lexliterals - c.lexmodule = self.lexmodule - - # If the object parameter has been supplied, it means we are attaching the - # lexer to a new object. In this case, we have to rebind all methods in - # the lexstatere and lexstateerrorf tables. - - if object: - newtab = { } - for key, ritem in self.lexstatere.items(): - newre = [] - for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object,f[0].__name__),f[1])) - newre.append((cre,newfindex)) - newtab[key] = newre - c.lexstatere = newtab - c.lexstateerrorf = { } - for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object,ef.__name__) - c.lexmodule = object - - # Set up other attributes - c.begin(c.lexstate) - return c - - # ------------------------------------------------------------ - # writetab() - Write lexer information to a table file - # ------------------------------------------------------------ - def writetab(self,tabfile): - tf = open(tabfile+".py","w") - tf.write("# %s.py. This file automatically created by PLY (version %s). 
Don't edit!\n" % (tabfile,__version__)) - tf.write("_lextokens = %s\n" % repr(self.lextokens)) - tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) - tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) - tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) - - tabre = { } - for key, lre in self.lexstatere.items(): - titem = [] - for i in range(len(lre)): - titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1]))) - tabre[key] = titem - - tf.write("_lexstatere = %s\n" % repr(tabre)) - tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) - - taberr = { } - for key, ef in self.lexstateerrorf.items(): - if ef: - taberr[key] = ef.__name__ - else: - taberr[key] = None - tf.write("_lexstateerrorf = %s\n" % repr(taberr)) - tf.close() - - # ------------------------------------------------------------ - # readtab() - Read lexer information from a tab file - # ------------------------------------------------------------ - def readtab(self,tabfile,fdict): - exec "import %s as lextab" % tabfile - self.lextokens = lextab._lextokens - self.lexreflags = lextab._lexreflags - self.lexliterals = lextab._lexliterals - self.lexstateinfo = lextab._lexstateinfo - self.lexstateignore = lextab._lexstateignore - self.lexstatere = { } - self.lexstateretext = { } - for key,lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for i in range(len(lre)): - titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict))) - txtitem.append(lre[i][0]) - self.lexstatere[key] = titem - self.lexstateretext[key] = txtitem - self.lexstateerrorf = { } - for key,ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[key] = fdict[ef] - self.begin('INITIAL') - - # ------------------------------------------------------------ - # input() - Push a new string into the lexer - # ------------------------------------------------------------ - def input(self,s): - if not (isinstance(s,types.StringType) or isinstance(s,types.UnicodeType)): - raise ValueError, "Expected a string" - self.lexdata = s - self.lexpos = 0 - self.lexlen = len(s) - - # ------------------------------------------------------------ - # begin() - Changes the lexing state - # ------------------------------------------------------------ - def begin(self,state): - if not self.lexstatere.has_key(state): - raise ValueError, "Undefined state" - self.lexre = self.lexstatere[state] - self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state,"") - self.lexerrorf = self.lexstateerrorf.get(state,None) - self.lexstate = state - - # ------------------------------------------------------------ - # push_state() - Changes the lexing state and saves old on stack - # ------------------------------------------------------------ - def push_state(self,state): - self.lexstatestack.append(self.lexstate) - self.begin(state) - - # ------------------------------------------------------------ - # pop_state() - Restores the previous state - # ------------------------------------------------------------ - def pop_state(self): - self.begin(self.lexstatestack.pop()) - - # ------------------------------------------------------------ - # current_state() - Returns the current lexing state - # ------------------------------------------------------------ - def current_state(self): - return self.lexstate - - # ------------------------------------------------------------ - # skip() - Skip ahead n characters - # ------------------------------------------------------------ - def skip(self,n): - 
self.lexpos += n - - # ------------------------------------------------------------ - # token() - Return the next token from the Lexer - # - # Note: This function has been carefully implemented to be as fast - # as possible. Don't make changes unless you really know what - # you are doing - # ------------------------------------------------------------ - def token(self): - # Make local copies of frequently referenced attributes - lexpos = self.lexpos - lexlen = self.lexlen - lexignore = self.lexignore - lexdata = self.lexdata - - while lexpos < lexlen: - # This code provides some short-circuit code for whitespace, tabs, and other ignored characters - if lexdata[lexpos] in lexignore: - lexpos += 1 - continue - - # Look for a regular expression match - for lexre,lexindexfunc in self.lexre: - m = lexre.match(lexdata,lexpos) - if not m: continue - - # Set last match in lexer so that rules can access it if they want - self.lexmatch = m - - # Create a token for return - tok = LexToken() - tok.value = m.group() - tok.lineno = self.lineno - tok.lexpos = lexpos - tok.lexer = self - - lexpos = m.end() - i = m.lastindex - func,tok.type = lexindexfunc[i] - self.lexpos = lexpos - - if not func: - # If no token type was set, it's an ignored token - if tok.type: return tok - break - - # if func not callable, it means it's an ignored token - if not callable(func): - break - - # If token is processed by a function, call it - newtok = func(tok) - - # Every function must return a token, if nothing, we just move to next token - if not newtok: - lexpos = self.lexpos # This is here in case user has updated lexpos. - break - - # Verify type of the token. If not in the token map, raise an error - if not self.lexoptimize: - if not self.lextokens.has_key(newtok.type): - raise LexError, ("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( - func.func_code.co_filename, func.func_code.co_firstlineno, - func.__name__, newtok.type),lexdata[lexpos:]) - - return newtok - else: - # No match, see if in literals - if lexdata[lexpos] in self.lexliterals: - tok = LexToken() - tok.value = lexdata[lexpos] - tok.lineno = self.lineno - tok.lexer = self - tok.type = tok.value - tok.lexpos = lexpos - self.lexpos = lexpos + 1 - return tok - - # No match. Call t_error() if defined. - if self.lexerrorf: - tok = LexToken() - tok.value = self.lexdata[lexpos:] - tok.lineno = self.lineno - tok.type = "error" - tok.lexer = self - tok.lexpos = lexpos - self.lexpos = lexpos - newtok = self.lexerrorf(tok) - if lexpos == self.lexpos: - # Error method didn't change text position at all. This is an error. - raise LexError, ("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) - lexpos = self.lexpos - if not newtok: continue - return newtok - - self.lexpos = lexpos - raise LexError, ("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) - - self.lexpos = lexpos + 1 - if self.lexdata is None: - raise RuntimeError, "No input string given with input()" - return None - -# ----------------------------------------------------------------------------- -# _validate_file() -# -# This checks to see if there are duplicated t_rulename() functions or strings -# in the parser input file. This is done using a simple regular expression -# match on each line in the filename. -# ----------------------------------------------------------------------------- - -def _validate_file(filename): - import os.path - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea what the file is. 
Return OK - - try: - f = open(filename) - lines = f.readlines() - f.close() - except IOError: - return 1 # Oh well - - fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') - sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - counthash = { } - linen = 1 - noerror = 1 - for l in lines: - m = fre.match(l) - if not m: - m = sre.match(l) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - print "%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev) - noerror = 0 - linen += 1 - return noerror - -# ----------------------------------------------------------------------------- -# _funcs_to_names() -# -# Given a list of regular expression functions, this converts it to a list -# suitable for output to a table file -# ----------------------------------------------------------------------------- - -def _funcs_to_names(funclist): - result = [] - for f in funclist: - if f and f[0]: - result.append((f[0].__name__,f[1])) - else: - result.append(f) - return result - -# ----------------------------------------------------------------------------- -# _names_to_funcs() -# -# Given a list of regular expression function names, this converts it back to -# functions. -# ----------------------------------------------------------------------------- - -def _names_to_funcs(namelist,fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]],n[1])) - else: - result.append(n) - return result - -# ----------------------------------------------------------------------------- -# _form_master_re() -# -# This function takes a list of all of the regex components and attempts to -# form the master regular expression. Given limitations in the Python re -# module, it may be necessary to break the master regex into separate expressions. -# ----------------------------------------------------------------------------- - -def _form_master_re(relist,reflags,ldict): - if not relist: return [] - regex = "|".join(relist) - try: - lexre = re.compile(regex,re.VERBOSE | reflags) - - # Build the index to function map for the matching engine - lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) - for f,i in lexre.groupindex.items(): - handle = ldict.get(f,None) - if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle,handle.__name__[2:]) - elif handle is not None: - # If rule was specified as a string, we build an anonymous - # callback function to carry out the action - if f.find("ignore_") > 0: - lexindexfunc[i] = (None,None) - print "IGNORE", f - else: - lexindexfunc[i] = (None, f[2:]) - - return [(lexre,lexindexfunc)],[regex] - except Exception,e: - m = int(len(relist)/2) - if m == 0: m = 1 - llist, lre = _form_master_re(relist[:m],reflags,ldict) - rlist, rre = _form_master_re(relist[m:],reflags,ldict) - return llist+rlist, lre+rre - -# ----------------------------------------------------------------------------- -# def _statetoken(s,names) -# -# Given a declaration name s of the form "t_" and a dictionary whose keys are -# state names, this function returns a tuple (states,tokenname) where states -# is a tuple of state names and tokenname is the name of the token. 
For example, -# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') -# ----------------------------------------------------------------------------- - -def _statetoken(s,names): - nonstate = 1 - parts = s.split("_") - for i in range(1,len(parts)): - if not names.has_key(parts[i]) and parts[i] != 'ANY': break - if i > 1: - states = tuple(parts[1:i]) - else: - states = ('INITIAL',) - - if 'ANY' in states: - states = tuple(names.keys()) - - tokenname = "_".join(parts[i:]) - return (states,tokenname) - -# ----------------------------------------------------------------------------- -# lex(module) -# -# Build all of the regular expression rules from definitions in the supplied module -# ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0): - global lexer - ldict = None - stateinfo = { 'INITIAL' : 'inclusive'} - error = 0 - files = { } - lexobj = Lexer() - lexobj.lexdebug = debug - lexobj.lexoptimize = optimize - global token,input - - if nowarn: warn = 0 - else: warn = 1 - - if object: module = object - - if module: - # User supplied a module object. - if isinstance(module, types.ModuleType): - ldict = module.__dict__ - elif isinstance(module, _INSTANCETYPE): - _items = [(k,getattr(module,k)) for k in dir(module)] - ldict = { } - for (i,v) in _items: - ldict[i] = v - else: - raise ValueError,"Expected a module or instance" - lexobj.lexmodule = module - - else: - # No module given. We might be able to get information from the caller. - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - f = f.f_back # Walk out to our calling function - ldict = f.f_globals # Grab its globals dictionary - - if optimize and lextab: - try: - lexobj.readtab(lextab,ldict) - token = lexobj.token - input = lexobj.input - lexer = lexobj - return lexobj - - except ImportError: - pass - - # Get the tokens, states, and literals variables (if any) - if (module and isinstance(module,_INSTANCETYPE)): - tokens = getattr(module,"tokens",None) - states = getattr(module,"states",None) - literals = getattr(module,"literals","") - else: - tokens = ldict.get("tokens",None) - states = ldict.get("states",None) - literals = ldict.get("literals","") - - if not tokens: - raise SyntaxError,"lex: module does not define 'tokens'" - if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): - raise SyntaxError,"lex: tokens must be a list or tuple." - - # Build a dictionary of valid token names - lexobj.lextokens = { } - if not optimize: - for n in tokens: - if not _is_identifier.match(n): - print "lex: Bad token name '%s'" % n - error = 1 - if warn and lexobj.lextokens.has_key(n): - print "lex: Warning. Token '%s' multiply defined." % n - lexobj.lextokens[n] = None - else: - for n in tokens: lexobj.lextokens[n] = None - - if debug: - print "lex: tokens = '%s'" % lexobj.lextokens.keys() - - try: - for c in literals: - if not (isinstance(c,types.StringType) or isinstance(c,types.UnicodeType)) or len(c) > 1: - print "lex: Invalid literal %s. Must be a single character" % repr(c) - error = 1 - continue - - except TypeError: - print "lex: Invalid literals specification. literals must be a sequence of characters." - error = 1 - - lexobj.lexliterals = literals - - # Build statemap - if states: - if not (isinstance(states,types.TupleType) or isinstance(states,types.ListType)): - print "lex: states must be defined as a tuple or list." 
- error = 1 - else: - for s in states: - if not isinstance(s,types.TupleType) or len(s) != 2: - print "lex: invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')" % repr(s) - error = 1 - continue - name, statetype = s - if not isinstance(name,types.StringType): - print "lex: state name %s must be a string" % repr(name) - error = 1 - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - print "lex: state type for state %s must be 'inclusive' or 'exclusive'" % name - error = 1 - continue - if stateinfo.has_key(name): - print "lex: state '%s' already defined." % name - error = 1 - continue - stateinfo[name] = statetype - - # Get a list of symbols with the t_ or s_ prefix - tsymbols = [f for f in ldict.keys() if f[:2] == 't_' ] - - # Now build up a list of functions and a list of strings - - funcsym = { } # Symbols defined as functions - strsym = { } # Symbols defined as strings - toknames = { } # Mapping of symbols to token names - - for s in stateinfo.keys(): - funcsym[s] = [] - strsym[s] = [] - - ignore = { } # Ignore strings by state - errorf = { } # Error functions by state - - if len(tsymbols) == 0: - raise SyntaxError,"lex: no rules of the form t_rulename are defined." - - for f in tsymbols: - t = ldict[f] - states, tokname = _statetoken(f,stateinfo) - toknames[f] = tokname - - if callable(t): - for s in states: funcsym[s].append((f,t)) - elif (isinstance(t, types.StringType) or isinstance(t,types.UnicodeType)): - for s in states: strsym[s].append((f,t)) - else: - print "lex: %s not defined as a function or string" % f - error = 1 - - # Sort the functions by line number - for f in funcsym.values(): - f.sort(lambda x,y: cmp(x[1].func_code.co_firstlineno,y[1].func_code.co_firstlineno)) - - # Sort the strings by regular expression length - for s in strsym.values(): - s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) - - regexs = { } - - # Build the master regular expressions - for state in stateinfo.keys(): - regex_list = [] - - # Add rules defined by functions first - for fname, f in funcsym[state]: - line = f.func_code.co_firstlineno - file = f.func_code.co_filename - files[file] = None - tokname = toknames[fname] - - ismethod = isinstance(f, types.MethodType) - - if not optimize: - nargs = f.func_code.co_argcount - if ismethod: - reqargs = 2 - else: - reqargs = 1 - if nargs > reqargs: - print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__) - error = 1 - continue - - if nargs < reqargs: - print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__) - error = 1 - continue - - if tokname == 'ignore': - print "%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__) - error = 1 - continue - - if tokname == 'error': - errorf[state] = f - continue - - if f.__doc__: - if not optimize: - try: - c = re.compile("(?P<%s>%s)" % (f.__name__,f.__doc__), re.VERBOSE | reflags) - if c.match(""): - print "%s:%d: Regular expression for rule '%s' matches empty string." % (file,line,f.__name__) - error = 1 - continue - except re.error,e: - print "%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e) - if '#' in f.__doc__: - print "%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'." % (file,line, f.__name__) - error = 1 - continue - - if debug: - print "lex: Adding rule %s -> '%s' (state '%s')" % (f.__name__,f.__doc__, state) - - # Okay. The regular expression seemed okay. 
Let's append it to the master regular - # expression we're building - - regex_list.append("(?P<%s>%s)" % (f.__name__,f.__doc__)) - else: - print "%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__) - - # Now add all of the simple rules - for name,r in strsym[state]: - tokname = toknames[name] - - if tokname == 'ignore': - ignore[state] = r - continue - - if not optimize: - if tokname == 'error': - raise SyntaxError,"lex: Rule '%s' must be defined as a function" % name - error = 1 - continue - - if not lexobj.lextokens.has_key(tokname) and tokname.find("ignore_") < 0: - print "lex: Rule '%s' defined for an unspecified token %s." % (name,tokname) - error = 1 - continue - try: - c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | reflags) - if (c.match("")): - print "lex: Regular expression for rule '%s' matches empty string." % name - error = 1 - continue - except re.error,e: - print "lex: Invalid regular expression for rule '%s'. %s" % (name,e) - if '#' in r: - print "lex: Make sure '#' in rule '%s' is escaped with '\\#'." % name - - error = 1 - continue - if debug: - print "lex: Adding rule %s -> '%s' (state '%s')" % (name,r,state) - - regex_list.append("(?P<%s>%s)" % (name,r)) - - if not regex_list: - print "lex: No rules defined for state '%s'" % state - error = 1 - - regexs[state] = regex_list - - - if not optimize: - for f in files.keys(): - if not _validate_file(f): - error = 1 - - if error: - raise SyntaxError,"lex: Unable to build lexer." - - # From this point forward, we're reasonably confident that we can build the lexer. - # No more errors will be generated, but there might be some warning messages. - - # Build the master regular expressions - - for state in regexs.keys(): - lexre, re_text = _form_master_re(regexs[state],reflags,ldict) - lexobj.lexstatere[state] = lexre - lexobj.lexstateretext[state] = re_text - if debug: - for i in range(len(re_text)): - print "lex: state '%s'. regex[%d] = '%s'" % (state, i, re_text[i]) - - # For inclusive states, we need to add the INITIAL state - for state,type in stateinfo.items(): - if state != "INITIAL" and type == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - - lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere["INITIAL"] - lexobj.lexretext = lexobj.lexstateretext["INITIAL"] - - # Set up ignore variables - lexobj.lexstateignore = ignore - lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") - - # Set up error functions - lexobj.lexstateerrorf = errorf - lexobj.lexerrorf = errorf.get("INITIAL",None) - if warn and not lexobj.lexerrorf: - print "lex: Warning. no t_error rule is defined." - - # Check state information for ignore and error rules - for s,stype in stateinfo.items(): - if stype == 'exclusive': - if warn and not errorf.has_key(s): - print "lex: Warning. no error rule is defined for exclusive state '%s'" % s - if warn and not ignore.has_key(s) and lexobj.lexignore: - print "lex: Warning. 
no ignore rule is defined for exclusive state '%s'" % s - elif stype == 'inclusive': - if not errorf.has_key(s): - errorf[s] = errorf.get("INITIAL",None) - if not ignore.has_key(s): - ignore[s] = ignore.get("INITIAL","") - - - # Create global versions of the token() and input() functions - token = lexobj.token - input = lexobj.input - lexer = lexobj - - # If in optimize mode, we write the lextab - if lextab and optimize: - lexobj.writetab(lextab) - - return lexobj - -# ----------------------------------------------------------------------------- -# runmain() -# -# This runs the lexer as a main program -# ----------------------------------------------------------------------------- - -def runmain(lexer=None,data=None): - if not data: - try: - filename = sys.argv[1] - f = open(filename) - data = f.read() - f.close() - except IndexError: - print "Reading from standard input (type EOF to end):" - data = sys.stdin.read() - - if lexer: - _input = lexer.input - else: - _input = input - _input(data) - if lexer: - _token = lexer.token - else: - _token = token - - while 1: - tok = _token() - if not tok: break - print "(%s,%r,%d,%d)" % (tok.type, tok.value, tok.lineno,tok.lexpos) - - -# ----------------------------------------------------------------------------- -# @TOKEN(regex) -# -# This decorator function can be used to set the regex expression on a function -# when its docstring might need to be set in an alternative way -# ----------------------------------------------------------------------------- - -def TOKEN(r): - def set_doc(f): - f.__doc__ = r - return f - return set_doc - -# Alternative spelling of the TOKEN decorator -Token = TOKEN - diff --git a/chall/ply-2.2/build/lib.linux-x86_64-2.7/ply/yacc.py b/chall/ply-2.2/build/lib.linux-x86_64-2.7/ply/yacc.py deleted file mode 100644 index caf98af..0000000 --- a/chall/ply-2.2/build/lib.linux-x86_64-2.7/ply/yacc.py +++ /dev/null @@ -1,2209 +0,0 @@ -#----------------------------------------------------------------------------- -# ply: yacc.py -# -# Author(s): David M. Beazley (dave@dabeaz.com) -# -# Copyright (C) 2001-2006, David M. Beazley -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# See the file COPYING for a complete copy of the LGPL. -# -# -# This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside -# Python documentation strings. The inspiration for this technique was borrowed -# from John Aycock's Spark parsing system. PLY might be viewed as cross between -# Spark and the GNU bison utility. -# -# The current implementation is only somewhat object-oriented. The -# LR parser itself is defined in terms of an object (which allows multiple -# parsers to co-exist). 
However, most of the variables used during table
-# construction are defined in terms of global variables.  Users shouldn't
-# notice unless they are trying to define multiple parsers at the same
-# time using threads (in which case they should have their head examined).
-#
-# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
-# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
-# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
-# Techniques, and Tools" (The Dragon Book).  That implementation has since
-# been replaced by the more efficient DeRemer and Pennello algorithm.
-#
-# :::::::: WARNING :::::::
-#
-# Construction of LR parsing tables is fairly complicated and expensive.
-# To make this module run fast, a *LOT* of work has been put into
-# optimization---often at the expense of readability and what might be
-# considered good Python "coding style."  Modify the code at your own risk!
-# ----------------------------------------------------------------------------
-
-__version__ = "2.2"
-
-#-----------------------------------------------------------------------------
-#                     === User configurable parameters ===
-#
-# Change these to modify the default behavior of yacc (if you wish)
-#-----------------------------------------------------------------------------
-
-yaccdebug   = 1                # Debugging mode.  If set, yacc generates
-                               # a 'parser.out' file in the current directory
-
-debug_file  = 'parser.out'     # Default name of the debugging file
-tab_module  = 'parsetab'       # Default name of the table module
-default_lr  = 'LALR'           # Default LR table generation method
-
-error_count = 3                # Number of symbols that must be shifted to leave recovery mode
-
-import re, types, sys, cStringIO, md5, os.path
-
-# Exception raised for yacc-related errors
-class YaccError(Exception):   pass
-
-#-----------------------------------------------------------------------------
-#                        ===  LR Parsing Engine ===
-#
-# The following classes are used for the LR parser itself.  These are not
-# used during table construction and are independent of the actual LR
-# table generation algorithm.
-#-----------------------------------------------------------------------------
-
-# This class is used to hold non-terminal grammar symbols during parsing.
-# It normally has the following attributes set:
-#        .type       = Grammar symbol type
-#        .value      = Symbol value
-#        .lineno     = Starting line number
-#        .endlineno  = Ending line number (optional, set automatically)
-#        .lexpos     = Starting lex position
-#        .endlexpos  = Ending lex position (optional, set automatically)
-
-class YaccSymbol:
-    def __str__(self):    return self.type
-    def __repr__(self):   return str(self)
-
-# This class is a wrapper around the objects actually passed to each
-# grammar rule.  Index lookup and assignment actually assign the
-# .value attribute of the underlying YaccSymbol object.
-# The lineno() method returns the line number of a given
-# item (or 0 if not defined).  The linespan() method returns
-# a tuple (startline,endline) representing the range of lines
-# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
-# representing the range of positional information for a symbol.
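-#
-# As a usage sketch, consider a hypothetical grammar rule (not part of
-# this module) that calls these methods on the object passed to it:
-#
-#     def p_expression(p):
-#         'expression : expression PLUS expression'
-#         line      = p.lineno(2)       # line number of the PLUS token
-#         start,end = p.lexspan(3)      # lexing range of the right operand
-#         p[0] = p[1] + p[3]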
- -class YaccProduction: - def __init__(self,s,stack=None): - self.slice = s - self.pbstack = [] - self.stack = stack - - def __getitem__(self,n): - if type(n) == types.IntType: - if n >= 0: return self.slice[n].value - else: return self.stack[n].value - else: - return [s.value for s in self.slice[n.start:n.stop:n.step]] - - def __setitem__(self,n,v): - self.slice[n].value = v - - def __len__(self): - return len(self.slice) - - def lineno(self,n): - return getattr(self.slice[n],"lineno",0) - - def linespan(self,n): - startline = getattr(self.slice[n],"lineno",0) - endline = getattr(self.slice[n],"endlineno",startline) - return startline,endline - - def lexpos(self,n): - return getattr(self.slice[n],"lexpos",0) - - def lexspan(self,n): - startpos = getattr(self.slice[n],"lexpos",0) - endpos = getattr(self.slice[n],"endlexpos",startpos) - return startpos,endpos - - def pushback(self,n): - if n <= 0: - raise ValueError, "Expected a positive value" - if n > (len(self.slice)-1): - raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1) - for i in range(0,n): - self.pbstack.append(self.slice[-i-1]) - -# The LR Parsing engine. This is defined as a class so that multiple parsers -# can exist in the same process. A user never instantiates this directly. -# Instead, the global yacc() function should be used to create a suitable Parser -# object. - -class Parser: - def __init__(self,magic=None): - - # This is a hack to keep users from trying to instantiate a Parser - # object directly. - - if magic != "xyzzy": - raise YaccError, "Can't instantiate Parser. Use yacc() instead." - - # Reset internal state - self.productions = None # List of productions - self.errorfunc = None # Error handling function - self.action = { } # LR Action table - self.goto = { } # LR goto table - self.require = { } # Attribute require table - self.method = "Unknown LR" # Table construction method used - - def errok(self): - self.errorcount = 0 - - def restart(self): - del self.statestack[:] - del self.symstack[:] - sym = YaccSymbol() - sym.type = '$end' - self.symstack.append(sym) - self.statestack.append(0) - - def parse(self,input=None,lexer=None,debug=0): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table - goto = self.goto # Local reference to goto table - prod = self.productions # Local reference to production list - pslice = YaccProduction(None) # Production object passed to grammar rules - pslice.parser = self # Parser object - self.errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - import lex - lexer = lex.lexer - - pslice.lexer = lexer - - # If input was supplied, pass to lexer - if input: - lexer.input(input) - - # Tokenize function - get_token = lexer.token - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. 
Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - if debug > 1: - print 'state', statestack[-1] - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - if debug: - errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip() - - # Check the action table - s = statestack[-1] - ltype = lookahead.type - t = actions.get((s,ltype),None) - - if debug > 1: - print 'action', t - if t is not None: - if t > 0: - # shift a symbol on the stack - if ltype == '$end': - # Error, end of input - sys.stderr.write("yacc: Parse error. EOF\n") - return - statestack.append(t) - if debug > 1: - sys.stderr.write("%-60s shift state %s\n" % (errorlead, t)) - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if self.errorcount > 0: - self.errorcount -= 1 - - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - if debug > 1: - sys.stderr.write("%-60s reduce %d\n" % (errorlead, -t)) - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - try: - sym.lineno = targ[1].lineno - sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno) - sym.lexpos = targ[1].lexpos - sym.endlexpos = getattr(targ[-1],"endlexpos",targ[-1].lexpos) - except AttributeError: - sym.lineno = 0 - del symstack[-plen:] - del statestack[-plen:] - else: - sym.lineno = 0 - targ = [ sym ] - pslice.slice = targ - pslice.pbstack = [] - # Call the grammar rule with our special slice object - p.func(pslice) - - # If there was a pushback, put that on the stack - if pslice.pbstack: - lookaheadstack.append(lookahead) - for _t in pslice.pbstack: - lookaheadstack.append(_t) - lookahead = None - - symstack.append(sym) - statestack.append(goto[statestack[-1],pname]) - continue - - if t == 0: - n = symstack[-1] - return getattr(n,"value",None) - sys.stderr.write(errorlead, "\n") - - if t == None: - if debug: - sys.stderr.write(errorlead + "\n") - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if not self.errorcount: - self.errorcount = error_count - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if not self.errorcount: - # User must have done some kind of panic - # mode recovery on their own. 
The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - self.errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - - continue - - # Call an error function here - raise RuntimeError, "yacc: internal parser error!!!\n" - -# ----------------------------------------------------------------------------- -# === Parser Construction === -# -# The following functions and variables are used to implement the yacc() function -# itself. This is pretty hairy stuff involving lots of error checking, -# construction of LR items, kernels, and so forth. Although a lot of -# this work is done using global variables, the resulting Parser object -# is completely self contained--meaning that it is safe to repeatedly -# call yacc() with different grammars in the same application. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# validate_file() -# -# This function checks to see if there are duplicated p_rulename() functions -# in the parser module file. Without this function, it is really easy for -# users to make mistakes by cutting and pasting code fragments (and it's a real -# bugger to try and figure out why the resulting parser doesn't work). Therefore, -# we just do a little regular expression pattern matching of def statements -# to try and detect duplicates. -# ----------------------------------------------------------------------------- - -def validate_file(filename): - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea. Assume it's okay. - - try: - f = open(filename) - lines = f.readlines() - f.close() - except IOError: - return 1 # Oh well - - # Match def p_funcname( - fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - counthash = { } - linen = 1 - noerror = 1 - for l in lines: - m = fre.match(l) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - sys.stderr.write("%s:%d: Function %s redefined. 
Previously defined on line %d\n" % (filename,linen,name,prev)) - noerror = 0 - linen += 1 - return noerror - -# This function looks for functions that might be grammar rules, but which don't have the proper p_suffix. -def validate_dict(d): - for n,v in d.items(): - if n[0:2] == 'p_' and type(v) in (types.FunctionType, types.MethodType): continue - if n[0:2] == 't_': continue - - if n[0:2] == 'p_': - sys.stderr.write("yacc: Warning. '%s' not defined as a function\n" % n) - if 1 and isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1: - try: - doc = v.__doc__.split(" ") - if doc[1] == ':': - sys.stderr.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v.func_code.co_filename, v.func_code.co_firstlineno,n)) - except StandardError: - pass - -# ----------------------------------------------------------------------------- -# === GRAMMAR FUNCTIONS === -# -# The following global variables and functions are used to store, manipulate, -# and verify the grammar rules specified by the user. -# ----------------------------------------------------------------------------- - -# Initialize all of the global variables used during grammar construction -def initialize_vars(): - global Productions, Prodnames, Prodmap, Terminals - global Nonterminals, First, Follow, Precedence, LRitems - global Errorfunc, Signature, Requires - - Productions = [None] # A list of all of the productions. The first - # entry is always reserved for the purpose of - # building an augmented grammar - - Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all - # productions of that nonterminal. - - Prodmap = { } # A dictionary that is only used to detect duplicate - # productions. - - Terminals = { } # A dictionary mapping the names of terminal symbols to a - # list of the rules where they are used. - - Nonterminals = { } # A dictionary mapping names of nonterminals to a list - # of rule numbers where they are used. - - First = { } # A dictionary of precomputed FIRST(x) symbols - - Follow = { } # A dictionary of precomputed FOLLOW(x) symbols - - Precedence = { } # Precedence rules for each terminal. Contains tuples of the - # form ('right',level) or ('nonassoc', level) or ('left',level) - - LRitems = [ ] # A list of all LR items for the grammar. These are the - # productions with the "dot" like E -> E . PLUS E - - Errorfunc = None # User defined error handler - - Signature = md5.new() # Digital signature of the grammar rules, precedence - # and other information. Used to determined when a - # parsing table needs to be regenerated. - - Requires = { } # Requires list - - # File objects used when creating the parser.out debugging file - global _vf, _vfc - _vf = cStringIO.StringIO() - _vfc = cStringIO.StringIO() - -# ----------------------------------------------------------------------------- -# class Production: -# -# This class stores the raw information about a single production or grammar rule. -# It has a few required attributes: -# -# name - Name of the production (nonterminal) -# prod - A list of symbols making up its production -# number - Production number. -# -# In addition, a few additional attributes are used to help with debugging or -# optimization of table generation. -# -# file - File where production action is defined. -# lineno - Line number where action is defined -# func - Action function -# prec - Precedence level -# lr_next - Next LR item. Example, if we are ' E -> E . PLUS E' -# then lr_next refers to 'E -> E PLUS . 
E' -# lr_index - LR item index (location of the ".") in the prod list. -# lookaheads - LALR lookahead symbols for this item -# len - Length of the production (number of symbols on right hand side) -# ----------------------------------------------------------------------------- - -class Production: - def __init__(self,**kw): - for k,v in kw.items(): - setattr(self,k,v) - self.lr_index = -1 - self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure - self.lr1_added = 0 # Flag indicating whether or not added to LR1 - self.usyms = [ ] - self.lookaheads = { } - self.lk_added = { } - self.setnumbers = [ ] - - def __str__(self): - if self.prod: - s = "%s -> %s" % (self.name," ".join(self.prod)) - else: - s = "%s -> <empty>" % self.name - return s - - def __repr__(self): - return str(self) - - # Compute lr_items from the production - def lr_item(self,n): - if n > len(self.prod): return None - p = Production() - p.name = self.name - p.prod = list(self.prod) - p.number = self.number - p.lr_index = n - p.lookaheads = { } - p.setnumbers = self.setnumbers - p.prod.insert(n,".") - p.prod = tuple(p.prod) - p.len = len(p.prod) - p.usyms = self.usyms - - # Precompute list of productions immediately following - try: - p.lrafter = Prodnames[p.prod[n+1]] - except (IndexError,KeyError),e: - p.lrafter = [] - try: - p.lrbefore = p.prod[n-1] - except IndexError: - p.lrbefore = None - - return p - -class MiniProduction: - pass - -# regex matching identifiers -_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') - -# ----------------------------------------------------------------------------- -# add_production() -# -# Given an action function, this function assembles a production rule. -# The production rule is assumed to be found in the function's docstring. -# This rule has the general syntax: -# -# name1 ::= production1 -# | production2 -# | production3 -# ... -# | productionn -# name2 ::= production1 -# | production2 -# ... -# ----------------------------------------------------------------------------- - -def add_production(f,file,line,prodname,syms): - - if Terminals.has_key(prodname): - sys.stderr.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line,prodname)) - return -1 - if prodname == 'error': - sys.stderr.write("%s:%d: Illegal rule name '%s'. 
error is a reserved word.\n" % (file,line,prodname)) - return -1 - - if not _is_identifier.match(prodname): - sys.stderr.write("%s:%d: Illegal rule name '%s'\n" % (file,line,prodname)) - return -1 - - for x in range(len(syms)): - s = syms[x] - if s[0] in "'\"": - try: - c = eval(s) - if (len(c) > 1): - sys.stderr.write("%s:%d: Literal token %s in rule '%s' may only be a single character\n" % (file,line,s, prodname)) - return -1 - if not Terminals.has_key(c): - Terminals[c] = [] - syms[x] = c - continue - except SyntaxError: - pass - if not _is_identifier.match(s) and s != '%prec': - sys.stderr.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line,s, prodname)) - return -1 - - # See if the rule is already in the rulemap - map = "%s -> %s" % (prodname,syms) - if Prodmap.has_key(map): - m = Prodmap[map] - sys.stderr.write("%s:%d: Duplicate rule %s.\n" % (file,line, m)) - sys.stderr.write("%s:%d: Previous definition at %s:%d\n" % (file,line, m.file, m.line)) - return -1 - - p = Production() - p.name = prodname - p.prod = syms - p.file = file - p.line = line - p.func = f - p.number = len(Productions) - - - Productions.append(p) - Prodmap[map] = p - if not Nonterminals.has_key(prodname): - Nonterminals[prodname] = [ ] - - # Add all terminals to Terminals - i = 0 - while i < len(p.prod): - t = p.prod[i] - if t == '%prec': - try: - precname = p.prod[i+1] - except IndexError: - sys.stderr.write("%s:%d: Syntax error. Nothing follows %%prec.\n" % (p.file,p.line)) - return -1 - - prec = Precedence.get(precname,None) - if not prec: - sys.stderr.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p.file,p.line,precname)) - return -1 - else: - p.prec = prec - del p.prod[i] - del p.prod[i] - continue - - if Terminals.has_key(t): - Terminals[t].append(p.number) - # Is a terminal. 
We'll assign a precedence to p based on this - if not hasattr(p,"prec"): - p.prec = Precedence.get(t,('right',0)) - else: - if not Nonterminals.has_key(t): - Nonterminals[t] = [ ] - Nonterminals[t].append(p.number) - i += 1 - - if not hasattr(p,"prec"): - p.prec = ('right',0) - - # Set final length of productions - p.len = len(p.prod) - p.prod = tuple(p.prod) - - # Calculate unique syms in the production - p.usyms = [ ] - for s in p.prod: - if s not in p.usyms: - p.usyms.append(s) - - # Add to the global productions list - try: - Prodnames[p.name].append(p) - except KeyError: - Prodnames[p.name] = [ p ] - return 0 - -# Given a raw rule function, this function rips out its doc string -# and adds rules to the grammar - -def add_function(f): - line = f.func_code.co_firstlineno - file = f.func_code.co_filename - error = 0 - - if isinstance(f,types.MethodType): - reqdargs = 2 - else: - reqdargs = 1 - - if f.func_code.co_argcount > reqdargs: - sys.stderr.write("%s:%d: Rule '%s' has too many arguments.\n" % (file,line,f.__name__)) - return -1 - - if f.func_code.co_argcount < reqdargs: - sys.stderr.write("%s:%d: Rule '%s' requires an argument.\n" % (file,line,f.__name__)) - return -1 - - if f.__doc__: - # Split the doc string into lines - pstrings = f.__doc__.splitlines() - lastp = None - dline = line - for ps in pstrings: - dline += 1 - p = ps.split() - if not p: continue - try: - if p[0] == '|': - # This is a continuation of a previous rule - if not lastp: - sys.stderr.write("%s:%d: Misplaced '|'.\n" % (file,dline)) - return -1 - prodname = lastp - if len(p) > 1: - syms = p[1:] - else: - syms = [ ] - else: - prodname = p[0] - lastp = prodname - assign = p[1] - if len(p) > 2: - syms = p[2:] - else: - syms = [ ] - if assign != ':' and assign != '::=': - sys.stderr.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline)) - return -1 - - - e = add_production(f,file,dline,prodname,syms) - error += e - - - except StandardError: - sys.stderr.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline,ps)) - error -= 1 - else: - sys.stderr.write("%s:%d: No documentation string specified in function '%s'\n" % (file,line,f.__name__)) - return error - - -# Cycle checking code (Michael Dyck) - -def compute_reachable(): - ''' - Find each symbol that can be reached from the start symbol. - Print a warning for any nonterminals that can't be reached. - (Unused terminals have already had their warning.) - ''' - Reachable = { } - for s in Terminals.keys() + Nonterminals.keys(): - Reachable[s] = 0 - - mark_reachable_from( Productions[0].prod[0], Reachable ) - - for s in Nonterminals.keys(): - if not Reachable[s]: - sys.stderr.write("yacc: Symbol '%s' is unreachable.\n" % s) - -def mark_reachable_from(s, Reachable): - ''' - Mark all symbols that are reachable from symbol s. - ''' - if Reachable[s]: - # We've already reached symbol s. - return - Reachable[s] = 1 - for p in Prodnames.get(s,[]): - for r in p.prod: - mark_reachable_from(r, Reachable) - -# ----------------------------------------------------------------------------- -# compute_terminates() -# -# This function looks at the various parsing rules and tries to detect -# infinite recursion cycles (grammar rules where there is no possible way -# to derive a string of only terminals). -# ----------------------------------------------------------------------------- -def compute_terminates(): - ''' - Raise an error for any symbols that don't terminate. 
- ''' - Terminates = {} - - # Terminals: - for t in Terminals.keys(): - Terminates[t] = 1 - - Terminates['$end'] = 1 - - # Nonterminals: - - # Initialize to false: - for n in Nonterminals.keys(): - Terminates[n] = 0 - - # Then propagate termination until no change: - while 1: - some_change = 0 - for (n,pl) in Prodnames.items(): - # Nonterminal n terminates iff any of its productions terminates. - for p in pl: - # Production p terminates iff all of its rhs symbols terminate. - for s in p.prod: - if not Terminates[s]: - # The symbol s does not terminate, - # so production p does not terminate. - p_terminates = 0 - break - else: - # didn't break from the loop, - # so every symbol s terminates - # so production p terminates. - p_terminates = 1 - - if p_terminates: - # symbol n terminates! - if not Terminates[n]: - Terminates[n] = 1 - some_change = 1 - # Don't need to consider any more productions for this n. - break - - if not some_change: - break - - some_error = 0 - for (s,terminates) in Terminates.items(): - if not terminates: - if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': - # s is used-but-not-defined, and we've already warned of that, - # so it would be overkill to say that it's also non-terminating. - pass - else: - sys.stderr.write("yacc: Infinite recursion detected for symbol '%s'.\n" % s) - some_error = 1 - - return some_error - -# ----------------------------------------------------------------------------- -# verify_productions() -# -# This function examines all of the supplied rules to see if they seem valid. -# ----------------------------------------------------------------------------- -def verify_productions(cycle_check=1): - error = 0 - for p in Productions: - if not p: continue - - for s in p.prod: - if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': - sys.stderr.write("%s:%d: Symbol '%s' used, but not defined as a token or a rule.\n" % (p.file,p.line,s)) - error = 1 - continue - - unused_tok = 0 - # Now verify all of the tokens - if yaccdebug: - _vf.write("Unused terminals:\n\n") - for s,v in Terminals.items(): - if s != 'error' and not v: - sys.stderr.write("yacc: Warning. Token '%s' defined, but not used.\n" % s) - if yaccdebug: _vf.write(" %s\n"% s) - unused_tok += 1 - - # Print out all of the productions - if yaccdebug: - _vf.write("\nGrammar\n\n") - for i in range(1,len(Productions)): - _vf.write("Rule %-5d %s\n" % (i, Productions[i])) - - unused_prod = 0 - # Verify the use of all productions - for s,v in Nonterminals.items(): - if not v: - p = Prodnames[s][0] - sys.stderr.write("%s:%d: Warning. Rule '%s' defined, but not used.\n" % (p.file,p.line, s)) - unused_prod += 1 - - - if unused_tok == 1: - sys.stderr.write("yacc: Warning. There is 1 unused token.\n") - if unused_tok > 1: - sys.stderr.write("yacc: Warning. There are %d unused tokens.\n" % unused_tok) - - if unused_prod == 1: - sys.stderr.write("yacc: Warning. There is 1 unused rule.\n") - if unused_prod > 1: - sys.stderr.write("yacc: Warning. 
There are %d unused rules.\n" % unused_prod) - - if yaccdebug: - _vf.write("\nTerminals, with rules where they appear\n\n") - ks = Terminals.keys() - ks.sort() - for k in ks: - _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]]))) - _vf.write("\nNonterminals, with rules where they appear\n\n") - ks = Nonterminals.keys() - ks.sort() - for k in ks: - _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]]))) - - if (cycle_check): - compute_reachable() - error += compute_terminates() -# error += check_cycles() - return error - -# ----------------------------------------------------------------------------- -# build_lritems() -# -# This function walks the list of productions and builds a complete set of the -# LR items. The LR items are stored in two ways: First, they are uniquely -# numbered and placed in the list _lritems. Second, a linked list of LR items -# is built for each production. For example: -# -# E -> E PLUS E -# -# Creates the list -# -# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] -# ----------------------------------------------------------------------------- - -def build_lritems(): - for p in Productions: - lastlri = p - lri = p.lr_item(0) - i = 0 - while 1: - lri = p.lr_item(i) - lastlri.lr_next = lri - if not lri: break - lri.lr_num = len(LRitems) - LRitems.append(lri) - lastlri = lri - i += 1 - - # In order for the rest of the parser generator to work, we need to - # guarantee that no more lritems are generated. Therefore, we nuke - # the p.lr_item method. (Only used in debugging) - # Production.lr_item = None - -# ----------------------------------------------------------------------------- -# add_precedence() -# -# Given a list of precedence rules, add to the precedence table. -# ----------------------------------------------------------------------------- - -def add_precedence(plist): - plevel = 0 - error = 0 - for p in plist: - plevel += 1 - try: - prec = p[0] - terms = p[1:] - if prec != 'left' and prec != 'right' and prec != 'nonassoc': - sys.stderr.write("yacc: Invalid precedence '%s'\n" % prec) - return -1 - for t in terms: - if Precedence.has_key(t): - sys.stderr.write("yacc: Precedence already specified for terminal '%s'\n" % t) - error += 1 - continue - Precedence[t] = (prec,plevel) - except: - sys.stderr.write("yacc: Invalid precedence table.\n") - error += 1 - - return error - -# ----------------------------------------------------------------------------- -# augment_grammar() -# -# Compute the augmented grammar. This is just a rule S' -> start where start -# is the starting symbol. -# ----------------------------------------------------------------------------- - -def augment_grammar(start=None): - if not start: - start = Productions[1].name - Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None) - Productions[0].usyms = [ start ] - Nonterminals[start].append(0) - - -# ------------------------------------------------------------------------- -# first() -# -# Compute the value of FIRST1(beta) where beta is a tuple of symbols. -# -# During execution of compute_first1, the result may be incomplete. -# Afterward (e.g., when called from compute_follow()), it will be complete. -# ------------------------------------------------------------------------- -def first(beta): - - # We are computing First(x1,x2,x3,...,xn) - result = [ ] - for x in beta: - x_produces_empty = 0 - - # Add all the non-<empty> symbols of First[x] to the result. 
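-        # (For intuition, with hypothetical symbols: if beta were
-        # ('LPAREN','expr'), everything in First['LPAREN'] -- just
-        # 'LPAREN', since it is a terminal -- lands in the result, and
-        # 'expr' would only be consulted if 'LPAREN' could derive
-        # <empty>, which a terminal never does.)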
- for f in First[x]: - if f == '<empty>': - x_produces_empty = 1 - else: - if f not in result: result.append(f) - - if x_produces_empty: - # We have to consider the next x in beta, - # i.e. stay in the loop. - pass - else: - # We don't have to consider any further symbols in beta. - break - else: - # There was no 'break' from the loop, - # so x_produces_empty was true for all x in beta, - # so beta produces empty as well. - result.append('<empty>') - - return result - - -# FOLLOW(x) -# Given a non-terminal. This function computes the set of all symbols -# that might follow it. Dragon book, p. 189. - -def compute_follow(start=None): - # Add '$end' to the follow list of the start symbol - for k in Nonterminals.keys(): - Follow[k] = [ ] - - if not start: - start = Productions[1].name - - Follow[start] = [ '$end' ] - - while 1: - didadd = 0 - for p in Productions[1:]: - # Here is the production set - for i in range(len(p.prod)): - B = p.prod[i] - if Nonterminals.has_key(B): - # Okay. We got a non-terminal in a production - fst = first(p.prod[i+1:]) - hasempty = 0 - for f in fst: - if f != '<empty>' and f not in Follow[B]: - Follow[B].append(f) - didadd = 1 - if f == '<empty>': - hasempty = 1 - if hasempty or i == (len(p.prod)-1): - # Add elements of follow(a) to follow(b) - for f in Follow[p.name]: - if f not in Follow[B]: - Follow[B].append(f) - didadd = 1 - if not didadd: break - - if 0 and yaccdebug: - _vf.write('\nFollow:\n') - for k in Nonterminals.keys(): - _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]]))) - -# ------------------------------------------------------------------------- -# compute_first1() -# -# Compute the value of FIRST1(X) for all symbols -# ------------------------------------------------------------------------- -def compute_first1(): - - # Terminals: - for t in Terminals.keys(): - First[t] = [t] - - First['$end'] = ['$end'] - First['#'] = ['#'] # what's this for? - - # Nonterminals: - - # Initialize to the empty set: - for n in Nonterminals.keys(): - First[n] = [] - - # Then propagate symbols until no change: - while 1: - some_change = 0 - for n in Nonterminals.keys(): - for p in Prodnames[n]: - for f in first(p.prod): - if f not in First[n]: - First[n].append( f ) - some_change = 1 - if not some_change: - break - - if 0 and yaccdebug: - _vf.write('\nFirst:\n') - for k in Nonterminals.keys(): - _vf.write("%-20s : %s\n" % - (k, " ".join([str(s) for s in First[k]]))) - -# ----------------------------------------------------------------------------- -# === SLR Generation === -# -# The following functions are used to construct SLR (Simple LR) parsing tables -# as described on p.221-229 of the dragon book. -# ----------------------------------------------------------------------------- - -# Global variables for the LR parsing engine -def lr_init_vars(): - global _lr_action, _lr_goto, _lr_method - global _lr_goto_cache, _lr0_cidhash - - _lr_action = { } # Action table - _lr_goto = { } # Goto table - _lr_method = "Unknown" # LR method used - _lr_goto_cache = { } - _lr0_cidhash = { } - - -# Compute the LR(0) closure operation on I, where I is a set of LR(0) items. -# prodlist is a list of productions. 
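-#
-# For example, with the hypothetical grammar
-#
-#     S' -> E        E -> E PLUS T | T        T -> NUMBER
-#
-# the closure of { S' -> . E } also contains E -> . E PLUS T, E -> . T
-# and T -> . NUMBER, because a dot sitting in front of a nonterminal
-# pulls in every item that can begin deriving that nonterminal.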
-
-_add_count = 0       # Counter used to detect cycles
-
-def lr0_closure(I):
-    global _add_count
-
-    _add_count += 1
-    prodlist = Productions
-
-    # Add everything in I to J
-    J = I[:]
-    didadd = 1
-    while didadd:
-        didadd = 0
-        for j in J:
-            for x in j.lrafter:
-                if x.lr0_added == _add_count: continue
-                # Add B --> .G to J
-                J.append(x.lr_next)
-                x.lr0_added = _add_count
-                didadd = 1
-
-    return J
-
-# Compute the LR(0) goto function goto(I,X) where I is a set
-# of LR(0) items and X is a grammar symbol.   This function is written
-# in a way that guarantees uniqueness of the generated goto sets
-# (i.e. the same goto set will never be returned as two different Python
-# objects).  With uniqueness, we can later do fast set comparisons using
-# id(obj) instead of element-wise comparison.
-
-def lr0_goto(I,x):
-    # First we look for a previously cached entry
-    g = _lr_goto_cache.get((id(I),x),None)
-    if g: return g
-
-    # Now we generate the goto set in a way that guarantees uniqueness
-    # of the result
-
-    s = _lr_goto_cache.get(x,None)
-    if not s:
-        s = { }
-        _lr_goto_cache[x] = s
-
-    gs = [ ]
-    for p in I:
-        n = p.lr_next
-        if n and n.lrbefore == x:
-            s1 = s.get(id(n),None)
-            if not s1:
-                s1 = { }
-                s[id(n)] = s1
-            gs.append(n)
-            s = s1
-    g = s.get('$end',None)
-    if not g:
-        if gs:
-            g = lr0_closure(gs)
-            s['$end'] = g
-        else:
-            s['$end'] = gs
-    _lr_goto_cache[(id(I),x)] = g
-    return g
-
-_lr0_cidhash = { }
-
-# Compute the LR(0) sets-of-items function
-def lr0_items():
-
-    C = [ lr0_closure([Productions[0].lr_next]) ]
-    i = 0
-    for I in C:
-        _lr0_cidhash[id(I)] = i
-        i += 1
-
-    # Loop over the items in C and each grammar symbol
-    i = 0
-    while i < len(C):
-        I = C[i]
-        i += 1
-
-        # Collect all of the symbols that could possibly be in the goto(I,X) sets
-        asyms = { }
-        for ii in I:
-            for s in ii.usyms:
-                asyms[s] = None
-
-        for x in asyms.keys():
-            g = lr0_goto(I,x)
-            if not g: continue
-            if _lr0_cidhash.has_key(id(g)): continue
-            _lr0_cidhash[id(g)] = len(C)
-            C.append(g)
-
-    return C
-
-# -----------------------------------------------------------------------------
-#                       ==== LALR(1) Parsing ====
-#
-# LALR(1) parsing is almost exactly the same as SLR except that instead of
-# relying upon Follow() sets when performing reductions, a more selective
-# lookahead set that incorporates the state of the LR(0) machine is utilized.
-# Thus, we mainly just have to focus on calculating the lookahead sets.
-#
-# The method used here is due to DeRemer and Pennello (1982).
-#
-# DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
-# Lookahead Sets", ACM Transactions on Programming Languages and Systems,
-# Vol. 4, No. 4, Oct. 1982, pp. 615-649
-#
-# Further details can also be found in:
-#
-#  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
-#  McGraw-Hill Book Company, (1985).
-#
-# Note:  This implementation is a complete replacement of the LALR(1)
-#        implementation in PLY-1.x releases. That version was based on
-#        a less efficient algorithm and it had bugs in its implementation.
-# -----------------------------------------------------------------------------
-
-# -----------------------------------------------------------------------------
-# compute_nullable_nonterminals()
-#
-# Creates a dictionary containing all of the non-terminals that might produce
-# an empty production.
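-# For example, given the hypothetical rules
-#
-#     optsign : PLUS
-#             | MINUS
-#             |
-#
-# 'optsign' is nullable because of its empty third production; a
-# nonterminal whose production consists only of nullable symbols is
-# marked nullable as well on a later pass of the loop below.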
-# -----------------------------------------------------------------------------
-
-def compute_nullable_nonterminals():
-    nullable = {}
-    num_nullable = 0
-    while 1:
-        for p in Productions[1:]:
-            if p.len == 0:
-                nullable[p.name] = 1
-                continue
-            for t in p.prod:
-                if not nullable.has_key(t): break
-            else:
-                nullable[p.name] = 1
-        if len(nullable) == num_nullable: break
-        num_nullable = len(nullable)
-    return nullable
-
-# -----------------------------------------------------------------------------
-# find_nonterminal_transitions(C)
-#
-# Given a set of LR(0) items, this function finds all of the non-terminal
-# transitions.  These are transitions in which a dot appears immediately before
-# a non-terminal.  Returns a list of tuples of the form (state,N) where state
-# is the state number and N is the nonterminal symbol.
-#
-# The input C is the set of LR(0) items.
-# -----------------------------------------------------------------------------
-
-def find_nonterminal_transitions(C):
-    trans = []
-    for state in range(len(C)):
-        for p in C[state]:
-            if p.lr_index < p.len - 1:
-                t = (state,p.prod[p.lr_index+1])
-                if Nonterminals.has_key(t[1]):
-                    if t not in trans: trans.append(t)
-        state = state + 1
-    return trans
-
-# -----------------------------------------------------------------------------
-# dr_relation()
-#
-# Computes the DR(p,A) relationships for non-terminal transitions.  The input
-# is a tuple (state,N) where state is a number and N is a nonterminal symbol.
-#
-# Returns a list of terminals.
-# -----------------------------------------------------------------------------
-
-def dr_relation(C,trans,nullable):
-    dr_set = { }
-    state,N = trans
-    terms = []
-
-    g = lr0_goto(C[state],N)
-    for p in g:
-        if p.lr_index < p.len - 1:
-            a = p.prod[p.lr_index+1]
-            if Terminals.has_key(a):
-                if a not in terms: terms.append(a)
-
-    # This extra bit is to handle the start state
-    if state == 0 and N == Productions[0].prod[0]:
-        terms.append('$end')
-
-    return terms
-
-# -----------------------------------------------------------------------------
-# reads_relation()
-#
-# Computes the READS() relation (p,A) READS (t,C).
-# -----------------------------------------------------------------------------
-
-def reads_relation(C, trans, empty):
-    # Look for empty transitions
-    rel = []
-    state, N = trans
-
-    g = lr0_goto(C[state],N)
-    j = _lr0_cidhash.get(id(g),-1)
-    for p in g:
-        if p.lr_index < p.len - 1:
-            a = p.prod[p.lr_index + 1]
-            if empty.has_key(a):
-                rel.append((j,a))
-
-    return rel
-
-# -----------------------------------------------------------------------------
-# compute_lookback_includes()
-#
-# Determines the lookback and includes relations
-#
-# LOOKBACK:
-#
-# This relation is determined by running the LR(0) state machine forward.
-# For example, starting with a production "N : . A B C", we run it forward
-# to obtain "N : A B C ."  We then build a relationship between this final
-# state and the starting state.  These relationships are stored in a dictionary
-# lookdict.
-#
-# INCLUDES:
-#
-# Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
-#
-# This relation is used to determine non-terminal transitions that occur
-# inside of other non-terminal transition states.  (p,A) INCLUDES (p', B)
-# if the following holds:
-#
-#       B -> LAT, where T -> epsilon and p' -L-> p
-#
-# L is essentially a prefix (which may be empty), T is a suffix that must be
-# able to derive an empty string.  State p' must lead to state p with the string L.
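-# Concretely: if B -> L A T with every symbol of T nullable, then any
-# lookahead that can follow the B-transition taken at p' can also follow
-# the A-transition taken at p, so the follow set computed for (p',B)
-# must be folded into the follow set of (p,A); INCLUDES records exactly
-# these dependencies.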
-#
-# -----------------------------------------------------------------------------
-
-def compute_lookback_includes(C,trans,nullable):
-
-    lookdict = {}          # Dictionary of lookback relations
-    includedict = {}       # Dictionary of include relations
-
-    # Make a dictionary of non-terminal transitions
-    dtrans = {}
-    for t in trans:
-        dtrans[t] = 1
-
-    # Loop over all transitions and compute lookbacks and includes
-    for state,N in trans:
-        lookb = []
-        includes = []
-        for p in C[state]:
-            if p.name != N: continue
-
-            # Okay, we have a name match.  We now follow the production all the way
-            # through the state machine until we get the . on the right hand side
-
-            lr_index = p.lr_index
-            j = state
-            while lr_index < p.len - 1:
-                lr_index = lr_index + 1
-                t = p.prod[lr_index]
-
-                # Check to see if this symbol and state are a non-terminal transition
-                if dtrans.has_key((j,t)):
-                    # Yes.  Okay, there is some chance that this is an includes relation
-                    # the only way to know for certain is whether the rest of the
-                    # production derives empty
-
-                    li = lr_index + 1
-                    while li < p.len:
-                        if Terminals.has_key(p.prod[li]): break      # No, forget it
-                        if not nullable.has_key(p.prod[li]): break
-                        li = li + 1
-                    else:
-                        # Appears to be a relation between (j,t) and (state,N)
-                        includes.append((j,t))
-
-                g = lr0_goto(C[j],t)               # Go to next set
-                j = _lr0_cidhash.get(id(g),-1)     # Go to next state
-
-            # When we get here, j is the final state, now we have to locate the production
-            for r in C[j]:
-                if r.name != p.name: continue
-                if r.len != p.len:   continue
-                i = 0
-                # This loop is comparing a production ". A B C" with "A B C ."
-                while i < r.lr_index:
-                    if r.prod[i] != p.prod[i+1]: break
-                    i = i + 1
-                else:
-                    lookb.append((j,r))
-        for i in includes:
-            if not includedict.has_key(i): includedict[i] = []
-            includedict[i].append((state,N))
-        lookdict[(state,N)] = lookb
-
-    return lookdict,includedict
-
-# -----------------------------------------------------------------------------
-# digraph()
-# traverse()
-#
-# The following two functions are used to compute set valued functions
-# of the form:
-#
-#     F(x) = F'(x) U U{F(y) | x R y}
-#
-# This is used to compute the values of Read() sets as well as FOLLOW sets
-# in LALR(1) generation.
-#
-# Inputs:  X    - An input set
-#          R    - A relation
-#          FP   - Set-valued function
-# ------------------------------------------------------------------------------
-
-def digraph(X,R,FP):
-    N = { }
-    for x in X:
-        N[x] = 0
-    stack = []
-    F = { }
-    for x in X:
-        if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
-    return F
-
-def traverse(x,N,stack,F,X,R,FP):
-    stack.append(x)
-    d = len(stack)
-    N[x] = d
-    F[x] = FP(x)             # F(X) <- F'(x)
-
-    rel = R(x)               # Get y's related to x
-    for y in rel:
-        if N[y] == 0:
-            traverse(y,N,stack,F,X,R,FP)
-        N[x] = min(N[x],N[y])
-        for a in F.get(y,[]):
-            if a not in F[x]: F[x].append(a)
-    if N[x] == d:
-        N[stack[-1]] = sys.maxint
-        F[stack[-1]] = F[x]
-        element = stack.pop()
-        while element != x:
-            N[stack[-1]] = sys.maxint
-            F[stack[-1]] = F[x]
-            element = stack.pop()
-
-# -----------------------------------------------------------------------------
-# compute_read_sets()
-#
-# Given a set of LR(0) items, this function computes the read sets.
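-#
-# The sets satisfy the recurrence
-#
-#     Read(p,A) = DR(p,A) U U{ Read(t,C) | (p,A) READS (t,C) }
-#
-# which has exactly the F(x) = F'(x) U U{F(y) | x R y} shape solved by
-# digraph() above; compute_read_sets() plugs in dr_relation as F' and
-# reads_relation as R.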
-# -# Inputs: C = Set of LR(0) items -# ntrans = Set of nonterminal transitions -# nullable = Set of empty transitions -# -# Returns a set containing the read sets -# ----------------------------------------------------------------------------- - -def compute_read_sets(C, ntrans, nullable): - FP = lambda x: dr_relation(C,x,nullable) - R = lambda x: reads_relation(C,x,nullable) - F = digraph(ntrans,R,FP) - return F - -# ----------------------------------------------------------------------------- -# compute_follow_sets() -# -# Given a set of LR(0) items, a set of non-terminal transitions, a readset, -# and an include set, this function computes the follow sets -# -# Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} -# -# Inputs: -# ntrans = Set of nonterminal transitions -# readsets = Readset (previously computed) -# inclsets = Include sets (previously computed) -# -# Returns a set containing the follow sets -# ----------------------------------------------------------------------------- - -def compute_follow_sets(ntrans,readsets,inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x,[]) - F = digraph(ntrans,R,FP) - return F - -# ----------------------------------------------------------------------------- -# add_lookaheads() -# -# Attaches the lookahead symbols to grammar rules. -# -# Inputs: lookbacks - Set of lookback relations -# followset - Computed follow set -# -# This function directly attaches the lookaheads to productions contained -# in the lookbacks set -# ----------------------------------------------------------------------------- - -def add_lookaheads(lookbacks,followset): - for trans,lb in lookbacks.items(): - # Loop over productions in lookback - for state,p in lb: - if not p.lookaheads.has_key(state): - p.lookaheads[state] = [] - f = followset.get(trans,[]) - for a in f: - if a not in p.lookaheads[state]: p.lookaheads[state].append(a) - -# ----------------------------------------------------------------------------- -# add_lalr_lookaheads() -# -# This function does all of the work of adding lookahead information for use -# with LALR parsing -# ----------------------------------------------------------------------------- - -def add_lalr_lookaheads(C): - # Determine all of the nullable nonterminals - nullable = compute_nullable_nonterminals() - - # Find all non-terminal transitions - trans = find_nonterminal_transitions(C) - - # Compute read sets - readsets = compute_read_sets(C,trans,nullable) - - # Compute lookback/includes relations - lookd, included = compute_lookback_includes(C,trans,nullable) - - # Compute LALR FOLLOW sets - followsets = compute_follow_sets(trans,readsets,included) - - # Add all of the lookaheads - add_lookaheads(lookd,followsets) - -# ----------------------------------------------------------------------------- -# lr_parse_table() -# -# This function constructs the parse tables for SLR or LALR -# ----------------------------------------------------------------------------- -def lr_parse_table(method): - global _lr_method - goto = _lr_goto # Goto array - action = _lr_action # Action array - actionp = { } # Action production array (temporary) - - _lr_method = method - - n_srconflict = 0 - n_rrconflict = 0 - - if yaccdebug: - sys.stderr.write("yacc: Generating %s parsing table...\n" % method) - _vf.write("\n\nParsing method: %s\n\n" % method) - - # Step 1: Construct C = { I0, I1, ... 
IN}, collection of LR(0) items - # This determines the number of states - - C = lr0_items() - - if method == 'LALR': - add_lalr_lookaheads(C) - - # Build the parser table, state by state - st = 0 - for I in C: - # Loop over each production in I - actlist = [ ] # List of actions - - if yaccdebug: - _vf.write("\nstate %d\n\n" % st) - for p in I: - _vf.write(" (%d) %s\n" % (p.number, str(p))) - _vf.write("\n") - - for p in I: - try: - if p.prod[-1] == ".": - if p.name == "S'": - # Start symbol. Accept! - action[st,"$end"] = 0 - actionp[st,"$end"] = p - else: - # We are at the end of a production. Reduce! - if method == 'LALR': - laheads = p.lookaheads[st] - else: - laheads = Follow[p.name] - for a in laheads: - actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) - r = action.get((st,a),None) - if r is not None: - # Whoa. Have a shift/reduce or reduce/reduce conflict - if r > 0: - # Need to decide on shift or reduce here - # By default we favor shifting. Need to add - # some precedence rules here. - sprec,slevel = Productions[actionp[st,a].number].prec - rprec,rlevel = Precedence.get(a,('right',0)) - if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): - # We really need to reduce here. - action[st,a] = -p.number - actionp[st,a] = p - if not slevel and not rlevel: - _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) - _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) - n_srconflict += 1 - elif (slevel == rlevel) and (rprec == 'nonassoc'): - action[st,a] = None - else: - # Hmmm. Guess we'll keep the shift - if not rlevel: - _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) - _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) - n_srconflict +=1 - elif r < 0: - # Reduce/reduce conflict. In this case, we favor the rule - # that was defined first in the grammar file - oldp = Productions[-r] - pp = Productions[p.number] - if oldp.line > pp.line: - action[st,a] = -p.number - actionp[st,a] = p - # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st) - n_rrconflict += 1 - _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a])) - _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a])) - else: - sys.stderr.write("Unknown conflict in state %d\n" % st) - else: - action[st,a] = -p.number - actionp[st,a] = p - else: - i = p.lr_index - a = p.prod[i+1] # Get symbol right after the "." - if Terminals.has_key(a): - g = lr0_goto(I,a) - j = _lr0_cidhash.get(id(g),-1) - if j >= 0: - # We are in a shift state - actlist.append((a,p,"shift and go to state %d" % j)) - r = action.get((st,a),None) - if r is not None: - # Whoa have a shift/reduce or shift/shift conflict - if r > 0: - if r != j: - sys.stderr.write("Shift/shift conflict in state %d\n" % st) - elif r < 0: - # Do a precedence check. - # - if precedence of reduce rule is higher, we reduce. - # - if precedence of reduce is same and left assoc, we reduce. - # - otherwise we shift - rprec,rlevel = Productions[actionp[st,a].number].prec - sprec,slevel = Precedence.get(a,('right',0)) - if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')): - # We decide to shift here... highest precedence to shift - action[st,a] = j - actionp[st,a] = p - if not rlevel: - n_srconflict += 1 - _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) - _vf.write(" ! 
shift/reduce conflict for %s resolved as shift.\n" % a) - elif (slevel == rlevel) and (rprec == 'nonassoc'): - action[st,a] = None - else: - # Hmmm. Guess we'll keep the reduce - if not slevel and not rlevel: - n_srconflict +=1 - _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) - _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) - - else: - sys.stderr.write("Unknown conflict in state %d\n" % st) - else: - action[st,a] = j - actionp[st,a] = p - - except StandardError,e: - raise YaccError, "Hosed in lr_parse_table", e - - # Print the actions associated with each terminal - if yaccdebug: - _actprint = { } - for a,p,m in actlist: - if action.has_key((st,a)): - if p is actionp[st,a]: - _vf.write(" %-15s %s\n" % (a,m)) - _actprint[(a,m)] = 1 - _vf.write("\n") - for a,p,m in actlist: - if action.has_key((st,a)): - if p is not actionp[st,a]: - if not _actprint.has_key((a,m)): - _vf.write(" ! %-15s [ %s ]\n" % (a,m)) - _actprint[(a,m)] = 1 - - # Construct the goto table for this state - if yaccdebug: - _vf.write("\n") - nkeys = { } - for ii in I: - for s in ii.usyms: - if Nonterminals.has_key(s): - nkeys[s] = None - for n in nkeys.keys(): - g = lr0_goto(I,n) - j = _lr0_cidhash.get(id(g),-1) - if j >= 0: - goto[st,n] = j - if yaccdebug: - _vf.write(" %-30s shift and go to state %d\n" % (n,j)) - - st += 1 - - if yaccdebug: - if n_srconflict == 1: - sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict) - if n_srconflict > 1: - sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict) - if n_rrconflict == 1: - sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict) - if n_rrconflict > 1: - sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict) - -# ----------------------------------------------------------------------------- -# ==== LR Utility functions ==== -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# _lr_write_tables() -# -# This function writes the LR parsing tables to a file -# ----------------------------------------------------------------------------- - -def lr_write_tables(modulename=tab_module,outputdir=''): - filename = os.path.join(outputdir,modulename) + ".py" - try: - f = open(filename,"w") - - f.write(""" -# %s -# This file is automatically generated. Do not edit. 
- -_lr_method = %s - -_lr_signature = %s -""" % (filename, repr(_lr_method), repr(Signature.digest()))) - - # Change smaller to 0 to go back to original tables - smaller = 1 - - # Factor out names to try and make smaller - if smaller: - items = { } - - for k,v in _lr_action.items(): - i = items.get(k[1]) - if not i: - i = ([],[]) - items[k[1]] = i - i[0].append(k[0]) - i[1].append(v) - - f.write("\n_lr_action_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_action = { } -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - _lr_action[(_x,_k)] = _y -del _lr_action_items -""") - - else: - f.write("\n_lr_action = { "); - for k,v in _lr_action.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - if smaller: - # Factor out names to try and make smaller - items = { } - - for k,v in _lr_goto.items(): - i = items.get(k[1]) - if not i: - i = ([],[]) - items[k[1]] = i - i[0].append(k[0]) - i[1].append(v) - - f.write("\n_lr_goto_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_goto = { } -for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - _lr_goto[(_x,_k)] = _y -del _lr_goto_items -""") - else: - f.write("\n_lr_goto = { "); - for k,v in _lr_goto.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - # Write production table - f.write("_lr_productions = [\n") - for p in Productions: - if p: - if (p.func): - f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line)) - else: - f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len)) - else: - f.write(" None,\n") - f.write("]\n") - - f.close() - - except IOError,e: - print "Unable to create '%s'" % filename - print e - return - -def lr_read_tables(module=tab_module,optimize=0): - global _lr_action, _lr_goto, _lr_productions, _lr_method - try: - exec "import %s as parsetab" % module - - if (optimize) or (Signature.digest() == parsetab._lr_signature): - _lr_action = parsetab._lr_action - _lr_goto = parsetab._lr_goto - _lr_productions = parsetab._lr_productions - _lr_method = parsetab._lr_method - return 1 - else: - return 0 - - except (ImportError,AttributeError): - return 0 - - -# Available instance types. This is used when parsers are defined by a class. -# it's a little funky because I want to preserve backwards compatibility -# with Python 2.0 where types.ObjectType is undefined. - -try: - _INSTANCETYPE = (types.InstanceType, types.ObjectType) -except AttributeError: - _INSTANCETYPE = types.InstanceType - -# ----------------------------------------------------------------------------- -# yacc(module) -# -# Build the parser module -# ----------------------------------------------------------------------------- - -def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0,write_tables=1,debugfile=debug_file,outputdir=''): - global yaccdebug - yaccdebug = debug - - initialize_vars() - files = { } - error = 0 - - - # Add parsing method to signature - Signature.update(method) - - # If a "module" parameter was supplied, extract its dictionary. - # Note: a module may in fact be an instance as well. - - if module: - # User supplied a module object. 
-        if isinstance(module, types.ModuleType):
-            ldict = module.__dict__
-        elif isinstance(module, _INSTANCETYPE):
-            _items = [(k,getattr(module,k)) for k in dir(module)]
-            ldict = { }
-            for i in _items:
-                ldict[i[0]] = i[1]
-        else:
-            raise ValueError,"Expected a module"
-
-    else:
-        # No module given.  We might be able to get information from the caller.
-        # Throw an exception and unwind the traceback to get the globals
-
-        try:
-            raise RuntimeError
-        except RuntimeError:
-            e,b,t = sys.exc_info()
-            f = t.tb_frame
-            f = f.f_back           # Walk out to our calling function
-            ldict = f.f_globals    # Grab its globals dictionary
-
-    # Add starting symbol to signature
-    if not start:
-        start = ldict.get("start",None)
-    if start:
-        Signature.update(start)
-
-    # If running in optimized mode, we're going to read the parsing tables directly
-
-    if (optimize and lr_read_tables(tabmodule,1)):
-        # Read parse table
-        del Productions[:]
-        for p in _lr_productions:
-            if not p:
-                Productions.append(None)
-            else:
-                m = MiniProduction()
-                m.name = p[0]
-                m.len  = p[1]
-                m.file = p[3]
-                m.line = p[4]
-                if p[2]:
-                    m.func = ldict[p[2]]
-                Productions.append(m)
-
-    else:
-        # Get the tokens map
-        if (module and isinstance(module,_INSTANCETYPE)):
-            tokens = getattr(module,"tokens",None)
-        else:
-            tokens = ldict.get("tokens",None)
-
-        if not tokens:
-            raise YaccError,"module does not define a list 'tokens'"
-        if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
-            raise YaccError,"tokens must be a list or tuple."
-
-        # Check to see if a requires dictionary is defined.
-        requires = ldict.get("require",None)
-        if requires:
-            if not (isinstance(requires,types.DictType)):
-                raise YaccError,"require must be a dictionary."
-
-            for r,v in requires.items():
-                try:
-                    if not (isinstance(v,types.ListType)):
-                        raise TypeError
-                    v1 = [x.split(".") for x in v]
-                    Requires[r] = v1
-                except StandardError:
-                    print "Invalid specification for rule '%s' in require. Expected a list of strings" % r
-
-        # Build the dictionary of terminals.  We record a 0 in the
-        # dictionary to track whether or not a terminal is actually
-        # used in the grammar
-
-        if 'error' in tokens:
-            print "yacc: Illegal token 'error'.  Is a reserved word."
-            raise YaccError,"Illegal token name"
-
-        for n in tokens:
-            if Terminals.has_key(n):
-                print "yacc: Warning. Token '%s' multiply defined." % n
-            Terminals[n] = [ ]
-
-        Terminals['error'] = [ ]
-
-        # Get the precedence map (if any)
-        prec = ldict.get("precedence",None)
-        if prec:
-            if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)):
-                raise YaccError,"precedence must be a list or tuple."
-            add_precedence(prec)
-            Signature.update(repr(prec))
-
-        for n in tokens:
-            if not Precedence.has_key(n):
-                Precedence[n] = ('right',0)         # Default, right associative, 0 precedence
-
-        # Look for error handler
-        ef = ldict.get('p_error',None)
-        if ef:
-            if isinstance(ef,types.FunctionType):
-                ismethod = 0
-            elif isinstance(ef, types.MethodType):
-                ismethod = 1
-            else:
-                raise YaccError,"'p_error' defined, but is not a function or method."
-            eline = ef.func_code.co_firstlineno
-            efile = ef.func_code.co_filename
-            files[efile] = None
-
-            if (ef.func_code.co_argcount != 1+ismethod):
-                raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline)
-            global Errorfunc
-            Errorfunc = ef
-        else:
-            print "yacc: Warning. No p_error() function is defined."
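-        # For reference, a user-supplied handler might look like this
-        # (hypothetical example; any function or method accepting the
-        # single offending token will do):
-        #
-        #     def p_error(p):
-        #         print "Syntax error at '%s'" % p.value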
- - # Get the list of built-in functions with p_ prefix - symbols = [ldict[f] for f in ldict.keys() - if (type(ldict[f]) in (types.FunctionType, types.MethodType) and ldict[f].__name__[:2] == 'p_' - and ldict[f].__name__ != 'p_error')] - - # Check for non-empty symbols - if len(symbols) == 0: - raise YaccError,"no rules of the form p_rulename are defined." - - # Sort the symbols by line number - symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno)) - - # Add all of the symbols to the grammar - for f in symbols: - if (add_function(f)) < 0: - error += 1 - else: - files[f.func_code.co_filename] = None - - # Make a signature of the docstrings - for f in symbols: - if f.__doc__: - Signature.update(f.__doc__) - - lr_init_vars() - - if error: - raise YaccError,"Unable to construct parser." - - if not lr_read_tables(tabmodule): - - # Validate files - for filename in files.keys(): - if not validate_file(filename): - error = 1 - - # Validate dictionary - validate_dict(ldict) - - if start and not Prodnames.has_key(start): - raise YaccError,"Bad starting symbol '%s'" % start - - augment_grammar(start) - error = verify_productions(cycle_check=check_recursion) - otherfunc = [ldict[f] for f in ldict.keys() - if (type(f) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')] - - if error: - raise YaccError,"Unable to construct parser." - - build_lritems() - compute_first1() - compute_follow(start) - - if method in ['SLR','LALR']: - lr_parse_table(method) - else: - raise YaccError, "Unknown parsing method '%s'" % method - - if write_tables: - lr_write_tables(tabmodule,outputdir) - - if yaccdebug: - try: - f = open(os.path.join(outputdir,debugfile),"w") - f.write(_vfc.getvalue()) - f.write("\n\n") - f.write(_vf.getvalue()) - f.close() - except IOError,e: - print "yacc: can't create '%s'" % debugfile,e - - # Made it here. Create a parser object and set up its internal state. - # Set global parse() method to bound method of parser object. - - p = Parser("xyzzy") - p.productions = Productions - p.errorfunc = Errorfunc - p.action = _lr_action - p.goto = _lr_goto - p.method = _lr_method - p.require = Requires - - global parse - parse = p.parse - - global parser - parser = p - - # Clean up all of the globals we created - if (not optimize): - yacc_cleanup() - return p - -# yacc_cleanup function. Delete all of the global variables -# used during table construction - -def yacc_cleanup(): - global _lr_action, _lr_goto, _lr_method, _lr_goto_cache - del _lr_action, _lr_goto, _lr_method, _lr_goto_cache - - global Productions, Prodnames, Prodmap, Terminals - global Nonterminals, First, Follow, Precedence, LRitems - global Errorfunc, Signature, Requires - - del Productions, Prodnames, Prodmap, Terminals - del Nonterminals, First, Follow, Precedence, LRitems - del Errorfunc, Signature, Requires - - global _vf, _vfc - del _vf, _vfc - - -# Stub that raises an error if parsing is attempted without first calling yacc() -def parse(*args,**kwargs): - raise YaccError, "yacc: No parser built with yacc()" - diff --git a/chall/ply-2.2/doc/makedoc.py b/chall/ply-2.2/doc/makedoc.py deleted file mode 100644 index 415a53a..0000000 --- a/chall/ply-2.2/doc/makedoc.py +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/local/bin/python - -############################################################################### -# Takes a chapter as input and adds internal links and numbering to all -# of the H1, H2, H3, H4 and H5 sections. 
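-#
-# For example, a heading such as
-#
-#     <H2>Lex</H2>
-#
-# in a file named ply.html is rewritten to something like
-#
-#     <H2><a name="ply_nn3"></a>3. Lex</H2>
-#
-# where the anchor name combines the file name with an autogenerated
-# "_nn" suffix and the numeric prefix reflects the heading's position.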
-# -# Every heading HTML tag (H1, H2 etc) is given an autogenerated name to link -# to. However, if the name is not an autogenerated name from a previous run, -# it will be kept. If it is autogenerated, it might change on subsequent runs -# of this program. Thus if you want to create links to one of the headings, -# then change the heading link name to something that does not look like an -# autogenerated link name. -############################################################################### - -import sys -import re -import string - -############################################################################### -# Functions -############################################################################### - -# Regexs for <a name="..."></a> -alink = re.compile(r"<a *name *= *\"(.*)\"></a>", re.IGNORECASE) -heading = re.compile(r"(_nn\d)", re.IGNORECASE) - -def getheadingname(m): - autogeneratedheading = True; - if m.group(1) != None: - amatch = alink.match(m.group(1)) - if amatch: - # A non-autogenerated heading - keep it - headingname = amatch.group(1) - autogeneratedheading = heading.match(headingname) - if autogeneratedheading: - # The heading name was either non-existent or autogenerated, - # We can create a new heading / change the existing heading - headingname = "%s_nn%d" % (filenamebase, nameindex) - return headingname - -############################################################################### -# Main program -############################################################################### - -if len(sys.argv) != 2: - print "usage: makedoc.py filename" - sys.exit(1) - -filename = sys.argv[1] -filenamebase = string.split(filename,".")[0] - -section = 0 -subsection = 0 -subsubsection = 0 -subsubsubsection = 0 -nameindex = 0 - -name = "" - -# Regexs for <h1>,... <h5> sections - -h1 = re.compile(r".*?<H1>(<a.*a>)*[\d\.\s]*(.*?)</H1>", re.IGNORECASE) -h2 = re.compile(r".*?<H2>(<a.*a>)*[\d\.\s]*(.*?)</H2>", re.IGNORECASE) -h3 = re.compile(r".*?<H3>(<a.*a>)*[\d\.\s]*(.*?)</H3>", re.IGNORECASE) -h4 = re.compile(r".*?<H4>(<a.*a>)*[\d\.\s]*(.*?)</H4>", re.IGNORECASE) -h5 = re.compile(r".*?<H5>(<a.*a>)*[\d\.\s]*(.*?)</H5>", re.IGNORECASE) - -data = open(filename).read() # Read data -open(filename+".bak","w").write(data) # Make backup - -lines = data.splitlines() -result = [ ] # This is the result of postprocessing the file -index = "<!-- INDEX -->\n<div class=\"sectiontoc\">\n" # index contains the index for adding at the top of the file. Also printed to stdout. - -skip = 0 -skipspace = 0 - -for s in lines: - if s == "<!-- INDEX -->": - if not skip: - result.append("@INDEX@") - skip = 1 - else: - skip = 0 - continue; - if skip: - continue - - if not s and skipspace: - continue - - if skipspace: - result.append("") - result.append("") - skipspace = 0 - - m = h2.match(s) - if m: - prevheadingtext = m.group(2) - nameindex += 1 - section += 1 - headingname = getheadingname(m) - result.append("""<H2><a name="%s"></a>%d. 
%s</H2>""" % (headingname,section, prevheadingtext)) - - if subsubsubsection: - index += "</ul>\n" - if subsubsection: - index += "</ul>\n" - if subsection: - index += "</ul>\n" - if section == 1: - index += "<ul>\n" - - index += """<li><a href="#%s">%s</a>\n""" % (headingname,prevheadingtext) - subsection = 0 - subsubsection = 0 - subsubsubsection = 0 - skipspace = 1 - continue - m = h3.match(s) - if m: - prevheadingtext = m.group(2) - nameindex += 1 - subsection += 1 - headingname = getheadingname(m) - result.append("""<H3><a name="%s"></a>%d.%d %s</H3>""" % (headingname,section, subsection, prevheadingtext)) - - if subsubsubsection: - index += "</ul>\n" - if subsubsection: - index += "</ul>\n" - if subsection == 1: - index += "<ul>\n" - - index += """<li><a href="#%s">%s</a>\n""" % (headingname,prevheadingtext) - subsubsection = 0 - skipspace = 1 - continue - m = h4.match(s) - if m: - prevheadingtext = m.group(2) - nameindex += 1 - subsubsection += 1 - subsubsubsection = 0 - headingname = getheadingname(m) - result.append("""<H4><a name="%s"></a>%d.%d.%d %s</H4>""" % (headingname,section, subsection, subsubsection, prevheadingtext)) - - if subsubsubsection: - index += "</ul>\n" - if subsubsection == 1: - index += "<ul>\n" - - index += """<li><a href="#%s">%s</a>\n""" % (headingname,prevheadingtext) - skipspace = 1 - continue - m = h5.match(s) - if m: - prevheadingtext = m.group(2) - nameindex += 1 - subsubsubsection += 1 - headingname = getheadingname(m) - result.append("""<H5><a name="%s"></a>%d.%d.%d.%d %s</H5>""" % (headingname,section, subsection, subsubsection, subsubsubsection, prevheadingtext)) - - if subsubsubsection == 1: - index += "<ul>\n" - - index += """<li><a href="#%s">%s</a>\n""" % (headingname,prevheadingtext) - skipspace = 1 - continue - - result.append(s) - -if subsubsubsection: - index += "</ul>\n" - -if subsubsection: - index += "</ul>\n" - -if subsection: - index += "</ul>\n" - -if section: - index += "</ul>\n" - -index += "</div>\n<!-- INDEX -->\n" - -data = "\n".join(result) - -data = data.replace("@INDEX@",index) + "\n"; - -# Write the file back out -open(filename,"w").write(data) - - diff --git a/chall/ply-2.2/doc/ply.html b/chall/ply-2.2/doc/ply.html deleted file mode 100644 index b3219ea..0000000 --- a/chall/ply-2.2/doc/ply.html +++ /dev/null @@ -1,2874 +0,0 @@ -<html> -<head> -<title>PLY (Python Lex-Yacc)</title> -</head> -<body bgcolor="#ffffff"> - -<h1>PLY (Python Lex-Yacc)</h1> - -<b> -David M. 
Beazley <br> -dave@dabeaz.com<br> -</b> - -<p> -<b>PLY Version: 2.2</b> -<p> - -<!-- INDEX --> -<div class="sectiontoc"> -<ul> -<li><a href="#ply_nn1">Introduction</a> -<li><a href="#ply_nn2">PLY Overview</a> -<li><a href="#ply_nn3">Lex</a> -<ul> -<li><a href="#ply_nn4">Lex Example</a> -<li><a href="#ply_nn5">The tokens list</a> -<li><a href="#ply_nn6">Specification of tokens</a> -<li><a href="#ply_nn7">Token values</a> -<li><a href="#ply_nn8">Discarded tokens</a> -<li><a href="#ply_nn9">Line numbers and positional information</a> -<li><a href="#ply_nn10">Ignored characters</a> -<li><a href="#ply_nn11">Literal characters</a> -<li><a href="#ply_nn12">Error handling</a> -<li><a href="#ply_nn13">Building and using the lexer</a> -<li><a href="#ply_nn14">The @TOKEN decorator</a> -<li><a href="#ply_nn15">Optimized mode</a> -<li><a href="#ply_nn16">Debugging</a> -<li><a href="#ply_nn17">Alternative specification of lexers</a> -<li><a href="#ply_nn18">Maintaining state</a> -<li><a href="#ply_nn19">Duplicating lexers</a> -<li><a href="#ply_nn20">Internal lexer state</a> -<li><a href="#ply_nn21">Conditional lexing and start conditions</a> -<li><a href="#ply_nn21">Miscellaneous Issues</a> -</ul> -<li><a href="#ply_nn22">Parsing basics</a> -<li><a href="#ply_nn23">Yacc reference</a> -<ul> -<li><a href="#ply_nn24">An example</a> -<li><a href="#ply_nn25">Combining Grammar Rule Functions</a> -<li><a href="#ply_nn26">Character Literals</a> -<li><a href="#ply_nn26">Empty Productions</a> -<li><a href="#ply_nn28">Changing the starting symbol</a> -<li><a href="#ply_nn27">Dealing With Ambiguous Grammars</a> -<li><a href="#ply_nn28">The parser.out file</a> -<li><a href="#ply_nn29">Syntax Error Handling</a> -<ul> -<li><a href="#ply_nn30">Recovery and resynchronization with error rules</a> -<li><a href="#ply_nn31">Panic mode recovery</a> -<li><a href="#ply_nn32">General comments on error handling</a> -</ul> -<li><a href="#ply_nn33">Line Number and Position Tracking</a> -<li><a href="#ply_nn34">AST Construction</a> -<li><a href="#ply_nn35">Embedded Actions</a> -<li><a href="#ply_nn36">Yacc implementation notes</a> -</ul> -<li><a href="#ply_nn37">Parser and Lexer State Management</a> -<li><a href="#ply_nn38">Using Python's Optimized Mode</a> -<li><a href="#ply_nn39">Where to go from here?</a> -</ul> -</div> -<!-- INDEX --> - - - - - - -<H2><a name="ply_nn1"></a>1. Introduction</H2> - - -PLY is a pure-Python implementation of the popular compiler -construction tools lex and yacc. The main goal of PLY is to stay -fairly faithful to the way in which traditional lex/yacc tools work. -This includes supporting LALR(1) parsing as well as providing -extensive input validation, error reporting, and diagnostics. Thus, -if you've used yacc in another programming language, it should be -relatively straightforward to use PLY. - -<p> -Early versions of PLY were developed to support an Introduction to -Compilers Course I taught in 2001 at the University of Chicago. In this course, -students built a fully functional compiler for a simple Pascal-like -language. Their compiler, implemented entirely in Python, had to -include lexical analysis, parsing, type checking, type inference, -nested scoping, and code generation for the SPARC processor. -Approximately 30 different compiler implementations were completed in -this course. Most of PLY's interface and operation has been influenced by common -usability problems encountered by students. 
-
-<p>
-Since PLY was primarily developed as an instructional tool, you will
-find it to be fairly picky about token and grammar rule
-specification. In part, this
-added formality is meant to catch common programming mistakes made by
-novice users. However, advanced users will also find such features to
-be useful when building complicated grammars for real programming
-languages. It should also be noted that PLY does not provide much in
-the way of bells and whistles (e.g., automatic construction of
-abstract syntax trees, tree traversal, etc.). Nor would I consider it
-to be a parsing framework. Instead, you will find a bare-bones, yet
-fully capable lex/yacc implementation written entirely in Python.
-
-<p>
-The rest of this document assumes that you are somewhat familiar with
-parsing theory, syntax directed translation, and the use of compiler
-construction tools such as lex and yacc in other programming
-languages. If you are unfamiliar with these topics, you will probably
-want to consult an introductory text such as "Compilers: Principles,
-Techniques, and Tools", by Aho, Sethi, and Ullman. O'Reilly's "Lex
-and Yacc" by John Levine may also be handy. In fact, the O'Reilly book can be
-used as a reference for PLY as the concepts are virtually identical.
-
-<H2><a name="ply_nn2"></a>2. PLY Overview</H2>
-
-
-PLY consists of two separate modules: <tt>lex.py</tt> and
-<tt>yacc.py</tt>, both of which are found in a Python package
-called <tt>ply</tt>. The <tt>lex.py</tt> module is used to break input text into a
-collection of tokens specified by a collection of regular expression
-rules. <tt>yacc.py</tt> is used to recognize language syntax that has
-been specified in the form of a context free grammar. <tt>yacc.py</tt> uses LR parsing and generates its parsing tables
-using either the LALR(1) (the default) or SLR table generation algorithms.
-
-<p>
-The two tools are meant to work together.  Specifically,
-<tt>lex.py</tt> provides an external interface in the form of a
-<tt>token()</tt> function that returns the next valid token on the
-input stream. <tt>yacc.py</tt> calls this repeatedly to retrieve
-tokens and invoke grammar rules. The output of <tt>yacc.py</tt> is
-often an Abstract Syntax Tree (AST). However, this is entirely up to
-the user. If desired, <tt>yacc.py</tt> can also be used to implement
-simple one-pass compilers.
-
-<p>
-Like its Unix counterpart, <tt>yacc.py</tt> provides most of the
-features you expect including extensive error checking, grammar
-validation, support for empty productions, error tokens, and ambiguity
-resolution via precedence rules. In fact, everything that is possible in traditional yacc
-should be supported in PLY.
-
-<p>
-The primary difference between
-<tt>yacc.py</tt> and Unix <tt>yacc</tt> is that <tt>yacc.py</tt>
-doesn't involve a separate code-generation process.
-Instead, PLY relies on reflection (introspection)
-to build its lexers and parsers.  Unlike traditional lex/yacc which
-require a special input file that is converted into a separate source
-file, the specifications given to PLY <em>are</em> valid Python
-programs.  This means that there are no extra source files nor is
-there a special compiler construction step (e.g., running yacc to
-generate Python code for the compiler).  Since the generation of the
-parsing tables is relatively expensive, PLY caches the results and
-saves them to a file.  If no changes are detected in the input source,
-the tables are read from the cache. Otherwise, they are regenerated.
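-
-<p>
-As a rough sketch of how the two modules cooperate (this assumes a module
-that defines the usual <tt>tokens</tt> list, <tt>t_</tt> rules, and
-<tt>p_</tt> rules; the input string is illustrative only):
-
-<blockquote>
-<pre>
-import ply.lex as lex
-import ply.yacc as yacc
-
-lex.lex()      # Build the lexer from the t_ rules in this module
-yacc.yacc()    # Build the parser; tables are cached in parsetab.py
-
-result = yacc.parse("3 + 42 * (s - t)")
-</pre>
-</blockquote>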
-
-<H2><a name="ply_nn3"></a>3. Lex</H2>
-
-
-<tt>lex.py</tt> is used to tokenize an input string.  For example, suppose
-you're writing a programming language and a user supplied the following input string:
-
-<blockquote>
-<pre>
-x = 3 + 42 * (s - t)
-</pre>
-</blockquote>
-
-A tokenizer splits the string into individual tokens
-
-<blockquote>
-<pre>
-'x','=', '3', '+', '42', '*', '(', 's', '-', 't', ')'
-</pre>
-</blockquote>
-
-Tokens are usually given names to indicate what they are. For example:
-
-<blockquote>
-<pre>
-'ID','EQUALS','NUMBER','PLUS','NUMBER','TIMES',
-'LPAREN','ID','MINUS','ID','RPAREN'
-</pre>
-</blockquote>
-
-More specifically, the input is broken into pairs of token types and values.  For example:
-
-<blockquote>
-<pre>
-('ID','x'), ('EQUALS','='), ('NUMBER','3'),
-('PLUS','+'), ('NUMBER','42'), ('TIMES','*'),
-('LPAREN','('), ('ID','s'), ('MINUS','-'),
-('ID','t'), ('RPAREN',')')
-</pre>
-</blockquote>
-
-The identification of tokens is typically done by writing a series of regular expression
-rules.  The next section shows how this is done using <tt>lex.py</tt>.
-
-<H3><a name="ply_nn4"></a>3.1 Lex Example</H3>
-
-
-The following example shows how <tt>lex.py</tt> is used to write a simple tokenizer.
-
-<blockquote>
-<pre>
-# ------------------------------------------------------------
-# calclex.py
-#
-# tokenizer for a simple expression evaluator for
-# numbers and +,-,*,/
-# ------------------------------------------------------------
-import ply.lex as lex
-
-# List of token names.   This is always required
-tokens = (
-   'NUMBER',
-   'PLUS',
-   'MINUS',
-   'TIMES',
-   'DIVIDE',
-   'LPAREN',
-   'RPAREN',
-)
-
-# Regular expression rules for simple tokens
-t_PLUS    = r'\+'
-t_MINUS   = r'-'
-t_TIMES   = r'\*'
-t_DIVIDE  = r'/'
-t_LPAREN  = r'\('
-t_RPAREN  = r'\)'
-
-# A regular expression rule with some action code
-def t_NUMBER(t):
-    r'\d+'
-    try:
-        t.value = int(t.value)
-    except ValueError:
-        print "Line %d: Number %s is too large!" % (t.lineno,t.value)
-        t.value = 0
-    return t
-
-# Define a rule so we can track line numbers
-def t_newline(t):
-    r'\n+'
-    t.lexer.lineno += len(t.value)
-
-# A string containing ignored characters (spaces and tabs)
-t_ignore  = ' \t'
-
-# Error handling rule
-def t_error(t):
-    print "Illegal character '%s'" % t.value[0]
-    t.lexer.skip(1)
-
-# Build the lexer
-lex.lex()
-
-</pre>
-</blockquote>
-To use the lexer, you first need to feed it some input text using its <tt>input()</tt> method. After that, repeated calls to <tt>token()</tt> produce tokens. The following code shows how this works:
-
-<blockquote>
-<pre>
-
-# Test it out
-data = '''
-3 + 4 * 10
-  + -20 *2
-'''
-
-# Give the lexer some input
-lex.input(data)
-
-# Tokenize
-while 1:
-    tok = lex.token()
-    if not tok: break      # No more input
-    print tok
-</pre>
-</blockquote>
-
-When executed, the example will produce the following output:
-
-<blockquote>
-<pre>
-$ python example.py
-LexToken(NUMBER,3,2,1)
-LexToken(PLUS,'+',2,3)
-LexToken(NUMBER,4,2,5)
-LexToken(TIMES,'*',2,7)
-LexToken(NUMBER,10,2,10)
-LexToken(PLUS,'+',3,14)
-LexToken(MINUS,'-',3,16)
-LexToken(NUMBER,20,3,18)
-LexToken(TIMES,'*',3,20)
-LexToken(NUMBER,2,3,21)
-</pre>
-</blockquote>
-
-The tokens returned by <tt>lex.token()</tt> are instances
-of <tt>LexToken</tt>.  This object has
-attributes <tt>tok.type</tt>, <tt>tok.value</tt>,
-<tt>tok.lineno</tt>, and <tt>tok.lexpos</tt>.
-The following code shows an example of
-accessing these attributes:
-
-<blockquote>
-<pre>
-# Tokenize
-while 1:
-    tok = lex.token()
-    if not tok: break      # No more input
-    print tok.type, tok.value, tok.lineno, tok.lexpos
-</pre>
-</blockquote>
-
-The <tt>tok.type</tt> and <tt>tok.value</tt> attributes contain the
-type and value of the token itself.
-<tt>tok.lineno</tt> and <tt>tok.lexpos</tt> contain information about
-the location of the token.  <tt>tok.lexpos</tt> is the index of the
-token relative to the start of the input text.
-
-<H3><a name="ply_nn5"></a>3.2 The tokens list</H3>
-
-
-All lexers must provide a list <tt>tokens</tt> that defines all of the possible token
-names that can be produced by the lexer.  This list is always required
-and is used to perform a variety of validation checks.  The tokens list is also used by the
-<tt>yacc.py</tt> module to identify terminals.
-
-<p>
-In the example, the following code specified the token names:
-
-<blockquote>
-<pre>
-tokens = (
-   'NUMBER',
-   'PLUS',
-   'MINUS',
-   'TIMES',
-   'DIVIDE',
-   'LPAREN',
-   'RPAREN',
-)
-</pre>
-</blockquote>
-
-<H3><a name="ply_nn6"></a>3.3 Specification of tokens</H3>
-
-
-Each token is specified by writing a regular expression rule.  Each of these rules
-is defined by making declarations with a special prefix <tt>t_</tt> to indicate that it
-defines a token.  For simple tokens, the regular expression can
-be specified as strings such as this (note: Python raw strings are used since they are the
-most convenient way to write regular expression strings):
-
-<blockquote>
-<pre>
-t_PLUS = r'\+'
-</pre>
-</blockquote>
-
-In this case, the name following the <tt>t_</tt> must exactly match one of the
-names supplied in <tt>tokens</tt>.   If some kind of action needs to be performed,
-a token rule can be specified as a function.  For example, this rule matches numbers and
-converts the string into a Python integer.
-
-<blockquote>
-<pre>
-def t_NUMBER(t):
-    r'\d+'
-    try:
-        t.value = int(t.value)
-    except ValueError:
-        print "Number %s is too large!" % t.value
-        t.value = 0
-    return t
-</pre>
-</blockquote>
-
-When a function is used, the regular expression rule is specified in the function documentation string.
-The function always takes a single argument which is an instance of
-<tt>LexToken</tt>.   This object has attributes of <tt>t.type</tt> which is the token type (as a string),
-<tt>t.value</tt> which is the lexeme (the actual text matched), <tt>t.lineno</tt> which is the current line number, and <tt>t.lexpos</tt> which
-is the position of the token relative to the beginning of the input text.
-By default, <tt>t.type</tt> is set to the name following the <tt>t_</tt> prefix.  The action
-function can modify the contents of the <tt>LexToken</tt> object as appropriate.  However,
-when it is done, the resulting token should be returned.  If no value is returned by the action
-function, the token is simply discarded and the next token read.
-
-<p>
-Internally, <tt>lex.py</tt> uses the <tt>re</tt> module to do its pattern matching.  When building the master regular expression,
-rules are added in the following order:
-<p>
-<ol>
-<li>All tokens defined by functions are added in the same order as they appear in the lexer file.
-<li>Tokens defined by strings are added next by sorting them in order of decreasing regular expression length (longer expressions
-are added first).
-</ol>
-<p>
-Without this ordering, it can be difficult to correctly match certain types of tokens.
By sorting regular
-expressions in order of decreasing length, this problem is solved for rules defined as strings.  For functions,
-the order can be explicitly controlled since rules appearing first are checked first.
-
-<p>
-To handle reserved words, it is usually easier to just match an identifier and do a special name lookup in a function
-like this:
-
-<blockquote>
-<pre>
-reserved = {
-   'if' : 'IF',
-   'then' : 'THEN',
-   'else' : 'ELSE',
-   'while' : 'WHILE',
-   ...
-}
-
-def t_ID(t):
-    r'[a-zA-Z_][a-zA-Z_0-9]*'
-    t.type = reserved.get(t.value,'ID')    # Check for reserved words
-    return t
-</pre>
-</blockquote>
-
-This approach greatly reduces the number of regular expression rules and is likely to make things a little faster.
-
-<p>
-<b>Note:</b> You should avoid writing individual rules for reserved words.  For example, if you write rules like this,
-
-<blockquote>
-<pre>
-t_FOR   = r'for'
-t_PRINT = r'print'
-</pre>
-</blockquote>
-
-those rules will be triggered for identifiers that include those words as a prefix such as "forget" or "printed".  This is probably not
-what you want.
-
-<H3><a name="ply_nn7"></a>3.4 Token values</H3>
-
-
-When tokens are returned by lex, they have a value that is stored in the <tt>value</tt> attribute.  Normally, the value is the text
-that was matched.  However, any Python object can be assigned to the value.  For instance, when lexing identifiers, you may
-want to return both the identifier name and information from some sort of symbol table.  To do this, you might write a rule like this:
-
-<blockquote>
-<pre>
-def t_ID(t):
-    ...
-    # Look up symbol table information and return a tuple
-    t.value = (t.value, symbol_lookup(t.value))
-    ...
-    return t
-</pre>
-</blockquote>
-
-It is important to note that storing data in other attribute names is <em>not</em> recommended.  The <tt>yacc.py</tt> module only exposes the
-contents of the <tt>value</tt> attribute.  Thus, accessing other attributes may be unnecessarily awkward.
-
-<H3><a name="ply_nn8"></a>3.5 Discarded tokens</H3>
-
-
-To discard a token, such as a comment, simply define a token rule that returns no value.  For example:
-
-<blockquote>
-<pre>
-def t_COMMENT(t):
-    r'\#.*'
-    pass
-    # No return value. Token discarded
-</pre>
-</blockquote>
-
-Alternatively, you can include the prefix "ignore_" in the token declaration to force a token to be ignored.  For example:
-
-<blockquote>
-<pre>
-t_ignore_COMMENT = r'\#.*'
-</pre>
-</blockquote>
-
-Be advised that if you are ignoring many different kinds of text, you may still want to use functions since these provide more precise
-control over the order in which regular expressions are matched (i.e., functions are matched in order of specification whereas strings are
-sorted by regular expression length).
-
-<H3><a name="ply_nn9"></a>3.6 Line numbers and positional information</H3>
-
-
-<p>By default, <tt>lex.py</tt> knows nothing about line numbers.  This is because <tt>lex.py</tt> doesn't know anything
-about what constitutes a "line" of input (e.g., the newline character or even if the input is textual data).
-To update this information, you need to write a special rule.  In the example, the <tt>t_newline()</tt> rule shows how to do this.
-
-<blockquote>
-<pre>
-# Define a rule so we can track line numbers
-def t_newline(t):
-    r'\n+'
-    t.lexer.lineno += len(t.value)
-</pre>
-</blockquote>
-Within the rule, the <tt>lineno</tt> attribute of the underlying lexer <tt>t.lexer</tt> is updated.
-After the line number is updated, the token is simply discarded since nothing is returned.
-
-<p>
-<tt>lex.py</tt> does not perform any kind of automatic column tracking.  However, it does record positional
-information related to each token in the <tt>lexpos</tt> attribute.  Using this, it is usually possible to compute
-column information as a separate step.  For instance, just count backwards until you reach a newline.
-
-<blockquote>
-<pre>
-# Compute column.
-#     input is the input text string
-#     token is a token instance
-def find_column(input,token):
-    i = token.lexpos
-    while i > 0:
-        if input[i-1] == '\n': break
-        i -= 1
-    column = (token.lexpos - i)+1
-    return column
-</pre>
-</blockquote>
-
-Since column information is often only useful in the context of error handling, calculating the column
-position can be performed when needed as opposed to doing it for each token.
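-
-If you prefer, the same computation can be written more compactly using <tt>rfind()</tt>.  This is
-just an equivalent sketch of the loop above, under the same assumptions about its inputs:
-
-<blockquote>
-<pre>
-# Compute column using rfind().
-#     input is the input text string
-#     token is a token instance
-def find_column(input,token):
-    # rfind() returns -1 when there is no preceding newline, which
-    # makes the arithmetic come out right on the first line too.
-    last_newline = input.rfind('\n', 0, token.lexpos)
-    return token.lexpos - last_newline
-</pre>
-</blockquote>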
-
-<H3><a name="ply_nn10"></a>3.7 Ignored characters</H3>
-
-
-<p>
-The special <tt>t_ignore</tt> rule is reserved by <tt>lex.py</tt> for characters
-that should be completely ignored in the input stream.
-Usually this is used to skip over whitespace and other non-essential characters.
-Although it is possible to define a regular expression rule for whitespace in a manner
-similar to <tt>t_newline()</tt>, the use of <tt>t_ignore</tt> provides substantially better
-lexing performance because it is handled as a special case and is checked in a much
-more efficient manner than the normal regular expression rules.
-
-<H3><a name="ply_nn11"></a>3.8 Literal characters</H3>
-
-
-<p>
-Literal characters can be specified by defining a variable <tt>literals</tt> in your lexing module.  For example:
-
-<blockquote>
-<pre>
-literals = [ '+','-','*','/' ]
-</pre>
-</blockquote>
-
-or alternatively
-
-<blockquote>
-<pre>
-literals = "+-*/"
-</pre>
-</blockquote>
-
-A literal character is simply a single character that is returned "as is" when encountered by the lexer.  Literals are checked
-after all of the defined regular expression rules.  Thus, if a rule starts with one of the literal characters, the rule will always
-take precedence.
-<p>
-When a literal token is returned, both its <tt>type</tt> and <tt>value</tt> attributes are set to the character itself. For example, <tt>'+'</tt>.
-
-<H3><a name="ply_nn12"></a>3.9 Error handling</H3>
-
-
-<p>
-Finally, the <tt>t_error()</tt>
-function is used to handle lexing errors that occur when illegal
-characters are detected.  In this case, the <tt>t.value</tt> attribute contains the
-rest of the input string that has not been tokenized.  In the example, the error function
-was defined as follows:
-
-<blockquote>
-<pre>
-# Error handling rule
-def t_error(t):
-    print "Illegal character '%s'" % t.value[0]
-    t.lexer.skip(1)
-</pre>
-</blockquote>
-
-In this case, we simply print the offending character and skip ahead one character by calling <tt>t.lexer.skip(1)</tt>.
-
-<H3><a name="ply_nn13"></a>3.10 Building and using the lexer</H3>
-
-
-<p>
-To build the lexer, the function <tt>lex.lex()</tt> is used.  This function
-uses Python reflection (or introspection) to read the regular expression rules
-out of the calling context and build the lexer.  Once the lexer has been built, two functions can
-be used to control the lexer.
-
-<ul>
-<li><tt>lex.input(data)</tt>.  Reset the lexer and store a new input string.
-<li><tt>lex.token()</tt>.  Return the next token.  Returns a special <tt>LexToken</tt> instance on success or
-None if the end of the input text has been reached.
-</ul>
-
-If desired, the lexer can also be used as an object.  The <tt>lex()</tt> function returns a <tt>Lexer</tt> object that
-can be used for this purpose.  For example:
-
-<blockquote>
-<pre>
-lexer = lex.lex()
-lexer.input(sometext)
-while 1:
-    tok = lexer.token()
-    if not tok: break
-    print tok
-</pre>
-</blockquote>
-
-<p>
-This latter technique should be used if you intend to use multiple lexers in your application.  Simply define each
-lexer in its own module and use the object returned by <tt>lex()</tt> as appropriate.
-
-<p>
-Note: The global functions <tt>lex.input()</tt> and <tt>lex.token()</tt> are bound to the <tt>input()</tt>
-and <tt>token()</tt> methods of the last lexer created by the lex module.
-
-<H3><a name="ply_nn14"></a>3.11 The @TOKEN decorator</H3>
-
-
-In some applications, you may want to build tokens from a series of
-more complex regular expression rules.  For example:
-
-<blockquote>
-<pre>
-digit            = r'([0-9])'
-nondigit         = r'([_A-Za-z])'
-identifier       = r'(' + nondigit + r'(' + digit + r'|' + nondigit + r')*)'
-
-def t_ID(t):
-    # want docstring to be identifier above. ?????
-    ...
-</pre>
-</blockquote>
-
-In this case, we want the regular expression rule for <tt>ID</tt> to be one of the variables above.  However, there is no
-way to directly specify this using a normal documentation string.  To solve this problem, you can use the <tt>@TOKEN</tt>
-decorator.  For example:
-
-<blockquote>
-<pre>
-from ply.lex import TOKEN
-
-@TOKEN(identifier)
-def t_ID(t):
-    ...
-</pre>
-</blockquote>
-
-This will attach <tt>identifier</tt> to the docstring for <tt>t_ID()</tt>, allowing <tt>lex.py</tt> to work normally.  An alternative
-approach to this problem is to set the docstring directly like this:
-
-<blockquote>
-<pre>
-def t_ID(t):
-    ...
-
-t_ID.__doc__ = identifier
-</pre>
-</blockquote>
-
-<b>NOTE:</b> Use of <tt>@TOKEN</tt> requires Python-2.4 or newer.  If you're concerned about backwards compatibility with older
-versions of Python, use the alternative approach of setting the docstring directly.
-
-<H3><a name="ply_nn15"></a>3.12 Optimized mode</H3>
-
-
-For improved performance, it may be desirable to use Python's
-optimized mode (e.g., running Python with the <tt>-O</tt>
-option).  However, doing so causes Python to ignore documentation
-strings.  This presents special problems for <tt>lex.py</tt>.  To
-handle this case, you can create your lexer using
-the <tt>optimize</tt> option as follows:
-
-<blockquote>
-<pre>
-lexer = lex.lex(optimize=1)
-</pre>
-</blockquote>
-
-Next, run Python in its normal operating mode.  When you do
-this, <tt>lex.py</tt> will write a file called <tt>lextab.py</tt> to
-the current directory.  This file contains all of the regular
-expression rules and tables used during lexing.  On subsequent
-executions,
-<tt>lextab.py</tt> will simply be imported to build the lexer.  This
-approach substantially improves the startup time of the lexer and it
-works in Python's optimized mode.
-
-<p>
-To change the name of the lexer-generated file, use the <tt>lextab</tt> keyword argument.  For example:
-
-<blockquote>
-<pre>
-lexer = lex.lex(optimize=1,lextab="footab")
-</pre>
-</blockquote>
-
-When running in optimized mode, it is important to note that lex disables most error checking.
Thus, this is really only recommended
-if you're sure everything is working correctly and you're ready to start releasing production code.
-
-<H3><a name="ply_nn16"></a>3.13 Debugging</H3>
-
-
-For the purpose of debugging, you can run <tt>lex()</tt> in a debugging mode as follows:
-
-<blockquote>
-<pre>
-lexer = lex.lex(debug=1)
-</pre>
-</blockquote>
-
-This will cause a large amount of debugging information to be printed, including all of the added rules and the master
-regular expressions.
-
-In addition, <tt>lex.py</tt> comes with a simple main function which
-will either tokenize input read from standard input or from a file specified
-on the command line.  To use it, simply put this in your lexer:
-
-<blockquote>
-<pre>
-if __name__ == '__main__':
-     lex.runmain()
-</pre>
-</blockquote>
-
-<H3><a name="ply_nn17"></a>3.14 Alternative specification of lexers</H3>
-
-
-As shown in the example, lexers are specified all within one Python module.  If you want to
-put token rules in a different module from the one in which you invoke <tt>lex()</tt>, use the
-<tt>module</tt> keyword argument.
-
-<p>
-For example, you might have a dedicated module that just contains
-the token rules:
-
-<blockquote>
-<pre>
-# module: tokrules.py
-# This module just contains the lexing rules
-
-# List of token names.   This is always required
-tokens = (
-   'NUMBER',
-   'PLUS',
-   'MINUS',
-   'TIMES',
-   'DIVIDE',
-   'LPAREN',
-   'RPAREN',
-)
-
-# Regular expression rules for simple tokens
-t_PLUS    = r'\+'
-t_MINUS   = r'-'
-t_TIMES   = r'\*'
-t_DIVIDE  = r'/'
-t_LPAREN  = r'\('
-t_RPAREN  = r'\)'
-
-# A regular expression rule with some action code
-def t_NUMBER(t):
-    r'\d+'
-    try:
-        t.value = int(t.value)
-    except ValueError:
-        print "Line %d: Number %s is too large!" % (t.lineno,t.value)
-        t.value = 0
-    return t
-
-# Define a rule so we can track line numbers
-def t_newline(t):
-    r'\n+'
-    t.lexer.lineno += len(t.value)
-
-# A string containing ignored characters (spaces and tabs)
-t_ignore  = ' \t'
-
-# Error handling rule
-def t_error(t):
-    print "Illegal character '%s'" % t.value[0]
-    t.lexer.skip(1)
-</pre>
-</blockquote>
-
-Now, if you wanted to build a tokenizer from these rules in a different module, you would do the following (shown for Python interactive mode):
-
-<blockquote>
-<pre>
->>> import tokrules
->>> import ply.lex as lex
->>> <b>lexer = lex.lex(module=tokrules)</b>
->>> lexer.input("3 + 4")
->>> lexer.token()
-LexToken(NUMBER,3,1,0)
->>> lexer.token()
-LexToken(PLUS,'+',1,2)
->>> lexer.token()
-LexToken(NUMBER,4,1,4)
->>> lexer.token()
-None
->>>
-</pre>
-</blockquote>
-
-The <tt>object</tt> option can be used to define lexers as a class instead of a module.  For example:
-
-<blockquote>
-<pre>
-import ply.lex as lex
-
-class MyLexer:
-    # List of token names.   This is always required
-    tokens = (
-       'NUMBER',
-       'PLUS',
-       'MINUS',
-       'TIMES',
-       'DIVIDE',
-       'LPAREN',
-       'RPAREN',
-    )
-
-    # Regular expression rules for simple tokens
-    t_PLUS    = r'\+'
-    t_MINUS   = r'-'
-    t_TIMES   = r'\*'
-    t_DIVIDE  = r'/'
-    t_LPAREN  = r'\('
-    t_RPAREN  = r'\)'
-
-    # A regular expression rule with some action code
-    # Note addition of self parameter since we're in a class
-    def t_NUMBER(self,t):
-        r'\d+'
-        try:
-            t.value = int(t.value)
-        except ValueError:
-            print "Line %d: Number %s is too large!" % (t.lineno,t.value)
-            t.value = 0
-        return t
-
-    # Define a rule so we can track line numbers
-    def t_newline(self,t):
-        r'\n+'
-        t.lexer.lineno += len(t.value)
-
-    # A string containing ignored characters (spaces and tabs)
-    t_ignore  = ' \t'
-
-    # Error handling rule
-    def t_error(self,t):
-        print "Illegal character '%s'" % t.value[0]
-        t.lexer.skip(1)
-
-    <b># Build the lexer
-    def build(self,**kwargs):
-        self.lexer = lex.lex(object=self, **kwargs)</b>
-
-    # Test it out
-    def test(self,data):
-        self.lexer.input(data)
-        while 1:
-             tok = self.lexer.token()
-             if not tok: break
-             print tok
-
-# Build the lexer and try it out
-m = MyLexer()
-m.build()           # Build the lexer
-m.test("3 + 4")     # Test it
-</pre>
-</blockquote>
-
-For reasons that are subtle, you should <em>NOT</em> invoke <tt>lex.lex()</tt> inside the <tt>__init__()</tt> method of your class.  If you
-do, it may cause bizarre behavior if someone tries to duplicate a lexer object.  Keep reading.
-
-<H3><a name="ply_nn18"></a>3.15 Maintaining state</H3>
-
-
-In your lexer, you may want to maintain a variety of state information.  This might include mode settings, symbol tables, and other details.  There are a few
-different ways to handle this situation.  First, you could just keep some global variables:
-
-<blockquote>
-<pre>
-num_count = 0
-def t_NUMBER(t):
-    r'\d+'
-    global num_count
-    num_count += 1
-    try:
-        t.value = int(t.value)
-    except ValueError:
-        print "Line %d: Number %s is too large!" % (t.lineno,t.value)
-        t.value = 0
-    return t
-</pre>
-</blockquote>
-
-Alternatively, you can store this information inside the Lexer object created by <tt>lex()</tt>.  To do this, you can use the <tt>lexer</tt> attribute
-of tokens passed to the various rules.  For example:
-
-<blockquote>
-<pre>
-def t_NUMBER(t):
-    r'\d+'
-    t.lexer.num_count += 1     # Note use of lexer attribute
-    try:
-        t.value = int(t.value)
-    except ValueError:
-        print "Line %d: Number %s is too large!" % (t.lineno,t.value)
-        t.value = 0
-    return t
-
-lexer = lex.lex()
-lexer.num_count = 0            # Set the initial count
-</pre>
-</blockquote>
-
-This latter approach has the advantage of storing information inside
-the lexer itself---something that may be useful if multiple instances
-of the same lexer have been created.  However, it may also feel kind
-of "hacky" to the purists.  Just to put their minds at ease, all
-internal attributes of the lexer (with the exception of <tt>lineno</tt>) have names that are prefixed
-by <tt>lex</tt> (e.g., <tt>lexdata</tt>,<tt>lexpos</tt>, etc.).  Thus,
-it should be perfectly safe to store attributes in the lexer that
-don't have names starting with that prefix.
-
-<p>
-A third approach is to define the lexer as a class as shown in the previous example:
-
-<blockquote>
-<pre>
-class MyLexer:
-    ...
-    def t_NUMBER(self,t):
-        r'\d+'
-        self.num_count += 1
-        try:
-            t.value = int(t.value)
-        except ValueError:
-            print "Line %d: Number %s is too large!" % (t.lineno,t.value)
-            t.value = 0
-        return t
-
-    def build(self, **kwargs):
-        self.lexer = lex.lex(object=self,**kwargs)
-
-    def __init__(self):
-        self.num_count = 0
-
-# Create a lexer
-m = MyLexer()
-lexer = lex.lex(object=m)
-</pre>
-</blockquote>
-
-The class approach may be the easiest to manage if your application is going to be creating multiple instances of the same lexer and
-you need to manage a lot of state.
-
-<H3><a name="ply_nn19"></a>3.16 Duplicating lexers</H3>
-
-
-<b>NOTE: I am thinking about deprecating this feature.
Post comments on <a href="http://groups.google.com/group/ply-hack">ply-hack@googlegroups.com</a> or send me a private email at dave@dabeaz.com.</b> - -<p> -If necessary, a lexer object can be quickly duplicated by invoking its <tt>clone()</tt> method. For example: - -<blockquote> -<pre> -lexer = lex.lex() -... -newlexer = lexer.clone() -</pre> -</blockquote> - -When a lexer is cloned, the copy is identical to the original lexer, -including any input text. However, once created, different text can be -fed to the clone which can be used independently. This capability may -be useful in situations when you are writing a parser/compiler that -involves recursive or reentrant processing. For instance, if you -needed to scan ahead in the input for some reason, you could create a -clone and use it to look ahead. - -<p> -The advantage of using <tt>clone()</tt> instead of reinvoking <tt>lex()</tt> is -that it is significantly faster. Namely, it is not necessary to re-examine all of the -token rules, build a regular expression, and construct internal tables. All of this -information can simply be reused in the new lexer. - -<p> -Special considerations need to be made when cloning a lexer that is defined as a class. Previous sections -showed an example of a class <tt>MyLexer</tt>. If you have the following code: - -<blockquote> -<pre> -m = MyLexer() -a = lex.lex(object=m) # Create a lexer - -b = a.clone() # Clone the lexer -</pre> -</blockquote> - -Then both <tt>a</tt> and <tt>b</tt> are going to be bound to the same -object <tt>m</tt>. If the object <tt>m</tt> contains internal state -related to lexing, this sharing may lead to quite a bit of confusion. To fix this, -the <tt>clone()</tt> method accepts an optional argument that can be used to supply a new object. This -can be used to clone the lexer and bind it to a new instance. For example: - -<blockquote> -<pre> -m = MyLexer() # Create a lexer -a = lex.lex(object=m) - -# Create a clone -n = MyLexer() # New instance of MyLexer -b = a.clone(n) # New lexer bound to n -</pre> -</blockquote> - -It may make sense to encapsulate all of this inside a method: - -<blockquote> -<pre> -class MyLexer: - ... - def clone(self): - c = MyLexer() # Create a new instance of myself - # Copy attributes from self to c as appropriate - ... - # Clone the lexer - c.lexer = self.lexer.clone(c) - return c -</pre> -</blockquote> - -The fact that a new instance of <tt>MyLexer</tt> may be created while cloning a lexer is the reason why you should never -invoke <tt>lex.lex()</tt> inside <tt>__init__()</tt>. If you do, the lexer will be rebuilt from scratch and you lose -all of the performance benefits of using <tt>clone()</tt> in the first place. - -<H3><a name="ply_nn20"></a>3.17 Internal lexer state</H3> - - -A Lexer object <tt>lexer</tt> has a number of internal attributes that may be useful in certain -situations. - -<p> -<tt>lexer.lexpos</tt> -<blockquote> -This attribute is an integer that contains the current position within the input text. If you modify -the value, it will change the result of the next call to <tt>token()</tt>. Within token rule functions, this points -to the first character <em>after</em> the matched text. If the value is modified within a rule, the next returned token will be -matched at the new position. -</blockquote> - -<p> -<tt>lexer.lineno</tt> -<blockquote> -The current value of the line number attribute stored in the lexer. This can be modified as needed to -change the line number. 
-</blockquote>
-
-<p>
-<tt>lexer.lexdata</tt>
-<blockquote>
-The current input text stored in the lexer.  This is the string passed with the <tt>input()</tt> method.  It
-would probably be a bad idea to modify this unless you really know what you're doing.
-</blockquote>
-
-<P>
-<tt>lexer.lexmatch</tt>
-<blockquote>
-This is the raw <tt>Match</tt> object returned by the Python <tt>re.match()</tt> function (used internally by PLY) for the
-current token.  If you have written a regular expression that contains named groups, you can use this to retrieve those values.
-</blockquote>
-
-<H3><a name="ply_nn21"></a>3.18 Conditional lexing and start conditions</H3>
-
-
-In advanced parsing applications, it may be useful to have different
-lexing states.  For instance, you may want the occurrence of a certain
-token or syntactic construct to trigger a different kind of lexing.
-PLY supports a feature that allows the underlying lexer to be put into
-a series of different states.  Each state can have its own tokens,
-lexing rules, and so forth.  The implementation is based largely on
-the "start condition" feature of GNU flex.  Details of this can be found
-at <a
-href="http://www.gnu.org/software/flex/manual/html_chapter/flex_11.html">http://www.gnu.org/software/flex/manual/html_chapter/flex_11.html</a>.
-
-<p>
-To define a new lexing state, it must first be declared.  This is done by including a "states" declaration in your
-lex file.  For example:
-
-<blockquote>
-<pre>
-states = (
-   ('foo','exclusive'),
-   ('bar','inclusive'),
-)
-</pre>
-</blockquote>
-
-This declaration declares two states, <tt>'foo'</tt>
-and <tt>'bar'</tt>.  States may be of two types: <tt>'exclusive'</tt>
-and <tt>'inclusive'</tt>.  An exclusive state completely overrides the
-default behavior of the lexer.  That is, lex will only return tokens
-and apply rules defined specifically for that state.  An inclusive
-state adds additional tokens and rules to the default set of rules.
-Thus, lex will return the tokens defined by default in addition
-to those defined for the inclusive state.
-
-<p>
-Once a state has been declared, tokens and rules are declared by including the
-state name in the token/rule declaration.  For example:
-
-<blockquote>
-<pre>
-t_foo_NUMBER = r'\d+'                      # Token 'NUMBER' in state 'foo'
-t_bar_ID     = r'[a-zA-Z_][a-zA-Z0-9_]*'   # Token 'ID' in state 'bar'
-
-def t_foo_newline(t):
-    r'\n'
-    t.lexer.lineno += 1
-</pre>
-</blockquote>
-
-A token can be declared in multiple states by including multiple state names in the declaration.  For example:
-
-<blockquote>
-<pre>
-t_foo_bar_NUMBER = r'\d+'         # Defines token 'NUMBER' in both state 'foo' and 'bar'
-</pre>
-</blockquote>
-
-Alternatively, a token can be declared in all states by using 'ANY' in the name.
-
-<blockquote>
-<pre>
-t_ANY_NUMBER = r'\d+'         # Defines a token 'NUMBER' in all states
-</pre>
-</blockquote>
-
-If no state name is supplied, as is normally the case, the token is associated with a special state <tt>'INITIAL'</tt>.  For example,
-these two declarations are identical:
-
-<blockquote>
-<pre>
-t_NUMBER = r'\d+'
-t_INITIAL_NUMBER = r'\d+'
-</pre>
-</blockquote>
-
-<p>
-States are also associated with the special <tt>t_ignore</tt> and <tt>t_error()</tt> declarations.
For example, if a state treats
-these differently, you can declare:
-
-<blockquote>
-<pre>
-t_foo_ignore = " \t\n"       # Ignored characters for state 'foo'
-
-def t_bar_error(t):          # Special error handler for state 'bar'
-    pass
-</pre>
-</blockquote>
-
-By default, lexing operates in the <tt>'INITIAL'</tt> state.  This state includes all of the normally defined tokens.
-For users who aren't using different states, this fact is completely transparent.  If, during lexing or parsing, you want to change
-the lexing state, use the <tt>begin()</tt> method.  For example:
-
-<blockquote>
-<pre>
-def t_begin_foo(t):
-    r'start_foo'
-    t.lexer.begin('foo')             # Starts 'foo' state
-</pre>
-</blockquote>
-
-To get out of a state, you use <tt>begin()</tt> to switch back to the initial state.  For example:
-
-<blockquote>
-<pre>
-def t_foo_end(t):
-    r'end_foo'
-    t.lexer.begin('INITIAL')        # Back to the initial state
-</pre>
-</blockquote>
-
-The management of states can also be done with a stack.  For example:
-
-<blockquote>
-<pre>
-def t_begin_foo(t):
-    r'start_foo'
-    t.lexer.push_state('foo')        # Starts 'foo' state
-
-def t_foo_end(t):
-    r'end_foo'
-    t.lexer.pop_state()              # Back to the previous state
-</pre>
-</blockquote>
-
-<p>
-The use of a stack would be useful in situations where there are many ways of entering a new lexing state and you merely want to go back
-to the previous state afterwards.
-
-<P>
-An example might help clarify.  Suppose you were writing a parser and you wanted to grab sections of arbitrary C code enclosed by
-curly braces.  That is, whenever you encounter a starting brace '{', you want to read all of the enclosed code up to the ending brace '}'
-and return it as a string.  Doing this with a normal regular expression rule is nearly (if not actually) impossible.  This is because braces can
-be nested and can be included in comments and strings.  Thus, simply matching up to the first matching '}' character isn't good enough.  Here is how
-you might use lexer states to do this:
-
-<blockquote>
-<pre>
-# Declare the state
-states = (
-  ('ccode','exclusive'),
-)
-
-# Match the first {. Enter ccode state.
-def t_ccode(t):
-    r'\{'
-    t.lexer.code_start = t.lexer.lexpos        # Record the starting position
-    t.lexer.level = 1                          # Initial brace level
-    t.lexer.begin('ccode')                     # Enter 'ccode' state
-
-# Rules for the ccode state
-def t_ccode_lbrace(t):
-    r'\{'
-    t.lexer.level +=1
-
-def t_ccode_rbrace(t):
-    r'\}'
-    t.lexer.level -=1
-
-    # If closing brace, return the code fragment
-    if t.lexer.level == 0:
-         t.value = t.lexer.lexdata[t.lexer.code_start:t.lexer.lexpos+1]
-         t.type = "CCODE"
-         t.lexer.lineno += t.value.count('\n')
-         t.lexer.begin('INITIAL')
-         return t
-
-# C or C++ comment (ignore)
-def t_ccode_comment(t):
-    r'(/\*(.|\n)*?\*/)|(//.*)'
-    pass
-
-# C string
-def t_ccode_string(t):
-   r'\"([^\\\n]|(\\.))*?\"'
-
-# C character literal
-def t_ccode_char(t):
-   r'\'([^\\\n]|(\\.))*?\''
-
-# Any sequence of non-whitespace characters (not braces, strings)
-def t_ccode_nonspace(t):
-   r'[^\s\{\}\'\"]+'
-
-# Ignored characters (whitespace)
-t_ccode_ignore = " \t\n"
-
-# For bad characters, we just skip over them
-def t_ccode_error(t):
-    t.lexer.skip(1)
-</pre>
-</blockquote>
-
-In this example, the occurrence of the first '{' causes the lexer to record the starting position and enter a new state <tt>'ccode'</tt>.  A collection of rules then match
-various parts of the input that follow (comments, strings, etc.).  All of these rules merely discard the token (by not returning a value).
-
-However, if the closing right brace is encountered, the rule <tt>t_ccode_rbrace</tt> collects all of the code (using the earlier recorded starting
-position), stores it, and returns a token 'CCODE' containing all of that text.  When returning the token, the lexing state is restored back to its
-initial state.
-
-<H3><a name="ply_nn21"></a>3.19 Miscellaneous Issues</H3>
-
-
-<P>
-<ul>
-<li>The lexer requires input to be supplied as a single input string.  Since most machines have more than enough memory, this
-rarely presents a performance concern.  However, it means that the lexer currently can't be used with streaming data
-such as open files or sockets.  This limitation is primarily a side-effect of using the <tt>re</tt> module.
-
-<p>
-<li>The lexer should work properly with Unicode strings, both in the token and pattern matching rules and in the input text.
-
-<p>
-<li>If you need to supply optional flags to the re.compile() function, use the reflags option to lex.  For example:
-
-<blockquote>
-<pre>
-lex.lex(reflags=re.UNICODE)
-</pre>
-</blockquote>
-
-<p>
-<li>Since the lexer is written entirely in Python, its performance is
-largely determined by that of the Python <tt>re</tt> module.  Although
-the lexer has been written to be as efficient as possible, it's not
-blazingly fast when used on very large input files.  If
-performance is a concern, you might consider upgrading to the most
-recent version of Python, creating a hand-written lexer, or offloading
-the lexer into a C extension module.
-</ul>
-
-<p>
-If you are going to create a hand-written lexer and you plan to use it with <tt>yacc.py</tt>,
-it only needs to conform to the following requirements:
-
-<ul>
-<li>It must provide a <tt>token()</tt> method that returns the next token or <tt>None</tt> if no more
-tokens are available.
-<li>The <tt>token()</tt> method must return an object <tt>tok</tt> that has <tt>type</tt> and <tt>value</tt> attributes.
-</ul>
-
-<H2><a name="ply_nn22"></a>4. Parsing basics</H2>
-
-
-<tt>yacc.py</tt> is used to parse language syntax.  Before showing an
-example, there are a few important bits of background that must be
-mentioned.  First, <em>syntax</em> is usually specified in terms of a BNF grammar.
-For example, if you wanted to parse
-simple arithmetic expressions, you might first write an unambiguous
-grammar specification like this:
-
-<blockquote>
-<pre>
-expression : expression + term
-           | expression - term
-           | term
-
-term       : term * factor
-           | term / factor
-           | factor
-
-factor     : NUMBER
-           | ( expression )
-</pre>
-</blockquote>
-
-In the grammar, symbols such as <tt>NUMBER</tt>, <tt>+</tt>, <tt>-</tt>, <tt>*</tt>, and <tt>/</tt> are known
-as <em>terminals</em> and correspond to raw input tokens.  Identifiers such as <tt>term</tt> and <tt>factor</tt> refer to more
-complex rules, typically composed of a collection of tokens.  These identifiers are known as <em>non-terminals</em>.
-<P>
-The semantic behavior of a language is often specified using a
-technique known as syntax directed translation.  In syntax directed
-translation, attributes are attached to each symbol in a given grammar
-rule along with an action.  Whenever a particular grammar rule is
-recognized, the action describes what to do.
For example, given the -expression grammar above, you might write the specification for a -simple calculator like this: - -<blockquote> -<pre> -Grammar Action --------------------------------- -------------------------------------------- -expression0 : expression1 + term expression0.val = expression1.val + term.val - | expression1 - term expression0.val = expression1.val - term.val - | term expression0.val = term.val - -term0 : term1 * factor term0.val = term1.val * factor.val - | term1 / factor term0.val = term1.val / factor.val - | factor term0.val = factor.val - -factor : NUMBER factor.val = int(NUMBER.lexval) - | ( expression ) factor.val = expression.val -</pre> -</blockquote> - -A good way to think about syntax directed translation is to simply think of each symbol in the grammar as some -kind of object. The semantics of the language are then expressed as a collection of methods/operations on these -objects. - -<p> -Yacc uses a parsing technique known as LR-parsing or shift-reduce parsing. LR parsing is a -bottom up technique that tries to recognize the right-hand-side of various grammar rules. -Whenever a valid right-hand-side is found in the input, the appropriate action code is triggered and the -grammar symbols are replaced by the grammar symbol on the left-hand-side. - -<p> -LR parsing is commonly implemented by shifting grammar symbols onto a stack and looking at the stack and the next -input token for patterns. The details of the algorithm can be found in a compiler text, but the -following example illustrates the steps that are performed if you wanted to parse the expression -<tt>3 + 5 * (10 - 20)</tt> using the grammar defined above: - -<blockquote> -<pre> -Step Symbol Stack Input Tokens Action ----- --------------------- --------------------- ------------------------------- -1 $ 3 + 5 * ( 10 - 20 )$ Shift 3 -2 $ 3 + 5 * ( 10 - 20 )$ Reduce factor : NUMBER -3 $ factor + 5 * ( 10 - 20 )$ Reduce term : factor -4 $ term + 5 * ( 10 - 20 )$ Reduce expr : term -5 $ expr + 5 * ( 10 - 20 )$ Shift + -6 $ expr + 5 * ( 10 - 20 )$ Shift 5 -7 $ expr + 5 * ( 10 - 20 )$ Reduce factor : NUMBER -8 $ expr + factor * ( 10 - 20 )$ Reduce term : factor -9 $ expr + term * ( 10 - 20 )$ Shift * -10 $ expr + term * ( 10 - 20 )$ Shift ( -11 $ expr + term * ( 10 - 20 )$ Shift 10 -12 $ expr + term * ( 10 - 20 )$ Reduce factor : NUMBER -13 $ expr + term * ( factor - 20 )$ Reduce term : factor -14 $ expr + term * ( term - 20 )$ Reduce expr : term -15 $ expr + term * ( expr - 20 )$ Shift - -16 $ expr + term * ( expr - 20 )$ Shift 20 -17 $ expr + term * ( expr - 20 )$ Reduce factor : NUMBER -18 $ expr + term * ( expr - factor )$ Reduce term : factor -19 $ expr + term * ( expr - term )$ Reduce expr : expr - term -20 $ expr + term * ( expr )$ Shift ) -21 $ expr + term * ( expr ) $ Reduce factor : (expr) -22 $ expr + term * factor $ Reduce term : term * factor -23 $ expr + term $ Reduce expr : expr + term -24 $ expr $ Reduce expr -25 $ $ Success! -</pre> -</blockquote> - -When parsing the expression, an underlying state machine and the current input token determine what to do next. -If the next token looks like part of a valid grammar rule (based on other items on the stack), it is generally shifted -onto the stack. If the top of the stack contains a valid right-hand-side of a grammar rule, it is -usually "reduced" and the symbols replaced with the symbol on the left-hand-side. When this reduction occurs, the -appropriate action is triggered (if defined). 
If the input token can't be shifted and the top of the stack doesn't match
-any grammar rules, a syntax error has occurred and the parser must take some kind of recovery step (or bail out).
-
-<p>
-It is important to note that the underlying implementation is built around a large finite-state machine that is encoded
-in a collection of tables.  The construction of these tables is quite complicated and beyond the scope of this discussion.
-However, subtle details of this process explain why, in the example above, the parser chooses to shift a token
-onto the stack in step 9 rather than reducing the rule <tt>expr : expr + term</tt>.
-
-<H2><a name="ply_nn23"></a>5. Yacc reference</H2>
-
-
-This section describes how to write parsers in PLY.
-
-<H3><a name="ply_nn24"></a>5.1 An example</H3>
-
-
-Suppose you wanted to make a grammar for simple arithmetic expressions as previously described.  Here is
-how you would do it with <tt>yacc.py</tt>:
-
-<blockquote>
-<pre>
-# Yacc example
-
-import ply.yacc as yacc
-
-# Get the token map from the lexer.  This is required.
-from calclex import tokens
-
-def p_expression_plus(p):
-    'expression : expression PLUS term'
-    p[0] = p[1] + p[3]
-
-def p_expression_minus(p):
-    'expression : expression MINUS term'
-    p[0] = p[1] - p[3]
-
-def p_expression_term(p):
-    'expression : term'
-    p[0] = p[1]
-
-def p_term_times(p):
-    'term : term TIMES factor'
-    p[0] = p[1] * p[3]
-
-def p_term_div(p):
-    'term : term DIVIDE factor'
-    p[0] = p[1] / p[3]
-
-def p_term_factor(p):
-    'term : factor'
-    p[0] = p[1]
-
-def p_factor_num(p):
-    'factor : NUMBER'
-    p[0] = p[1]
-
-def p_factor_expr(p):
-    'factor : LPAREN expression RPAREN'
-    p[0] = p[2]
-
-# Error rule for syntax errors
-def p_error(p):
-    print "Syntax error in input!"
-
-# Build the parser
-yacc.yacc()
-
-# Use this if you want to build the parser using SLR instead of LALR
-# yacc.yacc(method="SLR")
-
-while 1:
-   try:
-       s = raw_input('calc > ')
-   except EOFError:
-       break
-   if not s: continue
-   result = yacc.parse(s)
-   print result
-</pre>
-</blockquote>
-
-In this example, each grammar rule is defined by a Python function where the docstring to that function contains the
-appropriate context-free grammar specification.  Each function accepts a single
-argument <tt>p</tt> that is a sequence containing the values of each grammar symbol in the corresponding rule.  The values of
-<tt>p[i]</tt> are mapped to grammar symbols as shown here:
-
-<blockquote>
-<pre>
-def p_expression_plus(p):
-    'expression : expression PLUS term'
-    #   ^            ^        ^    ^
-    #  p[0]         p[1]     p[2] p[3]
-
-    p[0] = p[1] + p[3]
-</pre>
-</blockquote>
-
-For tokens, the "value" of the corresponding <tt>p[i]</tt> is the
-<em>same</em> as the <tt>p.value</tt> attribute assigned
-in the lexer module.  For non-terminals, the value is determined by
-whatever is placed in <tt>p[0]</tt> when rules are reduced.  This
-value can be anything at all.  However, it is probably most common for
-the value to be a simple Python type, a tuple, or an instance.  In this example, we
-are relying on the fact that the <tt>NUMBER</tt> token stores an integer value in its value
-field.  All of the other rules simply perform various types of integer operations and store
-the result.
-
-<P>
-Note: The use of negative indices has a special meaning in yacc---specifically, <tt>p[-1]</tt> does
-not have the same value as <tt>p[3]</tt> in this example.  Please see the section on "Embedded Actions" for further
-details.
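-
-As a small illustration of the point that <tt>p[0]</tt> can hold any object, the following
-sketch builds abstract syntax tree nodes instead of computing values.  The tuple layout here
-is just one possible convention and is not part of the calculator example:
-
-<blockquote>
-<pre>
-def p_expression_plus(p):
-    'expression : expression PLUS term'
-    # Build a tree node (operator, left, right) rather than a number
-    p[0] = ('+', p[1], p[3])
-</pre>
-</blockquote>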
-
-<p>
-The first rule defined in the yacc specification determines the starting grammar
-symbol (in this case, a rule for <tt>expression</tt> appears first).  Whenever
-the starting rule is reduced by the parser and no more input is available, parsing
-stops and the final value is returned (this value will be whatever the top-most rule
-placed in <tt>p[0]</tt>).  Note: an alternative starting symbol can be specified using the <tt>start</tt> keyword argument to
-<tt>yacc()</tt>.
-
-<p>The <tt>p_error(p)</tt> rule is defined to catch syntax errors.  See the error handling section
-below for more detail.
-
-<p>
-To build the parser, call the <tt>yacc.yacc()</tt> function.  This function
-looks at the module and attempts to construct all of the LR parsing tables for the grammar
-you have specified.  The first time <tt>yacc.yacc()</tt> is invoked, you will get a message
-such as this:
-
-<blockquote>
-<pre>
-$ python calcparse.py
-yacc: Generating LALR parsing table...
-calc >
-</pre>
-</blockquote>
-
-Since table construction is relatively expensive (especially for large
-grammars), the resulting parsing table is written to the current
-directory in a file called <tt>parsetab.py</tt>.  In addition, a
-debugging file called <tt>parser.out</tt> is created.  On subsequent
-executions, <tt>yacc</tt> will reload the table from
-<tt>parsetab.py</tt> unless it has detected a change in the underlying
-grammar (in which case the tables and <tt>parsetab.py</tt> file are
-regenerated).  Note: The names of parser output files can be changed if necessary.  See the notes that follow later.
-
-<p>
-If any errors are detected in your grammar specification, <tt>yacc.py</tt> will produce
-diagnostic messages and possibly raise an exception.  Some of the errors that can be detected include:
-
-<ul>
-<li>Duplicated function names (if more than one rule function has the same name in the grammar file).
-<li>Shift/reduce and reduce/reduce conflicts generated by ambiguous grammars.
-<li>Badly specified grammar rules.
-<li>Infinite recursion (rules that can never terminate).
-<li>Unused rules and tokens
-<li>Undefined rules and tokens
-</ul>
-
-The next few sections discuss a few finer points of grammar construction.
-
-<H3><a name="ply_nn25"></a>5.2 Combining Grammar Rule Functions</H3>
-
-
-When grammar rules are similar, they can be combined into a single function.
-For example, consider the two rules in our earlier example:
-
-<blockquote>
-<pre>
-def p_expression_plus(p):
-    'expression : expression PLUS term'
-    p[0] = p[1] + p[3]
-
-def p_expression_minus(p):
-    'expression : expression MINUS term'
-    p[0] = p[1] - p[3]
-</pre>
-</blockquote>
-
-Instead of writing two functions, you might write a single function like this:
-
-<blockquote>
-<pre>
-def p_expression(p):
-    '''expression : expression PLUS term
-                  | expression MINUS term'''
-    if p[2] == '+':
-        p[0] = p[1] + p[3]
-    elif p[2] == '-':
-        p[0] = p[1] - p[3]
-</pre>
-</blockquote>
-
-In general, the doc string for any given function can contain multiple grammar rules.
So, it would
-have also been legal (although possibly confusing) to write this:
-
-<blockquote>
-<pre>
-def p_binary_operators(p):
-    '''expression : expression PLUS term
-                  | expression MINUS term
-       term       : term TIMES factor
-                  | term DIVIDE factor'''
-    if p[2] == '+':
-        p[0] = p[1] + p[3]
-    elif p[2] == '-':
-        p[0] = p[1] - p[3]
-    elif p[2] == '*':
-        p[0] = p[1] * p[3]
-    elif p[2] == '/':
-        p[0] = p[1] / p[3]
-</pre>
-</blockquote>
-
-When combining grammar rules into a single function, it is usually a good idea for all of the rules to have
-a similar structure (e.g., the same number of terms).  Otherwise, the corresponding action code may be more
-complicated than necessary.  However, it is possible to handle simple cases using len().  For example:
-
-<blockquote>
-<pre>
-def p_expressions(p):
-    '''expression : expression MINUS expression
-                  | MINUS expression'''
-    if (len(p) == 4):
-        p[0] = p[1] - p[3]
-    elif (len(p) == 3):
-        p[0] = -p[2]
-</pre>
-</blockquote>
-
-<H3><a name="ply_nn26"></a>5.3 Character Literals</H3>
-
-
-If desired, a grammar may contain tokens defined as single character literals.  For example:
-
-<blockquote>
-<pre>
-def p_binary_operators(p):
-    '''expression : expression '+' term
-                  | expression '-' term
-       term       : term '*' factor
-                  | term '/' factor'''
-    if p[2] == '+':
-        p[0] = p[1] + p[3]
-    elif p[2] == '-':
-        p[0] = p[1] - p[3]
-    elif p[2] == '*':
-        p[0] = p[1] * p[3]
-    elif p[2] == '/':
-        p[0] = p[1] / p[3]
-</pre>
-</blockquote>
-
-A character literal must be enclosed in quotes such as <tt>'+'</tt>.  In addition, if literals are used, they must be declared in the
-corresponding <tt>lex</tt> file through the use of a special <tt>literals</tt> declaration.
-
-<blockquote>
-<pre>
-# Literals.  Should be placed in module given to lex()
-literals = ['+','-','*','/' ]
-</pre>
-</blockquote>
-
-<b>Character literals are limited to a single character</b>.  Thus, it is not legal to specify literals such as <tt>'<='</tt> or <tt>'=='</tt>.  For this, use
-the normal lexing rules (e.g., define a rule such as <tt>t_EQ = r'=='</tt>).
-
-<H3><a name="ply_nn26"></a>5.4 Empty Productions</H3>
-
-
-<tt>yacc.py</tt> can handle empty productions by defining a rule like this:
-
-<blockquote>
-<pre>
-def p_empty(p):
-    'empty :'
-    pass
-</pre>
-</blockquote>
-
-Now to use the empty production, simply use 'empty' as a symbol.  For example:
-
-<blockquote>
-<pre>
-def p_optitem(p):
-    '''optitem : item
-               | empty'''
-    ...
-</pre>
-</blockquote>
-
-Note: You can write empty rules anywhere by simply specifying an empty right hand side.  However, I personally find that
-writing an "empty" rule and using "empty" to denote an empty production is easier to read.
-
-<H3><a name="ply_nn28"></a>5.5 Changing the starting symbol</H3>
-
-
-Normally, the first rule found in a yacc specification defines the starting grammar rule (top level rule).  To change this, simply
-supply a <tt>start</tt> specifier in your file.  For example:
-
-<blockquote>
-<pre>
-start = 'foo'
-
-def p_bar(p):
-    'bar : A B'
-
-# This is the starting rule due to the start specifier above
-def p_foo(p):
-    'foo : bar X'
-...
-</pre>
-</blockquote>
-
-The use of a <tt>start</tt> specifier may be useful during debugging since you can use it to have yacc build a subset of
-a larger grammar.  For this purpose, it is also possible to specify a starting symbol as an argument to <tt>yacc()</tt>.
For example:
-
-<blockquote>
-<pre>
-yacc.yacc(start='foo')
-</pre>
-</blockquote>
-
-<H3><a name="ply_nn27"></a>5.6 Dealing With Ambiguous Grammars</H3>
-
-
-The expression grammar given in the earlier example has been written in a special format to eliminate ambiguity.
-However, in many situations, it is extremely difficult or awkward to write grammars in this format.  A
-much more natural way to express the grammar is in a more compact form like this:
-
-<blockquote>
-<pre>
-expression : expression PLUS expression
-           | expression MINUS expression
-           | expression TIMES expression
-           | expression DIVIDE expression
-           | LPAREN expression RPAREN
-           | NUMBER
-</pre>
-</blockquote>
-
-Unfortunately, this grammar specification is ambiguous.  For example, if you are parsing the string
-"3 * 4 + 5", there is no way to tell how the operators are supposed to be grouped.
-For example, does the expression mean "(3 * 4) + 5" or is it "3 * (4 + 5)"?
-
-<p>
-When an ambiguous grammar is given to <tt>yacc.py</tt> it will print messages about "shift/reduce conflicts"
-or "reduce/reduce conflicts".  A shift/reduce conflict is caused when the parser generator can't decide
-whether to reduce a rule or shift a symbol on the parsing stack.  For example, consider
-the string "3 * 4 + 5" and the internal parsing stack:
-
-<blockquote>
-<pre>
-Step Symbol Stack           Input Tokens            Action
----- ---------------------  ---------------------   -------------------------------
-1    $                                3 * 4 + 5$    Shift 3
-2    $ 3                                * 4 + 5$    Reduce expression : NUMBER
-3    $ expr                             * 4 + 5$    Shift *
-4    $ expr *                             4 + 5$    Shift 4
-5    $ expr * 4                             + 5$    Reduce expression : NUMBER
-6    $ expr * expr                          + 5$    SHIFT/REDUCE CONFLICT ????
-</pre>
-</blockquote>
-
-In this case, when the parser reaches step 6, it has two options.  One is to reduce the
-rule <tt>expr : expr * expr</tt> on the stack.  The other option is to shift the
-token <tt>+</tt> on the stack.  Both options are perfectly legal from the rules
-of the context-free-grammar.
-
-<p>
-By default, all shift/reduce conflicts are resolved in favor of shifting.  Therefore, in the above
-example, the parser will always shift the <tt>+</tt> instead of reducing.  Although this
-strategy works in many cases (including the ambiguous if-then-else), it is not enough for arithmetic
-expressions.  In fact, in the above example, the decision to shift <tt>+</tt> is completely wrong---we should have
-reduced <tt>expr * expr</tt> since multiplication has higher mathematical precedence than addition.
-
-<p>To resolve ambiguity, especially in expression grammars, <tt>yacc.py</tt> allows individual
-tokens to be assigned a precedence level and associativity.  This is done by adding a variable
-<tt>precedence</tt> to the grammar file like this:
-
-<blockquote>
-<pre>
-precedence = (
-    ('left', 'PLUS', 'MINUS'),
-    ('left', 'TIMES', 'DIVIDE'),
-)
-</pre>
-</blockquote>
-
-This declaration specifies that <tt>PLUS</tt>/<tt>MINUS</tt> have
-the same precedence level and are left-associative and that
-<tt>TIMES</tt>/<tt>DIVIDE</tt> have the same precedence and are left-associative.
-Within the <tt>precedence</tt> declaration, tokens are ordered from lowest to highest precedence.  Thus,
-this declaration specifies that <tt>TIMES</tt>/<tt>DIVIDE</tt> have higher
-precedence than <tt>PLUS</tt>/<tt>MINUS</tt> (since they appear later in the
-precedence specification).
-
-<p>
-The precedence specification works by associating a numerical precedence level value and associativity direction to
-the listed tokens.
For example, in the above example you get:
-
-<blockquote>
-<pre>
-PLUS      : level = 1,  assoc = 'left'
-MINUS     : level = 1,  assoc = 'left'
-TIMES     : level = 2,  assoc = 'left'
-DIVIDE    : level = 2,  assoc = 'left'
-</pre>
-</blockquote>
-
-These values are then used to attach a numerical precedence value and associativity direction
-to each grammar rule.  <em>This is always determined by looking at the precedence of the right-most terminal symbol.</em>
-For example:
-
-<blockquote>
-<pre>
-expression : expression PLUS expression                 # level = 1, left
-           | expression MINUS expression                # level = 1, left
-           | expression TIMES expression                # level = 2, left
-           | expression DIVIDE expression               # level = 2, left
-           | LPAREN expression RPAREN                   # level = None (not specified)
-           | NUMBER                                     # level = None (not specified)
-</pre>
-</blockquote>
-
-When shift/reduce conflicts are encountered, the parser generator resolves the conflict by
-looking at the precedence rules and associativity specifiers.
-
-<p>
-<ol>
-<li>If the current token has higher precedence, it is shifted.
-<li>If the grammar rule on the stack has higher precedence, the rule is reduced.
-<li>If the current token and the grammar rule have the same precedence, the
-rule is reduced for left associativity, whereas the token is shifted for right associativity.
-<li>If nothing is known about the precedence, shift/reduce conflicts are resolved in
-favor of shifting (the default).
-</ol>
-
-For example, if "expression PLUS expression" has been parsed and the next token
-is "TIMES", the action is going to be a shift because "TIMES" has a higher precedence level than "PLUS".  On the other
-hand, if "expression TIMES expression" has been parsed and the next token is "PLUS", the action
-is going to be a reduce because "PLUS" has a lower precedence than "TIMES."
-
-<p>
-When shift/reduce conflicts are resolved using the first three techniques (with the help of
-precedence rules), <tt>yacc.py</tt> will report no errors or conflicts in the grammar.
-
-<p>
-One problem with the precedence specifier technique is that it is sometimes necessary to
-change the precedence of an operator in certain contexts.  For example, consider a unary-minus operator
-in "3 + 4 * -5".  Normally, unary minus has a very high precedence--being evaluated before the multiply.
-However, in our precedence specifier, MINUS has a lower precedence than TIMES.  To deal with this,
-precedence rules can be given for fictitious tokens like this:
-
-<blockquote>
-<pre>
-precedence = (
-    ('left', 'PLUS', 'MINUS'),
-    ('left', 'TIMES', 'DIVIDE'),
-    ('right', 'UMINUS'),            # Unary minus operator
-)
-</pre>
-</blockquote>
-
-Now, in the grammar file, we can write our unary minus rule like this:
-
-<blockquote>
-<pre>
-def p_expr_uminus(p):
-    'expression : MINUS expression %prec UMINUS'
-    p[0] = -p[2]
-</pre>
-</blockquote>
-
-In this case, <tt>%prec UMINUS</tt> overrides the default rule precedence--setting it to that
-of UMINUS in the precedence specifier.
-
-<p>
-At first, the use of UMINUS in this example may appear very confusing.
-UMINUS is not an input token or a grammar rule.  Instead, you should
-think of it as the name of a special marker in the precedence table.  When you use the <tt>%prec</tt> qualifier, you're simply
-telling yacc that you want the precedence of the expression to be the same as for this special marker instead of the usual precedence.
-
-<p>
-It is also possible to specify non-associativity in the <tt>precedence</tt> table.
This would
-be used when you <em>don't</em> want operations to chain together.  For example, suppose
-you wanted to support comparison operators like <tt><</tt> and <tt>></tt> but you didn't want to allow
-combinations like <tt>a < b < c</tt>.  To do this, simply specify a rule like this:
-
-<blockquote>
-<pre>
-precedence = (
-    ('nonassoc', 'LESSTHAN', 'GREATERTHAN'),  # Nonassociative operators
-    ('left', 'PLUS', 'MINUS'),
-    ('left', 'TIMES', 'DIVIDE'),
-    ('right', 'UMINUS'),            # Unary minus operator
-)
-</pre>
-</blockquote>
-
-<p>
-If you do this, the occurrence of input text such as <tt> a < b < c</tt> will result in a syntax error.  However, simple
-expressions such as <tt>a < b</tt> will still be fine.
-
-<p>
-Reduce/reduce conflicts are caused when there are multiple grammar
-rules that can be applied to a given set of symbols.  This kind of
-conflict is almost always bad and is always resolved by picking the
-rule that appears first in the grammar file.  Reduce/reduce conflicts
-are almost always caused when different sets of grammar rules somehow
-generate the same set of symbols.  For example:
-
-<blockquote>
-<pre>
-assignment :  ID EQUALS NUMBER
-           |  ID EQUALS expression
-
-expression : expression PLUS expression
-           | expression MINUS expression
-           | expression TIMES expression
-           | expression DIVIDE expression
-           | LPAREN expression RPAREN
-           | NUMBER
-</pre>
-</blockquote>
-
-In this case, a reduce/reduce conflict exists between these two rules:
-
-<blockquote>
-<pre>
-assignment  : ID EQUALS NUMBER
-expression  : NUMBER
-</pre>
-</blockquote>
-
-For example, if you wrote "a = 5", the parser can't figure out if this
-is supposed to be reduced as <tt>assignment : ID EQUALS NUMBER</tt> or
-whether it's supposed to reduce the 5 as an expression and then reduce
-the rule <tt>assignment : ID EQUALS expression</tt>.
-
-<p>
-It should be noted that reduce/reduce conflicts are notoriously difficult to spot
-simply by looking at the input grammar.  To locate these, it is usually easier to look at the
-<tt>parser.out</tt> debugging file with an appropriately high level of caffeination.
-
-<H3><a name="ply_nn28"></a>5.7 The parser.out file</H3>
-
-
-Tracking down shift/reduce and reduce/reduce conflicts is one of the finer pleasures of using an LR
-parsing algorithm.  To assist in debugging, <tt>yacc.py</tt> creates a debugging file called
-'parser.out' when it generates the parsing table.  The contents of this file look like the following:
-
-<blockquote>
-<pre>
-Unused terminals:
-
-
-Grammar
-
-Rule 1     expression -> expression PLUS expression
-Rule 2     expression -> expression MINUS expression
-Rule 3     expression -> expression TIMES expression
-Rule 4     expression -> expression DIVIDE expression
-Rule 5     expression -> NUMBER
-Rule 6     expression -> LPAREN expression RPAREN
-
-Terminals, with rules where they appear
-
-TIMES                : 3
-error                :
-MINUS                : 2
-RPAREN               : 6
-LPAREN               : 6
-DIVIDE               : 4
-PLUS                 : 1
-NUMBER               : 5
-
-Nonterminals, with rules where they appear
-
-expression           : 1 1 2 2 3 3 4 4 6 0
-
-
-Parsing method: LALR
-
-
-state 0
-
-    S' -> . expression
-    expression -> . expression PLUS expression
-    expression -> . expression MINUS expression
-    expression -> . expression TIMES expression
-    expression -> . expression DIVIDE expression
-    expression -> . NUMBER
-    expression -> . LPAREN expression RPAREN
-
-    NUMBER          shift and go to state 3
-    LPAREN          shift and go to state 2
-
-
-state 1
-
-    S' -> expression .
-    expression -> expression . PLUS expression
-    expression -> expression . MINUS expression
-    expression -> expression .
TIMES expression - expression -> expression . DIVIDE expression - - PLUS shift and go to state 6 - MINUS shift and go to state 5 - TIMES shift and go to state 4 - DIVIDE shift and go to state 7 - - -state 2 - - expression -> LPAREN . expression RPAREN - expression -> . expression PLUS expression - expression -> . expression MINUS expression - expression -> . expression TIMES expression - expression -> . expression DIVIDE expression - expression -> . NUMBER - expression -> . LPAREN expression RPAREN - - NUMBER shift and go to state 3 - LPAREN shift and go to state 2 - - -state 3 - - expression -> NUMBER . - - $ reduce using rule 5 - PLUS reduce using rule 5 - MINUS reduce using rule 5 - TIMES reduce using rule 5 - DIVIDE reduce using rule 5 - RPAREN reduce using rule 5 - - -state 4 - - expression -> expression TIMES . expression - expression -> . expression PLUS expression - expression -> . expression MINUS expression - expression -> . expression TIMES expression - expression -> . expression DIVIDE expression - expression -> . NUMBER - expression -> . LPAREN expression RPAREN - - NUMBER shift and go to state 3 - LPAREN shift and go to state 2 - - -state 5 - - expression -> expression MINUS . expression - expression -> . expression PLUS expression - expression -> . expression MINUS expression - expression -> . expression TIMES expression - expression -> . expression DIVIDE expression - expression -> . NUMBER - expression -> . LPAREN expression RPAREN - - NUMBER shift and go to state 3 - LPAREN shift and go to state 2 - - -state 6 - - expression -> expression PLUS . expression - expression -> . expression PLUS expression - expression -> . expression MINUS expression - expression -> . expression TIMES expression - expression -> . expression DIVIDE expression - expression -> . NUMBER - expression -> . LPAREN expression RPAREN - - NUMBER shift and go to state 3 - LPAREN shift and go to state 2 - - -state 7 - - expression -> expression DIVIDE . expression - expression -> . expression PLUS expression - expression -> . expression MINUS expression - expression -> . expression TIMES expression - expression -> . expression DIVIDE expression - expression -> . NUMBER - expression -> . LPAREN expression RPAREN - - NUMBER shift and go to state 3 - LPAREN shift and go to state 2 - - -state 8 - - expression -> LPAREN expression . RPAREN - expression -> expression . PLUS expression - expression -> expression . MINUS expression - expression -> expression . TIMES expression - expression -> expression . DIVIDE expression - - RPAREN shift and go to state 13 - PLUS shift and go to state 6 - MINUS shift and go to state 5 - TIMES shift and go to state 4 - DIVIDE shift and go to state 7 - - -state 9 - - expression -> expression TIMES expression . - expression -> expression . PLUS expression - expression -> expression . MINUS expression - expression -> expression . TIMES expression - expression -> expression . DIVIDE expression - - $ reduce using rule 3 - PLUS reduce using rule 3 - MINUS reduce using rule 3 - TIMES reduce using rule 3 - DIVIDE reduce using rule 3 - RPAREN reduce using rule 3 - - ! PLUS [ shift and go to state 6 ] - ! MINUS [ shift and go to state 5 ] - ! TIMES [ shift and go to state 4 ] - ! DIVIDE [ shift and go to state 7 ] - -state 10 - - expression -> expression MINUS expression . - expression -> expression . PLUS expression - expression -> expression . MINUS expression - expression -> expression . TIMES expression - expression -> expression . 
DIVIDE expression
-
-    $               reduce using rule 2
-    PLUS            reduce using rule 2
-    MINUS           reduce using rule 2
-    RPAREN          reduce using rule 2
-    TIMES           shift and go to state 4
-    DIVIDE          shift and go to state 7
-
-  ! TIMES           [ reduce using rule 2 ]
-  ! DIVIDE          [ reduce using rule 2 ]
-  ! PLUS            [ shift and go to state 6 ]
-  ! MINUS           [ shift and go to state 5 ]
-
-state 11
-
-    expression -> expression PLUS expression .
-    expression -> expression . PLUS expression
-    expression -> expression . MINUS expression
-    expression -> expression . TIMES expression
-    expression -> expression . DIVIDE expression
-
-    $               reduce using rule 1
-    PLUS            reduce using rule 1
-    MINUS           reduce using rule 1
-    RPAREN          reduce using rule 1
-    TIMES           shift and go to state 4
-    DIVIDE          shift and go to state 7
-
-  ! TIMES           [ reduce using rule 1 ]
-  ! DIVIDE          [ reduce using rule 1 ]
-  ! PLUS            [ shift and go to state 6 ]
-  ! MINUS           [ shift and go to state 5 ]
-
-state 12
-
-    expression -> expression DIVIDE expression .
-    expression -> expression . PLUS expression
-    expression -> expression . MINUS expression
-    expression -> expression . TIMES expression
-    expression -> expression . DIVIDE expression
-
-    $               reduce using rule 4
-    PLUS            reduce using rule 4
-    MINUS           reduce using rule 4
-    TIMES           reduce using rule 4
-    DIVIDE          reduce using rule 4
-    RPAREN          reduce using rule 4
-
-  ! PLUS            [ shift and go to state 6 ]
-  ! MINUS           [ shift and go to state 5 ]
-  ! TIMES           [ shift and go to state 4 ]
-  ! DIVIDE          [ shift and go to state 7 ]
-
-state 13
-
-    expression -> LPAREN expression RPAREN .
-
-    $               reduce using rule 6
-    PLUS            reduce using rule 6
-    MINUS           reduce using rule 6
-    TIMES           reduce using rule 6
-    DIVIDE          reduce using rule 6
-    RPAREN          reduce using rule 6
-</pre>
-</blockquote>
-
-In the file, each state of the grammar is described.  Within each state the "." indicates the current
-location of the parse within any applicable grammar rules.  In addition, the actions for each valid
-input token are listed.  When a shift/reduce or reduce/reduce conflict arises, rules <em>not</em> selected
-are prefixed with an !.  For example:
-
-<blockquote>
-<pre>
-  ! TIMES           [ reduce using rule 2 ]
-  ! DIVIDE          [ reduce using rule 2 ]
-  ! PLUS            [ shift and go to state 6 ]
-  ! MINUS           [ shift and go to state 5 ]
-</pre>
-</blockquote>
-
-By looking at these rules (and with a little practice), you can usually track down the source
-of most parsing conflicts.  It should also be stressed that not all shift/reduce conflicts are
-bad.  However, the only way to be sure that they are resolved correctly is to look at <tt>parser.out</tt>.
-
-<H3><a name="ply_nn29"></a>5.8 Syntax Error Handling</H3>
-
-
-When a syntax error occurs during parsing, the error is immediately
-detected (i.e., the parser does not read any more tokens beyond the
-source of the error).  Error recovery in LR parsers is a delicate
-topic that involves ancient rituals and black magic.  The recovery mechanism
-provided by <tt>yacc.py</tt> is comparable to Unix yacc so you may want to
-consult a book like O'Reilly's "Lex and Yacc" for some of the finer details.
-
-<p>
-When a syntax error occurs, <tt>yacc.py</tt> performs the following steps:
-
-<ol>
-<li>On the first occurrence of an error, the user-defined <tt>p_error()</tt> function
-is called with the offending token as an argument.  Afterwards, the parser enters
-an "error-recovery" mode in which it will not make future calls to <tt>p_error()</tt> until it
-has successfully shifted at least 3 tokens onto the parsing stack.
- -<p> -<li>If no recovery action is taken in <tt>p_error()</tt>, the offending lookahead token is replaced -with a special <tt>error</tt> token. - -<p> -<li>If the offending lookahead token is already set to <tt>error</tt>, the top item of the parsing stack is -deleted. - -<p> -<li>If the entire parsing stack is unwound, the parser enters a restart state and attempts to start -parsing from its initial state. - -<p> -<li>If a grammar rule accepts <tt>error</tt> as a token, it will be -shifted onto the parsing stack. - -<p> -<li>If the top item of the parsing stack is <tt>error</tt>, lookahead tokens will be discarded until the -parser can successfully shift a new symbol or reduce a rule involving <tt>error</tt>. -</ol> - -<H4><a name="ply_nn30"></a>5.8.1 Recovery and resynchronization with error rules</H4> - - -The most well-behaved approach for handling syntax errors is to write grammar rules that include the <tt>error</tt> -token. For example, suppose your language had a grammar rule for a print statement like this: - -<blockquote> -<pre> -def p_statement_print(p): - 'statement : PRINT expr SEMI' - ... -</pre> -</blockquote> - -To account for the possibility of a bad expression, you might write an additional grammar rule like this: - -<blockquote> -<pre> -def p_statement_print_error(p): - 'statement : PRINT error SEMI' - print "Syntax error in print statement. Bad expression" - -</pre> -</blockquote> - -In this case, the <tt>error</tt> token will match any sequence of -tokens that might appear up to the first semicolon that is -encountered. Once the semicolon is reached, the rule will be -invoked and the <tt>error</tt> token will go away. - -<p> -This type of recovery is sometimes known as parser resynchronization. -The <tt>error</tt> token acts as a wildcard for any bad input text and -the token immediately following <tt>error</tt> acts as a -synchronization token. - -<p> -It is important to note that the <tt>error</tt> token usually does not appear as the last token -on the right in an error rule. For example: - -<blockquote> -<pre> -def p_statement_print_error(p): - 'statement : PRINT error' - print "Syntax error in print statement. Bad expression" -</pre> -</blockquote> - -This is because the first bad token encountered will cause the rule to -be reduced--which may make it difficult to recover if more bad tokens -immediately follow. - -<H4><a name="ply_nn31"></a>5.8.2 Panic mode recovery</H4> - - -An alternative error recovery scheme is to enter a panic mode recovery in which tokens are -discarded to a point where the parser might be able to recover in some sensible manner. - -<p> -Panic mode recovery is implemented entirely in the <tt>p_error()</tt> function. For example, this -function starts discarding tokens until it reaches a closing '}'. Then, it restarts the -parser in its initial state. - -<blockquote> -<pre> -def p_error(p): - print "Whoa. You are seriously hosed." - # Read ahead looking for a closing '}' - while 1: - tok = yacc.token() # Get the next token - if not tok or tok.type == 'RBRACE': break - yacc.restart() -</pre> -</blockquote> - -<p> -This function simply discards the bad token and tells the parser that the error was ok. - -<blockquote> -<pre> -def p_error(p): - print "Syntax error at token", p.type - # Just discard the token and tell the parser it's okay. - yacc.errok() -</pre> -</blockquote> - -<P> -Within the <tt>p_error()</tt> function, three functions are available to control the behavior -of the parser: -<p> -<ul> -<li><tt>yacc.errok()</tt>. 
This resets the parser state so it doesn't think it's in error-recovery
-mode.  This will prevent an <tt>error</tt> token from being generated and will reset the internal
-error counters so that the next syntax error will call <tt>p_error()</tt> again.
-
-<p>
-<li><tt>yacc.token()</tt>.  This returns the next token on the input stream.
-
-<p>
-<li><tt>yacc.restart()</tt>.  This discards the entire parsing stack and resets the parser
-to its initial state.
-</ul>
-
-Note: these functions are only available when invoking <tt>p_error()</tt> and are not available
-at any other time.
-
-<p>
-To supply the next lookahead token to the parser, <tt>p_error()</tt> can return a token.  This might be
-useful if trying to synchronize on special characters.  For example:
-
-<blockquote>
-<pre>
-def p_error(p):
-    # Read ahead looking for a terminating ";"
-    while 1:
-        tok = yacc.token()             # Get the next token
-        if not tok or tok.type == 'SEMI': break
-    yacc.errok()
-
-    # Return SEMI to the parser as the next lookahead token
-    return tok
-</pre>
-</blockquote>
-
-<H4><a name="ply_nn32"></a>5.8.3 General comments on error handling</H4>
-
-
-For normal types of languages, error recovery with error rules and resynchronization characters is probably the most reliable
-technique.  This is because you can instrument the grammar to catch errors at selected places where it is relatively easy
-to recover and continue parsing.  Panic mode recovery is really only useful in certain specialized applications where you might want
-to discard huge portions of the input text to find a valid restart point.
-
-<H3><a name="ply_nn33"></a>5.9 Line Number and Position Tracking</H3>
-
-
-<tt>yacc.py</tt> automatically tracks line numbers and positions for all of the grammar symbols and tokens it processes.  To retrieve the line
-numbers, two functions are used in grammar rules:
-
-<ul>
-<li><tt>p.lineno(num)</tt>.  Return the starting line number for symbol <em>num</em>.
-<li><tt>p.linespan(num)</tt>.  Return a tuple (startline,endline) with the starting and ending line number for symbol <em>num</em>.
-</ul>
-
-For example:
-
-<blockquote>
-<pre>
-def p_expression(p):
-    'expression : expression PLUS expression'
-    p.lineno(1)        # Line number of the left expression
-    p.lineno(2)        # Line number of the PLUS operator
-    p.lineno(3)        # Line number of the right expression
-    ...
-    start,end = p.linespan(3)    # Start,end lines of the right expression
-
-</pre>
-</blockquote>
-
-Since line numbers are managed internally by the parser, there is usually no need to modify the line
-numbers.  However, if you want to save the line numbers in a parse-tree node, you will need to make your own
-private copy (see the sketch at the end of this section).
-
-<p>
-To get positional information about where tokens were lexed, the following two functions are used:
-
-<ul>
-<li><tt>p.lexpos(num)</tt>.  Return the starting lexing position for symbol <em>num</em>.
-<li><tt>p.lexspan(num)</tt>.  Return a tuple (start,end) with the starting and ending positions for symbol <em>num</em>.
-</ul>
-
-For example:
-
-<blockquote>
-<pre>
-def p_expression(p):
-    'expression : expression PLUS expression'
-    p.lexpos(1)        # Lexing position of the left expression
-    p.lexpos(2)        # Lexing position of the PLUS operator
-    p.lexpos(3)        # Lexing position of the right expression
-    ...
-    start,end = p.lexspan(3)    # Start,end positions of the right expression
-</pre>
-</blockquote>
-
-Note: The <tt>lexspan()</tt> function only returns the range of values up to the start of the last grammar symbol.
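-
-<p>
-Since this positional information is discarded once a rule has been reduced, a common
-idiom is to copy it into the parse-tree node at the moment the rule executes.  The
-following is a minimal sketch of this idiom; the <tt>Assignment</tt> class and its
-attributes are hypothetical, not part of PLY:
-
-<blockquote>
-<pre>
-class Assignment:
-    def __init__(self,name,value,lineno,lexpos):
-        self.name   = name
-        self.value  = value
-        self.lineno = lineno     # Private copy of the line number
-        self.lexpos = lexpos     # Private copy of the lexing position
-
-def p_assignment(p):
-    'assignment : ID EQUALS expression'
-    # Copy the position of ID now; it won't be available after the rule finishes
-    p[0] = Assignment(p[1],p[3],p.lineno(1),p.lexpos(1))
-</pre>
-</blockquote>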
-
-<H3><a name="ply_nn34"></a>5.10 AST Construction</H3>
-
-
-<tt>yacc.py</tt> provides no special functions for constructing an abstract syntax tree.  However, such
-construction is easy enough to do on your own.  Simply create a data structure for abstract syntax tree nodes
-and assign nodes to <tt>p[0]</tt> in each rule.
-
-For example:
-
-<blockquote>
-<pre>
-class Expr: pass
-
-class BinOp(Expr):
-    def __init__(self,left,op,right):
-        self.type = "binop"
-        self.left = left
-        self.right = right
-        self.op = op
-
-class Number(Expr):
-    def __init__(self,value):
-        self.type = "number"
-        self.value = value
-
-def p_expression_binop(p):
-    '''expression : expression PLUS expression
-                  | expression MINUS expression
-                  | expression TIMES expression
-                  | expression DIVIDE expression'''
-
-    p[0] = BinOp(p[1],p[2],p[3])
-
-def p_expression_group(p):
-    'expression : LPAREN expression RPAREN'
-    p[0] = p[2]
-
-def p_expression_number(p):
-    'expression : NUMBER'
-    p[0] = Number(p[1])
-</pre>
-</blockquote>
-
-To simplify tree traversal, it may make sense to pick a very generic tree structure for your parse tree nodes.
-For example:
-
-<blockquote>
-<pre>
-class Node:
-    def __init__(self,type,children=None,leaf=None):
-        self.type = type
-        if children:
-            self.children = children
-        else:
-            self.children = [ ]
-        self.leaf = leaf
-
-def p_expression_binop(p):
-    '''expression : expression PLUS expression
-                  | expression MINUS expression
-                  | expression TIMES expression
-                  | expression DIVIDE expression'''
-
-    p[0] = Node("binop", [p[1],p[3]], p[2])
-</pre>
-</blockquote>
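-
-<p>
-With such a generic structure, a single recursive function can visit every node.  The
-following is a minimal sketch (the <tt>walk()</tt> function is illustrative, not part of
-PLY) that prints each node's type and leaf value in a depth-first, pre-order fashion:
-
-<blockquote>
-<pre>
-def walk(node,indent=0):
-    # Print this node, then recurse into each child subtree
-    print "%s%s %s" % (" "*indent, node.type, node.leaf)
-    for child in node.children:
-        if isinstance(child,Node):
-            walk(child,indent+2)
-</pre>
-</blockquote>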
-
-<H3><a name="ply_nn35"></a>5.11 Embedded Actions</H3>
-
-
-The parsing technique used by yacc only allows actions to be executed at the end of a rule.  For example,
-suppose you have a rule like this:
-
-<blockquote>
-<pre>
-def p_foo(p):
-    "foo : A B C D"
-    print "Parsed a foo", p[1],p[2],p[3],p[4]
-</pre>
-</blockquote>
-
-<p>
-In this case, the supplied action code only executes after all of the
-symbols <tt>A</tt>, <tt>B</tt>, <tt>C</tt>, and <tt>D</tt> have been
-parsed.  Sometimes, however, it is useful to execute small code
-fragments during intermediate stages of parsing.  For example, suppose
-you wanted to perform some action immediately after <tt>A</tt> has
-been parsed.  To do this, you can write an empty rule like this:
-
-<blockquote>
-<pre>
-def p_foo(p):
-    "foo : A seen_A B C D"
-    print "Parsed a foo", p[1],p[3],p[4],p[5]
-    print "seen_A returned", p[2]
-
-def p_seen_A(p):
-    "seen_A :"
-    print "Saw an A = ", p[-1]      # Access grammar symbol to left
-    p[0] = some_value               # Assign value to seen_A
-
-</pre>
-</blockquote>
-
-<p>
-In this example, the empty <tt>seen_A</tt> rule executes immediately
-after <tt>A</tt> is shifted onto the parsing stack.  Within this
-rule, <tt>p[-1]</tt> refers to the symbol on the stack that appears
-immediately to the left of the <tt>seen_A</tt> symbol.  In this case,
-it would be the value of <tt>A</tt> in the <tt>foo</tt> rule
-immediately above.  Like other rules, a value can be returned from an
-embedded action by simply assigning it to <tt>p[0]</tt>.
-
-<p>
-The use of embedded actions can sometimes introduce extra shift/reduce conflicts.  For example,
-this grammar has no conflicts:
-
-<blockquote>
-<pre>
-def p_foo(p):
-    """foo : abcd
-           | abcx"""
-
-def p_abcd(p):
-    "abcd : A B C D"
-
-def p_abcx(p):
-    "abcx : A B C X"
-</pre>
-</blockquote>
-
-However, if you insert an embedded action into one of the rules like this,
-
-<blockquote>
-<pre>
-def p_foo(p):
-    """foo : abcd
-           | abcx"""
-
-def p_abcd(p):
-    "abcd : A B C D"
-
-def p_abcx(p):
-    "abcx : A B seen_AB C X"
-
-def p_seen_AB(p):
-    "seen_AB :"
-</pre>
-</blockquote>
-
-an extra shift/reduce conflict will be introduced.  This conflict is caused by the fact that the same symbol <tt>C</tt> appears next in
-both the <tt>abcd</tt> and <tt>abcx</tt> rules.  The parser can either shift the symbol (<tt>abcd</tt> rule) or reduce the empty rule <tt>seen_AB</tt> (<tt>abcx</tt> rule).
-
-<p>
-A common use of embedded rules is to control other aspects of parsing
-such as scoping of local variables.  For example, if you were parsing C code, you might
-write code like this:
-
-<blockquote>
-<pre>
-def p_statements_block(p):
-    "statements : LBRACE new_scope statements RBRACE"
-    # Action code
-    ...
-    pop_scope()        # Return to previous scope
-
-def p_new_scope(p):
-    "new_scope :"
-    # Create a new scope for local variables
-    s = new_scope()
-    push_scope(s)
-    ...
-</pre>
-</blockquote>
-
-In this case, the embedded action <tt>new_scope</tt> executes immediately after a <tt>LBRACE</tt> (<tt>{</tt>) symbol is parsed.  This might
-adjust internal symbol tables and other aspects of the parser.  Upon completion of the rule <tt>statements_block</tt>, code might undo the operations performed in the embedded action (e.g., <tt>pop_scope()</tt>).
-
-<H3><a name="ply_nn36"></a>5.12 Yacc implementation notes</H3>
-
-
-<ul>
-<li>The default parsing method is LALR.  To use SLR instead, run yacc() as follows:
-
-<blockquote>
-<pre>
-yacc.yacc(method="SLR")
-</pre>
-</blockquote>
-Note: LALR table generation takes approximately twice as long as SLR table generation.  There is no
-difference in actual parsing performance---the same code is used in both cases.  LALR is preferred when working
-with more complicated grammars since it is more powerful.
-
-<p>
-
-<li>By default, <tt>yacc.py</tt> relies on <tt>lex.py</tt> for tokenizing.  However, an alternative tokenizer
-can be supplied as follows:
-
-<blockquote>
-<pre>
-yacc.parse(lexer=x)
-</pre>
-</blockquote>
-In this case, <tt>x</tt> must be a Lexer object that minimally has a <tt>x.token()</tt> method for retrieving the next
-token.  If an input string is given to <tt>yacc.parse()</tt>, the lexer must also have an <tt>x.input()</tt> method.
-
-<p>
-<li>By default, <tt>yacc.py</tt> generates tables in debugging mode (which produces the parser.out file and other output).
-To disable this, use
-
-<blockquote>
-<pre>
-yacc.yacc(debug=0)
-</pre>
-</blockquote>
-
-<p>
-<li>To change the name of the <tt>parsetab.py</tt> file, use:
-
-<blockquote>
-<pre>
-yacc.yacc(tabmodule="foo")
-</pre>
-</blockquote>
-
-<p>
-<li>To change the directory in which the <tt>parsetab.py</tt> file (and other output files) are written, use:
-<blockquote>
-<pre>
-yacc.yacc(tabmodule="foo",outputdir="somedirectory")
-</pre>
-</blockquote>
-
-<p>
-<li>To prevent yacc from generating any kind of parser table file, use:
-<blockquote>
-<pre>
-yacc.yacc(write_tables=0)
-</pre>
-</blockquote>
-
-Note: If you disable table generation, yacc() will regenerate the parsing tables
-each time it runs (which may take a while depending on how large your grammar is).
-
-<P>
-<li>To print copious amounts of debugging during parsing, use:
-
-<blockquote>
-<pre>
-yacc.parse(debug=1)
-</pre>
-</blockquote>
-
-<p>
-<li>To redirect the debugging output to a filename of your choosing, use:
-
-<blockquote>
-<pre>
-yacc.parse(debug=1, debugfile="debugging.out")
-</pre>
-</blockquote>
-
-<p>
-<li>The <tt>yacc.yacc()</tt> function really returns a parser object.  If you want to support multiple
-parsers in the same application, do this:
-
-<blockquote>
-<pre>
-p = yacc.yacc()
-...
-p.parse()
-</pre>
-</blockquote>
-
-Note: The function <tt>yacc.parse()</tt> is bound to the last parser that was generated.
-
-<p>
-<li>Since the generation of the LALR tables is relatively expensive, previously generated tables are
-cached and reused if possible.  The decision to regenerate the tables is determined by taking an MD5
-checksum of all grammar rules and precedence rules.  Only in the event of a mismatch are the tables regenerated.
-
-<p>
-It should be noted that table generation is reasonably efficient, even for grammars that involve around 100 rules
-and several hundred states.  For more complex languages such as C, table generation may take 30-60 seconds on a slow
-machine.  Please be patient.
-
-<p>
-<li>Since LR parsing is driven by tables, the performance of the parser is largely independent of the
-size of the grammar.  The biggest bottlenecks will be the lexer and the complexity of the code in your grammar rules.
-</ul>
-
-<H2><a name="ply_nn37"></a>6. Parser and Lexer State Management</H2>
-
-
-In advanced parsing applications, you may want to have multiple
-parsers and lexers.  Furthermore, the parser may want to control the
-behavior of the lexer in some way.
-
-<p>
-To do this, it is important to note that both the lexer and parser are
-actually implemented as objects.  These objects are returned by the
-<tt>lex()</tt> and <tt>yacc()</tt> functions respectively.  For example:
-
-<blockquote>
-<pre>
-lexer  = lex.lex()       # Return lexer object
-parser = yacc.yacc()     # Return parser object
-</pre>
-</blockquote>
-
-To attach the lexer and parser together, make sure you use the <tt>lexer</tt> argument to <tt>parse()</tt>.  For example:
-
-<blockquote>
-<pre>
-parser.parse(text,lexer=lexer)
-</pre>
-</blockquote>
-
-Within lexer and parser rules, these objects are also available.  In the lexer,
-the "lexer" attribute of a token refers to the lexer object in use.  For example:
-
-<blockquote>
-<pre>
-def t_NUMBER(t):
-   r'\d+'
-   ...
-   print t.lexer           # Show lexer object
-</pre>
-</blockquote>
-
-In the parser, the "lexer" and "parser" attributes refer to the lexer
-and parser objects respectively.
-
-<blockquote>
-<pre>
-def p_expr_plus(p):
-   'expr : expr PLUS expr'
-   ...
-   print p.parser          # Show parser object
-   print p.lexer           # Show lexer object
-</pre>
-</blockquote>
-
-If necessary, arbitrary attributes can be attached to the lexer or parser object.
-For example, if you wanted to have different parsing modes, you could attach a mode
-attribute to the parser object and look at it later.
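-
-<p>
-Putting these pieces together, the following sketch shows one way to manage a parsing
-mode.  The module names <tt>calclex</tt> and <tt>calcparse</tt> and the <tt>mode</tt>
-attribute are hypothetical; only the <tt>module</tt> and <tt>lexer</tt> arguments are
-PLY features:
-
-<blockquote>
-<pre>
-from ply import lex, yacc
-import calclex, calcparse          # Hypothetical modules holding the rules
-
-lexer  = lex.lex(module=calclex)
-parser = yacc.yacc(module=calcparse)
-parser.mode = "strict"             # Arbitrary attribute attached to the parser
-
-result = parser.parse(text,lexer=lexer)
-</pre>
-</blockquote>
-
-Inside a grammar rule, the attribute can then be consulted as <tt>p.parser.mode</tt>.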
-
-<H2><a name="ply_nn38"></a>7. Using Python's Optimized Mode</H2>
-
-
-Because PLY uses information from doc-strings, parsing and lexing
-information must be gathered while running the Python interpreter in
-normal mode (i.e., not with the -O or -OO options).  However, if you
-specify optimized mode like this:
-
-<blockquote>
-<pre>
-lex.lex(optimize=1)
-yacc.yacc(optimize=1)
-</pre>
-</blockquote>
-
-then PLY can later be used when Python runs in optimized mode.  To make this work,
-make sure you first run Python in normal mode.  Once the lexing and parsing tables
-have been generated the first time, run Python in optimized mode. PLY will use
-the tables without the need for doc strings.
-
-<p>
-Beware: running PLY in optimized mode disables a lot of error
-checking.  You should only do this when your project has stabilized
-and you don't need to do any debugging.
-
-<H2><a name="ply_nn39"></a>8. Where to go from here?</H2>
-
-
-The <tt>examples</tt> directory of the PLY distribution contains several simple examples.  Please consult a
-compilers textbook for the theory and underlying implementation details of LR parsing.
-
-</body>
-</html>
-
-
-
-
-
-
-
diff --git a/chall/ply-2.2/example/BASIC/README b/chall/ply-2.2/example/BASIC/README
deleted file mode 100644
index be24a30..0000000
--- a/chall/ply-2.2/example/BASIC/README
+++ /dev/null
@@ -1,79 +0,0 @@
-Inspired by a September 14, 2006 Salon article "Why Johnny Can't Code" by
-David Brin (http://www.salon.com/tech/feature/2006/09/14/basic/index.html),
-I thought that a fully working BASIC interpreter might be an interesting,
-if not questionable, PLY example.  Uh, okay, so maybe it's just a bad idea,
-but in any case, here it is.
-
-In this example, you'll find a rough implementation of 1964 Dartmouth BASIC
-as described in the manual at:
-
-  http://www.bitsavers.org/pdf/dartmouth/BASIC_Oct64.pdf
-
-See also:
-
-  http://en.wikipedia.org/wiki/Dartmouth_BASIC
-
-This dialect is downright primitive---there are no string variables
-and no facilities for interactive input. Moreover, subroutines and functions
-are brain-dead even more than they usually are for BASIC.  Of course,
-the GOTO statement is provided.
-
-Nevertheless, there are a few interesting aspects of this example:
-
-  - It illustrates a fully working interpreter including lexing, parsing,
-    and interpretation of instructions.
-
-  - The parser shows how to catch and report various kinds of parsing
-    errors in a more graceful way.
-
-  - The example parses both files (supplied on the command line) and
-    interactive input entered line by line.
-
-  - It shows how you might represent parsed information.  In this case,
-    each BASIC statement is encoded into a Python tuple containing the
-    statement type and parameters.  These tuples are then stored in
-    a dictionary indexed by program line numbers.
-
-  - Even though it's just BASIC, the parser contains more than 80
-    rules and 150 parsing states.  Thus, it's a little more meaty than
-    the calculator example.
-
-To use the example, run it as follows:
-
-   % python basic.py hello.bas
-   HELLO WORLD
-   %
-
-or use it interactively:
-
-   % python basic.py
-   [BASIC] 10 PRINT "HELLO WORLD"
-   [BASIC] 20 END
-   [BASIC] RUN
-   HELLO WORLD
-   [BASIC]
-
-The following files are defined:
-
-   basic.py      - High level script that controls everything
-   basiclex.py   - BASIC tokenizer
-   basparse.py   - BASIC parser
-   basinterp.py  - BASIC interpreter that runs parsed programs.
-
-In addition, a number of sample BASIC programs (.bas suffix) are
-provided.  These were taken out of the Dartmouth manual.
-
-Disclaimer: I haven't spent a ton of time testing this and it's likely that
-I've skimped here and there on a few finer details (e.g., strictly enforcing
-variable naming rules).  However, the interpreter seems to be able to run
-the examples in the BASIC manual.
-
-Have fun!
- --Dave - - - - - - diff --git a/chall/ply-2.2/example/BASIC/basic.py b/chall/ply-2.2/example/BASIC/basic.py deleted file mode 100644 index 6a2f489..0000000 --- a/chall/ply-2.2/example/BASIC/basic.py +++ /dev/null @@ -1,68 +0,0 @@ -# An implementation of Dartmouth BASIC (1964) -# - -import sys -sys.path.insert(0,"../..") - -import basiclex -import basparse -import basinterp - -# If a filename has been specified, we try to run it. -# If a runtime error occurs, we bail out and enter -# interactive mode below -if len(sys.argv) == 2: - data = open(sys.argv[1]).read() - prog = basparse.parse(data) - if not prog: raise SystemExit - b = basinterp.BasicInterpreter(prog) - try: - b.run() - raise SystemExit - except RuntimeError: - pass - -else: - b = basinterp.BasicInterpreter({}) - -# Interactive mode. This incrementally adds/deletes statements -# from the program stored in the BasicInterpreter object. In -# addition, special commands 'NEW','LIST',and 'RUN' are added. -# Specifying a line number with no code deletes that line from -# the program. - -while 1: - try: - line = raw_input("[BASIC] ") - except EOFError: - raise SystemExit - if not line: continue - line += "\n" - prog = basparse.parse(line) - if not prog: continue - - keys = prog.keys() - if keys[0] > 0: - b.add_statements(prog) - else: - stat = prog[keys[0]] - if stat[0] == 'RUN': - try: - b.run() - except RuntimeError: - pass - elif stat[0] == 'LIST': - b.list() - elif stat[0] == 'BLANK': - b.del_line(stat[1]) - elif stat[0] == 'NEW': - b.new() - - - - - - - - - diff --git a/chall/ply-2.2/example/BASIC/basiclex.py b/chall/ply-2.2/example/BASIC/basiclex.py deleted file mode 100644 index 463ef9b..0000000 --- a/chall/ply-2.2/example/BASIC/basiclex.py +++ /dev/null @@ -1,74 +0,0 @@ -# An implementation of Dartmouth BASIC (1964) - -from ply import * - -keywords = ( - 'LET','READ','DATA','PRINT','GOTO','IF','THEN','FOR','NEXT','TO','STEP', - 'END','STOP','DEF','GOSUB','DIM','REM','RETURN','RUN','LIST','NEW', -) - -tokens = keywords + ( - 'EQUALS','PLUS','MINUS','TIMES','DIVIDE','POWER', - 'LPAREN','RPAREN','LT','LE','GT','GE','NE', - 'COMMA','SEMI', 'INTEGER','FLOAT', 'STRING', - 'ID','NEWLINE' -) - -t_ignore = ' \t' - -def t_REM(t): - r'REM .*' - return t - -def t_ID(t): - r'[A-Z][A-Z0-9]*' - if t.value in keywords: - t.type = t.value - return t - -t_EQUALS = r'=' -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_POWER = r'\^' -t_DIVIDE = r'/' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_LT = r'<' -t_LE = r'<=' -t_GT = r'>' -t_GE = r'>=' -t_NE = r'<>' -t_COMMA = r'\,' -t_SEMI = r';' -t_INTEGER = r'\d+' -t_FLOAT = r'((\d*\.\d+)(E[\+-]?\d+)?|([1-9]\d*E[\+-]?\d+))' -t_STRING = r'\".*?\"' - -def t_NEWLINE(t): - r'\n' - t.lexer.lineno += 1 - return t - -def t_error(t): - print "Illegal character", t.value[0] - t.lexer.skip(1) - -lex.lex() - - - - - - - - - - - - - - - - - diff --git a/chall/ply-2.2/example/BASIC/basinterp.py b/chall/ply-2.2/example/BASIC/basinterp.py deleted file mode 100644 index 0252aa3..0000000 --- a/chall/ply-2.2/example/BASIC/basinterp.py +++ /dev/null @@ -1,440 +0,0 @@ -# This file provides the runtime support for running a basic program -# Assumes the program has been parsed using basparse.py - -import sys -import math -import random - -class BasicInterpreter: - - # Initialize the interpreter. 
prog is a dictionary
-    # containing (line,statement) mappings
-    def __init__(self,prog):
-        self.prog = prog
-
-        self.functions = {           # Built-in function table
-            'SIN' : lambda z: math.sin(self.eval(z)),
-            'COS' : lambda z: math.cos(self.eval(z)),
-            'TAN' : lambda z: math.tan(self.eval(z)),
-            'ATN' : lambda z: math.atan(self.eval(z)),
-            'EXP' : lambda z: math.exp(self.eval(z)),
-            'ABS' : lambda z: abs(self.eval(z)),
-            'LOG' : lambda z: math.log(self.eval(z)),
-            'SQR' : lambda z: math.sqrt(self.eval(z)),
-            'INT' : lambda z: int(self.eval(z)),
-            'RND' : lambda z: random.random()
-        }
-
-    # Collect all data statements
-    def collect_data(self):
-        self.data = []
-        for lineno in self.stat:
-            if self.prog[lineno][0] == 'DATA':
-                self.data = self.data + self.prog[lineno][1]
-        self.dc = 0                  # Initialize the data counter
-
-    # Check for end statements
-    def check_end(self):
-        has_end = 0
-        for lineno in self.stat:
-            if self.prog[lineno][0] == 'END' and not has_end:
-                has_end = lineno
-        if not has_end:
-            print "NO END INSTRUCTION"
-            self.error = 1
-        if has_end != lineno:
-            print "END IS NOT LAST"
-            self.error = 1
-
-    # Check loops
-    def check_loops(self):
-        for pc in range(len(self.stat)):
-            lineno = self.stat[pc]
-            if self.prog[lineno][0] == 'FOR':
-                forinst = self.prog[lineno]
-                loopvar = forinst[1]
-                for i in range(pc+1,len(self.stat)):
-                    if self.prog[self.stat[i]][0] == 'NEXT':
-                        nextvar = self.prog[self.stat[i]][1]
-                        if nextvar != loopvar: continue
-                        self.loopend[pc] = i
-                        break
-                else:
-                    print "FOR WITHOUT NEXT AT LINE", self.stat[pc]
-                    self.error = 1
-
-    # Evaluate an expression
-    def eval(self,expr):
-        etype = expr[0]
-        if etype == 'NUM': return expr[1]
-        elif etype == 'GROUP': return self.eval(expr[1])
-        elif etype == 'UNARY':
-            if expr[1] == '-': return -self.eval(expr[2])
-        elif etype == 'BINOP':
-            if expr[1] == '+': return self.eval(expr[2])+self.eval(expr[3])
-            elif expr[1] == '-': return self.eval(expr[2])-self.eval(expr[3])
-            elif expr[1] == '*': return self.eval(expr[2])*self.eval(expr[3])
-            elif expr[1] == '/': return float(self.eval(expr[2]))/self.eval(expr[3])
-            elif expr[1] == '^': return abs(self.eval(expr[2]))**self.eval(expr[3])
-        elif etype == 'VAR':
-            var,dim1,dim2 = expr[1]
-            if not dim1 and not dim2:
-                if self.vars.has_key(var):
-                    return self.vars[var]
-                else:
-                    print "UNDEFINED VARIABLE", var, "AT LINE", self.stat[self.pc]
-                    raise RuntimeError
-            # May be a list lookup or a function evaluation
-            if dim1 and not dim2:
-                if self.functions.has_key(var):
-                    # A function
-                    return self.functions[var](dim1)
-                else:
-                    # A list evaluation
-                    if self.lists.has_key(var):
-                        dim1val = self.eval(dim1)
-                        if dim1val < 1 or dim1val > len(self.lists[var]):
-                            print "LIST INDEX OUT OF BOUNDS AT LINE", self.stat[self.pc]
-                            raise RuntimeError
-                        return self.lists[var][dim1val-1]
-            if dim1 and dim2:
-                if self.tables.has_key(var):
-                    dim1val = self.eval(dim1)
-                    dim2val = self.eval(dim2)
-                    if dim1val < 1 or dim1val > len(self.tables[var]) or dim2val < 1 or dim2val > len(self.tables[var][0]):
-                        print "TABLE INDEX OUT OF BOUNDS AT LINE", self.stat[self.pc]
-                        raise RuntimeError
-                    return self.tables[var][dim1val-1][dim2val-1]
-            print "UNDEFINED VARIABLE", var, "AT LINE", self.stat[self.pc]
-            raise RuntimeError
-
-    # Evaluate a relational expression
-    def releval(self,expr):
-        etype = expr[1]
-        lhs = self.eval(expr[2])
-        rhs = self.eval(expr[3])
-        if etype == '<':
-            if lhs < rhs: return 1
-            else: return 0
-
-        elif etype == '<=':
-            if lhs <= rhs: return 1
-            else: return 0
-
-        elif etype == '>':
-            if lhs > rhs:
return 1 - else: return 0 - - elif etype == '>=': - if lhs >= rhs: return 1 - else: return 0 - - elif etype == '=': - if lhs == rhs: return 1 - else: return 0 - - elif etype == '<>': - if lhs != rhs: return 1 - else: return 0 - - # Assignment - def assign(self,target,value): - var, dim1, dim2 = target - if not dim1 and not dim2: - self.vars[var] = self.eval(value) - elif dim1 and not dim2: - # List assignment - dim1val = self.eval(dim1) - if not self.lists.has_key(var): - self.lists[var] = [0]*10 - - if dim1val > len(self.lists[var]): - print "DIMENSION TOO LARGE AT LINE", self.stat[self.pc] - raise RuntimeError - self.lists[var][dim1val-1] = self.eval(value) - elif dim1 and dim2: - dim1val = self.eval(dim1) - dim2val = self.eval(dim2) - if not self.tables.has_key(var): - temp = [0]*10 - v = [] - for i in range(10): v.append(temp[:]) - self.tables[var] = v - # Variable already exists - if dim1val > len(self.tables[var]) or dim2val > len(self.tables[var][0]): - print "DIMENSION TOO LARGE AT LINE", self.stat[self.pc] - raise RuntimeError - self.tables[var][dim1val-1][dim2val-1] = self.eval(value) - - # Change the current line number - def goto(self,linenum): - if not self.prog.has_key(linenum): - print "UNDEFINED LINE NUMBER %d AT LINE %d" % (linenum, self.stat[self.pc]) - raise RuntimeError - self.pc = self.stat.index(linenum) - - # Run it - def run(self): - self.vars = { } # All variables - self.lists = { } # List variables - self.tables = { } # Tables - self.loops = [ ] # Currently active loops - self.loopend= { } # Mapping saying where loops end - self.gosub = None # Gosub return point (if any) - self.error = 0 # Indicates program error - - self.stat = self.prog.keys() # Ordered list of all line numbers - self.stat.sort() - self.pc = 0 # Current program counter - - # Processing prior to running - - self.collect_data() # Collect all of the data statements - self.check_end() - self.check_loops() - - if self.error: raise RuntimeError - - while 1: - line = self.stat[self.pc] - instr = self.prog[line] - - op = instr[0] - - # END and STOP statements - if op == 'END' or op == 'STOP': - break # We're done - - # GOTO statement - elif op == 'GOTO': - newline = instr[1] - self.goto(newline) - continue - - # PRINT statement - elif op == 'PRINT': - plist = instr[1] - out = "" - for label,val in plist: - if out: - out += ' '*(15 - (len(out) % 15)) - out += label - if val: - if label: out += " " - eval = self.eval(val) - out += str(eval) - sys.stdout.write(out) - end = instr[2] - if not (end == ',' or end == ';'): - sys.stdout.write("\n") - if end == ',': sys.stdout.write(" "*(15-(len(out) % 15))) - if end == ';': sys.stdout.write(" "*(3-(len(out) % 3))) - - # LET statement - elif op == 'LET': - target = instr[1] - value = instr[2] - self.assign(target,value) - - # READ statement - elif op == 'READ': - for target in instr[1]: - if self.dc < len(self.data): - value = ('NUM',self.data[self.dc]) - self.assign(target,value) - self.dc += 1 - else: - # No more data. Program ends - return - elif op == 'IF': - relop = instr[1] - newline = instr[2] - if (self.releval(relop)): - self.goto(newline) - continue - - elif op == 'FOR': - loopvar = instr[1] - initval = instr[2] - finval = instr[3] - stepval = instr[4] - - # Check to see if this is a new loop - if not self.loops or self.loops[-1][0] != self.pc: - # Looks like a new loop. 
Make the initial assignment - newvalue = initval - self.assign((loopvar,None,None),initval) - if not stepval: stepval = ('NUM',1) - stepval = self.eval(stepval) # Evaluate step here - self.loops.append((self.pc,stepval)) - else: - # It's a repeat of the previous loop - # Update the value of the loop variable according to the step - stepval = ('NUM',self.loops[-1][1]) - newvalue = ('BINOP','+',('VAR',(loopvar,None,None)),stepval) - - if self.loops[-1][1] < 0: relop = '>=' - else: relop = '<=' - if not self.releval(('RELOP',relop,newvalue,finval)): - # Loop is done. Jump to the NEXT - self.pc = self.loopend[self.pc] - self.loops.pop() - else: - self.assign((loopvar,None,None),newvalue) - - elif op == 'NEXT': - if not self.loops: - print "NEXT WITHOUT FOR AT LINE",line - return - - nextvar = instr[1] - self.pc = self.loops[-1][0] - loopinst = self.prog[self.stat[self.pc]] - forvar = loopinst[1] - if nextvar != forvar: - print "NEXT DOESN'T MATCH FOR AT LINE", line - return - continue - elif op == 'GOSUB': - newline = instr[1] - if self.gosub: - print "ALREADY IN A SUBROUTINE AT LINE", line - return - self.gosub = self.stat[self.pc] - self.goto(newline) - continue - - elif op == 'RETURN': - if not self.gosub: - print "RETURN WITHOUT A GOSUB AT LINE",line - return - self.goto(self.gosub) - self.gosub = None - - elif op == 'FUNC': - fname = instr[1] - pname = instr[2] - expr = instr[3] - def eval_func(pvalue,name=pname,self=self,expr=expr): - self.assign((pname,None,None),pvalue) - return self.eval(expr) - self.functions[fname] = eval_func - - elif op == 'DIM': - for vname,x,y in instr[1]: - if y == 0: - # Single dimension variable - self.lists[vname] = [0]*x - else: - # Double dimension variable - temp = [0]*y - v = [] - for i in range(x): - v.append(temp[:]) - self.tables[vname] = v - - self.pc += 1 - - # Utility functions for program listing - def expr_str(self,expr): - etype = expr[0] - if etype == 'NUM': return str(expr[1]) - elif etype == 'GROUP': return "(%s)" % self.expr_str(expr[1]) - elif etype == 'UNARY': - if expr[1] == '-': return "-"+str(expr[2]) - elif etype == 'BINOP': - return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3])) - elif etype == 'VAR': - return self.var_str(expr[1]) - - def relexpr_str(self,expr): - return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3])) - - def var_str(self,var): - varname,dim1,dim2 = var - if not dim1 and not dim2: return varname - if dim1 and not dim2: return "%s(%s)" % (varname, self.expr_str(dim1)) - return "%s(%s,%s)" % (varname, self.expr_str(dim1),self.expr_str(dim2)) - - # Create a program listing - def list(self): - stat = self.prog.keys() # Ordered list of all line numbers - stat.sort() - for line in stat: - instr = self.prog[line] - op = instr[0] - if op in ['END','STOP','RETURN']: - print line, op - continue - elif op == 'REM': - print line, instr[1] - elif op == 'PRINT': - print line, op, - first = 1 - for p in instr[1]: - if not first: print ",", - if p[0] and p[1]: print '"%s"%s' % (p[0],self.expr_str(p[1])), - elif p[1]: print self.expr_str(p[1]), - else: print '"%s"' % (p[0],), - first = 0 - if instr[2]: print instr[2] - else: print - elif op == 'LET': - print line,"LET",self.var_str(instr[1]),"=",self.expr_str(instr[2]) - elif op == 'READ': - print line,"READ", - first = 1 - for r in instr[1]: - if not first: print ",", - print self.var_str(r), - first = 0 - print "" - elif op == 'IF': - print line,"IF %s THEN %d" % (self.relexpr_str(instr[1]),instr[2]) - elif op == 'GOTO' or op == 'GOSUB': - 
print line, op, instr[1] - elif op == 'FOR': - print line,"FOR %s = %s TO %s" % (instr[1],self.expr_str(instr[2]),self.expr_str(instr[3])), - if instr[4]: print "STEP %s" % (self.expr_str(instr[4])), - print - elif op == 'NEXT': - print line,"NEXT", instr[1] - elif op == 'FUNC': - print line,"DEF %s(%s) = %s" % (instr[1],instr[2],self.expr_str(instr[3])) - elif op == 'DIM': - print line,"DIM", - first = 1 - for vname,x,y in instr[1]: - if not first: print ",", - first = 0 - if y == 0: - print "%s(%d)" % (vname,x), - else: - print "%s(%d,%d)" % (vname,x,y), - - print - elif op == 'DATA': - print line,"DATA", - first = 1 - for v in instr[1]: - if not first: print ",", - first = 0 - print v, - print - - # Erase the current program - def new(self): - self.prog = {} - - # Insert statements - def add_statements(self,prog): - for line,stat in prog.items(): - self.prog[line] = stat - - # Delete a statement - def del_line(self,lineno): - try: - del self.prog[lineno] - except KeyError: - pass - diff --git a/chall/ply-2.2/example/BASIC/basparse.py b/chall/ply-2.2/example/BASIC/basparse.py deleted file mode 100644 index 79210ad..0000000 --- a/chall/ply-2.2/example/BASIC/basparse.py +++ /dev/null @@ -1,424 +0,0 @@ -# An implementation of Dartmouth BASIC (1964) -# - -from ply import * -import basiclex - -tokens = basiclex.tokens - -precedence = ( - ('left', 'PLUS','MINUS'), - ('left', 'TIMES','DIVIDE'), - ('left', 'POWER'), - ('right','UMINUS') -) - -#### A BASIC program is a series of statements. We represent the program as a -#### dictionary of tuples indexed by line number. - -def p_program(p): - '''program : program statement - | statement''' - - if len(p) == 2 and p[1]: - p[0] = { } - line,stat = p[1] - p[0][line] = stat - elif len(p) ==3: - p[0] = p[1] - if not p[0]: p[0] = { } - if p[2]: - line,stat = p[2] - p[0][line] = stat - -#### This catch-all rule is used for any catastrophic errors. In this case, -#### we simply return nothing - -def p_program_error(p): - '''program : error''' - p[0] = None - p.parser.error = 1 - -#### Format of all BASIC statements. - -def p_statement(p): - '''statement : INTEGER command NEWLINE''' - if isinstance(p[2],str): - print p[2],"AT LINE", p[1] - p[0] = None - p.parser.error = 1 - else: - lineno = int(p[1]) - p[0] = (lineno,p[2]) - -#### Interactive statements. 
- -def p_statement_interactive(p): - '''statement : RUN NEWLINE - | LIST NEWLINE - | NEW NEWLINE''' - p[0] = (0, (p[1],0)) - -#### Blank line number -def p_statement_blank(p): - '''statement : INTEGER NEWLINE''' - p[0] = (0,('BLANK',int(p[1]))) - -#### Error handling for malformed statements - -def p_statement_bad(p): - '''statement : INTEGER error NEWLINE''' - print "MALFORMED STATEMENT AT LINE", p[1] - p[0] = None - p.parser.error = 1 - -#### Blank line - -def p_statement_newline(p): - '''statement : NEWLINE''' - p[0] = None - -#### LET statement - -def p_command_let(p): - '''command : LET variable EQUALS expr''' - p[0] = ('LET',p[2],p[4]) - -def p_command_let_bad(p): - '''command : LET variable EQUALS error''' - p[0] = "BAD EXPRESSION IN LET" - -#### READ statement - -def p_command_read(p): - '''command : READ varlist''' - p[0] = ('READ',p[2]) - -def p_command_read_bad(p): - '''command : READ error''' - p[0] = "MALFORMED VARIABLE LIST IN READ" - -#### DATA statement - -def p_command_data(p): - '''command : DATA numlist''' - p[0] = ('DATA',p[2]) - -def p_command_data_bad(p): - '''command : DATA error''' - p[0] = "MALFORMED NUMBER LIST IN DATA" - -#### PRINT statement - -def p_command_print(p): - '''command : PRINT plist optend''' - p[0] = ('PRINT',p[2],p[3]) - -def p_command_print_bad(p): - '''command : PRINT error''' - p[0] = "MALFORMED PRINT STATEMENT" - -#### Optional ending on PRINT. Either a comma (,) or semicolon (;) - -def p_optend(p): - '''optend : COMMA - | SEMI - |''' - if len(p) == 2: - p[0] = p[1] - else: - p[0] = None - -#### PRINT statement with no arguments - -def p_command_print_empty(p): - '''command : PRINT''' - p[0] = ('PRINT',[],None) - -#### GOTO statement - -def p_command_goto(p): - '''command : GOTO INTEGER''' - p[0] = ('GOTO',int(p[2])) - -def p_command_goto_bad(p): - '''command : GOTO error''' - p[0] = "INVALID LINE NUMBER IN GOTO" - -#### IF-THEN statement - -def p_command_if(p): - '''command : IF relexpr THEN INTEGER''' - p[0] = ('IF',p[2],int(p[4])) - -def p_command_if_bad(p): - '''command : IF error THEN INTEGER''' - p[0] = "BAD RELATIONAL EXPRESSION" - -def p_command_if_bad2(p): - '''command : IF relexpr THEN error''' - p[0] = "INVALID LINE NUMBER IN THEN" - -#### FOR statement - -def p_command_for(p): - '''command : FOR ID EQUALS expr TO expr optstep''' - p[0] = ('FOR',p[2],p[4],p[6],p[7]) - -def p_command_for_bad_initial(p): - '''command : FOR ID EQUALS error TO expr optstep''' - p[0] = "BAD INITIAL VALUE IN FOR STATEMENT" - -def p_command_for_bad_final(p): - '''command : FOR ID EQUALS expr TO error optstep''' - p[0] = "BAD FINAL VALUE IN FOR STATEMENT" - -def p_command_for_bad_step(p): - '''command : FOR ID EQUALS expr TO expr STEP error''' - p[0] = "MALFORMED STEP IN FOR STATEMENT" - -#### Optional STEP qualifier on FOR statement - -def p_optstep(p): - '''optstep : STEP expr - | empty''' - if len(p) == 3: - p[0] = p[2] - else: - p[0] = None - -#### NEXT statement - -def p_command_next(p): - '''command : NEXT ID''' - - p[0] = ('NEXT',p[2]) - -def p_command_next_bad(p): - '''command : NEXT error''' - p[0] = "MALFORMED NEXT" - -#### END statement - -def p_command_end(p): - '''command : END''' - p[0] = ('END',) - -#### REM statement - -def p_command_rem(p): - '''command : REM''' - p[0] = ('REM',p[1]) - -#### STOP statement - -def p_command_stop(p): - '''command : STOP''' - p[0] = ('STOP',) - -#### DEF statement - -def p_command_def(p): - '''command : DEF ID LPAREN ID RPAREN EQUALS expr''' - p[0] = ('FUNC',p[2],p[4],p[7]) - -def p_command_def_bad_rhs(p): - 
'''command : DEF ID LPAREN ID RPAREN EQUALS error''' - p[0] = "BAD EXPRESSION IN DEF STATEMENT" - -def p_command_def_bad_arg(p): - '''command : DEF ID LPAREN error RPAREN EQUALS expr''' - p[0] = "BAD ARGUMENT IN DEF STATEMENT" - -#### GOSUB statement - -def p_command_gosub(p): - '''command : GOSUB INTEGER''' - p[0] = ('GOSUB',int(p[2])) - -def p_command_gosub_bad(p): - '''command : GOSUB error''' - p[0] = "INVALID LINE NUMBER IN GOSUB" - -#### RETURN statement - -def p_command_return(p): - '''command : RETURN''' - p[0] = ('RETURN',) - -#### DIM statement - -def p_command_dim(p): - '''command : DIM dimlist''' - p[0] = ('DIM',p[2]) - -def p_command_dim_bad(p): - '''command : DIM error''' - p[0] = "MALFORMED VARIABLE LIST IN DIM" - -#### List of variables supplied to DIM statement - -def p_dimlist(p): - '''dimlist : dimlist COMMA dimitem - | dimitem''' - if len(p) == 4: - p[0] = p[1] - p[0].append(p[3]) - else: - p[0] = [p[1]] - -#### DIM items - -def p_dimitem_single(p): - '''dimitem : ID LPAREN INTEGER RPAREN''' - p[0] = (p[1],eval(p[3]),0) - -def p_dimitem_double(p): - '''dimitem : ID LPAREN INTEGER COMMA INTEGER RPAREN''' - p[0] = (p[1],eval(p[3]),eval(p[5])) - -#### Arithmetic expressions - -def p_expr_binary(p): - '''expr : expr PLUS expr - | expr MINUS expr - | expr TIMES expr - | expr DIVIDE expr - | expr POWER expr''' - - p[0] = ('BINOP',p[2],p[1],p[3]) - -def p_expr_number(p): - '''expr : INTEGER - | FLOAT''' - p[0] = ('NUM',eval(p[1])) - -def p_expr_variable(p): - '''expr : variable''' - p[0] = ('VAR',p[1]) - -def p_expr_group(p): - '''expr : LPAREN expr RPAREN''' - p[0] = ('GROUP',p[2]) - -def p_expr_unary(p): - '''expr : MINUS expr %prec UMINUS''' - p[0] = ('UNARY','-',p[2]) - -#### Relational expressions - -def p_relexpr(p): - '''relexpr : expr LT expr - | expr LE expr - | expr GT expr - | expr GE expr - | expr EQUALS expr - | expr NE expr''' - p[0] = ('RELOP',p[2],p[1],p[3]) - -#### Variables - -def p_variable(p): - '''variable : ID - | ID LPAREN expr RPAREN - | ID LPAREN expr COMMA expr RPAREN''' - if len(p) == 2: - p[0] = (p[1],None,None) - elif len(p) == 5: - p[0] = (p[1],p[3],None) - else: - p[0] = (p[1],p[3],p[5]) - -#### Builds a list of variable targets as a Python list - -def p_varlist(p): - '''varlist : varlist COMMA variable - | variable''' - if len(p) > 2: - p[0] = p[1] - p[0].append(p[3]) - else: - p[0] = [p[1]] - - -#### Builds a list of numbers as a Python list - -def p_numlist(p): - '''numlist : numlist COMMA number - | number''' - - if len(p) > 2: - p[0] = p[1] - p[0].append(p[3]) - else: - p[0] = [p[1]] - -#### A number. May be an integer or a float - -def p_number(p): - '''number : INTEGER - | FLOAT''' - p[0] = eval(p[1]) - -#### A signed number. 
- -def p_number_signed(p): - '''number : MINUS INTEGER - | MINUS FLOAT''' - p[0] = eval("-"+p[2]) - -#### List of targets for a print statement -#### Returns a list of tuples (label,expr) - -def p_plist(p): - '''plist : plist COMMA pitem - | pitem''' - if len(p) > 3: - p[0] = p[1] - p[0].append(p[3]) - else: - p[0] = [p[1]] - -def p_item_string(p): - '''pitem : STRING''' - p[0] = (p[1][1:-1],None) - -def p_item_string_expr(p): - '''pitem : STRING expr''' - p[0] = (p[1][1:-1],p[2]) - -def p_item_expr(p): - '''pitem : expr''' - p[0] = ("",p[1]) - -#### Empty - -def p_empty(p): - '''empty : ''' - -#### Catastrophic error handler -def p_error(p): - if not p: - print "SYNTAX ERROR AT EOF" - -bparser = yacc.yacc() - -def parse(data): - bparser.error = 0 - p = bparser.parse(data) - if bparser.error: return None - return p - - - - - - - - - - - - - - diff --git a/chall/ply-2.2/example/BASIC/dim.bas b/chall/ply-2.2/example/BASIC/dim.bas deleted file mode 100644 index 87bd95b..0000000 --- a/chall/ply-2.2/example/BASIC/dim.bas +++ /dev/null @@ -1,14 +0,0 @@ -5 DIM A(50,15) -10 FOR I = 1 TO 50 -20 FOR J = 1 TO 15 -30 LET A(I,J) = I + J -35 REM PRINT I,J, A(I,J) -40 NEXT J -50 NEXT I -100 FOR I = 1 TO 50 -110 FOR J = 1 TO 15 -120 PRINT A(I,J), -130 NEXT J -140 PRINT -150 NEXT I -999 END diff --git a/chall/ply-2.2/example/BASIC/func.bas b/chall/ply-2.2/example/BASIC/func.bas deleted file mode 100644 index 447ee16..0000000 --- a/chall/ply-2.2/example/BASIC/func.bas +++ /dev/null @@ -1,5 +0,0 @@ -10 DEF FDX(X) = 2*X -20 FOR I = 0 TO 100 -30 PRINT FDX(I) -40 NEXT I -50 END diff --git a/chall/ply-2.2/example/BASIC/gcd.bas b/chall/ply-2.2/example/BASIC/gcd.bas deleted file mode 100644 index d0b7746..0000000 --- a/chall/ply-2.2/example/BASIC/gcd.bas +++ /dev/null @@ -1,22 +0,0 @@ -10 PRINT "A","B","C","GCD" -20 READ A,B,C -30 LET X = A -40 LET Y = B -50 GOSUB 200 -60 LET X = G -70 LET Y = C -80 GOSUB 200 -90 PRINT A, B, C, G -100 GOTO 20 -110 DATA 60, 90, 120 -120 DATA 38456, 64872, 98765 -130 DATA 32, 384, 72 -200 LET Q = INT(X/Y) -210 LET R = X - Q*Y -220 IF R = 0 THEN 300 -230 LET X = Y -240 LET Y = R -250 GOTO 200 -300 LET G = Y -310 RETURN -999 END diff --git a/chall/ply-2.2/example/BASIC/gosub.bas b/chall/ply-2.2/example/BASIC/gosub.bas deleted file mode 100644 index 99737b1..0000000 --- a/chall/ply-2.2/example/BASIC/gosub.bas +++ /dev/null @@ -1,13 +0,0 @@ -100 LET X = 3 -110 GOSUB 400 -120 PRINT U, V, W -200 LET X = 5 -210 GOSUB 400 -220 LET Z = U + 2*V + 3*W -230 PRINT Z -240 GOTO 999 -400 LET U = X*X -410 LET V = X*X*X -420 LET W = X*X*X*X + X*X*X + X*X + X -430 RETURN -999 END diff --git a/chall/ply-2.2/example/BASIC/hello.bas b/chall/ply-2.2/example/BASIC/hello.bas deleted file mode 100644 index cc6f0b0..0000000 --- a/chall/ply-2.2/example/BASIC/hello.bas +++ /dev/null @@ -1,4 +0,0 @@ -5 REM HELLO WORLD PROGAM -10 PRINT "HELLO WORLD" -99 END - diff --git a/chall/ply-2.2/example/BASIC/linear.bas b/chall/ply-2.2/example/BASIC/linear.bas deleted file mode 100644 index 56c0822..0000000 --- a/chall/ply-2.2/example/BASIC/linear.bas +++ /dev/null @@ -1,17 +0,0 @@ -1 REM ::: SOLVE A SYSTEM OF LINEAR EQUATIONS -2 REM ::: A1*X1 + A2*X2 = B1 -3 REM ::: A3*X1 + A4*X2 = B2 -4 REM -------------------------------------- -10 READ A1, A2, A3, A4 -15 LET D = A1 * A4 - A3 * A2 -20 IF D = 0 THEN 65 -30 READ B1, B2 -37 LET X1 = (B1*A4 - B2*A2) / D -42 LET X2 = (A1*B2 - A3*B1) / D -55 PRINT X1, X2 -60 GOTO 30 -65 PRINT "NO UNIQUE SOLUTION" -70 DATA 1, 2, 4 -80 DATA 2, -7, 5 -85 DATA 1, 3, 4, -7 -90 END diff --git 
a/chall/ply-2.2/example/BASIC/maxsin.bas b/chall/ply-2.2/example/BASIC/maxsin.bas deleted file mode 100644 index b969015..0000000 --- a/chall/ply-2.2/example/BASIC/maxsin.bas +++ /dev/null @@ -1,12 +0,0 @@ -5 PRINT "X VALUE", "SINE", "RESOLUTION" -10 READ D -20 LET M = -1 -30 FOR X = 0 TO 3 STEP D -40 IF SIN(X) <= M THEN 80 -50 LET X0 = X -60 LET M = SIN(X) -80 NEXT X -85 PRINT X0, M, D -90 GOTO 10 -100 DATA .1, .01, .001 -110 END diff --git a/chall/ply-2.2/example/BASIC/powers.bas b/chall/ply-2.2/example/BASIC/powers.bas deleted file mode 100644 index a454dc3..0000000 --- a/chall/ply-2.2/example/BASIC/powers.bas +++ /dev/null @@ -1,13 +0,0 @@ -5 PRINT "THIS PROGRAM COMPUTES AND PRINTS THE NTH POWERS" -6 PRINT "OF THE NUMBERS LESS THAN OR EQUAL TO N FOR VARIOUS" -7 PRINT "N FROM 1 THROUGH 7" -8 PRINT -10 FOR N = 1 TO 7 -15 PRINT "N = "N -20 FOR I = 1 TO N -30 PRINT I^N, -40 NEXT I -50 PRINT -60 PRINT -70 NEXT N -80 END diff --git a/chall/ply-2.2/example/BASIC/rand.bas b/chall/ply-2.2/example/BASIC/rand.bas deleted file mode 100644 index 4ff7a14..0000000 --- a/chall/ply-2.2/example/BASIC/rand.bas +++ /dev/null @@ -1,4 +0,0 @@ -10 FOR I = 1 TO 20 -20 PRINT INT(10*RND(0)) -30 NEXT I -40 END diff --git a/chall/ply-2.2/example/BASIC/sales.bas b/chall/ply-2.2/example/BASIC/sales.bas deleted file mode 100644 index a39aefb..0000000 --- a/chall/ply-2.2/example/BASIC/sales.bas +++ /dev/null @@ -1,20 +0,0 @@ -10 FOR I = 1 TO 3 -20 READ P(I) -30 NEXT I -40 FOR I = 1 TO 3 -50 FOR J = 1 TO 5 -60 READ S(I,J) -70 NEXT J -80 NEXT I -90 FOR J = 1 TO 5 -100 LET S = 0 -110 FOR I = 1 TO 3 -120 LET S = S + P(I) * S(I,J) -130 NEXT I -140 PRINT "TOTAL SALES FOR SALESMAN"J, "$"S -150 NEXT J -200 DATA 1.25, 4.30, 2.50 -210 DATA 40, 20, 37, 29, 42 -220 DATA 10, 16, 3, 21, 8 -230 DATA 35, 47, 29, 16, 33 -300 END diff --git a/chall/ply-2.2/example/BASIC/sears.bas b/chall/ply-2.2/example/BASIC/sears.bas deleted file mode 100644 index 5ced397..0000000 --- a/chall/ply-2.2/example/BASIC/sears.bas +++ /dev/null @@ -1,18 +0,0 @@ -1 REM :: THIS PROGRAM COMPUTES HOW MANY TIMES YOU HAVE TO FOLD -2 REM :: A PIECE OF PAPER SO THAT IT IS TALLER THAN THE -3 REM :: SEARS TOWER. 
-4 REM :: S = HEIGHT OF TOWER (METERS) -5 REM :: T = THICKNESS OF PAPER (MILLIMETERS) -10 LET S = 442 -20 LET T = 0.1 -30 REM CONVERT T TO METERS -40 LET T = T * .001 -50 LET F = 1 -60 LET H = T -100 IF H > S THEN 200 -120 LET H = 2 * H -125 LET F = F + 1 -130 GOTO 100 -200 PRINT "NUMBER OF FOLDS ="F -220 PRINT "FINAL HEIGHT ="H -999 END diff --git a/chall/ply-2.2/example/BASIC/sqrt1.bas b/chall/ply-2.2/example/BASIC/sqrt1.bas deleted file mode 100644 index 6673a91..0000000 --- a/chall/ply-2.2/example/BASIC/sqrt1.bas +++ /dev/null @@ -1,5 +0,0 @@ -10 LET X = 0 -20 LET X = X + 1 -30 PRINT X, SQR(X) -40 IF X < 100 THEN 20 -50 END diff --git a/chall/ply-2.2/example/BASIC/sqrt2.bas b/chall/ply-2.2/example/BASIC/sqrt2.bas deleted file mode 100644 index 862d85e..0000000 --- a/chall/ply-2.2/example/BASIC/sqrt2.bas +++ /dev/null @@ -1,4 +0,0 @@ -10 FOR X = 1 TO 100 -20 PRINT X, SQR(X) -30 NEXT X -40 END diff --git a/chall/ply-2.2/example/GardenSnake/GardenSnake.py b/chall/ply-2.2/example/GardenSnake/GardenSnake.py deleted file mode 100644 index 2a7f45e..0000000 --- a/chall/ply-2.2/example/GardenSnake/GardenSnake.py +++ /dev/null @@ -1,709 +0,0 @@ -# GardenSnake - a parser generator demonstration program -# -# This implements a modified version of a subset of Python: -# - only 'def', 'return' and 'if' statements -# - 'if' only has 'then' clause (no elif nor else) -# - single-quoted strings only, content in raw format -# - numbers are decimal.Decimal instances (not integers or floats) -# - no print statment; use the built-in 'print' function -# - only < > == + - / * implemented (and unary + -) -# - assignment and tuple assignment work -# - no generators of any sort -# - no ... well, no quite a lot - -# Why? I'm thinking about a new indentation-based configuration -# language for a project and wanted to figure out how to do it. Once -# I got that working I needed a way to test it out. My original AST -# was dumb so I decided to target Python's AST and compile it into -# Python code. Plus, it's pretty cool that it only took a day or so -# from sitting down with Ply to having working code. - -# This uses David Beazley's Ply from http://www.dabeaz.com/ply/ - -# This work is hereby released into the Public Domain. To view a copy of -# the public domain dedication, visit -# http://creativecommons.org/licenses/publicdomain/ or send a letter to -# Creative Commons, 543 Howard Street, 5th Floor, San Francisco, -# California, 94105, USA. -# -# Portions of this work are derived from Python's Grammar definition -# and may be covered under the Python copyright and license -# -# Andrew Dalke / Dalke Scientific Software, LLC -# 30 August 2006 / Cape Town, South Africa - -# Changelog: -# 30 August - added link to CC license; removed the "swapcase" encoding - -# Modifications for inclusion in PLY distribution -import sys -sys.path.insert(0,"../..") -from ply import * - -##### Lexer ###### -#import lex -import decimal - -tokens = ( - 'DEF', - 'IF', - 'NAME', - 'NUMBER', # Python decimals - 'STRING', # single quoted strings only; syntax of raw strings - 'LPAR', - 'RPAR', - 'COLON', - 'EQ', - 'ASSIGN', - 'LT', - 'GT', - 'PLUS', - 'MINUS', - 'MULT', - 'DIV', - 'RETURN', - 'WS', - 'NEWLINE', - 'COMMA', - 'SEMICOLON', - 'INDENT', - 'DEDENT', - 'ENDMARKER', - ) - -#t_NUMBER = r'\d+' -# taken from decmial.py but without the leading sign -def t_NUMBER(t): - r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?""" - t.value = decimal.Decimal(t.value) - return t - -def t_STRING(t): - r"'([^\\']+|\\'|\\\\)*'" # I think this is right ... 
- t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun - return t - -t_COLON = r':' -t_EQ = r'==' -t_ASSIGN = r'=' -t_LT = r'<' -t_GT = r'>' -t_PLUS = r'\+' -t_MINUS = r'-' -t_MULT = r'\*' -t_DIV = r'/' -t_COMMA = r',' -t_SEMICOLON = r';' - -# Ply nicely documented how to do this. - -RESERVED = { - "def": "DEF", - "if": "IF", - "return": "RETURN", - } - -def t_NAME(t): - r'[a-zA-Z_][a-zA-Z0-9_]*' - t.type = RESERVED.get(t.value, "NAME") - return t - -# Putting this before t_WS let it consume lines with only comments in -# them so the latter code never sees the WS part. Not consuming the -# newline. Needed for "if 1: #comment" -def t_comment(t): - r"[ ]*\043[^\n]*" # \043 is '#' - pass - - -# Whitespace -def t_WS(t): - r' [ ]+ ' - if t.lexer.at_line_start and t.lexer.paren_count == 0: - return t - -# Don't generate newline tokens when inside of parenthesis, eg -# a = (1, -# 2, 3) -def t_newline(t): - r'\n+' - t.lexer.lineno += len(t.value) - t.type = "NEWLINE" - if t.lexer.paren_count == 0: - return t - -def t_LPAR(t): - r'\(' - t.lexer.paren_count += 1 - return t - -def t_RPAR(t): - r'\)' - # check for underflow? should be the job of the parser - t.lexer.paren_count -= 1 - return t - - -def t_error(t): - raise SyntaxError("Unknown symbol %r" % (t.value[0],)) - print "Skipping", repr(t.value[0]) - t.lexer.skip(1) - -## I implemented INDENT / DEDENT generation as a post-processing filter - -# The original lex token stream contains WS and NEWLINE characters. -# WS will only occur before any other tokens on a line. - -# I have three filters. One tags tokens by adding two attributes. -# "must_indent" is True if the token must be indented from the -# previous code. The other is "at_line_start" which is True for WS -# and the first non-WS/non-NEWLINE on a line. It flags the check so -# see if the new line has changed indication level. - -# Python's syntax has three INDENT states -# 0) no colon hence no need to indent -# 1) "if 1: go()" - simple statements have a COLON but no need for an indent -# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent -NO_INDENT = 0 -MAY_INDENT = 1 -MUST_INDENT = 2 - -# only care about whitespace at the start of a line -def track_tokens_filter(lexer, tokens): - lexer.at_line_start = at_line_start = True - indent = NO_INDENT - saw_colon = False - for token in tokens: - token.at_line_start = at_line_start - - if token.type == "COLON": - at_line_start = False - indent = MAY_INDENT - token.must_indent = False - - elif token.type == "NEWLINE": - at_line_start = True - if indent == MAY_INDENT: - indent = MUST_INDENT - token.must_indent = False - - elif token.type == "WS": - assert token.at_line_start == True - at_line_start = True - token.must_indent = False - - else: - # A real token; only indent after COLON NEWLINE - if indent == MUST_INDENT: - token.must_indent = True - else: - token.must_indent = False - at_line_start = False - indent = NO_INDENT - - yield token - lexer.at_line_start = at_line_start - -def _new_token(type, lineno): - tok = lex.LexToken() - tok.type = type - tok.value = None - tok.lineno = lineno - return tok - -# Synthesize a DEDENT tag -def DEDENT(lineno): - return _new_token("DEDENT", lineno) - -# Synthesize an INDENT tag -def INDENT(lineno): - return _new_token("INDENT", lineno) - - -# Track the indentation level and emit the right INDENT / DEDENT events. 
-def indentation_filter(tokens): - # A stack of indentation levels; will never pop item 0 - levels = [0] - token = None - depth = 0 - prev_was_ws = False - for token in tokens: -## if 1: -## print "Process", token, -## if token.at_line_start: -## print "at_line_start", -## if token.must_indent: -## print "must_indent", -## print - - # WS only occurs at the start of the line - # There may be WS followed by NEWLINE so - # only track the depth here. Don't indent/dedent - # until there's something real. - if token.type == "WS": - assert depth == 0 - depth = len(token.value) - prev_was_ws = True - # WS tokens are never passed to the parser - continue - - if token.type == "NEWLINE": - depth = 0 - if prev_was_ws or token.at_line_start: - # ignore blank lines - continue - # pass the other cases on through - yield token - continue - - # then it must be a real token (not WS, not NEWLINE) - # which can affect the indentation level - - prev_was_ws = False - if token.must_indent: - # The current depth must be larger than the previous level - if not (depth > levels[-1]): - raise IndentationError("expected an indented block") - - levels.append(depth) - yield INDENT(token.lineno) - - elif token.at_line_start: - # Must be on the same level or one of the previous levels - if depth == levels[-1]: - # At the same level - pass - elif depth > levels[-1]: - raise IndentationError("indentation increase but not in new block") - else: - # Back up; but only if it matches a previous level - try: - i = levels.index(depth) - except ValueError: - raise IndentationError("inconsistent indentation") - for _ in range(i+1, len(levels)): - yield DEDENT(token.lineno) - levels.pop() - - yield token - - ### Finished processing ### - - # Must dedent any remaining levels - if len(levels) > 1: - assert token is not None - for _ in range(1, len(levels)): - yield DEDENT(token.lineno) - - -# The top-level filter adds an ENDMARKER, if requested. -# Python's grammar uses it. 
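-# The start rule below (file_input_end) only reduces once it has seen
-# ENDMARKER, so a token stream that ends without one is rejected as a
-# syntax error.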
-def filter(lexer, add_endmarker = True):
-    token = None
-    tokens = iter(lexer.token, None)
-    tokens = track_tokens_filter(lexer, tokens)
-    for token in indentation_filter(tokens):
-        yield token
-
-    if add_endmarker:
-        lineno = 1
-        if token is not None:
-            lineno = token.lineno
-        yield _new_token("ENDMARKER", lineno)
-
-# Combine Ply and my filters into a new lexer
-
-class IndentLexer(object):
-    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
-        self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags)
-        self.token_stream = None
-    def input(self, s, add_endmarker=True):
-        self.lexer.paren_count = 0
-        self.lexer.input(s)
-        self.token_stream = filter(self.lexer, add_endmarker)
-    def token(self):
-        try:
-            return self.token_stream.next()
-        except StopIteration:
-            return None
-
-##########   Parser (tokens -> AST) ######
-
-# also part of Ply
-#import yacc
-
-# I use the Python AST
-from compiler import ast
-
-# Helper function
-def Assign(left, right):
-    names = []
-    if isinstance(left, ast.Name):
-        # Single assignment on left
-        return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right)
-    elif isinstance(left, ast.Tuple):
-        # List of things - make sure they are Name nodes
-        names = []
-        for child in left.getChildren():
-            if not isinstance(child, ast.Name):
-                raise SyntaxError("that assignment not supported")
-            names.append(child.name)
-        ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names]
-        return ast.Assign([ast.AssTuple(ass_list)], right)
-    else:
-        raise SyntaxError("Can't do that yet")
-
-
-# The grammar comments come from Python's Grammar/Grammar file
-
-## NB: compound_stmt in single_input is followed by extra NEWLINE!
-# file_input: (NEWLINE | stmt)* ENDMARKER
-def p_file_input_end(p):
-    """file_input_end : file_input ENDMARKER"""
-    p[0] = ast.Stmt(p[1])
-def p_file_input(p):
-    """file_input : file_input NEWLINE
-                  | file_input stmt
-                  | NEWLINE
-                  | stmt"""
-    if isinstance(p[len(p)-1], basestring):
-        if len(p) == 3:
-            p[0] = p[1]
-        else:
-            p[0] = []  # len(p) == 2 --> only a blank line
-    else:
-        if len(p) == 3:
-            p[0] = p[1] + p[2]
-        else:
-            p[0] = p[1]
-
-
-# funcdef: [decorators] 'def' NAME parameters ':' suite
-# ignoring decorators
-def p_funcdef(p):
-    "funcdef : DEF NAME parameters COLON suite"
-    p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5])
-
-# parameters: '(' [varargslist] ')'
-def p_parameters(p):
-    """parameters : LPAR RPAR
-                  | LPAR varargslist RPAR"""
-    if len(p) == 3:
-        p[0] = []
-    else:
-        p[0] = p[2]
-
-
-# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) |
-# highly simplified
-def p_varargslist(p):
-    """varargslist : varargslist COMMA NAME
-                   | NAME"""
-    if len(p) == 4:
-        p[0] = p[1] + [p[3]]  # p[1] is already a list; wrap the new NAME
-    else:
-        p[0] = [p[1]]
-
-# stmt: simple_stmt | compound_stmt
-def p_stmt_simple(p):
-    """stmt : simple_stmt"""
-    # simple_stmt is a list
-    p[0] = p[1]
-
-def p_stmt_compound(p):
-    """stmt : compound_stmt"""
-    p[0] = [p[1]]
-
-# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
-def p_simple_stmt(p):
-    """simple_stmt : small_stmts NEWLINE
-                   | small_stmts SEMICOLON NEWLINE"""
-    p[0] = p[1]
-
-def p_small_stmts(p):
-    """small_stmts : small_stmts SEMICOLON small_stmt
-                   | small_stmt"""
-    if len(p) == 4:
-        p[0] = p[1] + [p[3]]
-    else:
-        p[0] = [p[1]]
-
-# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt |
-#    import_stmt | global_stmt | exec_stmt | assert_stmt
-def p_small_stmt(p):
-    """small_stmt : flow_stmt
-                  | expr_stmt"""
-    p[0] = p[1]
-
-# expr_stmt: testlist
(augassign (yield_expr|testlist) | -# ('=' (yield_expr|testlist))*) -# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | -# '<<=' | '>>=' | '**=' | '//=') -def p_expr_stmt(p): - """expr_stmt : testlist ASSIGN testlist - | testlist """ - if len(p) == 2: - # a list of expressions - p[0] = ast.Discard(p[1]) - else: - p[0] = Assign(p[1], p[3]) - -def p_flow_stmt(p): - "flow_stmt : return_stmt" - p[0] = p[1] - -# return_stmt: 'return' [testlist] -def p_return_stmt(p): - "return_stmt : RETURN testlist" - p[0] = ast.Return(p[2]) - - -def p_compound_stmt(p): - """compound_stmt : if_stmt - | funcdef""" - p[0] = p[1] - -def p_if_stmt(p): - 'if_stmt : IF test COLON suite' - p[0] = ast.If([(p[2], p[4])], None) - -def p_suite(p): - """suite : simple_stmt - | NEWLINE INDENT stmts DEDENT""" - if len(p) == 2: - p[0] = ast.Stmt(p[1]) - else: - p[0] = ast.Stmt(p[3]) - - -def p_stmts(p): - """stmts : stmts stmt - | stmt""" - if len(p) == 3: - p[0] = p[1] + p[2] - else: - p[0] = p[1] - -## No using Python's approach because Ply supports precedence - -# comparison: expr (comp_op expr)* -# arith_expr: term (('+'|'-') term)* -# term: factor (('*'|'/'|'%'|'//') factor)* -# factor: ('+'|'-'|'~') factor | power -# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' - -def make_lt_compare((left, right)): - return ast.Compare(left, [('<', right),]) -def make_gt_compare((left, right)): - return ast.Compare(left, [('>', right),]) -def make_eq_compare((left, right)): - return ast.Compare(left, [('==', right),]) - - -binary_ops = { - "+": ast.Add, - "-": ast.Sub, - "*": ast.Mul, - "/": ast.Div, - "<": make_lt_compare, - ">": make_gt_compare, - "==": make_eq_compare, -} -unary_ops = { - "+": ast.UnaryAdd, - "-": ast.UnarySub, - } -precedence = ( - ("left", "EQ", "GT", "LT"), - ("left", "PLUS", "MINUS"), - ("left", "MULT", "DIV"), - ) - -def p_comparison(p): - """comparison : comparison PLUS comparison - | comparison MINUS comparison - | comparison MULT comparison - | comparison DIV comparison - | comparison LT comparison - | comparison EQ comparison - | comparison GT comparison - | PLUS comparison - | MINUS comparison - | power""" - if len(p) == 4: - p[0] = binary_ops[p[2]]((p[1], p[3])) - elif len(p) == 3: - p[0] = unary_ops[p[1]](p[2]) - else: - p[0] = p[1] - -# power: atom trailer* ['**' factor] -# trailers enables function calls. I only allow one level of calls -# so this is 'trailer' -def p_power(p): - """power : atom - | atom trailer""" - if len(p) == 2: - p[0] = p[1] - else: - if p[2][0] == "CALL": - p[0] = ast.CallFunc(p[1], p[2][1], None, None) - else: - raise AssertionError("not implemented") - -def p_atom_name(p): - """atom : NAME""" - p[0] = ast.Name(p[1]) - -def p_atom_number(p): - """atom : NUMBER - | STRING""" - p[0] = ast.Const(p[1]) - -def p_atom_tuple(p): - """atom : LPAR testlist RPAR""" - p[0] = p[2] - -# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -def p_trailer(p): - "trailer : LPAR arglist RPAR" - p[0] = ("CALL", p[2]) - -# testlist: test (',' test)* [','] -# Contains shift/reduce error -def p_testlist(p): - """testlist : testlist_multi COMMA - | testlist_multi """ - if len(p) == 2: - p[0] = p[1] - else: - # May need to promote singleton to tuple - if isinstance(p[1], list): - p[0] = p[1] - else: - p[0] = [p[1]] - # Convert into a tuple? 
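-        # (Yes: a trailing comma always yields a Tuple node here,
-        # matching Python's own "x," semantics.)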
- if isinstance(p[0], list): - p[0] = ast.Tuple(p[0]) - -def p_testlist_multi(p): - """testlist_multi : testlist_multi COMMA test - | test""" - if len(p) == 2: - # singleton - p[0] = p[1] - else: - if isinstance(p[1], list): - p[0] = p[1] + [p[3]] - else: - # singleton -> tuple - p[0] = [p[1], p[3]] - - -# test: or_test ['if' or_test 'else' test] | lambdef -# as I don't support 'and', 'or', and 'not' this works down to 'comparison' -def p_test(p): - "test : comparison" - p[0] = p[1] - - - -# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) -# XXX INCOMPLETE: this doesn't allow the trailing comma -def p_arglist(p): - """arglist : arglist COMMA argument - | argument""" - if len(p) == 4: - p[0] = p[1] + [p[3]] - else: - p[0] = [p[1]] - -# argument: test [gen_for] | test '=' test # Really [keyword '='] test -def p_argument(p): - "argument : test" - p[0] = p[1] - -def p_error(p): - #print "Error!", repr(p) - raise SyntaxError(p) - - -class GardenSnakeParser(object): - def __init__(self, lexer = None): - if lexer is None: - lexer = IndentLexer() - self.lexer = lexer - self.parser = yacc.yacc(start="file_input_end") - - def parse(self, code): - self.lexer.input(code) - result = self.parser.parse(lexer = self.lexer) - return ast.Module(None, result) - - -###### Code generation ###### - -from compiler import misc, syntax, pycodegen - -class GardenSnakeCompiler(object): - def __init__(self): - self.parser = GardenSnakeParser() - def compile(self, code, filename="<string>"): - tree = self.parser.parse(code) - #print tree - misc.set_filename(filename, tree) - syntax.check(tree) - gen = pycodegen.ModuleCodeGenerator(tree) - code = gen.getCode() - return code - -####### Test code ####### - -compile = GardenSnakeCompiler().compile - -code = r""" - -print('LET\'S TRY THIS \\OUT') - -#Comment here -def x(a): - print('called with',a) - if a == 1: - return 2 - if a*2 > 10: return 999 / 4 - # Another comment here - - return a+2*3 - -ints = (1, 2, - 3, 4, -5) -print('mutiline-expression', ints) - -t = 4+1/3*2+6*(9-5+1) -print('predence test; should be 34+2/3:', t, t==(34+2/3)) - -print('numbers', 1,2,3,4,5) -if 1: - 8 - a=9 - print(x(a)) - -print(x(1)) -print(x(2)) -print(x(8),'3') -print('this is decimal', 1/5) -print('BIG DECIMAL', 1.234567891234567e12345) - -""" - -# Set up the GardenSnake run-time environment -def print_(*args): - print "-->", " ".join(map(str,args)) - -globals()["print"] = print_ - -compiled_code = compile(code) - -exec compiled_code in globals() -print "Done" diff --git a/chall/ply-2.2/example/GardenSnake/README b/chall/ply-2.2/example/GardenSnake/README deleted file mode 100644 index 4d8be2d..0000000 --- a/chall/ply-2.2/example/GardenSnake/README +++ /dev/null @@ -1,5 +0,0 @@ -This example is Andrew Dalke's GardenSnake language. It shows how to process an -indentation-like language like Python. Further details can be found here: - -http://dalkescientific.com/writings/diary/archive/2006/08/30/gardensnake_language.html - diff --git a/chall/ply-2.2/example/README b/chall/ply-2.2/example/README deleted file mode 100644 index 63519b5..0000000 --- a/chall/ply-2.2/example/README +++ /dev/null @@ -1,10 +0,0 @@ -Simple examples: - calc - Simple calculator - classcalc - Simple calculate defined as a class - -Complex examples - ansic - ANSI C grammar from K&R - BASIC - A small BASIC interpreter - GardenSnake - A simple python-like language - yply - Converts Unix yacc files to PLY programs. 
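-
-Each example inserts "../.." into sys.path so it can find ply; run it
-from its own directory, e.g.:
-
-    % cd calc
-    % python calc.py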
-
diff --git a/chall/ply-2.2/example/ansic/README b/chall/ply-2.2/example/ansic/README
deleted file mode 100644
index e049d3b..0000000
--- a/chall/ply-2.2/example/ansic/README
+++ /dev/null
@@ -1,2 +0,0 @@
-This example is incomplete.  It was going to be an ANSI C parser;
-this is part of it.
diff --git a/chall/ply-2.2/example/ansic/clex.py b/chall/ply-2.2/example/ansic/clex.py
deleted file mode 100644
index 6b9d7e7..0000000
--- a/chall/ply-2.2/example/ansic/clex.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# ----------------------------------------------------------------------
-# clex.py
-#
-# A lexer for ANSI C.
-# ----------------------------------------------------------------------
-
-import sys
-sys.path.insert(0,"../..")
-
-import ply.lex as lex
-
-# Reserved words
-reserved = (
-    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
-    'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
-    'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
-    'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
-    )
-
-tokens = reserved + (
-    # Literals (identifier, integer constant, float constant, string constant, char const)
-    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',
-
-    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
-    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
-    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
-    'LOR', 'LAND', 'LNOT',
-    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
-
-    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
-    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
-    'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
-
-    # Increment/decrement (++,--)
-    'PLUSPLUS', 'MINUSMINUS',
-
-    # Structure dereference (->)
-    'ARROW',
-
-    # Conditional operator (?)
-    'CONDOP',
-
-    # Delimiters ( ) [ ] { } , . ; :
-    'LPAREN', 'RPAREN',
-    'LBRACKET', 'RBRACKET',
-    'LBRACE', 'RBRACE',
-    'COMMA', 'PERIOD', 'SEMI', 'COLON',
-
-    # Ellipsis (...)
-    'ELLIPSIS',
-    )
-
-# Completely ignored characters
-t_ignore = ' \t\x0c'
-
-# Newlines
-def t_NEWLINE(t):
-    r'\n+'
-    t.lexer.lineno += t.value.count("\n")
-
-# Operators
-t_PLUS = r'\+'
-t_MINUS = r'-'
-t_TIMES = r'\*'
-t_DIVIDE = r'/'
-t_MOD = r'%'
-t_OR = r'\|'
-t_AND = r'&'
-t_NOT = r'~'
-t_XOR = r'\^'
-t_LSHIFT = r'<<'
-t_RSHIFT = r'>>'
-t_LOR = r'\|\|'
-t_LAND = r'&&'
-t_LNOT = r'!'
-t_LT = r'<'
-t_GT = r'>'
-t_LE = r'<='
-t_GE = r'>='
-t_EQ = r'=='
-t_NE = r'!='
-
-# Assignment operators
-
-t_EQUALS = r'='
-t_TIMESEQUAL = r'\*='
-t_DIVEQUAL = r'/='
-t_MODEQUAL = r'%='
-t_PLUSEQUAL = r'\+='
-t_MINUSEQUAL = r'-='
-t_LSHIFTEQUAL = r'<<='
-t_RSHIFTEQUAL = r'>>='
-t_ANDEQUAL = r'&='
-t_OREQUAL = r'\|='
-t_XOREQUAL = r'\^='   # '^' must be escaped or it acts as an anchor in the master regex
-
-# Increment/decrement
-t_PLUSPLUS = r'\+\+'
-t_MINUSMINUS = r'--'
-
-# ->
-t_ARROW = r'->'
-
-# ?
-t_CONDOP = r'\?'
-
-# Delimiters
-t_LPAREN = r'\('
-t_RPAREN = r'\)'
-t_LBRACKET = r'\['
-t_RBRACKET = r'\]'
-t_LBRACE = r'\{'
-t_RBRACE = r'\}'
-t_COMMA = r','
-t_PERIOD = r'\.'
-t_SEMI = r';'
-t_COLON = r':'
-t_ELLIPSIS = r'\.\.\.'
-
-# Identifiers and reserved words
-
-reserved_map = { }
-for r in reserved:
-    reserved_map[r.lower()] = r
-
-def t_ID(t):
-    r'[A-Za-z_][\w_]*'
-    t.type = reserved_map.get(t.value,"ID")
-    return t
-
-# Integer literal
-t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
-
-# Floating literal
-t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
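-# (ply compiles token rules with re.VERBOSE, so the embedded spaces are
-# ignored; e.g. this matches "1.5", "1.5e-3", "3e8" and "2.0f", but not
-# "1." or ".5", since digits are required on both sides of the '.')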
- -# String literal -t_SCONST = r'\"([^\\\n]|(\\.))*?\"' - -# Character constant 'c' or L'c' -t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\'' - -# Comments -def t_comment(t): - r' /\*(.|\n)*?\*/' - t.lineno += t.value.count('\n') - -# Preprocessor directive (ignored) -def t_preprocessor(t): - r'\#(.)*?\n' - t.lineno += 1 - -def t_error(t): - print "Illegal character %s" % repr(t.value[0]) - t.lexer.skip(1) - -lexer = lex.lex(optimize=1) -if __name__ == "__main__": - lex.runmain(lexer) - - - - - diff --git a/chall/ply-2.2/example/ansic/cparse.py b/chall/ply-2.2/example/ansic/cparse.py deleted file mode 100644 index b6a0c42..0000000 --- a/chall/ply-2.2/example/ansic/cparse.py +++ /dev/null @@ -1,863 +0,0 @@ -# ----------------------------------------------------------------------------- -# cparse.py -# -# Simple parser for ANSI C. Based on the grammar in K&R, 2nd Ed. -# ----------------------------------------------------------------------------- - -import sys -import clex -import ply.yacc as yacc - -# Get the token map -tokens = clex.tokens - -# translation-unit: - -def p_translation_unit_1(t): - 'translation_unit : external_declaration' - pass - -def p_translation_unit_2(t): - 'translation_unit : translation_unit external_declaration' - pass - -# external-declaration: - -def p_external_declaration_1(t): - 'external_declaration : function_definition' - pass - -def p_external_declaration_2(t): - 'external_declaration : declaration' - pass - -# function-definition: - -def p_function_definition_1(t): - 'function_definition : declaration_specifiers declarator declaration_list compound_statement' - pass - -def p_function_definition_2(t): - 'function_definition : declarator declaration_list compound_statement' - pass - -def p_function_definition_3(t): - 'function_definition : declarator compound_statement' - pass - -def p_function_definition_4(t): - 'function_definition : declaration_specifiers declarator compound_statement' - pass - -# declaration: - -def p_declaration_1(t): - 'declaration : declaration_specifiers init_declarator_list SEMI' - pass - -def p_declaration_2(t): - 'declaration : declaration_specifiers SEMI' - pass - -# declaration-list: - -def p_declaration_list_1(t): - 'declaration_list : declaration' - pass - -def p_declaration_list_2(t): - 'declaration_list : declaration_list declaration ' - pass - -# declaration-specifiers -def p_declaration_specifiers_1(t): - 'declaration_specifiers : storage_class_specifier declaration_specifiers' - pass - -def p_declaration_specifiers_2(t): - 'declaration_specifiers : type_specifier declaration_specifiers' - pass - -def p_declaration_specifiers_3(t): - 'declaration_specifiers : type_qualifier declaration_specifiers' - pass - -def p_declaration_specifiers_4(t): - 'declaration_specifiers : storage_class_specifier' - pass - -def p_declaration_specifiers_5(t): - 'declaration_specifiers : type_specifier' - pass - -def p_declaration_specifiers_6(t): - 'declaration_specifiers : type_qualifier' - pass - -# storage-class-specifier -def p_storage_class_specifier(t): - '''storage_class_specifier : AUTO - | REGISTER - | STATIC - | EXTERN - | TYPEDEF - ''' - pass - -# type-specifier: -def p_type_specifier(t): - '''type_specifier : VOID - | CHAR - | SHORT - | INT - | LONG - | FLOAT - | DOUBLE - | SIGNED - | UNSIGNED - | struct_or_union_specifier - | enum_specifier - | TYPEID - ''' - pass - -# type-qualifier: -def p_type_qualifier(t): - '''type_qualifier : CONST - | VOLATILE''' - pass - -# struct-or-union-specifier - -def p_struct_or_union_specifier_1(t): - 
'struct_or_union_specifier : struct_or_union ID LBRACE struct_declaration_list RBRACE' - pass - -def p_struct_or_union_specifier_2(t): - 'struct_or_union_specifier : struct_or_union LBRACE struct_declaration_list RBRACE' - pass - -def p_struct_or_union_specifier_3(t): - 'struct_or_union_specifier : struct_or_union ID' - pass - -# struct-or-union: -def p_struct_or_union(t): - '''struct_or_union : STRUCT - | UNION - ''' - pass - -# struct-declaration-list: - -def p_struct_declaration_list_1(t): - 'struct_declaration_list : struct_declaration' - pass - -def p_struct_declaration_list_2(t): - 'struct_declaration_list : struct_declarator_list struct_declaration' - pass - -# init-declarator-list: - -def p_init_declarator_list_1(t): - 'init_declarator_list : init_declarator' - pass - -def p_init_declarator_list_2(t): - 'init_declarator_list : init_declarator_list COMMA init_declarator' - pass - -# init-declarator - -def p_init_declarator_1(t): - 'init_declarator : declarator' - pass - -def p_init_declarator_2(t): - 'init_declarator : declarator EQUALS initializer' - pass - -# struct-declaration: - -def p_struct_declaration(t): - 'struct_declaration : specifier_qualifier_list struct_declarator_list SEMI' - pass - -# specifier-qualifier-list: - -def p_specifier_qualifier_list_1(t): - 'specifier_qualifier_list : type_specifier specifier_qualifier_list' - pass - -def p_specifier_qualifier_list_2(t): - 'specifier_qualifier_list : type_specifier' - pass - -def p_specifier_qualifier_list_3(t): - 'specifier_qualifier_list : type_qualifier specifier_qualifier_list' - pass - -def p_specifier_qualifier_list_4(t): - 'specifier_qualifier_list : type_qualifier' - pass - -# struct-declarator-list: - -def p_struct_declarator_list_1(t): - 'struct_declarator_list : struct_declarator' - pass - -def p_struct_declarator_list_2(t): - 'struct_declarator_list : struct_declarator_list COMMA struct_declarator' - pass - -# struct-declarator: - -def p_struct_declarator_1(t): - 'struct_declarator : declarator' - pass - -def p_struct_declarator_2(t): - 'struct_declarator : declarator COLON constant_expression' - pass - -def p_struct_declarator_3(t): - 'struct_declarator : COLON constant_expression' - pass - -# enum-specifier: - -def p_enum_specifier_1(t): - 'enum_specifier : ENUM ID LBRACE enumerator_list RBRACE' - pass - -def p_enum_specifier_2(t): - 'enum_specifier : ENUM LBRACE enumerator_list RBRACE' - pass - -def p_enum_specifier_3(t): - 'enum_specifier : ENUM ID' - pass - -# enumerator_list: -def p_enumerator_list_1(t): - 'enumerator_list : enumerator' - pass - -def p_enumerator_list_2(t): - 'enumerator_list : enumerator_list COMMA enumerator' - pass - -# enumerator: -def p_enumerator_1(t): - 'enumerator : ID' - pass - -def p_enumerator_2(t): - 'enumerator : ID EQUALS constant_expression' - pass - -# declarator: - -def p_declarator_1(t): - 'declarator : pointer direct_declarator' - pass - -def p_declarator_2(t): - 'declarator : direct_declarator' - pass - -# direct-declarator: - -def p_direct_declarator_1(t): - 'direct_declarator : ID' - pass - -def p_direct_declarator_2(t): - 'direct_declarator : LPAREN declarator RPAREN' - pass - -def p_direct_declarator_3(t): - 'direct_declarator : direct_declarator LBRACKET constant_expression_opt RBRACKET' - pass - -def p_direct_declarator_4(t): - 'direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN ' - pass - -def p_direct_declarator_5(t): - 'direct_declarator : direct_declarator LPAREN identifier_list RPAREN ' - pass - -def p_direct_declarator_6(t): - 
'direct_declarator : direct_declarator LPAREN RPAREN ' - pass - -# pointer: -def p_pointer_1(t): - 'pointer : TIMES type_qualifier_list' - pass - -def p_pointer_2(t): - 'pointer : TIMES' - pass - -def p_pointer_3(t): - 'pointer : TIMES type_qualifier_list pointer' - pass - -def p_pointer_4(t): - 'pointer : TIMES pointer' - pass - -# type-qualifier-list: - -def p_type_qualifier_list_1(t): - 'type_qualifier_list : type_qualifier' - pass - -def p_type_qualifier_list_2(t): - 'type_qualifier_list : type_qualifier_list type_qualifier' - pass - -# parameter-type-list: - -def p_parameter_type_list_1(t): - 'parameter_type_list : parameter_list' - pass - -def p_parameter_type_list_2(t): - 'parameter_type_list : parameter_list COMMA ELLIPSIS' - pass - -# parameter-list: - -def p_parameter_list_1(t): - 'parameter_list : parameter_declaration' - pass - -def p_parameter_list_2(t): - 'parameter_list : parameter_list COMMA parameter_declaration' - pass - -# parameter-declaration: -def p_parameter_declaration_1(t): - 'parameter_declaration : declaration_specifiers declarator' - pass - -def p_parameter_declaration_2(t): - 'parameter_declaration : declaration_specifiers abstract_declarator_opt' - pass - -# identifier-list: -def p_identifier_list_1(t): - 'identifier_list : ID' - pass - -def p_identifier_list_2(t): - 'identifier_list : identifier_list COMMA ID' - pass - -# initializer: - -def p_initializer_1(t): - 'initializer : assignment_expression' - pass - -def p_initializer_2(t): - '''initializer : LBRACE initializer_list RBRACE - | LBRACE initializer_list COMMA RBRACE''' - pass - -# initializer-list: - -def p_initializer_list_1(t): - 'initializer_list : initializer' - pass - -def p_initializer_list_2(t): - 'initializer_list : initializer_list COMMA initializer' - pass - -# type-name: - -def p_type_name(t): - 'type_name : specifier_qualifier_list abstract_declarator_opt' - pass - -def p_abstract_declarator_opt_1(t): - 'abstract_declarator_opt : empty' - pass - -def p_abstract_declarator_opt_2(t): - 'abstract_declarator_opt : abstract_declarator' - pass - -# abstract-declarator: - -def p_abstract_declarator_1(t): - 'abstract_declarator : pointer ' - pass - -def p_abstract_declarator_2(t): - 'abstract_declarator : pointer direct_abstract_declarator' - pass - -def p_abstract_declarator_3(t): - 'abstract_declarator : direct_abstract_declarator' - pass - -# direct-abstract-declarator: - -def p_direct_abstract_declarator_1(t): - 'direct_abstract_declarator : LPAREN abstract_declarator RPAREN' - pass - -def p_direct_abstract_declarator_2(t): - 'direct_abstract_declarator : direct_abstract_declarator LBRACKET constant_expression_opt RBRACKET' - pass - -def p_direct_abstract_declarator_3(t): - 'direct_abstract_declarator : LBRACKET constant_expression_opt RBRACKET' - pass - -def p_direct_abstract_declarator_4(t): - 'direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN' - pass - -def p_direct_abstract_declarator_5(t): - 'direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN' - pass - -# Optional fields in abstract declarators - -def p_constant_expression_opt_1(t): - 'constant_expression_opt : empty' - pass - -def p_constant_expression_opt_2(t): - 'constant_expression_opt : constant_expression' - pass - -def p_parameter_type_list_opt_1(t): - 'parameter_type_list_opt : empty' - pass - -def p_parameter_type_list_opt_2(t): - 'parameter_type_list_opt : parameter_type_list' - pass - -# statement: - -def p_statement(t): - ''' - statement : labeled_statement - | 
expression_statement - | compound_statement - | selection_statement - | iteration_statement - | jump_statement - ''' - pass - -# labeled-statement: - -def p_labeled_statement_1(t): - 'labeled_statement : ID COLON statement' - pass - -def p_labeled_statement_2(t): - 'labeled_statement : CASE constant_expression COLON statement' - pass - -def p_labeled_statement_3(t): - 'labeled_statement : DEFAULT COLON statement' - pass - -# expression-statement: -def p_expression_statement(t): - 'expression_statement : expression_opt SEMI' - pass - -# compound-statement: - -def p_compound_statement_1(t): - 'compound_statement : LBRACE declaration_list statement_list RBRACE' - pass - -def p_compound_statement_2(t): - 'compound_statement : LBRACE statement_list RBRACE' - pass - -def p_compound_statement_3(t): - 'compound_statement : LBRACE declaration_list RBRACE' - pass - -def p_compound_statement_4(t): - 'compound_statement : LBRACE RBRACE' - pass - -# statement-list: - -def p_statement_list_1(t): - 'statement_list : statement' - pass - -def p_statement_list_2(t): - 'statement_list : statement_list statement' - pass - -# selection-statement - -def p_selection_statement_1(t): - 'selection_statement : IF LPAREN expression RPAREN statement' - pass - -def p_selection_statement_2(t): - 'selection_statement : IF LPAREN expression RPAREN statement ELSE statement ' - pass - -def p_selection_statement_3(t): - 'selection_statement : SWITCH LPAREN expression RPAREN statement ' - pass - -# iteration_statement: - -def p_iteration_statement_1(t): - 'iteration_statement : WHILE LPAREN expression RPAREN statement' - pass - -def p_iteration_statement_2(t): - 'iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement ' - pass - -def p_iteration_statement_3(t): - 'iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI' - pass - -# jump_statement: - -def p_jump_statement_1(t): - 'jump_statement : GOTO ID SEMI' - pass - -def p_jump_statement_2(t): - 'jump_statement : CONTINUE SEMI' - pass - -def p_jump_statement_3(t): - 'jump_statement : BREAK SEMI' - pass - -def p_jump_statement_4(t): - 'jump_statement : RETURN expression_opt SEMI' - pass - -def p_expression_opt_1(t): - 'expression_opt : empty' - pass - -def p_expression_opt_2(t): - 'expression_opt : expression' - pass - -# expression: -def p_expression_1(t): - 'expression : assignment_expression' - pass - -def p_expression_2(t): - 'expression : expression COMMA assignment_expression' - pass - -# assigment_expression: -def p_assignment_expression_1(t): - 'assignment_expression : conditional_expression' - pass - -def p_assignment_expression_2(t): - 'assignment_expression : unary_expression assignment_operator assignment_expression' - pass - -# assignment_operator: -def p_assignment_operator(t): - ''' - assignment_operator : EQUALS - | TIMESEQUAL - | DIVEQUAL - | MODEQUAL - | PLUSEQUAL - | MINUSEQUAL - | LSHIFTEQUAL - | RSHIFTEQUAL - | ANDEQUAL - | OREQUAL - | XOREQUAL - ''' - pass - -# conditional-expression -def p_conditional_expression_1(t): - 'conditional_expression : logical_or_expression' - pass - -def p_conditional_expression_2(t): - 'conditional_expression : logical_or_expression CONDOP expression COLON conditional_expression ' - pass - -# constant-expression - -def p_constant_expression(t): - 'constant_expression : conditional_expression' - pass - -# logical-or-expression - -def p_logical_or_expression_1(t): - 'logical_or_expression : logical_and_expression' - pass - -def p_logical_or_expression_2(t): - 
'logical_or_expression : logical_or_expression LOR logical_and_expression' - pass - -# logical-and-expression - -def p_logical_and_expression_1(t): - 'logical_and_expression : inclusive_or_expression' - pass - -def p_logical_and_expression_2(t): - 'logical_and_expression : logical_and_expression LAND inclusive_or_expression' - pass - -# inclusive-or-expression: - -def p_inclusive_or_expression_1(t): - 'inclusive_or_expression : exclusive_or_expression' - pass - -def p_inclusive_or_expression_2(t): - 'inclusive_or_expression : inclusive_or_expression OR exclusive_or_expression' - pass - -# exclusive-or-expression: - -def p_exclusive_or_expression_1(t): - 'exclusive_or_expression : and_expression' - pass - -def p_exclusive_or_expression_2(t): - 'exclusive_or_expression : exclusive_or_expression XOR and_expression' - pass - -# AND-expression - -def p_and_expression_1(t): - 'and_expression : equality_expression' - pass - -def p_and_expression_2(t): - 'and_expression : and_expression AND equality_expression' - pass - - -# equality-expression: -def p_equality_expression_1(t): - 'equality_expression : relational_expression' - pass - -def p_equality_expression_2(t): - 'equality_expression : equality_expression EQ relational_expression' - pass - -def p_equality_expression_3(t): - 'equality_expression : equality_expression NE relational_expression' - pass - - -# relational-expression: -def p_relational_expression_1(t): - 'relational_expression : shift_expression' - pass - -def p_relational_expression_2(t): - 'relational_expression : relational_expression LT shift_expression' - pass - -def p_relational_expression_3(t): - 'relational_expression : relational_expression GT shift_expression' - pass - -def p_relational_expression_4(t): - 'relational_expression : relational_expression LE shift_expression' - pass - -def p_relational_expression_5(t): - 'relational_expression : relational_expression GE shift_expression' - pass - -# shift-expression - -def p_shift_expression_1(t): - 'shift_expression : additive_expression' - pass - -def p_shift_expression_2(t): - 'shift_expression : shift_expression LSHIFT additive_expression' - pass - -def p_shift_expression_3(t): - 'shift_expression : shift_expression RSHIFT additive_expression' - pass - -# additive-expression - -def p_additive_expression_1(t): - 'additive_expression : multiplicative_expression' - pass - -def p_additive_expression_2(t): - 'additive_expression : additive_expression PLUS multiplicative_expression' - pass - -def p_additive_expression_3(t): - 'additive_expression : additive_expression MINUS multiplicative_expression' - pass - -# multiplicative-expression - -def p_multiplicative_expression_1(t): - 'multiplicative_expression : cast_expression' - pass - -def p_multiplicative_expression_2(t): - 'multiplicative_expression : multiplicative_expression TIMES cast_expression' - pass - -def p_multiplicative_expression_3(t): - 'multiplicative_expression : multiplicative_expression DIVIDE cast_expression' - pass - -def p_multiplicative_expression_4(t): - 'multiplicative_expression : multiplicative_expression MOD cast_expression' - pass - -# cast-expression: - -def p_cast_expression_1(t): - 'cast_expression : unary_expression' - pass - -def p_cast_expression_2(t): - 'cast_expression : LPAREN type_name RPAREN cast_expression' - pass - -# unary-expression: -def p_unary_expression_1(t): - 'unary_expression : postfix_expression' - pass - -def p_unary_expression_2(t): - 'unary_expression : PLUSPLUS unary_expression' - pass - -def p_unary_expression_3(t): - 
'unary_expression : MINUSMINUS unary_expression' - pass - -def p_unary_expression_4(t): - 'unary_expression : unary_operator cast_expression' - pass - -def p_unary_expression_5(t): - 'unary_expression : SIZEOF unary_expression' - pass - -def p_unary_expression_6(t): - 'unary_expression : SIZEOF LPAREN type_name RPAREN' - pass - -#unary-operator -def p_unary_operator(t): - '''unary_operator : AND - | TIMES - | PLUS - | MINUS - | NOT - | LNOT ''' - pass - -# postfix-expression: -def p_postfix_expression_1(t): - 'postfix_expression : primary_expression' - pass - -def p_postfix_expression_2(t): - 'postfix_expression : postfix_expression LBRACKET expression RBRACKET' - pass - -def p_postfix_expression_3(t): - 'postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN' - pass - -def p_postfix_expression_4(t): - 'postfix_expression : postfix_expression LPAREN RPAREN' - pass - -def p_postfix_expression_5(t): - 'postfix_expression : postfix_expression PERIOD ID' - pass - -def p_postfix_expression_6(t): - 'postfix_expression : postfix_expression ARROW ID' - pass - -def p_postfix_expression_7(t): - 'postfix_expression : postfix_expression PLUSPLUS' - pass - -def p_postfix_expression_8(t): - 'postfix_expression : postfix_expression MINUSMINUS' - pass - -# primary-expression: -def p_primary_expression(t): - '''primary_expression : ID - | constant - | SCONST - | LPAREN expression RPAREN''' - pass - -# argument-expression-list: -def p_argument_expression_list(t): - '''argument_expression_list : assignment_expression - | argument_expression_list COMMA assignment_expression''' - pass - -# constant: -def p_constant(t): - '''constant : ICONST - | FCONST - | CCONST''' - pass - - -def p_empty(t): - 'empty : ' - pass - -def p_error(t): - print "Whoa. We're hosed" - -import profile -# Build the grammar - -yacc.yacc(method='LALR') - -#profile.run("yacc.yacc(method='LALR')") - - - - diff --git a/chall/ply-2.2/example/ansic/lextab.py b/chall/ply-2.2/example/ansic/lextab.py deleted file mode 100644 index ce9804b..0000000 --- a/chall/ply-2.2/example/ansic/lextab.py +++ /dev/null @@ -1,8 +0,0 @@ -# lextab.py. This file automatically created by PLY (version 2.2). Don't edit! 
-_lextokens = {'SHORT': None, 'SIGNED': None, 'TIMES': None, 'TYPEID': None, 'GT': None, 'ARROW': None, 'FCONST': None, 'CONST': None, 'GE': None, 'PERIOD': None, 'SEMI': None, 'REGISTER': None, 'ENUM': None, 'SIZEOF': None, 'COMMA': None, 'RBRACE': None, 'RPAREN': None, 'RSHIFTEQUAL': None, 'LT': None, 'OREQUAL': None, 'XOREQUAL': None, 'DOUBLE': None, 'LBRACE': None, 'STRUCT': None, 'LPAREN': None, 'PLUSEQUAL': None, 'LNOT': None, 'NOT': None, 'CONDOP': None, 'LE': None, 'FLOAT': None, 'GOTO': None, 'LOR': None, 'EQ': None, 'MOD': None, 'ICONST': None, 'LONG': None, 'PLUS': None, 'DIVIDE': None, 'WHILE': None, 'UNION': None, 'CHAR': None, 'SWITCH': None, 'DO': None, 'FOR': None, 'VOID': None, 'EXTERN': None, 'RETURN': None, 'MINUSEQUAL': None, 'ELSE': None, 'ANDEQUAL': None, 'BREAK': None, 'CCONST': None, 'INT': None, 'DIVEQUAL': None, 'DEFAULT': None, 'TIMESEQUAL': None, 'MINUS': None, 'OR': None, 'CONTINUE': None, 'IF': None, 'UNSIGNED': None, 'ID': None, 'MINUSMINUS': None, 'COLON': None, 'LSHIFTEQUAL': None, 'RBRACKET': None, 'VOLATILE': None, 'CASE': None, 'PLUSPLUS': None, 'RSHIFT': None, 'MODEQUAL': None, 'LAND': None, 'AND': None, 'ELLIPSIS': None, 'STATIC': None, 'LBRACKET': None, 'LSHIFT': None, 'NE': None, 'TYPEDEF': None, 'AUTO': None, 'XOR': None, 'EQUALS': None, 'SCONST': None} -_lexreflags = 0 -_lexliterals = '' -_lexstateinfo = {'INITIAL': 'inclusive'} -_lexstatere = {'INITIAL': [('(?P<t_NEWLINE>\\n+)|(?P<t_ID>[A-Za-z_][\\w_]*)|(?P<t_comment> /\\*(.|\\n)*?\\*/)|(?P<t_preprocessor>\\#(.)*?\\n)|(?P<t_FCONST>((\\d+)(\\.\\d+)(e(\\+|-)?(\\d+))? | (\\d+)e(\\+|-)?(\\d+))([lL]|[fF])?)|(?P<t_ICONST>\\d+([uU]|[lL]|[uU][lL]|[lL][uU])?)|(?P<t_CCONST>(L)?\\\'([^\\\\\\n]|(\\\\.))*?\\\')|(?P<t_SCONST>\\"([^\\\\\\n]|(\\\\.))*?\\")|(?P<t_ELLIPSIS>\\.\\.\\.)|(?P<t_LOR>\\|\\|)|(?P<t_PLUSPLUS>\\+\\+)|(?P<t_TIMESEQUAL>\\*=)|(?P<t_RSHIFTEQUAL>>>=)|(?P<t_OREQUAL>\\|=)|(?P<t_PLUSEQUAL>\\+=)|(?P<t_LSHIFTEQUAL><<=)|(?P<t_RBRACKET>\\])|(?P<t_MODEQUAL>%=)|(?P<t_XOREQUAL>^=)|(?P<t_LSHIFT><<)|(?P<t_TIMES>\\*)|(?P<t_LAND>&&)|(?P<t_MINUSMINUS>--)|(?P<t_NE>!=)|(?P<t_LPAREN>\\()|(?P<t_ANDEQUAL>&=)|(?P<t_RSHIFT>>>)|(?P<t_LBRACKET>\\[)|(?P<t_LBRACE>\\{)|(?P<t_OR>\\|)|(?P<t_RBRACE>\\})|(?P<t_ARROW>->)|(?P<t_PLUS>\\+)|(?P<t_CONDOP>\\?)|(?P<t_LE><=)|(?P<t_MINUSEQUAL>-=)|(?P<t_PERIOD>\\.)|(?P<t_DIVEQUAL>/=)|(?P<t_EQ>==)|(?P<t_GE>>=)|(?P<t_RPAREN>\\))|(?P<t_XOR>\\^)|(?P<t_SEMI>;)|(?P<t_AND>&)|(?P<t_NOT>~)|(?P<t_EQUALS>=)|(?P<t_MOD>%)|(?P<t_LT><)|(?P<t_MINUS>-)|(?P<t_LNOT>!)|(?P<t_DIVIDE>/)|(?P<t_COMMA>,)|(?P<t_GT>>)|(?P<t_COLON>:)', [None, ('t_NEWLINE', 'NEWLINE'), ('t_ID', 'ID'), ('t_comment', 'comment'), None, ('t_preprocessor', 'preprocessor'), None, (None, 'FCONST'), None, None, None, None, None, None, None, None, None, None, (None, 'ICONST'), None, (None, 'CCONST'), None, None, None, (None, 'SCONST'), None, None, (None, 'ELLIPSIS'), (None, 'LOR'), (None, 'PLUSPLUS'), (None, 'TIMESEQUAL'), (None, 'RSHIFTEQUAL'), (None, 'OREQUAL'), (None, 'PLUSEQUAL'), (None, 'LSHIFTEQUAL'), (None, 'RBRACKET'), (None, 'MODEQUAL'), (None, 'XOREQUAL'), (None, 'LSHIFT'), (None, 'TIMES'), (None, 'LAND'), (None, 'MINUSMINUS'), (None, 'NE'), (None, 'LPAREN'), (None, 'ANDEQUAL'), (None, 'RSHIFT'), (None, 'LBRACKET'), (None, 'LBRACE'), (None, 'OR'), (None, 'RBRACE'), (None, 'ARROW'), (None, 'PLUS'), (None, 'CONDOP'), (None, 'LE'), (None, 'MINUSEQUAL'), (None, 'PERIOD'), (None, 'DIVEQUAL'), (None, 'EQ'), (None, 'GE'), (None, 'RPAREN'), (None, 'XOR'), (None, 'SEMI'), (None, 'AND'), (None, 'NOT'), (None, 'EQUALS'), (None, 'MOD'), (None, 
'LT'), (None, 'MINUS'), (None, 'LNOT'), (None, 'DIVIDE'), (None, 'COMMA'), (None, 'GT'), (None, 'COLON')])]} -_lexstateignore = {'INITIAL': ' \t\f'} -_lexstateerrorf = {'INITIAL': 't_error'} diff --git a/chall/ply-2.2/example/calc/calc.py b/chall/ply-2.2/example/calc/calc.py deleted file mode 100644 index 5bf5d5d..0000000 --- a/chall/ply-2.2/example/calc/calc.py +++ /dev/null @@ -1,105 +0,0 @@ -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -tokens = ( - 'NAME','NUMBER', - ) - -literals = ['=','+','-','*','/', '(',')'] - -# Tokens - -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print "Integer value too large", t.value - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Parsing rules - -precedence = ( - ('left','+','-'), - ('left','*','/'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(p): - 'statement : NAME "=" expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print p[1] - -def p_expression_binop(p): - '''expression : expression '+' expression - | expression '-' expression - | expression '*' expression - | expression '/' expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - "expression : '-' expression %prec UMINUS" - p[0] = -p[2] - -def p_expression_group(p): - "expression : '(' expression ')'" - p[0] = p[2] - -def p_expression_number(p): - "expression : NUMBER" - p[0] = p[1] - -def p_expression_name(p): - "expression : NAME" - try: - p[0] = names[p[1]] - except LookupError: - print "Undefined name '%s'" % p[1] - p[0] = 0 - -def p_error(p): - print "Syntax error at '%s'" % p.value - -import ply.yacc as yacc -yacc.yacc() - -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(s) diff --git a/chall/ply-2.2/example/classcalc/calc.py b/chall/ply-2.2/example/classcalc/calc.py deleted file mode 100755 index 7ec09a6..0000000 --- a/chall/ply-2.2/example/classcalc/calc.py +++ /dev/null @@ -1,152 +0,0 @@ -#!/usr/bin/env python - -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. 
-# -# Class-based example contributed to PLY by David McNab -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -import readline -import ply.lex as lex -import ply.yacc as yacc -import os - -class Parser: - """ - Base class for a lexer/parser that has the rules defined as methods - """ - tokens = () - precedence = () - - def __init__(self, **kw): - self.debug = kw.get('debug', 0) - self.names = { } - try: - modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__ - except: - modname = "parser"+"_"+self.__class__.__name__ - self.debugfile = modname + ".dbg" - self.tabmodule = modname + "_" + "parsetab" - #print self.debugfile, self.tabmodule - - # Build the lexer and parser - lex.lex(module=self, debug=self.debug) - yacc.yacc(module=self, - debug=self.debug, - debugfile=self.debugfile, - tabmodule=self.tabmodule) - - def run(self): - while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(s) - - -class Calc(Parser): - - tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - - # Tokens - - t_PLUS = r'\+' - t_MINUS = r'-' - t_EXP = r'\*\*' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(self, t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print "Integer value too large", t.value - t.value = 0 - #print "parsed number %s" % repr(t.value) - return t - - t_ignore = " \t" - - def t_newline(self, t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - - def t_error(self, t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - - # Parsing rules - - precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('left', 'EXP'), - ('right','UMINUS'), - ) - - def p_statement_assign(self, p): - 'statement : NAME EQUALS expression' - self.names[p[1]] = p[3] - - def p_statement_expr(self, p): - 'statement : expression' - print p[1] - - def p_expression_binop(self, p): - """ - expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression - | expression EXP expression - """ - #print [repr(p[i]) for i in range(0,4)] - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - elif p[2] == '**': p[0] = p[1] ** p[3] - - def p_expression_uminus(self, p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - - def p_expression_group(self, p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - - def p_expression_number(self, p): - 'expression : NUMBER' - p[0] = p[1] - - def p_expression_name(self, p): - 'expression : NAME' - try: - p[0] = self.names[p[1]] - except LookupError: - print "Undefined name '%s'" % p[1] - p[0] = 0 - - def p_error(self, p): - print "Syntax error at '%s'" % p.value - -if __name__ == '__main__': - calc = Calc() - calc.run() diff --git a/chall/ply-2.2/example/cleanup.sh b/chall/ply-2.2/example/cleanup.sh deleted file mode 100755 index 3e115f4..0000000 --- a/chall/ply-2.2/example/cleanup.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -rm -f */*.pyc */parsetab.py */parser.out */*~ */*.class diff --git a/chall/ply-2.2/example/hedit/hedit.py b/chall/ply-2.2/example/hedit/hedit.py deleted file mode 100644 index a3c58c7..0000000 --- a/chall/ply-2.2/example/hedit/hedit.py 
+++ /dev/null @@ -1,48 +0,0 @@ -# ----------------------------------------------------------------------------- -# hedit.py -# -# Paring of Fortran H Edit descriptions (Contributed by Pearu Peterson) -# -# These tokens can't be easily tokenized because they are of the following -# form: -# -# nHc1...cn -# -# where n is a positive integer and c1 ... cn are characters. -# -# This example shows how to modify the state of the lexer to parse -# such tokens -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - - -tokens = ( - 'H_EDIT_DESCRIPTOR', - ) - -# Tokens -t_ignore = " \t\n" - -def t_H_EDIT_DESCRIPTOR(t): - r"\d+H.*" # This grabs all of the remaining text - i = t.value.index('H') - n = eval(t.value[:i]) - - # Adjust the tokenizing position - t.lexer.lexpos -= len(t.value) - (i+1+n) - - t.value = t.value[i+1:i+1+n] - return t - -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() -lex.runmain() - - diff --git a/chall/ply-2.2/example/newclasscalc/calc.py b/chall/ply-2.2/example/newclasscalc/calc.py deleted file mode 100755 index b021b6b..0000000 --- a/chall/ply-2.2/example/newclasscalc/calc.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python - -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. -# -# Class-based example contributed to PLY by David McNab. -# -# Modified to use new-style classes. Test case. -# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -import readline -import ply.lex as lex -import ply.yacc as yacc -import os - -class Parser(object): - """ - Base class for a lexer/parser that has the rules defined as methods - """ - tokens = () - precedence = () - - - def __init__(self, **kw): - self.debug = kw.get('debug', 0) - self.names = { } - try: - modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__ - except: - modname = "parser"+"_"+self.__class__.__name__ - self.debugfile = modname + ".dbg" - self.tabmodule = modname + "_" + "parsetab" - #print self.debugfile, self.tabmodule - - # Build the lexer and parser - lex.lex(module=self, debug=self.debug) - yacc.yacc(module=self, - debug=self.debug, - debugfile=self.debugfile, - tabmodule=self.tabmodule) - - def run(self): - while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(s) - - -class Calc(Parser): - - tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - - # Tokens - - t_PLUS = r'\+' - t_MINUS = r'-' - t_EXP = r'\*\*' - t_TIMES = r'\*' - t_DIVIDE = r'/' - t_EQUALS = r'=' - t_LPAREN = r'\(' - t_RPAREN = r'\)' - t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - - def t_NUMBER(self, t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print "Integer value too large", t.value - t.value = 0 - #print "parsed number %s" % repr(t.value) - return t - - t_ignore = " \t" - - def t_newline(self, t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - - def t_error(self, t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - - # Parsing rules - - precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('left', 'EXP'), - ('right','UMINUS'), - ) - - def p_statement_assign(self, p): - 'statement : NAME EQUALS 
expression' - self.names[p[1]] = p[3] - - def p_statement_expr(self, p): - 'statement : expression' - print p[1] - - def p_expression_binop(self, p): - """ - expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression - | expression EXP expression - """ - #print [repr(p[i]) for i in range(0,4)] - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - elif p[2] == '**': p[0] = p[1] ** p[3] - - def p_expression_uminus(self, p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - - def p_expression_group(self, p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - - def p_expression_number(self, p): - 'expression : NUMBER' - p[0] = p[1] - - def p_expression_name(self, p): - 'expression : NAME' - try: - p[0] = self.names[p[1]] - except LookupError: - print "Undefined name '%s'" % p[1] - p[0] = 0 - - def p_error(self, p): - print "Syntax error at '%s'" % p.value - -if __name__ == '__main__': - calc = Calc() - calc.run() diff --git a/chall/ply-2.2/example/optcalc/README b/chall/ply-2.2/example/optcalc/README deleted file mode 100644 index 6d196f0..0000000 --- a/chall/ply-2.2/example/optcalc/README +++ /dev/null @@ -1,9 +0,0 @@ -An example showing how to use Python optimized mode. -To run: - - - First run 'python calc.py' - - - Then run 'python -OO calc.py' - -If working corretly, the second version should run the -same way. diff --git a/chall/ply-2.2/example/optcalc/calc.py b/chall/ply-2.2/example/optcalc/calc.py deleted file mode 100644 index 325f67c..0000000 --- a/chall/ply-2.2/example/optcalc/calc.py +++ /dev/null @@ -1,113 +0,0 @@ -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. 
-# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print "Integer value too large", t.value - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex(optimize=1) - -# Parsing rules - -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - elif t[2] == '<': t[0] = t[1] < t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -import ply.yacc as yacc -yacc.yacc(optimize=1) - -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - yacc.parse(s) - diff --git a/chall/ply-2.2/example/unicalc/calc.py b/chall/ply-2.2/example/unicalc/calc.py deleted file mode 100644 index 7e60433..0000000 --- a/chall/ply-2.2/example/unicalc/calc.py +++ /dev/null @@ -1,114 +0,0 @@ -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. This is from O'Reilly's -# "Lex and Yacc", p. 63. -# -# This example uses unicode strings for tokens, docstrings, and input. 
-# ----------------------------------------------------------------------------- - -import sys -sys.path.insert(0,"../..") - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = ur'\+' -t_MINUS = ur'-' -t_TIMES = ur'\*' -t_DIVIDE = ur'/' -t_EQUALS = ur'=' -t_LPAREN = ur'\(' -t_RPAREN = ur'\)' -t_NAME = ur'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - ur'\d+' - try: - t.value = int(t.value) - except ValueError: - print "Integer value too large", t.value - t.value = 0 - return t - -t_ignore = u" \t" - -def t_newline(t): - ur'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Parsing rules - -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(p): - 'statement : NAME EQUALS expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print p[1] - -def p_expression_binop(p): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if p[2] == u'+' : p[0] = p[1] + p[3] - elif p[2] == u'-': p[0] = p[1] - p[3] - elif p[2] == u'*': p[0] = p[1] * p[3] - elif p[2] == u'/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - -def p_expression_group(p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - -def p_expression_number(p): - 'expression : NUMBER' - p[0] = p[1] - -def p_expression_name(p): - 'expression : NAME' - try: - p[0] = names[p[1]] - except LookupError: - print "Undefined name '%s'" % p[1] - p[0] = 0 - -def p_error(p): - print "Syntax error at '%s'" % p.value - -import ply.yacc as yacc -yacc.yacc() - -while 1: - try: - s = raw_input('calc > ') - except EOFError: - break - if not s: continue - yacc.parse(unicode(s)) diff --git a/chall/ply-2.2/example/yply/README b/chall/ply-2.2/example/yply/README deleted file mode 100644 index bfadf36..0000000 --- a/chall/ply-2.2/example/yply/README +++ /dev/null @@ -1,41 +0,0 @@ -yply.py - -This example implements a program yply.py that converts a UNIX-yacc -specification file into a PLY-compatible program. To use, simply -run it like this: - - % python yply.py [-nocode] inputfile.y >myparser.py - -The output of this program is Python code. In the output, -any C code in the original file is included, but is commented out. -If you use the -nocode option, then all of the C code in the -original file is just discarded. - -To use the resulting grammer with PLY, you'll need to edit the -myparser.py file. Within this file, some stub code is included that -can be used to test the construction of the parsing tables. However, -you'll need to do more editing to make a workable parser. - -Disclaimer: This just an example I threw together in an afternoon. -It might have some bugs. However, it worked when I tried it on -a yacc-specified C++ parser containing 442 rules and 855 parsing -states. - -Comments: - -1. This example does not parse specification files meant for lex/flex. - You'll need to specify the tokenizer on your own. - -2. This example shows a number of interesting PLY features including - - - Parsing of literal text delimited by nested parentheses - - Some interaction between the parser and the lexer. 
- - Use of literals in the grammar specification - - One pass compilation. The program just emits the result, - there is no intermediate parse tree. - -3. This program could probably be cleaned up and enhanced a lot. - It would be great if someone wanted to work on this (hint). - --Dave - diff --git a/chall/ply-2.2/example/yply/ylex.py b/chall/ply-2.2/example/yply/ylex.py deleted file mode 100644 index 67d2354..0000000 --- a/chall/ply-2.2/example/yply/ylex.py +++ /dev/null @@ -1,112 +0,0 @@ -# lexer for yacc-grammars -# -# Author: David Beazley (dave@dabeaz.com) -# Date : October 2, 2006 - -import sys -sys.path.append("../..") - -from ply import * - -tokens = ( - 'LITERAL','SECTION','TOKEN','LEFT','RIGHT','PREC','START','TYPE','NONASSOC','UNION','CODE', - 'ID','QLITERAL','NUMBER', -) - -states = (('code','exclusive'),) - -literals = [ ';', ',', '<', '>', '|',':' ] -t_ignore = ' \t' - -t_TOKEN = r'%token' -t_LEFT = r'%left' -t_RIGHT = r'%right' -t_NONASSOC = r'%nonassoc' -t_PREC = r'%prec' -t_START = r'%start' -t_TYPE = r'%type' -t_UNION = r'%union' -t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*' -t_QLITERAL = r'''(?P<quote>['"]).*?(?P=quote)''' -t_NUMBER = r'\d+' - -def t_SECTION(t): - r'%%' - if getattr(t.lexer,"lastsection",0): - t.value = t.lexer.lexdata[t.lexpos+2:] - t.lexer.lexpos = len(t.lexer.lexdata) - else: - t.lexer.lastsection = 0 - return t - -# Comments -def t_ccomment(t): - r'/\*(.|\n)*?\*/' - t.lineno += t.value.count('\n') - -t_ignore_cppcomment = r'//.*' - -def t_LITERAL(t): - r'%\{(.|\n)*?%\}' - t.lexer.lineno += t.value.count("\n") - return t - -def t_NEWLINE(t): - r'\n' - t.lexer.lineno += 1 - -def t_code(t): - r'\{' - t.lexer.codestart = t.lexpos - t.lexer.level = 1 - t.lexer.begin('code') - -def t_code_ignore_string(t): - r'\"([^\\\n]|(\\.))*?\"' - -def t_code_ignore_char(t): - r'\'([^\\\n]|(\\.))*?\'' - -def t_code_ignore_comment(t): - r'/\*(.|\n)*?\*/' - -def t_code_ignore_cppcom(t): - r'//.*' - -def t_code_lbrace(t): - r'\{' - t.lexer.level += 1 - -def t_code_rbrace(t): - r'\}' - t.lexer.level -= 1 - if t.lexer.level == 0: - t.type = 'CODE' - t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos+1] - t.lexer.begin('INITIAL') - t.lexer.lineno += t.value.count('\n') - return t - -t_code_ignore_nonspace = r'[^\s\}\'\"\{]+' -t_code_ignore_whitespace = r'\s+' -t_code_ignore = "" - -def t_code_error(t): - raise RuntimeError - -def t_error(t): - print "%d: Illegal character '%s'" % (t.lineno, t.value[0]) - print t.value - t.lexer.skip(1) - -lex.lex() - -if __name__ == '__main__': - lex.runmain() - - - - - - - diff --git a/chall/ply-2.2/example/yply/yparse.py b/chall/ply-2.2/example/yply/yparse.py deleted file mode 100644 index ab5b884..0000000 --- a/chall/ply-2.2/example/yply/yparse.py +++ /dev/null @@ -1,217 +0,0 @@ -# parser for Unix yacc-based grammars -# -# Author: David Beazley (dave@dabeaz.com) -# Date : October 2, 2006 - -import ylex -tokens = ylex.tokens - -from ply import * - -tokenlist = [] -preclist = [] - -emit_code = 1 - -def p_yacc(p): - '''yacc : defsection rulesection''' - -def p_defsection(p): - '''defsection : definitions SECTION - | SECTION''' - p.lexer.lastsection = 1 - print "tokens = ", repr(tokenlist) - print - print "precedence = ", repr(preclist) - print - print "# -------------- RULES ----------------" - print - -def p_rulesection(p): - '''rulesection : rules SECTION''' - - print "# -------------- RULES END ----------------" - print_code(p[2],0) - -def p_definitions(p): - '''definitions : definitions definition - | definition''' - -def p_definition_literal(p): 
- '''definition : LITERAL''' - print_code(p[1],0) - -def p_definition_start(p): - '''definition : START ID''' - print "start = '%s'" % p[2] - -def p_definition_token(p): - '''definition : toktype opttype idlist optsemi ''' - for i in p[3]: - if i[0] not in "'\"": - tokenlist.append(i) - if p[1] == '%left': - preclist.append(('left',) + tuple(p[3])) - elif p[1] == '%right': - preclist.append(('right',) + tuple(p[3])) - elif p[1] == '%nonassoc': - preclist.append(('nonassoc',)+ tuple(p[3])) - -def p_toktype(p): - '''toktype : TOKEN - | LEFT - | RIGHT - | NONASSOC''' - p[0] = p[1] - -def p_opttype(p): - '''opttype : '<' ID '>' - | empty''' - -def p_idlist(p): - '''idlist : idlist optcomma tokenid - | tokenid''' - if len(p) == 2: - p[0] = [p[1]] - else: - p[0] = p[1] - p[1].append(p[3]) - -def p_tokenid(p): - '''tokenid : ID - | ID NUMBER - | QLITERAL - | QLITERAL NUMBER''' - p[0] = p[1] - -def p_optsemi(p): - '''optsemi : ';' - | empty''' - -def p_optcomma(p): - '''optcomma : ',' - | empty''' - -def p_definition_type(p): - '''definition : TYPE '<' ID '>' namelist optsemi''' - # type declarations are ignored - -def p_namelist(p): - '''namelist : namelist optcomma ID - | ID''' - -def p_definition_union(p): - '''definition : UNION CODE optsemi''' - # Union declarations are ignored - -def p_rules(p): - '''rules : rules rule - | rule''' - if len(p) == 2: - rule = p[1] - else: - rule = p[2] - - # Print out a Python equivalent of this rule - - embedded = [ ] # Embedded actions (a mess) - embed_count = 0 - - rulename = rule[0] - rulecount = 1 - for r in rule[1]: - # r contains one of the rule possibilities - print "def p_%s_%d(p):" % (rulename,rulecount) - prod = [] - prodcode = "" - for i in range(len(r)): - item = r[i] - if item[0] == '{': # A code block - if i == len(r) - 1: - prodcode = item - break - else: - # an embedded action - embed_name = "_embed%d_%s" % (embed_count,rulename) - prod.append(embed_name) - embedded.append((embed_name,item)) - embed_count += 1 - else: - prod.append(item) - print " '''%s : %s'''" % (rulename, " ".join(prod)) - # Emit code - print_code(prodcode,4) - print - rulecount += 1 - - for e,code in embedded: - print "def p_%s(p):" % e - print " '''%s : '''" % e - print_code(code,4) - print - -def p_rule(p): - '''rule : ID ':' rulelist ';' ''' - p[0] = (p[1],[p[3]]) - -def p_rule2(p): - '''rule : ID ':' rulelist morerules ';' ''' - p[4].insert(0,p[3]) - p[0] = (p[1],p[4]) - -def p_rule_empty(p): - '''rule : ID ':' ';' ''' - p[0] = (p[1],[[]]) - -def p_rule_empty2(p): - '''rule : ID ':' morerules ';' ''' - - p[3].insert(0,[]) - p[0] = (p[1],p[3]) - -def p_morerules(p): - '''morerules : morerules '|' rulelist - | '|' rulelist - | '|' ''' - - if len(p) == 2: - p[0] = [[]] - elif len(p) == 3: - p[0] = [p[2]] - else: - p[0] = p[1] - p[0].append(p[3]) - -# print "morerules", len(p), p[0] - -def p_rulelist(p): - '''rulelist : rulelist ruleitem - | ruleitem''' - - if len(p) == 2: - p[0] = [p[1]] - else: - p[0] = p[1] - p[1].append(p[2]) - -def p_ruleitem(p): - '''ruleitem : ID - | QLITERAL - | CODE - | PREC''' - p[0] = p[1] - -def p_empty(p): - '''empty : ''' - -def p_error(p): - pass - -yacc.yacc(debug=0) - -def print_code(code,indent): - if not emit_code: return - codelines = code.splitlines() - for c in codelines: - print "%s# %s" % (" "*indent,c) - diff --git a/chall/ply-2.2/example/yply/yply.py b/chall/ply-2.2/example/yply/yply.py deleted file mode 100755 index a439817..0000000 --- a/chall/ply-2.2/example/yply/yply.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/local/bin/python 
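# (Editor's annotation on yparse.py above -- a hedged sketch, with an
# invented rule "stmt" and invented C fragments, of how p_rules() rewrites
# mid-rule actions.  Only a trailing action stays with its rule; every
# earlier action is hoisted into a synthetic empty production:
#
#     input (yacc):   stmt : IF expr { mark(); } THEN stmt { gen(); } ;
#
#     emitted (PLY):  def p_stmt_1(p):
#                         '''stmt : IF expr _embed0_stmt THEN stmt'''
#                         # { gen(); }
#
#                     def p__embed0_stmt(p):
#                         '''_embed0_stmt : '''
#                         # { mark(); }
#
# print_code() is what renders each C fragment as the commented-out lines.)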
-# yply.py -# -# Author: David Beazley (dave@dabeaz.com) -# Date : October 2, 2006 -# -# Converts a UNIX-yacc specification file into a PLY-compatible -# specification. To use, simply do this: -# -# % python yply.py [-nocode] inputfile.y >myparser.py -# -# The output of this program is Python code. In the output, -# any C code in the original file is included, but is commented. -# If you use the -nocode option, then all of the C code in the -# original file is discarded. -# -# Disclaimer: This just an example I threw together in an afternoon. -# It might have some bugs. However, it worked when I tried it on -# a yacc-specified C++ parser containing 442 rules and 855 parsing -# states. -# - -import sys -sys.path.insert(0,"../..") - -import ylex -import yparse - -from ply import * - -if len(sys.argv) == 1: - print "usage : yply.py [-nocode] inputfile" - raise SystemExit - -if len(sys.argv) == 3: - if sys.argv[1] == '-nocode': - yparse.emit_code = 0 - else: - print "Unknown option '%s'" % sys.argv[1] - raise SystemExit - filename = sys.argv[2] -else: - filename = sys.argv[1] - -yacc.parse(open(filename).read()) - -print """ -if __name__ == '__main__': - from ply import * - yacc.yacc() -""" - - diff --git a/chall/ply-2.2/ply/__init__.py b/chall/ply-2.2/ply/__init__.py deleted file mode 100644 index 853a985..0000000 --- a/chall/ply-2.2/ply/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# PLY package -# Author: David Beazley (dave@dabeaz.com) - -__all__ = ['lex','yacc'] diff --git a/chall/ply-2.2/ply/__init__.pyc b/chall/ply-2.2/ply/__init__.pyc Binary files differdeleted file mode 100644 index 99d3089..0000000 --- a/chall/ply-2.2/ply/__init__.pyc +++ /dev/null diff --git a/chall/ply-2.2/ply/lex.py b/chall/ply-2.2/ply/lex.py deleted file mode 100644 index c149366..0000000 --- a/chall/ply-2.2/ply/lex.py +++ /dev/null @@ -1,866 +0,0 @@ -#----------------------------------------------------------------------------- -# ply: lex.py -# -# Author: David M. Beazley (dave@dabeaz.com) -# -# Copyright (C) 2001-2006, David M. Beazley -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# See the file COPYING for a complete copy of the LGPL. -#----------------------------------------------------------------------------- - -__version__ = "2.2" - -import re, sys, types - -# Regular expression used to match valid token names -_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') - -# Available instance types. This is used when lexers are defined by a class. -# It's a little funky because I want to preserve backwards compatibility -# with Python 2.0 where types.ObjectType is undefined. 
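#
# A class-defined lexer, for reference, is what makes _INSTANCETYPE matter.
# A minimal sketch (the MyLexer class and its names are invented for
# illustration; lex() receives the instance through its object= argument):
#
#     import ply.lex as lex
#
#     class MyLexer(object):
#         tokens = ('NUMBER',)
#         t_ignore = ' \t'
#         def t_NUMBER(self, t):
#             r'\d+'
#             t.value = int(t.value)
#             return t
#         def t_error(self, t):
#             t.lexer.skip(1)
#
#     mylexer = MyLexer()
#     lexer = lex.lex(object=mylexer)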
- -try: - _INSTANCETYPE = (types.InstanceType, types.ObjectType) -except AttributeError: - _INSTANCETYPE = types.InstanceType - class object: pass # Note: needed if no new-style classes present - -# Exception thrown when invalid token encountered and no default error -# handler is defined. -class LexError(Exception): - def __init__(self,message,s): - self.args = (message,) - self.text = s - -# Token class -class LexToken(object): - def __str__(self): - return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) - def __repr__(self): - return str(self) - def skip(self,n): - self.lexer.skip(n) - -# ----------------------------------------------------------------------------- -# Lexer class -# -# This class encapsulates all of the methods and data associated with a lexer. -# -# input() - Store a new string in the lexer -# token() - Get the next token -# ----------------------------------------------------------------------------- - -class Lexer: - def __init__(self): - self.lexre = None # Master regular expression. This is a list of - # tuples (re,findex) where re is a compiled - # regular expression and findex is a list - # mapping regex group numbers to rules - self.lexretext = None # Current regular expression strings - self.lexstatere = {} # Dictionary mapping lexer states to master regexs - self.lexstateretext = {} # Dictionary mapping lexer states to regex strings - self.lexstate = "INITIAL" # Current lexer state - self.lexstatestack = [] # Stack of lexer states - self.lexstateinfo = None # State information - self.lexstateignore = {} # Dictionary of ignored characters for each state - self.lexstateerrorf = {} # Dictionary of error functions for each state - self.lexreflags = 0 # Optional re compile flags - self.lexdata = None # Actual input data (as a string) - self.lexpos = 0 # Current position in input text - self.lexlen = 0 # Length of the input text - self.lexerrorf = None # Error rule (if any) - self.lextokens = None # List of valid tokens - self.lexignore = "" # Ignored characters - self.lexliterals = "" # Literal characters that can be passed through - self.lexmodule = None # Module - self.lineno = 1 # Current line number - self.lexdebug = 0 # Debugging mode - self.lexoptimize = 0 # Optimized mode - - def clone(self,object=None): - c = Lexer() - c.lexstatere = self.lexstatere - c.lexstateinfo = self.lexstateinfo - c.lexstateretext = self.lexstateretext - c.lexstate = self.lexstate - c.lexstatestack = self.lexstatestack - c.lexstateignore = self.lexstateignore - c.lexstateerrorf = self.lexstateerrorf - c.lexreflags = self.lexreflags - c.lexdata = self.lexdata - c.lexpos = self.lexpos - c.lexlen = self.lexlen - c.lextokens = self.lextokens - c.lexdebug = self.lexdebug - c.lineno = self.lineno - c.lexoptimize = self.lexoptimize - c.lexliterals = self.lexliterals - c.lexmodule = self.lexmodule - - # If the object parameter has been supplied, it means we are attaching the - # lexer to a new object. In this case, we have to rebind all methods in - # the lexstatere and lexstateerrorf tables. 
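        # A hedged usage sketch (variable names invented): cloning shares the
        # compiled master regexs but gives the copy its own position state,
        # and passing an instance rebinds the rule methods onto it:
        #
        #     sublexer = lexer.clone()       # same tables, independent input
        #     sublexer.input(other_text)
        #
        #     newlexer = lexer.clone(inst)   # t_* methods rebound onto inst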
- - if object: - newtab = { } - for key, ritem in self.lexstatere.items(): - newre = [] - for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object,f[0].__name__),f[1])) - newre.append((cre,newfindex)) - newtab[key] = newre - c.lexstatere = newtab - c.lexstateerrorf = { } - for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object,ef.__name__) - c.lexmodule = object - - # Set up other attributes - c.begin(c.lexstate) - return c - - # ------------------------------------------------------------ - # writetab() - Write lexer information to a table file - # ------------------------------------------------------------ - def writetab(self,tabfile): - tf = open(tabfile+".py","w") - tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__)) - tf.write("_lextokens = %s\n" % repr(self.lextokens)) - tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) - tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) - tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) - - tabre = { } - for key, lre in self.lexstatere.items(): - titem = [] - for i in range(len(lre)): - titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1]))) - tabre[key] = titem - - tf.write("_lexstatere = %s\n" % repr(tabre)) - tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) - - taberr = { } - for key, ef in self.lexstateerrorf.items(): - if ef: - taberr[key] = ef.__name__ - else: - taberr[key] = None - tf.write("_lexstateerrorf = %s\n" % repr(taberr)) - tf.close() - - # ------------------------------------------------------------ - # readtab() - Read lexer information from a tab file - # ------------------------------------------------------------ - def readtab(self,tabfile,fdict): - exec "import %s as lextab" % tabfile - self.lextokens = lextab._lextokens - self.lexreflags = lextab._lexreflags - self.lexliterals = lextab._lexliterals - self.lexstateinfo = lextab._lexstateinfo - self.lexstateignore = lextab._lexstateignore - self.lexstatere = { } - self.lexstateretext = { } - for key,lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for i in range(len(lre)): - titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict))) - txtitem.append(lre[i][0]) - self.lexstatere[key] = titem - self.lexstateretext[key] = txtitem - self.lexstateerrorf = { } - for key,ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[key] = fdict[ef] - self.begin('INITIAL') - - # ------------------------------------------------------------ - # input() - Push a new string into the lexer - # ------------------------------------------------------------ - def input(self,s): - if not (isinstance(s,types.StringType) or isinstance(s,types.UnicodeType)): - raise ValueError, "Expected a string" - self.lexdata = s - self.lexpos = 0 - self.lexlen = len(s) - - # ------------------------------------------------------------ - # begin() - Changes the lexing state - # ------------------------------------------------------------ - def begin(self,state): - if not self.lexstatere.has_key(state): - raise ValueError, "Undefined state" - self.lexre = self.lexstatere[state] - self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state,"") - self.lexerrorf = self.lexstateerrorf.get(state,None) - self.lexstate = state - - # ------------------------------------------------------------ - # push_state() 
- Changes the lexing state and saves old on stack - # ------------------------------------------------------------ - def push_state(self,state): - self.lexstatestack.append(self.lexstate) - self.begin(state) - - # ------------------------------------------------------------ - # pop_state() - Restores the previous state - # ------------------------------------------------------------ - def pop_state(self): - self.begin(self.lexstatestack.pop()) - - # ------------------------------------------------------------ - # current_state() - Returns the current lexing state - # ------------------------------------------------------------ - def current_state(self): - return self.lexstate - - # ------------------------------------------------------------ - # skip() - Skip ahead n characters - # ------------------------------------------------------------ - def skip(self,n): - self.lexpos += n - - # ------------------------------------------------------------ - # token() - Return the next token from the Lexer - # - # Note: This function has been carefully implemented to be as fast - # as possible. Don't make changes unless you really know what - # you are doing - # ------------------------------------------------------------ - def token(self): - # Make local copies of frequently referenced attributes - lexpos = self.lexpos - lexlen = self.lexlen - lexignore = self.lexignore - lexdata = self.lexdata - - while lexpos < lexlen: - # This code provides some short-circuit code for whitespace, tabs, and other ignored characters - if lexdata[lexpos] in lexignore: - lexpos += 1 - continue - - # Look for a regular expression match - for lexre,lexindexfunc in self.lexre: - m = lexre.match(lexdata,lexpos) - if not m: continue - - # Set last match in lexer so that rules can access it if they want - self.lexmatch = m - - # Create a token for return - tok = LexToken() - tok.value = m.group() - tok.lineno = self.lineno - tok.lexpos = lexpos - tok.lexer = self - - lexpos = m.end() - i = m.lastindex - func,tok.type = lexindexfunc[i] - self.lexpos = lexpos - - if not func: - # If no token type was set, it's an ignored token - if tok.type: return tok - break - - # if func not callable, it means it's an ignored token - if not callable(func): - break - - # If token is processed by a function, call it - newtok = func(tok) - - # Every function must return a token, if nothing, we just move to next token - if not newtok: - lexpos = self.lexpos # This is here in case user has updated lexpos. - break - - # Verify type of the token. If not in the token map, raise an error - if not self.lexoptimize: - if not self.lextokens.has_key(newtok.type): - raise LexError, ("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( - func.func_code.co_filename, func.func_code.co_firstlineno, - func.__name__, newtok.type),lexdata[lexpos:]) - - return newtok - else: - # No match, see if in literals - if lexdata[lexpos] in self.lexliterals: - tok = LexToken() - tok.value = lexdata[lexpos] - tok.lineno = self.lineno - tok.lexer = self - tok.type = tok.value - tok.lexpos = lexpos - self.lexpos = lexpos + 1 - return tok - - # No match. Call t_error() if defined. - if self.lexerrorf: - tok = LexToken() - tok.value = self.lexdata[lexpos:] - tok.lineno = self.lineno - tok.type = "error" - tok.lexer = self - tok.lexpos = lexpos - self.lexpos = lexpos - newtok = self.lexerrorf(tok) - if lexpos == self.lexpos: - # Error method didn't change text position at all. This is an error. - raise LexError, ("Scanning error. 
Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) - lexpos = self.lexpos - if not newtok: continue - return newtok - - self.lexpos = lexpos - raise LexError, ("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) - - self.lexpos = lexpos + 1 - if self.lexdata is None: - raise RuntimeError, "No input string given with input()" - return None - -# ----------------------------------------------------------------------------- -# _validate_file() -# -# This checks to see if there are duplicated t_rulename() functions or strings -# in the parser input file. This is done using a simple regular expression -# match on each line in the filename. -# ----------------------------------------------------------------------------- - -def _validate_file(filename): - import os.path - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea what the file is. Return OK - - try: - f = open(filename) - lines = f.readlines() - f.close() - except IOError: - return 1 # Oh well - - fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') - sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - counthash = { } - linen = 1 - noerror = 1 - for l in lines: - m = fre.match(l) - if not m: - m = sre.match(l) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - print "%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev) - noerror = 0 - linen += 1 - return noerror - -# ----------------------------------------------------------------------------- -# _funcs_to_names() -# -# Given a list of regular expression functions, this converts it to a list -# suitable for output to a table file -# ----------------------------------------------------------------------------- - -def _funcs_to_names(funclist): - result = [] - for f in funclist: - if f and f[0]: - result.append((f[0].__name__,f[1])) - else: - result.append(f) - return result - -# ----------------------------------------------------------------------------- -# _names_to_funcs() -# -# Given a list of regular expression function names, this converts it back to -# functions. -# ----------------------------------------------------------------------------- - -def _names_to_funcs(namelist,fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]],n[1])) - else: - result.append(n) - return result - -# ----------------------------------------------------------------------------- -# _form_master_re() -# -# This function takes a list of all of the regex components and attempts to -# form the master regular expression. Given limitations in the Python re -# module, it may be necessary to break the master regex into separate expressions. 
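#
# For illustration, two rules such as t_NUMBER = r'\d+' and t_PLUS = r'\+'
# are folded into one master pattern of the form
#
#     (?P<t_NUMBER>\d+)|(?P<t_PLUS>\+)
#
# and a match is traced back to its rule via m.lastindex and groupindex.
# If re.compile() rejects the combined pattern (for example, this era's re
# module allowed at most 100 named groups), the except clause below splits
# the rule list in half and compiles each half recursively.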
-# ----------------------------------------------------------------------------- - -def _form_master_re(relist,reflags,ldict): - if not relist: return [] - regex = "|".join(relist) - try: - lexre = re.compile(regex,re.VERBOSE | reflags) - - # Build the index to function map for the matching engine - lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) - for f,i in lexre.groupindex.items(): - handle = ldict.get(f,None) - if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle,handle.__name__[2:]) - elif handle is not None: - # If rule was specified as a string, we build an anonymous - # callback function to carry out the action - if f.find("ignore_") > 0: - lexindexfunc[i] = (None,None) - print "IGNORE", f - else: - lexindexfunc[i] = (None, f[2:]) - - return [(lexre,lexindexfunc)],[regex] - except Exception,e: - m = int(len(relist)/2) - if m == 0: m = 1 - llist, lre = _form_master_re(relist[:m],reflags,ldict) - rlist, rre = _form_master_re(relist[m:],reflags,ldict) - return llist+rlist, lre+rre - -# ----------------------------------------------------------------------------- -# def _statetoken(s,names) -# -# Given a declaration name s of the form "t_" and a dictionary whose keys are -# state names, this function returns a tuple (states,tokenname) where states -# is a tuple of state names and tokenname is the name of the token. For example, -# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') -# ----------------------------------------------------------------------------- - -def _statetoken(s,names): - nonstate = 1 - parts = s.split("_") - for i in range(1,len(parts)): - if not names.has_key(parts[i]) and parts[i] != 'ANY': break - if i > 1: - states = tuple(parts[1:i]) - else: - states = ('INITIAL',) - - if 'ANY' in states: - states = tuple(names.keys()) - - tokenname = "_".join(parts[i:]) - return (states,tokenname) - -# ----------------------------------------------------------------------------- -# lex(module) -# -# Build all of the regular expression rules from definitions in the supplied module -# ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0): - global lexer - ldict = None - stateinfo = { 'INITIAL' : 'inclusive'} - error = 0 - files = { } - lexobj = Lexer() - lexobj.lexdebug = debug - lexobj.lexoptimize = optimize - global token,input - - if nowarn: warn = 0 - else: warn = 1 - - if object: module = object - - if module: - # User supplied a module object. - if isinstance(module, types.ModuleType): - ldict = module.__dict__ - elif isinstance(module, _INSTANCETYPE): - _items = [(k,getattr(module,k)) for k in dir(module)] - ldict = { } - for (i,v) in _items: - ldict[i] = v - else: - raise ValueError,"Expected a module or instance" - lexobj.lexmodule = module - - else: - # No module given. We might be able to get information from the caller. 
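        # (Editor's note: the raise/except below is a Python 2.0-compatible
        # way to reach the caller's frame; on later interpreters the single
        # call sys._getframe(1) would have the same effect here.)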
- try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - f = f.f_back # Walk out to our calling function - ldict = f.f_globals # Grab its globals dictionary - - if optimize and lextab: - try: - lexobj.readtab(lextab,ldict) - token = lexobj.token - input = lexobj.input - lexer = lexobj - return lexobj - - except ImportError: - pass - - # Get the tokens, states, and literals variables (if any) - if (module and isinstance(module,_INSTANCETYPE)): - tokens = getattr(module,"tokens",None) - states = getattr(module,"states",None) - literals = getattr(module,"literals","") - else: - tokens = ldict.get("tokens",None) - states = ldict.get("states",None) - literals = ldict.get("literals","") - - if not tokens: - raise SyntaxError,"lex: module does not define 'tokens'" - if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): - raise SyntaxError,"lex: tokens must be a list or tuple." - - # Build a dictionary of valid token names - lexobj.lextokens = { } - if not optimize: - for n in tokens: - if not _is_identifier.match(n): - print "lex: Bad token name '%s'" % n - error = 1 - if warn and lexobj.lextokens.has_key(n): - print "lex: Warning. Token '%s' multiply defined." % n - lexobj.lextokens[n] = None - else: - for n in tokens: lexobj.lextokens[n] = None - - if debug: - print "lex: tokens = '%s'" % lexobj.lextokens.keys() - - try: - for c in literals: - if not (isinstance(c,types.StringType) or isinstance(c,types.UnicodeType)) or len(c) > 1: - print "lex: Invalid literal %s. Must be a single character" % repr(c) - error = 1 - continue - - except TypeError: - print "lex: Invalid literals specification. literals must be a sequence of characters." - error = 1 - - lexobj.lexliterals = literals - - # Build statemap - if states: - if not (isinstance(states,types.TupleType) or isinstance(states,types.ListType)): - print "lex: states must be defined as a tuple or list." - error = 1 - else: - for s in states: - if not isinstance(s,types.TupleType) or len(s) != 2: - print "lex: invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')" % repr(s) - error = 1 - continue - name, statetype = s - if not isinstance(name,types.StringType): - print "lex: state name %s must be a string" % repr(name) - error = 1 - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - print "lex: state type for state %s must be 'inclusive' or 'exclusive'" % name - error = 1 - continue - if stateinfo.has_key(name): - print "lex: state '%s' already defined." % name - error = 1 - continue - stateinfo[name] = statetype - - # Get a list of symbols with the t_ or s_ prefix - tsymbols = [f for f in ldict.keys() if f[:2] == 't_' ] - - # Now build up a list of functions and a list of strings - - funcsym = { } # Symbols defined as functions - strsym = { } # Symbols defined as strings - toknames = { } # Mapping of symbols to token names - - for s in stateinfo.keys(): - funcsym[s] = [] - strsym[s] = [] - - ignore = { } # Ignore strings by state - errorf = { } # Error functions by state - - if len(tsymbols) == 0: - raise SyntaxError,"lex: no rules of the form t_rulename are defined." 
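    # To illustrate the classification below (invented example module):
    #
    #     t_PLUS = r'\+'
    #     def t_NUMBER(t):
    #         r'\d+'
    #         return t
    #
    # yields tsymbols containing 't_PLUS' and 't_NUMBER'; with no states
    # declared, both rules fall into the 'INITIAL' bucket, t_NUMBER into
    # funcsym['INITIAL'] and t_PLUS into strsym['INITIAL'].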
- - for f in tsymbols: - t = ldict[f] - states, tokname = _statetoken(f,stateinfo) - toknames[f] = tokname - - if callable(t): - for s in states: funcsym[s].append((f,t)) - elif (isinstance(t, types.StringType) or isinstance(t,types.UnicodeType)): - for s in states: strsym[s].append((f,t)) - else: - print "lex: %s not defined as a function or string" % f - error = 1 - - # Sort the functions by line number - for f in funcsym.values(): - f.sort(lambda x,y: cmp(x[1].func_code.co_firstlineno,y[1].func_code.co_firstlineno)) - - # Sort the strings by regular expression length - for s in strsym.values(): - s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) - - regexs = { } - - # Build the master regular expressions - for state in stateinfo.keys(): - regex_list = [] - - # Add rules defined by functions first - for fname, f in funcsym[state]: - line = f.func_code.co_firstlineno - file = f.func_code.co_filename - files[file] = None - tokname = toknames[fname] - - ismethod = isinstance(f, types.MethodType) - - if not optimize: - nargs = f.func_code.co_argcount - if ismethod: - reqargs = 2 - else: - reqargs = 1 - if nargs > reqargs: - print "%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__) - error = 1 - continue - - if nargs < reqargs: - print "%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__) - error = 1 - continue - - if tokname == 'ignore': - print "%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__) - error = 1 - continue - - if tokname == 'error': - errorf[state] = f - continue - - if f.__doc__: - if not optimize: - try: - c = re.compile("(?P<%s>%s)" % (f.__name__,f.__doc__), re.VERBOSE | reflags) - if c.match(""): - print "%s:%d: Regular expression for rule '%s' matches empty string." % (file,line,f.__name__) - error = 1 - continue - except re.error,e: - print "%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e) - if '#' in f.__doc__: - print "%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'." % (file,line, f.__name__) - error = 1 - continue - - if debug: - print "lex: Adding rule %s -> '%s' (state '%s')" % (f.__name__,f.__doc__, state) - - # Okay. The regular expression seemed okay. Let's append it to the master regular - # expression we're building - - regex_list.append("(?P<%s>%s)" % (f.__name__,f.__doc__)) - else: - print "%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__) - - # Now add all of the simple rules - for name,r in strsym[state]: - tokname = toknames[name] - - if tokname == 'ignore': - ignore[state] = r - continue - - if not optimize: - if tokname == 'error': - raise SyntaxError,"lex: Rule '%s' must be defined as a function" % name - error = 1 - continue - - if not lexobj.lextokens.has_key(tokname) and tokname.find("ignore_") < 0: - print "lex: Rule '%s' defined for an unspecified token %s." % (name,tokname) - error = 1 - continue - try: - c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | reflags) - if (c.match("")): - print "lex: Regular expression for rule '%s' matches empty string." % name - error = 1 - continue - except re.error,e: - print "lex: Invalid regular expression for rule '%s'. %s" % (name,e) - if '#' in r: - print "lex: Make sure '#' in rule '%s' is escaped with '\\#'." 
% name - - error = 1 - continue - if debug: - print "lex: Adding rule %s -> '%s' (state '%s')" % (name,r,state) - - regex_list.append("(?P<%s>%s)" % (name,r)) - - if not regex_list: - print "lex: No rules defined for state '%s'" % state - error = 1 - - regexs[state] = regex_list - - - if not optimize: - for f in files.keys(): - if not _validate_file(f): - error = 1 - - if error: - raise SyntaxError,"lex: Unable to build lexer." - - # From this point forward, we're reasonably confident that we can build the lexer. - # No more errors will be generated, but there might be some warning messages. - - # Build the master regular expressions - - for state in regexs.keys(): - lexre, re_text = _form_master_re(regexs[state],reflags,ldict) - lexobj.lexstatere[state] = lexre - lexobj.lexstateretext[state] = re_text - if debug: - for i in range(len(re_text)): - print "lex: state '%s'. regex[%d] = '%s'" % (state, i, re_text[i]) - - # For inclusive states, we need to add the INITIAL state - for state,type in stateinfo.items(): - if state != "INITIAL" and type == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - - lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere["INITIAL"] - lexobj.lexretext = lexobj.lexstateretext["INITIAL"] - - # Set up ignore variables - lexobj.lexstateignore = ignore - lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") - - # Set up error functions - lexobj.lexstateerrorf = errorf - lexobj.lexerrorf = errorf.get("INITIAL",None) - if warn and not lexobj.lexerrorf: - print "lex: Warning. no t_error rule is defined." - - # Check state information for ignore and error rules - for s,stype in stateinfo.items(): - if stype == 'exclusive': - if warn and not errorf.has_key(s): - print "lex: Warning. no error rule is defined for exclusive state '%s'" % s - if warn and not ignore.has_key(s) and lexobj.lexignore: - print "lex: Warning. 
no ignore rule is defined for exclusive state '%s'" % s - elif stype == 'inclusive': - if not errorf.has_key(s): - errorf[s] = errorf.get("INITIAL",None) - if not ignore.has_key(s): - ignore[s] = ignore.get("INITIAL","") - - - # Create global versions of the token() and input() functions - token = lexobj.token - input = lexobj.input - lexer = lexobj - - # If in optimize mode, we write the lextab - if lextab and optimize: - lexobj.writetab(lextab) - - return lexobj - -# ----------------------------------------------------------------------------- -# runmain() -# -# This runs the lexer as a main program -# ----------------------------------------------------------------------------- - -def runmain(lexer=None,data=None): - if not data: - try: - filename = sys.argv[1] - f = open(filename) - data = f.read() - f.close() - except IndexError: - print "Reading from standard input (type EOF to end):" - data = sys.stdin.read() - - if lexer: - _input = lexer.input - else: - _input = input - _input(data) - if lexer: - _token = lexer.token - else: - _token = token - - while 1: - tok = _token() - if not tok: break - print "(%s,%r,%d,%d)" % (tok.type, tok.value, tok.lineno,tok.lexpos) - - -# ----------------------------------------------------------------------------- -# @TOKEN(regex) -# -# This decorator function can be used to set the regex expression on a function -# when its docstring might need to be set in an alternative way -# ----------------------------------------------------------------------------- - -def TOKEN(r): - def set_doc(f): - f.__doc__ = r - return f - return set_doc - -# Alternative spelling of the TOKEN decorator -Token = TOKEN - diff --git a/chall/ply-2.2/ply/lex.pyc b/chall/ply-2.2/ply/lex.pyc Binary files differdeleted file mode 100644 index 19af7ed..0000000 --- a/chall/ply-2.2/ply/lex.pyc +++ /dev/null diff --git a/chall/ply-2.2/ply/yacc.py b/chall/ply-2.2/ply/yacc.py deleted file mode 100644 index caf98af..0000000 --- a/chall/ply-2.2/ply/yacc.py +++ /dev/null @@ -1,2209 +0,0 @@ -#----------------------------------------------------------------------------- -# ply: yacc.py -# -# Author(s): David M. Beazley (dave@dabeaz.com) -# -# Copyright (C) 2001-2006, David M. Beazley -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# See the file COPYING for a complete copy of the LGPL. -# -# -# This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside -# Python documentation strings. The inspiration for this technique was borrowed -# from John Aycock's Spark parsing system. PLY might be viewed as cross between -# Spark and the GNU bison utility. -# -# The current implementation is only somewhat object-oriented. The -# LR parser itself is defined in terms of an object (which allows multiple -# parsers to co-exist). 
However, most of the variables used during table -# construction are defined in terms of global variables. Users shouldn't -# notice unless they are trying to define multiple parsers at the same -# time using threads (in which case they should have their head examined). -# -# This implementation supports both SLR and LALR(1) parsing. LALR(1) -# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), -# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, -# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced -# by the more efficient DeRemer and Pennello algorithm. -# -# :::::::: WARNING ::::::: -# -# Construction of LR parsing tables is fairly complicated and expensive. -# To make this module run fast, a *LOT* of work has been put into -# optimization---often at the expensive of readability and what might -# consider to be good Python "coding style." Modify the code at your -# own risk! -# ---------------------------------------------------------------------------- - -__version__ = "2.2" - -#----------------------------------------------------------------------------- -# === User configurable parameters === -# -# Change these to modify the default behavior of yacc (if you wish) -#----------------------------------------------------------------------------- - -yaccdebug = 1 # Debugging mode. If set, yacc generates a - # a 'parser.out' file in the current directory - -debug_file = 'parser.out' # Default name of the debugging file -tab_module = 'parsetab' # Default name of the table module -default_lr = 'LALR' # Default LR table generation method - -error_count = 3 # Number of symbols that must be shifted to leave recovery mode - -import re, types, sys, cStringIO, md5, os.path - -# Exception raised for yacc-related errors -class YaccError(Exception): pass - -#----------------------------------------------------------------------------- -# === LR Parsing Engine === -# -# The following classes are used for the LR parser itself. These are not -# used during table construction and are independent of the actual LR -# table generation algorithm -#----------------------------------------------------------------------------- - -# This class is used to hold non-terminal grammar symbols during parsing. -# It normally has the following attributes set: -# .type = Grammar symbol type -# .value = Symbol value -# .lineno = Starting line number -# .endlineno = Ending line number (optional, set automatically) -# .lexpos = Starting lex position -# .endlexpos = Ending lex position (optional, set automatically) - -class YaccSymbol: - def __str__(self): return self.type - def __repr__(self): return str(self) - -# This class is a wrapper around the objects actually passed to each -# grammar rule. Index lookup and assignment actually assign the -# .value attribute of the underlying YaccSymbol object. -# The lineno() method returns the line number of a given -# item (or 0 if not defined). The linespan() method returns -# a tuple of (startline,endline) representing the range of lines -# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) -# representing the range of positional information for a symbol. 
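#
# A hedged usage sketch of those accessors (the grammar rule is invented):
#
#     def p_assign(p):
#         'assign : NAME EQUALS expr'
#         line = p.lineno(1)                  # line where NAME was matched
#         startline, endline = p.linespan(3)  # line range covered by expr
#         startpos, endpos = p.lexspan(3)     # lexpos range covered by expr
#         p[0] = (p[1], p[3])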
- -class YaccProduction: - def __init__(self,s,stack=None): - self.slice = s - self.pbstack = [] - self.stack = stack - - def __getitem__(self,n): - if type(n) == types.IntType: - if n >= 0: return self.slice[n].value - else: return self.stack[n].value - else: - return [s.value for s in self.slice[n.start:n.stop:n.step]] - - def __setitem__(self,n,v): - self.slice[n].value = v - - def __len__(self): - return len(self.slice) - - def lineno(self,n): - return getattr(self.slice[n],"lineno",0) - - def linespan(self,n): - startline = getattr(self.slice[n],"lineno",0) - endline = getattr(self.slice[n],"endlineno",startline) - return startline,endline - - def lexpos(self,n): - return getattr(self.slice[n],"lexpos",0) - - def lexspan(self,n): - startpos = getattr(self.slice[n],"lexpos",0) - endpos = getattr(self.slice[n],"endlexpos",startpos) - return startpos,endpos - - def pushback(self,n): - if n <= 0: - raise ValueError, "Expected a positive value" - if n > (len(self.slice)-1): - raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1) - for i in range(0,n): - self.pbstack.append(self.slice[-i-1]) - -# The LR Parsing engine. This is defined as a class so that multiple parsers -# can exist in the same process. A user never instantiates this directly. -# Instead, the global yacc() function should be used to create a suitable Parser -# object. - -class Parser: - def __init__(self,magic=None): - - # This is a hack to keep users from trying to instantiate a Parser - # object directly. - - if magic != "xyzzy": - raise YaccError, "Can't instantiate Parser. Use yacc() instead." - - # Reset internal state - self.productions = None # List of productions - self.errorfunc = None # Error handling function - self.action = { } # LR Action table - self.goto = { } # LR goto table - self.require = { } # Attribute require table - self.method = "Unknown LR" # Table construction method used - - def errok(self): - self.errorcount = 0 - - def restart(self): - del self.statestack[:] - del self.symstack[:] - sym = YaccSymbol() - sym.type = '$end' - self.symstack.append(sym) - self.statestack.append(0) - - def parse(self,input=None,lexer=None,debug=0): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table - goto = self.goto # Local reference to goto table - prod = self.productions # Local reference to production list - pslice = YaccProduction(None) # Production object passed to grammar rules - pslice.parser = self # Parser object - self.errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - import lex - lexer = lex.lexer - - pslice.lexer = lexer - - # If input was supplied, pass to lexer - if input: - lexer.input(input) - - # Tokenize function - get_token = lexer.token - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. 
Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - if debug > 1: - print 'state', statestack[-1] - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - if debug: - errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip() - - # Check the action table - s = statestack[-1] - ltype = lookahead.type - t = actions.get((s,ltype),None) - - if debug > 1: - print 'action', t - if t is not None: - if t > 0: - # shift a symbol on the stack - if ltype == '$end': - # Error, end of input - sys.stderr.write("yacc: Parse error. EOF\n") - return - statestack.append(t) - if debug > 1: - sys.stderr.write("%-60s shift state %s\n" % (errorlead, t)) - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if self.errorcount > 0: - self.errorcount -= 1 - - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - if debug > 1: - sys.stderr.write("%-60s reduce %d\n" % (errorlead, -t)) - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - try: - sym.lineno = targ[1].lineno - sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno) - sym.lexpos = targ[1].lexpos - sym.endlexpos = getattr(targ[-1],"endlexpos",targ[-1].lexpos) - except AttributeError: - sym.lineno = 0 - del symstack[-plen:] - del statestack[-plen:] - else: - sym.lineno = 0 - targ = [ sym ] - pslice.slice = targ - pslice.pbstack = [] - # Call the grammar rule with our special slice object - p.func(pslice) - - # If there was a pushback, put that on the stack - if pslice.pbstack: - lookaheadstack.append(lookahead) - for _t in pslice.pbstack: - lookaheadstack.append(_t) - lookahead = None - - symstack.append(sym) - statestack.append(goto[statestack[-1],pname]) - continue - - if t == 0: - n = symstack[-1] - return getattr(n,"value",None) - sys.stderr.write(errorlead, "\n") - - if t == None: - if debug: - sys.stderr.write(errorlead + "\n") - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if not self.errorcount: - self.errorcount = error_count - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if not self.errorcount: - # User must have done some kind of panic - # mode recovery on their own. 
The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - self.errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - - continue - - # Call an error function here - raise RuntimeError, "yacc: internal parser error!!!\n" - -# ----------------------------------------------------------------------------- -# === Parser Construction === -# -# The following functions and variables are used to implement the yacc() function -# itself. This is pretty hairy stuff involving lots of error checking, -# construction of LR items, kernels, and so forth. Although a lot of -# this work is done using global variables, the resulting Parser object -# is completely self contained--meaning that it is safe to repeatedly -# call yacc() with different grammars in the same application. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# validate_file() -# -# This function checks to see if there are duplicated p_rulename() functions -# in the parser module file. Without this function, it is really easy for -# users to make mistakes by cutting and pasting code fragments (and it's a real -# bugger to try and figure out why the resulting parser doesn't work). Therefore, -# we just do a little regular expression pattern matching of def statements -# to try and detect duplicates. -# ----------------------------------------------------------------------------- - -def validate_file(filename): - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea. Assume it's okay. - - try: - f = open(filename) - lines = f.readlines() - f.close() - except IOError: - return 1 # Oh well - - # Match def p_funcname( - fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - counthash = { } - linen = 1 - noerror = 1 - for l in lines: - m = fre.match(l) - if m: - name = m.group(1) - prev = counthash.get(name) - if not prev: - counthash[name] = linen - else: - sys.stderr.write("%s:%d: Function %s redefined. 
Previously defined on line %d\n" % (filename,linen,name,prev)) - noerror = 0 - linen += 1 - return noerror - -# This function looks for functions that might be grammar rules, but which don't have the proper p_suffix. -def validate_dict(d): - for n,v in d.items(): - if n[0:2] == 'p_' and type(v) in (types.FunctionType, types.MethodType): continue - if n[0:2] == 't_': continue - - if n[0:2] == 'p_': - sys.stderr.write("yacc: Warning. '%s' not defined as a function\n" % n) - if 1 and isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1: - try: - doc = v.__doc__.split(" ") - if doc[1] == ':': - sys.stderr.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v.func_code.co_filename, v.func_code.co_firstlineno,n)) - except StandardError: - pass - -# ----------------------------------------------------------------------------- -# === GRAMMAR FUNCTIONS === -# -# The following global variables and functions are used to store, manipulate, -# and verify the grammar rules specified by the user. -# ----------------------------------------------------------------------------- - -# Initialize all of the global variables used during grammar construction -def initialize_vars(): - global Productions, Prodnames, Prodmap, Terminals - global Nonterminals, First, Follow, Precedence, LRitems - global Errorfunc, Signature, Requires - - Productions = [None] # A list of all of the productions. The first - # entry is always reserved for the purpose of - # building an augmented grammar - - Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all - # productions of that nonterminal. - - Prodmap = { } # A dictionary that is only used to detect duplicate - # productions. - - Terminals = { } # A dictionary mapping the names of terminal symbols to a - # list of the rules where they are used. - - Nonterminals = { } # A dictionary mapping names of nonterminals to a list - # of rule numbers where they are used. - - First = { } # A dictionary of precomputed FIRST(x) symbols - - Follow = { } # A dictionary of precomputed FOLLOW(x) symbols - - Precedence = { } # Precedence rules for each terminal. Contains tuples of the - # form ('right',level) or ('nonassoc', level) or ('left',level) - - LRitems = [ ] # A list of all LR items for the grammar. These are the - # productions with the "dot" like E -> E . PLUS E - - Errorfunc = None # User defined error handler - - Signature = md5.new() # Digital signature of the grammar rules, precedence - # and other information. Used to determined when a - # parsing table needs to be regenerated. - - Requires = { } # Requires list - - # File objects used when creating the parser.out debugging file - global _vf, _vfc - _vf = cStringIO.StringIO() - _vfc = cStringIO.StringIO() - -# ----------------------------------------------------------------------------- -# class Production: -# -# This class stores the raw information about a single production or grammar rule. -# It has a few required attributes: -# -# name - Name of the production (nonterminal) -# prod - A list of symbols making up its production -# number - Production number. -# -# In addition, a few additional attributes are used to help with debugging or -# optimization of table generation. -# -# file - File where production action is defined. -# lineno - Line number where action is defined -# func - Action function -# prec - Precedence level -# lr_next - Next LR item. Example, if we are ' E -> E . PLUS E' -# then lr_next refers to 'E -> E PLUS . 
E' -# lr_index - LR item index (location of the ".") in the prod list. -# lookaheads - LALR lookahead symbols for this item -# len - Length of the production (number of symbols on right hand side) -# ----------------------------------------------------------------------------- - -class Production: - def __init__(self,**kw): - for k,v in kw.items(): - setattr(self,k,v) - self.lr_index = -1 - self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure - self.lr1_added = 0 # Flag indicating whether or not added to LR1 - self.usyms = [ ] - self.lookaheads = { } - self.lk_added = { } - self.setnumbers = [ ] - - def __str__(self): - if self.prod: - s = "%s -> %s" % (self.name," ".join(self.prod)) - else: - s = "%s -> <empty>" % self.name - return s - - def __repr__(self): - return str(self) - - # Compute lr_items from the production - def lr_item(self,n): - if n > len(self.prod): return None - p = Production() - p.name = self.name - p.prod = list(self.prod) - p.number = self.number - p.lr_index = n - p.lookaheads = { } - p.setnumbers = self.setnumbers - p.prod.insert(n,".") - p.prod = tuple(p.prod) - p.len = len(p.prod) - p.usyms = self.usyms - - # Precompute list of productions immediately following - try: - p.lrafter = Prodnames[p.prod[n+1]] - except (IndexError,KeyError),e: - p.lrafter = [] - try: - p.lrbefore = p.prod[n-1] - except IndexError: - p.lrbefore = None - - return p - -class MiniProduction: - pass - -# regex matching identifiers -_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') - -# ----------------------------------------------------------------------------- -# add_production() -# -# Given an action function, this function assembles a production rule. -# The production rule is assumed to be found in the function's docstring. -# This rule has the general syntax: -# -# name1 ::= production1 -# | production2 -# | production3 -# ... -# | productionn -# name2 ::= production1 -# | production2 -# ... -# ----------------------------------------------------------------------------- - -def add_production(f,file,line,prodname,syms): - - if Terminals.has_key(prodname): - sys.stderr.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line,prodname)) - return -1 - if prodname == 'error': - sys.stderr.write("%s:%d: Illegal rule name '%s'. 
error is a reserved word.\n" % (file,line,prodname)) - return -1 - - if not _is_identifier.match(prodname): - sys.stderr.write("%s:%d: Illegal rule name '%s'\n" % (file,line,prodname)) - return -1 - - for x in range(len(syms)): - s = syms[x] - if s[0] in "'\"": - try: - c = eval(s) - if (len(c) > 1): - sys.stderr.write("%s:%d: Literal token %s in rule '%s' may only be a single character\n" % (file,line,s, prodname)) - return -1 - if not Terminals.has_key(c): - Terminals[c] = [] - syms[x] = c - continue - except SyntaxError: - pass - if not _is_identifier.match(s) and s != '%prec': - sys.stderr.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line,s, prodname)) - return -1 - - # See if the rule is already in the rulemap - map = "%s -> %s" % (prodname,syms) - if Prodmap.has_key(map): - m = Prodmap[map] - sys.stderr.write("%s:%d: Duplicate rule %s.\n" % (file,line, m)) - sys.stderr.write("%s:%d: Previous definition at %s:%d\n" % (file,line, m.file, m.line)) - return -1 - - p = Production() - p.name = prodname - p.prod = syms - p.file = file - p.line = line - p.func = f - p.number = len(Productions) - - - Productions.append(p) - Prodmap[map] = p - if not Nonterminals.has_key(prodname): - Nonterminals[prodname] = [ ] - - # Add all terminals to Terminals - i = 0 - while i < len(p.prod): - t = p.prod[i] - if t == '%prec': - try: - precname = p.prod[i+1] - except IndexError: - sys.stderr.write("%s:%d: Syntax error. Nothing follows %%prec.\n" % (p.file,p.line)) - return -1 - - prec = Precedence.get(precname,None) - if not prec: - sys.stderr.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p.file,p.line,precname)) - return -1 - else: - p.prec = prec - del p.prod[i] - del p.prod[i] - continue - - if Terminals.has_key(t): - Terminals[t].append(p.number) - # Is a terminal. 
We'll assign a precedence to p based on this - if not hasattr(p,"prec"): - p.prec = Precedence.get(t,('right',0)) - else: - if not Nonterminals.has_key(t): - Nonterminals[t] = [ ] - Nonterminals[t].append(p.number) - i += 1 - - if not hasattr(p,"prec"): - p.prec = ('right',0) - - # Set final length of productions - p.len = len(p.prod) - p.prod = tuple(p.prod) - - # Calculate unique syms in the production - p.usyms = [ ] - for s in p.prod: - if s not in p.usyms: - p.usyms.append(s) - - # Add to the global productions list - try: - Prodnames[p.name].append(p) - except KeyError: - Prodnames[p.name] = [ p ] - return 0 - -# Given a raw rule function, this function rips out its doc string -# and adds rules to the grammar - -def add_function(f): - line = f.func_code.co_firstlineno - file = f.func_code.co_filename - error = 0 - - if isinstance(f,types.MethodType): - reqdargs = 2 - else: - reqdargs = 1 - - if f.func_code.co_argcount > reqdargs: - sys.stderr.write("%s:%d: Rule '%s' has too many arguments.\n" % (file,line,f.__name__)) - return -1 - - if f.func_code.co_argcount < reqdargs: - sys.stderr.write("%s:%d: Rule '%s' requires an argument.\n" % (file,line,f.__name__)) - return -1 - - if f.__doc__: - # Split the doc string into lines - pstrings = f.__doc__.splitlines() - lastp = None - dline = line - for ps in pstrings: - dline += 1 - p = ps.split() - if not p: continue - try: - if p[0] == '|': - # This is a continuation of a previous rule - if not lastp: - sys.stderr.write("%s:%d: Misplaced '|'.\n" % (file,dline)) - return -1 - prodname = lastp - if len(p) > 1: - syms = p[1:] - else: - syms = [ ] - else: - prodname = p[0] - lastp = prodname - assign = p[1] - if len(p) > 2: - syms = p[2:] - else: - syms = [ ] - if assign != ':' and assign != '::=': - sys.stderr.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline)) - return -1 - - - e = add_production(f,file,dline,prodname,syms) - error += e - - - except StandardError: - sys.stderr.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline,ps)) - error -= 1 - else: - sys.stderr.write("%s:%d: No documentation string specified in function '%s'\n" % (file,line,f.__name__)) - return error - - -# Cycle checking code (Michael Dyck) - -def compute_reachable(): - ''' - Find each symbol that can be reached from the start symbol. - Print a warning for any nonterminals that can't be reached. - (Unused terminals have already had their warning.) - ''' - Reachable = { } - for s in Terminals.keys() + Nonterminals.keys(): - Reachable[s] = 0 - - mark_reachable_from( Productions[0].prod[0], Reachable ) - - for s in Nonterminals.keys(): - if not Reachable[s]: - sys.stderr.write("yacc: Symbol '%s' is unreachable.\n" % s) - -def mark_reachable_from(s, Reachable): - ''' - Mark all symbols that are reachable from symbol s. - ''' - if Reachable[s]: - # We've already reached symbol s. - return - Reachable[s] = 1 - for p in Prodnames.get(s,[]): - for r in p.prod: - mark_reachable_from(r, Reachable) - -# ----------------------------------------------------------------------------- -# compute_terminates() -# -# This function looks at the various parsing rules and tries to detect -# infinite recursion cycles (grammar rules where there is no possible way -# to derive a string of only terminals). -# ----------------------------------------------------------------------------- -def compute_terminates(): - ''' - Raise an error for any symbols that don't terminate. 
- ''' - Terminates = {} - - # Terminals: - for t in Terminals.keys(): - Terminates[t] = 1 - - Terminates['$end'] = 1 - - # Nonterminals: - - # Initialize to false: - for n in Nonterminals.keys(): - Terminates[n] = 0 - - # Then propagate termination until no change: - while 1: - some_change = 0 - for (n,pl) in Prodnames.items(): - # Nonterminal n terminates iff any of its productions terminates. - for p in pl: - # Production p terminates iff all of its rhs symbols terminate. - for s in p.prod: - if not Terminates[s]: - # The symbol s does not terminate, - # so production p does not terminate. - p_terminates = 0 - break - else: - # didn't break from the loop, - # so every symbol s terminates - # so production p terminates. - p_terminates = 1 - - if p_terminates: - # symbol n terminates! - if not Terminates[n]: - Terminates[n] = 1 - some_change = 1 - # Don't need to consider any more productions for this n. - break - - if not some_change: - break - - some_error = 0 - for (s,terminates) in Terminates.items(): - if not terminates: - if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': - # s is used-but-not-defined, and we've already warned of that, - # so it would be overkill to say that it's also non-terminating. - pass - else: - sys.stderr.write("yacc: Infinite recursion detected for symbol '%s'.\n" % s) - some_error = 1 - - return some_error - -# ----------------------------------------------------------------------------- -# verify_productions() -# -# This function examines all of the supplied rules to see if they seem valid. -# ----------------------------------------------------------------------------- -def verify_productions(cycle_check=1): - error = 0 - for p in Productions: - if not p: continue - - for s in p.prod: - if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': - sys.stderr.write("%s:%d: Symbol '%s' used, but not defined as a token or a rule.\n" % (p.file,p.line,s)) - error = 1 - continue - - unused_tok = 0 - # Now verify all of the tokens - if yaccdebug: - _vf.write("Unused terminals:\n\n") - for s,v in Terminals.items(): - if s != 'error' and not v: - sys.stderr.write("yacc: Warning. Token '%s' defined, but not used.\n" % s) - if yaccdebug: _vf.write(" %s\n"% s) - unused_tok += 1 - - # Print out all of the productions - if yaccdebug: - _vf.write("\nGrammar\n\n") - for i in range(1,len(Productions)): - _vf.write("Rule %-5d %s\n" % (i, Productions[i])) - - unused_prod = 0 - # Verify the use of all productions - for s,v in Nonterminals.items(): - if not v: - p = Prodnames[s][0] - sys.stderr.write("%s:%d: Warning. Rule '%s' defined, but not used.\n" % (p.file,p.line, s)) - unused_prod += 1 - - - if unused_tok == 1: - sys.stderr.write("yacc: Warning. There is 1 unused token.\n") - if unused_tok > 1: - sys.stderr.write("yacc: Warning. There are %d unused tokens.\n" % unused_tok) - - if unused_prod == 1: - sys.stderr.write("yacc: Warning. There is 1 unused rule.\n") - if unused_prod > 1: - sys.stderr.write("yacc: Warning. 
There are %d unused rules.\n" % unused_prod) - - if yaccdebug: - _vf.write("\nTerminals, with rules where they appear\n\n") - ks = Terminals.keys() - ks.sort() - for k in ks: - _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]]))) - _vf.write("\nNonterminals, with rules where they appear\n\n") - ks = Nonterminals.keys() - ks.sort() - for k in ks: - _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]]))) - - if (cycle_check): - compute_reachable() - error += compute_terminates() -# error += check_cycles() - return error - -# ----------------------------------------------------------------------------- -# build_lritems() -# -# This function walks the list of productions and builds a complete set of the -# LR items. The LR items are stored in two ways: First, they are uniquely -# numbered and placed in the list _lritems. Second, a linked list of LR items -# is built for each production. For example: -# -# E -> E PLUS E -# -# Creates the list -# -# [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] -# ----------------------------------------------------------------------------- - -def build_lritems(): - for p in Productions: - lastlri = p - lri = p.lr_item(0) - i = 0 - while 1: - lri = p.lr_item(i) - lastlri.lr_next = lri - if not lri: break - lri.lr_num = len(LRitems) - LRitems.append(lri) - lastlri = lri - i += 1 - - # In order for the rest of the parser generator to work, we need to - # guarantee that no more lritems are generated. Therefore, we nuke - # the p.lr_item method. (Only used in debugging) - # Production.lr_item = None - -# ----------------------------------------------------------------------------- -# add_precedence() -# -# Given a list of precedence rules, add to the precedence table. -# ----------------------------------------------------------------------------- - -def add_precedence(plist): - plevel = 0 - error = 0 - for p in plist: - plevel += 1 - try: - prec = p[0] - terms = p[1:] - if prec != 'left' and prec != 'right' and prec != 'nonassoc': - sys.stderr.write("yacc: Invalid precedence '%s'\n" % prec) - return -1 - for t in terms: - if Precedence.has_key(t): - sys.stderr.write("yacc: Precedence already specified for terminal '%s'\n" % t) - error += 1 - continue - Precedence[t] = (prec,plevel) - except: - sys.stderr.write("yacc: Invalid precedence table.\n") - error += 1 - - return error - -# ----------------------------------------------------------------------------- -# augment_grammar() -# -# Compute the augmented grammar. This is just a rule S' -> start where start -# is the starting symbol. -# ----------------------------------------------------------------------------- - -def augment_grammar(start=None): - if not start: - start = Productions[1].name - Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None) - Productions[0].usyms = [ start ] - Nonterminals[start].append(0) - - -# ------------------------------------------------------------------------- -# first() -# -# Compute the value of FIRST1(beta) where beta is a tuple of symbols. -# -# During execution of compute_first1, the result may be incomplete. -# Afterward (e.g., when called from compute_follow()), it will be complete. -# ------------------------------------------------------------------------- -def first(beta): - - # We are computing First(x1,x2,x3,...,xn) - result = [ ] - for x in beta: - x_produces_empty = 0 - - # Add all the non-<empty> symbols of First[x] to the result. 
- for f in First[x]: - if f == '<empty>': - x_produces_empty = 1 - else: - if f not in result: result.append(f) - - if x_produces_empty: - # We have to consider the next x in beta, - # i.e. stay in the loop. - pass - else: - # We don't have to consider any further symbols in beta. - break - else: - # There was no 'break' from the loop, - # so x_produces_empty was true for all x in beta, - # so beta produces empty as well. - result.append('<empty>') - - return result - - -# FOLLOW(x) -# Given a non-terminal. This function computes the set of all symbols -# that might follow it. Dragon book, p. 189. - -def compute_follow(start=None): - # Add '$end' to the follow list of the start symbol - for k in Nonterminals.keys(): - Follow[k] = [ ] - - if not start: - start = Productions[1].name - - Follow[start] = [ '$end' ] - - while 1: - didadd = 0 - for p in Productions[1:]: - # Here is the production set - for i in range(len(p.prod)): - B = p.prod[i] - if Nonterminals.has_key(B): - # Okay. We got a non-terminal in a production - fst = first(p.prod[i+1:]) - hasempty = 0 - for f in fst: - if f != '<empty>' and f not in Follow[B]: - Follow[B].append(f) - didadd = 1 - if f == '<empty>': - hasempty = 1 - if hasempty or i == (len(p.prod)-1): - # Add elements of follow(a) to follow(b) - for f in Follow[p.name]: - if f not in Follow[B]: - Follow[B].append(f) - didadd = 1 - if not didadd: break - - if 0 and yaccdebug: - _vf.write('\nFollow:\n') - for k in Nonterminals.keys(): - _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]]))) - -# ------------------------------------------------------------------------- -# compute_first1() -# -# Compute the value of FIRST1(X) for all symbols -# ------------------------------------------------------------------------- -def compute_first1(): - - # Terminals: - for t in Terminals.keys(): - First[t] = [t] - - First['$end'] = ['$end'] - First['#'] = ['#'] # what's this for? - - # Nonterminals: - - # Initialize to the empty set: - for n in Nonterminals.keys(): - First[n] = [] - - # Then propagate symbols until no change: - while 1: - some_change = 0 - for n in Nonterminals.keys(): - for p in Prodnames[n]: - for f in first(p.prod): - if f not in First[n]: - First[n].append( f ) - some_change = 1 - if not some_change: - break - - if 0 and yaccdebug: - _vf.write('\nFirst:\n') - for k in Nonterminals.keys(): - _vf.write("%-20s : %s\n" % - (k, " ".join([str(s) for s in First[k]]))) - -# ----------------------------------------------------------------------------- -# === SLR Generation === -# -# The following functions are used to construct SLR (Simple LR) parsing tables -# as described on p.221-229 of the dragon book. -# ----------------------------------------------------------------------------- - -# Global variables for the LR parsing engine -def lr_init_vars(): - global _lr_action, _lr_goto, _lr_method - global _lr_goto_cache, _lr0_cidhash - - _lr_action = { } # Action table - _lr_goto = { } # Goto table - _lr_method = "Unknown" # LR method used - _lr_goto_cache = { } - _lr0_cidhash = { } - - -# Compute the LR(0) closure operation on I, where I is a set of LR(0) items. -# prodlist is a list of productions. 
- -_add_count = 0 # Counter used to detect cycles - -def lr0_closure(I): - global _add_count - - _add_count += 1 - prodlist = Productions - - # Add everything in I to J - J = I[:] - didadd = 1 - while didadd: - didadd = 0 - for j in J: - for x in j.lrafter: - if x.lr0_added == _add_count: continue - # Add B --> .G to J - J.append(x.lr_next) - x.lr0_added = _add_count - didadd = 1 - - return J - -# Compute the LR(0) goto function goto(I,X) where I is a set -# of LR(0) items and X is a grammar symbol. This function is written -# in a way that guarantees uniqueness of the generated goto sets -# (i.e. the same goto set will never be returned as two different Python -# objects). With uniqueness, we can later do fast set comparisons using -# id(obj) instead of element-wise comparison. - -def lr0_goto(I,x): - # First we look for a previously cached entry - g = _lr_goto_cache.get((id(I),x),None) - if g: return g - - # Now we generate the goto set in a way that guarantees uniqueness - # of the result - - s = _lr_goto_cache.get(x,None) - if not s: - s = { } - _lr_goto_cache[x] = s - - gs = [ ] - for p in I: - n = p.lr_next - if n and n.lrbefore == x: - s1 = s.get(id(n),None) - if not s1: - s1 = { } - s[id(n)] = s1 - gs.append(n) - s = s1 - g = s.get('$end',None) - if not g: - if gs: - g = lr0_closure(gs) - s['$end'] = g - else: - s['$end'] = gs - _lr_goto_cache[(id(I),x)] = g - return g - -_lr0_cidhash = { } - -# Compute the LR(0) sets of item function -def lr0_items(): - - C = [ lr0_closure([Productions[0].lr_next]) ] - i = 0 - for I in C: - _lr0_cidhash[id(I)] = i - i += 1 - - # Loop over the items in C and each grammar symbols - i = 0 - while i < len(C): - I = C[i] - i += 1 - - # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = { } - for ii in I: - for s in ii.usyms: - asyms[s] = None - - for x in asyms.keys(): - g = lr0_goto(I,x) - if not g: continue - if _lr0_cidhash.has_key(id(g)): continue - _lr0_cidhash[id(g)] = len(C) - C.append(g) - - return C - -# ----------------------------------------------------------------------------- -# ==== LALR(1) Parsing ==== -# -# LALR(1) parsing is almost exactly the same as SLR except that instead of -# relying upon Follow() sets when performing reductions, a more selective -# lookahead set that incorporates the state of the LR(0) machine is utilized. -# Thus, we mainly just have to focus on calculating the lookahead sets. -# -# The method used here is due to DeRemer and Pennelo (1982). -# -# DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) -# Lookahead Sets", ACM Transactions on Programming Languages and Systems, -# Vol. 4, No. 4, Oct. 1982, pp. 615-649 -# -# Further details can also be found in: -# -# J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", -# McGraw-Hill Book Company, (1985). -# -# Note: This implementation is a complete replacement of the LALR(1) -# implementation in PLY-1.x releases. That version was based on -# a less efficient algorithm and it had bugs in its implementation. -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# compute_nullable_nonterminals() -# -# Creates a dictionary containing all of the non-terminals that might produce -# an empty production. 
-# ----------------------------------------------------------------------------- - -def compute_nullable_nonterminals(): - nullable = {} - num_nullable = 0 - while 1: - for p in Productions[1:]: - if p.len == 0: - nullable[p.name] = 1 - continue - for t in p.prod: - if not nullable.has_key(t): break - else: - nullable[p.name] = 1 - if len(nullable) == num_nullable: break - num_nullable = len(nullable) - return nullable - -# ----------------------------------------------------------------------------- -# find_nonterminal_trans(C) -# -# Given a set of LR(0) items, this functions finds all of the non-terminal -# transitions. These are transitions in which a dot appears immediately before -# a non-terminal. Returns a list of tuples of the form (state,N) where state -# is the state number and N is the nonterminal symbol. -# -# The input C is the set of LR(0) items. -# ----------------------------------------------------------------------------- - -def find_nonterminal_transitions(C): - trans = [] - for state in range(len(C)): - for p in C[state]: - if p.lr_index < p.len - 1: - t = (state,p.prod[p.lr_index+1]) - if Nonterminals.has_key(t[1]): - if t not in trans: trans.append(t) - state = state + 1 - return trans - -# ----------------------------------------------------------------------------- -# dr_relation() -# -# Computes the DR(p,A) relationships for non-terminal transitions. The input -# is a tuple (state,N) where state is a number and N is a nonterminal symbol. -# -# Returns a list of terminals. -# ----------------------------------------------------------------------------- - -def dr_relation(C,trans,nullable): - dr_set = { } - state,N = trans - terms = [] - - g = lr0_goto(C[state],N) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index+1] - if Terminals.has_key(a): - if a not in terms: terms.append(a) - - # This extra bit is to handle the start state - if state == 0 and N == Productions[0].prod[0]: - terms.append('$end') - - return terms - -# ----------------------------------------------------------------------------- -# reads_relation() -# -# Computes the READS() relation (p,A) READS (t,C). -# ----------------------------------------------------------------------------- - -def reads_relation(C, trans, empty): - # Look for empty transitions - rel = [] - state, N = trans - - g = lr0_goto(C[state],N) - j = _lr0_cidhash.get(id(g),-1) - for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if empty.has_key(a): - rel.append((j,a)) - - return rel - -# ----------------------------------------------------------------------------- -# compute_lookback_includes() -# -# Determines the lookback and includes relations -# -# LOOKBACK: -# -# This relation is determined by running the LR(0) state machine forward. -# For example, starting with a production "N : . A B C", we run it forward -# to obtain "N : A B C ." We then build a relationship between this final -# state and the starting state. These relationships are stored in a dictionary -# lookdict. -# -# INCLUDES: -# -# Computes the INCLUDE() relation (p,A) INCLUDES (p',B). -# -# This relation is used to determine non-terminal transitions that occur -# inside of other non-terminal transition states. (p,A) INCLUDES (p', B) -# if the following holds: -# -# B -> LAT, where T -> epsilon and p' -L-> p -# -# L is essentially a prefix (which may be empty), T is a suffix that must be -# able to derive an empty string. State p' must lead to state p with the string L. 
-# -# ----------------------------------------------------------------------------- - -def compute_lookback_includes(C,trans,nullable): - - lookdict = {} # Dictionary of lookback relations - includedict = {} # Dictionary of include relations - - # Make a dictionary of non-terminal transitions - dtrans = {} - for t in trans: - dtrans[t] = 1 - - # Loop over all transitions and compute lookbacks and includes - for state,N in trans: - lookb = [] - includes = [] - for p in C[state]: - if p.name != N: continue - - # Okay, we have a name match. We now follow the production all the way - # through the state machine until we get the . on the right hand side - - lr_index = p.lr_index - j = state - while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] - - # Check to see if this symbol and state are a non-terminal transition - if dtrans.has_key((j,t)): - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty - - li = lr_index + 1 - while li < p.len: - if Terminals.has_key(p.prod[li]): break # No forget it - if not nullable.has_key(p.prod[li]): break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j,t)) - - g = lr0_goto(C[j],t) # Go to next set - j = _lr0_cidhash.get(id(g),-1) # Go to next state - - # When we get here, j is the final state, now we have to locate the production - for r in C[j]: - if r.name != p.name: continue - if r.len != p.len: continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." - while i < r.lr_index: - if r.prod[i] != p.prod[i+1]: break - i = i + 1 - else: - lookb.append((j,r)) - for i in includes: - if not includedict.has_key(i): includedict[i] = [] - includedict[i].append((state,N)) - lookdict[(state,N)] = lookb - - return lookdict,includedict - -# ----------------------------------------------------------------------------- -# digraph() -# traverse() -# -# The following two functions are used to compute set valued functions -# of the form: -# -# F(x) = F'(x) U U{F(y) | x R y} -# -# This is used to compute the values of Read() sets as well as FOLLOW sets -# in LALR(1) generation. -# -# Inputs: X - An input set -# R - A relation -# FP - Set-valued function -# ------------------------------------------------------------------------------ - -def digraph(X,R,FP): - N = { } - for x in X: - N[x] = 0 - stack = [] - F = { } - for x in X: - if N[x] == 0: traverse(x,N,stack,F,X,R,FP) - return F - -def traverse(x,N,stack,F,X,R,FP): - stack.append(x) - d = len(stack) - N[x] = d - F[x] = FP(x) # F(X) <- F'(x) - - rel = R(x) # Get y's related to x - for y in rel: - if N[y] == 0: - traverse(y,N,stack,F,X,R,FP) - N[x] = min(N[x],N[y]) - for a in F.get(y,[]): - if a not in F[x]: F[x].append(a) - if N[x] == d: - N[stack[-1]] = sys.maxint - F[stack[-1]] = F[x] - element = stack.pop() - while element != x: - N[stack[-1]] = sys.maxint - F[stack[-1]] = F[x] - element = stack.pop() - -# ----------------------------------------------------------------------------- -# compute_read_sets() -# -# Given a set of LR(0) items, this function computes the read sets. 
-# -# Inputs: C = Set of LR(0) items -# ntrans = Set of nonterminal transitions -# nullable = Set of empty transitions -# -# Returns a set containing the read sets -# ----------------------------------------------------------------------------- - -def compute_read_sets(C, ntrans, nullable): - FP = lambda x: dr_relation(C,x,nullable) - R = lambda x: reads_relation(C,x,nullable) - F = digraph(ntrans,R,FP) - return F - -# ----------------------------------------------------------------------------- -# compute_follow_sets() -# -# Given a set of LR(0) items, a set of non-terminal transitions, a readset, -# and an include set, this function computes the follow sets -# -# Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} -# -# Inputs: -# ntrans = Set of nonterminal transitions -# readsets = Readset (previously computed) -# inclsets = Include sets (previously computed) -# -# Returns a set containing the follow sets -# ----------------------------------------------------------------------------- - -def compute_follow_sets(ntrans,readsets,inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x,[]) - F = digraph(ntrans,R,FP) - return F - -# ----------------------------------------------------------------------------- -# add_lookaheads() -# -# Attaches the lookahead symbols to grammar rules. -# -# Inputs: lookbacks - Set of lookback relations -# followset - Computed follow set -# -# This function directly attaches the lookaheads to productions contained -# in the lookbacks set -# ----------------------------------------------------------------------------- - -def add_lookaheads(lookbacks,followset): - for trans,lb in lookbacks.items(): - # Loop over productions in lookback - for state,p in lb: - if not p.lookaheads.has_key(state): - p.lookaheads[state] = [] - f = followset.get(trans,[]) - for a in f: - if a not in p.lookaheads[state]: p.lookaheads[state].append(a) - -# ----------------------------------------------------------------------------- -# add_lalr_lookaheads() -# -# This function does all of the work of adding lookahead information for use -# with LALR parsing -# ----------------------------------------------------------------------------- - -def add_lalr_lookaheads(C): - # Determine all of the nullable nonterminals - nullable = compute_nullable_nonterminals() - - # Find all non-terminal transitions - trans = find_nonterminal_transitions(C) - - # Compute read sets - readsets = compute_read_sets(C,trans,nullable) - - # Compute lookback/includes relations - lookd, included = compute_lookback_includes(C,trans,nullable) - - # Compute LALR FOLLOW sets - followsets = compute_follow_sets(trans,readsets,included) - - # Add all of the lookaheads - add_lookaheads(lookd,followsets) - -# ----------------------------------------------------------------------------- -# lr_parse_table() -# -# This function constructs the parse tables for SLR or LALR -# ----------------------------------------------------------------------------- -def lr_parse_table(method): - global _lr_method - goto = _lr_goto # Goto array - action = _lr_action # Action array - actionp = { } # Action production array (temporary) - - _lr_method = method - - n_srconflict = 0 - n_rrconflict = 0 - - if yaccdebug: - sys.stderr.write("yacc: Generating %s parsing table...\n" % method) - _vf.write("\n\nParsing method: %s\n\n" % method) - - # Step 1: Construct C = { I0, I1, ... 
IN}, collection of LR(0) items - # This determines the number of states - - C = lr0_items() - - if method == 'LALR': - add_lalr_lookaheads(C) - - # Build the parser table, state by state - st = 0 - for I in C: - # Loop over each production in I - actlist = [ ] # List of actions - - if yaccdebug: - _vf.write("\nstate %d\n\n" % st) - for p in I: - _vf.write(" (%d) %s\n" % (p.number, str(p))) - _vf.write("\n") - - for p in I: - try: - if p.prod[-1] == ".": - if p.name == "S'": - # Start symbol. Accept! - action[st,"$end"] = 0 - actionp[st,"$end"] = p - else: - # We are at the end of a production. Reduce! - if method == 'LALR': - laheads = p.lookaheads[st] - else: - laheads = Follow[p.name] - for a in laheads: - actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) - r = action.get((st,a),None) - if r is not None: - # Whoa. Have a shift/reduce or reduce/reduce conflict - if r > 0: - # Need to decide on shift or reduce here - # By default we favor shifting. Need to add - # some precedence rules here. - sprec,slevel = Productions[actionp[st,a].number].prec - rprec,rlevel = Precedence.get(a,('right',0)) - if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): - # We really need to reduce here. - action[st,a] = -p.number - actionp[st,a] = p - if not slevel and not rlevel: - _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) - _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) - n_srconflict += 1 - elif (slevel == rlevel) and (rprec == 'nonassoc'): - action[st,a] = None - else: - # Hmmm. Guess we'll keep the shift - if not rlevel: - _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) - _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a) - n_srconflict +=1 - elif r < 0: - # Reduce/reduce conflict. In this case, we favor the rule - # that was defined first in the grammar file - oldp = Productions[-r] - pp = Productions[p.number] - if oldp.line > pp.line: - action[st,a] = -p.number - actionp[st,a] = p - # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st) - n_rrconflict += 1 - _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a])) - _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a])) - else: - sys.stderr.write("Unknown conflict in state %d\n" % st) - else: - action[st,a] = -p.number - actionp[st,a] = p - else: - i = p.lr_index - a = p.prod[i+1] # Get symbol right after the "." - if Terminals.has_key(a): - g = lr0_goto(I,a) - j = _lr0_cidhash.get(id(g),-1) - if j >= 0: - # We are in a shift state - actlist.append((a,p,"shift and go to state %d" % j)) - r = action.get((st,a),None) - if r is not None: - # Whoa have a shift/reduce or shift/shift conflict - if r > 0: - if r != j: - sys.stderr.write("Shift/shift conflict in state %d\n" % st) - elif r < 0: - # Do a precedence check. - # - if precedence of reduce rule is higher, we reduce. - # - if precedence of reduce is same and left assoc, we reduce. - # - otherwise we shift - rprec,rlevel = Productions[actionp[st,a].number].prec - sprec,slevel = Precedence.get(a,('right',0)) - if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')): - # We decide to shift here... highest precedence to shift - action[st,a] = j - actionp[st,a] = p - if not rlevel: - n_srconflict += 1 - _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st) - _vf.write(" ! 
shift/reduce conflict for %s resolved as shift.\n" % a) - elif (slevel == rlevel) and (rprec == 'nonassoc'): - action[st,a] = None - else: - # Hmmm. Guess we'll keep the reduce - if not slevel and not rlevel: - n_srconflict +=1 - _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st) - _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a) - - else: - sys.stderr.write("Unknown conflict in state %d\n" % st) - else: - action[st,a] = j - actionp[st,a] = p - - except StandardError,e: - raise YaccError, "Hosed in lr_parse_table", e - - # Print the actions associated with each terminal - if yaccdebug: - _actprint = { } - for a,p,m in actlist: - if action.has_key((st,a)): - if p is actionp[st,a]: - _vf.write(" %-15s %s\n" % (a,m)) - _actprint[(a,m)] = 1 - _vf.write("\n") - for a,p,m in actlist: - if action.has_key((st,a)): - if p is not actionp[st,a]: - if not _actprint.has_key((a,m)): - _vf.write(" ! %-15s [ %s ]\n" % (a,m)) - _actprint[(a,m)] = 1 - - # Construct the goto table for this state - if yaccdebug: - _vf.write("\n") - nkeys = { } - for ii in I: - for s in ii.usyms: - if Nonterminals.has_key(s): - nkeys[s] = None - for n in nkeys.keys(): - g = lr0_goto(I,n) - j = _lr0_cidhash.get(id(g),-1) - if j >= 0: - goto[st,n] = j - if yaccdebug: - _vf.write(" %-30s shift and go to state %d\n" % (n,j)) - - st += 1 - - if yaccdebug: - if n_srconflict == 1: - sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict) - if n_srconflict > 1: - sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict) - if n_rrconflict == 1: - sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict) - if n_rrconflict > 1: - sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict) - -# ----------------------------------------------------------------------------- -# ==== LR Utility functions ==== -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# _lr_write_tables() -# -# This function writes the LR parsing tables to a file -# ----------------------------------------------------------------------------- - -def lr_write_tables(modulename=tab_module,outputdir=''): - filename = os.path.join(outputdir,modulename) + ".py" - try: - f = open(filename,"w") - - f.write(""" -# %s -# This file is automatically generated. Do not edit. 
- -_lr_method = %s - -_lr_signature = %s -""" % (filename, repr(_lr_method), repr(Signature.digest()))) - - # Change smaller to 0 to go back to original tables - smaller = 1 - - # Factor out names to try and make smaller - if smaller: - items = { } - - for k,v in _lr_action.items(): - i = items.get(k[1]) - if not i: - i = ([],[]) - items[k[1]] = i - i[0].append(k[0]) - i[1].append(v) - - f.write("\n_lr_action_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_action = { } -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - _lr_action[(_x,_k)] = _y -del _lr_action_items -""") - - else: - f.write("\n_lr_action = { "); - for k,v in _lr_action.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - if smaller: - # Factor out names to try and make smaller - items = { } - - for k,v in _lr_goto.items(): - i = items.get(k[1]) - if not i: - i = ([],[]) - items[k[1]] = i - i[0].append(k[0]) - i[1].append(v) - - f.write("\n_lr_goto_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_goto = { } -for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - _lr_goto[(_x,_k)] = _y -del _lr_goto_items -""") - else: - f.write("\n_lr_goto = { "); - for k,v in _lr_goto.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - # Write production table - f.write("_lr_productions = [\n") - for p in Productions: - if p: - if (p.func): - f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line)) - else: - f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len)) - else: - f.write(" None,\n") - f.write("]\n") - - f.close() - - except IOError,e: - print "Unable to create '%s'" % filename - print e - return - -def lr_read_tables(module=tab_module,optimize=0): - global _lr_action, _lr_goto, _lr_productions, _lr_method - try: - exec "import %s as parsetab" % module - - if (optimize) or (Signature.digest() == parsetab._lr_signature): - _lr_action = parsetab._lr_action - _lr_goto = parsetab._lr_goto - _lr_productions = parsetab._lr_productions - _lr_method = parsetab._lr_method - return 1 - else: - return 0 - - except (ImportError,AttributeError): - return 0 - - -# Available instance types. This is used when parsers are defined by a class. -# it's a little funky because I want to preserve backwards compatibility -# with Python 2.0 where types.ObjectType is undefined. - -try: - _INSTANCETYPE = (types.InstanceType, types.ObjectType) -except AttributeError: - _INSTANCETYPE = types.InstanceType - -# ----------------------------------------------------------------------------- -# yacc(module) -# -# Build the parser module -# ----------------------------------------------------------------------------- - -def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0,write_tables=1,debugfile=debug_file,outputdir=''): - global yaccdebug - yaccdebug = debug - - initialize_vars() - files = { } - error = 0 - - - # Add parsing method to signature - Signature.update(method) - - # If a "module" parameter was supplied, extract its dictionary. - # Note: a module may in fact be an instance as well. - - if module: - # User supplied a module object. 
- if isinstance(module, types.ModuleType): - ldict = module.__dict__ - elif isinstance(module, _INSTANCETYPE): - _items = [(k,getattr(module,k)) for k in dir(module)] - ldict = { } - for i in _items: - ldict[i[0]] = i[1] - else: - raise ValueError,"Expected a module" - - else: - # No module given. We might be able to get information from the caller. - # Throw an exception and unwind the traceback to get the globals - - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - f = f.f_back # Walk out to our calling function - ldict = f.f_globals # Grab its globals dictionary - - # Add starting symbol to signature - if not start: - start = ldict.get("start",None) - if start: - Signature.update(start) - - # If running in optimized mode. We're going to - - if (optimize and lr_read_tables(tabmodule,1)): - # Read parse table - del Productions[:] - for p in _lr_productions: - if not p: - Productions.append(None) - else: - m = MiniProduction() - m.name = p[0] - m.len = p[1] - m.file = p[3] - m.line = p[4] - if p[2]: - m.func = ldict[p[2]] - Productions.append(m) - - else: - # Get the tokens map - if (module and isinstance(module,_INSTANCETYPE)): - tokens = getattr(module,"tokens",None) - else: - tokens = ldict.get("tokens",None) - - if not tokens: - raise YaccError,"module does not define a list 'tokens'" - if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)): - raise YaccError,"tokens must be a list or tuple." - - # Check to see if a requires dictionary is defined. - requires = ldict.get("require",None) - if requires: - if not (isinstance(requires,types.DictType)): - raise YaccError,"require must be a dictionary." - - for r,v in requires.items(): - try: - if not (isinstance(v,types.ListType)): - raise TypeError - v1 = [x.split(".") for x in v] - Requires[r] = v1 - except StandardError: - print "Invalid specification for rule '%s' in require. Expected a list of strings" % r - - - # Build the dictionary of terminals. We a record a 0 in the - # dictionary to track whether or not a terminal is actually - # used in the grammar - - if 'error' in tokens: - print "yacc: Illegal token 'error'. Is a reserved word." - raise YaccError,"Illegal token name" - - for n in tokens: - if Terminals.has_key(n): - print "yacc: Warning. Token '%s' multiply defined." % n - Terminals[n] = [ ] - - Terminals['error'] = [ ] - - # Get the precedence map (if any) - prec = ldict.get("precedence",None) - if prec: - if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)): - raise YaccError,"precedence must be a list or tuple." - add_precedence(prec) - Signature.update(repr(prec)) - - for n in tokens: - if not Precedence.has_key(n): - Precedence[n] = ('right',0) # Default, right associative, 0 precedence - - # Look for error handler - ef = ldict.get('p_error',None) - if ef: - if isinstance(ef,types.FunctionType): - ismethod = 0 - elif isinstance(ef, types.MethodType): - ismethod = 1 - else: - raise YaccError,"'p_error' defined, but is not a function or method." - eline = ef.func_code.co_firstlineno - efile = ef.func_code.co_filename - files[efile] = None - - if (ef.func_code.co_argcount != 1+ismethod): - raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline) - global Errorfunc - Errorfunc = ef - else: - print "yacc: Warning. no p_error() function is defined." 
- - # Get the list of built-in functions with p_ prefix - symbols = [ldict[f] for f in ldict.keys() - if (type(ldict[f]) in (types.FunctionType, types.MethodType) and ldict[f].__name__[:2] == 'p_' - and ldict[f].__name__ != 'p_error')] - - # Check for non-empty symbols - if len(symbols) == 0: - raise YaccError,"no rules of the form p_rulename are defined." - - # Sort the symbols by line number - symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno)) - - # Add all of the symbols to the grammar - for f in symbols: - if (add_function(f)) < 0: - error += 1 - else: - files[f.func_code.co_filename] = None - - # Make a signature of the docstrings - for f in symbols: - if f.__doc__: - Signature.update(f.__doc__) - - lr_init_vars() - - if error: - raise YaccError,"Unable to construct parser." - - if not lr_read_tables(tabmodule): - - # Validate files - for filename in files.keys(): - if not validate_file(filename): - error = 1 - - # Validate dictionary - validate_dict(ldict) - - if start and not Prodnames.has_key(start): - raise YaccError,"Bad starting symbol '%s'" % start - - augment_grammar(start) - error = verify_productions(cycle_check=check_recursion) - otherfunc = [ldict[f] for f in ldict.keys() - if (type(f) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')] - - if error: - raise YaccError,"Unable to construct parser." - - build_lritems() - compute_first1() - compute_follow(start) - - if method in ['SLR','LALR']: - lr_parse_table(method) - else: - raise YaccError, "Unknown parsing method '%s'" % method - - if write_tables: - lr_write_tables(tabmodule,outputdir) - - if yaccdebug: - try: - f = open(os.path.join(outputdir,debugfile),"w") - f.write(_vfc.getvalue()) - f.write("\n\n") - f.write(_vf.getvalue()) - f.close() - except IOError,e: - print "yacc: can't create '%s'" % debugfile,e - - # Made it here. Create a parser object and set up its internal state. - # Set global parse() method to bound method of parser object. - - p = Parser("xyzzy") - p.productions = Productions - p.errorfunc = Errorfunc - p.action = _lr_action - p.goto = _lr_goto - p.method = _lr_method - p.require = Requires - - global parse - parse = p.parse - - global parser - parser = p - - # Clean up all of the globals we created - if (not optimize): - yacc_cleanup() - return p - -# yacc_cleanup function. 
Delete all of the global variables
-# used during table construction
-
-def yacc_cleanup():
-    global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
-    del _lr_action, _lr_goto, _lr_method, _lr_goto_cache
-
-    global Productions, Prodnames, Prodmap, Terminals
-    global Nonterminals, First, Follow, Precedence, LRitems
-    global Errorfunc, Signature, Requires
-
-    del Productions, Prodnames, Prodmap, Terminals
-    del Nonterminals, First, Follow, Precedence, LRitems
-    del Errorfunc, Signature, Requires
-
-    global _vf, _vfc
-    del _vf, _vfc
-
-
-# Stub that raises an error if parsing is attempted without first calling yacc()
-def parse(*args,**kwargs):
-    raise YaccError, "yacc: No parser built with yacc()"
-
diff --git a/chall/ply-2.2/ply/yacc.pyc b/chall/ply-2.2/ply/yacc.pyc
Binary files differ
deleted file mode 100644
index 4b1adf7..0000000
--- a/chall/ply-2.2/ply/yacc.pyc
+++ /dev/null
diff --git a/chall/ply-2.2/setup.py b/chall/ply-2.2/setup.py
deleted file mode 100644
index 0e74964..0000000
--- a/chall/ply-2.2/setup.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from distutils.core import setup
-
-setup(name = "ply",
-      description="Python Lex & Yacc",
-      long_description = """
-PLY is yet another implementation of lex and yacc for Python. Although several other
-parsing tools are available for Python, there are several reasons why you might
-want to take a look at PLY:
-
-It's implemented entirely in Python.
-
-It uses LR-parsing which is reasonably efficient and well suited for larger grammars.
-
-PLY provides most of the standard lex/yacc features including support for empty
-productions, precedence rules, error recovery, and support for ambiguous grammars.
-
-PLY is extremely easy to use and provides very extensive error checking.
-""",
-      license="""Lesser GPL (LGPL)""",
-      version = "2.2",
-      author = "David Beazley",
-      author_email = "dave@dabeaz.com",
-      maintainer = "David Beazley",
-      maintainer_email = "dave@dabeaz.com",
-      url = "http://www.dabeaz.com/ply/",
-      packages = ['ply'],
-      )
diff --git a/chall/ply-2.2/test/README b/chall/ply-2.2/test/README
deleted file mode 100644
index aac12b0..0000000
--- a/chall/ply-2.2/test/README
+++ /dev/null
@@ -1,11 +0,0 @@
-This directory mostly contains tests for various types of error
-conditions. To run:
-
-  $ python testlex.py .
-  $ python testyacc.py .
-
-The tests can also be run using the Python unittest module.
-
-  $ python rununit.py
-
-The script 'cleanup.sh' cleans up this directory to its original state.
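The yacc() entry point in yacc.py above ties the whole pipeline together: it harvests the p_ functions from the caller's namespace, validates them, builds the grammar and the LR tables, and installs a Parser object whose parse() method replaces the stub. A minimal sketch of that workflow (the token and rule names here are invented for illustration, and the grammar is deliberately tiny):

    import ply.lex as lex
    import ply.yacc as yacc

    tokens = ('NUMBER', 'PLUS')

    t_PLUS   = r'\+'
    t_ignore = ' \t'

    def t_NUMBER(t):
        r'\d+'
        t.value = int(t.value)
        return t

    def t_error(t):
        t.lexer.skip(1)

    precedence = (('left', 'PLUS'),)   # consumed by add_precedence() above

    def p_expr_plus(p):
        'expr : expr PLUS expr'
        p[0] = p[1] + p[3]

    def p_expr_number(p):
        'expr : NUMBER'
        p[0] = p[1]

    def p_error(p):
        print "Syntax error"

    lex.lex()
    parser = yacc.yacc()           # runs the table construction documented above
    print parser.parse("1+2+3")    # prints 6

On a second run with an unchanged grammar, lr_read_tables() finds a matching signature digest in the generated parsetab module and skips table construction entirely.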
diff --git a/chall/ply-2.2/test/calclex.py b/chall/ply-2.2/test/calclex.py deleted file mode 100644 index 2550734..0000000 --- a/chall/ply-2.2/test/calclex.py +++ /dev/null @@ -1,49 +0,0 @@ -# ----------------------------------------------------------------------------- -# calclex.py -# ----------------------------------------------------------------------------- -import sys - -sys.path.append("..") -import ply.lex as lex - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - try: - t.value = int(t.value) - except ValueError: - print "Integer value too large", t.value - t.value = 0 - return t - -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lineno += t.value.count("\n") - -def t_error(t): - print "Illegal character '%s'" % t.value[0] - t.lexer.skip(1) - -# Build the lexer -lex.lex() - - - diff --git a/chall/ply-2.2/test/cleanup.sh b/chall/ply-2.2/test/cleanup.sh deleted file mode 100755 index d7d99b6..0000000 --- a/chall/ply-2.2/test/cleanup.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -rm -f *~ *.pyc *.dif *.out - diff --git a/chall/ply-2.2/test/lex_doc1.exp b/chall/ply-2.2/test/lex_doc1.exp deleted file mode 100644 index 5b63c1e..0000000 --- a/chall/ply-2.2/test/lex_doc1.exp +++ /dev/null @@ -1 +0,0 @@ -./lex_doc1.py:18: No regular expression defined for rule 't_NUMBER' diff --git a/chall/ply-2.2/test/lex_doc1.py b/chall/ply-2.2/test/lex_doc1.py deleted file mode 100644 index 3951b5c..0000000 --- a/chall/ply-2.2/test/lex_doc1.py +++ /dev/null @@ -1,30 +0,0 @@ -# lex_token.py -# -# Missing documentation string - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(t): - pass - -def t_error(t): - pass - - -import sys -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_dup1.exp b/chall/ply-2.2/test/lex_dup1.exp deleted file mode 100644 index 2098a40..0000000 --- a/chall/ply-2.2/test/lex_dup1.exp +++ /dev/null @@ -1,2 +0,0 @@ -./lex_dup1.py:20: Rule t_NUMBER redefined. Previously defined on line 18 -SyntaxError: lex: Unable to build lexer. diff --git a/chall/ply-2.2/test/lex_dup1.py b/chall/ply-2.2/test/lex_dup1.py deleted file mode 100644 index 68f8092..0000000 --- a/chall/ply-2.2/test/lex_dup1.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_token.py -# -# Duplicated rule specifiers - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_NUMBER = r'\d+' - -def t_error(t): - pass - -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_dup2.exp b/chall/ply-2.2/test/lex_dup2.exp deleted file mode 100644 index d327cfe..0000000 --- a/chall/ply-2.2/test/lex_dup2.exp +++ /dev/null @@ -1,2 +0,0 @@ -./lex_dup2.py:22: Rule t_NUMBER redefined. Previously defined on line 18 -SyntaxError: lex: Unable to build lexer. 
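A lexer module in the style of calclex.py above is normally driven by handing the built lexer a string and pulling tokens until None comes back. A short sketch (the input string is arbitrary):

    import calclex             # importing runs lex.lex() and builds the lexer
    import ply.lex as lex

    lex.input("x = 3 + 4 * 10")
    while 1:
        tok = lex.token()      # returns None at end of input
        if not tok:
            break
        print tok.type, tok.value, tok.lineno, tok.lexpos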
diff --git a/chall/ply-2.2/test/lex_dup2.py b/chall/ply-2.2/test/lex_dup2.py deleted file mode 100644 index f4d346e..0000000 --- a/chall/ply-2.2/test/lex_dup2.py +++ /dev/null @@ -1,33 +0,0 @@ -# lex_token.py -# -# Duplicated rule specifiers - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -def t_NUMBER(t): - r'\d+' - pass - -def t_NUMBER(t): - r'\d+' - pass - -def t_error(t): - pass - -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_dup3.exp b/chall/ply-2.2/test/lex_dup3.exp deleted file mode 100644 index ec0680c..0000000 --- a/chall/ply-2.2/test/lex_dup3.exp +++ /dev/null @@ -1,2 +0,0 @@ -./lex_dup3.py:20: Rule t_NUMBER redefined. Previously defined on line 18 -SyntaxError: lex: Unable to build lexer. diff --git a/chall/ply-2.2/test/lex_dup3.py b/chall/ply-2.2/test/lex_dup3.py deleted file mode 100644 index e17b520..0000000 --- a/chall/ply-2.2/test/lex_dup3.py +++ /dev/null @@ -1,31 +0,0 @@ -# lex_token.py -# -# Duplicated rule specifiers - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_NUMBER(t): - r'\d+' - pass - -def t_error(t): - pass - -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_empty.exp b/chall/ply-2.2/test/lex_empty.exp deleted file mode 100644 index af38602..0000000 --- a/chall/ply-2.2/test/lex_empty.exp +++ /dev/null @@ -1 +0,0 @@ -SyntaxError: lex: no rules of the form t_rulename are defined. diff --git a/chall/ply-2.2/test/lex_empty.py b/chall/ply-2.2/test/lex_empty.py deleted file mode 100644 index 96625f7..0000000 --- a/chall/ply-2.2/test/lex_empty.py +++ /dev/null @@ -1,20 +0,0 @@ -# lex_token.py -# -# No rules defined - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_error1.exp b/chall/ply-2.2/test/lex_error1.exp deleted file mode 100644 index baa19e5..0000000 --- a/chall/ply-2.2/test/lex_error1.exp +++ /dev/null @@ -1 +0,0 @@ -lex: Warning. no t_error rule is defined. 
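Each failing fixture in this directory is paired with a .exp file holding the exact diagnostics it is expected to produce. A hypothetical sketch of the comparison a driver like testlex.py might perform (the real harness may differ in its details):

    import glob, os

    for script in glob.glob("lex_*.py"):
        exp = script[:-3] + ".exp"
        if not os.path.exists(exp):
            continue
        # Run the fixture and capture both stdout and stderr
        output = os.popen("python %s 2>&1" % script).read()
        if output == open(exp).read():
            print "%s : OK" % script
        else:
            print "%s : FAILED" % script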
diff --git a/chall/ply-2.2/test/lex_error1.py b/chall/ply-2.2/test/lex_error1.py deleted file mode 100644 index a99d9be..0000000 --- a/chall/ply-2.2/test/lex_error1.py +++ /dev/null @@ -1,24 +0,0 @@ -# lex_token.py -# -# Missing t_error() rule - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_error2.exp b/chall/ply-2.2/test/lex_error2.exp deleted file mode 100644 index fb1b55c..0000000 --- a/chall/ply-2.2/test/lex_error2.exp +++ /dev/null @@ -1 +0,0 @@ -SyntaxError: lex: Rule 't_error' must be defined as a function diff --git a/chall/ply-2.2/test/lex_error2.py b/chall/ply-2.2/test/lex_error2.py deleted file mode 100644 index a59c8d4..0000000 --- a/chall/ply-2.2/test/lex_error2.py +++ /dev/null @@ -1,26 +0,0 @@ -# lex_token.py -# -# t_error defined, but not function - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_error = "foo" - -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_error3.exp b/chall/ply-2.2/test/lex_error3.exp deleted file mode 100644 index 1b482bf..0000000 --- a/chall/ply-2.2/test/lex_error3.exp +++ /dev/null @@ -1,2 +0,0 @@ -./lex_error3.py:20: Rule 't_error' requires an argument. -SyntaxError: lex: Unable to build lexer. diff --git a/chall/ply-2.2/test/lex_error3.py b/chall/ply-2.2/test/lex_error3.py deleted file mode 100644 index 584600f..0000000 --- a/chall/ply-2.2/test/lex_error3.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_token.py -# -# t_error defined as function, but with wrong # args - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(): - pass - -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_error4.exp b/chall/ply-2.2/test/lex_error4.exp deleted file mode 100644 index 98505a2..0000000 --- a/chall/ply-2.2/test/lex_error4.exp +++ /dev/null @@ -1,2 +0,0 @@ -./lex_error4.py:20: Rule 't_error' has too many arguments. -SyntaxError: lex: Unable to build lexer. 
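The two arity fixtures above fail because of simple introspection, the same kind add_function() in yacc.py applies to p_ rules: a plain rule function must take exactly one argument, while a bound method is allowed an extra slot for self. An illustrative sketch (has_valid_arity is an invented name, not part of the library):

    import types

    def has_valid_arity(f):
        # Bound methods carry an implicit 'self' argument.
        if isinstance(f, types.MethodType):
            reqdargs = 2
        else:
            reqdargs = 1
        return f.func_code.co_argcount == reqdargs

    def t_error(t,s):                  # the broken rule from lex_error4.py below
        pass

    print has_valid_arity(t_error)     # prints False: one argument too many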
diff --git a/chall/ply-2.2/test/lex_error4.py b/chall/ply-2.2/test/lex_error4.py
deleted file mode 100644
index d05de74..0000000
--- a/chall/ply-2.2/test/lex_error4.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# lex_token.py
-#
-# t_error defined as function, but too many args
-
-import sys
-sys.path.insert(0,"..")
-
-import ply.lex as lex
-
-tokens = [
-    "PLUS",
-    "MINUS",
-    "NUMBER",
-    ]
-
-t_PLUS = r'\+'
-t_MINUS = r'-'
-t_NUMBER = r'\d+'
-
-def t_error(t,s):
-    pass
-
-sys.tracebacklimit = 0
-
-lex.lex()
-
-
diff --git a/chall/ply-2.2/test/lex_hedit.exp b/chall/ply-2.2/test/lex_hedit.exp
deleted file mode 100644
index 7b27dcb..0000000
--- a/chall/ply-2.2/test/lex_hedit.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-(H_EDIT_DESCRIPTOR,'abc',1,0)
-(H_EDIT_DESCRIPTOR,'abcdefghij',1,6)
-(H_EDIT_DESCRIPTOR,'xy',1,20)
diff --git a/chall/ply-2.2/test/lex_hedit.py b/chall/ply-2.2/test/lex_hedit.py
deleted file mode 100644
index 0f87423..0000000
--- a/chall/ply-2.2/test/lex_hedit.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# -----------------------------------------------------------------------------
-# hedit.py
-#
-# Parsing of Fortran H Edit descriptors (Contributed by Pearu Peterson)
-#
-# These tokens can't be easily tokenized because they are of the following
-# form:
-#
-#   nHc1...cn
-#
-# where n is a positive integer and c1 ... cn are characters.
-#
-# This example shows how to modify the state of the lexer to parse
-# such tokens.
-# -----------------------------------------------------------------------------
-import sys
-sys.path.insert(0,"..")
-
-import ply.lex as lex
-
-tokens = (
-    'H_EDIT_DESCRIPTOR',
-    )
-
-# Tokens
-t_ignore = " \t\n"
-
-def t_H_EDIT_DESCRIPTOR(t):
-    r"\d+H.*"                      # This grabs all of the remaining text
-    i = t.value.index('H')
-    n = eval(t.value[:i])
-
-    # Adjust the tokenizing position
-    t.lexer.lexpos -= len(t.value) - (i+1+n)
-    t.value = t.value[i+1:i+1+n]
-    return t
-
-def t_error(t):
-    print "Illegal character '%s'" % t.value[0]
-    t.lexer.skip(1)
-
-# Build the lexer
-lex.lex()
-lex.runmain(data="3Habc 10Habcdefghij 2Hxy")
-
-
-
diff --git a/chall/ply-2.2/test/lex_ignore.exp b/chall/ply-2.2/test/lex_ignore.exp
deleted file mode 100644
index 85e2961..0000000
--- a/chall/ply-2.2/test/lex_ignore.exp
+++ /dev/null
@@ -1,7 +0,0 @@
-./lex_ignore.py:20: Rule 't_ignore' must be defined as a string.
-Traceback (most recent call last):
-  File "./lex_ignore.py", line 29, in ?
-    lex.lex()
-  File "../ply/lex.py", line 758, in lex
-    raise SyntaxError,"lex: Unable to build lexer."
-SyntaxError: lex: Unable to build lexer.
diff --git a/chall/ply-2.2/test/lex_ignore.py b/chall/ply-2.2/test/lex_ignore.py
deleted file mode 100644
index 94b0266..0000000
--- a/chall/ply-2.2/test/lex_ignore.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# lex_token.py
-#
-# Improperly specified ignore declaration
-
-import sys
-sys.path.insert(0,"..")
-
-import ply.lex as lex
-
-tokens = [
-    "PLUS",
-    "MINUS",
-    "NUMBER",
-    ]
-
-t_PLUS = r'\+'
-t_MINUS = r'-'
-t_NUMBER = r'\d+'
-
-def t_ignore(t):
-    ' \t'
-    pass
-
-def t_error(t):
-    pass
-
-import sys
-
-lex.lex()
-
-
diff --git a/chall/ply-2.2/test/lex_nowarn.exp b/chall/ply-2.2/test/lex_nowarn.exp
deleted file mode 100644
index e69de29..0000000
--- a/chall/ply-2.2/test/lex_nowarn.exp
+++ /dev/null
diff --git a/chall/ply-2.2/test/lex_nowarn.py b/chall/ply-2.2/test/lex_nowarn.py
deleted file mode 100644
index d60d31c..0000000
--- a/chall/ply-2.2/test/lex_nowarn.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# lex_token.py
-#
-# Missing t_error() rule
-
-import sys
-sys.path.insert(0,"..")
-
-import ply.lex as lex
-
-tokens = [
-    "PLUS",
-    "MINUS",
-    "NUMBER",
-    "NUMBER",
-    ]
-
-states = (('foo','exclusive'),)
-
-t_ignore = ' \t'
-t_PLUS = r'\+'
-t_MINUS = r'-'
-t_NUMBER = r'\d+'
-
-t_foo_NUMBER = r'\d+'
-
-sys.tracebacklimit = 0
-
-lex.lex(nowarn=1)
-
-
diff --git a/chall/ply-2.2/test/lex_re1.exp b/chall/ply-2.2/test/lex_re1.exp
deleted file mode 100644
index b9e621c..0000000
--- a/chall/ply-2.2/test/lex_re1.exp
+++ /dev/null
@@ -1,7 +0,0 @@
-lex: Invalid regular expression for rule 't_NUMBER'. unbalanced parenthesis
-Traceback (most recent call last):
-  File "./lex_re1.py", line 25, in ?
-    lex.lex()
-  File "../ply/lex.py", line 758, in lex
-    raise SyntaxError,"lex: Unable to build lexer."
-SyntaxError: lex: Unable to build lexer.
diff --git a/chall/ply-2.2/test/lex_re1.py b/chall/ply-2.2/test/lex_re1.py
deleted file mode 100644
index 9e544fe..0000000
--- a/chall/ply-2.2/test/lex_re1.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# lex_token.py
-#
-# Bad regular expression in a string
-
-import sys
-sys.path.insert(0,"..")
-
-import ply.lex as lex
-
-tokens = [
-    "PLUS",
-    "MINUS",
-    "NUMBER",
-    ]
-
-t_PLUS = r'\+'
-t_MINUS = r'-'
-t_NUMBER = r'(\d+'
-
-def t_error(t):
-    pass
-
-import sys
-
-lex.lex()
-
-
diff --git a/chall/ply-2.2/test/lex_re2.exp b/chall/ply-2.2/test/lex_re2.exp
deleted file mode 100644
index 7ba89b4..0000000
--- a/chall/ply-2.2/test/lex_re2.exp
+++ /dev/null
@@ -1,7 +0,0 @@
-lex: Regular expression for rule 't_PLUS' matches empty string.
-Traceback (most recent call last):
-  File "./lex_re2.py", line 25, in ?
-    lex.lex()
-  File "../ply/lex.py", line 758, in lex
-    raise SyntaxError,"lex: Unable to build lexer."
-SyntaxError: lex: Unable to build lexer.
diff --git a/chall/ply-2.2/test/lex_re2.py b/chall/ply-2.2/test/lex_re2.py
deleted file mode 100644
index 522b415..0000000
--- a/chall/ply-2.2/test/lex_re2.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# lex_token.py
-#
-# Regular expression rule matches empty string
-
-import sys
-sys.path.insert(0,"..")
-
-import ply.lex as lex
-
-tokens = [
-    "PLUS",
-    "MINUS",
-    "NUMBER",
-    ]
-
-t_PLUS = r'\+?'
-t_MINUS = r'-'
-t_NUMBER = r'(\d+)'
-
-def t_error(t):
-    pass
-
-import sys
-
-lex.lex()
-
-
diff --git a/chall/ply-2.2/test/lex_re3.exp b/chall/ply-2.2/test/lex_re3.exp
deleted file mode 100644
index 7cdcae4..0000000
--- a/chall/ply-2.2/test/lex_re3.exp
+++ /dev/null
@@ -1,8 +0,0 @@
-lex: Invalid regular expression for rule 't_POUND'. unbalanced parenthesis
-lex: Make sure '#' in rule 't_POUND' is escaped with '\#'.
-Traceback (most recent call last): - File "./lex_re3.py", line 27, in ? - lex.lex() - File "../ply/lex.py", line 758, in lex - raise SyntaxError,"lex: Unable to build lexer." -SyntaxError: lex: Unable to build lexer. diff --git a/chall/ply-2.2/test/lex_re3.py b/chall/ply-2.2/test/lex_re3.py deleted file mode 100644 index 099e156..0000000 --- a/chall/ply-2.2/test/lex_re3.py +++ /dev/null @@ -1,29 +0,0 @@ -# lex_token.py -# -# Regular expression rule matches empty string - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - "POUND", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'(\d+)' -t_POUND = r'#' - -def t_error(t): - pass - -import sys - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_rule1.exp b/chall/ply-2.2/test/lex_rule1.exp deleted file mode 100644 index 0c23ca2..0000000 --- a/chall/ply-2.2/test/lex_rule1.exp +++ /dev/null @@ -1,2 +0,0 @@ -lex: t_NUMBER not defined as a function or string -SyntaxError: lex: Unable to build lexer. diff --git a/chall/ply-2.2/test/lex_rule1.py b/chall/ply-2.2/test/lex_rule1.py deleted file mode 100644 index e49a15b..0000000 --- a/chall/ply-2.2/test/lex_rule1.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_token.py -# -# Rule defined as some other type - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = 1 - -def t_error(t): - pass - -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_state1.exp b/chall/ply-2.2/test/lex_state1.exp deleted file mode 100644 index 8b58050..0000000 --- a/chall/ply-2.2/test/lex_state1.exp +++ /dev/null @@ -1,7 +0,0 @@ -lex: states must be defined as a tuple or list. -Traceback (most recent call last): - File "./lex_state1.py", line 38, in ? - lex.lex() - File "../ply/lex.py", line 758, in lex - raise SyntaxError,"lex: Unable to build lexer." -SyntaxError: lex: Unable to build lexer. diff --git a/chall/ply-2.2/test/lex_state1.py b/chall/ply-2.2/test/lex_state1.py deleted file mode 100644 index 7eb2976..0000000 --- a/chall/ply-2.2/test/lex_state1.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state1.py -# -# Bad state declaration - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = 'comment' - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print "Entering comment state" - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print "comment body", t - t.lexer.begin('INITIAL') - -def t_error(t): - pass - -import sys - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_state2.exp b/chall/ply-2.2/test/lex_state2.exp deleted file mode 100644 index 11c33a7..0000000 --- a/chall/ply-2.2/test/lex_state2.exp +++ /dev/null @@ -1,8 +0,0 @@ -lex: invalid state specifier 'comment'. Must be a tuple (statename,'exclusive|inclusive') -lex: invalid state specifier 'example'. Must be a tuple (statename,'exclusive|inclusive') -Traceback (most recent call last): - File "./lex_state2.py", line 38, in ? - lex.lex() - File "../ply/lex.py", line 758, in lex - raise SyntaxError,"lex: Unable to build lexer." -SyntaxError: lex: Unable to build lexer. 
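The hint in lex_re3.exp above deserves a word: lex.py joins all of the string rules into one master pattern compiled with re.VERBOSE, and in verbose mode a bare '#' begins a regex comment, which is what leaves the parentheses unbalanced. The cure is the one the message suggests; a sketch of the corrected rule (not part of the test, whose point is the failure):

    t_POUND = r'\#'    # escaped, so re.VERBOSE does not treat the rest as a comment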
diff --git a/chall/ply-2.2/test/lex_state2.py b/chall/ply-2.2/test/lex_state2.py deleted file mode 100644 index b76b0db..0000000 --- a/chall/ply-2.2/test/lex_state2.py +++ /dev/null @@ -1,40 +0,0 @@ -# lex_state2.py -# -# Bad state declaration - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -states = ('comment','example') - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print "Entering comment state" - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print "comment body", t - t.lexer.begin('INITIAL') - -def t_error(t): - pass - -import sys - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_state3.exp b/chall/ply-2.2/test/lex_state3.exp deleted file mode 100644 index 2c3442c..0000000 --- a/chall/ply-2.2/test/lex_state3.exp +++ /dev/null @@ -1,8 +0,0 @@ -lex: state name 1 must be a string -lex: No rules defined for state 'example' -Traceback (most recent call last): - File "./lex_state3.py", line 40, in ? - lex.lex() - File "../ply/lex.py", line 758, in lex - raise SyntaxError,"lex: Unable to build lexer." -SyntaxError: lex: Unable to build lexer. diff --git a/chall/ply-2.2/test/lex_state3.py b/chall/ply-2.2/test/lex_state3.py deleted file mode 100644 index fb4ce6c..0000000 --- a/chall/ply-2.2/test/lex_state3.py +++ /dev/null @@ -1,42 +0,0 @@ -# lex_state2.py -# -# Bad state declaration - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -comment = 1 -states = ((comment, 'inclusive'), - ('example', 'exclusive')) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print "Entering comment state" - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print "comment body", t - t.lexer.begin('INITIAL') - -def t_error(t): - pass - -import sys - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_state4.exp b/chall/ply-2.2/test/lex_state4.exp deleted file mode 100644 index 7497a47..0000000 --- a/chall/ply-2.2/test/lex_state4.exp +++ /dev/null @@ -1,7 +0,0 @@ -lex: state type for state comment must be 'inclusive' or 'exclusive' -Traceback (most recent call last): - File "./lex_state4.py", line 39, in ? - lex.lex() - File "../ply/lex.py", line 758, in lex - raise SyntaxError,"lex: Unable to build lexer." -SyntaxError: lex: Unable to build lexer. diff --git a/chall/ply-2.2/test/lex_state4.py b/chall/ply-2.2/test/lex_state4.py deleted file mode 100644 index 0993aa9..0000000 --- a/chall/ply-2.2/test/lex_state4.py +++ /dev/null @@ -1,41 +0,0 @@ -# lex_state2.py -# -# Bad state declaration - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -comment = 1 -states = (('comment', 'exclsive'),) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print "Entering comment state" - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print "comment body", t - t.lexer.begin('INITIAL') - -def t_error(t): - pass - -import sys - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_state5.exp b/chall/ply-2.2/test/lex_state5.exp deleted file mode 100644 index e9e43e8..0000000 --- a/chall/ply-2.2/test/lex_state5.exp +++ /dev/null @@ -1,7 +0,0 @@ -lex: state 'comment' already defined. -Traceback (most recent call last): - File "./lex_state5.py", line 40, in ? 
- lex.lex() - File "../ply/lex.py", line 758, in lex - raise SyntaxError,"lex: Unable to build lexer." -SyntaxError: lex: Unable to build lexer. diff --git a/chall/ply-2.2/test/lex_state5.py b/chall/ply-2.2/test/lex_state5.py deleted file mode 100644 index c3c1cbf..0000000 --- a/chall/ply-2.2/test/lex_state5.py +++ /dev/null @@ -1,42 +0,0 @@ -# lex_state2.py -# -# Bad state declaration - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -comment = 1 -states = (('comment', 'exclusive'), - ('comment', 'exclusive')) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print "Entering comment state" - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print "comment body", t - t.lexer.begin('INITIAL') - -def t_error(t): - pass - -import sys - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_state_noerror.exp b/chall/ply-2.2/test/lex_state_noerror.exp deleted file mode 100644 index e14149f..0000000 --- a/chall/ply-2.2/test/lex_state_noerror.exp +++ /dev/null @@ -1 +0,0 @@ -lex: Warning. no error rule is defined for exclusive state 'comment' diff --git a/chall/ply-2.2/test/lex_state_noerror.py b/chall/ply-2.2/test/lex_state_noerror.py deleted file mode 100644 index 853b157..0000000 --- a/chall/ply-2.2/test/lex_state_noerror.py +++ /dev/null @@ -1,41 +0,0 @@ -# lex_state2.py -# -# Declaration of a state for which no rules are defined - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -comment = 1 -states = (('comment', 'exclusive'),) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print "Entering comment state" - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print "comment body", t - t.lexer.begin('INITIAL') - -def t_error(t): - pass - -import sys - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_state_norule.exp b/chall/ply-2.2/test/lex_state_norule.exp deleted file mode 100644 index a8ff4ca..0000000 --- a/chall/ply-2.2/test/lex_state_norule.exp +++ /dev/null @@ -1,7 +0,0 @@ -lex: No rules defined for state 'example' -Traceback (most recent call last): - File "./lex_state_norule.py", line 40, in ? - lex.lex() - File "../ply/lex.py", line 758, in lex - raise SyntaxError,"lex: Unable to build lexer." -SyntaxError: lex: Unable to build lexer. 
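Exclusive states such as 'comment' inherit nothing from INITIAL, which is why lex_state_noerror.py above draws a warning: the state has no error rule of its own. lex_state_try.py, which follows shortly, quiets both the error and the ignore handling by simply aliasing the defaults:

    t_comment_error  = t_error     # reuse the INITIAL handlers for the
    t_comment_ignore = t_ignore    # exclusive 'comment' state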
diff --git a/chall/ply-2.2/test/lex_state_norule.py b/chall/ply-2.2/test/lex_state_norule.py deleted file mode 100644 index e48a319..0000000 --- a/chall/ply-2.2/test/lex_state_norule.py +++ /dev/null @@ -1,42 +0,0 @@ -# lex_state2.py -# -# Declaration of a state for which no rules are defined - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -comment = 1 -states = (('comment', 'exclusive'), - ('example', 'exclusive')) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print "Entering comment state" - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print "comment body", t - t.lexer.begin('INITIAL') - -def t_error(t): - pass - -import sys - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_state_try.exp b/chall/ply-2.2/test/lex_state_try.exp deleted file mode 100644 index 65f2e38..0000000 --- a/chall/ply-2.2/test/lex_state_try.exp +++ /dev/null @@ -1,7 +0,0 @@ -(NUMBER,'3',1,0) -(PLUS,'+',1,2) -(NUMBER,'4',1,4) -Entering comment state -comment body LexToken(comment_body_part,'This is a comment */',1,9) -(PLUS,'+',1,30) -(NUMBER,'10',1,32) diff --git a/chall/ply-2.2/test/lex_state_try.py b/chall/ply-2.2/test/lex_state_try.py deleted file mode 100644 index a16403e..0000000 --- a/chall/ply-2.2/test/lex_state_try.py +++ /dev/null @@ -1,48 +0,0 @@ -# lex_state2.py -# -# Declaration of a state for which no rules are defined - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -comment = 1 -states = (('comment', 'exclusive'),) - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -t_ignore = " \t" - -# Comments -def t_comment(t): - r'/\*' - t.lexer.begin('comment') - print "Entering comment state" - -def t_comment_body_part(t): - r'(.|\n)*\*/' - print "comment body", t - t.lexer.begin('INITIAL') - -def t_error(t): - pass - -t_comment_error = t_error -t_comment_ignore = t_ignore - -import sys - -lex.lex() - -data = "3 + 4 /* This is a comment */ + 10" - -lex.runmain(data=data) diff --git a/chall/ply-2.2/test/lex_token1.exp b/chall/ply-2.2/test/lex_token1.exp deleted file mode 100644 index 3792831..0000000 --- a/chall/ply-2.2/test/lex_token1.exp +++ /dev/null @@ -1 +0,0 @@ -SyntaxError: lex: module does not define 'tokens' diff --git a/chall/ply-2.2/test/lex_token1.py b/chall/ply-2.2/test/lex_token1.py deleted file mode 100644 index 380c31c..0000000 --- a/chall/ply-2.2/test/lex_token1.py +++ /dev/null @@ -1,21 +0,0 @@ -# lex_token.py -# -# Tests for absence of tokens variable - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_token2.exp b/chall/ply-2.2/test/lex_token2.exp deleted file mode 100644 index 3f98fe5..0000000 --- a/chall/ply-2.2/test/lex_token2.exp +++ /dev/null @@ -1 +0,0 @@ -SyntaxError: lex: tokens must be a list or tuple. 
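Most of these tests drive the lexer through lex.runmain() or merely build it, but a few (lex_token5.py further below) feed it input by hand. For reference, the usual manual loop looks like this, a sketch using only the lex module calls already seen in these files:

    lex.lex()                  # build the lexer from this module's rules
    lex.input("3 + 4 - 10")
    while 1:
        tok = lex.token()      # returns None when the input is exhausted
        if not tok: break
        print tok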
diff --git a/chall/ply-2.2/test/lex_token2.py b/chall/ply-2.2/test/lex_token2.py deleted file mode 100644 index 87db8a0..0000000 --- a/chall/ply-2.2/test/lex_token2.py +++ /dev/null @@ -1,23 +0,0 @@ -# lex_token.py -# -# Tests for tokens of wrong type - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = "PLUS MINUS NUMBER" - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_token3.exp b/chall/ply-2.2/test/lex_token3.exp deleted file mode 100644 index d991d3c..0000000 --- a/chall/ply-2.2/test/lex_token3.exp +++ /dev/null @@ -1,2 +0,0 @@ -lex: Rule 't_MINUS' defined for an unspecified token MINUS. -SyntaxError: lex: Unable to build lexer. diff --git a/chall/ply-2.2/test/lex_token3.py b/chall/ply-2.2/test/lex_token3.py deleted file mode 100644 index 27ce947..0000000 --- a/chall/ply-2.2/test/lex_token3.py +++ /dev/null @@ -1,27 +0,0 @@ -# lex_token.py -# -# tokens is right type, but is missing a token for one rule - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - - -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_token4.exp b/chall/ply-2.2/test/lex_token4.exp deleted file mode 100644 index 3dd88e0..0000000 --- a/chall/ply-2.2/test/lex_token4.exp +++ /dev/null @@ -1,2 +0,0 @@ -lex: Bad token name '-' -SyntaxError: lex: Unable to build lexer. diff --git a/chall/ply-2.2/test/lex_token4.py b/chall/ply-2.2/test/lex_token4.py deleted file mode 100644 index 612ff13..0000000 --- a/chall/ply-2.2/test/lex_token4.py +++ /dev/null @@ -1,28 +0,0 @@ -# lex_token.py -# -# Bad token name - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "-", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' -t_NUMBER = r'\d+' - -def t_error(t): - pass - -sys.tracebacklimit = 0 - -lex.lex() - - diff --git a/chall/ply-2.2/test/lex_token5.exp b/chall/ply-2.2/test/lex_token5.exp deleted file mode 100644 index 2f03889..0000000 --- a/chall/ply-2.2/test/lex_token5.exp +++ /dev/null @@ -1 +0,0 @@ -ply.lex.LexError: ./lex_token5.py:19: Rule 't_NUMBER' returned an unknown token type 'NUM' diff --git a/chall/ply-2.2/test/lex_token5.py b/chall/ply-2.2/test/lex_token5.py deleted file mode 100644 index 77fabde..0000000 --- a/chall/ply-2.2/test/lex_token5.py +++ /dev/null @@ -1,33 +0,0 @@ -# lex_token.py -# -# Return a bad token name - -import sys -sys.path.insert(0,"..") - -import ply.lex as lex - -tokens = [ - "PLUS", - "MINUS", - "NUMBER", - ] - -t_PLUS = r'\+' -t_MINUS = r'-' - -def t_NUMBER(t): - r'\d+' - t.type = "NUM" - return t - -def t_error(t): - pass - -sys.tracebacklimit = 0 - -lex.lex() -lex.input("1234") -t = lex.token() - - diff --git a/chall/ply-2.2/test/rununit.py b/chall/ply-2.2/test/rununit.py deleted file mode 100644 index d6b36fd..0000000 --- a/chall/ply-2.2/test/rununit.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python -'''Script to run all tests using python "unittest" module''' - -__author__ = "Miki Tebeka <miki.tebeka@zoran.com>" - -from unittest import TestCase, main, makeSuite, TestSuite -from os import popen, environ, remove -from glob import glob -from sys import executable, argv -from os.path import isfile, basename, splitext - -# Add path to lex.py and yacc.py -environ["PYTHONPATH"] = ".." 
- -class PLYTest(TestCase): - '''General test case for PLY test''' - def _runtest(self, filename): - '''Run a single test file and compare the result''' - exp_file = filename.replace(".py", ".exp") - self.failUnless(isfile(exp_file), "can't find %s" % exp_file) - pipe = popen("%s %s 2>&1" % (executable, filename)) - out = pipe.read().strip() - self.failUnlessEqual(out, open(exp_file).read().strip()) - - -class LexTest(PLYTest): - '''Testing Lex''' - pass - -class YaccTest(PLYTest): - '''Testing Yacc''' - - def tearDown(self): - '''Cleanup parsetab.py[c] file''' - for ext in (".py", ".pyc"): - fname = "parsetab%s" % ext - if isfile(fname): - remove(fname) - -def add_test(klass, filename): - '''Add a test to TestCase class''' - def t(self): - self._runtest(filename) - # Test name is test_FILENAME without the ./ and without the .py - setattr(klass, "test_%s" % (splitext(basename(filename))[0]), t) - -# Add lex tests -for file in glob("./lex_*.py"): - add_test(LexTest, file) -lex_suite = makeSuite(LexTest, "test_") - -# Add yacc tests -for file in glob("./yacc_*.py"): - add_test(YaccTest, file) -yacc_suite = makeSuite(YaccTest, "test_") - -# All tests suite -test_suite = TestSuite((lex_suite, yacc_suite)) - -if __name__ == "__main__": - main() - diff --git a/chall/ply-2.2/test/testlex.py b/chall/ply-2.2/test/testlex.py deleted file mode 100755 index 2dae47a..0000000 --- a/chall/ply-2.2/test/testlex.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/local/bin -# ---------------------------------------------------------------------- -# testlex.py -# -# Run tests for the lexing module -# ---------------------------------------------------------------------- - -import sys,os,glob - -if len(sys.argv) < 2: - print "Usage: python testlex.py directory" - raise SystemExit - -dirname = None -make = 0 - -for o in sys.argv[1:]: - if o == '-make': - make = 1 - else: - dirname = o - break - -if not dirname: - print "Usage: python testlex.py [-make] directory" - raise SystemExit - -f = glob.glob("%s/%s" % (dirname,"lex_*.py")) - -print "**** Running tests for lex ****" - -for t in f: - name = t[:-3] - print "Testing %-32s" % name, - if make: - if not os.path.exists("%s.exp" % name): - os.system("python %s.py >%s.exp 2>&1" % (name,name)) - passed = 1 - else: - os.system("python %s.py >%s.out 2>&1" % (name,name)) - a = os.system("diff %s.out %s.exp >%s.dif" % (name,name,name)) - if a == 0: - passed = 1 - else: - passed = 0 - - if passed: - print "Passed" - else: - print "Failed.
See %s.dif" % name - - - - - - - diff --git a/chall/ply-2.2/test/testyacc.py b/chall/ply-2.2/test/testyacc.py deleted file mode 100644 index f976ff5..0000000 --- a/chall/ply-2.2/test/testyacc.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/local/bin -# ---------------------------------------------------------------------- -# testyacc.py -# -# Run tests for the yacc module -# ---------------------------------------------------------------------- - -import sys,os,glob - -if len(sys.argv) < 2: - print "Usage: python testyacc.py directory" - raise SystemExit - -dirname = None -make = 0 - -for o in sys.argv[1:]: - if o == '-make': - make = 1 - else: - dirname = o - break - -if not dirname: - print "Usage: python testyacc.py [-make] directory" - raise SystemExit - -f = glob.glob("%s/%s" % (dirname,"yacc_*.py")) - -print "**** Running tests for yacc ****" - -for t in f: - name = t[:-3] - print "Testing %-32s" % name, - os.system("rm -f %s/parsetab.*" % dirname) - if make: - if not os.path.exists("%s.exp" % name): - os.system("python %s.py >%s.exp 2>&1" % (name,name)) - passed = 1 - else: - os.system("python %s.py >%s.out 2>&1" % (name,name)) - a = os.system("diff %s.out %s.exp >%s.dif" % (name,name,name)) - if a == 0: - passed = 1 - else: - passed = 0 - - if passed: - print "Passed" - else: - print "Failed. See %s.dif" % name - - - - - - - diff --git a/chall/ply-2.2/test/yacc_badargs.exp b/chall/ply-2.2/test/yacc_badargs.exp deleted file mode 100644 index e994676..0000000 --- a/chall/ply-2.2/test/yacc_badargs.exp +++ /dev/null @@ -1,3 +0,0 @@ -./yacc_badargs.py:23: Rule 'p_statement_assign' has too many arguments. -./yacc_badargs.py:27: Rule 'p_statement_expr' requires an argument. -ply.yacc.YaccError: Unable to construct parser. diff --git a/chall/ply-2.2/test/yacc_badargs.py b/chall/ply-2.2/test/yacc_badargs.py deleted file mode 100644 index 810e529..0000000 --- a/chall/ply-2.2/test/yacc_badargs.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badargs.py -# -# Rules with wrong # args -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t,s): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_badprec.exp b/chall/ply-2.2/test/yacc_badprec.exp deleted file mode 100644 index f4f574b..0000000 --- 
a/chall/ply-2.2/test/yacc_badprec.exp +++ /dev/null @@ -1 +0,0 @@ -ply.yacc.YaccError: precedence must be a list or tuple. diff --git a/chall/ply-2.2/test/yacc_badprec.py b/chall/ply-2.2/test/yacc_badprec.py deleted file mode 100644 index 8f64652..0000000 --- a/chall/ply-2.2/test/yacc_badprec.py +++ /dev/null @@ -1,65 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badprec.py -# -# Bad precedence specifier -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = "blah" - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_badprec2.exp b/chall/ply-2.2/test/yacc_badprec2.exp deleted file mode 100644 index 8fac075..0000000 --- a/chall/ply-2.2/test/yacc_badprec2.exp +++ /dev/null @@ -1,3 +0,0 @@ -yacc: Invalid precedence table. -yacc: Generating LALR parsing table... 
-yacc: 8 shift/reduce conflicts diff --git a/chall/ply-2.2/test/yacc_badprec2.py b/chall/ply-2.2/test/yacc_badprec2.py deleted file mode 100644 index 206bda7..0000000 --- a/chall/ply-2.2/test/yacc_badprec2.py +++ /dev/null @@ -1,69 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badprec2.py -# -# Bad precedence -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - 42, - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_badrule.exp b/chall/ply-2.2/test/yacc_badrule.exp deleted file mode 100644 index a87bf7d..0000000 --- a/chall/ply-2.2/test/yacc_badrule.exp +++ /dev/null @@ -1,5 +0,0 @@ -./yacc_badrule.py:25: Syntax error. Expected ':' -./yacc_badrule.py:29: Syntax error in rule 'statement' -./yacc_badrule.py:34: Syntax error. Expected ':' -./yacc_badrule.py:43: Syntax error. Expected ':' -ply.yacc.YaccError: Unable to construct parser. 
diff --git a/chall/ply-2.2/test/yacc_badrule.py b/chall/ply-2.2/test/yacc_badrule.py deleted file mode 100644 index f5fef8a..0000000 --- a/chall/ply-2.2/test/yacc_badrule.py +++ /dev/null @@ -1,69 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badrule.py -# -# Syntax problems in the rule strings -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression: MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_badtok.exp b/chall/ply-2.2/test/yacc_badtok.exp deleted file mode 100644 index ccdc0e7..0000000 --- a/chall/ply-2.2/test/yacc_badtok.exp +++ /dev/null @@ -1 +0,0 @@ -ply.yacc.YaccError: tokens must be a list or tuple. 
diff --git a/chall/ply-2.2/test/yacc_badtok.py b/chall/ply-2.2/test/yacc_badtok.py deleted file mode 100644 index 4f2af51..0000000 --- a/chall/ply-2.2/test/yacc_badtok.py +++ /dev/null @@ -1,70 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_badtok.py -# -# A grammar, but tokens is a bad datatype -# ----------------------------------------------------------------------------- - -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -tokens = "Hello" - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_dup.exp b/chall/ply-2.2/test/yacc_dup.exp deleted file mode 100644 index fdfb210..0000000 --- a/chall/ply-2.2/test/yacc_dup.exp +++ /dev/null @@ -1,4 +0,0 @@ -./yacc_dup.py:28: Function p_statement redefined. Previously defined on line 24 -yacc: Warning. Token 'EQUALS' defined, but not used. -yacc: Warning. There is 1 unused token. -yacc: Generating LALR parsing table... 
diff --git a/chall/ply-2.2/test/yacc_dup.py b/chall/ply-2.2/test/yacc_dup.py deleted file mode 100644 index e0b683d..0000000 --- a/chall/ply-2.2/test/yacc_dup.py +++ /dev/null @@ -1,69 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_dup.py -# -# Duplicated rule name -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_error1.exp b/chall/ply-2.2/test/yacc_error1.exp deleted file mode 100644 index 13bed04..0000000 --- a/chall/ply-2.2/test/yacc_error1.exp +++ /dev/null @@ -1 +0,0 @@ -ply.yacc.YaccError: ./yacc_error1.py:62: p_error() requires 1 argument. 
diff --git a/chall/ply-2.2/test/yacc_error1.py b/chall/ply-2.2/test/yacc_error1.py deleted file mode 100644 index 2768fc1..0000000 --- a/chall/ply-2.2/test/yacc_error1.py +++ /dev/null @@ -1,69 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error1.py -# -# Bad p_error() function -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t,s): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_error2.exp b/chall/ply-2.2/test/yacc_error2.exp deleted file mode 100644 index 4a7628d..0000000 --- a/chall/ply-2.2/test/yacc_error2.exp +++ /dev/null @@ -1 +0,0 @@ -ply.yacc.YaccError: ./yacc_error2.py:62: p_error() requires 1 argument. 
diff --git a/chall/ply-2.2/test/yacc_error2.py b/chall/ply-2.2/test/yacc_error2.py deleted file mode 100644 index 8f3a052..0000000 --- a/chall/ply-2.2/test/yacc_error2.py +++ /dev/null @@ -1,69 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error1.py -# -# Bad p_error() function -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_error3.exp b/chall/ply-2.2/test/yacc_error3.exp deleted file mode 100644 index 7fca2fe..0000000 --- a/chall/ply-2.2/test/yacc_error3.exp +++ /dev/null @@ -1 +0,0 @@ -ply.yacc.YaccError: 'p_error' defined, but is not a function or method. 
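Taken together, yacc_error1.py, yacc_error2.py and yacc_error3.py pin down the p_error contract: it must be a function (or method) of exactly one argument, the offending token. The conforming handler used throughout the rest of these tests is simply:

    def p_error(t):
        print "Syntax error at '%s'" % t.value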
diff --git a/chall/ply-2.2/test/yacc_error3.py b/chall/ply-2.2/test/yacc_error3.py deleted file mode 100644 index b387de5..0000000 --- a/chall/ply-2.2/test/yacc_error3.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_error1.py -# -# Bad p_error() function -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -p_error = "blah" - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_inf.exp b/chall/ply-2.2/test/yacc_inf.exp deleted file mode 100644 index 88cfa4a..0000000 --- a/chall/ply-2.2/test/yacc_inf.exp +++ /dev/null @@ -1,5 +0,0 @@ -yacc: Warning. Token 'NUMBER' defined, but not used. -yacc: Warning. There is 1 unused token. -yacc: Infinite recursion detected for symbol 'statement'. -yacc: Infinite recursion detected for symbol 'expression'. -ply.yacc.YaccError: Unable to construct parser. 
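The infinite recursion that yacc_inf.exp reports is easy to see in yacc_inf.py below: unlike the other calculator grammars in this directory, it omits the p_expression_number and p_expression_name base cases, so 'expression' can only be rewritten in terms of itself and never derives a finite string of tokens, and 'statement' inherits the problem. Restoring either base case, for example

    def p_expression_number(t):
        'expression : NUMBER'
        t[0] = t[1]

would let table construction proceed.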
diff --git a/chall/ply-2.2/test/yacc_inf.py b/chall/ply-2.2/test/yacc_inf.py deleted file mode 100644 index 9b9aef7..0000000 --- a/chall/ply-2.2/test/yacc_inf.py +++ /dev/null @@ -1,57 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_inf.py -# -# Infinite recursion -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_missing1.exp b/chall/ply-2.2/test/yacc_missing1.exp deleted file mode 100644 index de63d4f..0000000 --- a/chall/ply-2.2/test/yacc_missing1.exp +++ /dev/null @@ -1,2 +0,0 @@ -./yacc_missing1.py:25: Symbol 'location' used, but not defined as a token or a rule. -ply.yacc.YaccError: Unable to construct parser. diff --git a/chall/ply-2.2/test/yacc_missing1.py b/chall/ply-2.2/test/yacc_missing1.py deleted file mode 100644 index fbc54d8..0000000 --- a/chall/ply-2.2/test/yacc_missing1.py +++ /dev/null @@ -1,69 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_missing1.py -# -# Grammar with a missing rule -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : location EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_nodoc.exp b/chall/ply-2.2/test/yacc_nodoc.exp deleted file mode 100644 index 
889ccfc..0000000 --- a/chall/ply-2.2/test/yacc_nodoc.exp +++ /dev/null @@ -1,2 +0,0 @@ -./yacc_nodoc.py:28: No documentation string specified in function 'p_statement_expr' -yacc: Generating LALR parsing table... diff --git a/chall/ply-2.2/test/yacc_nodoc.py b/chall/ply-2.2/test/yacc_nodoc.py deleted file mode 100644 index 4c5ab20..0000000 --- a/chall/ply-2.2/test/yacc_nodoc.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_nodoc.py -# -# Rule with a missing doc-string -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_noerror.exp b/chall/ply-2.2/test/yacc_noerror.exp deleted file mode 100644 index 658f907..0000000 --- a/chall/ply-2.2/test/yacc_noerror.exp +++ /dev/null @@ -1,2 +0,0 @@ -yacc: Generating LALR parsing table... -yacc: Warning. no p_error() function is defined. diff --git a/chall/ply-2.2/test/yacc_noerror.py b/chall/ply-2.2/test/yacc_noerror.py deleted file mode 100644 index 9c11838..0000000 --- a/chall/ply-2.2/test/yacc_noerror.py +++ /dev/null @@ -1,67 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_noerror.py -# -# No p_error() rule defined. 
-# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_nop.exp b/chall/ply-2.2/test/yacc_nop.exp deleted file mode 100644 index 515fff7..0000000 --- a/chall/ply-2.2/test/yacc_nop.exp +++ /dev/null @@ -1,2 +0,0 @@ -./yacc_nop.py:28: Warning. Possible grammar rule 'statement_expr' defined without p_ prefix. -yacc: Generating LALR parsing table... diff --git a/chall/ply-2.2/test/yacc_nop.py b/chall/ply-2.2/test/yacc_nop.py deleted file mode 100644 index c0b431d..0000000 --- a/chall/ply-2.2/test/yacc_nop.py +++ /dev/null @@ -1,69 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_nop.py -# -# Possible grammar rule defined without p_ prefix -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_notfunc.exp b/chall/ply-2.2/test/yacc_notfunc.exp deleted file mode 100644 index f73bc93..0000000 --- a/chall/ply-2.2/test/yacc_notfunc.exp +++ /dev/null @@ -1,4 +0,0 @@ -yacc: Warning. 'p_statement_assign' not defined as a function -yacc: Warning. 
Token 'EQUALS' defined, but not used. -yacc: Warning. There is 1 unused token. -yacc: Generating LALR parsing table... diff --git a/chall/ply-2.2/test/yacc_notfunc.py b/chall/ply-2.2/test/yacc_notfunc.py deleted file mode 100644 index 8389355..0000000 --- a/chall/ply-2.2/test/yacc_notfunc.py +++ /dev/null @@ -1,67 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_notfunc.py -# -# p_rule not defined as a function -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -p_statement_assign = "Blah" - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_notok.exp b/chall/ply-2.2/test/yacc_notok.exp deleted file mode 100644 index d2399fe..0000000 --- a/chall/ply-2.2/test/yacc_notok.exp +++ /dev/null @@ -1 +0,0 @@ -ply.yacc.YaccError: module does not define a list 'tokens' diff --git a/chall/ply-2.2/test/yacc_notok.py b/chall/ply-2.2/test/yacc_notok.py deleted file mode 100644 index e566a1b..0000000 --- a/chall/ply-2.2/test/yacc_notok.py +++ /dev/null @@ -1,68 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_notok.py -# -# A grammar, but we forgot to import the tokens list -# ----------------------------------------------------------------------------- - -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - 
-def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_rr.exp b/chall/ply-2.2/test/yacc_rr.exp deleted file mode 100644 index f73cefd..0000000 --- a/chall/ply-2.2/test/yacc_rr.exp +++ /dev/null @@ -1,2 +0,0 @@ -yacc: Generating LALR parsing table... -yacc: 1 reduce/reduce conflict diff --git a/chall/ply-2.2/test/yacc_rr.py b/chall/ply-2.2/test/yacc_rr.py deleted file mode 100644 index bb8cba2..0000000 --- a/chall/ply-2.2/test/yacc_rr.py +++ /dev/null @@ -1,73 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_rr.py -# -# A grammar with a reduce/reduce conflict -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_assign_2(t): - 'statement : NAME EQUALS NUMBER' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_simple.exp b/chall/ply-2.2/test/yacc_simple.exp deleted file mode 100644 index 3836031..0000000 --- a/chall/ply-2.2/test/yacc_simple.exp +++ /dev/null @@ -1 +0,0 @@ -yacc: Generating LALR parsing table... 
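The single reduce/reduce conflict reported for yacc_rr.py above comes from the overlap between

    statement : NAME EQUALS expression
    statement : NAME EQUALS NUMBER     # NUMBER alone also reduces to expression

After NAME EQUALS NUMBER the parser can either reduce NUMBER to expression or reduce the whole statement at once; like traditional yacc, PLY resolves a reduce/reduce conflict in favor of the rule that appears first in the grammar.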
diff --git a/chall/ply-2.2/test/yacc_simple.py b/chall/ply-2.2/test/yacc_simple.py deleted file mode 100644 index b5dc9f3..0000000 --- a/chall/ply-2.2/test/yacc_simple.py +++ /dev/null @@ -1,69 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_simple.py -# -# A simple, properly specified grammar -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_sr.exp b/chall/ply-2.2/test/yacc_sr.exp deleted file mode 100644 index 1b76450..0000000 --- a/chall/ply-2.2/test/yacc_sr.exp +++ /dev/null @@ -1,2 +0,0 @@ -yacc: Generating LALR parsing table...
-yacc: 20 shift/reduce conflicts diff --git a/chall/ply-2.2/test/yacc_sr.py b/chall/ply-2.2/test/yacc_sr.py deleted file mode 100644 index e2f03ec..0000000 --- a/chall/ply-2.2/test/yacc_sr.py +++ /dev/null @@ -1,64 +0,0 @@ -# ----------------------------------------------------------------------------- -# yacc_sr.py -# -# A grammar with shift-reduce conflicts -# ----------------------------------------------------------------------------- -import sys -sys.tracebacklimit = 0 - -sys.path.insert(0,"..") -import ply.yacc as yacc - -from calclex import tokens - -# Parsing rules - -# dictionary of names -names = { } - -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] - -def p_statement_expr(t): - 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[3] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' - try: - t[0] = names[t[1]] - except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 - -def p_error(t): - print "Syntax error at '%s'" % t.value - -yacc.yacc() - - - - diff --git a/chall/ply-2.2/test/yacc_term1.exp b/chall/ply-2.2/test/yacc_term1.exp deleted file mode 100644 index 40f9bdf..0000000 --- a/chall/ply-2.2/test/yacc_term1.exp +++ /dev/null @@ -1,2 +0,0 @@ -./yacc_term1.py:25: Illegal rule name 'NUMBER'. Already defined as a token. -ply.yacc.YaccError: Unable to construct parser. 
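yacc_sr.py above is the same calculator with the precedence table and the '%prec UMINUS' override stripped out. Five productions end in 'expression' (the four binary operators plus unary minus), and each faces a shift-or-reduce choice against the four operator lookaheads, hence the 5 x 4 = 20 shift/reduce conflicts in yacc_sr.exp, all silently resolved as shifts. Restoring the declarations used by the other tests silences them:

    precedence = (
        ('left','PLUS','MINUS'),
        ('left','TIMES','DIVIDE'),
        ('right','UMINUS'),
        )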
diff --git a/chall/ply-2.2/test/yacc_term1.py b/chall/ply-2.2/test/yacc_term1.py deleted file mode 100644 index bbc52da..0000000 --- a/chall/ply-2.2/test/yacc_term1.py +++ /dev/null @@ -1,69 +0,0 @@ -# -----------------------------------------------------------------------------
-# yacc_term1.py
-#
-# Terminal used on the left-hand-side
-# -----------------------------------------------------------------------------
-import sys
-sys.tracebacklimit = 0
-
-sys.path.insert(0,"..")
-import ply.yacc as yacc
-
-from calclex import tokens
-
-# Parsing rules
-precedence = (
-    ('left','PLUS','MINUS'),
-    ('left','TIMES','DIVIDE'),
-    ('right','UMINUS'),
-    )
-
-# dictionary of names
-names = { }
-
-def p_statement_assign(t):
-    'NUMBER : NAME EQUALS expression'
-    names[t[1]] = t[3]
-
-def p_statement_expr(t):
-    'statement : expression'
-    print t[1]
-
-def p_expression_binop(t):
-    '''expression : expression PLUS expression
-                  | expression MINUS expression
-                  | expression TIMES expression
-                  | expression DIVIDE expression'''
-    if t[2] == '+'  : t[0] = t[1] + t[3]
-    elif t[2] == '-': t[0] = t[1] - t[3]
-    elif t[2] == '*': t[0] = t[1] * t[3]
-    elif t[2] == '/': t[0] = t[1] / t[3]
-
-def p_expression_uminus(t):
-    'expression : MINUS expression %prec UMINUS'
-    t[0] = -t[2]
-
-def p_expression_group(t):
-    'expression : LPAREN expression RPAREN'
-    t[0] = t[2]
-
-def p_expression_number(t):
-    'expression : NUMBER'
-    t[0] = t[1]
-
-def p_expression_name(t):
-    'expression : NAME'
-    try:
-        t[0] = names[t[1]]
-    except LookupError:
-        print "Undefined name '%s'" % t[1]
-        t[0] = 0
-
-def p_error(t):
-    print "Syntax error at '%s'" % t.value
-
-yacc.yacc()
-
-
-
diff --git a/chall/ply-2.2/test/yacc_unused.exp b/chall/ply-2.2/test/yacc_unused.exp deleted file mode 100644 index 6caafd2..0000000 --- a/chall/ply-2.2/test/yacc_unused.exp +++ /dev/null @@ -1,4 +0,0 @@ -./yacc_unused.py:63: Symbol 'COMMA' used, but not defined as a token or a rule.
-yacc: Symbol 'COMMA' is unreachable.
-yacc: Symbol 'exprlist' is unreachable.
-ply.yacc.YaccError: Unable to construct parser.
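yacc_term1.py triggers the error in yacc_term1.exp deliberately: its first grammar rule puts the token NUMBER on the left-hand side of a production, and since tokens and rule names share one namespace, a symbol cannot be both a terminal and a nonterminal. The intended form of the rule, as the other grammars in this directory write it:

    def p_statement_assign(t):
        'statement : NAME EQUALS expression'   # nonterminal on the left-hand side
        names[t[1]] = t[3]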
diff --git a/chall/ply-2.2/test/yacc_unused.py b/chall/ply-2.2/test/yacc_unused.py deleted file mode 100644 index 3a61f99..0000000 --- a/chall/ply-2.2/test/yacc_unused.py +++ /dev/null @@ -1,78 +0,0 @@ -# -----------------------------------------------------------------------------
-# yacc_unused.py
-#
-# A grammar with an unused rule
-# -----------------------------------------------------------------------------
-import sys
-sys.tracebacklimit = 0
-
-sys.path.insert(0,"..")
-import ply.yacc as yacc
-
-from calclex import tokens
-
-# Parsing rules
-precedence = (
-    ('left','PLUS','MINUS'),
-    ('left','TIMES','DIVIDE'),
-    ('right','UMINUS'),
-    )
-
-# dictionary of names
-names = { }
-
-def p_statement_assign(t):
-    'statement : NAME EQUALS expression'
-    names[t[1]] = t[3]
-
-def p_statement_expr(t):
-    'statement : expression'
-    print t[1]
-
-def p_expression_binop(t):
-    '''expression : expression PLUS expression
-                  | expression MINUS expression
-                  | expression TIMES expression
-                  | expression DIVIDE expression'''
-    if t[2] == '+'  : t[0] = t[1] + t[3]
-    elif t[2] == '-': t[0] = t[1] - t[3]
-    elif t[2] == '*': t[0] = t[1] * t[3]
-    elif t[2] == '/': t[0] = t[1] / t[3]
-
-def p_expression_uminus(t):
-    'expression : MINUS expression %prec UMINUS'
-    t[0] = -t[2]
-
-def p_expression_group(t):
-    'expression : LPAREN expression RPAREN'
-    t[0] = t[2]
-
-def p_expression_number(t):
-    'expression : NUMBER'
-    t[0] = t[1]
-
-def p_expression_name(t):
-    'expression : NAME'
-    try:
-        t[0] = names[t[1]]
-    except LookupError:
-        print "Undefined name '%s'" % t[1]
-        t[0] = 0
-
-def p_expr_list(t):
-    'exprlist : exprlist COMMA expression'
-    pass
-
-def p_expr_list_2(t):
-    'exprlist : expression'
-    pass
-
-
-def p_error(t):
-    print "Syntax error at '%s'" % t.value
-
-yacc.yacc()
-
-
-
diff --git a/chall/ply-2.2/test/yacc_uprec.exp b/chall/ply-2.2/test/yacc_uprec.exp deleted file mode 100644 index eb9a398..0000000 --- a/chall/ply-2.2/test/yacc_uprec.exp +++ /dev/null @@ -1,2 +0,0 @@ -./yacc_uprec.py:38: Nothing known about the precedence of 'UMINUS'
-ply.yacc.YaccError: Unable to construct parser.
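Both complaints in yacc_unused.exp trace back to the two exprlist rules: COMMA does not appear in the token list imported from calclex, and nothing reachable from the start symbol 'statement' ever derives exprlist, so yacc flags both symbols as dead. A sketch of one way to make them live, hypothetical because it assumes a COMMA token (e.g. t_COMMA = r',') were added to calclex:

    def p_statement_exprlist(t):
        'statement : exprlist'    # hooks exprlist into the reachable grammar
        pass

This silences the unreachable warnings, though this particular hook then makes the grammar ambiguous between a lone expression and a one-element exprlist, so a real grammar would restructure the statement rules instead.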
diff --git a/chall/ply-2.2/test/yacc_uprec.py b/chall/ply-2.2/test/yacc_uprec.py deleted file mode 100644 index 0e8711e..0000000 --- a/chall/ply-2.2/test/yacc_uprec.py +++ /dev/null @@ -1,64 +0,0 @@ -# -----------------------------------------------------------------------------
-# yacc_uprec.py
-#
-# A grammar with a bad %prec specifier
-# -----------------------------------------------------------------------------
-import sys
-sys.tracebacklimit = 0
-
-sys.path.insert(0,"..")
-import ply.yacc as yacc
-
-from calclex import tokens
-
-# Parsing rules
-
-# dictionary of names
-names = { }
-
-def p_statement_assign(t):
-    'statement : NAME EQUALS expression'
-    names[t[1]] = t[3]
-
-def p_statement_expr(t):
-    'statement : expression'
-    print t[1]
-
-def p_expression_binop(t):
-    '''expression : expression PLUS expression
-                  | expression MINUS expression
-                  | expression TIMES expression
-                  | expression DIVIDE expression'''
-    if t[2] == '+'  : t[0] = t[1] + t[3]
-    elif t[2] == '-': t[0] = t[1] - t[3]
-    elif t[2] == '*': t[0] = t[1] * t[3]
-    elif t[2] == '/': t[0] = t[1] / t[3]
-
-def p_expression_uminus(t):
-    'expression : MINUS expression %prec UMINUS'
-    t[0] = -t[2]
-
-def p_expression_group(t):
-    'expression : LPAREN expression RPAREN'
-    t[0] = t[2]
-
-def p_expression_number(t):
-    'expression : NUMBER'
-    t[0] = t[1]
-
-def p_expression_name(t):
-    'expression : NAME'
-    try:
-        t[0] = names[t[1]]
-    except LookupError:
-        print "Undefined name '%s'" % t[1]
-        t[0] = 0
-
-def p_error(t):
-    print "Syntax error at '%s'" % t.value
-
-yacc.yacc()
-
-
-
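The failure in yacc_uprec.exp comes from the '%prec UMINUS' override: the substitute symbol named after %prec must be declared in the precedence table, and yacc_uprec.py declares no precedence at all. Declaring UMINUS, as the other test grammars here do, satisfies yacc; a minimal sketch:

    precedence = (
        ('left','PLUS','MINUS'),
        ('left','TIMES','DIVIDE'),
        ('right','UMINUS'),       # gives '%prec UMINUS' a precedence to borrow
        )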
