Diffstat (limited to 'chall/lolpython.py')
-rw-r--r--  chall/lolpython.py  768
1 file changed, 0 insertions, 768 deletions
diff --git a/chall/lolpython.py b/chall/lolpython.py
deleted file mode 100644
index 6337124..0000000
--- a/chall/lolpython.py
+++ /dev/null
@@ -1,768 +0,0 @@
-#!/usr/bin/env python
-# Implementation of the LOLPython language.
-# Converts from LOLPython to Python then optionally runs the Python.
-
-# This package depends on PLY -- http://www.dabeaz.com/ply/
-
-# Written by Andrew Dalke <dalke@dalkescientific.com>
-# Dalke Scientific Software, LLC
-# 1 June 2007, Gothenburg, Sweden
-#
-# This software is in the public domain. For details see:
-# http://creativecommons.org/licenses/publicdomain/
-
-
-import sys
-import keyword
-import os
-import types
-from cStringIO import StringIO
-from ply import *
-
-
-__NAME__ = "lolpython"
-__VERSION__ = "1.0"
-
-# Translating LOLPython tokens to Python tokens
-# This could be cleaned up. For example, some of
-# these tokens could be merged into one.
-tokens = (
- "NAME", # variable names
- "RESERVED", # Used for Python reserved names
- "NUMBER", # Integers and floats
- "STRING",
- "OP", # Like the Python OP
- "CLOSE", # Don't really need this..
-
- "COMMENT",
- "AUTOCALL", # write t.value then add '('
- "INLINE", # write t.value directly
- "FUTURE", # for the "I FUTURE CAT WITH" statement
- "PRINT", # VISIBLE -> stdout or COMPLAIN -> stderr
-
- "ENDMARKER",
- "COLON",
- "WS",
- "NEWLINE",
-)
-
-# Helper functions for making given token types
-def OP(t, value):
- t.type = "OP"
- t.value = value
- return t
-
-def RESERVED(t, value):
- t.type = "RESERVED"
- t.value = value
- return t
-
-def AUTOCALL(t, value):
-    t.type = "AUTOCALL"
-    t.value = value
-    t.lexer.paren_stack.append(")")
-    return t
-
-def INLINE(t, value):
- t.type = "INLINE"
- t.value = value
- return t
-
-#####
-
-# ply uses a large regex for token detection, and sre is limited to
-# 100 groups. This grammar pushes the limit. I use (?:non-grouping)
-# parens to keep the count down.
-
-
-def t_ASSIGN(t): # cannot be a simple pattern because it must
- r'CAN[ ]+HA[SZ]\b' # come before the t_NAME definition
- return OP(t, "=")
-
-def t_SINGLE_QUOTE_STRING(t):
- r"'([^\\']+|\\'|\\\\)*'" # I think this is right ...
- t.type = "STRING"
- t.value = t.value[1:-1].decode("string-escape")
- return t
-
-def t_DOUBLE_QUOTE_STRING(t):
-    r'"([^\\"]+|\\"|\\\\)*"'
-    t.type = "STRING"
-    t.value = t.value[1:-1].decode("string-escape")
-    return t
-
-# and LOL quoted strings! They end with /LOL
-# No way to have "/LOL" in the string.
-def t_LOL_STRING(t):
- r"LOL[ ]*((?!/LOL).|\n)*[ ]*/LOL"
- t.type = "STRING"
- t.value = t.value[3:-4].strip(" ")
- return t
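-# For example, "LOL O HAI /LOL" becomes a STRING token with value
-# "O HAI": the delimiters and the padding spaces are stripped.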
-
-# Aliases for the same thing - for extra cuteness
-def t_LSQUARE(t):
- r"(?:SOME|LOOK[ ]AT|LET[ ]+THE)\b"
- t.lexer.paren_stack.append(']')
- return OP(t, "[")
-
-def t_LPAREN(t):
- r"(?:WIT|THEZ)\b"
- t.lexer.paren_stack.append(')')
- return OP(t, "(")
-
-def t_LBRACE(t):
- r"BUCKET\b"
- t.lexer.paren_stack.append("}")
- return OP(t, "{")
-
-def t_CLOSE(t):
- r"(?:OK(!+|\b)|!+)"
- stack = t.lexer.paren_stack
- if t.value.startswith("OK"):
- num_closes = len(t.value)-1 # OK -> 1, OK! -> 2, OK!!->3
- else:
- num_closes = len(t.value) # ! -> 1, !! -> 2
- # Which close is this? I use "OK" to match (, [ and {
- if len(stack) < num_closes:
- raise AssertionError("not enough opens on the stack: line %d"
- % (t.lineno,))
- t.value = "".join(stack[-num_closes:][::-1])
- del stack[-num_closes:]
- return t
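-# For example, after "SOME WIT" (t_LSQUARE then t_LPAREN) has pushed
-# ']' and then ')', the single token "OK!" has num_closes == 2 and
-# gets the value ")]": the innermost opens are closed in reverse order.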
-
-def t_EQ(t):
- r"KINDA[ ]+LIKE\b"
- return OP(t, "==")
-
-def t_NE(t):
- r"(?:KINDA[ ]+)?NOT[ ]+LIKE\b"
- return OP(t, "!=")
-
-def t_is(t):
- r"KINDA[ ]+IS\b"
- return RESERVED(t, "is")
-
-def t_GT(t):
- r"ATE[ ]+MORE[ ]+CHEEZBURGERS?[ ]+THAN\b"
- return OP(t, ">")
-
-def t_LT(t):
- r"ATE[ ]+FEWER[ ]+CHEEZBURGERS?[ ]+THAN\b"
- return OP(t, "<")
-
-def t_GTE(t):
- r"BIG[ ]+LIKE\b"
- return OP(t, ">=")
-
-def t_LTE(t):
- r"SMALL[ ]+LIKE\b"
- return OP(t, "<=")
-
-def t_RETURN(t):
- r"U[ ]+TAKE\b"
- return RESERVED(t, "return")
-
-def t_yield(t):
- r"U[ ]+BORROW\b"
- return RESERVED(t, "yield")
-
-def t_ELIF(t):
- r"OR[ ]+IZ\b"
- return RESERVED(t, "elif")
-
-def t_ELSE(t):
- r"(?:(?:I[ ]+GIVE[ ]+UP|IZ[ ]+KEWL|ALL[ ]+DONE)|NOPE)\b"
- return RESERVED(t, "else")
-
-def t_COLON(t):
- r"\?"
- t.value = ":"
- return t
-
-def t_FROM(t):
- r"IN[ ]+MAI\b"
- return RESERVED(t, "from")
-
-def t_EXCEPT(t):
- r"O[ ]+NOES\b"
- return RESERVED(t, "except")
-
-def t_PLUS(t):
- r"ALONG[ ]+WITH\b"
- return OP(t, "+")
-def t_MINUS(t):
- r"TAKE[ ]+AWAY\b"
- return OP(t, "-")
-
-def t_PLUS_EQUAL(t):
- r"GETZ[ ]+ANOTHR\b"
- return OP(t, "+=")
-
-def t_MINUS_EQUAL(t):
- r"THROW[SZ]?[ ]+AWAY\b"
- return OP(t, "-=")
-
-def t_DIV_EQUAL(t):   # must precede t_DIV: ply adds function rules to
-    r"SMASHES[ ]+INTO[ ]+HAS\b"  # the master regex in definition order
-    return OP(t, "/=")           # and re takes the first alternative
-def t_DIV(t):                    # that matches, so the longer pattern
-    r"SMASHES[ ]+INTO\b"         # has to come first
-    return OP(t, "/")
-def t_FLOORDIV(t):               # // is floor division
-    r"SMASHES[ ]+NICELY[ ]+INTO\b"
-    return OP(t, "//")
-def t_MUL(t):
- r"OF[ ]THOSE\b"
- return OP(t, "*")
-def t_MUL_EQUAL(t):
- r"COPIES[ ]+(?:HIM|HER|IT)SELF[ ]+BY\b"
- return OP(t, "*=")
-def t_POW(t):
- r"BY[ ]+GRAYSKULL[ ]+POWER"
- return OP(t, "**")
-def t_IN(t):
- r"IN[ ]+(?:UR|THE|THIS)\b"
- return OP(t, "in")
-def t_del(t):
- r"DO[ ]+NOT[ ]+WANT\b"
- return RESERVED(t, "del")
-def t_and(t):
- r"\&"
- return RESERVED(t, "and")
-def t_or(t):
- r"OR[ ]+MABEE\b"
- return RESERVED(t, "or")
-
-def t_pass(t):
- r"I[ ]+IZ[ ]+CUTE\b"
- return RESERVED(t, "pass")
-
-def t_forever(t):
- r"WHILE[ ]+I[ ]+CUTE\b"
- return INLINE(t, "while 1")
-
-def t_def(t):
- r"SO[ ]+IM[ ]+LIKE\b"
- return RESERVED(t, "def")
-
-def t_class(t):
- r"ME[ ]+MAKE[ ]\b"
- return RESERVED(t, "class")
-
-def t_future(t):
- r"I[ ]+FUTURE[ ]+CAT[ ]+WITH\b"
- t.type = "FUTURE"
- return t
-
-def t_assert(t):
- r"SO[ ]+GOOD\b"
- return RESERVED(t, "assert")
-
-def t_assert_not(t):
- r"AINT[ ]+GOOD\b"
- return INLINE(t, "assert not ")
-
-def t_for(t):
- r"GIMME[ ]+EACH\b"
- return RESERVED(t, "for")
-
-def t_list(t):
- r"ALL[ ]+OF\b"
- return AUTOCALL(t, "tuple")
-
-RESERVED_VALUES = {
- "EASTERBUNNY": ("NUMBER", "0"),
- "CHEEZBURGER": ("NUMBER", "1"),
- "CHOKOLET": ("NUMBER", "-1"),
- "TWIN": ("NUMBER", "2"),
- "TWINZ": ("NUMBER", "2"),
- "TWINS": ("NUMBER", "2"),
- "EVILTWIN": ("NUMBER", "-2"),
- "EVILTWINZ": ("NUMBER", "-2"),
- "EVILTWINS": ("NUMBER", "-2"),
- "ALLFINGERZ": ("NUMBER", "10"),
- "TOEZ": ("NUMBER", "-10"),
- "ONE": ("NUMBER", "1"),
- "ONCE": ("NUMBER", "1"),
- "TWO": ("NUMBER", "2"),
- "TWICE": ("NUMBER", "2"),
- "THR33": ("NUMBER", "3"),
- "FOUR": ("NUMBER", "4"),
- "FIV": ("NUMBER", "5"),
- "SIKS": ("NUMBER", "6"),
- "SEVN": ("NUMBER", "7"),
- "ATE": ("NUMBER", "8"),
- "NINE": ("NUMBER", "9"),
- "MEH": ("NAME", "False"),
- "YEAH": ("NAME", "True"),
- "VISIBLE": ("PRINT", "stdout"),
- "COMPLAIN": ("PRINT", "stderr"),
- "AND": ("OP", ","),
- "BLACKHOLE": ("RESERVED", "ZeroDivisionError"),
- "DONOTLIKE": ("AUTOCALL", "AssertionError"),
-
- "ANTI": ("OP", "-"),
- "IZ": ("RESERVED", "if"),
- "GIMME": ("RESERVED", "import"),
- "LIKE": ("RESERVED", "as"),
- "OWN": ("OP", "."),
-
- "PLZ": ("RESERVED", "try"),
- "HALP": ("RESERVED", "raise"),
- "WHATEVER": ("RESERVED", "finally"),
- "KTHX": ("RESERVED", "continue"),
- "KTHXBYE": ("RESERVED", "break"),
-
- "OVER": ("OP", "/"),
-
- "AINT": ("RESERVED", "not"),
- "ME": ("RESERVED", "self"),
-
- "STRING": ("AUTOCALL", "str"),
- "NUMBR": ("AUTOCALL", "int"),
- "BIGNESS": ("AUTOCALL", "len"),
- "NUMBRZ": ("AUTOCALL", "range"),
- "ADDED": ("AUTOCALL", ".append"),
-
- "ARGZ": ("INLINE", "_lol_sys.argv"),
- "THINGZ": ("INLINE", "()"), # invisible tuple didn't sound right
- "THING": ("INLINE", "()"), # sometimes it's better in singular form
- "MY": ("INLINE", "self."),
- "MYSELF": ("INLINE", "(self)"),
-
- "EVEN": ("INLINE", "% 2 == 0"),
- "ODD": ("INLINE", "% 2 == 1"),
- "WIF": ("RESERVED", "with"),
- }
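-
-# For example, the line "VISIBLE ALLFINGERZ" lexes (via t_NAME below)
-# to PRINT("stdout") NUMBER("10"), which to_python() renders as
-#   print 10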
-
-def t_FLOAT(t):
-    r"""(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?"""
-    t.type = "NUMBER"
-    return t
-
-def t_INT(t):
- r"\d+"
- t.type = "NUMBER"
- return t
-
-def t_INVISIBLE(t):
- r"INVISIBLE([ ]+(LIST|STRING|BUCKET))?\b"
- if "LIST" in t.value:
- t.type = "INLINE"
- t.value = "[]"
- elif "STRING" in t.value:
- t.type = "INLINE"
- t.value = '""'
- elif "BUCKET" in t.value:
- t.type = "INLINE"
- t.value = "{}"
- else:
- RESERVED(t, "None")
- return t
-
-# Not consuming the newline. Needed for "IZ EASTERBUNNY? BTW comment"
-def t_COMMENT(t):
- r"[ ]*(?:BTW|WTF)[^\n]*"
- return t
-
-def t_NAME(t):
- r'[a-zA-Z_][a-zA-Z0-9_]*'
- if t.value in RESERVED_VALUES:
- type, value = RESERVED_VALUES[t.value]
- t.type = type
- t.value = value
- if t.type == "AUTOCALL":
- t.lexer.paren_stack.append(")")
- return t
-
-def t_WS(t):
- r' [ ]+ '
- if t.lexer.at_line_start and not t.lexer.paren_stack:
- return t
-
-
-# Don't generate newline tokens when inside of parens
-def t_newline(t):
- r'\n+'
- t.lexer.lineno += len(t.value)
- t.type = "NEWLINE"
- if not t.lexer.paren_stack:
- return t
-
-
-def t_error(t):
-    raise SyntaxError("Unknown symbol %r" % (t.value[0],))
-
-
-## I implemented INDENT / DEDENT generation as a post-processing filter
-
-# The original lex token stream contains WS and NEWLINE tokens.
-# WS will only occur before any other tokens on a line.
-
-# I have three filters. The first tags tokens by adding two
-# attributes: "must_indent" is True if the token must be indented
-# relative to the previous code; "at_line_start" is True for WS and
-# for the first non-WS/non-NEWLINE token on a line, and flags where
-# to check whether the line's indentation level has changed.
-
-# Python's syntax has three INDENT states
-# 0) no colon hence no need to indent
-# 1) "if 1: go()" - simple statements have a COLON but no need for an indent
-# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent
-NO_INDENT = 0
-MAY_INDENT = 1
-MUST_INDENT = 2
-
-# only care about whitespace at the start of a line
-def track_tokens_filter(lexer, tokens):
- lexer.at_line_start = at_line_start = True
- indent = NO_INDENT
- for token in tokens:
- token.at_line_start = at_line_start
-
- if token.type == "COLON":
- at_line_start = False
- indent = MAY_INDENT
- token.must_indent = False
-
- elif token.type == "NEWLINE":
- at_line_start = True
- if indent == MAY_INDENT:
- indent = MUST_INDENT
- token.must_indent = False
-
- elif token.type == "WS":
- assert token.at_line_start == True
- at_line_start = True
- token.must_indent = False
-
- elif token.type == "COMMENT":
- pass
-
- else:
- # A real token; only indent after COLON NEWLINE
- if indent == MUST_INDENT:
- token.must_indent = True
- else:
- token.must_indent = False
- at_line_start = False
-
- indent = NO_INDENT
-
- yield token
- lexer.at_line_start = at_line_start
-
-def _new_token(type, lineno):
- tok = lex.LexToken()
- tok.type = type
- tok.value = None
- tok.lineno = lineno
- tok.lexpos = -1
- return tok
-
-# Synthesize a DEDENT tag
-def DEDENT(lineno):
- return _new_token("DEDENT", lineno)
-
-# Synthesize an INDENT tag
-def INDENT(lineno):
- return _new_token("INDENT", lineno)
-
-
-# Track the indentation level and emit the right INDENT / DEDENT events.
-def indentation_filter(tokens):
- # A stack of indentation levels; will never pop item 0
- levels = [0]
- token = None
- depth = 0
- prev_was_ws = False
- for token in tokens:
-## if 1:
-## print "Process", token,
-## if token.at_line_start:
-## print "at_line_start",
-## if token.must_indent:
-## print "must_indent",
-## print
-
- # WS only occurs at the start of the line
- # There may be WS followed by NEWLINE so
- # only track the depth here. Don't indent/dedent
- # until there's something real.
- if token.type == "WS":
- assert depth == 0
- depth = len(token.value)
- prev_was_ws = True
- # Don't forward WS to the parser
- continue
-
- if token.type == "NEWLINE":
- depth = 0
- if prev_was_ws or token.at_line_start:
- # ignore blank lines
- continue
- # pass the other cases on through
- yield token
- continue
-
- if token.type == "COMMENT":
- yield token
- continue
-
- # then it must be a real token (not WS, not NEWLINE)
- # which can affect the indentation level
-
- prev_was_ws = False
- if token.must_indent:
- # The current depth must be larger than the previous level
- if not (depth > levels[-1]):
- raise IndentationError("expected an indented block")
-
- levels.append(depth)
- yield INDENT(token.lineno)
-
- elif token.at_line_start:
- # Must be on the same level or one of the previous levels
- if depth == levels[-1]:
- # At the same level
- pass
- elif depth > levels[-1]:
-                raise IndentationError("indentation increased but not in a new block")
- else:
- # Back up; but only if it matches a previous level
- try:
- i = levels.index(depth)
- except ValueError:
- raise IndentationError("inconsistent indentation")
- for _ in range(i+1, len(levels)):
- yield DEDENT(token.lineno)
- levels.pop()
-
- yield token
-
- ### Finished processing ###
-
- # Must dedent any remaining levels
- if len(levels) > 1:
- assert token is not None
- for _ in range(1, len(levels)):
- yield DEDENT(token.lineno)
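-
-# For example, the two-line program
-#   IZ YEAH?
-#       VISIBLE CHEEZBURGER
-# reaches this filter as RESERVED(if) NAME(True) COLON NEWLINE WS
-# PRINT NUMBER(1) and leaves it as RESERVED(if) NAME(True) COLON
-# NEWLINE INDENT PRINT NUMBER(1) DEDENT: the WS token is consumed and
-# replaced by balanced INDENT/DEDENT events.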
-
-
-# The top-level filter adds an ENDMARKER, if requested.
-# Python's grammar uses it.
-def token_filter(lexer, add_endmarker = True):
- token = None
- tokens = iter(lexer.token, None)
- tokens = track_tokens_filter(lexer, tokens)
- for token in indentation_filter(tokens):
- yield token
-
- if add_endmarker:
- lineno = 1
- if token is not None:
- lineno = token.lineno
- yield _new_token("ENDMARKER", lineno)
-
-class LOLLexer(object):
- def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
- self.lexer = lex.lex(debug=debug, optimize=optimize,
- lextab=lextab, reflags=reflags)
- self.token_stream = None
- def input(self, s, add_endmarker=True):
- self.lexer.paren_stack = []
- self.lexer.input(s)
- self.token_stream = token_filter(self.lexer, add_endmarker)
- def token(self):
- try:
- return self.token_stream.next()
- except StopIteration:
- return None
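-
-# A minimal usage sketch (hypothetical helper; nothing in this module
-# calls it): collect the (type, value) pairs for a LOLPython string.
-def _demo_token_types(source):
-    lexer = LOLLexer()
-    lexer.input(source)
-    # lexer.token() returns None when exhausted, so iter() can use it
-    # as a sentinel.
-    return [(t.type, t.value) for t in iter(lexer.token, None)]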
-
-# Helper class to generate logically correct indented Python code
-class IndentWriter(object):
- def __init__(self, outfile):
- self.outfile = outfile
- self.at_first_column = True
- self.indent = 0
- def write(self, text):
- if self.at_first_column:
- self.outfile.write(" "*self.indent)
- self.at_first_column = False
- self.outfile.write(text)
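-
-# Note: the generated code uses one space per indentation level (the
-# " "*self.indent above), which is unusually terse but legal Python.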
-
-# Split things up because the from __future__ statements must
-# go before any other code.
-HEADER = """# LOLPython to Python converter version 1.0
-# Written by Andrew Dalke, who should have been working on better things.
-
-"""
-
-BODY = """
-# sys is used for COMPLAIN and ARGZ
-import sys as _lol_sys
-
-"""
-
-def to_python(s):
- L = LOLLexer()
- L.input(s)
-
- header = StringIO()
- header.write(HEADER)
- header_output = IndentWriter(header)
-
- body = StringIO()
- body.write(BODY)
- body_output = IndentWriter(body)
-
- write = body_output.write
- output = body_output
-
-    for t in L.token_stream:
- if t.type == "NAME":
-            # Need to escape names which are Python keywords. Do that
-            # by appending an "_". But then I also need to make sure
-            # that "yield_" does not collide with "yield". And you
-            # thought you were being clever trying to use a Python
-            # keyword as a variable name. :)
- name = t.value.rstrip("_")
- if name in keyword.kwlist:
- write(t.value + "_ ")
- else:
- write(t.value + " ")
-
- elif t.type in ("RESERVED", "OP", "NUMBER", "CLOSE"):
- # While not pretty, I'll put a space after each
- # term because it's the simplest solution. Otherwise
- # I'll need to track the amount of whitespace between
- # the tokens in the original text.
- write(t.value+" ")
-
- # XXX escape names which are special in Python!
- elif t.type == "STRING":
- write(repr(t.value) + " ")
-
-        elif t.type == "COMMENT":
-            # Not enough information to keep comments on the correct
-            # indentation level. This is good enough. Ugly though.
-            # Maybe I need to fix the tokenizer. Strip any leading
-            # spaces the COMMENT pattern consumed, then drop the
-            # three-letter "BTW"/"WTF" marker.
-            write("#" + t.value.lstrip(" ")[3:] + "\n")
-            output.at_first_column = True
-
- elif t.type == "COLON":
- write(":")
-
-        elif t.type == "INDENT":
-            output.indent += 1
-        elif t.type == "DEDENT":
-            output.indent -= 1
- elif t.type == "NEWLINE":
- write(t.value)
- output.at_first_column = True
- output = body_output
- write = output.write
- elif t.type == "PRINT":
- if t.value == "stdout":
- write("print ")
- elif t.value == "stderr":
- write("print >>_lol_sys.stderr, ")
- else:
- raise AssertionError(t.value)
- elif t.type == "AUTOCALL":
- write(t.value + "(")
- elif t.type == "INLINE":
- write(t.value)
- elif t.type == "ENDMARKER":
- write("\n# The end.\n")
- elif t.type == "WS":
- output.leading_ws = t.value
- elif t.type == "FUTURE":
- # Write to the header. This is a hack. Err, a hairball.
- output = header_output
- write = output.write
- write("from __future__ import ")
-
- else:
- raise AssertionError(t.type)
-
- return header.getvalue() + body.getvalue()
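-
-# A small end-to-end sketch: to_python("VISIBLE 'O HAI'") returns the
-# HEADER and BODY boilerplate followed by the line
-#   print 'O HAI'
-# (each token is written with a trailing space, so the spacing is a
-# little looser than hand-written Python).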
-
-
-# API code for doing the translation and exec'ing the result
-
-def execfile(infile, module_name="__lolmain__"):
- "file, module_name -- exec the lolpython file in a newly created module"
- if not hasattr(infile, "read"):
- s = open(infile).read()
- else:
- s = infile.read()
- return execstring(s, module_name)
-
-def execstring(s, module_name="__lolmain__"):
- "s, module_name -- exec the lolpython string in a newly created module"
- python_s = to_python(s)
- # Doing this bit of trickiness so I can have LOLPython code act
- # like __main__. This fix is enough to fool unittest.
- m = types.ModuleType(module_name)
- sys.modules[module_name] = m
- exec python_s in m.__dict__
- return m
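-
-# For example, execstring("VISIBLE 'O HAI'") converts the program to
-# Python, runs it in a fresh module registered as __lolmain__ (so it
-# can pass for __main__ with tools such as unittest), prints O HAI,
-# and returns the module object.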
-
-def convert_file(infile, outfile):
- "read LOLPython code from infile, write converted Python code to outfile"
- if not hasattr(outfile, "write"):
- outfile = open(outfile, "w")
- outfile.write(to_python(infile.read()))
-
-def convert(filenames):
- "convert LOLPython filenames into corresponding Python '.py' files"
- if not filenames:
- convert_file(sys.stdin, sys.stdout)
- else:
- for filename in filenames:
- base, ext = os.path.splitext(filename)
- convert_file(open(filename), open(base+".py", "w"))
-
-def help():
- print """convert and run a lolpython program
-Commands are:
- lolpython Read a lolpython program from stdin and execute it
- lolpython --convert Convert a lolpython program from stdin
- and generate python to stdout
- lolpython --convert filename1 [filename....]
- Convert a list of lolpython files into Python files
- lolpython filename [arg1 [arg2 ...]]
- Run a lolpython program using optional arguments
-"""
-
-def main(argv):
- if len(argv) >= 2:
- if argv[1] == "--convert":
- convert(argv[2:])
- return
- if argv[1] == "--help":
- help()
- return
- if argv[1] == "--version":
- print __NAME__ + " " + __VERSION__
- return
-
- # otherwise, run the lolpython program
- sys.argv = sys.argv[1:]
- filename = sys.argv[0]
- execfile(filename, "__main__")
- else:
- # commands from stdin
- execfile(sys.stdin)
-
-
-
-if __name__ == "__main__":
- main(sys.argv)