Diffstat (limited to 'chall/lolpython.py')
| -rw-r--r-- | chall/lolpython.py | 768 |
1 file changed, 0 insertions, 768 deletions
diff --git a/chall/lolpython.py b/chall/lolpython.py
deleted file mode 100644
index 6337124..0000000
--- a/chall/lolpython.py
+++ /dev/null
@@ -1,768 +0,0 @@
-#!/usr/bin/env python
-# Implementation of the LOLPython language.
-# Converts from LOLPython to Python then optionally runs the Python.
-
-# This package depends on PLY -- http://www.dabeaz.com/ply/
-
-# Written by Andrew Dalke <dalke@dalkescientific.com>
-# Dalke Scientific Software, LLC
-# 1 June 2007, Gothenburg, Sweden
-#
-# This software is in the public domain. For details see:
-#   http://creativecommons.org/licenses/publicdomain/
-
-
-import sys
-import keyword
-import os
-import types
-from cStringIO import StringIO
-from ply import *
-
-
-__NAME__ = "lolpython"
-__VERSION__ = "1.0"
-
-# Translating LOLPython tokens to Python tokens
-# This could be cleaned up. For example, some of
-# these tokens could be merged into one.
-tokens = (
-    "NAME",      # variable names
-    "RESERVED",  # Used for Python reserved names
-    "NUMBER",    # Integers and floats
-    "STRING",
-    "OP",        # Like the Python OP
-    "CLOSE",     # Don't really need this..
-
-    "COMMENT",
-    "AUTOCALL",  # write t.value then add '('
-    "INLINE",    # write t.value directly
-    "FUTURE",    # for the "I FUTURE CAT WITH" statement
-    "PRINT",     # VISIBLE -> stdout or COMPLAIN -> stderr
-
-    "ENDMARKER",
-    "COLON",
-    "WS",
-    "NEWLINE",
-)
-
-# Helper functions for making given token types
-def OP(t, value):
-    t.type = "OP"
-    t.value = value
-    return t
-
-def RESERVED(t, value):
-    t.type = "RESERVED"
-    t.value = value
-    return t
-
-def AUTOCALL(t, value):
-    t.type = "AUTOCALL"
-    t.value = "tuple"
-    t.lexer.paren_stack.append(")")
-    return t
-
-def INLINE(t, value):
-    t.type = "INLINE"
-    t.value = value
-    return t
-
-#####
-
-# ply uses a large regex for token detection, and sre is limited to
-# 100 groups. This grammar pushes the limit. I use (?:non-grouping)
-# parens to keep the count down.
-
-
-def t_ASSIGN(t):            # cannot be a simple pattern because it must
-    r'CAN[ ]+HA[SZ]\b'      # come before the t_NAME definition
-    return OP(t, "=")
-
-def t_SINGLE_QUOTE_STRING(t):
-    r"'([^\\']+|\\'|\\\\)*'"   # I think this is right ...
-    t.type = "STRING"
-    t.value = t.value[1:-1].decode("string-escape")
-    return t
-
-def t_DOUBLE_QUOTE_STRING(t):
-    r'"([^\\"]+|\\"|\\\\)*"'
-    t.type = "STRING"
-    t.value = t.value[1:-1].decode("string-escape")
-    print(t.value)
-    return t
-
-# and LOL quoted strings! They end with /LOL
-# No way to have "/LOL" in the string.
-def t_LOL_STRING(t):
-    r"LOL[ ]*((?!/LOL).|\n)*[ ]*/LOL"
-    t.type = "STRING"
-    t.value = t.value[3:-4].strip(" ")
-    return t
-
-# Aliases for the same thing - for extra cuteness
-def t_LSQUARE(t):
-    r"(?:SOME|LOOK[ ]AT|LET[ ]+THE)\b"
-    t.lexer.paren_stack.append(']')
-    return OP(t, "[")
-
-def t_LPAREN(t):
-    r"(?:WIT|THEZ)\b"
-    t.lexer.paren_stack.append(')')
-    return OP(t, "(")
-
-def t_LBRACE(t):
-    r"BUCKET\b"
-    t.lexer.paren_stack.append("}")
-    return OP(t, "{")
-
-def t_CLOSE(t):
-    r"(?:OK(!+|\b)|!+)"
-    stack = t.lexer.paren_stack
-    if t.value.startswith("OK"):
-        num_closes = len(t.value)-1   # OK -> 1, OK! -> 2, OK!! -> 3
-    else:
-        num_closes = len(t.value)     # ! -> 1, !! -> 2
-    # Which close is this? I use "OK" to match (, [ and {
-    if len(stack) < num_closes:
-        raise AssertionError("not enough opens on the stack: line %d"
-                             % (t.lineno,))
-    t.value = "".join(stack[-num_closes:][::-1])
-    del stack[-num_closes:]
-    return t
-
-def t_EQ(t):
-    r"KINDA[ ]+LIKE\b"
-    return OP(t, "==")
-
-def t_NE(t):
-    r"(?:KINDA[ ]+)?NOT[ ]+LIKE\b"
-    return OP(t, "!=")
-
-def t_is(t):
-    r"KINDA[ ]+IS\b"
-    return RESERVED(t, "is")
-
-def t_GT(t):
-    r"ATE[ ]+MORE[ ]+CHEEZBURGERS?[ ]+THAN\b"
-    return OP(t, ">")
-
-def t_LT(t):
-    r"ATE[ ]+FEWER[ ]+CHEEZBURGERS?[ ]+THAN\b"
-    return OP(t, "<")
-
-def t_GTE(t):
-    r"BIG[ ]+LIKE\b"
-    return OP(t, ">=")
-
-def t_LTE(t):
-    r"SMALL[ ]+LIKE\b"
-    return OP(t, "<=")
-
-def t_RETURN(t):
-    r"U[ ]+TAKE\b"
-    return RESERVED(t, "return")
-
-def t_yield(t):
-    r"U[ ]+BORROW\b"
-    return RESERVED(t, "yield")
-
-def t_ELIF(t):
-    r"OR[ ]+IZ\b"
-    return RESERVED(t, "elif")
-
-def t_ELSE(t):
-    r"(?:(?:I[ ]+GIVE[ ]+UP|IZ[ ]+KEWL|ALL[ ]+DONE)|NOPE)\b"
-    return RESERVED(t, "else")
-
-def t_COLON(t):
-    r"\?"
-    t.value = ":"
-    return t
-
-def t_FROM(t):
-    r"IN[ ]+MAI\b"
-    return RESERVED(t, "from")
-
-def t_EXCEPT(t):
-    r"O[ ]+NOES\b"
-    return RESERVED(t, "except")
-
-def t_PLUS(t):
-    r"ALONG[ ]+WITH\b"
-    return OP(t, "+")
-def t_MINUS(t):
-    r"TAKE[ ]+AWAY\b"
-    return OP(t, "-")
-
-def t_PLUS_EQUAL(t):
-    r"GETZ[ ]+ANOTHR\b"
-    return OP(t, "+=")
-
-def t_MINUS_EQUAL(t):
-    r"THROW[SZ]?[ ]+AWAY\b"
-    return OP(t, "-=")
-
-def t_DIV(t):
-    r"SMASHES[ ]+INTO\b"
-    return OP(t, "/")
-def t_DIV_EQUAL(t):
-    r"SMASHES[ ]+INTO[ ]+HAS\b"
-    return OP(t, "/=")
-def t_TRUEDIV(t):
-    r"SMASHES[ ]+NICELY[ ]+INTO\b"
-    return OP(t, "//")
-def t_MUL(t):
-    r"OF[ ]THOSE\b"
-    return OP(t, "*")
-def t_MUL_EQUAL(t):
-    r"COPIES[ ]+(?:HIM|HER|IT)SELF[ ]+BY\b"
-    return OP(t, "*=")
-def t_POW(t):
-    r"BY[ ]+GRAYSKULL[ ]+POWER"
-    return OP(t, "**")
-def t_IN(t):
-    r"IN[ ]+(?:UR|THE|THIS)\b"
-    return OP(t, "in")
-def t_del(t):
-    r"DO[ ]+NOT[ ]+WANT\b"
-    return RESERVED(t, "del")
-def t_and(t):
-    r"\&"
-    return RESERVED(t, "and")
-def t_or(t):
-    r"OR[ ]+MABEE\b"
-    return RESERVED(t, "or")
-
-def t_pass(t):
-    r"I[ ]+IZ[ ]+CUTE\b"
-    return RESERVED(t, "pass")
-
-def t_forever(t):
-    r"WHILE[ ]+I[ ]+CUTE\b"
-    return INLINE(t, "while 1")
-
-def t_def(t):
-    r"SO[ ]+IM[ ]+LIKE\b"
-    return RESERVED(t, "def")
-
-def t_class(t):
-    r"ME[ ]+MAKE[ ]\b"
-    return RESERVED(t, "class")
-
-def t_future(t):
-    r"I[ ]+FUTURE[ ]+CAT[ ]+WITH\b"
-    t.type = "FUTURE"
-    return t
-
-def t_assert(t):
-    r"SO[ ]+GOOD\b"
-    return RESERVED(t, "assert")
-
-def t_assert_not(t):
-    r"AINT[ ]+GOOD\b"
-    return INLINE(t, "assert not ")
-
-def t_for(t):
-    r"GIMME[ ]+EACH\b"
-    return RESERVED(t, "for")
-
-def t_list(t):
-    r"ALL[ ]+OF\b"
-    return AUTOCALL(t, "tuple")
-
-RESERVED_VALUES = {
-    "EASTERBUNNY": ("NUMBER", "0"),
-    "CHEEZBURGER": ("NUMBER", "1"),
-    "CHOKOLET": ("NUMBER", "-1"),
-    "TWIN": ("NUMBER", "2"),
-    "TWINZ": ("NUMBER", "2"),
-    "TWINS": ("NUMBER", "2"),
-    "EVILTWIN": ("NUMBER", "-2"),
-    "EVILTWINZ": ("NUMBER", "-2"),
-    "EVILTWINS": ("NUMBER", "-2"),
-    "ALLFINGERZ": ("NUMBER", "10"),
-    "TOEZ": ("NUMBER", "-10"),
-    "ONE": ("NUMBER", "1"),
-    "ONCE": ("NUMBER", "1"),
-    "TWO": ("NUMBER", "2"),
-    "TWICE": ("NUMBER", "2"),
-    "THR33": ("NUMBER", "3"),
-    "FOUR": ("NUMBER", "4"),
-    "FIV": ("NUMBER", "5"),
-    "SIKS": ("NUMBER", "6"),
-    "SEVN": ("NUMBER", "7"),
-    "ATE": ("NUMBER", "8"),
-    "NINE": ("NUMBER", "9"),
-    "MEH": ("NAME", "False"),
-    "YEAH": ("NAME", "True"),
-    "VISIBLE": ("PRINT", "stdout"),
-    "COMPLAIN": ("PRINT", "stderr"),
-    "AND": ("OP", ","),
-
-    "BLACKHOLE": ("RESERVED", "ZeroDivisionError"),
-    "DONOTLIKE": ("AUTOCALL", "AssertionError"),
-
-    "ANTI": ("OP", "-"),
-    "IZ": ("RESERVED", "if"),
-    "GIMME": ("RESERVED", "import"),
-    "LIKE": ("RESERVED", "as"),
-    "OWN": ("OP", "."),
-
-    "PLZ": ("RESERVED", "try"),
-    "HALP": ("RESERVED", "raise"),
-    "WHATEVER": ("RESERVED", "finally"),
-    "KTHX": ("RESERVED", "continue"),
-    "KTHXBYE": ("RESERVED", "break"),
-
-    "OVER": ("OP", "/"),
-
-    "AINT": ("RESERVED", "not"),
-    "ME": ("RESERVED", "self"),
-
-    "STRING": ("AUTOCALL", "str"),
-    "NUMBR": ("AUTOCALL", "int"),
-    "BIGNESS": ("AUTOCALL", "len"),
-    "NUMBRZ": ("AUTOCALL", "range"),
-    "ADDED": ("AUTOCALL", ".append"),
-
-    "ARGZ": ("INLINE", "_lol_sys.argv"),
-    "THINGZ": ("INLINE", "()"),  # invisible tuple didn't sound right
-    "THING": ("INLINE", "()"),   # sometimes it's better in singular form
-    "MY": ("INLINE", "self."),
-    "MYSELF": ("INLINE", "(self)"),
-
-    "EVEN": ("INLINE", "% 2 == 0"),
-    "ODD": ("INLINE", "% 2 == 1"),
-    "WIF": ("RESERVED", "with"),
-    }
-
-def t_FLOAT(t):
-    r"""(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]? \d+)?"""
-    t.value = t.value
-    t.type = "NUMBER"
-    return t
-
-def t_INT(t):
-    r"\d+"
-    t.type = "NUMBER"
-    return t
-
-def t_INVISIBLE(t):
-    r"INVISIBLE([ ]+(LIST|STRING|BUCKET))?\b"
-    if "LIST" in t.value:
-        t.type = "INLINE"
-        t.value = "[]"
-    elif "STRING" in t.value:
-        t.type = "INLINE"
-        t.value = '""'
-    elif "BUCKET" in t.value:
-        t.type = "INLINE"
-        t.value = "{}"
-    else:
-        RESERVED(t, "None")
-    return t
-
-# Not consuming the newline. Needed for "IZ EASTERBUNNY? BTW comment"
-def t_COMMENT(t):
-    r"[ ]*(?:BTW|WTF)[^\n]*"
-    return t
-
-def t_NAME(t):
-    r'[a-zA-Z_][a-zA-Z0-9_]*'
-    if t.value in RESERVED_VALUES:
-        type, value = RESERVED_VALUES[t.value]
-        t.type = type
-        t.value = value
-        if t.type == "AUTOCALL":
-            t.lexer.paren_stack.append(")")
-    return t
-
-def t_WS(t):
-    r' [ ]+ '
-    if t.lexer.at_line_start and not t.lexer.paren_stack:
-        return t
-
-
-# Don't generate newline tokens when inside of parens
-def t_newline(t):
-    r'\n+'
-    t.lexer.lineno += len(t.value)
-    t.type = "NEWLINE"
-    if not t.lexer.paren_stack:
-        return t
-
-
-def t_error(t):
-    raise SyntaxError("Unknown symbol %r" % (t.value[0],))
-    print "Skipping", repr(t.value[0])
-    t.lexer.skip(1)
-
-
-## I implemented INDENT / DEDENT generation as a post-processing filter
-
-# The original lex token stream contains WS and NEWLINE characters.
-# WS will only occur before any other tokens on a line.
-
-# I have three filters. One tags tokens by adding two attributes.
-# "must_indent" is True if the token must be indented from the
-# previous code. The other is "at_line_start" which is True for WS
-# and the first non-WS/non-NEWLINE on a line. It flags the check to
-# see if the new line has changed indentation level.
-
-# Python's syntax has three INDENT states
-#  0) no colon hence no need to indent
-#  1) "if 1: go()" - simple statements have a COLON but no need for an indent
-#  2) "if 1:\n  go()" - complex statements have a COLON NEWLINE and must indent
-NO_INDENT = 0
-MAY_INDENT = 1
-MUST_INDENT = 2
-
-# only care about whitespace at the start of a line
-def track_tokens_filter(lexer, tokens):
-    lexer.at_line_start = at_line_start = True
-    indent = NO_INDENT
-    for token in tokens:
-        token.at_line_start = at_line_start
-
-        if token.type == "COLON":
-            at_line_start = False
-            indent = MAY_INDENT
-            token.must_indent = False
-
-        elif token.type == "NEWLINE":
-            at_line_start = True
-            if indent == MAY_INDENT:
-                indent = MUST_INDENT
-            token.must_indent = False
-
-        elif token.type == "WS":
-            assert token.at_line_start == True
-            at_line_start = True
-            token.must_indent = False
-
-        elif token.type == "COMMENT":
-            pass
-
-        else:
-            # A real token; only indent after COLON NEWLINE
-            if indent == MUST_INDENT:
-                token.must_indent = True
-            else:
-                token.must_indent = False
-            at_line_start = False
-            indent = NO_INDENT
-
-        yield token
-    lexer.at_line_start = at_line_start
-
-def _new_token(type, lineno):
-    tok = lex.LexToken()
-    tok.type = type
-    tok.value = None
-    tok.lineno = lineno
-    tok.lexpos = -1
-    return tok
-
-# Synthesize a DEDENT tag
-def DEDENT(lineno):
-    return _new_token("DEDENT", lineno)
-
-# Synthesize an INDENT tag
-def INDENT(lineno):
-    return _new_token("INDENT", lineno)
-
-
-# Track the indentation level and emit the right INDENT / DEDENT events.
-def indentation_filter(tokens):
-    # A stack of indentation levels; will never pop item 0
-    levels = [0]
-    token = None
-    depth = 0
-    prev_was_ws = False
-    for token in tokens:
-##        if 1:
-##            print "Process", token,
-##            if token.at_line_start:
-##                print "at_line_start",
-##            if token.must_indent:
-##                print "must_indent",
-##            print
-
-        # WS only occurs at the start of the line
-        # There may be WS followed by NEWLINE so
-        # only track the depth here. Don't indent/dedent
-        # until there's something real.
-        if token.type == "WS":
-            assert depth == 0
-            depth = len(token.value)
-            prev_was_ws = True
-            # Don't forward WS to the parser
-            continue
-
-        if token.type == "NEWLINE":
-            depth = 0
-            if prev_was_ws or token.at_line_start:
-                # ignore blank lines
-                continue
-            # pass the other cases on through
-            yield token
-            continue
-
-        if token.type == "COMMENT":
-            yield token
-            continue
-
-        # then it must be a real token (not WS, not NEWLINE)
-        # which can affect the indentation level
-
-        prev_was_ws = False
-        if token.must_indent:
-            # The current depth must be larger than the previous level
-            if not (depth > levels[-1]):
-                raise IndentationError("expected an indented block")
-
-            levels.append(depth)
-            yield INDENT(token.lineno)
-
-        elif token.at_line_start:
-            # Must be on the same level or one of the previous levels
-            if depth == levels[-1]:
-                # At the same level
-                pass
-            elif depth > levels[-1]:
-                raise IndentationError("indentation increase but not in new block")
-            else:
-                # Back up; but only if it matches a previous level
-                try:
-                    i = levels.index(depth)
-                except ValueError:
-                    raise IndentationError("inconsistent indentation")
-                for _ in range(i+1, len(levels)):
-                    yield DEDENT(token.lineno)
-                    levels.pop()
-
-        yield token
-
-    ### Finished processing ###
-
-    # Must dedent any remaining levels
-    if len(levels) > 1:
-        assert token is not None
-        for _ in range(1, len(levels)):
-            yield DEDENT(token.lineno)
-
-
-# The top-level filter adds an ENDMARKER, if requested.
-# Python's grammar uses it.
-def token_filter(lexer, add_endmarker = True):
-    token = None
-    tokens = iter(lexer.token, None)
-    tokens = track_tokens_filter(lexer, tokens)
-    for token in indentation_filter(tokens):
-        yield token
-
-    if add_endmarker:
-        lineno = 1
-        if token is not None:
-            lineno = token.lineno
-        yield _new_token("ENDMARKER", lineno)
-
-class LOLLexer(object):
-    def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
-        self.lexer = lex.lex(debug=debug, optimize=optimize,
-                             lextab=lextab, reflags=reflags)
-        self.token_stream = None
-    def input(self, s, add_endmarker=True):
-        self.lexer.paren_stack = []
-        self.lexer.input(s)
-        self.token_stream = token_filter(self.lexer, add_endmarker)
-    def token(self):
-        try:
-            return self.token_stream.next()
-        except StopIteration:
-            return None
-
-# Helper class to generate logically correct indented Python code
-class IndentWriter(object):
-    def __init__(self, outfile):
-        self.outfile = outfile
-        self.at_first_column = True
-        self.indent = 0
-    def write(self, text):
-        if self.at_first_column:
-            self.outfile.write(" "*self.indent)
-            self.at_first_column = False
-        self.outfile.write(text)
-
-# Split things up because the from __future__ statements must
-# go before any other code.
-HEADER = """# LOLPython to Python converter version 1.0
-# Written by Andrew Dalke, who should have been working on better things.
-
-"""
-
-BODY = """
-# sys is used for COMPLAIN and ARGZ
-import sys as _lol_sys
-
-"""
-
-def to_python(s):
-    L = LOLLexer()
-    L.input(s)
-
-    header = StringIO()
-    header.write(HEADER)
-    header_output = IndentWriter(header)
-
-    body = StringIO()
-    body.write(BODY)
-    body_output = IndentWriter(body)
-
-    write = body_output.write
-    output = body_output
-
-    for t in iter(L.token_stream):
-        if t.type == "NAME":
-            # Need to escape names which are Python variables. Do that
-            # by appending an "_". But then I also need to make sure
-            # that "yield_" does not collide with "yield". And you
-            # thought you were being clever trying to use a Python
-            # variable. :)
-            name = t.value.rstrip("_")
-            if name in keyword.kwlist:
-                write(t.value + "_ ")
-            else:
-                write(t.value + " ")
-
-        elif t.type in ("RESERVED", "OP", "NUMBER", "CLOSE"):
-            # While not pretty, I'll put a space after each
-            # term because it's the simplest solution. Otherwise
-            # I'll need to track the amount of whitespace between
-            # the tokens in the original text.
-            write(t.value+" ")
-
-        # XXX escape names which are special in Python!
-        elif t.type == "STRING":
-            write(repr(t.value) + " ")
-
-        elif t.type == "COMMENT":
-            # Not enough information to keep comments on the correct
-            # indentation level. This is good enough. Ugly though.
-            # Maybe I need to fix the tokenizer.
-            write("#"+ t.value[3:]+"\n")
-            output.at_first_column = True
-
-        elif t.type == "COLON":
-            write(":")
-
-        elif t.type == "INDENT":
-            output.indent += 1
-            pass
-        elif t.type == "DEDENT":
-            output.indent -= 1
-            pass
-        elif t.type == "NEWLINE":
-            write(t.value)
-            output.at_first_column = True
-            output = body_output
-            write = output.write
-        elif t.type == "PRINT":
-            if t.value == "stdout":
-                write("print ")
-            elif t.value == "stderr":
-                write("print >>_lol_sys.stderr, ")
-            else:
-                raise AssertionError(t.value)
-        elif t.type == "AUTOCALL":
-            write(t.value + "(")
-        elif t.type == "INLINE":
-            write(t.value)
-        elif t.type == "ENDMARKER":
-            write("\n# The end.\n")
-        elif t.type == "WS":
-            output.leading_ws = t.value
-        elif t.type == "FUTURE":
-            # Write to the header. This is a hack. Err, a hairball.
-            output = header_output
-            write = output.write
-            write("from __future__ import ")
-
-        else:
-            raise AssertionError(t.type)
-
-    return header.getvalue() + body.getvalue()
-
-
-# API code for doing the translation and exec'ing the result
-
-def execfile(infile, module_name="__lolmain__"):
-    "file, module_name -- exec the lolpython file in a newly created module"
-    if not hasattr(infile, "read"):
-        s = open(infile).read()
-    else:
-        s = infile.read()
-    return execstring(s, module_name)
-
-def execstring(s, module_name="__lolmain__"):
-    "s, module_name -- exec the lolpython string in a newly created module"
-    python_s = to_python(s)
-    # Doing this bit of trickiness so I can have LOLPython code act
-    # like __main__. This fix is enough to fool unittest.
-    m = types.ModuleType(module_name)
-    sys.modules[module_name] = m
-    exec python_s in m.__dict__
-    return m
-
-def convert_file(infile, outfile):
-    "read LOLPython code from infile, write converted Python code to outfile"
-    if not hasattr(outfile, "write"):
-        outfile = open(outfile, "w")
-    outfile.write(to_python(infile.read()))
-
-def convert(filenames):
-    "convert LOLPython filenames into corresponding Python '.py' files"
-    if not filenames:
-        convert_file(sys.stdin, sys.stdout)
-    else:
-        for filename in filenames:
-            base, ext = os.path.splitext(filename)
-            convert_file(open(filename), open(base+".py", "w"))
-
-def help():
-    print """convert and run a lolpython program
-Commands are:
-  lolpython             Read a lolpython program from stdin and execute it
-  lolpython --convert   Convert a lolpython program from stdin
-                            and generate python to stdout
-  lolpython --convert filename1 [filename....]
-                        Convert a list of lolpython files into Python files
-  lolpython filename [arg1 [arg2 ...]]
-                        Run a lolpython program using optional arguments
-"""
-
-def main(argv):
-    if len(argv) >= 2:
-        if argv[1] == "--convert":
-            convert(argv[2:])
-            return
-        if argv[1] == "--help":
-            help()
-            return
-        if argv[1] == "--version":
-            print __NAME__ + " " + __VERSION__
-            return
-
-        # otherwise, run the lolpython program
-        sys.argv = sys.argv[1:]
-        filename = sys.argv[0]
-        execfile(filename, "__main__")
-    else:
-        # commands from stdin
-        execfile(sys.stdin)
-
-
-
-if __name__ == "__main__":
-    main(sys.argv)
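
For reference, the deleted module exposed a small Python API alongside its CLI: to_python() returns the generated Python source as a string, and execstring() runs that source in a newly created module. Below is a minimal sketch of driving it (Python 2, like the module itself; the sample program is illustrative, and it assumes lolpython.py is importable on sys.path with PLY installed):

    import lolpython

    # VISIBLE translates to `print`, so this is a LOLPython hello world
    lol_source = 'VISIBLE "HAI WORLD!"\n'

    # Translate only: emit the generated Python source to stdout
    print lolpython.to_python(lol_source)

    # Or translate and execute in a newly created module (default "__lolmain__")
    lolpython.execstring(lol_source)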
