cscg24-lolpython

CSCG 2024 Challenge 'Can I Haz Lolpython?'
git clone https://git.sinitax.com/sinitax/cscg24-lolpython
Log | Files | Refs | sfeed.txt

clex.py (3919B)


      1# ----------------------------------------------------------------------
      2# clex.py
      3#
      4# A lexer for ANSI C.
      5# ----------------------------------------------------------------------
      6
      7import sys
      8sys.path.insert(0,"../..")
      9
     10import ply.lex as lex
     11
     12# Reserved words
     13reserved = (
     14    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
     15    'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
     16    'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
     17    'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
     18    )
     19
     20tokens = reserved + (
     21    # Literals (identifier, integer constant, float constant, string constant, char const)
     22    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',
     23
     24    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
     25    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
     26    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
     27    'LOR', 'LAND', 'LNOT',
     28    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
     29    
     30    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
     31    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
     32    'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
     33
     34    # Increment/decrement (++,--)
     35    'PLUSPLUS', 'MINUSMINUS',
     36
     37    # Structure dereference (->)
     38    'ARROW',
     39
     40    # Conditional operator (?)
     41    'CONDOP',
     42    
     43    # Delimeters ( ) [ ] { } , . ; :
     44    'LPAREN', 'RPAREN',
     45    'LBRACKET', 'RBRACKET',
     46    'LBRACE', 'RBRACE',
     47    'COMMA', 'PERIOD', 'SEMI', 'COLON',
     48
     49    # Ellipsis (...)
     50    'ELLIPSIS',
     51    )
     52
     53# Completely ignored characters
     54t_ignore           = ' \t\x0c'
     55
     56# Newlines
     57def t_NEWLINE(t):
     58    r'\n+'
     59    t.lexer.lineno += t.value.count("\n")
     60    
     61# Operators
     62t_PLUS             = r'\+'
     63t_MINUS            = r'-'
     64t_TIMES            = r'\*'
     65t_DIVIDE           = r'/'
     66t_MOD              = r'%'
     67t_OR               = r'\|'
     68t_AND              = r'&'
     69t_NOT              = r'~'
     70t_XOR              = r'\^'
     71t_LSHIFT           = r'<<'
     72t_RSHIFT           = r'>>'
     73t_LOR              = r'\|\|'
     74t_LAND             = r'&&'
     75t_LNOT             = r'!'
     76t_LT               = r'<'
     77t_GT               = r'>'
     78t_LE               = r'<='
     79t_GE               = r'>='
     80t_EQ               = r'=='
     81t_NE               = r'!='
     82
     83# Assignment operators
     84
     85t_EQUALS           = r'='
     86t_TIMESEQUAL       = r'\*='
     87t_DIVEQUAL         = r'/='
     88t_MODEQUAL         = r'%='
     89t_PLUSEQUAL        = r'\+='
     90t_MINUSEQUAL       = r'-='
     91t_LSHIFTEQUAL      = r'<<='
     92t_RSHIFTEQUAL      = r'>>='
     93t_ANDEQUAL         = r'&='
     94t_OREQUAL          = r'\|='
     95t_XOREQUAL         = r'^='
     96
     97# Increment/decrement
     98t_PLUSPLUS         = r'\+\+'
     99t_MINUSMINUS       = r'--'
    100
    101# ->
    102t_ARROW            = r'->'
    103
    104# ?
    105t_CONDOP           = r'\?'
    106
    107# Delimeters
    108t_LPAREN           = r'\('
    109t_RPAREN           = r'\)'
    110t_LBRACKET         = r'\['
    111t_RBRACKET         = r'\]'
    112t_LBRACE           = r'\{'
    113t_RBRACE           = r'\}'
    114t_COMMA            = r','
    115t_PERIOD           = r'\.'
    116t_SEMI             = r';'
    117t_COLON            = r':'
    118t_ELLIPSIS         = r'\.\.\.'
    119
    120# Identifiers and reserved words
    121
    122reserved_map = { }
    123for r in reserved:
    124    reserved_map[r.lower()] = r
    125
    126def t_ID(t):
    127    r'[A-Za-z_][\w_]*'
    128    t.type = reserved_map.get(t.value,"ID")
    129    return t
    130
    131# Integer literal
    132t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
    133
    134# Floating literal
    135t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
    136
    137# String literal
    138t_SCONST = r'\"([^\\\n]|(\\.))*?\"'
    139
    140# Character constant 'c' or L'c'
    141t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''
    142
    143# Comments
    144def t_comment(t):
    145    r' /\*(.|\n)*?\*/'
    146    t.lineno += t.value.count('\n')
    147
    148# Preprocessor directive (ignored)
    149def t_preprocessor(t):
    150    r'\#(.)*?\n'
    151    t.lineno += 1
    152    
    153def t_error(t):
    154    print "Illegal character %s" % repr(t.value[0])
    155    t.lexer.skip(1)
    156    
# Build the lexer at import time and expose it as the module-level `lexer`.
# NOTE(review): optimize=1 presumably makes PLY reuse a previously generated
# lexer table instead of re-reading the rules -- confirm against the PLY
# documentation, and if so remove any stale generated table after editing a
# rule in this file.
lexer = lex.lex(optimize=1)
# When executed as a script, hand the lexer to PLY's runmain() helper.
if __name__ == "__main__":
    lex.runmain(lexer)
    160
    161    
    162
    163
    164