"""Token definitions and a hand-written tokenizer."""

from enum import Enum
from typing import List


class T(Enum):
    """Token kinds produced by :func:`tokenize`."""

    # Literals and names
    STRING = 0
    INTEGER = 1
    FLOAT = 2
    BOOL = 3
    LABEL = 4
    IDENT = 5
    IDENTASSIGN = 6

    # Operators and punctuation
    PLUS = 20
    MINUS = 21
    STAR = 22
    SLASH = 23
    HAT = 24
    EXP = 25
    EQ = 26
    DEF = 27
    DOUBLEQUOTES = 28
    GREATER = 29
    LESSER = 30

    QUESTION = 60
    LCURLY = 61
    RCURLY = 62
    BLOCK = 63

    # Built-in words
    TYPE = 98
    PRINT = 99
    FUNCTION = 100
    DUP = 101
    DROP = 102

    # Bookkeeping kinds
    COMMENT = 997
    ILEGAL = 998
    EOF = 999


class Token:
    """A single lexical token.

    Attributes:
        type: The token kind (a member of ``T``).
        literal: The source text associated with the token.
        pos: The source index passed in by the creator of the token.
    """

    def __init__(self, type: T, literal: str, pos: int) -> None:
        self.type = type
        self.literal = literal
        # Store the argument itself; the previous code stored the builtin
        # ``int`` type here by mistake.
        self.pos = pos

    def __str__(self) -> str:
        """Return ``"(<literal> <kind description>)"``.

        Kinds that have no entry in ``look_up`` fall back to the lower-cased
        enum member name instead of raising ``KeyError``.
        """
        description = look_up.get(self.type, self.type.name.lower())
        return f"({self.literal} {description})"


# Human-readable description of each token kind, used by Token.__str__.
look_up = {
    T.BOOL: "bool",
    T.STRING: "string",
    T.INTEGER: "integer",
    T.IDENT: "identifier",
    T.IDENTASSIGN: "assignment",
    T.LABEL: "label",
    T.FUNCTION: "function",
    T.DUP: "dup",
    T.DROP: "drop",
    T.PLUS: "operator",
    T.MINUS: "operator",
    T.GREATER: "operator",
    T.LESSER: "operator",
    T.STAR: "operator",
    T.SLASH: "operator",
    T.DOUBLEQUOTES: "\"",
    T.EOF: "eof",
    T.ILEGAL: "ilegal",
    T.EQ: "operator",
    T.DEF: "def",
    T.PRINT: "print",
    T.TYPE: "type",
    T.COMMENT: "comment",
    T.BLOCK: "operator"
}

# Characters that always form a complete token on their own.
_SINGLE_CHAR_TOKENS = {
    "+": T.PLUS,
    "-": T.MINUS,
    "*": T.STAR,
    "/": T.SLASH,
    ">": T.GREATER,
    "<": T.LESSER,
}

# Alphabetic words that get a dedicated token kind instead of T.IDENT.
_KEYWORDS = {
    "print": T.PRINT,
    "type": T.TYPE,
    "dup": T.DUP,
    "drop": T.DROP,
    "def": T.DEF,
}


# Lexing helper functions

def is_label(x):
    """Return True if *x* is ``":"`` followed by one or more letters.

    Slicing (rather than indexing) keeps the empty string from raising.
    """
    return x[:1] == ":" and x[1:].isalpha()


def is_ws(x):
    """Return True if *x* is a space, newline, tab or carriage return."""
    return x in (" ", "\n", "\t", "\r")


def read_ident(source: str, index: int) -> tuple[str, int]:
    """Read a run of alphabetic characters starting at *index*.

    Returns:
        ``(text, next_index)`` where ``next_index`` is the index of the first
        character that is not part of the run (or ``len(source)``).
    """
    end = index
    while end < len(source) and source[end].isalpha():
        end += 1
    return (source[index:end], end)


def read_string(source: str, index: int) -> tuple[str, int]:
    """Read string contents starting at *index* up to the next ``"``.

    *index* is expected to point just past the opening quote.

    Returns:
        ``(text, quote_index)`` where ``quote_index`` is the index of the
        closing quote. If the string is unterminated the rest of the source
        is returned and ``quote_index`` is ``len(source)``.
    """
    end = index
    # Bounds check so an unterminated string does not raise IndexError.
    while end < len(source) and source[end] != "\"":
        end += 1
    return (source[index:end], end)


def read_integer(source: str, index: int) -> tuple[str, int]:
    """Read a run of digit characters starting at *index*.

    Returns:
        ``(text, next_index)`` where ``next_index`` is the index of the first
        character that is not a digit (or ``len(source)``).
    """
    end = index
    # Bounds check so digits at the very end of the source do not raise.
    while end < len(source) and source[end].isdigit():
        end += 1
    return (source[index:end], end)


def tokenize(source: str) -> List[Token]:
    """Split *source* into a list of tokens terminated by an EOF token.

    Every character of *source* is examined, including the last one.
    Whitespace and ``#`` comments (up to end of line) produce no tokens.
    Each token's ``pos`` is the index of the token's first character; the
    EOF token's ``pos`` is ``len(source)``.

    Args:
        source: The program text to tokenize.

    Returns:
        The tokens in source order; the final element is always ``T.EOF``.
    """
    size = len(source)
    tokens: List[Token] = []
    i = 0

    while i < size:
        start = i
        ch = source[i]

        if is_ws(ch):
            i += 1
        elif ch in _SINGLE_CHAR_TOKENS:
            tokens.append(Token(_SINGLE_CHAR_TOKENS[ch], ch, start))
            i += 1
        elif ch == "=":
            # "=name" assigns to an identifier; a lone "=" is the operator.
            if i + 1 < size and source[i + 1].isalpha():
                ident, i = read_ident(source, i + 1)
                tokens.append(Token(T.IDENTASSIGN, ident, start))
            else:
                tokens.append(Token(T.EQ, "=", start))
                i += 1
        elif ch in ("T", "F"):
            # NOTE(review): any word starting with T or F is split into a
            # BOOL plus the remainder; kept as in the original -- confirm.
            tokens.append(Token(T.BOOL, ch, start))
            i += 1
        elif ch == "#":
            # Skip to just past the end of the line, or to the end of the
            # source when the comment is not newline-terminated.
            newline = source.find("\n", i)
            i = size if newline == -1 else newline + 1
        elif ch == "|":
            tokens.append(Token(T.BLOCK, ch, start))
            # NOTE(review): the original also skips the character that
            # follows "|"; preserved here -- confirm this is intended.
            i += 2
        elif ch == "\"":
            string, i = read_string(source, i + 1)
            tokens.append(Token(T.STRING, string, start))
            i += 1  # step over the closing quote
        elif ch == ":":
            label_ident, i = read_ident(source, i + 1)
            tokens.append(Token(T.LABEL, label_ident, start))
        elif ch.isdigit():
            digits, i = read_integer(source, i)
            tokens.append(Token(T.INTEGER, digits, start))
        elif ch.isalpha():
            word, i = read_ident(source, i)
            tokens.append(Token(_KEYWORDS.get(word, T.IDENT), word, start))
        else:
            tokens.append(Token(T.ILEGAL, "\0", start))
            i += 1

    tokens.append(Token(T.EOF, "eof", size))
    return tokens