From 0daa8b2240f6f80f44e3632423d6646c304b7b19 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Sat, 15 Apr 2023 14:27:29 -0400
Subject: [PATCH] Undo accidental push

---
 kor/experimental/s_exprs.py       |  63 ---------------
 kor/experimental/tokenizer.py     | 128 ------------------------------
 tests/experimental/__init__.py    |   0
 tests/experimental/test_parser.py |  32 --------
 4 files changed, 223 deletions(-)
 delete mode 100644 kor/experimental/s_exprs.py
 delete mode 100644 kor/experimental/tokenizer.py
 delete mode 100644 tests/experimental/__init__.py
 delete mode 100644 tests/experimental/test_parser.py

diff --git a/kor/experimental/s_exprs.py b/kor/experimental/s_exprs.py
deleted file mode 100644
index a95200d..0000000
--- a/kor/experimental/s_exprs.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import abc
-import dataclasses
-from typing import List, Mapping, Optional
-
-
-@dataclasses.dataclass(frozen=True)
-class SExpr(abc.ABC):
-    pass
-
-
-@dataclasses.dataclass(frozen=True)
-class Number(SExpr):
-    value: float | int
-
-    def __str__(self) -> str:
-        return str(self.value)
-
-
-@dataclasses.dataclass(frozen=True)
-class String(SExpr):
-    value: str
-
-    def __str__(self) -> str:
-        return f'"{self.value}"'
-
-
-@dataclasses.dataclass(frozen=True)
-class SList(SExpr):
-    elements: List[SExpr]
-
-    def __str__(self) -> str:
-        return f'({" ".join(str(elem) for elem in self.elements)})'
-
-
-@dataclasses.dataclass(frozen=True)
-class Object(SExpr):
-    properties: Mapping[str, SExpr]
-
-    def __str__(self) -> str:
-        return (
-            f'{{{", ".join(f"{key}: {str(value)}" for key, value in self.properties)}}}'
-        )
-
-
-@dataclasses.dataclass(frozen=True)
-class Function(SExpr):
-    name: str
-    params: List[SExpr]
-    return_type: Optional[SExpr]
-
-    def __str__(self) -> str:
-        params_str = " ".join(str(param) for param in self.params)
-        return_type_str = f" -> {str(self.return_type)}" if self.return_type else ""
-        return f"({self.name} {params_str}{return_type_str})"
-
-
-@dataclasses.dataclass(frozen=True)
-class TypeAnnotation(SExpr):
-    value: SExpr
-
-    def __str__(self) -> str:
-        """Return the string representation of the type annotation."""
-        return self.value.__class__.__name__
diff --git a/kor/experimental/tokenizer.py b/kor/experimental/tokenizer.py
deleted file mode 100644
index 6e7501f..0000000
--- a/kor/experimental/tokenizer.py
+++ /dev/null
@@ -1,128 +0,0 @@
-import re
-from typing import List, Tuple, Union, Sequence, Literal
-from kor.experimental.s_exprs import (
-    SExpr,
-    Number,
-    String,
-    SList,
-    Object,
-    Function,
-    TypeAnnotation,
-)
-
-
-def tokenize(s_expression: str) -> List[Tuple[str, Union[str, float]]]:
-    """Tokenize a s-expression into a list of tokens."""
-    token_pattern = r"""
-        (?P<whitespace>\s+) |
-        (?P<number>-?\d+(?:\.\d+)?) |
-        (?P<string>"(?:[^\\"]|\\.)*") |
-        (?P<symbol>[^\s()\[\]{}'`",;]+) |
-        (?P<open_paren>\() |
-        (?P<close_paren>\))
-    """
-    tokens: List[Tuple[str, Union[str, float]]] = []
-    scanner = re.finditer(token_pattern, s_expression, re.VERBOSE)
-
-    for match in scanner:
-        if match.lastgroup != "whitespace":
-            token_value = match.group(match.lastgroup)
-            if match.lastgroup == "number":
-                token_value = token_value
-            tokens.append((match.lastgroup, token_value))
-
-    return tokens
-
-
-TokenType = Union[
-    Literal["open_paren"],
-    Literal["close_paren"],
-    Literal["number"],
-    Literal["string"],
-    Literal["symbol"],
-]
-
-
-class Parser:
-    def __init__(self, tokens: Sequence[Tuple[TokenType, str]]) -> None:
-        """Initialize the parser."""
-        self.tokens = tokens
-        self.position = 0
-
-    def parse(self):
-        return self.parse_sexp()
-
-    def parse_sexp(self):
-        if self.position >= len(self.tokens):
-            return None
-
-        token_type, token_value = self.tokens[self.position]
-
-        if token_type == "open_paren":
-            self.position += 1
-            elements = []
-
-            while (
-                self.position < len(self.tokens)
-                and self.tokens[self.position][0] != "close_paren"
-            ):
-                elements.append(self.parse_sexp())
-
-            if (
-                self.position < len(self.tokens)
-                and self.tokens[self.position][0] == "close_paren"
-            ):
-                self.position += 1
-            else:
-                raise ValueError("Unbalanced parentheses")
-
-            return SList(elements)
-        elif token_type == "number":
-            self.position += 1
-            return Number(float(token_value))
-        elif token_type == "string":
-            self.position += 1
-            return String(eval(token_value))
-        elif token_type == "symbol":
-            self.position += 1
-            return Function(token_value)
-
-        else:
-            raise ValueError(f"Unexpected token: {token_type}, {token_value}")
-
-    def parse_function(self) -> Function:
-        if (
-            self.position >= len(self.tokens)
-            or self.tokens[self.position][0] != "open_paren"
-        ):
-            raise ValueError("Function parameters expected")
-        self.position += 1
-
-        parameters = []
-        while (
-            self.position < len(self.tokens)
-            and self.tokens[self.position][0] != "close_paren"
-        ):
-            token_type, token_value = self.tokens[self.position]
-            if token_type != "symbol":
-                raise ValueError("Function parameters must be symbols")
-            parameters.append(Symbol(token_value))
-            self.position += 1
-
-        if (
-            self.position < len(self.tokens)
-            and self.tokens[self.position][0] == "close_paren"
-        ):
-            self.position += 1
-        else:
-            raise ValueError("Unbalanced parentheses")
-
-        body = self.parse_sexp()
-
-        return Function(parameters, body)
-
-
-def parse_s_expression(s_expression):
-    tokens = tokenize(s_expression)
-    parser = Parser(tokens)
-    return parser.parse()
diff --git a/tests/experimental/__init__.py b/tests/experimental/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/experimental/test_parser.py b/tests/experimental/test_parser.py
deleted file mode 100644
index ab09376..0000000
--- a/tests/experimental/test_parser.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from kor.experimental.tokenizer import tokenize
-
-
-def test_tokenize():
-    assert tokenize("(+ 1 2)") == [
-        ("open_paren", "("),
-        ("symbol", "+"),
-        ("number", "1"),
-        ("number", "2"),
-        ("close_paren", ")"),
-    ]
-
-    assert tokenize("(add (sub 1 2) 3)") == [
-        ("open_paren", "("),
-        ("symbol", "add"),
-        ("open_paren", "("),
-        ("symbol", "sub"),
-        ("number", "1"),
-        ("number", "2"),
-        ("close_paren", ")"),
-        ("number", "3"),
-        ("close_paren", ")"),
-    ]
-
-    assert tokenize('(concat "hello" "world")') == [
-        ("open_paren", "("),
-        ("symbol", "concat"),
-        ("string", '"hello"'),
-        ("string", '"world"'),
-        ("close_paren", ")"),
-    ]
-