From 061f396e9ad5e3e0382db8c8a4150359e7d15ee4 Mon Sep 17 00:00:00 2001 From: Tom Anderson Date: Fri, 30 Aug 2024 23:24:28 +1000 Subject: [PATCH] begin implementing `Parsable` for some basic AST nodes --- src/hir/expression/array.rs | 129 ++++++++++++++++++++++++++++++ src/hir/expression/assign.rs | 97 ++++++++++++++++++++++ src/hir/expression/boolean.rs | 113 +++++++++++++++++++++----- src/hir/expression/ident.rs | 114 ++++++++++++++++++++------ src/hir/expression/integer.rs | 102 ++++++++++++++++++----- src/hir/mod.rs | 8 +- src/stage/parse/expression/mod.rs | 6 ++ src/stage/parse/mod.rs | 22 +++-- src/stage/parse/parser.rs | 19 ++--- 9 files changed, 523 insertions(+), 87 deletions(-) diff --git a/src/hir/expression/array.rs b/src/hir/expression/array.rs index 9218d5a..d3d2fc3 100644 --- a/src/hir/expression/array.rs +++ b/src/hir/expression/array.rs @@ -1,3 +1,5 @@ +use crate::stage::parse::{ParseError, Precedence}; + use super::*; ast_node! { @@ -8,6 +10,59 @@ ast_node! { } } +impl Parsable for Array { + fn register(parser: &mut Parser) { + assert!( + parser.register_prefix(Token::LeftSquare, |parser, compiler, lexer| { + // Parse opening square bracket + let span_start = match lexer.next_spanned().unwrap() { + (Token::LeftSquare, span) => span.start, + (token, _) => { + return Err(ParseError::ExpectedToken { + expected: Box::new(Token::LeftSquare), + found: Box::new(token), + reason: "array literal must start with square brace".to_string(), + }); + } + }; + + // Parse each of the items, deliminated by a comma + let mut init = Vec::new(); + let mut expect_item = true; + let span_end = loop { + dbg!("loop"); + match (lexer.peek_token().unwrap(), expect_item) { + (Token::Comma, false) => { + expect_item = true; + lexer.next_token(); + } + (Token::RightSquare, _) => { + break lexer.next_spanned().unwrap().1.end; + } + (_, true) => { + init.push(parser.parse(compiler, lexer, Precedence::Lowest)?); + expect_item = false; + } + (token, _) => { + return Err(ParseError::ExpectedToken { + expected: Box::new(Token::RightSquare), + found: Box::new(token.clone()), + reason: "expected a comma or closing brace".to_string(), + }); + } + } + }; + + Ok(Expression::Array(Array { + init, + span: span_start..span_end, + ty_info: None, + })) + }) + ); + } +} + impl SolveType for Array { type State = Scope; @@ -42,3 +97,77 @@ impl SolveType for Array { }) } } + +#[cfg(test)] +mod test { + use super::*; + use rstest::*; + + mod parse { + use crate::stage::parse::{Lexer, Precedence}; + + use super::*; + + #[fixture] + fn parser() -> Parser { + let mut parser = Parser::new(); + + Array::::register(&mut parser); + + // Use integer parser for testing + Integer::::register(&mut parser); + + parser + } + + #[rstest] + #[case::empty("[]", 0)] + #[case::single("[1]", 1)] + #[case::single_trailing("[1,]", 1)] + #[case::double("[1, 2]", 2)] + #[case::double_trailing("[1, 2,]", 2)] + #[case::triple("[1, 2, 3]", 3)] + #[case::triple_trailing("[1, 2, 3,]", 3)] + fn flat(parser: Parser, #[case] source: &str, #[case] items: usize) { + let array = parser + .parse( + &mut Compiler::default(), + &mut Lexer::from(source), + Precedence::Lowest, + ) + .unwrap(); + + assert_array_len(&array, items); + } + + #[rstest] + fn nested(parser: Parser) { + let array = parser + .parse( + &mut Compiler::default(), + &mut Lexer::from("[[1,], [1, 2,], [1, 2, 3,],]"), + Precedence::Lowest, + ) + .unwrap(); + + let array = assert_array_len(&array, 3); + + assert_array_len(&array.init[0], 1); + assert_array_len(&array.init[1], 2); + assert_array_len(&array.init[2], 3); + } + + fn assert_array_len( + expression: &Expression, + length: usize, + ) -> &Array { + let Expression::Array(array) = expression else { + panic!("expected to parse array"); + }; + + assert_eq!(array.init.len(), length); + + array + } + } +} diff --git a/src/hir/expression/assign.rs b/src/hir/expression/assign.rs index 9162637..2639fa8 100644 --- a/src/hir/expression/assign.rs +++ b/src/hir/expression/assign.rs @@ -1,3 +1,5 @@ +use crate::stage::parse::{ParseError, Precedence}; + use super::*; ast_node! { @@ -9,6 +11,44 @@ ast_node! { } } +impl Parsable for Assign { + fn register(parser: &mut Parser) { + assert!( + parser.register_infix(Token::Eq, |parser, compiler, lexer, left| { + let (binding, binding_span) = match left { + Expression::Ident(Ident { binding, span, .. }) => (binding, span), + lhs => { + return Err(ParseError::InvalidInfixLhs { + found: Box::new(lhs), + reason: "assign must start with ident".to_string(), + }); + } + }; + + match lexer.next_token().unwrap() { + Token::Eq => (), + token => { + return Err(ParseError::ExpectedToken { + expected: Box::new(Token::Eq), + found: Box::new(token), + reason: "equals sign following binding for assign".to_string(), + }); + } + } + + let value = parser.parse(compiler, lexer, Precedence::Lowest)?; + + Ok(Expression::Assign(Assign { + span: binding_span.start..value.span().end, + binding, + value: Box::new(value), + ty_info: None, + })) + }) + ); + } +} + impl SolveType for Assign { type State = Scope; @@ -41,3 +81,60 @@ impl SolveType for Assign { }) } } + +#[cfg(test)] +mod test { + use super::*; + use rstest::*; + + mod parse { + use crate::stage::parse::Lexer; + + use super::*; + + #[fixture] + fn parser() -> Parser { + let mut parser = Parser::new(); + + Assign::::register(&mut parser); + + // Register additional parsers for testing + Integer::::register(&mut parser); + Ident::::register(&mut parser); + + parser + } + + #[rstest] + #[case::valid_integer_rhs("myident", "1")] + #[case::valid_ident_rhs("myident", "otherident")] + fn success(parser: Parser, #[case] lhs: &str, #[case] rhs: &str) { + let mut compiler = Compiler::default(); + + let assign = parser + .parse( + &mut compiler, + &mut Lexer::from(format!("{lhs} = {rhs}").as_str()), + Precedence::Lowest, + ) + .unwrap(); + + let Expression::Assign(assign) = dbg!(assign) else { + panic!("expected to parse assignment") + }; + + assert_eq!(lhs, compiler.symbols.resolve(assign.binding).unwrap()); + } + + #[rstest] + fn invalid(parser: Parser) { + let result = parser.parse( + &mut Compiler::default(), + &mut Lexer::from("1 = otherident"), + Precedence::Lowest, + ); + + assert!(matches!(result, Err(ParseError::InvalidInfixLhs { .. }))); + } + } +} diff --git a/src/hir/expression/boolean.rs b/src/hir/expression/boolean.rs index 2f98f0d..b977e0c 100644 --- a/src/hir/expression/boolean.rs +++ b/src/hir/expression/boolean.rs @@ -1,3 +1,5 @@ +use crate::stage::parse::{Lexer, ParseError}; + use super::*; ast_node! { @@ -8,6 +10,41 @@ ast_node! { } } +impl Parsable for Boolean { + fn register(parser: &mut Parser) { + fn parse(lexer: &mut Lexer) -> Result, ParseError> { + let (token, span) = lexer.next_spanned().unwrap(); + + let value = match token { + Token::True => true, + Token::False => false, + token => { + return Err(ParseError::ExpectedToken { + expected: Box::new(Token::True), + found: Box::new(token), + reason: "expected boolean".to_string(), + }); + } + }; + + Ok(Expression::Boolean(Boolean { + value, + span, + ty_info: None, + })) + } + + assert!( + parser.register_prefix(Token::True, |_, _, lexer| parse(lexer)), + "successfully register parser for `true` token" + ); + assert!( + parser.register_prefix(Token::False, |_, _, lexer| parse(lexer)), + "successfully register parser for `false` token" + ); + } +} + impl SolveType for Boolean { type State = Scope; @@ -28,30 +65,64 @@ impl SolveType for Boolean { } #[cfg(test)] -mod test_boolean { +mod test { use super::*; - #[test] - fn boolean_infer() { - assert_eq!( - Boolean::new(false, Span::default(), Default::default()) - .solve(&mut Compiler::default(), &mut Scope::new()) - .unwrap() - .ty_info - .ty, - Ty::Boolean - ); + use rstest::*; + + mod parse { + use super::*; + use crate::stage::parse::Precedence; + + #[rstest] + #[case::t_true("true", true)] + #[case::t_false("false", false)] + fn success(#[case] source: &str, #[case] value: bool) { + let mut parser = Parser::new(); + + Boolean::::register(&mut parser); + + let boolean = parser + .parse( + &mut Compiler::default(), + &mut Lexer::from(source), + Precedence::Lowest, + ) + .unwrap(); + + let Expression::Boolean(boolean) = boolean else { + panic!("expected boolean to be returned"); + }; + + assert_eq!(boolean.value, value); + } } - #[test] - fn boolean_return() { - assert_eq!( - Boolean::new(false, Span::default(), Default::default()) - .solve(&mut Compiler::default(), &mut Scope::new()) - .unwrap() - .ty_info - .return_ty, - None, - ); + mod ty { + use super::*; + + #[test] + fn boolean_infer() { + assert_eq!( + Boolean::new(false, Span::default(), Default::default()) + .solve(&mut Compiler::default(), &mut Scope::new()) + .unwrap() + .ty_info + .ty, + Ty::Boolean + ); + } + + #[test] + fn boolean_return() { + assert_eq!( + Boolean::new(false, Span::default(), Default::default()) + .solve(&mut Compiler::default(), &mut Scope::new()) + .unwrap() + .ty_info + .return_ty, + None, + ); + } } } diff --git a/src/hir/expression/ident.rs b/src/hir/expression/ident.rs index ec74b5f..6b3aa21 100644 --- a/src/hir/expression/ident.rs +++ b/src/hir/expression/ident.rs @@ -1,3 +1,5 @@ +use crate::stage::parse::ParseError; + use super::*; use std::hash::Hash; @@ -10,6 +12,34 @@ ast_node! { } } +impl Parsable for Ident { + fn register(parser: &mut Parser) { + parser.register_prefix_test( + |token| matches!(token, Token::Ident(_)), + |_, compiler, lexer| { + let (value, span) = match lexer.next_spanned().unwrap() { + (Token::Ident(value), span) => (value, span), + (token, _) => { + return Err(ParseError::ExpectedToken { + expected: Box::new(Token::Ident(String::new())), + found: Box::new(token), + reason: "expected ident".to_string(), + }); + } + }; + + let binding = compiler.symbols.get_or_intern(value); + + Ok(Expression::Ident(Ident { + binding, + span, + ty_info: None, + })) + }, + ); + } +} + impl SolveType for Ident { type State = Scope; @@ -56,37 +86,73 @@ mod test_ident { use super::*; - #[test] - fn ident_present() { - // Set up a reference symbol - let symbol = Symbol::try_from_usize(0).unwrap(); + mod parse { + use crate::stage::parse::{Lexer, Precedence}; + + use super::*; + + #[test] + fn success() { + let mut parser = Parser::new(); - // Create a scope and add the symbol to it - let mut scope = Scope::new(); - scope.register(symbol, Ty::Int); + Ident::::register(&mut parser); - let i = Ident::new(symbol, Span::default(), Default::default()); + let mut compiler = Compiler::default(); - // Run the type solve - let ty_info = i - .solve(&mut Compiler::default(), &mut scope) - .unwrap() - .ty_info; + let ident = parser + .parse( + &mut compiler, + &mut Lexer::from("someident"), + Precedence::Lowest, + ) + .unwrap(); - assert_eq!(ty_info.ty, Ty::Int); - assert_eq!(ty_info.return_ty, None); + let Expression::Ident(ident) = ident else { + panic!("expected ident to be parsed"); + }; + + assert_eq!( + compiler.symbols.resolve(ident.binding).unwrap(), + "someident" + ); + } } - #[test] - fn ident_infer_missing() { - let i = Ident::new( - Symbol::try_from_usize(0).unwrap(), - Span::default(), - Default::default(), - ); + mod ty { + use super::*; + + #[test] + fn ident_present() { + // Set up a reference symbol + let symbol = Symbol::try_from_usize(0).unwrap(); + + // Create a scope and add the symbol to it + let mut scope = Scope::new(); + scope.register(symbol, Ty::Int); + + let i = Ident::new(symbol, Span::default(), Default::default()); + + // Run the type solve + let ty_info = i + .solve(&mut Compiler::default(), &mut scope) + .unwrap() + .ty_info; + + assert_eq!(ty_info.ty, Ty::Int); + assert_eq!(ty_info.return_ty, None); + } + + #[test] + fn ident_infer_missing() { + let i = Ident::new( + Symbol::try_from_usize(0).unwrap(), + Span::default(), + Default::default(), + ); - let result = i.solve(&mut Compiler::default(), &mut Scope::new()); + let result = i.solve(&mut Compiler::default(), &mut Scope::new()); - assert!(result.is_err()); + assert!(result.is_err()); + } } } diff --git a/src/hir/expression/integer.rs b/src/hir/expression/integer.rs index 030f2f9..0e3fa21 100644 --- a/src/hir/expression/integer.rs +++ b/src/hir/expression/integer.rs @@ -1,3 +1,5 @@ +use crate::stage::parse::ParseError; + use super::*; ast_node! { @@ -8,6 +10,32 @@ ast_node! { } } +impl Parsable for Integer { + fn register(parser: &mut Parser) { + parser.register_prefix_test( + |token| matches!(token, Token::Integer(_)), + |_, _, lexer| { + let (value, span) = match lexer.next_spanned().unwrap() { + (Token::Integer(value), span) => (value, span), + (token, _) => { + return Err(ParseError::ExpectedToken { + expected: Box::new(Token::Integer(0)), + found: Box::new(token), + reason: "expected integer".to_string(), + }); + } + }; + + Ok(Expression::Integer(Integer { + value, + span, + ty_info: None, + })) + }, + ) + } +} + impl SolveType for Integer { type State = Scope; @@ -27,27 +55,61 @@ impl SolveType for Integer { mod test_integer { use super::*; - #[test] - fn integer_infer() { - assert_eq!( - Integer::new(0, Span::default(), Default::default()) - .solve(&mut Compiler::default(), &mut Scope::new()) - .unwrap() - .ty_info - .ty, - Ty::Int - ); + use rstest::*; + + mod parse { + use crate::stage::parse::{Lexer, Precedence}; + + use super::*; + + #[rstest] + #[case::single_digit(1)] + #[case::multi_digit(123)] + fn success(#[case] value: i64) { + let mut parser = Parser::new(); + + Integer::::register(&mut parser); + + let integer = parser + .parse( + &mut Compiler::default(), + &mut Lexer::from(value.to_string().as_str()), + Precedence::Lowest, + ) + .unwrap(); + + let Expression::Integer(integer) = integer else { + panic!("expected integer to be returned"); + }; + + assert_eq!(integer.value, value); + } } - #[test] - fn integer_return() { - assert_eq!( - Integer::new(0, Span::default(), Default::default()) - .solve(&mut Compiler::default(), &mut Scope::new()) - .unwrap() - .ty_info - .return_ty, - None, - ); + mod ty { + use super::*; + #[test] + fn integer_infer() { + assert_eq!( + Integer::new(0, Span::default(), Default::default()) + .solve(&mut Compiler::default(), &mut Scope::new()) + .unwrap() + .ty_info + .ty, + Ty::Int + ); + } + + #[test] + fn integer_return() { + assert_eq!( + Integer::new(0, Span::default(), Default::default()) + .solve(&mut Compiler::default(), &mut Scope::new()) + .unwrap() + .ty_info + .return_ty, + None, + ); + } } } diff --git a/src/hir/mod.rs b/src/hir/mod.rs index f14bb80..b18e3b2 100644 --- a/src/hir/mod.rs +++ b/src/hir/mod.rs @@ -11,7 +11,7 @@ use crate::{ token::Token, ty::Ty, }, - stage::type_check::TyError, + stage::{parse::parser::Parser, type_check::TyError}, util::{scope::Scope, span::Span}, }; @@ -20,6 +20,12 @@ pub use function::*; pub use program::*; pub use statement::*; +#[allow(dead_code)] +pub trait Parsable { + /// Register the parser for this node against the provided parser. + fn register(parser: &mut Parser); +} + pub trait SolveType: UntypedAstNode { type State; diff --git a/src/stage/parse/expression/mod.rs b/src/stage/parse/expression/mod.rs index 4b1ab6a..55f2276 100644 --- a/src/stage/parse/expression/mod.rs +++ b/src/stage/parse/expression/mod.rs @@ -21,6 +21,7 @@ mod e_loop; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Precedence { Lowest, + Assign, Binary, Equality, Sum, @@ -41,6 +42,11 @@ impl Precedence { | Token::LeftAngleEq | Token::RightAngleEq => Precedence::Equality, Token::LeftParen | Token::LeftSquare => Precedence::Call, + Token::Eq + | Token::AddAssign + | Token::MinusAssign + | Token::DivAssign + | Token::MulAssign => Precedence::Assign, _ => Precedence::Lowest, } } diff --git a/src/stage/parse/mod.rs b/src/stage/parse/mod.rs index 21c69f2..77fccf7 100644 --- a/src/stage/parse/mod.rs +++ b/src/stage/parse/mod.rs @@ -1,7 +1,7 @@ mod block; mod expression; mod function; -mod parser; +pub mod parser; mod statement; mod ty; @@ -17,7 +17,7 @@ use crate::repr::token::*; use crate::util::span::*; use self::block::*; -use self::expression::*; +pub use self::expression::*; use self::function::*; use self::statement::*; @@ -35,6 +35,12 @@ pub enum ParseError { reason: String, }, + #[error("invalid infix left hand side: {reason} ({found:?})")] + InvalidInfixLhs { + found: Box, + reason: String, + }, + #[error("invalid literal, expected `{expected}`")] InvalidLiteral { expected: String }, @@ -80,7 +86,7 @@ pub fn parse(compiler: &mut Compiler, source: &str) -> Result { +pub struct Lexer<'source> { next: Option<(Token, Span)>, lexer: Peekable>, } @@ -93,19 +99,19 @@ impl<'source> Lexer<'source> { } } - fn next_token(&mut self) -> Option { + pub fn next_token(&mut self) -> Option { self.next_spanned().map(|(token, _)| token) } - fn peek_token(&mut self) -> Option<&Token> { + pub fn peek_token(&mut self) -> Option<&Token> { self.peek_spanned().map(|(token, _)| token) } - fn next_spanned(&mut self) -> Option<(Token, Span)> { + pub fn next_spanned(&mut self) -> Option<(Token, Span)> { self.next.take().or_else(|| self.next()) } - fn peek_spanned(&mut self) -> Option<(&Token, &Span)> { + pub fn peek_spanned(&mut self) -> Option<(&Token, &Span)> { self.next .as_ref() .map(|(token, span)| (token, span)) @@ -116,7 +122,7 @@ impl<'source> Lexer<'source> { }) } - fn double_peek_token(&mut self) -> Option<&Token> { + pub fn double_peek_token(&mut self) -> Option<&Token> { if self.next.is_none() { self.next = self.next(); } diff --git a/src/stage/parse/parser.rs b/src/stage/parse/parser.rs index 29ce42e..5dbd74e 100644 --- a/src/stage/parse/parser.rs +++ b/src/stage/parse/parser.rs @@ -1,6 +1,3 @@ -// Temporary until this is integrated -#![allow(dead_code)] - use std::collections::HashMap; use crate::{compiler::Compiler, repr::ast::untyped::Expression}; @@ -8,7 +5,7 @@ use crate::{compiler::Compiler, repr::ast::untyped::Expression}; use super::{Lexer, ParseError, Token}; /// Function capable of parsing an infix expression out of the provided lexer. -type InfixParser = fn( +pub type InfixParser = fn( parser: &Parser, compiler: &mut Compiler, lexer: &mut Lexer, @@ -16,17 +13,18 @@ type InfixParser = fn( ) -> Result; /// Function capable of parsing a prefix expression out of the provided lexer. -type PrefixParser = fn( +pub type PrefixParser = fn( parser: &Parser, compiler: &mut Compiler, lexer: &mut Lexer, ) -> Result; /// Function to test whether a token is a match to parse. -type TokenTest = fn(token: &Token) -> bool; +pub type TokenTest = fn(token: &Token) -> bool; /// Composable parser, allowing for components of the parser to be dynamically registered. -struct Parser { +#[derive(Default)] +pub struct Parser { /// Infix parse function to run when a given token is presented during infix parsing. infix: HashMap, /// Dynamic tests to run on a token during infix parsing. These will be run after the infix map is checked. @@ -40,12 +38,7 @@ struct Parser { impl Parser { /// Create a new instance of the parser. pub fn new() -> Self { - Self { - infix: HashMap::new(), - infix_tests: Vec::new(), - prefix: HashMap::new(), - prefix_tests: Vec::new(), - } + Self::default() } /// Register a new prefix parser against a token. Will return `false` if the token has already