From 4108cb9d21b785c8e6dce2bb0425c111fd3fb33e Mon Sep 17 00:00:00 2001
From: tezlm
Date: Tue, 9 May 2023 06:48:20 -0700
Subject: [PATCH] some random changes

---
 src/compiler.rs |  6 ++++
 src/lexer.rs    | 34 +++++++++++++---------
 src/main.rs     | 37 ++++++++++++++++++++----
 src/parser.rs   | 76 +++++++++++++++++++++++++++++++++++++++++++++++--
 src/runner.rs   |  4 +--
 5 files changed, 133 insertions(+), 24 deletions(-)
 create mode 100644 src/compiler.rs

diff --git a/src/compiler.rs b/src/compiler.rs
new file mode 100644
index 0000000..87ff14e
--- /dev/null
+++ b/src/compiler.rs
@@ -0,0 +1,6 @@
+use crate::parser;
+use crate::lexer;
+
+pub fn compile(tree: &parser::Tree) {
+
+}
diff --git a/src/lexer.rs b/src/lexer.rs
index e08215c..32cfdac 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -1,26 +1,32 @@
 use once_cell::sync::Lazy;
 use regex::{Regex, RegexBuilder};
 
-#[derive(Debug, PartialEq, Clone, Copy)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
 pub enum TokenType {
-    Space, Int, Float, String, Ident, Paran, Symb
+    Space, Int, Float, String, Ident, Paran, Symbol, Keyword,
 }
 
+macro_rules! regex {
+    ($regex:expr) => {
+        RegexBuilder::new($regex).case_insensitive(true).build().unwrap()
+    }
+}
+
+static TOKEN_MATCHERS: Lazy<[(TokenType, Regex); 9]> = once_cell::sync::Lazy::new(|| [
+    (TokenType::Space, regex!(r"^[ \n\t]+")),
+    (TokenType::Int, regex!(r"^0(x[0-9a-f]+|b[01]+|o[0-7]+)")),
+    (TokenType::Float, regex!(r"^[0-9]+(\.[0-9]+)?")),
+    (TokenType::String, regex!(r#"^"(.*?[^\\])?""#)),
+    (TokenType::String, regex!(r"^'(.*?[^\\])?'")),
+    (TokenType::Ident, regex!(r"^[a-z_][a-z0-9_]*")),
+    (TokenType::Paran, regex!(r"^[\[\]\(\)\{\}\<\>]")),
+    (TokenType::Symbol, regex!(r"^[^ \n\ta-z0-9_\[\]\(\)]{1,2}")),
+    (TokenType::Keyword, regex!(r"^(let|mut|const|type|fn|if|else|match|for|in|while|loop|export|import|struct|enum|async)")),
+]);
+
 pub fn lex(input: &str) -> Vec<(TokenType, &str)> {
     let mut code = input.to_owned();
     let mut tokens = Vec::new();
-    static TOKEN_MATCHERS: Lazy<[(TokenType, Regex); 8]> = once_cell::sync::Lazy::new(|| {
-        [
-            (TokenType::Space, RegexBuilder::new(r"^[ \n\t]+").case_insensitive(true).build().unwrap()),
-            (TokenType::Int, RegexBuilder::new(r"^0(x[0-9a-f]+|b[01]+|o[0-7]+)").case_insensitive(true).build().unwrap()),
-            (TokenType::Float, RegexBuilder::new(r"^[0-9]+(\.[0-9]+)?").case_insensitive(true).build().unwrap()),
-            (TokenType::String, RegexBuilder::new(r#"^"(.*?[^\\])?""#).case_insensitive(true).build().unwrap()),
-            (TokenType::String, RegexBuilder::new(r"^'(.*?[^\\])?'").case_insensitive(true).build().unwrap()),
-            (TokenType::Ident, RegexBuilder::new(r"^[a-z_][a-z0-9_]*").case_insensitive(true).build().unwrap()),
-            (TokenType::Paran, RegexBuilder::new(r"^[\[\]\(\)]").case_insensitive(true).build().unwrap()),
-            (TokenType::Symb, RegexBuilder::new(r"^[^ \n\ta-z0-9_\[\]\(\)]{1,2}").case_insensitive(true).build().unwrap()),
-        ]
-    });
     let mut i = 0;
     while !code.is_empty() {
         for (token_type, regex) in TOKEN_MATCHERS.iter() {
diff --git a/src/main.rs b/src/main.rs
index 5d1034e..f5a8b7b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -6,9 +6,12 @@
 
 mod lexer;
 mod parser;
+mod compiler;
 mod runner;
 
-fn test(code: &str) {
+use runner::Value;
+
+fn test(code: &str) -> Value {
     let tokens = lexer::lex(code);
     let tokens: Vec<_> = tokens
         .iter()
@@ -16,14 +19,11 @@
         .collect();
     let tree = parser::parse(&mut tokens.iter().peekable(), 0);
     let result = runner::expr(&tree);
-    dbg!(result);
+    dbg!(&result);
+    result
 }
 
 fn main() {
-    test("123");
-    test("-123");
-    test("2 + 3 * 4");
-    test("(2 + 3) * 4");
     // test("math.sin");
     // test("math.sin(10 * math.pi)");
     test("\"foo\" in [\"fooo\", \"bar\", \"baz\"]");
@@ -35,3 +35,28 @@
     // test("str.capitalize(\"hello world\")");
     // test("\"apple\" in str.split(\"apple banana orange\") ? \"apple exists\" : \"apple doesn\'t exist\"");
 }
+
+#[cfg(test)]
+mod tests {
+    use super::{Value, test};
+
+    #[test]
+    fn test_number() {
+        assert_eq!(test("123"), Value::Number(123.0));
+    }
+
+    #[test]
+    fn test_negate() {
+        assert_eq!(test("-123"), Value::Number(-123.0));
+    }
+
+    #[test]
+    fn test_expr() {
+        assert_eq!(test("2 + 3 * 4"), Value::Number(14.0));
+    }
+
+    #[test]
+    fn test_paran() {
+        assert_eq!(test("(2 + 3) * 4"), Value::Number(20.0));
+    }
+}
diff --git a/src/parser.rs b/src/parser.rs
index 874010e..fc40144 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,5 +1,10 @@
 use crate::lexer;
 
+#[derive(Debug)]
+pub enum ParserError {
+    InvalidOperator(String),
+}
+
 #[derive(Debug)]
 pub enum Tree {
     Unary { op: String, term: Box<Tree> },
@@ -11,6 +16,73 @@ pub enum Tree {
     Value { term: String, kind: lexer::TokenType },
 }
 
+#[derive(Debug)]
+pub enum UnaryOpType {
+    Negative, Not,
+}
+
+#[derive(Debug)]
+pub enum BinaryOpType {
+    Dot, Subtract, Multiply, Power, Divide, Percent, Add,
+    Equal, NotEqual, GreaterOrEqual, Greater, LessOrEqual, Less,
+    In, And, Or, Comma,
+}
+
+#[derive(Debug)]
+pub enum TernaryOpType {
+    Switch
+}
+
+impl TryFrom<&str> for UnaryOpType {
+    type Error = ParserError;
+
+    fn try_from(value: &str) -> Result<Self, Self::Error> {
+        match value {
+            "-" => Ok(UnaryOpType::Negative),
+            "!" => Ok(UnaryOpType::Not),
+            _ => Err(ParserError::InvalidOperator(value.to_string())),
+        }
+    }
+}
+
+impl TryFrom<&str> for BinaryOpType {
+    type Error = ParserError;
+
+    fn try_from(value: &str) -> Result<Self, Self::Error> {
+        match value {
+            "." => Ok(BinaryOpType::Dot),
+            "-" => Ok(BinaryOpType::Subtract),
+            "*" => Ok(BinaryOpType::Multiply),
+            "**" => Ok(BinaryOpType::Power),
+            "/" => Ok(BinaryOpType::Divide),
+            "%" => Ok(BinaryOpType::Percent),
+            "+" => Ok(BinaryOpType::Add),
+            "==" => Ok(BinaryOpType::Equal),
+            "!=" => Ok(BinaryOpType::NotEqual),
+            ">=" => Ok(BinaryOpType::GreaterOrEqual),
+            ">" => Ok(BinaryOpType::Greater),
+            "<=" => Ok(BinaryOpType::LessOrEqual),
+            "<" => Ok(BinaryOpType::Less),
+            "in" => Ok(BinaryOpType::In),
+            "&&" => Ok(BinaryOpType::And),
+            "||" => Ok(BinaryOpType::Or),
+            "," => Ok(BinaryOpType::Comma),
+            _ => Err(ParserError::InvalidOperator(value.to_string())),
+        }
+    }
+}
+
+impl TryFrom<&str> for TernaryOpType {
+    type Error = ParserError;
+
+    fn try_from(value: &str) -> Result<Self, Self::Error> {
+        match value {
+            "?" => Ok(TernaryOpType::Switch),
+            _ => Err(ParserError::InvalidOperator(value.to_string())),
+        }
+    }
+}
+
 pub fn parse(tokens: &mut std::iter::Peekable>, min_bp: usize) -> Tree {
     let mut lhs = match tokens.next().expect("missing token") {
         token @ (_, "-") | token @ (_, "!") => Tree::Unary { op: token.1.to_owned(), term: Box::new(parse(tokens, 19)) },
@@ -44,7 +116,7 @@ pub fn parse(tokens: &mut std::iter::Peekable
-        token @ (lexer::TokenType::Symb, _) => panic!("unexpected token {}", token.1),
+        token @ (lexer::TokenType::Symbol, _) => panic!("unexpected token {}", token.1),
         token @ (_, "in") => panic!("unexpected token {}", token.1),
         token @ (kind, _) => Tree::Value { term: token.1.to_owned(), kind: *kind },
     };
@@ -53,7 +125,7 @@ pub fn parse(tokens: &mut std::iter::Peekable
-            (lexer::TokenType::Symb, _) | (lexer::TokenType::Paran, _) | (_, "in") => {},
+            (lexer::TokenType::Symbol, _) | (lexer::TokenType::Paran, _) | (_, "in") => {},
             (_, s) => panic!("unexpected token {}", s),
         };
         let Some((left_bp, right_bp)) = get_bp(next.1) else { panic!("invalid symbol") };
diff --git a/src/runner.rs b/src/runner.rs
index a512a2d..8104201 100644
--- a/src/runner.rs
+++ b/src/runner.rs
@@ -153,10 +153,10 @@ pub fn expr(tree: &parser::Tree) -> Value {
             }
         },
         Tree::Array { terms } => {
-            if terms.len() == 0 {
+            if terms.is_empty() {
                 Value::Array { kind: ValueType::Number, values: vec![] }
             } else {
-                let values: Vec<_> = terms.iter().map(|t| expr(t)).collect();
+                let values: Vec<_> = terms.iter().map(expr).collect();
                 let kind = values[0].get_type();
                 Value::Array { kind, values }
             }