From bb17150b220a9b52a31684b3c781f1baa2c4d2c1 Mon Sep 17 00:00:00 2001 From: tezlm Date: Tue, 9 May 2023 13:05:54 -0700 Subject: [PATCH] compile to bytecode --- ideas.md | 10 ++ spec.md | 3 - src/compiler.rs | 172 +++++++++++++++++++++++++++++- src/lexer.rs | 2 +- src/main.rs | 31 +++++- src/{parser.rs => parser/expr.rs} | 10 +- src/parser/mod.rs | 6 ++ src/runner.rs | 92 ++++++++++++++-- 8 files changed, 301 insertions(+), 25 deletions(-) create mode 100644 ideas.md delete mode 100644 spec.md rename src/{parser.rs => parser/expr.rs} (97%) create mode 100644 src/parser/mod.rs diff --git a/ideas.md b/ideas.md new file mode 100644 index 0000000..1ee1b71 --- /dev/null +++ b/ideas.md @@ -0,0 +1,10 @@ +# expr + +i want a mix of rust and typescript to make a scripting language thats nice to use + +goals: + +- easy interop with rust +- an extensive stdlib +- support for fancy stuff like static typing and pattern matching +- fast to write diff --git a/spec.md b/spec.md deleted file mode 100644 index 1a73319..0000000 --- a/spec.md +++ /dev/null @@ -1,3 +0,0 @@ -# expr spec - -TODO diff --git a/src/compiler.rs b/src/compiler.rs index 87ff14e..67846d6 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -1,6 +1,172 @@ use crate::parser; -use crate::lexer; -pub fn compile(tree: &parser::Tree) { - +pub struct Bytecode { + pub code: Vec, + pub data: Vec, +} + +type Register = u8; +type Pointer = u8; + +#[derive(Debug)] +pub enum Instruction { + BinaryOperator { + operator: BinaryOperation, + left: Register, + right: Register, + dest: Register, + }, + UnaryOperator { + operator: UnaryOperation, + input: Register, + dest: Register, + }, + Load { + input: Pointer, + dest: Register, + }, + Array { + dest: Register, + }, + ArrayAppend { + source: Register, + dest: Register, + }, + Return { + source: Register, + }, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Data { + Unsigned(u64), + Integer(i64), + Float(f64), + String(String), + Boolean(bool), + Array(Vec), +} + +#[derive(Debug)] +pub enum BinaryOperation { + Add, Subtract, Multiply, Divide, + In, +} + +#[derive(Debug)] +pub enum UnaryOperation { + Negate, +} + +pub struct Compiler { + code: Vec, + data: Vec, + register: Register, +} + +impl Compiler { + fn tree(&mut self, tree: &parser::Tree) -> Register { + match tree { + parser::Tree::Value { term, kind } => { + use crate::lexer::TokenType; + let data = match kind { + TokenType::Float => { + let float: f64 = term.parse().unwrap(); + Data::Float(float) + }, + TokenType::String => { + Data::String(term[1..term.len() - 1].to_string()) + }, + _ => todo!(), + }; + let reg = self.next_register(); + let idx = self.push_data(data); + self.code.push(Instruction::Load { + input: idx, + dest: reg, + }); + reg + }, + parser::Tree::Unary { op, term } => { + let reg_input = self.tree(term); + let reg_dest = self.next_register(); + let operator = match op.as_str() { + "-" => UnaryOperation::Negate, + _ => todo!(), + }; + self.code.push(Instruction::UnaryOperator { + operator, + input: reg_input, + dest: reg_dest, + }); + reg_dest + }, + parser::Tree::Binary { op, left, right } => { + let reg_left = self.tree(left); + let reg_right = self.tree(right); + let reg_dest = self.next_register(); + let operator = match op.as_str() { + "+" => BinaryOperation::Add, + "-" => BinaryOperation::Subtract, + "*" => BinaryOperation::Multiply, + "/" => BinaryOperation::Divide, + "in" => BinaryOperation::In, + _ => todo!(), + }; + self.code.push(Instruction::BinaryOperator { + operator, + left: reg_left, + right: reg_right, + dest: reg_dest, + }); + reg_dest + }, + parser::Tree::Array { terms } => { + // TODO: 0 length arrays + let reg = self.next_register(); + self.code.push(Instruction::Array { dest: reg }); + for term in terms { + let reg_term = self.tree(term); + self.code.push(Instruction::ArrayAppend { + source: reg_term, + dest: reg, + }); + } + reg + }, + unknown => todo!("unknown token {:?}", unknown), + } + } + + fn next_register(&mut self) -> Register { + let r = self.register; + self.register += 1; + r + } + + fn push_data(&mut self, data: Data) -> u8 { + let len = self.data.len(); + self.data.push(data); + len as u8 + } +} + +pub fn expr(tree: &parser::Tree) -> Bytecode { + let mut compiler = Compiler { + code: Vec::new(), + data: Vec::new(), + register: 0, + }; + let reg = compiler.tree(tree); + let mut bytecode = Bytecode { + code: compiler.code, + data: compiler.data, + }; + bytecode.code.push(Instruction::Return { source: reg }); + bytecode +} + +#[test] +fn sizeof_instruction_is_4_bytes() { + assert_eq!(std::mem::size_of::(), 4); } diff --git a/src/lexer.rs b/src/lexer.rs index 32cfdac..b58522d 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -19,7 +19,7 @@ static TOKEN_MATCHERS: Lazy<[(TokenType, Regex); 9]> = once_cell::sync::Lazy::ne (TokenType::String, regex!(r#"^"(.*?[^\\])?""#)), (TokenType::String, regex!(r"^'(.*?[^\\])?'")), (TokenType::Ident, regex!(r"^[a-z_][a-z0-9_]*")), - (TokenType::Paran, regex!(r"^[\[\]\(\)\{\}\<\>]")), + (TokenType::Paran, regex!(r"^[\[\](){}<>]")), (TokenType::Symbol, regex!(r"^[^ \n\ta-z0-9_\[\]\(\)]{1,2}")), (TokenType::Keyword, regex!(r"^(let|mut|const|type|fn|if|else|match|for|in|while|loop|export|import|struct|enum|async)")), ]); diff --git a/src/main.rs b/src/main.rs index f5a8b7b..ecd8ad0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,9 @@ // TODO: -// make "rusty" instead of directly ported from js -// modularize into separate files? // do proper error handling instead of panics everywhere // find out how to use macros or reduce code size for `expr()` +#![allow(dead_code, unused)] + mod lexer; mod parser; mod compiler; @@ -18,7 +18,8 @@ fn test(code: &str) -> Value { .filter(|(t, _)| *t != lexer::TokenType::Space) .collect(); let tree = parser::parse(&mut tokens.iter().peekable(), 0); - let result = runner::expr(&tree); + let bytecode = compiler::expr(&tree); + let result = runner::execute(&bytecode); dbg!(&result); result } @@ -26,10 +27,8 @@ fn test(code: &str) -> Value { fn main() { // test("math.sin"); // test("math.sin(10 * math.pi)"); - test("\"foo\" in [\"fooo\", \"bar\", \"baz\"]"); test("\"foo\" == \"foo\" ? 1 : 4"); test("false ? 3 : 4"); - test("\"hello\" + \", \" + \"world\""); // test("\"hello\".length"); // test("[1, 2, 3].length"); // test("str.capitalize(\"hello world\")"); @@ -59,4 +58,26 @@ mod tests { fn test_paran() { assert_eq!(test("(2 + 3) * 4"), Value::Number(20.0)); } + + #[test] + fn test_string() { + assert_eq!(test("\"hello!\""), Value::String("hello!".to_owned())); + } + + #[test] + fn test_array_in() { + assert_eq!(test("\"foo\" in [\"foo\", \"bar\", \"baz\"]"), Value::Boolean(true)); + } + + #[test] + fn test_array_not_in() { + assert_eq!(test("\"foo\" in [\"fooo\", \"bar\", \"baz\"]"), Value::Boolean(false)); + } + + #[test] + fn test_string_add() { + assert_eq!(test("\"hello\" + \", \" + \"world\""), Value::String("hello, world".to_owned())); + } + + } diff --git a/src/parser.rs b/src/parser/expr.rs similarity index 97% rename from src/parser.rs rename to src/parser/expr.rs index fc40144..000a8bc 100644 --- a/src/parser.rs +++ b/src/parser/expr.rs @@ -1,11 +1,11 @@ use crate::lexer; -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum ParserError { InvalidOperator(String), } -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum Tree { Unary { op: String, term: Box }, Binary { op: String, left: Box, right: Box }, @@ -16,19 +16,19 @@ pub enum Tree { Value { term: String, kind: lexer::TokenType }, } -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum UnaryOpType { Negative, Not, } -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum BinaryOpType { Dot, Subtract, Multiply, Power, Divide, Percent, Add, Equal, NotEqual, GreaterOrEqual, Greater, LessOrEqual, Less, In, And, Or, Comma, } -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq)] pub enum TernaryOpType { Switch } diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..fec7ae9 --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,6 @@ +mod expr; + +pub use expr::{ + UnaryOpType, BinaryOpType, TernaryOpType, + parse, Tree, ParserError, +}; diff --git a/src/runner.rs b/src/runner.rs index 8104201..b442ed4 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -28,7 +28,7 @@ impl Value { } } -pub fn expr(tree: &parser::Tree) -> Value { +pub fn execute_old(tree: &parser::Tree) -> Value { use parser::Tree; match tree { Tree::Value { term, kind } => { @@ -45,7 +45,7 @@ pub fn expr(tree: &parser::Tree) -> Value { } }, Tree::Unary { op, term } => { - let term = expr(term.as_ref()); + let term = execute_old(term.as_ref()); match op.as_str() { "-" => match term { Value::Number(n) => Value::Number(-n), @@ -59,8 +59,8 @@ pub fn expr(tree: &parser::Tree) -> Value { } }, Tree::Binary { op, left, right } => { - let left = expr(left.as_ref()); - let right = expr(right.as_ref()); + let left = execute_old(left.as_ref()); + let right = execute_old(right.as_ref()); match op.as_str() { "+" => match (left, right) { (Value::Number(left), Value::Number(right)) => Value::Number(left + right), @@ -137,16 +137,16 @@ pub fn expr(tree: &parser::Tree) -> Value { } }, Tree::Ternary { op, left, middle, right } => { - let left = expr(left.as_ref()); + let left = execute_old(left.as_ref()); match op.as_str() { "?" => { let Value::Boolean(cond) = left else { panic!("cannot use non boolean as boolean"); }; if cond { - expr(middle.as_ref()) + execute_old(middle.as_ref()) } else { - expr(right.as_ref()) + execute_old(right.as_ref()) } }, _ => unreachable!(), @@ -156,7 +156,7 @@ pub fn expr(tree: &parser::Tree) -> Value { if terms.is_empty() { Value::Array { kind: ValueType::Number, values: vec![] } } else { - let values: Vec<_> = terms.iter().map(expr).collect(); + let values: Vec<_> = terms.iter().map(execute_old).collect(); let kind = values[0].get_type(); Value::Array { kind, values } } @@ -165,3 +165,79 @@ pub fn expr(tree: &parser::Tree) -> Value { idk => panic!("idk: {:?}", idk), } } + +use super::compiler; + +pub fn execute(bytecode: &compiler::Bytecode) -> Value { + use compiler::Data; + // let mut data: [f64; 256] = [0.0; 256]; + let mut data: [Data; 8] = [0; 8].map(|_| Data::Float(0.0)); + for instruction in &bytecode.code { + use compiler::Instruction::*; + match instruction { + BinaryOperator { operator, left, right, dest } => { + use compiler::BinaryOperation::*; + match data[*left as usize] { + Data::Float(left) => { + let Data::Float(right) = data[*right as usize] else { panic!("invalid right hand side") }; + match operator { + Add => data[*dest as usize] = Data::Float(left + right), + Subtract => data[*dest as usize] = Data::Float(left - right), + Multiply => data[*dest as usize] = Data::Float(left * right), + Divide => data[*dest as usize] = Data::Float(left / right), + _ => todo!(), + } + }, + ref d @ Data::String(ref s) => { + match data[*right as usize] { + Data::String(ref s2) => { + match operator { + Add => data[*dest as usize] = Data::String(s.clone() + &s2), + _ => todo!(), + } + }, + Data::Array(ref arr) => { + match operator { + In => data[*dest as usize] = Data::Boolean(arr.contains(d)), + _ => todo!(), + } + }, + _ => todo!(), + } + }, + _ => todo!(), + } + }, + UnaryOperator { operator, input, dest } => { + use compiler::UnaryOperation::*; + let Data::Float(input) = data[*input as usize] else { panic!("not a float") }; + match operator { + Negate => data[*dest as usize] = Data::Float(-input), + _ => todo!(), + } + }, + Load { input, dest } => { + data[*dest as usize] = bytecode.data[*input as usize].clone(); + }, + Return { source } => { + dbg!(source); + // dbg!(&d); + return match data[*source as usize].clone() { + Data::Float(f) => Value::Number(f), + Data::String(s) => Value::String(s), + Data::Boolean(b) => Value::Boolean(b), + _ => todo!(), + }; + }, + Array { dest } => { + data[*dest as usize] = Data::Array(Vec::new()) + }, + ArrayAppend { source, dest } => { + let item = data[*source as usize].clone(); + let Data::Array(ref mut arr) = data[*dest as usize] else { panic!("not an array") }; + arr.push(item); + }, + } + } + Value::Number(10.0) +}