compile to bytecode

This commit is contained in:
tezlm 2023-05-09 13:05:54 -07:00
parent 4108cb9d21
commit bb17150b22
Signed by: tezlm
GPG key ID: 649733FCD94AFBBA
8 changed files with 301 additions and 25 deletions

10
ideas.md Normal file
View file

@ -0,0 +1,10 @@
# expr
I want a mix of Rust and TypeScript to make a scripting language that's nice to use.
goals:
- easy interop with rust
- an extensive stdlib
- support for fancy stuff like static typing and pattern matching
- fast to write

View file

@ -1,3 +0,0 @@
# expr spec
TODO

View file

@ -1,6 +1,172 @@
use crate::parser;
use crate::lexer;
pub fn compile(tree: &parser::Tree) {
/// A compiled expression: an instruction stream plus the constants it loads.
pub struct Bytecode {
    /// Instructions in the order the compiler emitted them.
    pub code: Vec<Instruction>,
    /// Constant values; a `Load` instruction's `input` is an index into this list.
    pub data: Vec<Data>,
}
type Register = u8;
type Pointer = u8;
/// One step of the register machine.
///
/// Every operand is either a register index or an index into the constant list
/// of the enclosing bytecode.
#[derive(Debug)]
pub enum Instruction {
    /// Applies `operator` to the values in `left` and `right`, storing the
    /// result in `dest`.
    BinaryOperator {
        operator: BinaryOperation,
        left: Register,
        right: Register,
        dest: Register,
    },
    /// Applies `operator` to the value in `input`, storing the result in `dest`.
    UnaryOperator {
        operator: UnaryOperation,
        input: Register,
        dest: Register,
    },
    /// Copies the constant at index `input` into register `dest`.
    Load {
        input: Pointer,
        dest: Register,
    },
    /// Stores a new, empty array in `dest`.
    Array {
        dest: Register,
    },
    /// Appends a copy of the value in `source` to the array held in `dest`.
    ArrayAppend {
        source: Register,
        dest: Register,
    },
    /// Ends execution, yielding the value in `source`.
    Return {
        source: Register,
    },
}
/// A value as stored in the constant list and in registers.
#[derive(Debug, Clone, PartialEq)]
pub enum Data {
    /// Unsigned 64-bit integer.
    Unsigned(u64),
    /// Signed 64-bit integer.
    Integer(i64),
    /// 64-bit floating point number.
    Float(f64),
    /// Owned text.
    String(String),
    /// `true` or `false`.
    Boolean(bool),
    /// Ordered list of values.
    Array(Vec<Data>),
}
/// Operators that take two operands.
///
/// The enum carries no data, so it derives `Copy` and equality: opcodes can be
/// copied out of a borrowed instruction and compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOperation {
    /// `+`
    Add,
    /// `-`
    Subtract,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `in`
    In,
}
/// Operators that take a single operand.
///
/// The enum carries no data, so it derives `Copy` and equality: opcodes can be
/// copied out of a borrowed instruction and compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOperation {
    /// Prefix `-`
    Negate,
}
/// Mutable state accumulated while compiling one expression.
pub struct Compiler {
    /// Instructions emitted so far.
    code: Vec<Instruction>,
    /// Constants collected so far.
    data: Vec<Data>,
    /// The next register index that has not been handed out yet.
    register: Register,
}
impl Compiler {
    /// Compiles `tree` and everything beneath it, appending the generated
    /// instructions to `self.code`.
    ///
    /// Returns the register that holds the value of `tree` once the emitted
    /// instructions have run.
    ///
    /// # Panics
    ///
    /// Panics on literals, operators and tree nodes that are not implemented
    /// yet, on a float token that does not parse as `f64`, and when the
    /// expression needs more registers or constants than a `u8` can index.
    fn tree(&mut self, tree: &parser::Tree) -> Register {
        match tree {
            parser::Tree::Value { term, kind } => {
                use crate::lexer::TokenType;
                let data = match kind {
                    TokenType::Float => {
                        let float: f64 = term
                            .parse()
                            .expect("float token should be a valid f64 literal");
                        Data::Float(float)
                    }
                    TokenType::String => {
                        // Drop the first and last byte (the surrounding quotes);
                        // everything in between is kept verbatim.
                        Data::String(term[1..term.len() - 1].to_string())
                    }
                    _ => todo!(),
                };
                let reg = self.next_register();
                let idx = self.push_data(data);
                self.code.push(Instruction::Load {
                    input: idx,
                    dest: reg,
                });
                reg
            }
            parser::Tree::Unary { op, term } => {
                // The operand is compiled first so its register is allocated
                // before the destination register.
                let reg_input = self.tree(term);
                let reg_dest = self.next_register();
                let operator = match op.as_str() {
                    "-" => UnaryOperation::Negate,
                    _ => todo!(),
                };
                self.code.push(Instruction::UnaryOperator {
                    operator,
                    input: reg_input,
                    dest: reg_dest,
                });
                reg_dest
            }
            parser::Tree::Binary { op, left, right } => {
                let reg_left = self.tree(left);
                let reg_right = self.tree(right);
                let reg_dest = self.next_register();
                let operator = match op.as_str() {
                    "+" => BinaryOperation::Add,
                    "-" => BinaryOperation::Subtract,
                    "*" => BinaryOperation::Multiply,
                    "/" => BinaryOperation::Divide,
                    "in" => BinaryOperation::In,
                    _ => todo!(),
                };
                self.code.push(Instruction::BinaryOperator {
                    operator,
                    left: reg_left,
                    right: reg_right,
                    dest: reg_dest,
                });
                reg_dest
            }
            parser::Tree::Array { terms } => {
                // TODO: 0 length arrays
                let reg = self.next_register();
                self.code.push(Instruction::Array { dest: reg });
                for term in terms {
                    let reg_term = self.tree(term);
                    self.code.push(Instruction::ArrayAppend {
                        source: reg_term,
                        dest: reg,
                    });
                }
                reg
            }
            unknown => todo!("unknown token {:?}", unknown),
        }
    }

    /// Hands out the next unused register.
    ///
    /// # Panics
    ///
    /// Panics when the register counter would overflow. A wrapping increment
    /// would restart at register 0 and make later values overwrite registers
    /// that are still in use.
    fn next_register(&mut self) -> Register {
        let r = self.register;
        self.register = r
            .checked_add(1)
            .expect("expression needs more registers than a u8 can index");
        r
    }

    /// Appends `data` to the constant list and returns its index.
    ///
    /// # Panics
    ///
    /// Panics when the index does not fit in a `u8`. A truncating cast would
    /// make the 257th constant share index 0 with the first one.
    fn push_data(&mut self, data: Data) -> u8 {
        // Fully qualified so this works without `TryFrom` in the prelude.
        let idx: u8 = std::convert::TryFrom::try_from(self.data.len())
            .expect("expression needs more constants than a u8 can index");
        self.data.push(data);
        idx
    }
}
/// Compiles `tree` into [`Bytecode`] whose last instruction returns the value
/// of the whole expression.
pub fn expr(tree: &parser::Tree) -> Bytecode {
    let mut state = Compiler {
        register: 0,
        data: Vec::new(),
        code: Vec::new(),
    };

    // Emit the expression itself, then the instruction that yields its result.
    let result = state.tree(tree);
    state.code.push(Instruction::Return { source: result });

    let Compiler { code, data, .. } = state;
    Bytecode { code, data }
}
#[test]
fn sizeof_instruction_is_4_bytes() {
    // Pins the in-memory size of one instruction.
    let size = std::mem::size_of::<Instruction>();
    assert_eq!(size, 4);
}

View file

@ -19,7 +19,7 @@ static TOKEN_MATCHERS: Lazy<[(TokenType, Regex); 9]> = once_cell::sync::Lazy::ne
(TokenType::String, regex!(r#"^"(.*?[^\\])?""#)),
(TokenType::String, regex!(r"^'(.*?[^\\])?'")),
(TokenType::Ident, regex!(r"^[a-z_][a-z0-9_]*")),
(TokenType::Paran, regex!(r"^[\[\]\(\)\{\}\<\>]")),
(TokenType::Paran, regex!(r"^[\[\](){}<>]")),
(TokenType::Symbol, regex!(r"^[^ \n\ta-z0-9_\[\]\(\)]{1,2}")),
(TokenType::Keyword, regex!(r"^(let|mut|const|type|fn|if|else|match|for|in|while|loop|export|import|struct|enum|async)")),
]);

View file

@ -1,9 +1,9 @@
// TODO:
// make "rusty" instead of directly ported from js
// modularize into separate files?
// do proper error handling instead of panics everywhere
// find out how to use macros or reduce code size for `expr()`
#![allow(dead_code, unused)]
mod lexer;
mod parser;
mod compiler;
@ -18,7 +18,8 @@ fn test(code: &str) -> Value {
.filter(|(t, _)| *t != lexer::TokenType::Space)
.collect();
let tree = parser::parse(&mut tokens.iter().peekable(), 0);
let result = runner::expr(&tree);
let bytecode = compiler::expr(&tree);
let result = runner::execute(&bytecode);
dbg!(&result);
result
}
@ -26,10 +27,8 @@ fn test(code: &str) -> Value {
fn main() {
// test("math.sin");
// test("math.sin(10 * math.pi)");
test("\"foo\" in [\"fooo\", \"bar\", \"baz\"]");
test("\"foo\" == \"foo\" ? 1 : 4");
test("false ? 3 : 4");
test("\"hello\" + \", \" + \"world\"");
// test("\"hello\".length");
// test("[1, 2, 3].length");
// test("str.capitalize(\"hello world\")");
@ -59,4 +58,26 @@ mod tests {
fn test_paran() {
assert_eq!(test("(2 + 3) * 4"), Value::Number(20.0));
}
#[test]
fn test_string() {
    // A string literal evaluates to its contents without the quotes.
    let expected = Value::String("hello!".to_owned());
    assert_eq!(test("\"hello!\""), expected);
}
#[test]
fn test_array_in() {
    // `in` is true when the array has an element equal to the left operand.
    let expected = Value::Boolean(true);
    assert_eq!(test("\"foo\" in [\"foo\", \"bar\", \"baz\"]"), expected);
}
#[test]
fn test_array_not_in() {
    // "fooo" is not equal to "foo", so the lookup must fail.
    let expected = Value::Boolean(false);
    assert_eq!(test("\"foo\" in [\"fooo\", \"bar\", \"baz\"]"), expected);
}
#[test]
fn test_string_add() {
    // `+` on strings concatenates, left to right.
    let expected = Value::String("hello, world".to_owned());
    assert_eq!(test("\"hello\" + \", \" + \"world\""), expected);
}
}

View file

@ -1,11 +1,11 @@
use crate::lexer;
#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum ParserError {
InvalidOperator(String),
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum Tree {
Unary { op: String, term: Box<Tree> },
Binary { op: String, left: Box<Tree>, right: Box<Tree> },
@ -16,19 +16,19 @@ pub enum Tree {
Value { term: String, kind: lexer::TokenType },
}
#[derive(Debug)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UnaryOpType {
Negative, Not,
}
#[derive(Debug)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BinaryOpType {
Dot, Subtract, Multiply, Power, Divide, Percent, Add,
Equal, NotEqual, GreaterOrEqual, Greater, LessOrEqual, Less,
In, And, Or, Comma,
}
#[derive(Debug)]
#[derive(Debug, Clone, PartialEq)]
pub enum TernaryOpType {
Switch
}

6
src/parser/mod.rs Normal file
View file

@ -0,0 +1,6 @@
mod expr;
pub use expr::{
UnaryOpType, BinaryOpType, TernaryOpType,
parse, Tree, ParserError,
};

View file

@ -28,7 +28,7 @@ impl Value {
}
}
pub fn expr(tree: &parser::Tree) -> Value {
pub fn execute_old(tree: &parser::Tree) -> Value {
use parser::Tree;
match tree {
Tree::Value { term, kind } => {
@ -45,7 +45,7 @@ pub fn expr(tree: &parser::Tree) -> Value {
}
},
Tree::Unary { op, term } => {
let term = expr(term.as_ref());
let term = execute_old(term.as_ref());
match op.as_str() {
"-" => match term {
Value::Number(n) => Value::Number(-n),
@ -59,8 +59,8 @@ pub fn expr(tree: &parser::Tree) -> Value {
}
},
Tree::Binary { op, left, right } => {
let left = expr(left.as_ref());
let right = expr(right.as_ref());
let left = execute_old(left.as_ref());
let right = execute_old(right.as_ref());
match op.as_str() {
"+" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Number(left + right),
@ -137,16 +137,16 @@ pub fn expr(tree: &parser::Tree) -> Value {
}
},
Tree::Ternary { op, left, middle, right } => {
let left = expr(left.as_ref());
let left = execute_old(left.as_ref());
match op.as_str() {
"?" => {
let Value::Boolean(cond) = left else {
panic!("cannot use non boolean as boolean");
};
if cond {
expr(middle.as_ref())
execute_old(middle.as_ref())
} else {
expr(right.as_ref())
execute_old(right.as_ref())
}
},
_ => unreachable!(),
@ -156,7 +156,7 @@ pub fn expr(tree: &parser::Tree) -> Value {
if terms.is_empty() {
Value::Array { kind: ValueType::Number, values: vec![] }
} else {
let values: Vec<_> = terms.iter().map(expr).collect();
let values: Vec<_> = terms.iter().map(execute_old).collect();
let kind = values[0].get_type();
Value::Array { kind, values }
}
@ -165,3 +165,79 @@ pub fn expr(tree: &parser::Tree) -> Value {
idk => panic!("idk: {:?}", idk),
}
}
use super::compiler;
/// Runs `bytecode` on a register machine and returns the value handed to the
/// first `Return` instruction that is reached.
///
/// Instructions run strictly in order. Every register starts out as
/// `Data::Float(0.0)`.
///
/// # Panics
///
/// Panics when an operator is applied to operand types it does not support
/// (yet), when `ArrayAppend` targets a register that does not hold an array,
/// when a `Load` refers to a constant index outside `bytecode.data`, and when
/// the returned value is of a kind that has no `Value` conversion here.
pub fn execute(bytecode: &compiler::Bytecode) -> Value {
    use compiler::Data;
    use compiler::Instruction::*;

    // Register operands are `u8` indices, so provide one slot for every index
    // an instruction can name; no instruction can then index out of bounds.
    const REGISTER_COUNT: usize = u8::MAX as usize + 1;
    let mut data: Vec<Data> = vec![Data::Float(0.0); REGISTER_COUNT];

    for instruction in &bytecode.code {
        match instruction {
            BinaryOperator { operator, left, right, dest } => {
                let result =
                    execute_binary(operator, &data[*left as usize], &data[*right as usize]);
                data[*dest as usize] = result;
            }
            UnaryOperator { operator, input, dest } => {
                let Data::Float(input) = data[*input as usize] else { panic!("not a float") };
                data[*dest as usize] = match operator {
                    compiler::UnaryOperation::Negate => Data::Float(-input),
                };
            }
            Load { input, dest } => {
                data[*dest as usize] = bytecode.data[*input as usize].clone();
            }
            Return { source } => {
                // Execution ends here, so the value is moved out of its
                // register instead of being cloned.
                let value = std::mem::replace(&mut data[*source as usize], Data::Float(0.0));
                return match value {
                    Data::Float(f) => Value::Number(f),
                    Data::String(s) => Value::String(s),
                    Data::Boolean(b) => Value::Boolean(b),
                    _ => todo!(),
                };
            }
            Array { dest } => {
                data[*dest as usize] = Data::Array(Vec::new());
            }
            ArrayAppend { source, dest } => {
                // Clone first: `source` and `dest` live in the same register
                // file, so both cannot be borrowed at once.
                let item = data[*source as usize].clone();
                let Data::Array(ref mut arr) = data[*dest as usize] else { panic!("not an array") };
                arr.push(item);
            }
        }
    }

    // NOTE(review): placeholder result for bytecode without a `Return`
    // instruction; kept as-is so existing callers see no change.
    Value::Number(10.0)
}

/// Evaluates `operator` on two register values and returns the result.
///
/// Supported combinations: the four arithmetic operators on two floats, `Add`
/// on two strings (concatenation), and `In` with a string on the left and an
/// array on the right (membership).
fn execute_binary(
    operator: &compiler::BinaryOperation,
    left: &compiler::Data,
    right: &compiler::Data,
) -> compiler::Data {
    use compiler::BinaryOperation::*;
    use compiler::Data;

    match left {
        Data::Float(lhs) => {
            // The right-hand side is checked before the operator is inspected.
            let Data::Float(rhs) = right else { panic!("invalid right hand side") };
            match operator {
                Add => Data::Float(lhs + rhs),
                Subtract => Data::Float(lhs - rhs),
                Multiply => Data::Float(lhs * rhs),
                Divide => Data::Float(lhs / rhs),
                In => todo!(),
            }
        }
        Data::String(text) => match (right, operator) {
            (Data::String(suffix), Add) => Data::String(format!("{}{}", text, suffix)),
            (Data::Array(items), In) => Data::Boolean(items.contains(left)),
            _ => todo!(),
        },
        _ => todo!(),
    }
}