compile to bytecode
This commit is contained in:
parent
4108cb9d21
commit
bb17150b22
8 changed files with 301 additions and 25 deletions
10
ideas.md
Normal file
10
ideas.md
Normal file
|
@ -0,0 +1,10 @@
|
|||
# expr
|
||||
|
||||
I want a mix of Rust and TypeScript to make a scripting language that's nice to use.
|
||||
|
||||
goals:
|
||||
|
||||
- easy interop with rust
|
||||
- an extensive stdlib
|
||||
- support for fancy stuff like static typing and pattern matching
|
||||
- fast to write
|
3
spec.md
3
spec.md
|
@ -1,3 +0,0 @@
|
|||
# expr spec
|
||||
|
||||
TODO
|
172
src/compiler.rs
172
src/compiler.rs
|
@ -1,6 +1,172 @@
|
|||
use crate::parser;
|
||||
use crate::lexer;
|
||||
|
||||
pub fn compile(tree: &parser::Tree) {
|
||||
|
||||
pub struct Bytecode {
|
||||
pub code: Vec<Instruction>,
|
||||
pub data: Vec<Data>,
|
||||
}
|
||||
|
||||
/// Index of a virtual register; `Compiler::next_register` hands these out sequentially.
type Register = u8;
|
||||
/// Index of an entry in the constant pool (`Bytecode::data`), used by `Instruction::Load`.
type Pointer = u8;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Instruction {
|
||||
BinaryOperator {
|
||||
operator: BinaryOperation,
|
||||
left: Register,
|
||||
right: Register,
|
||||
dest: Register,
|
||||
},
|
||||
UnaryOperator {
|
||||
operator: UnaryOperation,
|
||||
input: Register,
|
||||
dest: Register,
|
||||
},
|
||||
Load {
|
||||
input: Pointer,
|
||||
dest: Register,
|
||||
},
|
||||
Array {
|
||||
dest: Register,
|
||||
},
|
||||
ArrayAppend {
|
||||
source: Register,
|
||||
dest: Register,
|
||||
},
|
||||
Return {
|
||||
source: Register,
|
||||
},
|
||||
}
|
||||
|
||||
/// A value as stored in the constant pool.
#[derive(Debug, Clone, PartialEq)]
pub enum Data {
    /// Unsigned 64-bit integer.
    Unsigned(u64),
    /// Signed 64-bit integer.
    Integer(i64),
    /// 64-bit floating point number.
    Float(f64),
    /// Owned string.
    String(String),
    /// Boolean.
    Boolean(bool),
    /// Ordered list of values.
    Array(Vec<Data>),
}
|
||||
|
||||
/// Operator carried by `Instruction::BinaryOperator`.
///
/// A fieldless enum, so it is `Copy` and comparable. This lets callers copy the
/// operator out of a borrowed instruction and compare operators in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOperation {
    /// Emitted for `+`.
    Add,
    /// Emitted for `-`.
    Subtract,
    /// Emitted for `*`.
    Multiply,
    /// Emitted for `/`.
    Divide,
    /// Emitted for the `in` operator.
    In,
}
|
||||
|
||||
/// Operator carried by `Instruction::UnaryOperator`.
///
/// A fieldless enum, so it is `Copy` and comparable. This lets callers copy the
/// operator out of a borrowed instruction and compare operators in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOperation {
    /// Emitted for prefix `-`.
    Negate,
}
|
||||
|
||||
pub struct Compiler {
|
||||
code: Vec<Instruction>,
|
||||
data: Vec<Data>,
|
||||
register: Register,
|
||||
}
|
||||
|
||||
impl Compiler {
|
||||
fn tree(&mut self, tree: &parser::Tree) -> Register {
|
||||
match tree {
|
||||
parser::Tree::Value { term, kind } => {
|
||||
use crate::lexer::TokenType;
|
||||
let data = match kind {
|
||||
TokenType::Float => {
|
||||
let float: f64 = term.parse().unwrap();
|
||||
Data::Float(float)
|
||||
},
|
||||
TokenType::String => {
|
||||
Data::String(term[1..term.len() - 1].to_string())
|
||||
},
|
||||
_ => todo!(),
|
||||
};
|
||||
let reg = self.next_register();
|
||||
let idx = self.push_data(data);
|
||||
self.code.push(Instruction::Load {
|
||||
input: idx,
|
||||
dest: reg,
|
||||
});
|
||||
reg
|
||||
},
|
||||
parser::Tree::Unary { op, term } => {
|
||||
let reg_input = self.tree(term);
|
||||
let reg_dest = self.next_register();
|
||||
let operator = match op.as_str() {
|
||||
"-" => UnaryOperation::Negate,
|
||||
_ => todo!(),
|
||||
};
|
||||
self.code.push(Instruction::UnaryOperator {
|
||||
operator,
|
||||
input: reg_input,
|
||||
dest: reg_dest,
|
||||
});
|
||||
reg_dest
|
||||
},
|
||||
parser::Tree::Binary { op, left, right } => {
|
||||
let reg_left = self.tree(left);
|
||||
let reg_right = self.tree(right);
|
||||
let reg_dest = self.next_register();
|
||||
let operator = match op.as_str() {
|
||||
"+" => BinaryOperation::Add,
|
||||
"-" => BinaryOperation::Subtract,
|
||||
"*" => BinaryOperation::Multiply,
|
||||
"/" => BinaryOperation::Divide,
|
||||
"in" => BinaryOperation::In,
|
||||
_ => todo!(),
|
||||
};
|
||||
self.code.push(Instruction::BinaryOperator {
|
||||
operator,
|
||||
left: reg_left,
|
||||
right: reg_right,
|
||||
dest: reg_dest,
|
||||
});
|
||||
reg_dest
|
||||
},
|
||||
parser::Tree::Array { terms } => {
|
||||
// TODO: 0 length arrays
|
||||
let reg = self.next_register();
|
||||
self.code.push(Instruction::Array { dest: reg });
|
||||
for term in terms {
|
||||
let reg_term = self.tree(term);
|
||||
self.code.push(Instruction::ArrayAppend {
|
||||
source: reg_term,
|
||||
dest: reg,
|
||||
});
|
||||
}
|
||||
reg
|
||||
},
|
||||
unknown => todo!("unknown token {:?}", unknown),
|
||||
}
|
||||
}
|
||||
|
||||
fn next_register(&mut self) -> Register {
|
||||
let r = self.register;
|
||||
self.register += 1;
|
||||
r
|
||||
}
|
||||
|
||||
fn push_data(&mut self, data: Data) -> u8 {
|
||||
let len = self.data.len();
|
||||
self.data.push(data);
|
||||
len as u8
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expr(tree: &parser::Tree) -> Bytecode {
|
||||
let mut compiler = Compiler {
|
||||
code: Vec::new(),
|
||||
data: Vec::new(),
|
||||
register: 0,
|
||||
};
|
||||
let reg = compiler.tree(tree);
|
||||
let mut bytecode = Bytecode {
|
||||
code: compiler.code,
|
||||
data: compiler.data,
|
||||
};
|
||||
bytecode.code.push(Instruction::Return { source: reg });
|
||||
bytecode
|
||||
}
|
||||
|
||||
/// Guards the compact encoding: an instruction must stay exactly four bytes.
#[test]
fn sizeof_instruction_is_4_bytes() {
    let size = std::mem::size_of::<Instruction>();
    assert_eq!(size, 4);
}
|
||||
|
|
|
@ -19,7 +19,7 @@ static TOKEN_MATCHERS: Lazy<[(TokenType, Regex); 9]> = once_cell::sync::Lazy::ne
|
|||
(TokenType::String, regex!(r#"^"(.*?[^\\])?""#)),
|
||||
(TokenType::String, regex!(r"^'(.*?[^\\])?'")),
|
||||
(TokenType::Ident, regex!(r"^[a-z_][a-z0-9_]*")),
|
||||
(TokenType::Paran, regex!(r"^[\[\]\(\)\{\}\<\>]")),
|
||||
(TokenType::Paran, regex!(r"^[\[\](){}<>]")),
|
||||
(TokenType::Symbol, regex!(r"^[^ \n\ta-z0-9_\[\]\(\)]{1,2}")),
|
||||
(TokenType::Keyword, regex!(r"^(let|mut|const|type|fn|if|else|match|for|in|while|loop|export|import|struct|enum|async)")),
|
||||
]);
|
||||
|
|
31
src/main.rs
31
src/main.rs
|
@ -1,9 +1,9 @@
|
|||
// TODO:
|
||||
// make "rusty" instead of directly ported from js
|
||||
// modularize into separate files?
|
||||
// do proper error handling instead of panics everywhere
|
||||
// find out how to use macros or reduce code size for `expr()`
|
||||
|
||||
#![allow(dead_code, unused)]
|
||||
|
||||
mod lexer;
|
||||
mod parser;
|
||||
mod compiler;
|
||||
|
@ -18,7 +18,8 @@ fn test(code: &str) -> Value {
|
|||
.filter(|(t, _)| *t != lexer::TokenType::Space)
|
||||
.collect();
|
||||
let tree = parser::parse(&mut tokens.iter().peekable(), 0);
|
||||
let result = runner::expr(&tree);
|
||||
let bytecode = compiler::expr(&tree);
|
||||
let result = runner::execute(&bytecode);
|
||||
dbg!(&result);
|
||||
result
|
||||
}
|
||||
|
@ -26,10 +27,8 @@ fn test(code: &str) -> Value {
|
|||
fn main() {
|
||||
// test("math.sin");
|
||||
// test("math.sin(10 * math.pi)");
|
||||
test("\"foo\" in [\"fooo\", \"bar\", \"baz\"]");
|
||||
test("\"foo\" == \"foo\" ? 1 : 4");
|
||||
test("false ? 3 : 4");
|
||||
test("\"hello\" + \", \" + \"world\"");
|
||||
// test("\"hello\".length");
|
||||
// test("[1, 2, 3].length");
|
||||
// test("str.capitalize(\"hello world\")");
|
||||
|
@ -59,4 +58,26 @@ mod tests {
|
|||
fn test_paran() {
|
||||
assert_eq!(test("(2 + 3) * 4"), Value::Number(20.0));
|
||||
}
|
||||
|
||||
// A string literal evaluates to its contents without the quotes.
#[test]
fn test_string() {
    let result = test("\"hello!\"");
    assert_eq!(result, Value::String("hello!".to_owned()));
}
|
||||
|
||||
// `in` is true when the array contains an equal string.
#[test]
fn test_array_in() {
    let result = test("\"foo\" in [\"foo\", \"bar\", \"baz\"]");
    assert_eq!(result, Value::Boolean(true));
}
|
||||
|
||||
// `in` is false when no element equals the string ("fooo" is not "foo").
#[test]
fn test_array_not_in() {
    let result = test("\"foo\" in [\"fooo\", \"bar\", \"baz\"]");
    assert_eq!(result, Value::Boolean(false));
}
|
||||
|
||||
// `+` concatenates strings, and chains left to right.
#[test]
fn test_string_add() {
    let result = test("\"hello\" + \", \" + \"world\"");
    assert_eq!(result, Value::String("hello, world".to_owned()));
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
use crate::lexer;
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
/// Error type exported by the parser module.
pub enum ParserError {
    /// Carries a string; presumably the text of the rejected operator (TODO confirm against the parser).
    InvalidOperator(String),
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Tree {
|
||||
Unary { op: String, term: Box<Tree> },
|
||||
Binary { op: String, left: Box<Tree>, right: Box<Tree> },
|
||||
|
@ -16,19 +16,19 @@ pub enum Tree {
|
|||
Value { term: String, kind: lexer::TokenType },
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
/// Unary operator kinds. `Tree::Unary` stores its operator as a `String`,
/// not as this enum.
pub enum UnaryOpType {
    Negative,
    Not,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
/// Binary operator kinds. `Tree::Binary` stores its operator as a `String`,
/// not as this enum.
pub enum BinaryOpType {
    Dot,
    Subtract,
    Multiply,
    Power,
    Divide,
    Percent,
    Add,
    Equal,
    NotEqual,
    GreaterOrEqual,
    Greater,
    LessOrEqual,
    Less,
    In,
    And,
    Or,
    Comma,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
/// Ternary operator kinds; currently a single variant.
pub enum TernaryOpType {
    Switch,
}
|
6
src/parser/mod.rs
Normal file
6
src/parser/mod.rs
Normal file
|
@ -0,0 +1,6 @@
|
|||
mod expr;
|
||||
|
||||
pub use expr::{
|
||||
UnaryOpType, BinaryOpType, TernaryOpType,
|
||||
parse, Tree, ParserError,
|
||||
};
|
|
@ -28,7 +28,7 @@ impl Value {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn expr(tree: &parser::Tree) -> Value {
|
||||
pub fn execute_old(tree: &parser::Tree) -> Value {
|
||||
use parser::Tree;
|
||||
match tree {
|
||||
Tree::Value { term, kind } => {
|
||||
|
@ -45,7 +45,7 @@ pub fn expr(tree: &parser::Tree) -> Value {
|
|||
}
|
||||
},
|
||||
Tree::Unary { op, term } => {
|
||||
let term = expr(term.as_ref());
|
||||
let term = execute_old(term.as_ref());
|
||||
match op.as_str() {
|
||||
"-" => match term {
|
||||
Value::Number(n) => Value::Number(-n),
|
||||
|
@ -59,8 +59,8 @@ pub fn expr(tree: &parser::Tree) -> Value {
|
|||
}
|
||||
},
|
||||
Tree::Binary { op, left, right } => {
|
||||
let left = expr(left.as_ref());
|
||||
let right = expr(right.as_ref());
|
||||
let left = execute_old(left.as_ref());
|
||||
let right = execute_old(right.as_ref());
|
||||
match op.as_str() {
|
||||
"+" => match (left, right) {
|
||||
(Value::Number(left), Value::Number(right)) => Value::Number(left + right),
|
||||
|
@ -137,16 +137,16 @@ pub fn expr(tree: &parser::Tree) -> Value {
|
|||
}
|
||||
},
|
||||
Tree::Ternary { op, left, middle, right } => {
|
||||
let left = expr(left.as_ref());
|
||||
let left = execute_old(left.as_ref());
|
||||
match op.as_str() {
|
||||
"?" => {
|
||||
let Value::Boolean(cond) = left else {
|
||||
panic!("cannot use non boolean as boolean");
|
||||
};
|
||||
if cond {
|
||||
expr(middle.as_ref())
|
||||
execute_old(middle.as_ref())
|
||||
} else {
|
||||
expr(right.as_ref())
|
||||
execute_old(right.as_ref())
|
||||
}
|
||||
},
|
||||
_ => unreachable!(),
|
||||
|
@ -156,7 +156,7 @@ pub fn expr(tree: &parser::Tree) -> Value {
|
|||
if terms.is_empty() {
|
||||
Value::Array { kind: ValueType::Number, values: vec![] }
|
||||
} else {
|
||||
let values: Vec<_> = terms.iter().map(expr).collect();
|
||||
let values: Vec<_> = terms.iter().map(execute_old).collect();
|
||||
let kind = values[0].get_type();
|
||||
Value::Array { kind, values }
|
||||
}
|
||||
|
@ -165,3 +165,79 @@ pub fn expr(tree: &parser::Tree) -> Value {
|
|||
idk => panic!("idk: {:?}", idk),
|
||||
}
|
||||
}
|
||||
|
||||
use super::compiler;
|
||||
|
||||
pub fn execute(bytecode: &compiler::Bytecode) -> Value {
|
||||
use compiler::Data;
|
||||
// let mut data: [f64; 256] = [0.0; 256];
|
||||
let mut data: [Data; 8] = [0; 8].map(|_| Data::Float(0.0));
|
||||
for instruction in &bytecode.code {
|
||||
use compiler::Instruction::*;
|
||||
match instruction {
|
||||
BinaryOperator { operator, left, right, dest } => {
|
||||
use compiler::BinaryOperation::*;
|
||||
match data[*left as usize] {
|
||||
Data::Float(left) => {
|
||||
let Data::Float(right) = data[*right as usize] else { panic!("invalid right hand side") };
|
||||
match operator {
|
||||
Add => data[*dest as usize] = Data::Float(left + right),
|
||||
Subtract => data[*dest as usize] = Data::Float(left - right),
|
||||
Multiply => data[*dest as usize] = Data::Float(left * right),
|
||||
Divide => data[*dest as usize] = Data::Float(left / right),
|
||||
_ => todo!(),
|
||||
}
|
||||
},
|
||||
ref d @ Data::String(ref s) => {
|
||||
match data[*right as usize] {
|
||||
Data::String(ref s2) => {
|
||||
match operator {
|
||||
Add => data[*dest as usize] = Data::String(s.clone() + &s2),
|
||||
_ => todo!(),
|
||||
}
|
||||
},
|
||||
Data::Array(ref arr) => {
|
||||
match operator {
|
||||
In => data[*dest as usize] = Data::Boolean(arr.contains(d)),
|
||||
_ => todo!(),
|
||||
}
|
||||
},
|
||||
_ => todo!(),
|
||||
}
|
||||
},
|
||||
_ => todo!(),
|
||||
}
|
||||
},
|
||||
UnaryOperator { operator, input, dest } => {
|
||||
use compiler::UnaryOperation::*;
|
||||
let Data::Float(input) = data[*input as usize] else { panic!("not a float") };
|
||||
match operator {
|
||||
Negate => data[*dest as usize] = Data::Float(-input),
|
||||
_ => todo!(),
|
||||
}
|
||||
},
|
||||
Load { input, dest } => {
|
||||
data[*dest as usize] = bytecode.data[*input as usize].clone();
|
||||
},
|
||||
Return { source } => {
|
||||
dbg!(source);
|
||||
// dbg!(&d);
|
||||
return match data[*source as usize].clone() {
|
||||
Data::Float(f) => Value::Number(f),
|
||||
Data::String(s) => Value::String(s),
|
||||
Data::Boolean(b) => Value::Boolean(b),
|
||||
_ => todo!(),
|
||||
};
|
||||
},
|
||||
Array { dest } => {
|
||||
data[*dest as usize] = Data::Array(Vec::new())
|
||||
},
|
||||
ArrayAppend { source, dest } => {
|
||||
let item = data[*source as usize].clone();
|
||||
let Data::Array(ref mut arr) = data[*dest as usize] else { panic!("not an array") };
|
||||
arr.push(item);
|
||||
},
|
||||
}
|
||||
}
|
||||
Value::Number(10.0)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue