arrays; split into separate files

This commit is contained in:
tezlm 2023-02-24 12:22:42 -08:00
parent 43bfff002f
commit 27587a5e3f
Signed by: tezlm
GPG key ID: 649733FCD94AFBBA
4 changed files with 356 additions and 311 deletions

40
src/lexer.rs Normal file
View file

@ -0,0 +1,40 @@
use once_cell::sync::Lazy;
use regex::{Regex, RegexBuilder};
/// Category the lexer assigns to each token it produces.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum TokenType {
    /// A run of spaces, newlines or tabs.
    Space,
    /// A prefixed integer literal: `0x…`, `0b…` or `0o…`.
    Int,
    /// A decimal number with an optional fractional part (plain `123` is lexed as this too).
    Float,
    /// A single- or double-quoted string literal; the token text keeps its quotes.
    String,
    /// A letter or `_` followed by any number of letters, digits or `_`.
    Ident,
    /// One of the bracket characters `[`, `]`, `(` or `)`.
    Paran,
    /// One or two characters that fit none of the categories above (operators, punctuation).
    Symb,
}
pub fn lex(input: &str) -> Vec<(TokenType, &str)> {
let mut code = input.to_owned();
let mut tokens = Vec::new();
static TOKEN_MATCHERS: Lazy<[(TokenType, Regex); 8]> = once_cell::sync::Lazy::new(|| {
[
(TokenType::Space, RegexBuilder::new(r"^[ \n\t]+").case_insensitive(true).build().unwrap()),
(TokenType::Int, RegexBuilder::new(r"^0(x[0-9a-f]+|b[01]+|o[0-7]+)").case_insensitive(true).build().unwrap()),
(TokenType::Float, RegexBuilder::new(r"^[0-9]+(\.[0-9]+)?").case_insensitive(true).build().unwrap()),
(TokenType::String, RegexBuilder::new(r#"^"(.*?[^\\])?""#).case_insensitive(true).build().unwrap()),
(TokenType::String, RegexBuilder::new(r"^'(.*?[^\\])?'").case_insensitive(true).build().unwrap()),
(TokenType::Ident, RegexBuilder::new(r"^[a-z_][a-z0-9_]*").case_insensitive(true).build().unwrap()),
(TokenType::Paran, RegexBuilder::new(r"^[\[\]\(\)]").case_insensitive(true).build().unwrap()),
(TokenType::Symb, RegexBuilder::new(r"^[^ \n\ta-z0-9_\[\]\(\)]{1,2}").case_insensitive(true).build().unwrap()),
]
});
let mut i = 0;
while !code.is_empty() {
for (token_type, regex) in TOKEN_MATCHERS.iter() {
let found = regex.find(&code);
let Some(mat) = found else {
continue;
};
let end = mat.end();
tokens.push((*token_type, &input[i..i + end]));
code = code[end..].to_owned();
i += end;
break;
}
}
tokens
}

View file

@ -1,331 +1,35 @@
#![allow(dead_code)]
// TODO:
// make "rusty" instead of directly ported from js
// use enums instead of strings!
// modularize into separate files?
// do proper error handling instead of panics everywhere
// find out how to use macros or reduce code size for `expr()`
use once_cell::sync::Lazy;
use regex::{Regex, RegexBuilder};
fn lex(input: &str) -> Vec<(String, &str)> {
let mut code = input.to_owned();
let mut tokens = Vec::new();
static TOKEN_MATCHERS: Lazy<[(String, Regex); 8]> = once_cell::sync::Lazy::new(|| {
[
("space".into(), RegexBuilder::new(r"^[ \n\t]+").case_insensitive(true).build().unwrap()),
("int".into(), RegexBuilder::new(r"^0(x[0-9a-f]+|b[01]+|o[0-7]+)").case_insensitive(true).build().unwrap()),
("float".into(), RegexBuilder::new(r"^[0-9]+(\.[0-9]+)?").case_insensitive(true).build().unwrap()),
("str".into(), RegexBuilder::new(r#"^"(.*?[^\\])?""#).case_insensitive(true).build().unwrap()),
("str".into(), RegexBuilder::new(r"^'(.*?[^\\])?'").case_insensitive(true).build().unwrap()),
("ident".into(), RegexBuilder::new(r"^[a-z_][a-z0-9_]*").case_insensitive(true).build().unwrap()),
("paran".into(), RegexBuilder::new(r"^[\[\]\(\)]").case_insensitive(true).build().unwrap()),
("symb".into(), RegexBuilder::new(r"^[^ \n\ta-z0-9_\[\]\(\)]{1,2}").case_insensitive(true).build().unwrap()),
]
});
let mut i = 0;
while !code.is_empty() {
for (name, reg) in TOKEN_MATCHERS.iter() {
let found = reg.find(&code);
let Some(mat) = found else {
continue;
};
let end = mat.end();
tokens.push((name.clone(), &input[i..i + end]));
code = code[end..].to_owned();
i += end;
break;
}
}
tokens
}
/// Syntax tree built by `parse`.
#[derive(Debug)]
enum Tree {
    /// Prefix operator (`-` or `!`) applied to a single operand.
    Unary {
        op: String,
        term: Box<Tree>,
    },
    /// Infix operator with a left and a right operand.
    Binary {
        op: String,
        left: Box<Tree>,
        right: Box<Tree>,
    },
    /// The `cond ? a : b` form; `left` is the condition.
    Ternary {
        op: String,
        left: Box<Tree>,
        middle: Box<Tree>,
        right: Box<Tree>,
    },
    /// A call: the callee expression followed by its argument list.
    Function {
        ident: Box<Tree>,
        terms: Vec<Tree>,
    },
    /// A subscript: `array[index]`.
    Index {
        array: Box<Tree>,
        index: Box<Tree>,
    },
    /// An array literal.
    Array {
        terms: Vec<Tree>,
    },
    /// A leaf token: its raw text and the kind name the lexer gave it.
    Value {
        term: String,
        kind: String,
    },
}
/// Parses one expression from `tokens` by precedence climbing over the binding
/// powers listed in `get_bp`.
///
/// `tokens` is expected to contain no whitespace tokens. `min_bp` is the weakest
/// left binding power an operator may have and still be absorbed into the
/// expression being built; top-level callers pass `0`.
///
/// Parsing stops, without consuming the token, at `)`, `]`, `,`, `:`, at an
/// operator that binds weaker than `min_bp`, or at the end of input.
///
/// # Panics
///
/// Panics on any syntax error: missing tokens, unbalanced brackets, an operator
/// in operand position, or two operands in a row.
fn parse(tokens: &mut std::iter::Peekable<std::slice::Iter<&(String, &str)>>, min_bp: usize) -> Tree {
    /// Returns the `(left, right)` binding powers of an infix/postfix symbol,
    /// or `None` when the symbol is not an operator.
    fn get_bp(symb: &str) -> Option<(usize, usize)> {
        let binding = match symb {
            "." => (24, 25),
            "(" => (22, 23),
            "[" => (20, 21),
            // negation/not: 19
            "**" => (18, 17),
            "*" => (15, 16),
            "/" => (15, 16),
            "%" => (15, 16),
            "+" => (13, 14),
            "-" => (13, 14),
            "==" => (11, 12),
            "!=" => (11, 12),
            ">=" => (11, 12),
            "<=" => (11, 12),
            ">" => (11, 12),
            "<" => (11, 12),
            "in" => (9, 10),
            "&&" => (7, 8),
            "||" => (5, 6),
            "?" => (4, 3),
            "," => (2, 1),
            _ => return None,
        };
        Some(binding)
    }

    // Operand position: prefix operator, grouping, array literal or a plain value.
    let mut lhs = match tokens.next().expect("missing token") {
        token @ (_, "-") | token @ (_, "!") => Tree::Unary { op: token.1.to_owned(), term: Box::new(parse(tokens, 19)) },
        (_, "(") => {
            if tokens.peek().map(|i| i.1) == Some(")") {
                panic!("empty paranthases");
            }
            let newlhs = parse(tokens, 0);
            if tokens.peek().map(|i| i.1) != Some(")") {
                panic!("missing closing paranthase");
            }
            tokens.next();
            newlhs
        },
        (_, "[") => {
            let mut cons = Vec::new();
            let mut next = tokens.peek();
            while next.map(|i| i.1) != Some("]") {
                cons.push(parse(tokens, 0));
                next = tokens.peek();
                if next.map(|i| i.1) == Some(",") {
                    tokens.next();
                    next = tokens.peek();
                } else {
                    break;
                }
            }
            if next.map(|i| i.1) != Some("]") {
                panic!("missing closing bracket");
            }
            tokens.next();
            Tree::Array { terms: cons }
        },
        token @ (symb, _) if symb == "symb" => panic!("unexpected token {}", token.1),
        token @ (_, "in") => panic!("unexpected token {}", token.1),
        token @ (kind, _) => Tree::Value { term: token.1.to_owned(), kind: kind.clone() },
    };

    // Operator position: keep folding operators into `lhs` while they bind tightly enough.
    loop {
        let Some(next) = tokens.peek() else { break };
        match next {
            (t, s) if t != "symb" && t != "paran" && s != &"in" => panic!("unexpected token {}", s),
            (_, ")") | (_, "]") | (_, ",") | (_, ":") => break,
            (_, _) => {},
        };
        let Some((left_bp, right_bp)) = get_bp(next.1) else { panic!("invalid symbol") };
        if left_bp < min_bp { break }
        match next.1 {
            t @ "(" | t @ "[" => {
                tokens.next();
                if t == "(" {
                    let mut cons: Vec<Tree> = Vec::new();
                    while tokens.peek().map(|i| i.1) != Some(")") {
                        cons.push(parse(tokens, 0));
                        if tokens.peek().map(|i| i.1) == Some(",") {
                            tokens.next();
                        } else {
                            break;
                        }
                    }
                    if tokens.peek().map(|i| i.1) != Some(")") { panic!("missing a closing paranthase") }
                    // Consume the `)`; otherwise the loop above stops on it and
                    // everything after the call is silently dropped.
                    tokens.next();
                    lhs = Tree::Function { ident: Box::new(lhs), terms: cons };
                } else {
                    let index = parse(tokens, 0);
                    if tokens.peek().map(|i| i.1) != Some("]") { panic!("missing a closing bracket") }
                    // Consume the `]` for the same reason as the `)` above.
                    tokens.next();
                    lhs = Tree::Index { array: Box::new(lhs), index: Box::new(index) };
                }
            },
            "?" => {
                tokens.next();
                let middle = parse(tokens, 0);
                if tokens.next().map(|i| i.1) != Some(":") { panic!("missing a colon") }
                lhs = Tree::Ternary {
                    op: "?".into(),
                    left: Box::new(lhs),
                    middle: Box::new(middle),
                    right: Box::new(parse(tokens, right_bp)),
                };
            },
            symbol => {
                tokens.next();
                lhs = Tree::Binary {
                    op: symbol.to_owned(),
                    left: Box::new(lhs),
                    right: Box::new(parse(tokens, right_bp)),
                };
            }
        }
    }
    lhs
}
/// A runtime value produced by evaluating a tree.
#[derive(Debug)]
enum Value {
    /// Any numeric value; all numbers are stored as `f64`.
    Number(f64),
    /// A text value.
    String(String),
    /// `true` or `false`.
    Boolean(bool),
    /// A list of values together with a boxed `kind` value.
    Array {
        kind: Box<Value>,
        values: Vec<Value>,
    },
}
/// Evaluates `tree` to a [`Value`].
///
/// Supports literals (`true`/`false`, numbers, strings), the unary `-` and `!`
/// operators, arithmetic, comparison and boolean binary operators, and the
/// `cond ? a : b` ternary. Both operands of a binary operator are always
/// evaluated; only the selected branch of a ternary is.
///
/// # Panics
///
/// Panics on operand type mismatches and malformed numbers. Identifiers other than
/// `true`/`false`, unknown binary operators and the `Function`, `Index` and `Array`
/// nodes are not implemented yet and hit `todo!()`.
fn expr(tree: &Tree) -> Value {
    /// Converts the text of a numeric token to a number, honouring the
    /// `0x` / `0b` / `0o` prefixes (in either case) that `f64::parse` does not understand.
    fn parse_number(term: &str) -> f64 {
        let bytes = term.as_bytes();
        let radix = match (bytes.first().copied(), bytes.get(1).map(u8::to_ascii_lowercase)) {
            (Some(b'0'), Some(b'x')) => 16,
            (Some(b'0'), Some(b'b')) => 2,
            (Some(b'0'), Some(b'o')) => 8,
            _ => return term.parse().expect("invalid number"),
        };
        // The two prefix bytes are ASCII, so slicing at 2 is on a char boundary.
        // The cast is lossy above 2^53, like every other number in this evaluator.
        u64::from_str_radix(&term[2..], radix).expect("invalid number") as f64
    }

    match tree {
        Tree::Value { term, kind } => match kind.as_str() {
            // The lexer names prefixed integers "int"; "integer" is kept for compatibility.
            "float" | "int" | "integer" => Value::Number(parse_number(term)),
            // String tokens carry their surrounding quotes; drop them for the value.
            "str" => Value::String(
                term.get(1..term.len().saturating_sub(1)).unwrap_or_default().to_string(),
            ),
            "ident" => match term.as_str() {
                "true" => Value::Boolean(true),
                "false" => Value::Boolean(false),
                _ => todo!(),
            },
            _ => unreachable!(),
        },
        Tree::Unary { op, term } => match (op.as_str(), expr(term)) {
            ("-", Value::Number(n)) => Value::Number(-n),
            ("-", _) => panic!("cannot negate non number"),
            ("!", Value::Boolean(b)) => Value::Boolean(!b),
            ("!", _) => panic!("cannot invert non boolean"),
            _ => unreachable!(),
        },
        Tree::Binary { op, left, right } => {
            let left = expr(left);
            let right = expr(right);
            match (op.as_str(), left, right) {
                ("+", Value::Number(l), Value::Number(r)) => Value::Number(l + r),
                ("+", Value::String(l), Value::String(r)) => Value::String(l + r.as_str()),
                ("+", _, _) => panic!("cannot add non numbers or non strings"),

                ("-", Value::Number(l), Value::Number(r)) => Value::Number(l - r),
                ("-", _, _) => panic!("cannot subtract non number"),

                ("*", Value::Number(l), Value::Number(r)) => Value::Number(l * r),
                ("*", _, _) => panic!("cannot multiply non number"),

                ("/", Value::Number(l), Value::Number(r)) => Value::Number(l / r),
                ("/", _, _) => panic!("cannot divide non number"),

                ("%", Value::Number(l), Value::Number(r)) => Value::Number(l % r),
                ("%", _, _) => panic!("cannot modulo non number"),

                ("**", Value::Number(l), Value::Number(r)) => Value::Number(l.powf(r)),
                ("**", _, _) => panic!("cannot exponentiate non number"),

                ("==", Value::Number(l), Value::Number(r)) => Value::Boolean(l == r),
                ("==", Value::String(l), Value::String(r)) => Value::Boolean(l == r),
                ("==", Value::Boolean(l), Value::Boolean(r)) => Value::Boolean(l == r),
                ("==", _, _) => panic!("cannot compare different types"),

                ("!=", Value::Number(l), Value::Number(r)) => Value::Boolean(l != r),
                ("!=", Value::String(l), Value::String(r)) => Value::Boolean(l != r),
                ("!=", Value::Boolean(l), Value::Boolean(r)) => Value::Boolean(l != r),
                ("!=", _, _) => panic!("cannot compare different types"),

                (">", Value::Number(l), Value::Number(r)) => Value::Boolean(l > r),
                (">", Value::String(l), Value::String(r)) => Value::Boolean(l > r),
                (">", _, _) => panic!("cannot compare different types"),

                ("<", Value::Number(l), Value::Number(r)) => Value::Boolean(l < r),
                ("<", Value::String(l), Value::String(r)) => Value::Boolean(l < r),
                ("<", _, _) => panic!("cannot compare different types"),

                // Previously evaluated with `<=`, which inverted the comparison.
                (">=", Value::Number(l), Value::Number(r)) => Value::Boolean(l >= r),
                (">=", Value::String(l), Value::String(r)) => Value::Boolean(l >= r),
                (">=", _, _) => panic!("cannot compare different types"),

                ("<=", Value::Number(l), Value::Number(r)) => Value::Boolean(l <= r),
                ("<=", Value::String(l), Value::String(r)) => Value::Boolean(l <= r),
                ("<=", _, _) => panic!("cannot compare different types"),

                ("&&", Value::Boolean(l), Value::Boolean(r)) => Value::Boolean(l && r),
                ("&&", _, _) => panic!("cannot compare different types"),

                ("||", Value::Boolean(l), Value::Boolean(r)) => Value::Boolean(l || r),
                ("||", _, _) => panic!("cannot compare different types"),

                _ => todo!(),
            }
        },
        Tree::Ternary { op, left, middle, right } => {
            let left = expr(left);
            match op.as_str() {
                "?" => {
                    let Value::Boolean(cond) = left else {
                        panic!("cannot use non boolean as boolean");
                    };
                    if cond { expr(middle) } else { expr(right) }
                },
                _ => unreachable!(),
            }
        },
        _ => todo!(),
    }
}
mod lexer;
mod parser;
mod runner;
/// Runs `code` through the lexer, the parser and the evaluator, then prints the
/// result with `dbg!`.
///
/// NOTE(review): this span comes from a commit view and appears to show both the
/// pre-change and the post-change line of every changed statement, which is why
/// several statements occur twice in slightly different forms — confirm against
/// the actual file before relying on it.
fn test(code: &str) {
let tokens = lex(code);
let tokens = lexer::lex(code);
// Whitespace tokens are dropped before parsing.
let tokens: Vec<_> = tokens
.iter()
.filter(|(t, _)| t != "space")
.filter(|(t, _)| *t != lexer::TokenType::Space)
.collect();
// Parse starting from binding power 0, then evaluate the resulting tree.
let tree = parse(&mut tokens.iter().peekable(), 0);
let result = expr(&tree);
let tree = parser::parse(&mut tokens.iter().peekable(), 0);
let result = runner::expr(&tree);
dbg!(result);
}
fn main() {
test("123");
// test("-123");
// test("2 + 3 * 4");
// test("(2 + 3) * 4");
test("-123");
test("2 + 3 * 4");
test("(2 + 3) * 4");
// test("math.sin");
// test("math.sin(10 * math.pi)");
// test("\"foo\" in [\"fooo\", \"bar\", \"baz\"]");
// test("\"foo\" ? 1 : 4");
// test("true ? 3 4");
// test("\"hello\" + \", \" + \"world\"");
test("\"foo\" in [\"fooo\", \"bar\", \"baz\"]");
test("\"foo\" == \"foo\" ? 1 : 4");
test("false ? 3 : 4");
test("\"hello\" + \", \" + \"world\"");
// test("\"hello\".length");
// test("[1, 2, 3].length");
// test("str.capitalize(\"hello world\")");

134
src/parser.rs Normal file
View file

@ -0,0 +1,134 @@
use crate::lexer;
/// Syntax tree built by [`parse`].
#[derive(Debug)]
pub enum Tree {
    /// Prefix operator (`-` or `!`) applied to a single operand.
    Unary {
        op: String,
        term: Box<Tree>,
    },
    /// Infix operator with a left and a right operand.
    Binary {
        op: String,
        left: Box<Tree>,
        right: Box<Tree>,
    },
    /// The `cond ? a : b` form; `left` is the condition.
    Ternary {
        op: String,
        left: Box<Tree>,
        middle: Box<Tree>,
        right: Box<Tree>,
    },
    /// A call: the callee expression followed by its argument list.
    Function {
        ident: Box<Tree>,
        terms: Vec<Tree>,
    },
    /// A subscript: `array[index]`.
    Index {
        array: Box<Tree>,
        index: Box<Tree>,
    },
    /// An array literal.
    Array {
        terms: Vec<Tree>,
    },
    /// A leaf token: its raw text and the token type the lexer gave it.
    Value {
        term: String,
        kind: lexer::TokenType,
    },
}
/// Parses one expression from `tokens` by precedence climbing over the binding
/// powers listed in `get_bp`.
///
/// `tokens` is expected to contain no whitespace tokens. `min_bp` is the weakest
/// left binding power an operator may have and still be absorbed into the
/// expression being built; top-level callers pass `0`.
///
/// Parsing stops, without consuming the token, at `)`, `]`, `,`, `:`, at an
/// operator that binds weaker than `min_bp`, or at the end of input.
///
/// # Panics
///
/// Panics on any syntax error: missing tokens, unbalanced brackets, an operator
/// in operand position, or two operands in a row.
pub fn parse(tokens: &mut std::iter::Peekable<std::slice::Iter<&(lexer::TokenType, &str)>>, min_bp: usize) -> Tree {
    /// Returns the `(left, right)` binding powers of an infix/postfix symbol,
    /// or `None` when the symbol is not an operator.
    fn get_bp(symb: &str) -> Option<(usize, usize)> {
        let binding = match symb {
            "." => (24, 25),
            "(" => (22, 23),
            "[" => (20, 21),
            // negation/not: 19
            "**" => (18, 17),
            "*" => (15, 16),
            "/" => (15, 16),
            "%" => (15, 16),
            "+" => (13, 14),
            "-" => (13, 14),
            "==" => (11, 12),
            "!=" => (11, 12),
            ">=" => (11, 12),
            "<=" => (11, 12),
            ">" => (11, 12),
            "<" => (11, 12),
            "in" => (9, 10),
            "&&" => (7, 8),
            "||" => (5, 6),
            "?" => (4, 3),
            "," => (2, 1),
            _ => return None,
        };
        Some(binding)
    }

    // Operand position: prefix operator, grouping, array literal or a plain value.
    let mut lhs = match tokens.next().expect("missing token") {
        token @ (_, "-") | token @ (_, "!") => Tree::Unary { op: token.1.to_owned(), term: Box::new(parse(tokens, 19)) },
        (_, "(") => {
            if tokens.peek().map(|i| i.1) == Some(")") {
                panic!("empty paranthases");
            }
            let newlhs = parse(tokens, 0);
            if tokens.peek().map(|i| i.1) != Some(")") {
                panic!("missing closing paranthase");
            }
            tokens.next();
            newlhs
        },
        (_, "[") => {
            let mut cons = Vec::new();
            let mut next = tokens.peek();
            while next.map(|i| i.1) != Some("]") {
                cons.push(parse(tokens, 0));
                next = tokens.peek();
                if next.map(|i| i.1) == Some(",") {
                    tokens.next();
                    next = tokens.peek();
                } else {
                    break;
                }
            }
            if next.map(|i| i.1) != Some("]") {
                panic!("missing closing bracket");
            }
            tokens.next();
            Tree::Array { terms: cons }
        },
        token @ (lexer::TokenType::Symb, _) => panic!("unexpected token {}", token.1),
        token @ (_, "in") => panic!("unexpected token {}", token.1),
        token @ (kind, _) => Tree::Value { term: token.1.to_owned(), kind: *kind },
    };

    // Operator position: keep folding operators into `lhs` while they bind tightly enough.
    loop {
        let Some(next) = tokens.peek() else { break };
        match next {
            (_, ")") | (_, "]") | (_, ",") | (_, ":") => break,
            (lexer::TokenType::Symb, _) | (lexer::TokenType::Paran, _) | (_, "in") => {},
            (_, s) => panic!("unexpected token {}", s),
        };
        let Some((left_bp, right_bp)) = get_bp(next.1) else { panic!("invalid symbol") };
        if left_bp < min_bp { break }
        match next.1 {
            t @ "(" | t @ "[" => {
                tokens.next();
                if t == "(" {
                    let mut cons: Vec<Tree> = Vec::new();
                    while tokens.peek().map(|i| i.1) != Some(")") {
                        cons.push(parse(tokens, 0));
                        if tokens.peek().map(|i| i.1) == Some(",") {
                            tokens.next();
                        } else {
                            break;
                        }
                    }
                    if tokens.peek().map(|i| i.1) != Some(")") { panic!("missing a closing paranthase") }
                    // Consume the `)`; otherwise the loop above stops on it and
                    // everything after the call is silently dropped.
                    tokens.next();
                    lhs = Tree::Function { ident: Box::new(lhs), terms: cons };
                } else {
                    let index = parse(tokens, 0);
                    if tokens.peek().map(|i| i.1) != Some("]") { panic!("missing a closing bracket") }
                    // Consume the `]` for the same reason as the `)` above.
                    tokens.next();
                    lhs = Tree::Index { array: Box::new(lhs), index: Box::new(index) };
                }
            },
            "?" => {
                tokens.next();
                let middle = parse(tokens, 0);
                if tokens.next().map(|i| i.1) != Some(":") { panic!("missing a colon") }
                lhs = Tree::Ternary {
                    op: "?".into(),
                    left: Box::new(lhs),
                    middle: Box::new(middle),
                    right: Box::new(parse(tokens, right_bp)),
                };
            },
            symbol => {
                tokens.next();
                lhs = Tree::Binary {
                    op: symbol.to_owned(),
                    left: Box::new(lhs),
                    right: Box::new(parse(tokens, right_bp)),
                };
            }
        }
    }
    lhs
}

167
src/runner.rs Normal file
View file

@ -0,0 +1,167 @@
use crate::parser;
use crate::lexer;
/// The type of a runtime value, without the value itself.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ValueType {
    /// Type of numeric values.
    Number,
    /// Type of text values.
    String,
    /// Type of `true` / `false`.
    Boolean,
    /// Type of an array; the payload is a nested type describing the array.
    Array(Box<ValueType>),
}
/// A runtime value produced by evaluating a tree.
#[derive(PartialEq, Debug)]
pub enum Value {
    /// Any numeric value; all numbers are stored as `f64`.
    Number(f64),
    /// A text value.
    String(String),
    /// `true` or `false`.
    Boolean(bool),
    /// A list of values plus the recorded type of its elements.
    Array {
        kind: ValueType,
        values: Vec<Value>,
    },
}
impl Value {
pub fn get_type(&self) -> ValueType {
match self {
Self::Number(_) => ValueType::Number,
Self::String(_) => ValueType::String,
Self::Boolean(_) => ValueType::Boolean,
Self::Array { kind, .. } => ValueType::Array(Box::new(kind.clone())),
}
}
}
pub fn expr(tree: &parser::Tree) -> Value {
use parser::Tree;
match tree {
Tree::Value { term, kind } => {
match kind {
lexer::TokenType::Float => Value::Number(term.parse().expect("invalid number")),
lexer::TokenType::Int => Value::Number(term.parse().expect("invalid number")),
lexer::TokenType::String => Value::String(term[1..term.len() - 1].to_string()),
lexer::TokenType::Ident => match term.as_str() {
"true" => Value::Boolean(true),
"false" => Value::Boolean(false),
_ => todo!(),
},
_ => unreachable!(),
}
},
Tree::Unary { op, term } => {
let term = expr(term.as_ref());
match op.as_str() {
"-" => match term {
Value::Number(n) => Value::Number(-n),
_ => panic!("cannot negate non number"),
},
"!" => match term {
Value::Boolean(b) => Value::Boolean(!b),
_ => panic!("cannot invert non boolean"),
},
_ => unreachable!(),
}
},
Tree::Binary { op, left, right } => {
let left = expr(left.as_ref());
let right = expr(right.as_ref());
match op.as_str() {
"+" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Number(left + right),
(Value::String(left), Value::String(right)) => Value::String(left + right.as_str()),
_ => panic!("cannot negate add non numbers or non strings"),
},
"-" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Number(left - right),
_ => panic!("cannot subtract non number"),
},
"*" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Number(left * right),
_ => panic!("cannot multiply non number"),
},
"/" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Number(left / right),
_ => panic!("cannot divide non number"),
},
"%" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Number(left % right),
_ => panic!("cannot modulo non number"),
},
"**" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Number(left.powf(right)),
_ => panic!("cannot exponentiate non number"),
},
"==" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Boolean(left == right),
(Value::String(left), Value::String(right)) => Value::Boolean(left == right),
(Value::Boolean(left), Value::Boolean(right)) => Value::Boolean(left == right),
_ => panic!("cannot compare different types"),
},
"!=" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Boolean(left != right),
(Value::String(left), Value::String(right)) => Value::Boolean(left != right),
(Value::Boolean(left), Value::Boolean(right)) => Value::Boolean(left != right),
_ => panic!("cannot compare different types"),
},
">" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Boolean(left > right),
(Value::String(left), Value::String(right)) => Value::Boolean(left > right),
_ => panic!("cannot compare different types"),
},
"<" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Boolean(left < right),
(Value::String(left), Value::String(right)) => Value::Boolean(left < right),
_ => panic!("cannot compare different types"),
},
">=" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Boolean(left <= right),
(Value::String(left), Value::String(right)) => Value::Boolean(left <= right),
_ => panic!("cannot compare different types"),
},
"<=" => match (left, right) {
(Value::Number(left), Value::Number(right)) => Value::Boolean(left <= right),
(Value::String(left), Value::String(right)) => Value::Boolean(left <= right),
_ => panic!("cannot compare different types"),
},
"&&" => match (left, right) {
(Value::Boolean(left), Value::Boolean(right)) => Value::Boolean(left && right),
_ => panic!("cannot compare different types"),
},
"||" => match (left, right) {
(Value::Boolean(left), Value::Boolean(right)) => Value::Boolean(left || right),
_ => panic!("cannot compare different types"),
},
"in" => match (left, right) {
(val @ Value::Boolean(_), Value::Array { kind: ValueType::Boolean, values }) => Value::Boolean(values.contains(&val)),
(val @ Value::Number(_), Value::Array { kind: ValueType::Number, values }) => Value::Boolean(values.contains(&val)),
(val @ Value::String(_), Value::Array { kind: ValueType::String, values }) => Value::Boolean(values.contains(&val)),
_ => panic!("cannot compare different arrays"),
},
_ => todo!(),
}
},
Tree::Ternary { op, left, middle, right } => {
let left = expr(left.as_ref());
match op.as_str() {
"?" => {
let Value::Boolean(cond) = left else {
panic!("cannot use non boolean as boolean");
};
if cond {
expr(middle.as_ref())
} else {
expr(right.as_ref())
}
},
_ => unreachable!(),
}
},
Tree::Array { terms } => {
if terms.len() == 0 {
Value::Array { kind: ValueType::Number, values: vec![] }
} else {
let values: Vec<_> = terms.iter().map(|t| expr(t)).collect();
let kind = values[0].get_type();
Value::Array { kind, values }
}
},
// _ => todo!(),
idk => panic!("idk: {:?}", idk),
}
}