*very* crude port of expr.ts to rust

This commit is contained in:
tezlm 2023-02-14 09:16:38 -08:00
parent f3ead13685
commit ced1c64a96
Signed by: tezlm
GPG key ID: 649733FCD94AFBBA
6 changed files with 327 additions and 1 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/target

49
Cargo.lock generated Normal file
View file

@ -0,0 +1,49 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "0.7.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
dependencies = [
"memchr",
]
[[package]]
name = "expr"
version = "0.1.0"
dependencies = [
"once_cell",
"regex",
]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "once_cell"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66"
[[package]]
name = "regex"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"

10
Cargo.toml Normal file
View file

@ -0,0 +1,10 @@
[package]
name = "expr"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
once_cell = "1.17.0"
regex = "1.7.1"

View file

@ -1,3 +1,5 @@
// an expression language
function run(code: string, ctx: { [name: string]: any } = {}): any {
const tokens = lex(code);
const tree = parse(tokens);
@ -186,6 +188,7 @@ function test(code: string) {
const ctx = {
math: {
rand: () => Math.random(),
abs: (n) => Math.abs(n),
sin: (n) => Math.sin(n),
cos: (n) => Math.cos(n),
tan: (n) => Math.tan(n),
@ -233,4 +236,4 @@ test('"hello" + ", " + "world"');
test('"hello".length');
test('[1, 2, 3].length');
test('str.capitalize("hello world")');
test('"apple" in str.split("apple banana orange")');
test('"apple" in str.split("apple banana orange") ? "apple exists" : "apple doesn\'t exist"');

3
spec.md Normal file
View file

@ -0,0 +1,3 @@
# expr spec
TODO

260
src/main.rs Normal file
View file

@ -0,0 +1,260 @@
#![allow(dead_code)]
use once_cell::sync::Lazy;
use regex::{Regex, RegexBuilder};
fn lex(input: &str) -> Vec<(String, &str)> {
let mut code = input.to_owned();
let mut tokens = Vec::new();
static TOKEN_MATCHERS: Lazy<[(String, Regex); 8]> = once_cell::sync::Lazy::new(|| {
[
("space".into(), RegexBuilder::new(r"^[ \n\t]+").case_insensitive(true).build().unwrap()),
("int".into(), RegexBuilder::new(r"^0(x[0-9a-f]+|b[01]+|o[0-7]+)").case_insensitive(true).build().unwrap()),
("float".into(), RegexBuilder::new(r"^[0-9]+(\.[0-9]+)?").case_insensitive(true).build().unwrap()),
("str".into(), RegexBuilder::new(r#"^"(.*?[^\\])?""#).case_insensitive(true).build().unwrap()),
("str".into(), RegexBuilder::new(r"^'(.*?[^\\])?'").case_insensitive(true).build().unwrap()),
("ident".into(), RegexBuilder::new(r"^[a-z_][a-z0-9_]*").case_insensitive(true).build().unwrap()),
("paran".into(), RegexBuilder::new(r"^[\[\]\(\)]").case_insensitive(true).build().unwrap()),
("symb".into(), RegexBuilder::new(r"^[^ \n\ta-z0-9_\[\]\(\)]{1,2}").case_insensitive(true).build().unwrap()),
]
});
let mut i = 0;
while !code.is_empty() {
for (name, reg) in TOKEN_MATCHERS.iter() {
let found = reg.find(&code);
let Some(mat) = found else {
continue;
};
let end = mat.end();
tokens.push((name.clone(), &input[i..i + end]));
code = code[end..].to_owned();
i += end;
break;
}
}
tokens
}
/// Syntax tree node built by `parse`.
///
/// Operators and leaf values are stored as the raw token text.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Tree {
    /// Prefix operator (`-` or `!`) applied to a single operand.
    Unary { op: String, term: Box<Tree> },
    /// Infix operator with a left and a right operand.
    Binary { op: String, left: Box<Tree>, right: Box<Tree> },
    /// Conditional `left ? middle : right`; `op` is `"?"`.
    Ternary { op: String, left: Box<Tree>, middle: Box<Tree>, right: Box<Tree> },
    /// Call of `ident` with the argument list `terms`.
    Function { ident: Box<Tree>, terms: Vec<Tree> },
    /// Subscript `array[index]`.
    Index { array: Box<Tree>, index: Box<Tree> },
    /// Array literal `[a, b, ...]`.
    Array { terms: Vec<Tree> },
    /// Leaf token (number, string, identifier), kept as its source text.
    Value { term: String },
}
/// Parses one expression from `tokens` with a Pratt-style (binding power) parser.
///
/// `tokens` yields `(kind, text)` pairs as produced by `lex`; `"space"` tokens are not handled
/// here and must be filtered out by the caller. `min_bp` is the minimum left binding power an
/// infix/postfix operator needs in order to be folded into the expression being built; pass `0`
/// to parse a full expression.
///
/// Parsing stops (without consuming) at `)`, `]`, `,` and `:`, at an operator that binds less
/// tightly than `min_bp`, or at the end of input.
///
/// # Panics
///
/// Panics on malformed input: missing operand, unexpected token, unknown symbol, empty
/// parentheses, or a missing `)`, `]` or `:`.
fn parse(
    tokens: &mut std::iter::Peekable<std::slice::Iter<&(String, &str)>>,
    min_bp: usize,
) -> Tree {
    /// Binding power used for the operand of prefix `-` and `!`.
    const PREFIX_BP: usize = 19;

    /// Returns the `(left, right)` binding powers of an infix/postfix symbol, or `None` if the
    /// symbol is not a known operator.
    fn get_bp(symb: &str) -> Option<(usize, usize)> {
        let binding = match symb {
            "." => (24, 25),
            "(" => (22, 23),
            "[" => (20, 21),
            // negation/not: 19
            "**" => (18, 17),
            "*" => (15, 16),
            "/" => (15, 16),
            "%" => (15, 16),
            "+" => (13, 14),
            "-" => (13, 14),
            "==" => (11, 12),
            "!=" => (11, 12),
            ">=" => (11, 12),
            "<=" => (11, 12),
            ">" => (11, 12),
            "<" => (11, 12),
            "in" => (9, 10),
            "&&" => (7, 8),
            "||" => (5, 6),
            "?" => (4, 3),
            "," => (2, 1),
            _ => return None,
        };
        Some(binding)
    }

    /// Reports whether the next token's text equals `text`, without consuming it.
    fn peek_is(
        tokens: &mut std::iter::Peekable<std::slice::Iter<&(String, &str)>>,
        text: &str,
    ) -> bool {
        tokens.peek().map(|token| token.1) == Some(text)
    }

    // Prefix position: a unary operator, a parenthesised group, an array literal or a leaf.
    let mut lhs = match tokens.next().expect("missing token") {
        token @ (_, "-") | token @ (_, "!") => Tree::Unary {
            op: token.1.to_owned(),
            term: Box::new(parse(tokens, PREFIX_BP)),
        },
        (_, "(") => {
            if peek_is(tokens, ")") {
                panic!("empty paranthases");
            }
            let inner = parse(tokens, 0);
            if !peek_is(tokens, ")") {
                panic!("missing closing paranthase");
            }
            tokens.next();
            inner
        }
        (_, "[") => {
            let mut items = Vec::new();
            while !peek_is(tokens, "]") {
                items.push(parse(tokens, 0));
                if peek_is(tokens, ",") {
                    tokens.next();
                } else {
                    break;
                }
            }
            if !peek_is(tokens, "]") {
                panic!("missing closing bracket");
            }
            tokens.next();
            Tree::Array { terms: items }
        }
        // A closing delimiter can never start an operand; reject it instead of making it a value.
        token @ (_, ")") | token @ (_, "]") => panic!("unexpected token {}", token.1),
        token @ (symb, _) if symb == "symb" => panic!("unexpected token {}", token.1),
        token @ (_, "in") => panic!("unexpected token {}", token.1),
        token @ (_, _) => Tree::Value { term: token.1.to_owned() },
    };

    // Infix/postfix position: keep folding operators into `lhs` while they bind tightly enough.
    loop {
        let Some(next) = tokens.peek() else { break };
        let (kind, symbol) = (next.0.as_str(), next.1);
        if kind != "symb" && kind != "paran" && symbol != "in" {
            panic!("unexpected token {}", symbol);
        }
        // Terminators belong to an enclosing construct and are left for the caller to consume.
        if matches!(symbol, ")" | "]" | "," | ":") {
            break;
        }
        let Some((left_bp, right_bp)) = get_bp(symbol) else { panic!("invalid symbol") };
        if left_bp < min_bp {
            break;
        }
        // Consume the operator itself.
        tokens.next();
        lhs = match symbol {
            "(" => {
                let mut args: Vec<Tree> = Vec::new();
                while !peek_is(tokens, ")") {
                    args.push(parse(tokens, 0));
                    if peek_is(tokens, ",") {
                        tokens.next();
                    } else {
                        break;
                    }
                }
                if !peek_is(tokens, ")") {
                    panic!("missing a closing paranthase")
                }
                // Consume the `)` so parsing can continue after the call (e.g. `f(x) + 1`).
                tokens.next();
                Tree::Function { ident: Box::new(lhs), terms: args }
            }
            "[" => {
                let index = parse(tokens, 0);
                if !peek_is(tokens, "]") {
                    panic!("missing a closing bracket")
                }
                // Consume the `]` so parsing can continue after the subscript.
                tokens.next();
                Tree::Index { array: Box::new(lhs), index: Box::new(index) }
            }
            "?" => {
                let middle = parse(tokens, 0);
                if tokens.next().map(|token| token.1) != Some(":") {
                    panic!("missing a colon")
                }
                Tree::Ternary {
                    op: "?".into(),
                    left: Box::new(lhs),
                    middle: Box::new(middle),
                    right: Box::new(parse(tokens, right_bp)),
                }
            }
            _ => Tree::Binary {
                op: symbol.to_owned(),
                left: Box::new(lhs),
                right: Box::new(parse(tokens, right_bp)),
            },
        };
    }
    lhs
}
/// Placeholder for the tree evaluator: takes ownership of `tree` and currently does nothing.
///
/// NOTE(review): presumably intended to mirror the commented-out TypeScript `expr` kept as a
/// reference right after this function — confirm before implementing.
fn expr(tree: Tree) {
    // Sketch of the intended dispatch over `Tree` variants (not implemented yet):
    // match Tree {
    // Tree::Unary { op, term }
    // }
}
// function expr(tree, ctx: { [name: string]: any } = {}): any {
// switch (tree.type) {
// case "int": return parseInt(tree.val);
// case "float": return parseFloat(tree.val);
// case "str": return tree.val.slice(1, -1);
// case "ident":
// switch (tree.val) {
// case "true": return true;
// case "false": return false;
// case "null": return null;
// default: return Object.hasOwn(ctx, tree.val) && ctx[tree.val];
// }
// }
// if (tree.op) {
// if (tree.op.val === ".") {
// if (tree.cons[1].type !== "ident") throw "expected ident";
// const obj = expr(tree.cons[0], ctx);
// const idx = tree.cons[1].val;
// if (!Object.hasOwn(obj, idx)) throw `${idx} doesn't exist on object`;
// return obj[idx];
// }
// const op = ({
// "**": (a, b) => a ** b,
// "*": (a, b) => a * b,
// "/": (a, b) => a / b,
// "%": (a, b) => a % b,
// "+": (a, b) => a + b,
// "-": (a, b) => b === undefined ? -a : a - b,
// "==": (a, b) => a == b,
// "!=": (a, b) => a != b,
// ">=": (a, b) => a >= b,
// "<=": (a, b) => a <= b,
// ">": (a, b) => a > b,
// "<": (a, b) => a < b,
// "in": (a, b) => {
// if (!Array.isArray(b)) throw "can only use `in` on arrays";
// return b.includes(a);
// },
// "&&": (a, b) => a && b,
// "||": (a, b) => a || b,
// "?": (cond, a, b) => cond ? a : b,
// ",": (a, b) => (a, b),
// })[tree.op.val];
// if (!op) throw `invalid operator ${op}`;
// return op(...tree.cons.map(i => expr(i, ctx)));
// }
// if (tree.array) return tree.array.map(i => expr(i, ctx));
// if (tree.func) return expr(tree.func, ctx)(...tree.cons.map(i => expr(i, ctx)));
// if (tree.index) {
// const obj = expr(tree.index, ctx);
// const idx = expr(tree.with, ctx);
// if (!Object.hasOwn(obj, idx)) throw `${idx} doesn't exist on object`;
// return obj[idx];
// }
// }
/// Debug helper: lexes `code`, drops whitespace tokens, parses the remainder as one expression
/// and prints the resulting tree with `dbg!`.
fn test(code: &str) {
    let lexed = lex(code);

    // The parser does not handle "space" tokens, so strip them before parsing.
    let mut significant = Vec::new();
    for token in &lexed {
        if token.0 != "space" {
            significant.push(token);
        }
    }

    let mut stream = significant.iter().peekable();
    let tree = parse(&mut stream, 0);
    dbg!(tree);
}
/// Program entry point.
///
/// Every sample call to `test` below is commented out, so running the binary currently does
/// nothing; uncomment a line to dump the parse tree for that expression.
fn main() {
    // test("-123");
    // test("2 + 3 * 4");
    // test("(2 + 3) * 4");
    // test("math.sin");
    // test("math.sin(10 * math.pi)");
    // test("\"foo\" in [\"fooo\", \"bar\", \"baz\"]");
    // test("\"foo\" ? 1 : 4");
    // test("\"hello\" + \", \" + \"world\"");
    // test("\"hello\".length");
    // test("[1, 2, 3].length");
    // test("str.capitalize(\"hello world\")");
    // test("\"apple\" in str.split(\"apple banana orange\") ? \"apple exists\" : \"apple doesn\'t exist\"");
}