diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..5d2c316
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,49 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "0.7.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "expr"
+version = "0.1.0"
+dependencies = [
+ "once_cell",
+ "regex",
+]
+
+[[package]]
+name = "memchr"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+
+[[package]]
+name = "once_cell"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66"
+
+[[package]]
+name = "regex"
+version = "1.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..6b9093d
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "expr"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+once_cell = "1.17.0"
+regex = "1.7.1"
diff --git a/expr.ts b/expr.ts
index 44f5438..44aacaa 100644
--- a/expr.ts
+++ b/expr.ts
@@ -1,3 +1,5 @@
+// an expression language
+
 function run(code: string, ctx: { [name: string]: any } = {}): any {
     const tokens = lex(code);
     const tree = parse(tokens);
@@ -186,6 +188,7 @@ function test(code: string) {
     const ctx = {
         math: {
             rand: () => Math.random(),
+            abs: (n) => Math.abs(n),
             sin: (n) => Math.sin(n),
             cos: (n) => Math.cos(n),
             tan: (n) => Math.tan(n),
@@ -233,4 +236,4 @@ test('"hello" + ", " + "world"');
 test('"hello".length');
 test('[1, 2, 3].length');
 test('str.capitalize("hello world")');
-test('"apple" in str.split("apple banana orange")');
+test('"apple" in str.split("apple banana orange") ? "apple exists" : "apple doesn\'t exist"');
diff --git a/spec.md b/spec.md
new file mode 100644
index 0000000..1a73319
--- /dev/null
+++ b/spec.md
@@ -0,0 +1,3 @@
+# expr spec
+
+TODO
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..7c7ac68
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,285 @@
+#![allow(dead_code)]
+
+use once_cell::sync::Lazy;
+use regex::{Regex, RegexBuilder};
+
+/// Splits `input` into `(kind, text)` tokens, where `text` borrows from `input`.
+///
+/// The matchers are tried in declaration order at the current position and the
+/// first one that matches wins. Whitespace is emitted as `"space"` tokens; it is
+/// not filtered out here.
+fn lex(input: &str) -> Vec<(String, &str)> {
+    let mut tokens = Vec::new();
+    static TOKEN_MATCHERS: Lazy<[(String, Regex); 8]> = once_cell::sync::Lazy::new(|| {
+        [
+            ("space".into(), RegexBuilder::new(r"^[ \n\t]+").case_insensitive(true).build().unwrap()),
+            ("int".into(), RegexBuilder::new(r"^0(x[0-9a-f]+|b[01]+|o[0-7]+)").case_insensitive(true).build().unwrap()),
+            ("float".into(), RegexBuilder::new(r"^[0-9]+(\.[0-9]+)?").case_insensitive(true).build().unwrap()),
+            ("str".into(), RegexBuilder::new(r#"^"(.*?[^\\])?""#).case_insensitive(true).build().unwrap()),
+            ("str".into(), RegexBuilder::new(r"^'(.*?[^\\])?'").case_insensitive(true).build().unwrap()),
+            ("ident".into(), RegexBuilder::new(r"^[a-z_][a-z0-9_]*").case_insensitive(true).build().unwrap()),
+            ("paran".into(), RegexBuilder::new(r"^[\[\]\(\)]").case_insensitive(true).build().unwrap()),
+            ("symb".into(), RegexBuilder::new(r"^[^ \n\ta-z0-9_\[\]\(\)]{1,2}").case_insensitive(true).build().unwrap()),
+        ]
+    });
+    let mut i = 0;
+    while i < input.len() {
+        // Every pattern is anchored with `^`, so match against the unread tail
+        // instead of re-allocating the remaining input after each token.
+        let rest = &input[i..];
+        for (name, reg) in TOKEN_MATCHERS.iter() {
+            let Some(mat) = reg.find(rest) else {
+                continue;
+            };
+            let end = mat.end();
+            tokens.push((name.clone(), &rest[..end]));
+            i += end;
+            break;
+        }
+    }
+    tokens
+}
+
+/// A node of the parsed expression tree.
+#[derive(Debug)]
+enum Tree {
+    Unary { op: String, term: Box<Tree> },
+    Binary { op: String, left: Box<Tree>, right: Box<Tree> },
+    Ternary { op: String, left: Box<Tree>, middle: Box<Tree>, right: Box<Tree> },
+    Function { ident: Box<Tree>, terms: Vec<Tree> },
+    Index { array: Box<Tree>, index: Box<Tree> },
+    Array { terms: Vec<Tree> },
+    Value { term: String },
+}
+
+/// Parses one expression from `tokens` by precedence climbing.
+///
+/// Only operators whose left binding power is at least `min_bp` are consumed;
+/// anything weaker is left in the stream for the caller. Callers are expected
+/// to strip `"space"` tokens first, as `test` does.
+///
+/// # Panics
+///
+/// Panics on malformed input: a missing token, an unbalanced delimiter, an
+/// unknown operator, or a token in a position where it is not allowed.
+fn parse(tokens: &mut std::iter::Peekable<std::slice::Iter<'_, &(String, &str)>>, min_bp: usize) -> Tree {
+    let mut lhs = match tokens.next().expect("missing token") {
+        token @ (_, "-") | token @ (_, "!") => Tree::Unary { op: token.1.to_owned(), term: Box::new(parse(tokens, 19)) },
+        (_, "(") => {
+            if tokens.peek().map(|i| i.1) == Some(")") {
+                panic!("empty paranthases");
+            }
+            let newlhs = parse(tokens, 0);
+            if tokens.peek().map(|i| i.1) != Some(")") {
+                panic!("missing closing paranthase");
+            }
+            tokens.next();
+            newlhs
+        },
+        (_, "[") => {
+            let mut cons = Vec::new();
+            let mut next = tokens.peek();
+            while next.map(|i| i.1) != Some("]") {
+                cons.push(parse(tokens, 0));
+                next = tokens.peek();
+                if next.map(|i| i.1) == Some(",") {
+                    tokens.next();
+                    next = tokens.peek();
+                } else {
+                    break;
+                }
+            }
+            if next.map(|i| i.1) != Some("]") {
+                panic!("missing closing bracket");
+            }
+            tokens.next();
+            Tree::Array { terms: cons }
+        },
+        token @ (symb, _) if symb == "symb" => panic!("unexpected token {}", token.1),
+        token @ (_, "in") => panic!("unexpected token {}", token.1),
+        token @ (_, _) => Tree::Value { term: token.1.to_owned() },
+    };
+
+    loop {
+        let Some(next) = tokens.peek() else { break };
+        match next {
+            (t, s) if t != "symb" && t != "paran" && s != &"in" => panic!("unexpected token {}", s),
+            (_, ")") | (_, "]") | (_, ",") | (_, ":") => break,
+            (_, _) => {},
+        };
+        let Some((left_bp, right_bp)) = get_bp(next.1) else { panic!("invalid symbol") };
+        if left_bp < min_bp { break }
+
+        match next.1 {
+            t @ "(" | t @ "[" => {
+                tokens.next();
+                if t == "(" {
+                    let mut cons: Vec<Tree> = Vec::new();
+                    while tokens.peek().map(|i| i.1) != Some(")") {
+                        cons.push(parse(tokens, 0));
+                        if tokens.peek().map(|i| i.1) == Some(",") {
+                            tokens.next();
+                        } else {
+                            break;
+                        }
+                    }
+                    if tokens.peek().map(|i| i.1) != Some(")") { panic!("missing a closing paranthase") }
+                    // Consume the `)`; leaving it behind would end the enclosing expression early.
+                    tokens.next();
+                    lhs = Tree::Function { ident: Box::new(lhs), terms: cons };
+                } else {
+                    let index = parse(tokens, 0);
+                    if tokens.peek().map(|i| i.1) != Some("]") { panic!("missing a closing bracket") }
+                    // Consume the `]` for the same reason.
+                    tokens.next();
+                    lhs = Tree::Index { array: Box::new(lhs), index: Box::new(index) };
+                }
+            },
+            "?" => {
+                tokens.next();
+                let middle = parse(tokens, 0);
+                if tokens.next().map(|i| i.1) != Some(":") { panic!("missing a colon") }
+                lhs = Tree::Ternary {
+                    op: "?".into(),
+                    left: Box::new(lhs),
+                    middle: Box::new(middle),
+                    right: Box::new(parse(tokens, right_bp)),
+                };
+            },
+            symbol => {
+                tokens.next();
+                lhs = Tree::Binary {
+                    op: symbol.to_owned(),
+                    left: Box::new(lhs),
+                    right: Box::new(parse(tokens, right_bp)),
+                };
+            }
+        }
+    }
+
+    return lhs;
+
+    /// Returns the `(left, right)` binding powers of an infix or postfix symbol,
+    /// or `None` if `symb` is not an operator.
+    fn get_bp(symb: &str) -> Option<(usize, usize)> {
+        let binding = match symb {
+            "." => (24, 25),
+            "(" => (22, 23),
+            "[" => (20, 21),
+            // negation/not: 19
+            "**" => (18, 17),
+            "*" => (15, 16),
+            "/" => (15, 16),
+            "%" => (15, 16),
+            "+" => (13, 14),
+            "-" => (13, 14),
+            "==" => (11, 12),
+            "!=" => (11, 12),
+            ">=" => (11, 12),
+            "<=" => (11, 12),
+            ">" => (11, 12),
+            "<" => (11, 12),
+            "in" => (9, 10),
+            "&&" => (7, 8),
+            "||" => (5, 6),
+            "?" => (4, 3),
+            "," => (2, 1),
+            _ => return None,
+        };
+        Some(binding)
+    }
+}
+
+/// Placeholder for the evaluator; nothing is evaluated yet. The commented-out
+/// TypeScript `expr` below looks like the reference implementation.
+fn expr(tree: Tree) {
+    // match Tree {
+    //     Tree::Unary { op, term }
+    // }
+}
+
+// function expr(tree, ctx: { [name: string]: any } = {}): any {
+//     switch (tree.type) {
+//         case "int": return parseInt(tree.val);
+//         case "float": return parseFloat(tree.val);
+//         case "str": return tree.val.slice(1, -1);
+//         case "ident":
+//             switch (tree.val) {
+//                 case "true": return true;
+//                 case "false": return false;
+//                 case "null": return null;
+//                 default: return Object.hasOwn(ctx, tree.val) && ctx[tree.val];
+//             }
+//     }
+
+//     if (tree.op) {
+//         if (tree.op.val === ".") {
+//             if (tree.cons[1].type !== "ident") throw "expected ident";
+//             const obj = expr(tree.cons[0], ctx);
+//             const idx = tree.cons[1].val;
+//             if (!Object.hasOwn(obj, idx)) throw `${idx} doesn't exist on object`;
+//             return obj[idx];
+//         }
+//         const op = ({
+//             "**": (a, b) => a ** b,
+//             "*": (a, b) => a * b,
+//             "/": (a, b) => a / b,
+//             "%": (a, b) => a % b,
+//             "+": (a, b) => a + b,
+//             "-": (a, b) => b === undefined ? -a : a - b,
+//             "==": (a, b) => a == b,
+//             "!=": (a, b) => a != b,
+//             ">=": (a, b) => a >= b,
+//             "<=": (a, b) => a <= b,
+//             ">": (a, b) => a > b,
+//             "<": (a, b) => a < b,
+//             "in": (a, b) => {
+//                 if (!Array.isArray(b)) throw "can only use `in` on arrays";
+//                 return b.includes(a);
+//             },
+//             "&&": (a, b) => a && b,
+//             "||": (a, b) => a || b,
+//             "?": (cond, a, b) => cond ? a : b,
+//             ",": (a, b) => (a, b),
+//         })[tree.op.val];
+//         if (!op) throw `invalid operator ${op}`;
+//         return op(...tree.cons.map(i => expr(i, ctx)));
+//     }
+
+//     if (tree.array) return tree.array.map(i => expr(i, ctx));
+//     if (tree.func) return expr(tree.func, ctx)(...tree.cons.map(i => expr(i, ctx)));
+
+//     if (tree.index) {
+//         const obj = expr(tree.index, ctx);
+//         const idx = expr(tree.with, ctx);
+//         if (!Object.hasOwn(obj, idx)) throw `${idx} doesn't exist on object`;
+//         return obj[idx];
+//     }
+// }
+
+
+/// Lexes and parses `code`, then dumps the resulting tree with `dbg!`.
+fn test(code: &str) {
+    let tokens = lex(code);
+    let tokens: Vec<_> = tokens
+        .iter()
+        .filter(|(t, _)| t != "space")
+        .collect();
+    let tree = parse(&mut tokens.iter().peekable(), 0);
+    dbg!(tree);
+}
+
+fn main() {
+    // test("-123");
+    // test("2 + 3 * 4");
+    // test("(2 + 3) * 4");
+    // test("math.sin");
+    // test("math.sin(10 * math.pi)");
+    // test("\"foo\" in [\"fooo\", \"bar\", \"baz\"]");
+    // test("\"foo\" ? 1 : 4");
+    // test("\"hello\" + \", \" + \"world\"");
+    // test("\"hello\".length");
+    // test("[1, 2, 3].length");
+    // test("str.capitalize(\"hello world\")");
+    // test("\"apple\" in str.split(\"apple banana orange\") ? \"apple exists\" : \"apple doesn\'t exist\"");
+}