a few more changes

This commit is contained in:
tezlm 2023-10-01 23:23:13 -07:00
parent 9c73e38ac2
commit c1820ef6ff
Signed by: tezlm
GPG key ID: 649733FCD94AFBBA
10 changed files with 542 additions and 351 deletions

View file

@ -2,7 +2,3 @@
name = "lang" name = "lang"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

View file

@ -1,7 +1,7 @@
(module (module
;; import from wasi ;; import from wasi
;; fn fd_write(fd, *iovs, iovs_len, nwritten) -> bytes_written ;; fn fd_write(fd, *iovs, iovs_len, nwritten) -> bytes_written
(import "wasi_unstable" "fd_write" (func $fd_write (param i32 i32 i32 i32) (result i32))) (import "wasi_snapshot_preview1" "fd_write" (func $fd_write (param i32 i32 i32 i32) (result i32)))
;; create memory (size = 1 page = 64KiB) ;; create memory (size = 1 page = 64KiB)
(memory $foobar 1) (memory $foobar 1)

34
print2.wat Normal file
View file

@ -0,0 +1,34 @@
;; Minimal WASI program: writes "Hello, world!\n" with a single fd_write call.
;; The fd operand is pushed onto the stack by a separate instruction before the
;; folded call, instead of being passed as the call's first folded operand.
(module
;; import from wasi
;; fn fd_write(fd, *iovs, iovs_len, *nwritten) -> errno
;; (the number of bytes written is stored through the nwritten pointer)
(import "wasi_snapshot_preview1" "fd_write" (func $fd_write (param i32 i32 i32 i32) (result i32)))
;; create memory (size = 1 page = 64KiB)
(memory $foobar 1)
;; export memory under the name "memory" so the host can read the iovec and
;; the string; the 0 below is the memory *index* (i.e. $foobar), not a size
(export "memory" (memory 0))
;; write string to memory (offset = 8 bytes)
(data (i32.const 8) "Hello, world!\n")
(func $main (export "_start")
;; iov.iov_base - pointer to string (offset = 0 bytes)
;; the string's offset is 8 bytes in memory
(i32.store (i32.const 0) (i32.const 8))
;; iov.iov_len - length of the hello world string (offset = 4 bytes)
;; the string's length is 14 bytes
(i32.store (i32.const 4) (i32.const 14))
;; fd operand for the call below, left on the stack ahead of the folded
;; operands; 2 is stderr (stdout would be 1)
(i32.const 2)
(call $fd_write
;; (i32.const 1) ;; fd: stdout = 1 (disabled - the fd comes from the i32.const 2 above)
(i32.const 0) ;; iovs: address of the iovec written above (memory offset 0)
(i32.const 1) ;; iovs_len: there's 1 iovec (one string)
(i32.const 2468) ;; nwritten: address that receives the byte count; unused, so any spare address will do
)
drop ;; drop fd_write's result (the errno); the byte count went to nwritten
)
)

148
src/data.rs Normal file
View file

@ -0,0 +1,148 @@
use std::collections::HashMap;
use crate::lexer::{Token, Symbol};
/// Infix operators of the language.
///
/// Variants are listed in the same order as the table in
/// `BinaryOp::precedence`, from the highest binding powers to the lowest.
#[derive(Clone, Debug)]
pub enum BinaryOp {
    // exponentiation
    Pow,

    // multiplicative
    Mul,
    Div,
    Mod,

    // additive
    Add,
    Sub,

    // shifts
    Shl,
    Shr,

    // ordering comparisons
    Less,
    LessEq,
    Greater,
    GreaterEq,

    // equality comparisons
    Eq,
    Neq,

    // bitwise
    BitAnd,
    Xor,
    BitOr,

    // short-circuit style logic operators
    LogicAnd,
    LogicOr,

    // TODO
    // Set,

    /// Has a precedence entry, but `BinaryOp::from_token` never produces it.
    Comma,
}
/// Prefix (unary) operators.
#[derive(Clone, Debug)]
pub enum PrefixOp {
    /// Arithmetic negation; built from the `-` symbol by `PrefixOp::from_token`.
    Minus,

    /// Logical negation; built from the `!` symbol by `PrefixOp::from_token`.
    LogicNot,

    /// Bitwise NOT.
    BitNot,
}
/// Postfix (suffix) operators.
///
/// Nothing in the visible code constructs or consumes this type yet.
#[derive(Clone, Debug)]
pub enum SuffixOp {
    // NOTE(review): intended semantics are not shown anywhere visible — confirm.
    Unravel,
}
/// One statement of a program or block.
#[derive(Clone, Debug)]
pub enum Statement {
    /// `let <name> = <expr>`: the variable name and its initializer.
    Let(String, Expr),

    /// An expression used as a statement.
    Expr(Expr),

    // Not implemented yet:
    // Type(String, Type),
    // Func(String, ...),
    // Break,
    // Continue,
    // Type,
}
/// An ordered list of statements; the list itself is the public tuple field.
#[derive(Clone, Debug)]
pub struct Block(pub Vec<Statement>);
/// Expression tree produced by the parser.
#[derive(Clone, Debug)]
pub enum Expr {
    /// A literal value.
    Literal(Literal),

    /// A reference to a variable, by name.
    Variable(String),

    /// An infix operation: operator, left operand, right operand.
    Binary(BinaryOp, Box<Expr>, Box<Expr>),

    /// A prefix operation: operator and operand.
    Unary(PrefixOp, Box<Expr>),

    /// A match: the matched expression and its `(pattern, result)` arms, in order.
    Match(Box<Expr>, Vec<(Pattern, Expr)>),

    // Not implemented yet:
    // Call(String, Vec<Expr>),

    /// A block used in expression position.
    Block(Block),
}
/// A pattern on the left-hand side of a match arm.
///
/// Only literal patterns exist so far.
#[derive(Clone, Debug, PartialEq)]
pub enum Pattern {
    /// Matches a value equal to the given literal.
    Literal(Literal),
}
/// A literal value written directly in source code.
///
/// Only `PartialEq` is derived (not `Eq`) because of the `f64` payload.
#[derive(Clone, Debug, PartialEq)]
pub enum Literal {
    /// Integer literal.
    Integer(i64),

    /// Floating point literal.
    Float(f64),

    /// `true` or `false`.
    Boolean(bool),

    /// String literal.
    String(String),

    /// Character literal.
    Char(char),
}
/// Static types assigned to expressions by inference.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Type {
    Integer,
    Float,
    Boolean,
    String,
    Char,

    // Not implemented yet:
    // Struct(HashMap<String, Type>),
    // Enum(HashMap<String, Option<Type>>),
    // Newtype(HashMap<String, Type>),
    // Function(Vec<Type>, Box<Type>),

    /// A tuple of element types; the empty tuple is what `Block::infer`
    /// reports for blocks that do not end in an expression.
    Tuple(Vec<Type>),
}
impl BinaryOp {
    /// Returns the `(left, right)` binding powers of this operator.
    ///
    /// Each precedence level `n` owns the pair `(2n, 2n + 1)`. `Pow` is the
    /// only operator whose pair is reversed (left power greater than right).
    // NOTE(review): presumably consumed by the parser's `parse_expr` loop, where
    // the reversed pair would make `Pow` right-associative — confirm against it.
    pub fn precedence(&self) -> (u8, u8) {
        let level: u8 = match self {
            Self::Comma => 0,
            Self::LogicOr => 1,
            Self::LogicAnd => 2,
            Self::BitOr => 3,
            Self::Xor => 4,
            Self::BitAnd => 5,
            Self::Eq | Self::Neq => 6,
            Self::Less | Self::LessEq | Self::Greater | Self::GreaterEq => 7,
            Self::Shl | Self::Shr => 8,
            Self::Add | Self::Sub => 9,
            Self::Mul | Self::Div | Self::Mod => 10,
            Self::Pow => 11,
        };
        let (low, high) = (2 * level, 2 * level + 1);
        if matches!(self, Self::Pow) {
            (high, low)
        } else {
            (low, high)
        }
    }

    /// Maps a symbol token to the binary operator it denotes.
    ///
    /// Returns `None` for non-symbol tokens and for symbols that are not
    /// binary operators. `Symbol::Comma` is among the unmapped symbols, so
    /// `BinaryOp::Comma` is never produced here.
    pub fn from_token(token: &Token) -> Option<Self> {
        let Token::Symbol(symbol) = token else {
            return None;
        };
        match symbol {
            Symbol::DoubleStar => Some(Self::Pow),
            Symbol::Star => Some(Self::Mul),
            Symbol::Slash => Some(Self::Div),
            Symbol::Percent => Some(Self::Mod),
            Symbol::Plus => Some(Self::Add),
            Symbol::Minus => Some(Self::Sub),
            Symbol::Shl => Some(Self::Shl),
            Symbol::Shr => Some(Self::Shr),
            Symbol::Less => Some(Self::Less),
            Symbol::LessEq => Some(Self::LessEq),
            Symbol::Greater => Some(Self::Greater),
            Symbol::GreaterEq => Some(Self::GreaterEq),
            Symbol::Eq => Some(Self::Eq),
            Symbol::Neq => Some(Self::Neq),
            Symbol::And => Some(Self::BitAnd),
            Symbol::Carat => Some(Self::Xor),
            Symbol::Pipe => Some(Self::BitOr),
            Symbol::DoubleAnd => Some(Self::LogicAnd),
            Symbol::DoublePipe => Some(Self::LogicOr),
            _ => None,
        }
    }
}
impl PrefixOp {
    // TODO: prefix operators have no binding-power table yet.

    /// Maps a symbol token to the prefix operator it denotes.
    ///
    /// Returns `None` for every token that is not a prefix operator, which
    /// includes all non-symbol tokens.
    pub fn from_token(token: &Token) -> Option<Self> {
        match token {
            Token::Symbol(Symbol::Minus) => Some(Self::Minus),
            Token::Symbol(Symbol::Not) => Some(Self::LogicNot),
            // `~` is the bitwise-not symbol emitted by the lexer. This arm used
            // to match `DoublePipe` (`||`), which is the binary logic-or
            // operator and made `~x` unparseable.
            Token::Symbol(Symbol::Tilde) => Some(Self::BitNot),
            _ => None,
        }
    }
}

View file

@ -5,7 +5,30 @@ optimizations
- write negative numbers directly instead of as positive + sign flip - write negative numbers directly instead of as positive + sign flip
*/ */
use crate::parser::{Expr, Literal, BinaryOp, PrefixOp, Statement, Context}; use crate::data::{Expr, Literal, BinaryOp, PrefixOp, Statement, Pattern, Type};
use crate::parser::Context;
/// Writes WebAssembly text fragments to a boxed byte sink.
pub struct Generator {
    /// Destination of everything the generator emits.
    output: Box<dyn std::io::Write>,
}

impl Generator {
    /// Creates a generator that writes to `output`.
    pub fn new(output: Box<dyn std::io::Write>) -> Generator {
        Self { output }
    }

    /// Emits an empty module skeleton: `(module` followed by `)`.
    ///
    /// I/O errors are discarded; the second write is attempted even if the
    /// first one fails.
    fn write_module(&mut self) {
        let _ = self.output.write_all(b"(module");
        let _ = self.output.write_all(b")");
    }

    /// Emits an empty function skeleton: `(func ` followed by `)`.
    ///
    /// I/O errors are discarded, as in `write_module`.
    fn write_func(&mut self) {
        let _ = self.output.write_all(b"(func ");
        let _ = self.output.write_all(b")");
    }
}
pub fn generate(expr: &Expr) { pub fn generate(expr: &Expr) {
println!(); println!();
@ -19,15 +42,15 @@ pub fn generate(expr: &Expr) {
// println!(r#"(module (func (export "_start") (result f64) (local $match f64)"#); // println!(r#"(module (func (export "_start") (result f64) (local $match f64)"#);
for (name, _) in &exprs { for (name, _) in &exprs {
let ty = match ctx.locals.get(name).unwrap() { let ty = match ctx.locals.get(name).unwrap() {
crate::parser::Type::Integer => "i32", Type::Integer => "i32",
crate::parser::Type::Float => "f64", Type::Float => "f64",
_ => todo!(), _ => todo!(),
}; };
println!("(local ${name} {ty})"); println!("(local ${name} {ty})");
} }
for (name, expr) in &exprs { for (name, expr) in &exprs {
gen_expr(expr, &ctx); gen_expr(expr, &ctx);
println!("(local.set ${name})"); println!("local.set ${name}");
} }
gen_expr(&expr, &ctx); gen_expr(&expr, &ctx);
@ -47,13 +70,13 @@ fn gen_expr(expr: &Expr, ctx: &Context) {
println!("local.get ${name}"); println!("local.get ${name}");
} }
Expr::Binary(op, a, b) => { Expr::Binary(op, a, b) => {
gen_expr(a, ctx); gen_expr(&a, ctx);
gen_expr(b, ctx); gen_expr(&b, ctx);
let ty = match expr.infer(&ctx).unwrap() { let ty = match expr.infer(&ctx).unwrap() {
crate::parser::Type::Integer => "i32", Type::Integer => "i32",
crate::parser::Type::Float => "f64", Type::Float => "f64",
crate::parser::Type::Boolean => "i32", Type::Boolean => "i32",
_ => todo!(), _ => todo!(),
}; };
match op { match op {
@ -70,6 +93,15 @@ fn gen_expr(expr: &Expr, ctx: &Context) {
} }
} }
Expr::Unary(op, e) => { Expr::Unary(op, e) => {
if let Expr::Literal(lit) = e.as_ref() {
match lit {
Literal::Integer(int) => println!("i32.const {}", -int),
Literal::Float(f) => println!("f64.const {}", -f),
_ => unreachable!(),
}
return;
}
gen_expr(e, ctx); gen_expr(e, ctx);
match op { match op {
PrefixOp::Minus => { PrefixOp::Minus => {
@ -87,18 +119,17 @@ fn gen_expr(expr: &Expr, ctx: &Context) {
} }
} }
} }
// FIXME: awful code until i fix patching up parser and lexer
Expr::Match(cond, arms) => { Expr::Match(cond, arms) => {
println!(";; --- set match variable"); println!(";; --- set match variable");
println!("(local.set $match (");
gen_expr(cond, ctx); gen_expr(cond, ctx);
println!("))"); println!("local.set $match");
println!(";; --- generate match"); println!(";; --- generate match");
for (idx, (pat, expr)) in arms.iter().enumerate() { for (idx, (pat, expr)) in arms.iter().enumerate() {
// FIXME: hardcoded until pattern matching works better
match pat { match pat {
crate::parser::Pattern::Literal(lit) => match lit { Pattern::Literal(lit) => match lit {
Literal::Integer(int) => println!("i32.const {}", int), Literal::Integer(int) => println!("i32.const {}", int),
Literal::Boolean(b) => println!("i32.const {}", if *b { 1 } else { 0 }), Literal::Boolean(b) => println!("i32.const {}", if *b { 1 } else { 0 }),
_ => todo!(), _ => todo!(),

View file

@ -1,12 +1,13 @@
use crate::Error; use crate::Error;
#[derive(Debug)]
pub struct Lexer { pub struct Lexer {
input: Vec<char>, input: Vec<char>,
pos: usize, pos: usize,
} }
#[rustfmt::skip] #[rustfmt::skip]
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token { pub enum Token {
Number { radix: u32, text: String }, Number { radix: u32, text: String },
Ident(String), Ident(String),
@ -17,8 +18,19 @@ pub enum Token {
OpenBrace, CloseBrace, OpenBrace, CloseBrace,
OpenBracket, CloseBracket, OpenBracket, CloseBracket,
Symbol(Symbol),
Let, Const, Type, Fn,
True, False,
If, Else, Match,
While, Loop, For, Break, Continue,
}
#[rustfmt::skip]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Symbol {
Plus, Minus, Star, DoubleStar, Slash, Percent, Plus, Minus, Star, DoubleStar, Slash, Percent,
Pipe, DoublePipe, And, DoubleAnd, Carat, Shl, Shr, Pipe, DoublePipe, And, DoubleAnd, Carat, Shl, Shr, Tilde,
PlusSet, MinusSet, StarSet, DoubleStarSet, SlashSet, PercentSet, PlusSet, MinusSet, StarSet, DoubleStarSet, SlashSet, PercentSet,
PipeSet, DoublePipeSet, AndSet, DoubleAndSet, CaratSet, ShlSet, ShrSet, PipeSet, DoublePipeSet, AndSet, DoubleAndSet, CaratSet, ShlSet, ShrSet,
@ -26,11 +38,6 @@ pub enum Token {
Set, Eq, Neq, Less, LessEq, Greater, GreaterEq, Not, Set, Eq, Neq, Less, LessEq, Greater, GreaterEq, Not,
Dot, DoubleDot, TripleDot, Comma, Question, Colon, DoubleColon, Semicolon, Dot, DoubleDot, TripleDot, Comma, Question, Colon, DoubleColon, Semicolon,
ThinArrow, FatArrow, ThinArrow, FatArrow,
Let, Const, Type, Fn,
True, False,
If, Else, Match,
While, Loop, For, Break, Continue,
} }
impl Lexer { impl Lexer {
@ -106,7 +113,7 @@ impl Lexer {
_ if !ch.is_ascii_alphanumeric() => { _ if !ch.is_ascii_alphanumeric() => {
self.pos -= 2; self.pos -= 2;
10 10
}, }
_ => return Err(Error::SyntaxError(format!("unknown number radix {ch}"))), _ => return Err(Error::SyntaxError(format!("unknown number radix {ch}"))),
} }
} }
@ -197,129 +204,139 @@ impl Lexer {
} }
}; };
} }
let token = match ch { let token = match ch {
'(' => Token::OpenParan, '(' => Some(Token::OpenParan),
')' => Token::CloseParan, ')' => Some(Token::CloseParan),
'[' => Token::OpenBracket, '[' => Some(Token::OpenBracket),
']' => Token::CloseBracket, ']' => Some(Token::CloseBracket),
'{' => Token::OpenBrace, '{' => Some(Token::OpenBrace),
'}' => Token::CloseBrace, '}' => Some(Token::CloseBrace),
'+' => settable!(Token::Plus, Token::PlusSet), _ => None,
};
if let Some(token) = token {
self.pos += 1;
return Ok(token);
}
let symbol = match ch {
'~' => Symbol::Tilde,
'+' => settable!(Symbol::Plus, Symbol::PlusSet),
'-' => match self.input.get(self.pos + 1) { '-' => match self.input.get(self.pos + 1) {
Some('>') => { Some('>') => {
self.pos += 1; self.pos += 1;
Token::ThinArrow Symbol::ThinArrow
}, }
Some('=') => { Some('=') => {
self.pos += 1; self.pos += 1;
Token::MinusSet Symbol::MinusSet
} }
_ => Token::Minus, _ => Symbol::Minus,
}, },
'*' => match self.input.get(self.pos + 1) { '*' => match self.input.get(self.pos + 1) {
Some('*') => { Some('*') => {
self.pos += 1; self.pos += 1;
settable!(Token::DoubleStar, Token::DoubleStarSet) settable!(Symbol::DoubleStar, Symbol::DoubleStarSet)
}, }
Some('=') => { Some('=') => {
self.pos += 1; self.pos += 1;
Token::StarSet Symbol::StarSet
} }
_ => Token::Star, _ => Symbol::Star,
}, },
// TODO: comments // TODO: comments
'/' => settable!(Token::Slash, Token::SlashSet), '/' => settable!(Symbol::Slash, Symbol::SlashSet),
'%' => settable!(Token::Percent, Token::PercentSet), '%' => settable!(Symbol::Percent, Symbol::PercentSet),
'|' => match self.input.get(self.pos + 1) { '|' => match self.input.get(self.pos + 1) {
Some('|') => { Some('|') => {
self.pos += 1; self.pos += 1;
settable!(Token::DoublePipe, Token::DoublePipeSet) settable!(Symbol::DoublePipe, Symbol::DoublePipeSet)
}, }
Some('=') => { Some('=') => {
self.pos += 1; self.pos += 1;
Token::PipeSet Symbol::PipeSet
} }
_ => Token::Pipe, _ => Symbol::Pipe,
}, },
'&' => match self.input.get(self.pos + 1) { '&' => match self.input.get(self.pos + 1) {
Some('&') => { Some('&') => {
self.pos += 1; self.pos += 1;
settable!(Token::DoubleAnd, Token::DoubleAndSet) settable!(Symbol::DoubleAnd, Symbol::DoubleAndSet)
} }
Some('=') => { Some('=') => {
self.pos += 1; self.pos += 1;
Token::AndSet Symbol::AndSet
} }
_ => Token::And, _ => Symbol::And,
}, },
'^' => settable!(Token::Carat, Token::CaratSet), '^' => settable!(Symbol::Carat, Symbol::CaratSet),
'=' => match self.input.get(self.pos + 1) { '=' => match self.input.get(self.pos + 1) {
Some('=') => { Some('=') => {
self.pos += 1; self.pos += 1;
Token::Eq Symbol::Eq
} }
Some('>') => { Some('>') => {
self.pos += 1; self.pos += 1;
Token::FatArrow Symbol::FatArrow
} }
_ => Token::Set, _ => Symbol::Set,
}, },
'!' => match self.input.get(self.pos + 1) { '!' => match self.input.get(self.pos + 1) {
Some('=') => { Some('=') => {
self.pos += 1; self.pos += 1;
Token::Neq Symbol::Neq
} }
_ => Token::Not, _ => Symbol::Not,
}, },
'<' => match self.input.get(self.pos + 1) { '<' => match self.input.get(self.pos + 1) {
Some('=') => { Some('=') => {
self.pos += 1; self.pos += 1;
Token::LessEq Symbol::LessEq
} }
Some('<') => { Some('<') => {
self.pos += 1; self.pos += 1;
settable!(Token::Shl, Token::ShlSet) settable!(Symbol::Shl, Symbol::ShlSet)
} }
_ => Token::Less, _ => Symbol::Less,
}, },
'>' => match self.input.get(self.pos + 1) { '>' => match self.input.get(self.pos + 1) {
Some('=') => { Some('=') => {
self.pos += 1; self.pos += 1;
Token::GreaterEq Symbol::GreaterEq
} }
Some('>') => { Some('>') => {
self.pos += 1; self.pos += 1;
settable!(Token::Shr, Token::ShrSet) settable!(Symbol::Shr, Symbol::ShrSet)
} }
_ => Token::Greater, _ => Symbol::Greater,
}, },
'.' => match self.input.get(self.pos + 1) { '.' => match self.input.get(self.pos + 1) {
Some('.') => match self.input.get(self.pos + 1) { Some('.') => match self.input.get(self.pos + 1) {
Some('.') => { Some('.') => {
self.pos += 2; self.pos += 2;
Token::TripleDot Symbol::TripleDot
} }
_ => { _ => {
self.pos += 1; self.pos += 1;
Token::DoubleDot Symbol::DoubleDot
}, }
}, },
_ => Token::Dot, _ => Symbol::Dot,
}, },
':' => match self.input.get(self.pos + 1) { ':' => match self.input.get(self.pos + 1) {
Some(':') => { Some(':') => {
self.pos += 1; self.pos += 1;
Token::DoubleColon Symbol::DoubleColon
} }
_ => Token::Colon, _ => Symbol::Colon,
}, },
',' => Token::Comma, ',' => Symbol::Comma,
';' => Token::Semicolon, ';' => Symbol::Semicolon,
'?' => Token::Question, '?' => Symbol::Question,
_ => return Err(Error::SyntaxError(format!("unexpected character {}", ch))), _ => return Err(Error::SyntaxError(format!("unexpected character {}", ch))),
}; };
self.pos += 1; self.pos += 1;
Ok(token) Ok(Token::Symbol(symbol))
} }
} }

View file

@ -4,16 +4,42 @@ a second time when generating (so the types are known), there should be
a better way a better way
*/ */
#![allow(unused)]
mod data;
mod error; mod error;
mod generator; mod generator;
mod lexer; mod lexer;
mod parser; mod parser;
mod types;
pub use error::Error; pub use error::Error;
use parser::Context; use parser::Context;
use crate::data::Statement;
// Scratch type: no use of `Foo` appears in the visible hunks of this file.
// NOTE(review): presumably leftover experimentation — confirm and remove if so.
pub struct Foo {
a: u8,
b: Bar,
}
// Scratch type stored by value inside `Foo`; likewise unused in the visible hunks.
pub struct Bar {
a: u8,
b: i32,
}
fn main() { fn main() {
let mut lexer = lexer::Lexer::new("!{ let foo = 8; let bar = foo * 3; foo + bar < 10 }".into()); // let mut lexer = lexer::Lexer::new("{ let foo = 8; let bar = foo * 3; foo + bar < 10 }".into());
// let mut lexer = lexer::Lexer::new("{ let foo = 8; let bar = foo * -3; foo + bar }".into());
// let mut lexer = lexer::Lexer::new("123 + 456".into());
let mut lexer = lexer::Lexer::new("{
let foo = 8;
let bar = foo * -3;
match foo + bar < 10 {
true => 123,
false => 456,
}
}".into());
let mut tokens = vec![]; let mut tokens = vec![];
loop { loop {
@ -26,7 +52,9 @@ fn main() {
} }
} }
} }
// dbg!(&tokens);
dbg!(&tokens);
let mut parser = parser::Parser::new(tokens); let mut parser = parser::Parser::new(tokens);
let mut statements = vec![]; let mut statements = vec![];
loop { loop {
@ -35,8 +63,8 @@ fn main() {
Ok(Some(tree)) => { Ok(Some(tree)) => {
dbg!(&tree); dbg!(&tree);
match &tree { match &tree {
parser::Statement::Let(..) => todo!(), Statement::Let(..) => todo!(),
parser::Statement::Expr(expr) => match expr.infer(&Context::new()) { Statement::Expr(expr) => match expr.infer(&Context::new()) {
Ok(ty) => eprintln!("type: {:?}", ty), Ok(ty) => eprintln!("type: {:?}", ty),
Err(err) => eprintln!("err: {:?}", err), Err(err) => eprintln!("err: {:?}", err),
}, },
@ -51,7 +79,7 @@ fn main() {
} }
let expr = match &statements[0] { let expr = match &statements[0] {
crate::parser::Statement::Expr(expr) => expr, Statement::Expr(expr) => expr,
_ => todo!(), _ => todo!(),
}; };

View file

@ -1,6 +1,7 @@
use std::collections::HashMap; use std::collections::HashMap;
use crate::lexer::Token; use crate::data::{BinaryOp, Block, Expr, Literal, Pattern, PrefixOp, Statement, Type};
use crate::lexer::{Token, Symbol};
use crate::Error; use crate::Error;
pub struct Parser { pub struct Parser {
@ -8,82 +9,6 @@ pub struct Parser {
pos: usize, pos: usize,
} }
#[derive(Debug, Clone)]
pub enum BinaryOp {
Pow,
Mul,
Div,
Mod,
Add,
Sub,
Shl,
Shr,
Less,
LessEq,
Greater,
GreaterEq,
Eq,
Neq,
BitAnd,
Xor,
BitOr,
LogicAnd,
LogicOr,
// TODO
// Set,
}
#[derive(Debug, Clone)]
pub enum PrefixOp {
Minus,
LogicNot,
BitNot,
}
#[derive(Debug, Clone)]
pub enum SuffixOp {
Unravel,
}
#[derive(Debug, Clone)]
pub enum Statement {
Let(String, Expr),
// Type(String, Type),
Expr(Expr),
// Func(String, ...),
// Break,
// Continue,
// Type,
}
#[derive(Debug, Clone)]
pub struct Block(pub Vec<Statement>);
#[derive(Debug, Clone)]
pub enum Expr {
Literal(Literal),
Variable(String),
Binary(BinaryOp, Box<Expr>, Box<Expr>),
Unary(PrefixOp, Box<Expr>),
Match(Box<Expr>, Vec<(Pattern, Expr)>),
// Call(String, Vec<Expr>),
Block(Block),
}
#[derive(Debug, Clone, PartialEq)]
pub enum Pattern {
Literal(Literal),
}
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
Integer(i64),
Float(f64),
Boolean(bool),
String(String),
Char(char),
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Context { pub struct Context {
pub locals: HashMap<String, Type>, pub locals: HashMap<String, Type>,
@ -113,25 +38,34 @@ impl Parser {
} }
pub fn next(&mut self) -> Result<Option<Statement>, Error> { pub fn next(&mut self) -> Result<Option<Statement>, Error> {
self.parse_statement() match self.peek_tok() {
Some(_) => self.parse_statement(),
None => Ok(None),
}
} }
fn parse_statement(&mut self) -> Result<Option<Statement>, Error> { fn parse_statement(&mut self) -> Result<Option<Statement>, Error> {
let Some(tok) = self.peek_tok() else { let Some(tok) = self.peek_tok() else {
return Ok(None); return Err(Error::SyntaxError(format!("unexpected eof")));
}; };
let stmt = match tok { let stmt = match tok {
Token::Let => { Token::Let => {
self.eat(Token::Let)?; self.eat(Token::Let)?;
let name = match self.next_tok() { let name = match self.next_tok() {
Some(Token::Ident(ident)) => ident.to_string(), Some(Token::Ident(ident)) => ident.to_string(),
Some(tk) => return Err(Error::SyntaxError(format!("expected identifier, got {tk:?}"))), Some(tk) => {
None => return Err(Error::SyntaxError(format!("expected identifier, got eof"))), return Err(Error::SyntaxError(format!(
"expected identifier, got {tk:?}"
)))
}
None => {
return Err(Error::SyntaxError(format!("expected identifier, got eof")))
}
}; };
self.eat(Token::Set)?; self.eat(Token::Symbol(Symbol::Set))?;
let expr = self.parse_expr(0)?; let expr = self.parse_expr(0)?;
Statement::Let(name, expr) Statement::Let(name, expr)
}, }
_ => Statement::Expr(self.parse_expr(0)?), _ => Statement::Expr(self.parse_expr(0)?),
}; };
Ok(Some(stmt)) Ok(Some(stmt))
@ -147,7 +81,7 @@ impl Parser {
}; };
statements.push(self.parse_statement()?.unwrap()); statements.push(self.parse_statement()?.unwrap());
match self.peek_tok() { match self.peek_tok() {
Some(Token::Semicolon) => self.next_tok(), Some(Token::Symbol(Symbol::Semicolon)) => self.next_tok(),
Some(Token::CloseBrace) => break, Some(Token::CloseBrace) => break,
Some(tok) => return Err(Error::SyntaxError(format!("unexpected token {tok:?}"))), Some(tok) => return Err(Error::SyntaxError(format!("unexpected token {tok:?}"))),
None => return Err(Error::syn("unexpected eof")), None => return Err(Error::syn("unexpected eof")),
@ -166,9 +100,18 @@ impl Parser {
Expr::Literal(Literal::Integer(text.parse().unwrap())) Expr::Literal(Literal::Integer(text.parse().unwrap()))
} }
} }
Token::Ident(ident) => Expr::Variable(ident.to_string()), Token::Ident(ident) => {
let ident = ident.clone();
if self.peek_tok().is_some_and(|t| *t == Token::OpenParan) {
// function calls
todo!()
} else {
Expr::Variable(ident)
}
}
Token::False => Expr::Literal(Literal::Boolean(false)), Token::False => Expr::Literal(Literal::Boolean(false)),
Token::True => Expr::Literal(Literal::Boolean(true)), Token::True => Expr::Literal(Literal::Boolean(true)),
Token::Char(ch) => Expr::Literal(Literal::Char(*ch)),
Token::If => { Token::If => {
let cond = self.parse_expr(0)?; let cond = self.parse_expr(0)?;
self.eat(Token::OpenBrace)?; self.eat(Token::OpenBrace)?;
@ -192,17 +135,19 @@ impl Parser {
}; };
let mut map = vec![(Pattern::Literal(Literal::Boolean(true)), Expr::Block(block))]; let mut map = vec![(Pattern::Literal(Literal::Boolean(true)), Expr::Block(block))];
if let Some(otherwise) = otherwise { if let Some(otherwise) = otherwise {
map.push((Pattern::Literal(Literal::Boolean(false)), Expr::Block(otherwise))); map.push((
Pattern::Literal(Literal::Boolean(false)),
Expr::Block(otherwise),
));
} }
Expr::Match(Box::new(cond), map) Expr::Match(Box::new(cond), map)
} }
Token::Minus => { Token::Symbol(_) => {
let Some(op) = PrefixOp::from_token(&tok) else {
return Err(Error::SyntaxError(format!("unexpected token {tok:?}")));
};
let expr = self.parse_expr(1)?; let expr = self.parse_expr(1)?;
Expr::Unary(PrefixOp::Minus, Box::new(expr)) Expr::Unary(op, Box::new(expr))
}
Token::Not => {
let expr = self.parse_expr(1)?;
Expr::Unary(PrefixOp::LogicNot, Box::new(expr))
} }
Token::Match => { Token::Match => {
let expr = self.parse_expr(0)?; let expr = self.parse_expr(0)?;
@ -210,14 +155,19 @@ impl Parser {
self.eat(Token::OpenBrace)?; self.eat(Token::OpenBrace)?;
loop { loop {
let pat = self.parse_pattern()?; let pat = self.parse_pattern()?;
self.eat(Token::FatArrow)?; self.eat(Token::Symbol(Symbol::FatArrow))?;
let expr = self.parse_expr(0)?; let expr = self.parse_expr(0)?;
arms.push((pat, expr)); arms.push((pat, expr));
if self.peek_tok().is_some_and(|t| t == &Token::Comma) {
if self.peek_tok().is_some_and(|t| t == &Token::Symbol(Symbol::Comma)) {
self.next_tok(); self.next_tok();
} else { } else {
break; break;
} }
if self.peek_tok().is_none() || self.peek_tok().is_some_and(|t| t == &Token::CloseBrace) {
break;
}
} }
self.eat(Token::CloseBrace)?; self.eat(Token::CloseBrace)?;
Expr::Match(Box::new(expr), arms) Expr::Match(Box::new(expr), arms)
@ -261,186 +211,8 @@ impl Parser {
} }
Token::False => Pattern::Literal(Literal::Boolean(false)), Token::False => Pattern::Literal(Literal::Boolean(false)),
Token::True => Pattern::Literal(Literal::Boolean(true)), Token::True => Pattern::Literal(Literal::Boolean(true)),
_ => todo!(), _ => todo!("no pattern for {:?} yet", tok),
}; };
Ok(pat) Ok(pat)
} }
} }
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Type {
Integer,
Float,
Boolean,
String,
Char,
Function(Vec<Type>, Box<Type>),
Tuple(Vec<Type>),
}
impl Expr {
pub fn infer(&self, ctx: &Context) -> Result<Type, Error> {
match self {
Self::Literal(lit) => lit.infer(),
Self::Binary(op, lhs, rhs) => Ok(op.infer(lhs.infer(ctx)?, rhs.infer(ctx)?)?),
Self::Unary(op, expr) => Ok(op.infer(expr.infer(ctx)?)?),
Self::Variable(name) => match ctx.locals.get(name) {
Some(ty) => Ok(ty.clone()),
None => Err(Error::ReferenceError(format!("cannot find variable {name}"))),
},
Self::Match(item, arms) => {
let mut match_ty = None;
let item_ty = item.infer(ctx)?;
for (pat, expr) in arms {
let ty = expr.infer(ctx)?;
let pat_ty = pat.infer()?;
if item_ty != pat_ty {
return Err(Error::ty("cannot compare different type"));
}
if match_ty.is_some_and(|mty| mty != ty) {
return Err(Error::ty("branch returns different type"));
}
match_ty = Some(ty);
}
// TODO: exhaustiveness checks
let Some(match_ty) = match_ty else {
// TODO: infallible types? `enum Nope {}`
return Err(Error::ty("match has no branches to infer"));
};
Ok(match_ty)
}
Self::Block(block) => block.infer(ctx),
}
}
}
impl BinaryOp {
pub fn infer(&self, a: Type, b: Type) -> Result<Type, Error> {
use BinaryOp as B;
use Type as T;
let ty = match (self, a, b) {
(B::Add | B::Sub | B::Mul | B::Div | B::Mod | B::Pow, T::Integer, T::Integer) => T::Integer,
(B::Eq | B::Neq | B::Less | B::LessEq | B::Greater | B::GreaterEq, T::Integer, T::Integer) => T::Boolean,
(B::Add | B::Sub | B::Mul | B::Div | B::Mod | B::Pow, T::Float, T::Float) => T::Float,
(B::Eq | B::Neq | B::Less | B::LessEq | B::Greater | B::GreaterEq, T::Float, T::Float) => T::Boolean,
// (B::Add | B::Sub | B::Mul | B::Div, T::Float, T::Float) => T::Float,
(op, a, b) => {
return Err(Error::TypeError(format!(
"operator {op:?} cannot be applied to {a:?} and {b:?}"
)))
}
};
Ok(ty)
}
fn precedence(&self) -> (u8, u8) {
match self {
Self::Pow => (22, 21),
Self::Mul | Self::Div | Self::Mod => (19, 20),
Self::Add | Self::Sub => (17, 18),
Self::Shl | Self::Shr => (15, 16),
Self::Less | Self::LessEq | Self::Greater | Self::GreaterEq => (13, 14),
Self::Eq | Self::Neq => (11, 12),
Self::BitAnd => (9, 10),
Self::Xor => (7, 8),
Self::BitOr => (5, 6),
Self::LogicAnd => (3, 4),
Self::LogicOr => (1, 2),
}
}
fn from_token(token: &Token) -> Option<Self> {
let op = match token {
Token::DoubleStar => Self::Pow,
Token::Star => Self::Mul,
Token::Slash => Self::Div,
Token::Percent => Self::Mod,
Token::Plus => Self::Add,
Token::Minus => Self::Sub,
Token::Shl => Self::Shl,
Token::Shr => Self::Shr,
Token::Less => Self::Less,
Token::LessEq => Self::LessEq,
Token::Greater => Self::Greater,
Token::GreaterEq => Self::GreaterEq,
Token::Eq => Self::Eq,
Token::Neq => Self::Neq,
Token::And => Self::BitAnd,
Token::Carat => Self::Xor,
Token::Pipe => Self::BitOr,
Token::DoubleAnd => Self::LogicAnd,
Token::DoublePipe => Self::LogicOr,
_ => return None,
};
Some(op)
}
}
impl PrefixOp {
pub fn infer(&self, a: Type) -> Result<Type, Error> {
use Type as T;
use PrefixOp as U;
let ty = match (self, a) {
(U::Minus, T::Integer) => T::Integer,
// (U::Minus, T::Float) => T::Float,
(U::LogicNot, T::Boolean) => T::Boolean,
(op, ty) => {
return Err(Error::TypeError(format!(
"operator {op:?} cannot be applied to {ty:?}"
)))
}
};
Ok(ty)
}
}
impl Block {
#[allow(clippy::never_loop)] // for now
pub fn infer(&self, ctx: &Context) -> Result<Type, Error> {
let mut ctx = ctx.clone();
let mut ty = Type::Tuple(vec![]);
for statement in &self.0 {
match statement {
Statement::Expr(expr) => ty = expr.infer(&ctx)?,
Statement::Let(name, expr) => {
let var_ty = expr.infer(&ctx)?;
ctx.locals.insert(name.clone(), var_ty);
ty = Type::Tuple(vec![]);
}
}
}
Ok(ty)
}
}
impl Literal {
fn infer(&self) -> Result<Type, Error> {
match self {
Literal::Integer(_) => Ok(Type::Integer),
Literal::Float(_) => Ok(Type::Float),
Literal::Boolean(_) => Ok(Type::Boolean),
Literal::String(_) => Ok(Type::String),
Literal::Char(_) => Ok(Type::Char),
}
}
}
impl Pattern {
fn infer(&self) -> Result<Type, Error> {
match self {
Pattern::Literal(lit) => lit.infer(),
}
}
}
impl Context {
pub fn new() -> Context {
Context {
locals: HashMap::new(),
}
}
}

147
src/types.rs Normal file
View file

@ -0,0 +1,147 @@
// trait Types {
// fn infer();
// }
use std::collections::HashMap;
use crate::{
data::{BinaryOp, Block, Expr, Literal, Pattern, PrefixOp, Statement, Type},
parser::Context,
Error,
};
impl Expr {
pub fn infer(&self, ctx: &Context) -> Result<Type, Error> {
match self {
Self::Literal(lit) => lit.infer(),
Self::Binary(op, lhs, rhs) => Ok(op.infer(lhs.infer(ctx)?, rhs.infer(ctx)?)?),
Self::Unary(op, expr) => Ok(op.infer(expr.infer(ctx)?)?),
Self::Variable(name) => match ctx.locals.get(name) {
Some(ty) => Ok(ty.clone()),
None => Err(Error::ReferenceError(format!(
"cannot find variable {name}"
))),
},
Self::Match(item, arms) => {
let mut match_ty = None;
let item_ty = item.infer(ctx)?;
for (pat, expr) in arms {
let ty = expr.infer(ctx)?;
let pat_ty = pat.infer()?;
if item_ty != pat_ty {
return Err(Error::ty("cannot compare different type"));
}
if match_ty.is_some_and(|mty| mty != ty) {
return Err(Error::ty("branch returns different type"));
}
match_ty = Some(ty);
}
// TODO: exhaustiveness checks
let Some(match_ty) = match_ty else {
// TODO: infallible types? `enum Nope {}`
return Err(Error::ty("match has no branches to infer"));
};
Ok(match_ty)
}
Self::Block(block) => block.infer(ctx),
}
}
}
impl BinaryOp {
pub fn infer(&self, a: Type, b: Type) -> Result<Type, Error> {
use BinaryOp as B;
use Type as T;
let ty = match (self, a, b) {
(B::Add | B::Sub | B::Mul | B::Div | B::Mod | B::Pow, T::Integer, T::Integer) => {
T::Integer
}
(
B::Eq | B::Neq | B::Less | B::LessEq | B::Greater | B::GreaterEq,
T::Integer,
T::Integer,
) => T::Boolean,
(B::Add | B::Sub | B::Mul | B::Div | B::Mod | B::Pow, T::Float, T::Float) => T::Float,
(
B::Eq | B::Neq | B::Less | B::LessEq | B::Greater | B::GreaterEq,
T::Float,
T::Float,
) => T::Boolean,
// (B::Add | B::Sub | B::Mul | B::Div, T::Float, T::Float) => T::Float,
(op, a, b) => {
return Err(Error::TypeError(format!(
"operator {op:?} cannot be applied to {a:?} and {b:?}"
)))
}
};
Ok(ty)
}
}
impl PrefixOp {
pub fn infer(&self, a: Type) -> Result<Type, Error> {
use PrefixOp as U;
use Type as T;
let ty = match (self, a) {
(U::Minus, T::Integer) => T::Integer,
// (U::Minus, T::Float) => T::Float,
(U::LogicNot, T::Boolean) => T::Boolean,
(op, ty) => {
return Err(Error::TypeError(format!(
"operator {op:?} cannot be applied to {ty:?}"
)))
}
};
Ok(ty)
}
}
impl Block {
    /// Infers the type of the block: the type of its last statement.
    ///
    /// Statements are processed in order against a private copy of `ctx`, so
    /// `let` bindings are visible to later statements but never leak into the
    /// caller's context. A `let` statement, and an empty block, have the
    /// empty tuple type.
    ///
    /// # Errors
    ///
    /// Returns the first error produced while inferring any statement.
    pub fn infer(&self, ctx: &Context) -> Result<Type, Error> {
        let mut scope = ctx.clone();
        let mut last_ty = Type::Tuple(Vec::new());
        for stmt in self.0.iter() {
            last_ty = match stmt {
                Statement::Let(name, init) => {
                    // The initializer is inferred before the name is bound, so
                    // it cannot refer to the variable being declared.
                    let init_ty = init.infer(&scope)?;
                    scope.locals.insert(name.clone(), init_ty);
                    Type::Tuple(Vec::new())
                }
                Statement::Expr(expr) => expr.infer(&scope)?,
            };
        }
        Ok(last_ty)
    }
}
impl Literal {
    /// Returns the type that corresponds to this literal's variant.
    ///
    /// Never fails; the `Result` keeps the signature in line with the other
    /// `infer` methods.
    fn infer(&self) -> Result<Type, Error> {
        let ty = match self {
            Self::Integer(_) => Type::Integer,
            Self::Float(_) => Type::Float,
            Self::Boolean(_) => Type::Boolean,
            Self::String(_) => Type::String,
            Self::Char(_) => Type::Char,
        };
        Ok(ty)
    }
}
impl Pattern {
    /// Returns the type of values this pattern can match, which for a literal
    /// pattern is the type of the literal.
    fn infer(&self) -> Result<Type, Error> {
        // `Pattern` has a single variant, so this destructuring cannot fail.
        let Pattern::Literal(literal) = self;
        literal.infer()
    }
}
impl Context {
pub fn new() -> Context {
Context {
locals: HashMap::new(),
}
}
}

18
test.wat Normal file
View file

@ -0,0 +1,18 @@
;; Appears to be a snapshot of the compiler's output for the sample program in
;; main.rs: foo = 8, bar = foo * -3, then `foo + bar < 10` is stored in $match
;; and returned as the function's i32 result.
;; NOTE(review): the `true => 123` / `false => 456` arms of the sample's match
;; are not present here; only the match condition is computed.
(module (func (export "_start") (result i32) (local $match i32)
(local $foo i32)
(local $bar i32)
;; let foo = 8
i32.const 8
local.set $foo
;; let bar = foo * -3
local.get $foo
i32.const -3
i32.mul
local.set $bar
;; --- set match variable
local.get $foo
local.get $bar
i32.add
i32.const 10
;; NOTE(review): i32.lt_u is an unsigned comparison. foo + bar is -16 here,
;; which is a very large value when read as unsigned, so this yields 0; a signed
;; comparison would be i32.lt_s — confirm which one the generator should emit.
i32.lt_u
local.set $match
;; the value of $match is what the function returns
local.get $match
))