diff --git a/Cargo.toml b/Cargo.toml index 7541543..defb8fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,3 @@ name = "lang" version = "0.1.0" edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] diff --git a/print.wat b/print.wat index 41c085d..3fdb13d 100644 --- a/print.wat +++ b/print.wat @@ -1,7 +1,7 @@ (module ;; import from wasi ;; fn fd_write(fd, *iovs, iovs_len, nwritten) -> bytes_written - (import "wasi_unstable" "fd_write" (func $fd_write (param i32 i32 i32 i32) (result i32))) + (import "wasi_snapshot_preview1" "fd_write" (func $fd_write (param i32 i32 i32 i32) (result i32))) ;; create memory (size = 1 page = 64KiB) (memory $foobar 1) diff --git a/print2.wat b/print2.wat new file mode 100644 index 0000000..09da700 --- /dev/null +++ b/print2.wat @@ -0,0 +1,34 @@ +(module + ;; import from wasi + ;; fn fd_write(fd, *iovs, iovs_len, nwritten) -> bytes_written + (import "wasi_snapshot_preview1" "fd_write" (func $fd_write (param i32 i32 i32 i32) (result i32))) + + ;; create memory (size = 1 page = 64KiB) + (memory $foobar 1) + + ;; export memory - it's required, but we don't use it so the size is set to 0 + (export "memory" (memory 0)) + + ;; write string to memory (offset = 8 bytes) + (data (i32.const 8) "Hello, world!\n") + + (func $main (export "_start") + ;; iov.iov_base - pointer to string (offset = 0 bytes) + ;; the string's offset is 8 bytes in memory + (i32.store (i32.const 0) (i32.const 8)) + + ;; iov.iov_len - length of the hello world string (offset = 4 bytes) + ;; the string's length is 14 bytes + (i32.store (i32.const 4) (i32.const 14)) + + (i32.const 2) + + (call $fd_write + ;; (i32.const 1) ;; fd: stdout = 1 + (i32.const 0) ;; data: pointer to memory - this is the first memory we create (index 0) + (i32.const 1) ;; data_len: there's 1 string + (i32.const 2468) ;; nwritten: i don't care about this, write it wherever + ) + drop ;; drop number of bytes written + ) +) diff --git a/src/data.rs b/src/data.rs new file mode 100644 index 0000000..fdca518 --- /dev/null +++ b/src/data.rs @@ -0,0 +1,148 @@ +use std::collections::HashMap; + +use crate::lexer::{Token, Symbol}; + +#[rustfmt::skip] +#[derive(Debug, Clone)] +pub enum BinaryOp { + Pow, + Mul, Div, Mod, + Add, Sub, + Shl, Shr, + Less, LessEq, Greater, GreaterEq, + Eq, Neq, + BitAnd, + Xor, + BitOr, + LogicAnd, + LogicOr, + // TODO + // Set, + Comma, +} + +#[derive(Debug, Clone)] +pub enum PrefixOp { + Minus, + LogicNot, + BitNot, +} + +#[derive(Debug, Clone)] +pub enum SuffixOp { + Unravel, +} + +#[derive(Debug, Clone)] +pub enum Statement { + Let(String, Expr), + // Type(String, Type), + Expr(Expr), + // Func(String, ...), + // Break, + // Continue, + // Type, +} + +#[derive(Debug, Clone)] +pub struct Block(pub Vec); + +#[derive(Debug, Clone)] +pub enum Expr { + Literal(Literal), + Variable(String), + Binary(BinaryOp, Box, Box), + Unary(PrefixOp, Box), + Match(Box, Vec<(Pattern, Expr)>), + // Call(String, Vec), + Block(Block), +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Pattern { + Literal(Literal), +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + Integer(i64), + Float(f64), + Boolean(bool), + String(String), + Char(char), +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum Type { + Integer, + Float, + Boolean, + String, + Char, + // Struct(HashMap), + // Enum(HashMap>), + // Newtype(HashMap), + // Function(Vec, Box), + Tuple(Vec), +} + +impl BinaryOp { + pub fn precedence(&self) -> (u8, u8) { + match self { + Self::Pow => (23, 22), + Self::Mul | Self::Div | Self::Mod => (20, 21), + Self::Add | Self::Sub => (18, 19), + Self::Shl | Self::Shr => (16, 17), + Self::Less | Self::LessEq | Self::Greater | Self::GreaterEq => (14, 15), + Self::Eq | Self::Neq => (12, 13), + Self::BitAnd => (10, 11), + Self::Xor => (8, 9), + Self::BitOr => (6, 7), + Self::LogicAnd => (4, 5), + Self::LogicOr => (2, 3), + Self::Comma => (0, 1), + } + } + + pub fn from_token(token: &Token) -> Option { + let op = match token { + Token::Symbol(Symbol::DoubleStar) => Self::Pow, + Token::Symbol(Symbol::Star) => Self::Mul, + Token::Symbol(Symbol::Slash) => Self::Div, + Token::Symbol(Symbol::Percent) => Self::Mod, + Token::Symbol(Symbol::Plus) => Self::Add, + Token::Symbol(Symbol::Minus) => Self::Sub, + Token::Symbol(Symbol::Shl) => Self::Shl, + Token::Symbol(Symbol::Shr) => Self::Shr, + Token::Symbol(Symbol::Less) => Self::Less, + Token::Symbol(Symbol::LessEq) => Self::LessEq, + Token::Symbol(Symbol::Greater) => Self::Greater, + Token::Symbol(Symbol::GreaterEq) => Self::GreaterEq, + Token::Symbol(Symbol::Eq) => Self::Eq, + Token::Symbol(Symbol::Neq) => Self::Neq, + Token::Symbol(Symbol::And) => Self::BitAnd, + Token::Symbol(Symbol::Carat) => Self::Xor, + Token::Symbol(Symbol::Pipe) => Self::BitOr, + Token::Symbol(Symbol::DoubleAnd) => Self::LogicAnd, + Token::Symbol(Symbol::DoublePipe) => Self::LogicOr, + _ => return None, + }; + Some(op) + } +} + +impl PrefixOp { + // pub fn precedence(&self) -> (u8, u8) { + // todo!(), + // } + + pub fn from_token(token: &Token) -> Option { + let op = match token { + Token::Symbol(Symbol::Minus) => Self::Minus, + Token::Symbol(Symbol::Not) => Self::LogicNot, + Token::Symbol(Symbol::DoublePipe) => Self::BitNot, + _ => return None, + }; + Some(op) + } +} diff --git a/src/generator.rs b/src/generator.rs index e0581bd..5be20f3 100644 --- a/src/generator.rs +++ b/src/generator.rs @@ -5,7 +5,30 @@ optimizations - write negative numberss directly instead of as positive + sign flip */ -use crate::parser::{Expr, Literal, BinaryOp, PrefixOp, Statement, Context}; +use crate::data::{Expr, Literal, BinaryOp, PrefixOp, Statement, Pattern, Type}; +use crate::parser::Context; + +pub struct Generator { + output: Box, +} + +impl Generator { + pub fn new(output: Box) -> Generator { + Generator { + output + } + } + + fn write_module(&mut self) { + write!(self.output, "(module"); + write!(self.output, ")"); + } + + fn write_func(&mut self) { + write!(self.output, "(func "); + write!(self.output, ")"); + } +} pub fn generate(expr: &Expr) { println!(); @@ -19,15 +42,15 @@ pub fn generate(expr: &Expr) { // println!(r#"(module (func (export "_start") (result f64) (local $match f64)"#); for (name, _) in &exprs { let ty = match ctx.locals.get(name).unwrap() { - crate::parser::Type::Integer => "i32", - crate::parser::Type::Float => "f64", + Type::Integer => "i32", + Type::Float => "f64", _ => todo!(), }; println!("(local ${name} {ty})"); } for (name, expr) in &exprs { gen_expr(expr, &ctx); - println!("(local.set ${name})"); + println!("local.set ${name}"); } gen_expr(&expr, &ctx); @@ -47,13 +70,13 @@ fn gen_expr(expr: &Expr, ctx: &Context) { println!("local.get ${name}"); } Expr::Binary(op, a, b) => { - gen_expr(a, ctx); - gen_expr(b, ctx); + gen_expr(&a, ctx); + gen_expr(&b, ctx); let ty = match expr.infer(&ctx).unwrap() { - crate::parser::Type::Integer => "i32", - crate::parser::Type::Float => "f64", - crate::parser::Type::Boolean => "i32", + Type::Integer => "i32", + Type::Float => "f64", + Type::Boolean => "i32", _ => todo!(), }; match op { @@ -70,6 +93,15 @@ fn gen_expr(expr: &Expr, ctx: &Context) { } } Expr::Unary(op, e) => { + if let Expr::Literal(lit) = e.as_ref() { + match lit { + Literal::Integer(int) => println!("i32.const {}", -int), + Literal::Float(f) => println!("f64.const {}", -f), + _ => unreachable!(), + } + return; + } + gen_expr(e, ctx); match op { PrefixOp::Minus => { @@ -87,18 +119,17 @@ fn gen_expr(expr: &Expr, ctx: &Context) { } } } + // FIXME: awful code until i fix patching up parser and lexer Expr::Match(cond, arms) => { println!(";; --- set match variable"); - println!("(local.set $match ("); gen_expr(cond, ctx); - println!("))"); + println!("local.set $match"); println!(";; --- generate match"); for (idx, (pat, expr)) in arms.iter().enumerate() { - // FIXME: hardcoded until patern matching works better match pat { - crate::parser::Pattern::Literal(lit) => match lit { + Pattern::Literal(lit) => match lit { Literal::Integer(int) => println!("i32.const {}", int), Literal::Boolean(b) => println!("i32.const {}", if *b { 1 } else { 0 }), _ => todo!(), diff --git a/src/lexer.rs b/src/lexer.rs index 41542fe..6ac76de 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,12 +1,13 @@ use crate::Error; +#[derive(Debug)] pub struct Lexer { input: Vec, pos: usize, } #[rustfmt::skip] -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Token { Number { radix: u32, text: String }, Ident(String), @@ -17,8 +18,19 @@ pub enum Token { OpenBrace, CloseBrace, OpenBracket, CloseBracket, + Symbol(Symbol), + + Let, Const, Type, Fn, + True, False, + If, Else, Match, + While, Loop, For, Break, Continue, +} + +#[rustfmt::skip] +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Symbol { Plus, Minus, Star, DoubleStar, Slash, Percent, - Pipe, DoublePipe, And, DoubleAnd, Carat, Shl, Shr, + Pipe, DoublePipe, And, DoubleAnd, Carat, Shl, Shr, Tilde, PlusSet, MinusSet, StarSet, DoubleStarSet, SlashSet, PercentSet, PipeSet, DoublePipeSet, AndSet, DoubleAndSet, CaratSet, ShlSet, ShrSet, @@ -26,11 +38,6 @@ pub enum Token { Set, Eq, Neq, Less, LessEq, Greater, GreaterEq, Not, Dot, DoubleDot, TripleDot, Comma, Question, Colon, DoubleColon, Semicolon, ThinArrow, FatArrow, - - Let, Const, Type, Fn, - True, False, - If, Else, Match, - While, Loop, For, Break, Continue, } impl Lexer { @@ -106,7 +113,7 @@ impl Lexer { _ if !ch.is_ascii_alphanumeric() => { self.pos -= 2; 10 - }, + } _ => return Err(Error::SyntaxError(format!("unknown number radix {ch}"))), } } @@ -197,129 +204,139 @@ impl Lexer { } }; } - + let token = match ch { - '(' => Token::OpenParan, - ')' => Token::CloseParan, - '[' => Token::OpenBracket, - ']' => Token::CloseBracket, - '{' => Token::OpenBrace, - '}' => Token::CloseBrace, - '+' => settable!(Token::Plus, Token::PlusSet), + '(' => Some(Token::OpenParan), + ')' => Some(Token::CloseParan), + '[' => Some(Token::OpenBracket), + ']' => Some(Token::CloseBracket), + '{' => Some(Token::OpenBrace), + '}' => Some(Token::CloseBrace), + _ => None, + }; + + if let Some(token) = token { + self.pos += 1; + return Ok(token); + } + + let symbol = match ch { + '~' => Symbol::Tilde, + '+' => settable!(Symbol::Plus, Symbol::PlusSet), '-' => match self.input.get(self.pos + 1) { Some('>') => { self.pos += 1; - Token::ThinArrow - }, + Symbol::ThinArrow + } Some('=') => { self.pos += 1; - Token::MinusSet + Symbol::MinusSet } - _ => Token::Minus, + _ => Symbol::Minus, }, '*' => match self.input.get(self.pos + 1) { Some('*') => { self.pos += 1; - settable!(Token::DoubleStar, Token::DoubleStarSet) - }, + settable!(Symbol::DoubleStar, Symbol::DoubleStarSet) + } Some('=') => { self.pos += 1; - Token::StarSet + Symbol::StarSet } - _ => Token::Star, + _ => Symbol::Star, }, // TODO: comments - '/' => settable!(Token::Slash, Token::SlashSet), - '%' => settable!(Token::Percent, Token::PercentSet), + '/' => settable!(Symbol::Slash, Symbol::SlashSet), + '%' => settable!(Symbol::Percent, Symbol::PercentSet), '|' => match self.input.get(self.pos + 1) { Some('|') => { self.pos += 1; - settable!(Token::DoublePipe, Token::DoublePipeSet) - }, + settable!(Symbol::DoublePipe, Symbol::DoublePipeSet) + } Some('=') => { self.pos += 1; - Token::PipeSet + Symbol::PipeSet } - _ => Token::Pipe, + _ => Symbol::Pipe, }, '&' => match self.input.get(self.pos + 1) { Some('&') => { self.pos += 1; - settable!(Token::DoubleAnd, Token::DoubleAndSet) + settable!(Symbol::DoubleAnd, Symbol::DoubleAndSet) } Some('=') => { self.pos += 1; - Token::AndSet + Symbol::AndSet } - _ => Token::And, + _ => Symbol::And, }, - '^' => settable!(Token::Carat, Token::CaratSet), + '^' => settable!(Symbol::Carat, Symbol::CaratSet), '=' => match self.input.get(self.pos + 1) { Some('=') => { self.pos += 1; - Token::Eq + Symbol::Eq } Some('>') => { self.pos += 1; - Token::FatArrow + Symbol::FatArrow } - _ => Token::Set, + _ => Symbol::Set, }, '!' => match self.input.get(self.pos + 1) { Some('=') => { self.pos += 1; - Token::Neq + Symbol::Neq } - _ => Token::Not, + _ => Symbol::Not, }, '<' => match self.input.get(self.pos + 1) { Some('=') => { self.pos += 1; - Token::LessEq + Symbol::LessEq } Some('<') => { self.pos += 1; - settable!(Token::Shl, Token::ShlSet) + settable!(Symbol::Shl, Symbol::ShlSet) } - _ => Token::Less, + _ => Symbol::Less, }, '>' => match self.input.get(self.pos + 1) { Some('=') => { self.pos += 1; - Token::GreaterEq + Symbol::GreaterEq } Some('>') => { self.pos += 1; - settable!(Token::Shr, Token::ShrSet) + settable!(Symbol::Shr, Symbol::ShrSet) } - _ => Token::Greater, + _ => Symbol::Greater, }, '.' => match self.input.get(self.pos + 1) { Some('.') => match self.input.get(self.pos + 1) { Some('.') => { self.pos += 2; - Token::TripleDot + Symbol::TripleDot } _ => { self.pos += 1; - Token::DoubleDot - }, + Symbol::DoubleDot + } }, - _ => Token::Dot, + _ => Symbol::Dot, }, ':' => match self.input.get(self.pos + 1) { Some(':') => { self.pos += 1; - Token::DoubleColon + Symbol::DoubleColon } - _ => Token::Colon, + _ => Symbol::Colon, }, - ',' => Token::Comma, - ';' => Token::Semicolon, - '?' => Token::Question, + ',' => Symbol::Comma, + ';' => Symbol::Semicolon, + '?' => Symbol::Question, _ => return Err(Error::SyntaxError(format!("unexpected character {}", ch))), }; self.pos += 1; - Ok(token) + Ok(Token::Symbol(symbol)) } } diff --git a/src/main.rs b/src/main.rs index 5f9d059..b440240 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,16 +4,42 @@ a second time when generating (so the types are known), there should be a better way */ +#![allow(unused)] + +mod data; mod error; mod generator; mod lexer; mod parser; +mod types; pub use error::Error; use parser::Context; +use crate::data::Statement; + +pub struct Foo { + a: u8, + b: Bar, +} + +pub struct Bar { + a: u8, + b: i32, +} + fn main() { - let mut lexer = lexer::Lexer::new("!{ let foo = 8; let bar = foo * 3; foo + bar < 10 }".into()); + // let mut lexer = lexer::Lexer::new("{ let foo = 8; let bar = foo * 3; foo + bar < 10 }".into()); + // let mut lexer = lexer::Lexer::new("{ let foo = 8; let bar = foo * -3; foo + bar }".into()); + // let mut lexer = lexer::Lexer::new("123 + 456".into()); + let mut lexer = lexer::Lexer::new("{ + let foo = 8; + let bar = foo * -3; + match foo + bar < 10 { + true => 123, + false => 456, + } + }".into()); let mut tokens = vec![]; loop { @@ -26,7 +52,9 @@ fn main() { } } } - // dbg!(&tokens); + + dbg!(&tokens); + let mut parser = parser::Parser::new(tokens); let mut statements = vec![]; loop { @@ -35,8 +63,8 @@ fn main() { Ok(Some(tree)) => { dbg!(&tree); match &tree { - parser::Statement::Let(..) => todo!(), - parser::Statement::Expr(expr) => match expr.infer(&Context::new()) { + Statement::Let(..) => todo!(), + Statement::Expr(expr) => match expr.infer(&Context::new()) { Ok(ty) => eprintln!("type: {:?}", ty), Err(err) => eprintln!("err: {:?}", err), }, @@ -51,7 +79,7 @@ fn main() { } let expr = match &statements[0] { - crate::parser::Statement::Expr(expr) => expr, + Statement::Expr(expr) => expr, _ => todo!(), }; diff --git a/src/parser.rs b/src/parser.rs index 9dc2e26..efe138c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; -use crate::lexer::Token; +use crate::data::{BinaryOp, Block, Expr, Literal, Pattern, PrefixOp, Statement, Type}; +use crate::lexer::{Token, Symbol}; use crate::Error; pub struct Parser { @@ -8,82 +9,6 @@ pub struct Parser { pos: usize, } -#[derive(Debug, Clone)] -pub enum BinaryOp { - Pow, - Mul, - Div, - Mod, - Add, - Sub, - Shl, - Shr, - Less, - LessEq, - Greater, - GreaterEq, - Eq, - Neq, - BitAnd, - Xor, - BitOr, - LogicAnd, - LogicOr, - // TODO - // Set, -} - -#[derive(Debug, Clone)] -pub enum PrefixOp { - Minus, - LogicNot, - BitNot, -} - -#[derive(Debug, Clone)] -pub enum SuffixOp { - Unravel, -} - -#[derive(Debug, Clone)] -pub enum Statement { - Let(String, Expr), - // Type(String, Type), - Expr(Expr), - // Func(String, ...), - // Break, - // Continue, - // Type, -} - -#[derive(Debug, Clone)] -pub struct Block(pub Vec); - -#[derive(Debug, Clone)] -pub enum Expr { - Literal(Literal), - Variable(String), - Binary(BinaryOp, Box, Box), - Unary(PrefixOp, Box), - Match(Box, Vec<(Pattern, Expr)>), - // Call(String, Vec), - Block(Block), -} - -#[derive(Debug, Clone, PartialEq)] -pub enum Pattern { - Literal(Literal), -} - -#[derive(Debug, Clone, PartialEq)] -pub enum Literal { - Integer(i64), - Float(f64), - Boolean(bool), - String(String), - Char(char), -} - #[derive(Debug, Clone)] pub struct Context { pub locals: HashMap, @@ -113,25 +38,34 @@ impl Parser { } pub fn next(&mut self) -> Result, Error> { - self.parse_statement() + match self.peek_tok() { + Some(_) => self.parse_statement(), + None => Ok(None), + } } fn parse_statement(&mut self) -> Result, Error> { let Some(tok) = self.peek_tok() else { - return Ok(None); + return Err(Error::SyntaxError(format!("unexpected eof"))); }; let stmt = match tok { Token::Let => { self.eat(Token::Let)?; let name = match self.next_tok() { Some(Token::Ident(ident)) => ident.to_string(), - Some(tk) => return Err(Error::SyntaxError(format!("expected identifier, got {tk:?}"))), - None => return Err(Error::SyntaxError(format!("expected identifier, got eof"))), + Some(tk) => { + return Err(Error::SyntaxError(format!( + "expected identifier, got {tk:?}" + ))) + } + None => { + return Err(Error::SyntaxError(format!("expected identifier, got eof"))) + } }; - self.eat(Token::Set)?; + self.eat(Token::Symbol(Symbol::Set))?; let expr = self.parse_expr(0)?; Statement::Let(name, expr) - }, + } _ => Statement::Expr(self.parse_expr(0)?), }; Ok(Some(stmt)) @@ -147,7 +81,7 @@ impl Parser { }; statements.push(self.parse_statement()?.unwrap()); match self.peek_tok() { - Some(Token::Semicolon) => self.next_tok(), + Some(Token::Symbol(Symbol::Semicolon)) => self.next_tok(), Some(Token::CloseBrace) => break, Some(tok) => return Err(Error::SyntaxError(format!("unexpected token {tok:?}"))), None => return Err(Error::syn("unexpected eof")), @@ -166,9 +100,18 @@ impl Parser { Expr::Literal(Literal::Integer(text.parse().unwrap())) } } - Token::Ident(ident) => Expr::Variable(ident.to_string()), + Token::Ident(ident) => { + let ident = ident.clone(); + if self.peek_tok().is_some_and(|t| *t == Token::OpenParan) { + // function calls + todo!() + } else { + Expr::Variable(ident) + } + } Token::False => Expr::Literal(Literal::Boolean(false)), Token::True => Expr::Literal(Literal::Boolean(true)), + Token::Char(ch) => Expr::Literal(Literal::Char(*ch)), Token::If => { let cond = self.parse_expr(0)?; self.eat(Token::OpenBrace)?; @@ -192,17 +135,19 @@ impl Parser { }; let mut map = vec![(Pattern::Literal(Literal::Boolean(true)), Expr::Block(block))]; if let Some(otherwise) = otherwise { - map.push((Pattern::Literal(Literal::Boolean(false)), Expr::Block(otherwise))); + map.push(( + Pattern::Literal(Literal::Boolean(false)), + Expr::Block(otherwise), + )); } Expr::Match(Box::new(cond), map) } - Token::Minus => { + Token::Symbol(_) => { + let Some(op) = PrefixOp::from_token(&tok) else { + return Err(Error::SyntaxError(format!("unexpected token {tok:?}"))); + }; let expr = self.parse_expr(1)?; - Expr::Unary(PrefixOp::Minus, Box::new(expr)) - } - Token::Not => { - let expr = self.parse_expr(1)?; - Expr::Unary(PrefixOp::LogicNot, Box::new(expr)) + Expr::Unary(op, Box::new(expr)) } Token::Match => { let expr = self.parse_expr(0)?; @@ -210,14 +155,19 @@ impl Parser { self.eat(Token::OpenBrace)?; loop { let pat = self.parse_pattern()?; - self.eat(Token::FatArrow)?; + self.eat(Token::Symbol(Symbol::FatArrow))?; let expr = self.parse_expr(0)?; arms.push((pat, expr)); - if self.peek_tok().is_some_and(|t| t == &Token::Comma) { + + if self.peek_tok().is_some_and(|t| t == &Token::Symbol(Symbol::Comma)) { self.next_tok(); } else { break; } + + if self.peek_tok().is_none() || self.peek_tok().is_some_and(|t| t == &Token::CloseBrace) { + break; + } } self.eat(Token::CloseBrace)?; Expr::Match(Box::new(expr), arms) @@ -261,186 +211,8 @@ impl Parser { } Token::False => Pattern::Literal(Literal::Boolean(false)), Token::True => Pattern::Literal(Literal::Boolean(true)), - _ => todo!(), + _ => todo!("no pattern for {:?} yet", tok), }; Ok(pat) } } - -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum Type { - Integer, - Float, - Boolean, - String, - Char, - Function(Vec, Box), - Tuple(Vec), -} - -impl Expr { - pub fn infer(&self, ctx: &Context) -> Result { - match self { - Self::Literal(lit) => lit.infer(), - Self::Binary(op, lhs, rhs) => Ok(op.infer(lhs.infer(ctx)?, rhs.infer(ctx)?)?), - Self::Unary(op, expr) => Ok(op.infer(expr.infer(ctx)?)?), - Self::Variable(name) => match ctx.locals.get(name) { - Some(ty) => Ok(ty.clone()), - None => Err(Error::ReferenceError(format!("cannot find variable {name}"))), - }, - Self::Match(item, arms) => { - let mut match_ty = None; - let item_ty = item.infer(ctx)?; - for (pat, expr) in arms { - let ty = expr.infer(ctx)?; - let pat_ty = pat.infer()?; - if item_ty != pat_ty { - return Err(Error::ty("cannot compare different type")); - } - if match_ty.is_some_and(|mty| mty != ty) { - return Err(Error::ty("branch returns different type")); - } - match_ty = Some(ty); - } - // TODO: exhaustiveness checks - let Some(match_ty) = match_ty else { - // TODO: infallible types? `enum Nope {}` - return Err(Error::ty("match has no branches to infer")); - }; - Ok(match_ty) - } - Self::Block(block) => block.infer(ctx), - } - } -} - -impl BinaryOp { - pub fn infer(&self, a: Type, b: Type) -> Result { - use BinaryOp as B; - use Type as T; - - let ty = match (self, a, b) { - (B::Add | B::Sub | B::Mul | B::Div | B::Mod | B::Pow, T::Integer, T::Integer) => T::Integer, - (B::Eq | B::Neq | B::Less | B::LessEq | B::Greater | B::GreaterEq, T::Integer, T::Integer) => T::Boolean, - (B::Add | B::Sub | B::Mul | B::Div | B::Mod | B::Pow, T::Float, T::Float) => T::Float, - (B::Eq | B::Neq | B::Less | B::LessEq | B::Greater | B::GreaterEq, T::Float, T::Float) => T::Boolean, - // (B::Add | B::Sub | B::Mul | B::Div, T::Float, T::Float) => T::Float, - (op, a, b) => { - return Err(Error::TypeError(format!( - "operator {op:?} cannot be applied to {a:?} and {b:?}" - ))) - } - }; - - Ok(ty) - } - - fn precedence(&self) -> (u8, u8) { - match self { - Self::Pow => (22, 21), - Self::Mul | Self::Div | Self::Mod => (19, 20), - Self::Add | Self::Sub => (17, 18), - Self::Shl | Self::Shr => (15, 16), - Self::Less | Self::LessEq | Self::Greater | Self::GreaterEq => (13, 14), - Self::Eq | Self::Neq => (11, 12), - Self::BitAnd => (9, 10), - Self::Xor => (7, 8), - Self::BitOr => (5, 6), - Self::LogicAnd => (3, 4), - Self::LogicOr => (1, 2), - } - } - - fn from_token(token: &Token) -> Option { - let op = match token { - Token::DoubleStar => Self::Pow, - Token::Star => Self::Mul, - Token::Slash => Self::Div, - Token::Percent => Self::Mod, - Token::Plus => Self::Add, - Token::Minus => Self::Sub, - Token::Shl => Self::Shl, - Token::Shr => Self::Shr, - Token::Less => Self::Less, - Token::LessEq => Self::LessEq, - Token::Greater => Self::Greater, - Token::GreaterEq => Self::GreaterEq, - Token::Eq => Self::Eq, - Token::Neq => Self::Neq, - Token::And => Self::BitAnd, - Token::Carat => Self::Xor, - Token::Pipe => Self::BitOr, - Token::DoubleAnd => Self::LogicAnd, - Token::DoublePipe => Self::LogicOr, - _ => return None, - }; - Some(op) - } -} - -impl PrefixOp { - pub fn infer(&self, a: Type) -> Result { - use Type as T; - use PrefixOp as U; - - let ty = match (self, a) { - (U::Minus, T::Integer) => T::Integer, - // (U::Minus, T::Float) => T::Float, - (U::LogicNot, T::Boolean) => T::Boolean, - (op, ty) => { - return Err(Error::TypeError(format!( - "operator {op:?} cannot be applied to {ty:?}" - ))) - } - }; - - Ok(ty) - } -} - -impl Block { - #[allow(clippy::never_loop)] // for now - pub fn infer(&self, ctx: &Context) -> Result { - let mut ctx = ctx.clone(); - let mut ty = Type::Tuple(vec![]); - for statement in &self.0 { - match statement { - Statement::Expr(expr) => ty = expr.infer(&ctx)?, - Statement::Let(name, expr) => { - let var_ty = expr.infer(&ctx)?; - ctx.locals.insert(name.clone(), var_ty); - ty = Type::Tuple(vec![]); - } - } - } - Ok(ty) - } -} - -impl Literal { - fn infer(&self) -> Result { - match self { - Literal::Integer(_) => Ok(Type::Integer), - Literal::Float(_) => Ok(Type::Float), - Literal::Boolean(_) => Ok(Type::Boolean), - Literal::String(_) => Ok(Type::String), - Literal::Char(_) => Ok(Type::Char), - } - } -} - -impl Pattern { - fn infer(&self) -> Result { - match self { - Pattern::Literal(lit) => lit.infer(), - } - } -} - -impl Context { - pub fn new() -> Context { - Context { - locals: HashMap::new(), - } - } -} diff --git a/src/types.rs b/src/types.rs new file mode 100644 index 0000000..1d6e331 --- /dev/null +++ b/src/types.rs @@ -0,0 +1,147 @@ +// trait Types { +// fn infer(); +// } + +use std::collections::HashMap; + +use crate::{ + data::{BinaryOp, Block, Expr, Literal, Pattern, PrefixOp, Statement, Type}, + parser::Context, + Error, +}; + +impl Expr { + pub fn infer(&self, ctx: &Context) -> Result { + match self { + Self::Literal(lit) => lit.infer(), + Self::Binary(op, lhs, rhs) => Ok(op.infer(lhs.infer(ctx)?, rhs.infer(ctx)?)?), + Self::Unary(op, expr) => Ok(op.infer(expr.infer(ctx)?)?), + Self::Variable(name) => match ctx.locals.get(name) { + Some(ty) => Ok(ty.clone()), + None => Err(Error::ReferenceError(format!( + "cannot find variable {name}" + ))), + }, + Self::Match(item, arms) => { + let mut match_ty = None; + let item_ty = item.infer(ctx)?; + for (pat, expr) in arms { + let ty = expr.infer(ctx)?; + let pat_ty = pat.infer()?; + if item_ty != pat_ty { + return Err(Error::ty("cannot compare different type")); + } + if match_ty.is_some_and(|mty| mty != ty) { + return Err(Error::ty("branch returns different type")); + } + match_ty = Some(ty); + } + // TODO: exhaustiveness checks + let Some(match_ty) = match_ty else { + // TODO: infallible types? `enum Nope {}` + return Err(Error::ty("match has no branches to infer")); + }; + Ok(match_ty) + } + Self::Block(block) => block.infer(ctx), + } + } +} + +impl BinaryOp { + pub fn infer(&self, a: Type, b: Type) -> Result { + use BinaryOp as B; + use Type as T; + + let ty = match (self, a, b) { + (B::Add | B::Sub | B::Mul | B::Div | B::Mod | B::Pow, T::Integer, T::Integer) => { + T::Integer + } + ( + B::Eq | B::Neq | B::Less | B::LessEq | B::Greater | B::GreaterEq, + T::Integer, + T::Integer, + ) => T::Boolean, + (B::Add | B::Sub | B::Mul | B::Div | B::Mod | B::Pow, T::Float, T::Float) => T::Float, + ( + B::Eq | B::Neq | B::Less | B::LessEq | B::Greater | B::GreaterEq, + T::Float, + T::Float, + ) => T::Boolean, + // (B::Add | B::Sub | B::Mul | B::Div, T::Float, T::Float) => T::Float, + (op, a, b) => { + return Err(Error::TypeError(format!( + "operator {op:?} cannot be applied to {a:?} and {b:?}" + ))) + } + }; + + Ok(ty) + } +} + +impl PrefixOp { + pub fn infer(&self, a: Type) -> Result { + use PrefixOp as U; + use Type as T; + + let ty = match (self, a) { + (U::Minus, T::Integer) => T::Integer, + // (U::Minus, T::Float) => T::Float, + (U::LogicNot, T::Boolean) => T::Boolean, + (op, ty) => { + return Err(Error::TypeError(format!( + "operator {op:?} cannot be applied to {ty:?}" + ))) + } + }; + + Ok(ty) + } +} + +impl Block { + pub fn infer(&self, ctx: &Context) -> Result { + let mut ctx = ctx.clone(); + let mut ty = Type::Tuple(vec![]); + for statement in &self.0 { + match statement { + Statement::Expr(expr) => ty = expr.infer(&ctx)?, + Statement::Let(name, expr) => { + let var_ty = expr.infer(&ctx)?; + ctx.locals.insert(name.clone(), var_ty); + ty = Type::Tuple(vec![]); + } + } + } + Ok(ty) + } +} + +impl Literal { + fn infer(&self) -> Result { + match self { + Literal::Integer(_) => Ok(Type::Integer), + Literal::Float(_) => Ok(Type::Float), + Literal::Boolean(_) => Ok(Type::Boolean), + Literal::String(_) => Ok(Type::String), + Literal::Char(_) => Ok(Type::Char), + } + } +} + +impl Pattern { + fn infer(&self) -> Result { + match self { + Pattern::Literal(lit) => lit.infer(), + } + } +} + +impl Context { + pub fn new() -> Context { + Context { + locals: HashMap::new(), + } + } +} diff --git a/test.wat b/test.wat new file mode 100644 index 0000000..0762429 --- /dev/null +++ b/test.wat @@ -0,0 +1,18 @@ +(module (func (export "_start") (result i32) (local $match i32) +(local $foo i32) +(local $bar i32) +i32.const 8 +local.set $foo +local.get $foo +i32.const -3 +i32.mul +local.set $bar +;; --- set match variable +local.get $foo +local.get $bar +i32.add +i32.const 10 +i32.lt_u +local.set $match +local.get $match +))