diff --git a/src/ast.rs b/src/ast.rs index 09d17f4..9296f49 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,6 +1,6 @@ use std::{fmt::Debug, rc::Rc}; -use crate::token::Token; +use crate::token::{Token, TokenType}; pub trait Node: Debug {} @@ -50,14 +50,35 @@ impl Statement for Return {} impl Node for Return {} #[derive(Debug)] -pub struct Identifier { +pub struct ExpressionStatement { + // TODO: probably not needed pub token: Token, + pub expression: Rc, +} + +impl Statement for ExpressionStatement {} + +impl Node for ExpressionStatement {} + +#[derive(Debug)] +pub struct Identifier { + pub token_type: TokenType, + pub value: String, } impl Node for Identifier {} impl Expression for Identifier {} +#[derive(Debug)] +pub struct IntegerLiteral { + pub value: i64, +} + +impl Node for IntegerLiteral {} + +impl Expression for IntegerLiteral {} + #[derive(Debug)] pub struct DummyExpression {} diff --git a/src/parser.rs b/src/parser.rs index 26faacb..08710ae 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,27 +1,44 @@ use std::{fmt, rc::Rc}; use crate::{ - ast::{DummyExpression, Expression, Identifier, Let, Program, Statement}, + ast::{ + DummyExpression, Expression, ExpressionStatement, Identifier, IntegerLiteral, Let, Program, + Return, Statement, + }, lexer::Lexer, - token::Token, + token::{Token, TokenType}, }; pub type Result = std::result::Result; +type PrefixParseFn = Box Rc>; +type InfixParseFn = Box) -> Rc>; + #[derive(Debug)] pub enum Error { - UnexpectedToken { - expected: Token, - actual: Option, + UnexpectedTokenType { + expected: TokenType, + actual: Option, }, } +#[derive(PartialEq, PartialOrd)] +enum Precedence { + Lowest = 0, + Equals, + LessGreater, + Sum, + Product, + Prefix, + Call, +} + impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "parser error: ")?; use Error::*; match self { - UnexpectedToken { expected, actual } => write!( + UnexpectedTokenType { expected, actual } => write!( f, "expected token `{:?}`, actual token: `{:?}`", expected, actual @@ -78,7 +95,8 @@ impl Parser { use Token::*; match token { Let => self.parse_let_statement(), - t => None, + Return => self.parse_return_statement(), + _ => self.parse_expression_statement(), } } None => unreachable!(), @@ -86,69 +104,173 @@ impl Parser { } fn parse_let_statement(&mut self) -> Option> { - let token = self.cur_token.clone().unwrap(); - - if !self.expect_peek(&Token::Ident("".to_string())) { + let token = if let Some(token) = &self.cur_token { + token.clone() + } else { return None; - } - - let name = Identifier { - token: self.cur_token.clone().unwrap(), }; - if !self.expect_peek(&Token::Assign) { + let value = self.expect_peek_ident()?; + + let name = Identifier { + token_type: TokenType::Let, + value, + }; + + if !self.expect_peek(TokenType::Assign) { return None; } - let value = self.parse_expression()?; + while !self.cur_token_is(TokenType::Semicolon) { + self.next(); + } + let value = Rc::new(DummyExpression {}); Some(Rc::new(Let { token, name, value })) } - fn parse_expression(&mut self) -> Option> { - while !self.cur_token_is(&Token::Semicolon) { + fn parse_return_statement(&mut self) -> Option> { + let token = self.cur_token.clone().unwrap(); + + while !self.cur_token_is(TokenType::Semicolon) { self.next(); } - Some(Rc::new(DummyExpression {})) + let value = Rc::new(DummyExpression {}); + + Some(Rc::new(Return { token, value })) } - fn expect_peek(&mut self, token: &Token) -> bool { - if self.peek_token.is_none() { - self.errors.push(Error::UnexpectedToken { - expected: token.clone(), + fn parse_expression_statement(&mut self) -> Option> { + let token = self.cur_token.clone()?; + + let expression = self.parse_expression(Precedence::Lowest)?; + + if self.peek_token_is(TokenType::Semicolon) { + self.next(); + } + + Some(Rc::new(ExpressionStatement { token, expression })) + } + + fn parse_expression(&mut self, precedence: Precedence) -> Option> { + let prefix = self.prefix_parse_fn()?; + + let left_exp = prefix(self); + Some(left_exp) + } + + fn parse_identifier(&mut self, value: String) -> Rc { + Rc::new(Identifier { + token_type: TokenType::Ident, + value, + }) + } + + fn parse_integer_literal(&mut self, value: u64) -> Rc { + let value = if let Ok(value) = i64::try_from(value) { + value + } else { + // TODO: emit error + return Rc::new(DummyExpression {}); + }; + Rc::new(IntegerLiteral { value }) + } + + fn expect_peek(&mut self, token_type: TokenType) -> bool { + let peek_token = if let Some(token) = &self.peek_token { + token + } else { + self.errors.push(Error::UnexpectedTokenType { + expected: token_type, actual: None, }); return false; - } + }; - let peek_token = self.peek_token.clone().unwrap(); - - if token.is_same_type(&peek_token) { + if token_type == peek_token.token_type() { self.next(); return true; } else { - self.errors.push(Error::UnexpectedToken { - expected: token.clone(), + self.errors.push(Error::UnexpectedTokenType { + expected: token_type, actual: None, }); return false; } } - fn peek_token_is(&self, token: &Token) -> bool { - if self.peek_token.is_none() { - return false; + fn expect_peek_ident(&mut self) -> Option { + let peek_token = self.peek_token.clone()?; + match peek_token { + Token::Ident(value) => { + self.next(); + Some(value) + } + _ => None, } - - self.peek_token.clone().unwrap().is_same_type(token) } - fn cur_token_is(&self, token: &Token) -> bool { - if self.cur_token.is_none() { - return false; + fn expect_cur_int_literal(&mut self) -> Option { + let peek_token = self.peek_token.clone()?; + match peek_token { + Token::Int(value) => { + self.next(); + Some(value) + } + _ => None, } + } - self.cur_token.clone().unwrap().is_same_type(token) + fn peek_token_is(&self, token_type: TokenType) -> bool { + let peek_token = if let Some(token) = &self.peek_token { + token + } else { + return false; + }; + + peek_token.token_type() == token_type + } + + fn cur_token_is(&self, token_type: TokenType) -> bool { + let cur_token = if let Some(token) = &self.cur_token { + token + } else { + return false; + }; + + cur_token.token_type() == token_type + } + + fn prefix_parse_fn(&self) -> Option { + let token = if let Some(token) = &self.cur_token { + token + } else { + return None; + }; + + use Token::*; + match token { + Ident(value) => { + let value = value.clone(); + Some(Box::new(move |parser| { + Self::parse_identifier(parser, value) + })) + } + Int(value) => { + let value = *value; + Some(Box::new(move |parser| { + Self::parse_integer_literal(parser, value) + })) + } + _ => None, + } + } + + fn infix_parse_fn(token: &Token) -> InfixParseFn { + use Token::*; + match token { + _ => unimplemented!(), + } } } @@ -201,10 +323,73 @@ mod tests { } } + #[test] + fn return_statements() { + let source = "return 5;\ + return 10;\ + return 838383;\ + " + .to_string(); + + let lexer = Lexer::new(source); + + let mut parser = Parser::new(lexer); + + let program = parser.parse().unwrap(); + check_parser_errors(parser); + + assert_eq!(program.statements.len(), 3); + + for stmt in program.statements { + assert_eq!( + format!("{stmt:?}"), + "Return { token: Return, value: DummyExpression }" + ) + } + } + + #[test] + fn identifier_expression() { + let source = "foobar;".to_owned(); + + let lexer = Lexer::new(source); + + let mut parser = Parser::new(lexer); + + let program = parser.parse().unwrap(); + check_parser_errors(parser); + + let expected_identifiers = vec!["foobar"]; + let mut statements_iter = program.statements.iter(); + for tt in expected_identifiers { + let statement = statements_iter.next().unwrap(); + test_identifier_expression(statement.clone(), tt); + } + } + + #[test] + fn integer_literal_expression() { + let source = "6;".to_owned(); + + let lexer = Lexer::new(source); + + let mut parser = Parser::new(lexer); + + let program = parser.parse().unwrap(); + check_parser_errors(parser); + + let expected_integers = vec![6]; + let mut statements_iter = program.statements.iter(); + for tt in expected_integers { + let statement = statements_iter.next().unwrap(); + test_integer_literal_expression(statement.clone(), tt); + } + } + fn test_let_statement(stmt: Rc, name: &str) { assert_eq!( format!("{stmt:?}"), - format!("Let {{ token: Let, name: Identifier {{ token: Ident(\"{name}\") }}, value: DummyExpression }}"), + format!("Let {{ token: Let, name: Identifier {{ token_type: Let, value: \"{name}\" }}, value: DummyExpression }}"), ); } @@ -221,4 +406,22 @@ mod tests { panic!("{err}"); } + + fn test_identifier_expression(stmt: Rc, name: &str) { + assert_eq!( + format!("{stmt:?}"), + format!( + "ExpressionStatement {{ token: Ident(\"{name}\"), expression: Identifier {{ token_type: Ident, value: \"{name}\" }} }}" + ), + ); + } + + fn test_integer_literal_expression(stmt: Rc, num: i64) { + assert_eq!( + format!("{stmt:?}"), + format!( + "ExpressionStatement {{ token: Int({num}), expression: IntegerLiteral {{ value: {num} }} }}" + ), + ); + } } diff --git a/src/token.rs b/src/token.rs index f07a866..396c1ae 100644 --- a/src/token.rs +++ b/src/token.rs @@ -4,7 +4,44 @@ pub enum Token { EOF, Ident(String), - Int(i64), + Int(u64), + + Assign, + Plus, + Minus, + Bang, + Asterisk, + Slash, + + Lt, + Gt, + Eq, + NotEq, + + Comma, + Semicolon, + + Lparen, + Rparen, + Lbrace, + Rbrace, + + Function, + Let, + True, + False, + If, + Else, + Return, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum TokenType { + Illegal, + EOF, + + Ident, + Int, Assign, Plus, @@ -36,24 +73,35 @@ pub enum Token { } impl Token { - pub fn is_same_type(&self, other: &Token) -> bool { - use Token::*; + pub fn token_type(&self) -> TokenType { match self { - Ident(_) => { - if let Ident(_) = other { - true - } else { - false - } - } - Int(_) => { - if let Int(_) = other { - true - } else { - false - } - } - tok => tok == other, + Token::Illegal => TokenType::Illegal, + Token::EOF => TokenType::EOF, + Token::Ident(_) => TokenType::Ident, + Token::Int(_) => TokenType::Int, + Token::Assign => TokenType::Assign, + Token::Plus => TokenType::Plus, + Token::Minus => TokenType::Minus, + Token::Bang => TokenType::Bang, + Token::Asterisk => TokenType::Asterisk, + Token::Slash => TokenType::Slash, + Token::Lt => TokenType::Lt, + Token::Gt => TokenType::Gt, + Token::Eq => TokenType::Eq, + Token::NotEq => TokenType::NotEq, + Token::Comma => TokenType::Comma, + Token::Semicolon => TokenType::Semicolon, + Token::Lparen => TokenType::Lparen, + Token::Rparen => TokenType::Rparen, + Token::Lbrace => TokenType::Lbrace, + Token::Rbrace => TokenType::Rbrace, + Token::Function => TokenType::Function, + Token::Let => TokenType::Let, + Token::True => TokenType::True, + Token::False => TokenType::False, + Token::If => TokenType::If, + Token::Else => TokenType::Else, + Token::Return => TokenType::Return, } }