refactor and parse identifiers and int literals

This commit is contained in:
Victor Timofei 2023-09-07 23:12:13 +03:00
parent 04ece2b9a7
commit a16d6eb464
Signed by: vtimofei
GPG Key ID: B790DCEBE281403A
3 changed files with 331 additions and 59 deletions

View File

@ -1,6 +1,6 @@
use std::{fmt::Debug, rc::Rc}; use std::{fmt::Debug, rc::Rc};
use crate::token::Token; use crate::token::{Token, TokenType};
pub trait Node: Debug {} pub trait Node: Debug {}
@ -50,14 +50,35 @@ impl Statement for Return {}
impl Node for Return {} impl Node for Return {}
#[derive(Debug)] #[derive(Debug)]
pub struct Identifier { pub struct ExpressionStatement {
// TODO: probably not needed
pub token: Token, pub token: Token,
pub expression: Rc<dyn Expression>,
}
impl Statement for ExpressionStatement {}
impl Node for ExpressionStatement {}
#[derive(Debug)]
pub struct Identifier {
pub token_type: TokenType,
pub value: String,
} }
impl Node for Identifier {} impl Node for Identifier {}
impl Expression for Identifier {} impl Expression for Identifier {}
/// AST node for an integer literal expression.
#[derive(Debug)]
pub struct IntegerLiteral {
    /// The literal's numeric value, stored as a signed 64-bit integer.
    pub value: i64,
}
// Marker-trait impls: they let an `IntegerLiteral` be used wherever a
// `dyn Node` or `dyn Expression` is expected.
impl Node for IntegerLiteral {}
impl Expression for IntegerLiteral {}
#[derive(Debug)] #[derive(Debug)]
pub struct DummyExpression {} pub struct DummyExpression {}

View File

@ -1,27 +1,44 @@
use std::{fmt, rc::Rc}; use std::{fmt, rc::Rc};
use crate::{ use crate::{
ast::{DummyExpression, Expression, Identifier, Let, Program, Statement}, ast::{
DummyExpression, Expression, ExpressionStatement, Identifier, IntegerLiteral, Let, Program,
Return, Statement,
},
lexer::Lexer, lexer::Lexer,
token::Token, token::{Token, TokenType},
}; };
pub type Result<T> = std::result::Result<T, Error>; pub type Result<T> = std::result::Result<T, Error>;
/// One-shot, boxed parsing routine that is handed the parser and produces an
/// expression. Instances are built by `Parser::prefix_parse_fn`.
type PrefixParseFn = Box<dyn FnOnce(&mut Parser) -> Rc<dyn Expression>>;
/// One-shot, boxed parsing routine that is handed the parser plus an
/// already-built expression and produces a new expression.
// NOTE(review): the extra argument is presumably the left-hand operand of an
// infix operator — confirm once `infix_parse_fn` is implemented.
type InfixParseFn = Box<dyn FnOnce(&mut Parser, Rc<dyn Expression>) -> Rc<dyn Expression>>;
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
UnexpectedToken { UnexpectedTokenType {
expected: Token, expected: TokenType,
actual: Option<Token>, actual: Option<TokenType>,
}, },
} }
/// Binding strength used when parsing expressions.
///
/// Variants are declared from weakest to strongest, so the derived
/// `PartialOrd` orders them `Lowest < Equals < ... < Call`.
///
/// The type is a plain field-less enum, so it is `Copy`: it can be passed by
/// value to `parse_expression` and still be used afterwards. `Debug` allows it
/// to appear in assertions and diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
enum Precedence {
    /// Weakest level; the starting point for a whole expression statement.
    Lowest = 0,
    Equals,
    LessGreater,
    Sum,
    Product,
    Prefix,
    /// Strongest level.
    Call,
}
impl fmt::Display for Error { impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "parser error: ")?; write!(f, "parser error: ")?;
use Error::*; use Error::*;
match self { match self {
UnexpectedToken { expected, actual } => write!( UnexpectedTokenType { expected, actual } => write!(
f, f,
"expected token `{:?}`, actual token: `{:?}`", "expected token `{:?}`, actual token: `{:?}`",
expected, actual expected, actual
@ -78,7 +95,8 @@ impl Parser {
use Token::*; use Token::*;
match token { match token {
Let => self.parse_let_statement(), Let => self.parse_let_statement(),
t => None, Return => self.parse_return_statement(),
_ => self.parse_expression_statement(),
} }
} }
None => unreachable!(), None => unreachable!(),
@ -86,69 +104,173 @@ impl Parser {
} }
fn parse_let_statement(&mut self) -> Option<Rc<dyn Statement>> { fn parse_let_statement(&mut self) -> Option<Rc<dyn Statement>> {
let token = self.cur_token.clone().unwrap(); let token = if let Some(token) = &self.cur_token {
token.clone()
if !self.expect_peek(&Token::Ident("".to_string())) { } else {
return None; return None;
}
let name = Identifier {
token: self.cur_token.clone().unwrap(),
}; };
if !self.expect_peek(&Token::Assign) { let value = self.expect_peek_ident()?;
let name = Identifier {
token_type: TokenType::Let,
value,
};
if !self.expect_peek(TokenType::Assign) {
return None; return None;
} }
let value = self.parse_expression()?; while !self.cur_token_is(TokenType::Semicolon) {
self.next();
}
let value = Rc::new(DummyExpression {});
Some(Rc::new(Let { token, name, value })) Some(Rc::new(Let { token, name, value }))
} }
fn parse_expression(&mut self) -> Option<Rc<dyn Expression>> { fn parse_return_statement(&mut self) -> Option<Rc<dyn Statement>> {
while !self.cur_token_is(&Token::Semicolon) { let token = self.cur_token.clone().unwrap();
while !self.cur_token_is(TokenType::Semicolon) {
self.next(); self.next();
} }
Some(Rc::new(DummyExpression {})) let value = Rc::new(DummyExpression {});
Some(Rc::new(Return { token, value }))
} }
fn expect_peek(&mut self, token: &Token) -> bool { fn parse_expression_statement(&mut self) -> Option<Rc<dyn Statement>> {
if self.peek_token.is_none() { let token = self.cur_token.clone()?;
self.errors.push(Error::UnexpectedToken {
expected: token.clone(), let expression = self.parse_expression(Precedence::Lowest)?;
if self.peek_token_is(TokenType::Semicolon) {
self.next();
}
Some(Rc::new(ExpressionStatement { token, expression }))
}
/// Parses the expression that starts at the current token.
///
/// Looks up the prefix parsing routine for the current token and runs it.
/// Returns `None` when no routine exists for that token.
///
/// `precedence` is accepted but not consulted yet; only the prefix part of an
/// expression is parsed here.
fn parse_expression(&mut self, precedence: Precedence) -> Option<Rc<dyn Expression>> {
    self.prefix_parse_fn()
        .map(|parse_prefix| parse_prefix(self))
}
/// Wraps `value` in an `Identifier` expression node tagged `TokenType::Ident`.
fn parse_identifier(&mut self, value: String) -> Rc<dyn Expression> {
    let identifier = Identifier {
        token_type: TokenType::Ident,
        value,
    };
    Rc::new(identifier)
}
/// Builds an `IntegerLiteral` expression from an unsigned token value.
///
/// The value is narrowed to `i64`. When it does not fit, a `DummyExpression`
/// placeholder is returned instead and no parser error is recorded.
fn parse_integer_literal(&mut self, value: u64) -> Rc<dyn Expression> {
    match i64::try_from(value) {
        Ok(value) => Rc::new(IntegerLiteral { value }),
        // TODO: emit error
        Err(_) => Rc::new(DummyExpression {}),
    }
}
fn expect_peek(&mut self, token_type: TokenType) -> bool {
let peek_token = if let Some(token) = &self.peek_token {
token
} else {
self.errors.push(Error::UnexpectedTokenType {
expected: token_type,
actual: None, actual: None,
}); });
return false; return false;
} };
let peek_token = self.peek_token.clone().unwrap(); if token_type == peek_token.token_type() {
if token.is_same_type(&peek_token) {
self.next(); self.next();
return true; return true;
} else { } else {
self.errors.push(Error::UnexpectedToken { self.errors.push(Error::UnexpectedTokenType {
expected: token.clone(), expected: token_type,
actual: None, actual: None,
}); });
return false; return false;
} }
} }
fn peek_token_is(&self, token: &Token) -> bool { fn expect_peek_ident(&mut self) -> Option<String> {
if self.peek_token.is_none() { let peek_token = self.peek_token.clone()?;
return false; match peek_token {
Token::Ident(value) => {
self.next();
Some(value)
}
_ => None,
} }
self.peek_token.clone().unwrap().is_same_type(token)
} }
fn cur_token_is(&self, token: &Token) -> bool { fn expect_cur_int_literal(&mut self) -> Option<u64> {
if self.cur_token.is_none() { let peek_token = self.peek_token.clone()?;
return false; match peek_token {
Token::Int(value) => {
self.next();
Some(value)
}
_ => None,
} }
}
self.cur_token.clone().unwrap().is_same_type(token) fn peek_token_is(&self, token_type: TokenType) -> bool {
let peek_token = if let Some(token) = &self.peek_token {
token
} else {
return false;
};
peek_token.token_type() == token_type
}
/// Reports whether the current token has the given type.
///
/// Returns `false` when there is no current token.
fn cur_token_is(&self, token_type: TokenType) -> bool {
    self.cur_token
        .as_ref()
        .map_or(false, |current| current.token_type() == token_type)
}
fn prefix_parse_fn(&self) -> Option<PrefixParseFn> {
let token = if let Some(token) = &self.cur_token {
token
} else {
return None;
};
use Token::*;
match token {
Ident(value) => {
let value = value.clone();
Some(Box::new(move |parser| {
Self::parse_identifier(parser, value)
}))
}
Int(value) => {
let value = *value;
Some(Box::new(move |parser| {
Self::parse_integer_literal(parser, value)
}))
}
_ => None,
}
}
fn infix_parse_fn(token: &Token) -> InfixParseFn {
use Token::*;
match token {
_ => unimplemented!(),
}
} }
} }
@ -201,10 +323,73 @@ mod tests {
} }
} }
/// Three `return` statements must parse into three `Return` nodes whose value
/// is still the placeholder expression.
#[test]
fn return_statements() {
    let source = String::from(
        "return 5;\
         return 10;\
         return 838383;\
         ",
    );

    let mut parser = Parser::new(Lexer::new(source));
    let program = parser.parse().unwrap();
    check_parser_errors(parser);

    assert_eq!(program.statements.len(), 3);

    for statement in program.statements.iter() {
        let rendered = format!("{statement:?}");
        assert_eq!(
            rendered,
            "Return { token: Return, value: DummyExpression }"
        )
    }
}
/// A bare identifier followed by `;` must parse into an expression statement
/// wrapping an `Identifier`.
#[test]
fn identifier_expression() {
    let mut parser = Parser::new(Lexer::new("foobar;".to_owned()));
    let program = parser.parse().unwrap();
    check_parser_errors(parser);

    let expected_names = vec!["foobar"];
    let mut parsed = program.statements.iter();

    for name in expected_names {
        // Panics if fewer statements were parsed than names are expected.
        let statement = parsed.next().unwrap();
        test_identifier_expression(Rc::clone(statement), name);
    }
}
/// A bare integer followed by `;` must parse into an expression statement
/// wrapping an `IntegerLiteral`.
#[test]
fn integer_literal_expression() {
    let mut parser = Parser::new(Lexer::new("6;".to_owned()));
    let program = parser.parse().unwrap();
    check_parser_errors(parser);

    let expected_values = vec![6];
    let mut parsed = program.statements.iter();

    for number in expected_values {
        // Panics if fewer statements were parsed than values are expected.
        let statement = parsed.next().unwrap();
        test_integer_literal_expression(Rc::clone(statement), number);
    }
}
fn test_let_statement(stmt: Rc<dyn Statement>, name: &str) { fn test_let_statement(stmt: Rc<dyn Statement>, name: &str) {
assert_eq!( assert_eq!(
format!("{stmt:?}"), format!("{stmt:?}"),
format!("Let {{ token: Let, name: Identifier {{ token: Ident(\"{name}\") }}, value: DummyExpression }}"), format!("Let {{ token: Let, name: Identifier {{ token_type: Let, value: \"{name}\" }}, value: DummyExpression }}"),
); );
} }
@ -221,4 +406,22 @@ mod tests {
panic!("{err}"); panic!("{err}");
} }
/// Asserts that `stmt` debug-prints as an expression statement holding the
/// identifier `name`.
fn test_identifier_expression(stmt: Rc<dyn Statement>, name: &str) {
    let actual = format!("{stmt:?}");
    let expected = format!(
        "ExpressionStatement {{ token: Ident(\"{name}\"), expression: Identifier {{ token_type: Ident, value: \"{name}\" }} }}"
    );
    assert_eq!(actual, expected);
}
/// Asserts that `stmt` debug-prints as an expression statement holding the
/// integer literal `num`.
fn test_integer_literal_expression(stmt: Rc<dyn Statement>, num: i64) {
    let actual = format!("{stmt:?}");
    let expected = format!(
        "ExpressionStatement {{ token: Int({num}), expression: IntegerLiteral {{ value: {num} }} }}"
    );
    assert_eq!(actual, expected);
}
} }

View File

@ -4,7 +4,44 @@ pub enum Token {
EOF, EOF,
Ident(String), Ident(String),
Int(i64), Int(u64),
Assign,
Plus,
Minus,
Bang,
Asterisk,
Slash,
Lt,
Gt,
Eq,
NotEq,
Comma,
Semicolon,
Lparen,
Rparen,
Lbrace,
Rbrace,
Function,
Let,
True,
False,
If,
Else,
Return,
}
#[derive(Debug, PartialEq, Eq)]
pub enum TokenType {
Illegal,
EOF,
Ident,
Int,
Assign, Assign,
Plus, Plus,
@ -36,24 +73,35 @@ pub enum Token {
} }
impl Token { impl Token {
pub fn is_same_type(&self, other: &Token) -> bool { pub fn token_type(&self) -> TokenType {
use Token::*;
match self { match self {
Ident(_) => { Token::Illegal => TokenType::Illegal,
if let Ident(_) = other { Token::EOF => TokenType::EOF,
true Token::Ident(_) => TokenType::Ident,
} else { Token::Int(_) => TokenType::Int,
false Token::Assign => TokenType::Assign,
} Token::Plus => TokenType::Plus,
} Token::Minus => TokenType::Minus,
Int(_) => { Token::Bang => TokenType::Bang,
if let Int(_) = other { Token::Asterisk => TokenType::Asterisk,
true Token::Slash => TokenType::Slash,
} else { Token::Lt => TokenType::Lt,
false Token::Gt => TokenType::Gt,
} Token::Eq => TokenType::Eq,
} Token::NotEq => TokenType::NotEq,
tok => tok == other, Token::Comma => TokenType::Comma,
Token::Semicolon => TokenType::Semicolon,
Token::Lparen => TokenType::Lparen,
Token::Rparen => TokenType::Rparen,
Token::Lbrace => TokenType::Lbrace,
Token::Rbrace => TokenType::Rbrace,
Token::Function => TokenType::Function,
Token::Let => TokenType::Let,
Token::True => TokenType::True,
Token::False => TokenType::False,
Token::If => TokenType::If,
Token::Else => TokenType::Else,
Token::Return => TokenType::Return,
} }
} }