refactor and parse identifiers and int literals

This commit is contained in:
Victor Timofei 2023-09-07 23:12:13 +03:00
parent 04ece2b9a7
commit a16d6eb464
Signed by: vtimofei
GPG Key ID: B790DCEBE281403A
3 changed files with 331 additions and 59 deletions

View File

@ -1,6 +1,6 @@
use std::{fmt::Debug, rc::Rc};
use crate::token::Token;
use crate::token::{Token, TokenType};
pub trait Node: Debug {}
@ -50,14 +50,35 @@ impl Statement for Return {}
impl Node for Return {}
#[derive(Debug)]
pub struct Identifier {
/// Statement node that wraps a single expression.
pub struct ExpressionStatement {
// TODO: probably not needed
/// Token that was current when the parser started this statement.
pub token: Token,
/// Expression parsed for this statement.
pub expression: Rc<dyn Expression>,
}
// Marker impls: let the struct be used as `dyn Statement` / `dyn Node`.
impl Statement for ExpressionStatement {}
impl Node for ExpressionStatement {}
/// Identifier expression node.
#[derive(Debug)]
pub struct Identifier {
/// Token type recorded by the parser when it constructed this node.
pub token_type: TokenType,
/// Text of the identifier.
pub value: String,
}
// Marker impls: let the struct be used as `dyn Node` / `dyn Expression`.
impl Node for Identifier {}
impl Expression for Identifier {}
/// Integer literal expression node.
#[derive(Debug)]
pub struct IntegerLiteral {
/// Literal value as a signed 64-bit integer.
pub value: i64,
}
// Marker impls: let the struct be used as `dyn Node` / `dyn Expression`.
impl Node for IntegerLiteral {}
impl Expression for IntegerLiteral {}
/// Placeholder expression node that carries no data; the parser substitutes it
/// wherever it does not (yet) build a real expression.
#[derive(Debug)]
pub struct DummyExpression {}

View File

@ -1,27 +1,44 @@
use std::{fmt, rc::Rc};
use crate::{
ast::{DummyExpression, Expression, Identifier, Let, Program, Statement},
ast::{
DummyExpression, Expression, ExpressionStatement, Identifier, IntegerLiteral, Let, Program,
Return, Statement,
},
lexer::Lexer,
token::Token,
token::{Token, TokenType},
};
/// Result alias whose error type is this module's `Error`.
pub type Result<T> = std::result::Result<T, Error>;
/// Boxed one-shot callback that is handed the parser and returns an expression.
type PrefixParseFn = Box<dyn FnOnce(&mut Parser) -> Rc<dyn Expression>>;
/// Boxed one-shot callback that is handed the parser plus an already-built
/// expression (presumably the left operand — TODO confirm) and returns an expression.
type InfixParseFn = Box<dyn FnOnce(&mut Parser, Rc<dyn Expression>) -> Rc<dyn Expression>>;
#[derive(Debug)]
pub enum Error {
UnexpectedToken {
expected: Token,
actual: Option<Token>,
UnexpectedTokenType {
expected: TokenType,
actual: Option<TokenType>,
},
}
/// Precedence levels used by the expression parser.
///
/// Variants are declared in ascending order, so the derived comparison
/// operators rank later variants above earlier ones (`Lowest` is the minimum,
/// `Call` the maximum). The enum is a plain tag, so it is `Copy` and can be
/// passed by value freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum Precedence {
    /// Baseline level; compares below every other variant.
    Lowest = 0,
    Equals,
    LessGreater,
    Sum,
    Product,
    Prefix,
    /// Highest level; compares above every other variant.
    Call,
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "parser error: ")?;
use Error::*;
match self {
UnexpectedToken { expected, actual } => write!(
UnexpectedTokenType { expected, actual } => write!(
f,
"expected token `{:?}`, actual token: `{:?}`",
expected, actual
@ -78,7 +95,8 @@ impl Parser {
use Token::*;
match token {
Let => self.parse_let_statement(),
t => None,
Return => self.parse_return_statement(),
_ => self.parse_expression_statement(),
}
}
None => unreachable!(),
@ -86,69 +104,173 @@ impl Parser {
}
fn parse_let_statement(&mut self) -> Option<Rc<dyn Statement>> {
let token = self.cur_token.clone().unwrap();
if !self.expect_peek(&Token::Ident("".to_string())) {
let token = if let Some(token) = &self.cur_token {
token.clone()
} else {
return None;
}
let name = Identifier {
token: self.cur_token.clone().unwrap(),
};
if !self.expect_peek(&Token::Assign) {
let value = self.expect_peek_ident()?;
let name = Identifier {
token_type: TokenType::Let,
value,
};
if !self.expect_peek(TokenType::Assign) {
return None;
}
let value = self.parse_expression()?;
while !self.cur_token_is(TokenType::Semicolon) {
self.next();
}
let value = Rc::new(DummyExpression {});
Some(Rc::new(Let { token, name, value }))
}
fn parse_expression(&mut self) -> Option<Rc<dyn Expression>> {
while !self.cur_token_is(&Token::Semicolon) {
fn parse_return_statement(&mut self) -> Option<Rc<dyn Statement>> {
let token = self.cur_token.clone().unwrap();
while !self.cur_token_is(TokenType::Semicolon) {
self.next();
}
Some(Rc::new(DummyExpression {}))
let value = Rc::new(DummyExpression {});
Some(Rc::new(Return { token, value }))
}
fn expect_peek(&mut self, token: &Token) -> bool {
if self.peek_token.is_none() {
self.errors.push(Error::UnexpectedToken {
expected: token.clone(),
/// Parses an expression statement that begins at the current token.
///
/// Returns `None` when there is no current token or when `parse_expression`
/// yields nothing. If the peek token is a semicolon, `next` is called once.
fn parse_expression_statement(&mut self) -> Option<Rc<dyn Statement>> {
    let token = self.cur_token.as_ref().cloned()?;
    let expression = self.parse_expression(Precedence::Lowest)?;

    // The trailing semicolon is optional.
    let followed_by_semicolon = self.peek_token_is(TokenType::Semicolon);
    if followed_by_semicolon {
        self.next();
    }

    let statement = ExpressionStatement { token, expression };
    Some(Rc::new(statement))
}
/// Parses an expression by running the prefix parse function selected for the
/// current token.
///
/// Returns `None` when `prefix_parse_fn` has no function for the current
/// token. `precedence` is accepted but not consulted yet.
fn parse_expression(&mut self, precedence: Precedence) -> Option<Rc<dyn Expression>> {
    let parse_prefix = self.prefix_parse_fn()?;
    Some(parse_prefix(self))
}
/// Builds an `Identifier` expression tagged `TokenType::Ident` from `value`.
fn parse_identifier(&mut self, value: String) -> Rc<dyn Expression> {
    let identifier = Identifier {
        token_type: TokenType::Ident,
        value,
    };
    Rc::new(identifier)
}
/// Builds an `IntegerLiteral` expression from an unsigned literal value.
///
/// Values that do not fit in `i64` produce a `DummyExpression` instead.
fn parse_integer_literal(&mut self, value: u64) -> Rc<dyn Expression> {
    match i64::try_from(value) {
        Ok(value) => Rc::new(IntegerLiteral { value }),
        // TODO: emit error
        Err(_) => Rc::new(DummyExpression {}),
    }
}
fn expect_peek(&mut self, token_type: TokenType) -> bool {
let peek_token = if let Some(token) = &self.peek_token {
token
} else {
self.errors.push(Error::UnexpectedTokenType {
expected: token_type,
actual: None,
});
return false;
}
};
let peek_token = self.peek_token.clone().unwrap();
if token.is_same_type(&peek_token) {
if token_type == peek_token.token_type() {
self.next();
return true;
} else {
self.errors.push(Error::UnexpectedToken {
expected: token.clone(),
self.errors.push(Error::UnexpectedTokenType {
expected: token_type,
actual: None,
});
return false;
}
}
fn peek_token_is(&self, token: &Token) -> bool {
if self.peek_token.is_none() {
return false;
fn expect_peek_ident(&mut self) -> Option<String> {
let peek_token = self.peek_token.clone()?;
match peek_token {
Token::Ident(value) => {
self.next();
Some(value)
}
_ => None,
}
self.peek_token.clone().unwrap().is_same_type(token)
}
fn cur_token_is(&self, token: &Token) -> bool {
if self.cur_token.is_none() {
return false;
/// Returns the value of the peek token when it is an integer literal, calling
/// `next` first; returns `None` when there is no peek token or it is not an
/// `Int`.
///
/// NOTE(review): the name says "cur" but the body inspects `peek_token` —
/// confirm which token was intended.
fn expect_cur_int_literal(&mut self) -> Option<u64> {
    if let Token::Int(value) = self.peek_token.clone()? {
        self.next();
        Some(value)
    } else {
        None
    }
}
self.cur_token.clone().unwrap().is_same_type(token)
/// Reports whether a peek token exists and has type `token_type`.
fn peek_token_is(&self, token_type: TokenType) -> bool {
    match &self.peek_token {
        Some(token) => token.token_type() == token_type,
        None => false,
    }
}
/// Reports whether a current token exists and has type `token_type`.
fn cur_token_is(&self, token_type: TokenType) -> bool {
    self.cur_token
        .as_ref()
        .map_or(false, |token| token.token_type() == token_type)
}
fn prefix_parse_fn(&self) -> Option<PrefixParseFn> {
let token = if let Some(token) = &self.cur_token {
token
} else {
return None;
};
use Token::*;
match token {
Ident(value) => {
let value = value.clone();
Some(Box::new(move |parser| {
Self::parse_identifier(parser, value)
}))
}
Int(value) => {
let value = *value;
Some(Box::new(move |parser| {
Self::parse_integer_literal(parser, value)
}))
}
_ => None,
}
}
/// Looks up the infix parse function for `token`.
///
/// Not implemented yet: every token currently reaches `unimplemented!()`,
/// so calling this always panics.
fn infix_parse_fn(token: &Token) -> InfixParseFn {
use Token::*;
match token {
// Placeholder arm until infix operators are supported.
_ => unimplemented!(),
}
}
}
@ -201,10 +323,73 @@ mod tests {
}
}
#[test]
fn return_statements() {
    // Three return statements; the line continuations join them without newlines.
    let source = String::from(
        "return 5;\
        return 10;\
        return 838383;\
        ",
    );

    let mut parser = Parser::new(Lexer::new(source));
    let program = parser.parse().unwrap();
    check_parser_errors(parser);

    assert_eq!(program.statements.len(), 3);

    // Return values are not parsed yet, so every statement carries the placeholder.
    let expected = "Return { token: Return, value: DummyExpression }";
    for stmt in program.statements.iter() {
        assert_eq!(format!("{stmt:?}"), expected);
    }
}
#[test]
fn identifier_expression() {
    let source = String::from("foobar;");

    let mut parser = Parser::new(Lexer::new(source));
    let program = parser.parse().unwrap();
    check_parser_errors(parser);

    // Each expected identifier must match the next parsed statement, in order.
    let mut statements = program.statements.iter();
    for name in ["foobar"] {
        let statement = statements.next().unwrap();
        test_identifier_expression(Rc::clone(statement), name);
    }
}
#[test]
fn integer_literal_expression() {
    let source = String::from("6;");

    let mut parser = Parser::new(Lexer::new(source));
    let program = parser.parse().unwrap();
    check_parser_errors(parser);

    // Each expected integer must match the next parsed statement, in order.
    let mut statements = program.statements.iter();
    for number in [6] {
        let statement = statements.next().unwrap();
        test_integer_literal_expression(Rc::clone(statement), number);
    }
}
fn test_let_statement(stmt: Rc<dyn Statement>, name: &str) {
assert_eq!(
format!("{stmt:?}"),
format!("Let {{ token: Let, name: Identifier {{ token: Ident(\"{name}\") }}, value: DummyExpression }}"),
format!("Let {{ token: Let, name: Identifier {{ token_type: Let, value: \"{name}\" }}, value: DummyExpression }}"),
);
}
@ -221,4 +406,22 @@ mod tests {
panic!("{err}");
}
/// Asserts that the `Debug` rendering of `stmt` is an expression statement
/// wrapping the identifier `name`.
fn test_identifier_expression(stmt: Rc<dyn Statement>, name: &str) {
    let actual = format!("{stmt:?}");
    let expected = format!(
        "ExpressionStatement {{ token: Ident(\"{name}\"), expression: Identifier {{ token_type: Ident, value: \"{name}\" }} }}"
    );
    assert_eq!(actual, expected);
}
/// Asserts that the `Debug` rendering of `stmt` is an expression statement
/// wrapping the integer literal `num`.
fn test_integer_literal_expression(stmt: Rc<dyn Statement>, num: i64) {
    let actual = format!("{stmt:?}");
    let expected = format!(
        "ExpressionStatement {{ token: Int({num}), expression: IntegerLiteral {{ value: {num} }} }}"
    );
    assert_eq!(actual, expected);
}
}

View File

@ -4,7 +4,44 @@ pub enum Token {
EOF,
Ident(String),
Int(i64),
Int(u64),
Assign,
Plus,
Minus,
Bang,
Asterisk,
Slash,
Lt,
Gt,
Eq,
NotEq,
Comma,
Semicolon,
Lparen,
Rparen,
Lbrace,
Rbrace,
Function,
Let,
True,
False,
If,
Else,
Return,
}
#[derive(Debug, PartialEq, Eq)]
pub enum TokenType {
Illegal,
EOF,
Ident,
Int,
Assign,
Plus,
@ -36,24 +73,35 @@ pub enum Token {
}
impl Token {
pub fn is_same_type(&self, other: &Token) -> bool {
use Token::*;
pub fn token_type(&self) -> TokenType {
match self {
Ident(_) => {
if let Ident(_) = other {
true
} else {
false
}
}
Int(_) => {
if let Int(_) = other {
true
} else {
false
}
}
tok => tok == other,
Token::Illegal => TokenType::Illegal,
Token::EOF => TokenType::EOF,
Token::Ident(_) => TokenType::Ident,
Token::Int(_) => TokenType::Int,
Token::Assign => TokenType::Assign,
Token::Plus => TokenType::Plus,
Token::Minus => TokenType::Minus,
Token::Bang => TokenType::Bang,
Token::Asterisk => TokenType::Asterisk,
Token::Slash => TokenType::Slash,
Token::Lt => TokenType::Lt,
Token::Gt => TokenType::Gt,
Token::Eq => TokenType::Eq,
Token::NotEq => TokenType::NotEq,
Token::Comma => TokenType::Comma,
Token::Semicolon => TokenType::Semicolon,
Token::Lparen => TokenType::Lparen,
Token::Rparen => TokenType::Rparen,
Token::Lbrace => TokenType::Lbrace,
Token::Rbrace => TokenType::Rbrace,
Token::Function => TokenType::Function,
Token::Let => TokenType::Let,
Token::True => TokenType::True,
Token::False => TokenType::False,
Token::If => TokenType::If,
Token::Else => TokenType::Else,
Token::Return => TokenType::Return,
}
}