Add `let` statement parser

This commit is contained in:
Victor Timofei 2023-09-05 00:10:14 +03:00
commit a67d4cb273
Signed by: vtimofei
GPG Key ID: B790DCEBE281403A
8 changed files with 603 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

8
Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "monkeyrs"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

69
src/ast.rs Normal file
View File

@ -0,0 +1,69 @@
use std::{fmt::Debug, rc::Rc};
use crate::token::Token;
/// Marker trait for all AST nodes; requires `Debug` so nodes can be
/// rendered in tests and diagnostics.
pub trait Node: Debug {}

/// AST nodes that act as statements (e.g. `let`).
pub trait Statement: Node {
    // Marker method distinguishing statements from expressions.
    fn statement_node(&self);
}

/// AST nodes that act as expressions.
pub trait Expression: Node {
    // Marker method distinguishing expressions from statements.
    fn expression_node(&self);
}

/// Root of the AST: the ordered list of parsed statements.
pub struct Program {
    pub statements: Vec<Rc<dyn Statement>>,
}
impl Node for Program {}

impl Debug for Program {
    /// Renders the program as `Vec<Box<...>, Box<...>, >`, mirroring the
    /// container layout of the statement list.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("Vec<")?;
        self.statements.iter().try_for_each(|stmt| {
            f.write_str("Box<")?;
            stmt.fmt(f)?;
            f.write_str(">, ")
        })?;
        f.write_str(">")
    }
}
/// A `let <name> = <value>;` statement.
#[derive(Debug)]
pub struct LetStatement {
    // The `Token::Let` token that introduced the statement.
    pub token: Token,
    // The bound identifier (left-hand side).
    pub name: Identifier,
    // The right-hand-side expression. Currently always a `DummyExpression`
    // because expression parsing is not implemented yet (see parser.rs).
    pub value: Box<dyn Expression>,
}

impl Node for LetStatement {}
impl Statement for LetStatement {
    fn statement_node(&self) {}
}

/// An identifier expression; `token` is expected to be `Token::Ident(..)`.
#[derive(Debug)]
pub struct Identifier {
    pub token: Token,
}

impl Node for Identifier {}
impl Expression for Identifier {
    fn expression_node(&self) {}
}

/// Placeholder expression used where expression parsing is not implemented.
#[derive(Debug)]
pub struct DummyExpression {}

impl Node for DummyExpression {}
impl Expression for DummyExpression {
    fn expression_node(&self) {
        // A dummy must never be used as a real expression at runtime.
        panic!("this is dummy");
    }
}

251
src/lexer.rs Normal file
View File

@ -0,0 +1,251 @@
use crate::token::Token;
/// A lexer over Monkey source text.
///
/// NOTE(review): positions are tracked in characters (`chars().nth`), but
/// `read_identifier`/`read_number` slice `source` by byte index, so the
/// input is effectively assumed to be ASCII — confirm before feeding UTF-8.
pub struct Lexer {
    source: String,
    position: usize, // index of the current character
    read_pos: usize, // index of the next character to read
    ch: char,        // current character; NUL ('\0') signals end of input
}
impl Lexer {
    /// Creates a lexer and primes `ch` with the first character of `source`.
    pub fn new(source: String) -> Self {
        let mut lexer = Self {
            source,
            position: 0,
            read_pos: 0,
            ch: '\0',
        };
        lexer.read_char();
        lexer
    }

    /// Advances to the next character, leaving '\0' once input is exhausted.
    fn read_char(&mut self) {
        // `nth` returns None exactly when read_pos is past the end, so this
        // replaces the original's separate `chars().count()` bounds check.
        self.ch = self.source.chars().nth(self.read_pos).unwrap_or('\0');
        self.position = self.read_pos;
        self.read_pos += 1;
    }

    /// Consumes a run of identifier characters and resolves keywords.
    fn read_identifier(&mut self) -> Token {
        let pos = self.position;
        while is_letter(self.ch) {
            self.read_char();
        }
        // Byte-index slice; assumes ASCII input (see struct note).
        let literal = &self.source[pos..self.position];
        return Token::lookup_ident(literal);
    }

    /// Skips ASCII whitespace.
    ///
    /// BUG FIX: the original condition tested '\t' twice and never skipped
    /// '\r', so CRLF-terminated input produced Illegal tokens.
    fn skip_whitespace(&mut self) {
        while matches!(self.ch, ' ' | '\t' | '\n' | '\r') {
            self.read_char();
        }
    }

    /// Consumes a run of digits and parses them as an `Int` token.
    fn read_number(&mut self) -> Token {
        let pos = self.position;
        while is_digit(self.ch) {
            self.read_char();
        }
        // unwrap is safe for parse() itself only while the digit run fits in
        // i64; overflow would panic here.
        Token::Int(self.source[pos..self.position].parse().unwrap())
    }

    /// Returns the next character without consuming it ('\0' at end).
    fn peek_char(&self) -> char {
        self.source.chars().nth(self.read_pos).unwrap_or('\0')
    }
}
impl Iterator for Lexer {
    type Item = Token;

    /// Produces the next token.
    ///
    /// NOTE(review): this iterator never returns `None` — once input is
    /// exhausted it yields `Token::EOF` forever. Callers (the REPL loop and
    /// the parser's own `Iterator` impl) rely on seeing `Some(EOF)` and must
    /// stop themselves.
    fn next(&mut self) -> Option<Self::Item> {
        use Token::*;
        self.skip_whitespace();
        let token = match self.ch {
            // Two-character operator: "==" vs single "=".
            '=' => {
                if self.peek_char() == '=' {
                    self.read_char();
                    Eq
                } else {
                    Assign
                }
            }
            ';' => Semicolon,
            '(' => Lparen,
            ')' => Rparen,
            ',' => Comma,
            '+' => Plus,
            '-' => Minus,
            // Two-character operator: "!=" vs single "!".
            '!' => {
                if self.peek_char() == '=' {
                    self.read_char();
                    NotEq
                } else {
                    Bang
                }
            }
            '/' => Slash,
            '*' => Asterisk,
            '<' => Lt,
            '>' => Gt,
            '{' => Lbrace,
            '}' => Rbrace,
            // NUL marks end of input (see read_char).
            c if c as u32 == 0 => EOF,
            _ => {
                let tok = if is_letter(self.ch) {
                    self.read_identifier()
                } else if is_digit(self.ch) {
                    self.read_number()
                } else {
                    Illegal
                };
                // Identifiers and numbers already consumed their trailing
                // character, so return without the extra read_char below.
                return Some(tok);
            }
        };
        self.read_char();
        Some(token)
    }
}
/// Returns true for characters that may appear in identifiers:
/// ASCII letters and underscore.
fn is_letter(ch: char) -> bool {
    ch.is_ascii_alphabetic() || ch == '_'
}
/// Returns true for ASCII decimal digits '0'..='9'.
fn is_digit(ch: char) -> bool {
    ch.is_ascii_digit()
}
#[cfg(test)]
mod tests {
    use crate::lexer::Lexer;
    use crate::token::Token;

    /// Drives the lexer over a representative Monkey program and checks the
    /// exact token sequence, including the trailing EOF.
    #[test]
    fn test_next_token() {
        // NOTE: the `\` line continuations strip the newline and any leading
        // whitespace, so the first statements are joined on one line; the
        // later lines keep real newlines. Both must lex identically.
        let input = "let five = 5;\
let ten = 10;\
\
let add = fn(x, y) {\
x + y;\
};\
\
let result = add(five, ten);\
!-/*5;
5 < 10 > 5;
if (5 < 10) {
return true;
} else {
return false;
}
10 == 10;
10 != 9;
"
        .to_string();
        use Token::*;
        // Expected tokens, in order, one per lexer step.
        let tests = vec![
            Let,
            Ident("five".to_string()),
            Assign,
            Int(5),
            Semicolon,
            Let,
            Ident("ten".to_string()),
            Assign,
            Int(10),
            Semicolon,
            Let,
            Ident("add".to_string()),
            Assign,
            Function,
            Lparen,
            Ident("x".to_string()),
            Comma,
            Ident("y".to_string()),
            Rparen,
            Lbrace,
            Ident("x".to_string()),
            Plus,
            Ident("y".to_string()),
            Semicolon,
            Rbrace,
            Semicolon,
            Let,
            Ident("result".to_string()),
            Assign,
            Ident("add".to_string()),
            Lparen,
            Ident("five".to_string()),
            Comma,
            Ident("ten".to_string()),
            Rparen,
            Semicolon,
            Bang,
            Minus,
            Slash,
            Asterisk,
            Int(5),
            Semicolon,
            Int(5),
            Lt,
            Int(10),
            Gt,
            Int(5),
            Semicolon,
            If,
            Lparen,
            Int(5),
            Lt,
            Int(10),
            Rparen,
            Lbrace,
            Return,
            True,
            Semicolon,
            Rbrace,
            Else,
            Lbrace,
            Return,
            False,
            Semicolon,
            Rbrace,
            Int(10),
            Eq,
            Int(10),
            Semicolon,
            Int(10),
            NotEq,
            Int(9),
            Semicolon,
            EOF,
        ];
        let mut lexer_it = Lexer::new(input);
        for (i, tt) in tests.iter().enumerate() {
            let token = lexer_it.next();
            // Print the index so a failing assertion is easy to locate.
            println!("{i}");
            assert_eq!(*tt, token.unwrap());
        }
    }
}

11
src/main.rs Normal file
View File

@ -0,0 +1,11 @@
mod ast;
mod lexer;
mod repl;
mod token;
mod parser;
/// Entry point: wires the process's stdin/stdout into the REPL.
fn main() {
    repl::start(std::io::stdout(), std::io::stdin());
}

172
src/parser.rs Normal file
View File

@ -0,0 +1,172 @@
use std::rc::Rc;
use crate::{lexer::Lexer, token::Token, ast::{Program, Statement, LetStatement, Identifier, DummyExpression}};
/// Parser result type; all fallible parser operations return this.
pub type Result<T> = std::result::Result<T, Error>;

/// Errors the parser can report.
///
/// NOTE(review): `Error` is private while the `pub type Result` alias above
/// exposes it, so code outside this module cannot name the error type —
/// consider making it `pub` if the parser is used externally.
#[derive(Debug)]
enum Error {
    /// The next token did not match what the grammar required.
    UnexpectedToken {
        // The token type the parser required.
        expected: Token,
        // The token actually encountered, if any was available.
        actual: Option<Token>,
    },
}
/// Parser over the token stream produced by `Lexer`.
///
/// Keeps a two-token lookahead window: `cur_token` is the token being
/// parsed, `peek_token` the one after it; `None` means no token is held.
struct Parser {
    lexer: Lexer,
    cur_token: Option<Token>,
    peek_token: Option<Token>,
}
impl Parser {
    /// Builds a parser primed with a two-token lookahead window.
    pub fn new(mut lexer: Lexer) -> Self {
        let cur_token = lexer.next();
        let peek_token = lexer.next();
        Self { lexer, cur_token, peek_token }
    }

    /// Parses the whole token stream into a `Program`.
    ///
    /// The loop runs while `self.next()` (the `Iterator` impl below) keeps
    /// yielding `Some(())`, i.e. until the current token becomes EOF.
    pub fn parse(&mut self) -> Result<Program> {
        let mut program = Program {
            statements: Vec::new(),
        };
        let mut done = Some(());
        while done.is_some() {
            let stmt = self.parse_statement();
            program.statements.push(stmt?);
            done = self.next();
        }
        Ok(program)
    }

    /// Dispatches on the current token to the matching statement parser.
    fn parse_statement(&mut self) -> Result<Rc<dyn Statement>> {
        match &self.cur_token {
            Some(token) => {
                use Token::*;
                match token {
                    Let => self.parse_let_statement(),
                    t => unimplemented!("{t:?} statement token not impl"),
                }
            }
            // parse() only calls us while next() keeps returning Some,
            // which guarantees cur_token is populated.
            None => unreachable!(),
        }
    }

    /// Parses `let <ident> = <expr>;`.
    ///
    /// Expression parsing is not implemented yet, so everything up to the
    /// semicolon is skipped and the value is a `DummyExpression`.
    fn parse_let_statement(&mut self) -> Result<Rc<dyn Statement>> {
        let token = self.cur_token.clone().unwrap();
        // Only the *type* matters for expect_peek, so the payload is empty.
        self.expect_peek(&Token::Ident("".to_string()))?;
        let name = Identifier {
            token: self.cur_token.clone().unwrap(),
        };
        self.expect_peek(&Token::Assign)?;
        // Skip the (unparsed) expression. BUG FIX: the lexer yields Some(EOF)
        // forever, so without the `next().is_none()` guard a statement
        // missing its semicolon spun in an infinite loop.
        while !self.cur_token_is(&Token::Semicolon) {
            if self.next().is_none() {
                break;
            }
        }
        Ok(Rc::new(LetStatement {
            token,
            name,
            value: Box::new(DummyExpression {}),
        }))
    }

    /// Advances one token iff the peek token has the expected type;
    /// otherwise reports what was actually found.
    fn expect_peek(&mut self, token: &Token) -> Result<()> {
        if self.peek_token_is(token) {
            self.next();
            return Ok(());
        }
        // BUG FIX: the original reported `actual: None` even when a
        // mismatched token was present, hiding it from the error.
        Err(Error::UnexpectedToken {
            expected: token.clone(),
            actual: self.peek_token.clone(),
        })
    }

    /// True iff the peek token exists and has the same type as `token`.
    fn peek_token_is(&self, token: &Token) -> bool {
        // Borrow instead of the original clone().unwrap() round-trip.
        self.peek_token
            .as_ref()
            .map_or(false, |t| t.is_same_type(token))
    }

    /// True iff the current token exists and has the same type as `token`.
    fn cur_token_is(&self, token: &Token) -> bool {
        self.cur_token
            .as_ref()
            .map_or(false, |t| t.is_same_type(token))
    }
}
impl Iterator for Parser {
    type Item = ();

    /// Slides the two-token lookahead window one token forward.
    ///
    /// Yields `Some(())` while a meaningful token is current, and `None`
    /// once the current token is EOF or the stream is exhausted, so the
    /// parse loop can drive the parser like any iterator.
    fn next(&mut self) -> Option<Self::Item> {
        let upcoming = self.lexer.next();
        // Move peek into cur without the clone the original performed.
        self.cur_token = std::mem::replace(&mut self.peek_token, upcoming);
        if matches!(self.cur_token, Some(ref tok) if *tok != Token::EOF) {
            Some(())
        } else {
            None
        }
    }
}
#[cfg(test)]
mod tests {
    use std::rc::Rc;

    use crate::{lexer::Lexer, ast::Statement};

    use super::Parser;

    /// Parses three `let` statements and checks that each bound identifier
    /// appears in order.
    #[test]
    fn let_statements() {
        // `\` continuations join the statements onto one source line.
        let source = "let x = 5;\
let y = 10;\
let foobar = 838383;\
".to_string();
        let lexer = Lexer::new(source);
        let mut parser = Parser::new(lexer);
        let program = parser.parse().unwrap();
        assert_eq!(program.statements.len(), 3);
        let expected_identifiers = vec![
            "x",
            "y",
            "foobar",
        ];
        let mut statements_iter = program.statements.iter();
        for tt in expected_identifiers {
            let statement = statements_iter.next().unwrap();
            test_let_statement(statement.clone(), tt);
        }
    }

    /// Statements are trait objects, so the check compares their Debug
    /// rendering against the expected `LetStatement { .. }` shape.
    fn test_let_statement(stmt: Rc<dyn Statement>, name: &str) {
        assert_eq!(
            format!("{stmt:?}"),
            format!("LetStatement {{ token: Let, name: Identifier {{ token: Ident(\"{name}\") }}, value: DummyExpression }}"),
        );
    }
}

30
src/repl.rs Normal file
View File

@ -0,0 +1,30 @@
use std::io::{BufRead, BufReader, Read, Write};
use crate::{lexer::Lexer, token::Token};
// Prompt printed before each line of input.
const PROMPT: &str = ">> ";

/// Runs a read-lex-print loop: prompts on `w`, reads a line from `r`, and
/// prints the Debug form of every token. Exits when `r` reaches end of
/// input (read_line yields an empty string).
///
/// Panics on I/O errors; acceptable for an interactive REPL.
pub fn start(mut w: impl Write, r: impl Read) {
    let mut reader = BufReader::new(r);
    loop {
        write!(w, "{PROMPT}").unwrap();
        w.flush().unwrap();
        let mut line = String::new();
        reader.read_line(&mut line).unwrap();
        // Idiom fixes: `is_empty()` over `len() == 0` (clippy::len_zero)
        // and `writeln!(w)` over `writeln!(w, "")`.
        if line.is_empty() {
            writeln!(w).unwrap();
            return;
        }
        let lex = Lexer::new(line);
        for token in lex {
            // The lexer yields EOF forever, never None — break explicitly.
            if token == Token::EOF {
                break;
            }
            writeln!(w, "{token:?}").unwrap();
        }
    }
}

61
src/token.rs Normal file
View File

@ -0,0 +1,61 @@
/// Every token kind the Monkey lexer can produce.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    // Special tokens: unrecognized character / end of input.
    Illegal,
    EOF,
    // Identifiers and literals.
    Ident(String),
    Int(i64),
    // Operators.
    Assign,
    Plus,
    Minus,
    Bang,
    Asterisk,
    Slash,
    Lt,
    Gt,
    Eq,
    NotEq,
    // Delimiters.
    Comma,
    Semicolon,
    Lparen,
    Rparen,
    Lbrace,
    Rbrace,
    // Keywords.
    Function,
    Let,
    True,
    False,
    If,
    Else,
    Return,
}
impl Token {
    /// Compares token *types*, ignoring the payloads of `Ident` and `Int`.
    ///
    /// `Ident("x")` and `Ident("y")` are the same type; for payload-free
    /// variants this degrades to plain equality.
    pub fn is_same_type(&self, other: &Token) -> bool {
        use Token::*;
        match self {
            // Idiom fix: `matches!` replaces the verbose
            // `if let ... { true } else { false }` chains.
            Ident(_) => matches!(other, Ident(_)),
            Int(_) => matches!(other, Int(_)),
            tok => tok == other,
        }
    }

    /// Maps an identifier string to its keyword token, or wraps it in
    /// `Token::Ident` when it is not a reserved word.
    pub fn lookup_ident(ident: &str) -> Token {
        use Token::*;
        match ident {
            "fn" => Function,
            "let" => Let,
            "true" => True,
            "false" => False,
            "if" => If,
            "else" => Else,
            "return" => Return,
            ident => Ident(ident.to_string()),
        }
    }
}