monkeyrs/src/lexer.rs

use crate::token::Token;
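
/// A character-by-character lexer for Monkey source code.
/// `position` indexes the current character, `read_pos` the next character to
/// read, and `ch` caches the current character ('\0' once the input is exhausted).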
pub struct Lexer {
    source: String,
    position: usize,
    read_pos: usize,
    ch: char,
}
impl Lexer {
    pub fn new(source: String) -> Self {
        let mut lexer = Self {
            source,
            position: 0,
            read_pos: 0,
            ch: '\0',
        };
        lexer.read_char();
        lexer
    }

    fn read_char(&mut self) {
        // Advance to the next character; '\0' marks the end of the input.
        self.ch = self.source.chars().nth(self.read_pos).unwrap_or('\0');
        self.position = self.read_pos;
        self.read_pos += 1;
    }

    fn read_identifier(&mut self) -> Token {
        let pos = self.position;
        while is_letter(self.ch) {
            self.read_char();
        }
        // Note: `pos` and `self.position` are char indices; the byte slice
        // below assumes ASCII-only source.
        let literal = &self.source[pos..self.position];
        Token::lookup_ident(literal)
    }

    fn skip_whitespace(&mut self) {
        while self.ch == ' ' || self.ch == '\t' || self.ch == '\n' || self.ch == '\r' {
            self.read_char();
        }
    }

    fn read_number(&mut self) -> Token {
        let pos = self.position;
        while is_digit(self.ch) {
            self.read_char();
        }
        Token::Int(self.source[pos..self.position].parse().unwrap())
    }

    fn peek_char(&self) -> char {
        // Look at the next character without consuming it.
        self.source.chars().nth(self.read_pos).unwrap_or('\0')
    }
}
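
// Tokens are produced by iterating over the lexer. Note that `next` never
// returns `None`: once the input is exhausted it keeps yielding `Token::EOF`.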
impl Iterator for Lexer {
    type Item = Token;

    fn next(&mut self) -> Option<Self::Item> {
        use Token::*;
        self.skip_whitespace();
        let token = match self.ch {
            '=' => {
                if self.peek_char() == '=' {
                    self.read_char();
                    Eq
                } else {
                    Assign
                }
            }
            ';' => Semicolon,
            '(' => Lparen,
            ')' => Rparen,
            ',' => Comma,
            '+' => Plus,
            '-' => Minus,
            '!' => {
                if self.peek_char() == '=' {
                    self.read_char();
                    NotEq
                } else {
                    Bang
                }
            }
            '/' => Slash,
            '*' => Asterisk,
            '<' => Lt,
            '>' => Gt,
            '{' => Lbrace,
            '}' => Rbrace,
            '\0' => EOF,
            _ => {
                // Identifiers and numbers already advance past their last
                // character, so return early instead of calling read_char below.
                let tok = if is_letter(self.ch) {
                    self.read_identifier()
                } else if is_digit(self.ch) {
                    self.read_number()
                } else {
                    // Consume the unrecognised character so the lexer keeps advancing.
                    self.read_char();
                    Illegal
                };
                return Some(tok);
            }
        };
        self.read_char();
        Some(token)
    }
}

fn is_letter(ch: char) -> bool {
    ch.is_ascii_alphabetic() || ch == '_'
}

fn is_digit(ch: char) -> bool {
    ch.is_ascii_digit()
}
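
// Exercises the lexer over a small Monkey program and checks the exact token
// stream it produces.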
#[cfg(test)]
mod tests {
    use crate::lexer::Lexer;
    use crate::token::Token;

    #[test]
    fn next_token() {
        let input = "
            let five = 5;
            let ten = 10;

            let add = fn(x, y) {
                x + y;
            };

            let result = add(five, ten);
            !-/*5;
            5 < 10 > 5;
            if (5 < 10) {
                return true;
            } else {
                return false;
            }
            10 == 10;
            10 != 9;
        "
        .to_string();

        use Token::*;
        let tests = vec![
            // let five = 5;
            Let, Ident("five".to_string()), Assign, Int(5), Semicolon,
            // let ten = 10;
            Let, Ident("ten".to_string()), Assign, Int(10), Semicolon,
            // let add = fn(x, y) { x + y; };
            Let, Ident("add".to_string()), Assign, Function, Lparen,
            Ident("x".to_string()), Comma, Ident("y".to_string()), Rparen, Lbrace,
            Ident("x".to_string()), Plus, Ident("y".to_string()), Semicolon, Rbrace,
            Semicolon,
            // let result = add(five, ten);
            Let, Ident("result".to_string()), Assign, Ident("add".to_string()), Lparen,
            Ident("five".to_string()), Comma, Ident("ten".to_string()), Rparen, Semicolon,
            // !-/*5;
            Bang, Minus, Slash, Asterisk, Int(5), Semicolon,
            // 5 < 10 > 5;
            Int(5), Lt, Int(10), Gt, Int(5), Semicolon,
            // if (5 < 10) { return true; } else { return false; }
            If, Lparen, Int(5), Lt, Int(10), Rparen, Lbrace,
            Return, True, Semicolon, Rbrace,
            Else, Lbrace, Return, False, Semicolon, Rbrace,
            // 10 == 10;
            Int(10), Eq, Int(10), Semicolon,
            // 10 != 9;
            Int(10), NotEq, Int(9), Semicolon,
            EOF,
        ];

        let mut lexer_it = Lexer::new(input);
        for (i, tt) in tests.iter().enumerate() {
            let token = lexer_it.next();
            assert_eq!(*tt, token.unwrap(), "unexpected token at index {i}");
        }
    }
}