2023-09-04 21:10:14 +00:00
|
|
|
use crate::token::Token;
|
|
|
|
|
|
|
|
/// A hand-written lexer that walks `source` one character at a time.
///
/// Tokens are produced through the type's `Iterator` implementation.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// The complete program text being tokenized.
    source: String,
    /// Position in `source` of the character currently held in `ch`.
    position: usize,
    /// Position in `source` of the next character to be read.
    read_pos: usize,
    /// The character under examination; `'\0'` marks the end of the input.
    ch: char,
}
|
|
|
|
|
|
|
|
impl Lexer {
|
|
|
|
pub fn new(source: String) -> Self {
|
|
|
|
let mut lexer = Self {
|
|
|
|
source,
|
|
|
|
position: 0,
|
|
|
|
read_pos: 0,
|
|
|
|
ch: char::from_u32(0).unwrap(),
|
|
|
|
};
|
|
|
|
|
|
|
|
lexer.read_char();
|
|
|
|
|
|
|
|
lexer
|
|
|
|
}
|
|
|
|
|
|
|
|
fn read_char(&mut self) {
|
|
|
|
self.ch = if self.read_pos >= self.source.chars().count() {
|
|
|
|
char::from_u32(0).unwrap()
|
|
|
|
} else {
|
|
|
|
self.source.chars().nth(self.read_pos).unwrap()
|
|
|
|
};
|
|
|
|
|
|
|
|
self.position = self.read_pos;
|
|
|
|
self.read_pos += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn read_identifier(&mut self) -> Token {
|
|
|
|
let pos = self.position;
|
|
|
|
|
|
|
|
while is_letter(self.ch) {
|
|
|
|
self.read_char();
|
|
|
|
}
|
|
|
|
|
|
|
|
let literal = &self.source[pos..self.position];
|
|
|
|
|
|
|
|
return Token::lookup_ident(literal);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn skip_whitespace(&mut self) {
|
|
|
|
while self.ch == ' ' || self.ch == '\t' || self.ch == '\n' || self.ch == '\t' {
|
|
|
|
self.read_char();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn read_number(&mut self) -> Token {
|
|
|
|
let pos = self.position;
|
|
|
|
|
|
|
|
while is_digit(self.ch) {
|
|
|
|
self.read_char();
|
|
|
|
}
|
|
|
|
|
|
|
|
Token::Int(self.source[pos..self.position].parse().unwrap())
|
|
|
|
}
|
|
|
|
|
|
|
|
fn peek_char(&self) -> char {
|
|
|
|
if self.read_pos >= self.source.chars().count() {
|
|
|
|
char::from_u32(0).unwrap()
|
|
|
|
} else {
|
|
|
|
self.source.chars().nth(self.read_pos).unwrap()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Iterator for Lexer {
|
|
|
|
type Item = Token;
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
use Token::*;
|
|
|
|
|
|
|
|
self.skip_whitespace();
|
|
|
|
|
|
|
|
let token = match self.ch {
|
|
|
|
'=' => {
|
|
|
|
if self.peek_char() == '=' {
|
|
|
|
self.read_char();
|
|
|
|
Eq
|
|
|
|
} else {
|
|
|
|
Assign
|
|
|
|
}
|
|
|
|
}
|
|
|
|
';' => Semicolon,
|
|
|
|
'(' => Lparen,
|
|
|
|
')' => Rparen,
|
|
|
|
',' => Comma,
|
|
|
|
'+' => Plus,
|
|
|
|
'-' => Minus,
|
|
|
|
'!' => {
|
|
|
|
if self.peek_char() == '=' {
|
|
|
|
self.read_char();
|
|
|
|
NotEq
|
|
|
|
} else {
|
|
|
|
Bang
|
|
|
|
}
|
|
|
|
}
|
|
|
|
'/' => Slash,
|
|
|
|
'*' => Asterisk,
|
|
|
|
'<' => Lt,
|
|
|
|
'>' => Gt,
|
|
|
|
'{' => Lbrace,
|
|
|
|
'}' => Rbrace,
|
|
|
|
c if c as u32 == 0 => EOF,
|
|
|
|
_ => {
|
|
|
|
let tok = if is_letter(self.ch) {
|
|
|
|
self.read_identifier()
|
|
|
|
} else if is_digit(self.ch) {
|
|
|
|
self.read_number()
|
|
|
|
} else {
|
|
|
|
Illegal
|
|
|
|
};
|
|
|
|
return Some(tok);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
self.read_char();
|
|
|
|
|
|
|
|
Some(token)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns `true` when `ch` may appear in an identifier: an ASCII letter
/// (`a`-`z`, `A`-`Z`) or an underscore.
fn is_letter(ch: char) -> bool {
    ch.is_ascii_alphabetic() || ch == '_'
}
|
|
|
|
|
|
|
|
/// Returns `true` when `ch` is an ASCII decimal digit (`0`-`9`).
fn is_digit(ch: char) -> bool {
    ch.is_ascii_digit()
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use crate::lexer::Lexer;
    use crate::token::Token;

    /// Lexes a small program covering every operator, delimiter and keyword
    /// the lexer recognises, and checks the exact token sequence including
    /// the trailing `EOF`.
    #[test]
    fn next_token() {
        let input = "let five = 5;\
            let ten = 10;\
            \
            let add = fn(x, y) {\
                x + y;\
            };\
            \
            let result = add(five, ten);\
            !-/*5;
            5 < 10 > 5;
            if (5 < 10) {
                return true;
            } else {
                return false;
            }
            10 == 10;
            10 != 9;
            "
        .to_string();

        use Token::*;

        let expected = [
            Let,
            Ident("five".to_string()),
            Assign,
            Int(5),
            Semicolon,
            Let,
            Ident("ten".to_string()),
            Assign,
            Int(10),
            Semicolon,
            Let,
            Ident("add".to_string()),
            Assign,
            Function,
            Lparen,
            Ident("x".to_string()),
            Comma,
            Ident("y".to_string()),
            Rparen,
            Lbrace,
            Ident("x".to_string()),
            Plus,
            Ident("y".to_string()),
            Semicolon,
            Rbrace,
            Semicolon,
            Let,
            Ident("result".to_string()),
            Assign,
            Ident("add".to_string()),
            Lparen,
            Ident("five".to_string()),
            Comma,
            Ident("ten".to_string()),
            Rparen,
            Semicolon,
            Bang,
            Minus,
            Slash,
            Asterisk,
            Int(5),
            Semicolon,
            Int(5),
            Lt,
            Int(10),
            Gt,
            Int(5),
            Semicolon,
            If,
            Lparen,
            Int(5),
            Lt,
            Int(10),
            Rparen,
            Lbrace,
            Return,
            True,
            Semicolon,
            Rbrace,
            Else,
            Lbrace,
            Return,
            False,
            Semicolon,
            Rbrace,
            Int(10),
            Eq,
            Int(10),
            Semicolon,
            Int(10),
            NotEq,
            Int(9),
            Semicolon,
            EOF,
        ];

        let mut lexer = Lexer::new(input);

        for (i, want) in expected.iter().enumerate() {
            let got = lexer
                .next()
                .expect("the lexer yields a token on every call");
            // The index pinpoints which token diverged without needing stdout.
            assert_eq!(got, *want, "token #{i} does not match");
        }
    }
}
|