lib-laddertypes/src/lexer.rs

166 lines
5.1 KiB
Rust
Raw Normal View History

2023-10-01 13:11:29 +02:00
2023-10-02 01:27:50 +02:00
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
2023-10-01 13:11:29 +02:00
/// A single lexical token of the ladder-type notation.
///
/// Produced by the lexer's `Iterator` implementation in this file.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LadderTypeToken {
    /// A symbol; the payload is the symbol's text.
    Symbol(String),
    /// A quoted character literal, written between `'` delimiters.
    Char(char),
    /// A decimal integer literal.
    Num(i64),
    /// The opening delimiter `<`.
    Open,
    /// The closing delimiter `>`.
    Close,
    /// The ladder separator `~`.
    Ladder,
}
/// Errors the lexer can report while scanning its input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexError {
    /// A character that is not a decimal digit was found inside a numeric token.
    InvalidDigit,

    /// A quoted character token was not closed correctly with `'`.
    InvalidChar,
}
2023-10-02 01:27:50 +02:00
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
2023-10-01 13:11:29 +02:00
/// Internal scanner state: which kind of token is currently being accumulated.
#[derive(PartialEq, Eq, Clone, Debug)]
enum LexerState {
    /// No token has been started yet.
    Any,
    /// Accumulating the text of a symbol.
    Sym(String),
    /// Accumulating the value of a decimal number.
    Num(i64),
    /// Inside a quoted character literal; `None` until the character is read.
    Char(Option<char>),
}
impl LexerState {
    /// Converts the accumulated state into the token it represents.
    ///
    /// Yields `None` when there is nothing to emit: either no token was
    /// started (`Any`), or a character literal has not received its
    /// character yet (`Char(None)`).
    fn into_token(self) -> Option<LadderTypeToken> {
        match self {
            LexerState::Any => None,
            LexerState::Sym(text) => Some(LadderTypeToken::Symbol(text)),
            LexerState::Num(value) => Some(LadderTypeToken::Num(value)),
            LexerState::Char(pending) => pending.map(LadderTypeToken::Char),
        }
    }
}
2023-10-02 01:27:50 +02:00
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
2023-10-01 13:11:29 +02:00
/// Lexer over a stream of characters.
///
/// The character source is wrapped in a `Peekable` so that the next
/// character can be inspected without being consumed.
pub struct LadderTypeLexer<It>
where
    It: Iterator<Item = char>,
{
    /// Remaining input, with one character of lookahead.
    chars: std::iter::Peekable<It>,
}
2023-10-01 17:52:48 +02:00
impl<It> From<It> for LadderTypeLexer<It>
2023-10-01 13:11:29 +02:00
where It: Iterator<Item = char>
{
2023-10-01 17:52:48 +02:00
fn from(chars: It) -> Self {
2023-10-01 13:11:29 +02:00
LadderTypeLexer {
chars: chars.peekable()
}
}
}
impl<It> Iterator for LadderTypeLexer<It>
where
    It: Iterator<Item = char>,
{
    type Item = Result<LadderTypeToken, LexError>;

    /// Scans and returns the next token from the input.
    ///
    /// Returns `None` once the input is exhausted, `Some(Ok(token))` for a
    /// successfully scanned token and `Some(Err(_))` for malformed input.
    ///
    /// Scanning rules:
    /// * whitespace between tokens is skipped;
    /// * `<`, `>` and `~` are single-character tokens;
    /// * `'` starts a character literal, which supports the escapes `\0`,
    ///   `\n` and `\t` (any other escaped character stands for itself) and
    ///   must be closed by `'`;
    /// * a decimal digit starts a number;
    /// * any other character starts a symbol.
    ///
    /// Symbols and numbers extend until whitespace, `<`, `>`, `~` or the end
    /// of the input; the terminating character is left in the input.
    ///
    /// # Errors
    ///
    /// * `LexError::InvalidChar` if a character literal is cut off by the end
    ///   of the input or is not closed by `'`.
    /// * `LexError::InvalidDigit` if a number contains a non-digit character,
    ///   or if its value does not fit into an `i64` (there is no dedicated
    ///   overflow variant).
    fn next(&mut self) -> Option<Self::Item> {
        // Characters that end a symbol or number without being part of it.
        let ends_token = |c: char| c.is_whitespace() || matches!(c, '<' | '>' | '~');

        let mut state = LexerState::Any;

        while let Some(&c) = self.chars.peek() {
            match &mut state {
                // Determine the token type from its first character.
                LexerState::Any => match c {
                    '<' => {
                        self.chars.next();
                        return Some(Ok(LadderTypeToken::Open));
                    }
                    '>' => {
                        self.chars.next();
                        return Some(Ok(LadderTypeToken::Close));
                    }
                    '~' => {
                        self.chars.next();
                        return Some(Ok(LadderTypeToken::Ladder));
                    }
                    '\'' => {
                        self.chars.next();
                        state = LexerState::Char(None);
                    }
                    _ if c.is_whitespace() => {
                        self.chars.next();
                    }
                    _ if c.is_digit(10) => state = LexerState::Num(0),
                    // Every remaining character starts a symbol. Leaving the
                    // state unchanged here without consuming the character
                    // would make this loop spin forever.
                    _ => state = LexerState::Sym(String::new()),
                },

                // The opening quote has been consumed: read the (possibly
                // escaped) character followed by the closing quote.
                LexerState::Char(_) => {
                    let value = match self.chars.next() {
                        Some('\\') => match self.chars.next() {
                            Some('0') => '\0',
                            Some('n') => '\n',
                            Some('t') => '\t',
                            Some(escaped) => escaped,
                            None => return Some(Err(LexError::InvalidChar)),
                        },
                        Some(plain) => plain,
                        None => return Some(Err(LexError::InvalidChar)),
                    };

                    return Some(match self.chars.next() {
                        Some('\'') => Ok(LadderTypeToken::Char(value)),
                        _ => Err(LexError::InvalidChar),
                    });
                }

                LexerState::Sym(text) => {
                    if ends_token(c) {
                        // Leave the terminator for the next call.
                        break;
                    }
                    self.chars.next();
                    text.push(c);
                }

                LexerState::Num(value) => {
                    if ends_token(c) {
                        // Leave the terminator for the next call.
                        break;
                    }
                    self.chars.next();

                    // Checked arithmetic: an over-long literal is reported as
                    // an error instead of panicking or silently wrapping.
                    let appended = c
                        .to_digit(10)
                        .and_then(|d| value.checked_mul(10)?.checked_add(i64::from(d)));
                    match appended {
                        Some(updated) => *value = updated,
                        None => return Some(Err(LexError::InvalidDigit)),
                    }
                }
            }
        }

        match state {
            // The input ended directly after an opening quote.
            LexerState::Char(None) => Some(Err(LexError::InvalidChar)),
            // Emit whatever was accumulated, or `None` at the end of input.
            finished => finished.into_token().map(Ok),
        }
    }
}
2023-10-02 01:27:50 +02:00
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\