#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LTIRToken {
    Symbol( String ),
    Char( char ),
    Num( i64 ),

    // SingleQuote(String),
    // DoubleQuote(String),
    // TripleQuote(String),

    Lambda,
    AssignType,
    AssignValue,

    ExprOpen,
    ExprClose,

    BlockOpen,
    BlockClose,

    StatementSep,
}

#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexError {
    InvalidDigit,
    InvalidChar,
}

#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexerState {
    Any,
    Sym( String ),
    Num( i64 ),
    Char( Option<char> ),
}

impl LexerState {
    fn into_token(self) -> Option<LTIRToken> {
        match self {
            LexerState::Any => None,
            LexerState::Sym(s) => Some(LTIRToken::Symbol(s)),
            LexerState::Num(n) => Some(LTIRToken::Num(n)),
            LexerState::Char(c) => Some(LTIRToken::Char(c?)),
        }
    }
}

pub struct LTIRLexer<It>
where
    It: std::iter::Iterator<Item = char>,
{
    chars: std::iter::Peekable<It>,
}

impl<It> LTIRLexer<It>
where
    It: Iterator<Item = char>,
{
    pub fn into_inner(self) -> std::iter::Peekable<It> {
        self.chars
    }
}

impl<It> From<It> for LTIRLexer<It>
where
    It: Iterator<Item = char>,
{
    fn from(chars: It) -> Self {
        LTIRLexer {
            chars: chars.peekable(),
        }
    }
}

impl<It> Iterator for LTIRLexer<It>
where
    It: Iterator<Item = char>,
{
    type Item = Result<LTIRToken, LexError>;

    fn next(&mut self) -> Option<Self::Item> {
        let mut state = LexerState::Any;

        while let Some(c) = self.chars.peek() {
            match &mut state {
                // determine token type
                LexerState::Any => match c {
                    'λ' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::Lambda));
                    }
                    '(' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::ExprOpen));
                    }
                    ')' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::ExprClose));
                    }
                    '{' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::BlockOpen));
                    }
                    '}' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::BlockClose));
                    }
                    ':' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::AssignType));
                    }
                    '=' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::AssignValue));
                    }
                    ';' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::StatementSep));
                    }
                    '\'' => {
                        self.chars.next();
                        state = LexerState::Char(None);
                    }
                    c => {
                        if c.is_whitespace() {
                            self.chars.next();
                        } else if c.is_digit(10) {
                            state = LexerState::Num(0);
                        } else {
                            state = LexerState::Sym(String::new());
                        }
                    }
                },

                // character literal, e.g. 'a' or '\n'
                LexerState::Char(val) => {
                    *val = Some(match self.chars.next() {
                        Some('\\') => match self.chars.next() {
                            Some('0') => '\0',
                            Some('n') => '\n',
                            Some('t') => '\t',
                            Some(c) => c,
                            None => {
                                return Some(Err(LexError::InvalidChar));
                            }
                        },
                        Some(c) => c,
                        None => {
                            return Some(Err(LexError::InvalidChar));
                        }
                    });

                    // expect the closing quote
                    match self.chars.next() {
                        Some('\'') => {
                            if let Some(token) = state.clone().into_token() {
                                return Some(Ok(token));
                            }
                        }
                        _ => {
                            return Some(Err(LexError::InvalidChar));
                        }
                    }
                }

                // symbol or number in progress
                _ => {
                    if c.is_whitespace()
                        || *c == '('
                        || *c == ')'
                        || *c == '{'
                        || *c == '}'
                        || *c == ';'
                        || *c == '='
                        || *c == ':'
                    {
                        // finish the current token
                        if let Some(token) = state.clone().into_token() {
                            return Some(Ok(token));
                        }
                    } else {
                        // append to the current token
                        let c = self.chars.next().unwrap();
                        match &mut state {
                            LexerState::Sym(s) => {
                                s.push(c);
                            }
                            LexerState::Num(n) => {
                                if let Some(d) = c.to_digit(10) {
                                    *n = (*n) * 10 + d as i64;
                                } else {
                                    return Some(Err(LexError::InvalidDigit));
                                }
                            }
                            _ => {}
                        }
                    }
                }
            }
        }

        // end of input: emit whatever token is still in progress
        if let Some(token) = state.into_token() {
            Some(Ok(token))
        } else {
            None
        }
    }
}
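
// --- Usage sketch (assumption, not part of the original file) ---
// The tests below illustrate how the lexer above could be driven; the inputs
// and expected token streams are hypothetical examples chosen to exercise the
// symbol/number path and the character-escape path.
#[cfg(test)]
mod tests {
    use super::*;

    // Lex a simple assignment statement into its token stream.
    #[test]
    fn lex_simple_assignment() {
        let mut lexer = LTIRLexer::from("x = 5;".chars());

        assert_eq!(lexer.next(), Some(Ok(LTIRToken::Symbol("x".into()))));
        assert_eq!(lexer.next(), Some(Ok(LTIRToken::AssignValue)));
        assert_eq!(lexer.next(), Some(Ok(LTIRToken::Num(5))));
        assert_eq!(lexer.next(), Some(Ok(LTIRToken::StatementSep)));
        assert_eq!(lexer.next(), None);
    }

    // Character literals support the escapes handled in LexerState::Char.
    #[test]
    fn lex_char_escape() {
        let mut lexer = LTIRLexer::from("'\\n'".chars());

        assert_eq!(lexer.next(), Some(Ok(LTIRToken::Char('\n'))));
        assert_eq!(lexer.next(), None);
    }
}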