//! Lexer for the LTIR surface syntax: turns a stream of `char`s into
//! [`LTIRToken`]s (symbols, numeric and character literals, lambda
//! syntax and structural punctuation).

/// A single lexical token of the LTIR language.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LTIRToken {
    Symbol(String),
    Char(char),
    Num(i64),
    // SingleQuote(String),
    // DoubleQuote(String),
    // TripleQuote(String),
    /// `λ`
    Lambda,
    /// `.` (also terminates a type term)
    LambdaBody,
    /// `:` followed by a type expression, terminated by `=` or `.`
    AssignType(String),
    /// `=`
    AssignValue,
    /// `(`
    ExprOpen,
    /// `)`
    ExprClose,
    /// `{`
    BlockOpen,
    /// `}`
    BlockClose,
    /// `;`
    StatementSep,
}

/// Errors the lexer can report.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexError {
    /// A non-digit character appeared inside a numeric literal.
    InvalidDigit,
    /// A malformed or unterminated character literal.
    InvalidChar,
}

/// Internal accumulator: what kind of token is currently being scanned.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexerState {
    Any,
    TypeTerm(String),
    Sym(String),
    Num(i64),
    Char(Option<char>),
}

impl LexerState {
    /// Convert the accumulated state into a finished token.
    ///
    /// Returns `None` for [`LexerState::Any`] (nothing accumulated) and
    /// for a `Char` state that never received its character.
    fn into_token(self) -> Option<LTIRToken> {
        match self {
            LexerState::Any => None,
            LexerState::TypeTerm(s) => Some(LTIRToken::AssignType(s)),
            LexerState::Sym(s) => Some(LTIRToken::Symbol(s)),
            LexerState::Num(n) => Some(LTIRToken::Num(n)),
            LexerState::Char(c) => Some(LTIRToken::Char(c?)),
        }
    }
}

/// Streaming lexer over any `char` iterator.
pub struct LTIRLexer<It>
where
    It: std::iter::Iterator<Item = char>,
{
    chars: std::iter::Peekable<It>,
}

impl<It> LTIRLexer<It>
where
    It: Iterator<Item = char>,
{
    /// Consume the lexer and return the underlying character stream.
    pub fn into_inner(self) -> std::iter::Peekable<It> {
        self.chars
    }
}

impl<It> From<It> for LTIRLexer<It>
where
    It: Iterator<Item = char>,
{
    fn from(chars: It) -> Self {
        LTIRLexer {
            chars: chars.peekable(),
        }
    }
}

impl<It> Iterator for LTIRLexer<It>
where
    It: Iterator<Item = char>,
{
    type Item = Result<LTIRToken, LexError>;

    fn next(&mut self) -> Option<Self::Item> {
        let mut state = LexerState::Any;

        while let Some(c) = self.chars.peek() {
            match &mut state {
                // Determine the token type from the first significant char.
                LexerState::Any => match c {
                    'λ' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::Lambda));
                    }
                    '.' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::LambdaBody));
                    }
                    '(' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::ExprOpen));
                    }
                    ')' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::ExprClose));
                    }
                    '{' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::BlockOpen));
                    }
                    '}' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::BlockClose));
                    }
                    ':' => {
                        self.chars.next();
                        state = LexerState::TypeTerm(String::new());
                    }
                    '=' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::AssignValue));
                    }
                    ';' => {
                        self.chars.next();
                        return Some(Ok(LTIRToken::StatementSep));
                    }
                    '\'' => {
                        self.chars.next();
                        state = LexerState::Char(None);
                    }
                    c => {
                        if c.is_whitespace() {
                            // skip whitespace between tokens
                            self.chars.next();
                        } else if c.is_ascii_digit() {
                            state = LexerState::Num(0);
                        } else {
                            state = LexerState::Sym(String::new());
                        }
                    }
                },

                // Inside a character literal: read one (possibly escaped)
                // character, then require the closing quote.
                LexerState::Char(val) => {
                    *val = Some(match self.chars.next() {
                        Some('\\') => match self.chars.next() {
                            Some('0') => '\0',
                            Some('n') => '\n',
                            Some('t') => '\t',
                            // any other escaped char stands for itself
                            Some(c) => c,
                            None => {
                                return Some(Err(LexError::InvalidChar));
                            }
                        },
                        Some(c) => c,
                        None => {
                            return Some(Err(LexError::InvalidChar));
                        }
                    });

                    match self.chars.next() {
                        Some('\'') => {
                            if let Some(token) = state.clone().into_token() {
                                return Some(Ok(token));
                            }
                        }
                        _ => {
                            return Some(Err(LexError::InvalidChar));
                        }
                    }
                }

                // A type term runs until `=` or `.`; every other char
                // (including whitespace and `:`) is part of the term, so
                // e.g. `ℝ~machine::Float64` lexes as one AssignType.
                LexerState::TypeTerm(s) => {
                    if *c == '=' || *c == '.' {
                        if let Some(token) = state.clone().into_token() {
                            return Some(Ok(token));
                        }
                    } else {
                        if let Some(c) = self.chars.next() {
                            s.push(c);
                        }
                    }
                }

                // Sym / Num accumulation.
                _ => {
                    if c.is_whitespace()
                        || *c == '('
                        || *c == ')'
                        || *c == '{'
                        || *c == '}'
                        || *c == ';'
                        || *c == '='
                        || *c == ':'
                        || *c == '.'
                    {
                        // Delimiter reached: finish the current token. The
                        // delimiter itself is left in the stream for the
                        // next call to `next()`.
                        if let Some(token) = state.clone().into_token() {
                            return Some(Ok(token));
                        }
                    } else {
                        // Append to the current token.
                        let c = self.chars.next().unwrap();
                        match &mut state {
                            LexerState::Sym(s) => {
                                s.push(c);
                            }
                            LexerState::Num(n) => {
                                if let Some(d) = c.to_digit(10) {
                                    *n = (*n) * 10 + d as i64;
                                } else {
                                    return Some(Err(LexError::InvalidDigit));
                                }
                            }
                            _ => {}
                        }
                    }
                }
            }
        }

        // End of input: emit whatever is still accumulated, if anything.
        // NOTE(review): input ending immediately after an opening `'`
        // yields None here rather than Err(InvalidChar) — behavior
        // preserved as-is; confirm this is intended.
        state.into_token().map(Ok)
    }
}

#[cfg(test)]
mod tests {
    #[test]
    fn test_lexer() {
        let lexer = super::LTIRLexer::from(
            "let var1:ℕ=123; let square =λx.* x x; let sqrt = λx:ℝ~machine::Float64~machine::Word.(f64-sqrt x); let magnitude = λx:ℝ .λy:ℝ .sqrt (+ (* x x) (* y y)); "
                .chars(),
        );

        for token in lexer {
            eprintln!("token = {:?}", token);
        }
    }
}