/// Tokens produced by the LTIR lexer.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LTIRToken {
    Symbol(String),
    Char(char),
    Num(i64),
    // SingleQuote(String),
    // DoubleQuote(String),
    // TripleQuote(String),
    Lambda,
    MapsTo,
    AssignType(String),
    AssignValue,
    ExprOpen,
    ExprClose,
    BlockOpen,
    BlockClose,
    StatementSep,
}

/// Errors the lexer can report for malformed input.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexError {
    InvalidDigit,
    InvalidChar,
}

/// Scanning state while a single token is being assembled.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexerState {
    /// No token started yet.
    Any,
    /// Inside a `:`-introduced type annotation.
    TypeTerm(String),
    /// Inside a symbol.
    Sym(String),
    /// Inside a decimal number (value accumulated so far).
    Num(i64),
    /// Inside a `'…'` character literal; `None` until the char is read.
    Char(Option<char>),
}

impl LexerState {
    /// Convert the accumulated state into a finished token.
    ///
    /// `Any` yields `None` (nothing was scanned); an unfinished
    /// `Char(None)` also yields `None` via the `?` operator.
    fn into_token(self) -> Option<LTIRToken> {
        match self {
            LexerState::Any => None,
            LexerState::TypeTerm(s) => Some(LTIRToken::AssignType(s)),
            LexerState::Sym(s) => Some(LTIRToken::Symbol(s)),
            LexerState::Num(n) => Some(LTIRToken::Num(n)),
            LexerState::Char(c) => Some(LTIRToken::Char(c?)),
        }
    }
}

/// Lexer over a character stream.
///
/// Tracks the absolute character position so every emitted token can be
/// tagged with the [`InputRegionTag`] it was read from.
pub struct LTIRLexer<It>
where
    It: std::iter::Iterator<Item = char>,
{
    chars: std::iter::Peekable<It>,
    position: usize,
}

impl<It> LTIRLexer<It>
where
    It: Iterator<Item = char>,
{
    /// Consume the lexer and return the underlying character stream.
    pub fn into_inner(self) -> std::iter::Peekable<It> {
        self.chars
    }
}

impl<It> From<It> for LTIRLexer<It>
where
    It: Iterator<Item = char>,
{
    fn from(chars: It) -> Self {
        LTIRLexer {
            chars: chars.peekable(),
            position: 0,
        }
    }
}

/// Half-open region of input characters a token was read from.
#[derive(Clone, Debug)]
pub struct InputRegionTag {
    begin: usize,
    end: usize,
}

impl InputRegionTag {
    /// Union of two regions: smallest `begin`, largest `end`.
    pub fn max(a: InputRegionTag, b: InputRegionTag) -> InputRegionTag {
        InputRegionTag {
            begin: usize::min(a.begin, b.begin),
            end: usize::max(a.end, b.end),
        }
    }
}

impl<It> Iterator for LTIRLexer<It>
where
    It: Iterator<Item = char>,
{
    type Item = (InputRegionTag, Result<LTIRToken, LexError>);

    fn next(&mut self) -> Option<Self::Item> {
        let mut state = LexerState::Any;
        let mut region = InputRegionTag {
            begin: self.position,
            end: self.position,
        };

        while let Some(c) = self.chars.peek() {
            match &mut state {
                // Determine the token type from the first significant char.
                LexerState::Any => match c {
                    'λ' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::Lambda)));
                    }
                    '↦' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::MapsTo)));
                    }
                    '(' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::ExprOpen)));
                    }
                    ')' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::ExprClose)));
                    }
                    '{' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::BlockOpen)));
                    }
                    '}' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::BlockClose)));
                    }
                    ':' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        state = LexerState::TypeTerm(String::new());
                    }
                    '=' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::AssignValue)));
                    }
                    ';' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::StatementSep)));
                    }
                    '\'' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        state = LexerState::Char(None);
                    }
                    c => {
                        if c.is_whitespace() {
                            // Skip leading whitespace; the region has not
                            // started yet, so advance both ends.
                            self.chars.next();
                            self.position += 1;
                            region.begin += 1;
                            region.end += 1;
                        } else if c.is_digit(10) {
                            state = LexerState::Num(0);
                        } else {
                            state = LexerState::Sym(String::new());
                        }
                    }
                },

                // Character literal: payload char (possibly escaped),
                // then the closing quote.
                LexerState::Char(val) => {
                    // Account for payload char + closing quote up front;
                    // an escape adds one more below.
                    self.position += 2;
                    region.end += 2;
                    *val = Some(match self.chars.next() {
                        Some('\\') => {
                            self.position += 1;
                            region.end += 1;
                            match self.chars.next() {
                                Some('0') => '\0',
                                Some('n') => '\n',
                                Some('t') => '\t',
                                Some(c) => c,
                                None => {
                                    return Some((region, Err(LexError::InvalidChar)));
                                }
                            }
                        }
                        Some(c) => c,
                        None => {
                            return Some((region, Err(LexError::InvalidChar)));
                        }
                    });

                    match self.chars.next() {
                        Some('\'') => {
                            if let Some(token) = state.clone().into_token() {
                                return Some((region, Ok(token)));
                            }
                        }
                        _ => {
                            return Some((region, Err(LexError::InvalidChar)));
                        }
                    }
                }

                // Type annotation: runs until `=`, `↦` or `;`.
                LexerState::TypeTerm(s) => {
                    if *c == '=' || *c == '↦' || *c == ';' {
                        if let Some(token) = state.clone().into_token() {
                            return Some((region, Ok(token)));
                        }
                    } else if let Some(c) = self.chars.next() {
                        self.position += 1;
                        region.end += 1;
                        s.push(c);
                    }
                }

                // Symbol / number: runs until whitespace or a delimiter.
                _ => {
                    if c.is_whitespace()
                        || *c == '('
                        || *c == ')'
                        || *c == '{'
                        || *c == '}'
                        || *c == ';'
                        || *c == '='
                        || *c == ':'
                        || *c == '↦'
                    {
                        // Finish the current token.
                        if let Some(token) = state.clone().into_token() {
                            return Some((region, Ok(token)));
                        }
                    } else {
                        // Append to the current token.
                        let c = self.chars.next().unwrap();
                        self.position += 1;
                        region.end += 1;
                        match &mut state {
                            LexerState::Sym(s) => {
                                s.push(c);
                            }
                            LexerState::Num(n) => {
                                if let Some(d) = c.to_digit(10) {
                                    *n = (*n) * 10 + d as i64;
                                } else {
                                    return Some((region, Err(LexError::InvalidDigit)));
                                }
                            }
                            _ => {}
                        }
                    }
                }
            }
        }

        // End of input: emit whatever token is still pending.
        if let Some(token) = state.into_token() {
            Some((region, Ok(token)))
        } else {
            None
        }
    }
}

#[cfg(test)]
mod tests {
    #[test]
    fn test_lexer() {
        let lexer = super::LTIRLexer::from(
            "let var1:ℕ=123; let square =λx.* x x; let sqrt = λx:ℝ~machine::Float64~machine::Word.(f64-sqrt x); let magnitude = λx:ℝ .λy:ℝ .sqrt (+ (* x x) (* y y)); "
                .chars(),
        );

        for (range, token) in lexer {
            eprintln!("[{:?}] {:?}", range, token);
        }
    }
}