// Reconstruction of commit b6117ec12d641a61c745fa70768e8ef5f0921806
//   Author:  Michael Sippel
//   Date:    Sun Oct 1 13:11:29 2023 +0200
//   Message: initial lexer
//
// The commit adds three files:
//
//   Cargo.toml
//     [package]
//     authors = ["Michael Sippel "]
//     edition = "2018"
//     name = "laddertypes"
//     version = "0.1.0"
//
//   src/lexer.rs  -- given below as the inline module `lexer`
//   src/lib.rs    -- given below as the crate root around that module
//
// NOTE(review): the dump this was rebuilt from had its line breaks collapsed
// and every span shaped like `<Name ...>` removed. Generic parameter lists and
// the `<Seq Char>`-style test inputs were therefore re-derived from how the
// code uses them; confirm against the real commit before relying on them.

pub mod lexer {
    use std::iter::Peekable;

    //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>

    /// One token of the ladder-type notation.
    #[derive(PartialEq, Eq, Clone, Debug)]
    pub enum LadderTypeToken {
        /// A run of characters that ends at whitespace, `<`, `>`, `~` or the
        /// end of the input and does not start with a digit or `'`.
        Symbol(String),
        /// A single character written between two `'` quotes.
        Char(char),
        /// An unsigned decimal number.
        Num(u64),
        /// The `<` character.
        Open,
        /// The `>` character.
        Close,
        /// The `~` character.
        Ladder,
    }

    /// Reasons why the lexer could not produce a token.
    #[derive(PartialEq, Eq, Clone, Debug)]
    pub enum LexError {
        /// found a non-digit character inside a numeric token,
        /// or the numeric token does not fit into a `u64`
        InvalidDigit,

        /// quoted character token didn't close correctly with `'`
        InvalidChar,
    }

    //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>

    /// Returns `true` for characters that end a symbol or number token.
    ///
    /// `<` is included so that `Seq<Char>` splits into `Seq`, `<`, `Char`, `>`
    /// instead of swallowing the bracket into the symbol.
    fn is_delimiter(c: char) -> bool {
        c.is_whitespace() || matches!(c, '<' | '>' | '~')
    }

    //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>

    /// Turns a stream of characters into a stream of [`LadderTypeToken`]s.
    ///
    /// The lexer is itself an iterator; every item is either a token or the
    /// [`LexError`] describing why the current token was rejected. After an
    /// error the lexer simply continues with the characters that follow the
    /// ones it already consumed.
    pub struct LadderTypeLexer<It>
    where
        It: Iterator<Item = char>,
    {
        chars: Peekable<It>,
    }

    impl<It> LadderTypeLexer<It>
    where
        It: Iterator<Item = char>,
    {
        /// Creates a lexer reading from `chars`.
        pub fn new(chars: It) -> Self {
            LadderTypeLexer {
                chars: chars.peekable(),
            }
        }

        /// Reads the remainder of a quoted character; the opening `'` has
        /// already been consumed by the caller.
        fn lex_char(&mut self) -> Result<LadderTypeToken, LexError> {
            // todo escape characters
            // Input ending right after the opening quote is an error, not a
            // silent end of the token stream.
            let value = self.chars.next().ok_or(LexError::InvalidChar)?;

            match self.chars.next() {
                Some('\'') => Ok(LadderTypeToken::Char(value)),
                _ => Err(LexError::InvalidChar),
            }
        }

        /// Reads a decimal number up to the next delimiter.
        ///
        /// A non-digit character is consumed before `InvalidDigit` is
        /// returned, so the following call resumes behind it.
        fn lex_number(&mut self) -> Result<LadderTypeToken, LexError> {
            let mut value: u64 = 0;

            while let Some(&c) = self.chars.peek() {
                if is_delimiter(c) {
                    break;
                }
                self.chars.next();

                let digit = c.to_digit(10).ok_or(LexError::InvalidDigit)?;

                // Checked arithmetic: an over-long literal must yield an
                // error instead of panicking or wrapping around.
                value = value
                    .checked_mul(10)
                    .and_then(|v| v.checked_add(u64::from(digit)))
                    .ok_or(LexError::InvalidDigit)?;
            }

            Ok(LadderTypeToken::Num(value))
        }

        /// Reads a symbol up to the next delimiter.
        fn lex_symbol(&mut self) -> LadderTypeToken {
            let mut name = String::new();

            while let Some(&c) = self.chars.peek() {
                if is_delimiter(c) {
                    break;
                }
                name.push(c);
                self.chars.next();
            }

            LadderTypeToken::Symbol(name)
        }
    }

    impl<It> Iterator for LadderTypeLexer<It>
    where
        It: Iterator<Item = char>,
    {
        type Item = Result<LadderTypeToken, LexError>;

        /// Produces the next token, `None` once only whitespace (or nothing)
        /// is left in the input.
        fn next(&mut self) -> Option<Self::Item> {
            // Skip every kind of whitespace, not only the plain space.
            while self.chars.peek().map_or(false, |c| c.is_whitespace()) {
                self.chars.next();
            }

            // Copy the character out so the peek borrow ends before the
            // iterator is advanced below.
            let first = *self.chars.peek()?;

            let item = match first {
                '<' => {
                    self.chars.next();
                    Ok(LadderTypeToken::Open)
                }
                '>' => {
                    self.chars.next();
                    Ok(LadderTypeToken::Close)
                }
                '~' => {
                    self.chars.next();
                    Ok(LadderTypeToken::Ladder)
                }
                '\'' => {
                    self.chars.next();
                    self.lex_char()
                }
                c if c.is_ascii_digit() => self.lex_number(),

                // Anything else starts a symbol. This guarantees that every
                // call consumes at least one character, so the lexer can never
                // spin on a character it does not recognise.
                _ => Ok(self.lex_symbol()),
            };

            Some(item)
        }
    }
}

//pub mod bimap;
//pub mod dict;
//pub mod term;

pub use {
//    dict::*,
//    term::*,
};

#[cfg(test)]
mod tests {
    #[test]
    fn test_lexer() {
        use crate::lexer::*;

        {
            let mut lex = LadderTypeLexer::new("symbol".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("symbol".into()))) );
            assert_eq!( lex.next(), None );
        }
        {
            let mut lex = LadderTypeLexer::new("1234".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Num(1234))) );
            assert_eq!( lex.next(), None );
        }
        {
            let mut lex = LadderTypeLexer::new("123xxx".chars());
            assert_eq!( lex.next(), Some(Err(LexError::InvalidDigit)) );
        }
        {
            let mut lex = LadderTypeLexer::new("'x'".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Char('x'))) );
            assert_eq!( lex.next(), None );
        }
        {
            let mut lex = LadderTypeLexer::new("'xx'".chars());
            assert_eq!( lex.next(), Some(Err(LexError::InvalidChar)) );
        }
        {
            let mut lex = LadderTypeLexer::new("abc~def".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("abc".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("def".into()))) );
            assert_eq!( lex.next(), None );
        }
        {
            let mut lex = LadderTypeLexer::new("abc ~ def".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("abc".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("def".into()))) );
            assert_eq!( lex.next(), None );
        }

        {
            // NOTE(review): input literal re-derived from the expected tokens.
            let mut lex = LadderTypeLexer::new("<Seq Char>".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Seq".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Char".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)) );
            assert_eq!( lex.next(), None );
        }
        {
            // NOTE(review): input literal re-derived from the expected tokens;
            // presumably the whitespace-padded variant of the case above.
            let mut lex = LadderTypeLexer::new("   <Seq      Char  >".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Seq".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Char".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)) );
            assert_eq!( lex.next(), None );
        }

        {
            // NOTE(review): input literal re-derived from the expected tokens.
            let mut lex = LadderTypeLexer::new("<Seq Date~<TimeSince UnixEpoch>~<Duration Seconds>~ℕ~<PosInt 10 BigEndian>~<Seq <Digit 10>~Unicode>".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Seq".into()))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Date".into()))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("TimeSince".into()))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("UnixEpoch".into()))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Duration".into()))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Seconds".into()))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("ℕ".into()))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("PosInt".into()))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Num(10))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("BigEndian".into()))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Seq".into()))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Digit".into()))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Num(10))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Unicode".into()))));
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)));
            assert_eq!( lex.next(), None );
        }
    }

    #[test]
    fn test_lexer_edge_cases() {
        use crate::lexer::*;

        {
            // tabs and newlines are skipped like spaces
            let mut lex = LadderTypeLexer::new("\tabc\n".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("abc".into()))) );
            assert_eq!( lex.next(), None );
        }
        {
            // an opening quote at the end of the input is an error
            let mut lex = LadderTypeLexer::new("'".chars());

            assert_eq!( lex.next(), Some(Err(LexError::InvalidChar)) );
            assert_eq!( lex.next(), None );
        }
        {
            // a number too large for u64 is rejected instead of overflowing
            let mut lex = LadderTypeLexer::new("18446744073709551616".chars());
            assert_eq!( lex.next(), Some(Err(LexError::InvalidDigit)) );
        }
    }

    #[test]
    fn test_parse() {
        // todo
    }

    #[test]
    fn test_normalize() {
        // todo
    }

    #[test]
    fn test_curry() {
        // todo
    }

    #[test]
    fn test_subtype() {
        // todo
    }
}