initial lexer
commit b6117ec12d
3 changed files with 292 additions and 0 deletions
Cargo.toml  (Normal file, +6 lines)
@@ -0,0 +1,6 @@
[package]
authors = ["Michael Sippel <micha@fragmental.art>"]
edition = "2018"
name = "laddertypes"
version = "0.1.0"

src/lexer.rs  (Normal file, +147 lines)
@@ -0,0 +1,147 @@
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>

#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LadderTypeToken {
    Symbol( String ),
    Char( char ),
    Num( u64 ),
    Open,
    Close,
    Ladder,
}

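// informal summary, added for readability (not in the original file):
// how raw characters map onto the tokens above, as implemented further down
//
//   '<'                              -> Open
//   '>'                              -> Close
//   '~'                              -> Ladder
//   'c'  (a single quoted character) -> Char(c)
//   run of decimal digits            -> Num
//   alphabetic start, running until
//   whitespace, '>' or '~'           -> Symbol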

#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexError {
    /// found a non-digit character inside a numeric token
    InvalidDigit,

    /// quoted character token didn't close correctly with '
    InvalidChar,
}

//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>

#[derive(PartialEq, Eq, Clone, Debug)]
enum LexerState {
    Any,
    Sym( String ),
    Num( u64 ),
    Char( Option<char> )
}

impl LexerState {
    fn into_token(self) -> Option<LadderTypeToken> {
        match self {
            LexerState::Any => None,
            LexerState::Sym(s) => Some(LadderTypeToken::Symbol(s)),
            LexerState::Num(n) => Some(LadderTypeToken::Num(n)),

            // `c?` yields None when no character was captured after the
            // opening quote, so an unterminated quote produces no token
            LexerState::Char(c) => Some(LadderTypeToken::Char(c?))
        }
    }
}

//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>

pub struct LadderTypeLexer<It>
where It: std::iter::Iterator<Item = char>
{
    chars: std::iter::Peekable<It>,
}

impl<It> LadderTypeLexer<It>
where It: Iterator<Item = char>
{
    pub fn new(chars: It) -> Self {
        LadderTypeLexer {
            chars: chars.peekable()
        }
    }
}

impl<It> Iterator for LadderTypeLexer<It>
where It: Iterator<Item = char>
{
    type Item = Result<LadderTypeToken, LexError>;

    fn next(&mut self) -> Option<Self::Item> {
        let mut state = LexerState::Any;

        // peek so that a character which only terminates the current token
        // (whitespace, '>' or '~') stays in the stream for the next call
        while let Some(c) = self.chars.peek() {
            match &mut state {

                // determine token type
                LexerState::Any => {
                    match c {
                        '<' => { self.chars.next(); return Some(Ok(LadderTypeToken::Open)); },
                        '>' => { self.chars.next(); return Some(Ok(LadderTypeToken::Close)); },
                        '~' => { self.chars.next(); return Some(Ok(LadderTypeToken::Ladder)); },
                        '\'' => { self.chars.next(); state = LexerState::Char(None); },
                        ' ' => { self.chars.next(); },
                        c => {
                            if c.is_alphabetic() {
                                state = LexerState::Sym( String::new() );
                            } else if c.is_digit(10) {
                                state = LexerState::Num( 0 );
                            }
                        }
                    }
                }

                LexerState::Char(val) => {
                    // todo escape characters
                    *val = self.chars.next();

                    match self.chars.next() {
                        Some('\'') => {
                            if let Some(token) = state.clone().into_token() {
                                return Some(Ok(token));
                            }
                        }
                        _ => {
                            return Some(Err(LexError::InvalidChar));
                        }
                    }
                }

                _ => {
                    if c.is_whitespace() || *c == '>' || *c == '~' {
                        // finish the current token
                        if let Some(token) = state.clone().into_token() {
                            return Some(Ok(token));
                        }
                    } else {
                        // append to the current token
                        let c = self.chars.next().unwrap();

                        match &mut state {
                            LexerState::Sym(s) => {
                                s.push(c);
                            }

                            LexerState::Num(n) => {
                                if let Some(d) = c.to_digit(10) {
                                    *n = (*n) * 10 + d as u64;
                                } else {
                                    return Some(Err(LexError::InvalidDigit));
                                }
                            }

                            _ => {}
                        }
                    }
                }
            }
        }

        // end of input: emit whatever token is still in progress
        if let Some(token) = state.into_token() {
            Some(Ok(token))
        } else {
            None
        }
    }
}

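Used on its own, the lexer is just an Iterator over Result<LadderTypeToken, LexError>, so the whole token stream can be collected in one step. The following is a minimal usage sketch, not part of this commit; it assumes the crate is consumed as laddertypes with the lexer module exported as in src/lib.rs below, and the input string is only an example.

    use laddertypes::lexer::{LadderTypeLexer, LadderTypeToken, LexError};

    fn main() {
        // lex a ladder-type expression; collecting into Result stops at the first error
        let tokens: Result<Vec<LadderTypeToken>, LexError> =
            LadderTypeLexer::new("<Seq Char>~<Seq Unicode>".chars()).collect();

        match tokens {
            Ok(toks) => println!("{:?}", toks),
            Err(err) => eprintln!("lex error: {:?}", err),
        }
    }
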
src/lib.rs  (Normal file, +139 lines)
@@ -0,0 +1,139 @@
pub mod lexer;
//pub mod bimap;
//pub mod dict;
//pub mod term;

pub use {
//    dict::*,
//    term::*,
};

#[cfg(test)]
mod tests {
    #[test]
    fn test_lexer() {
        use crate::lexer::*;

        {
            let mut lex = LadderTypeLexer::new("symbol".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("symbol".into()))) );
            assert_eq!( lex.next(), None );
        }
        {
            let mut lex = LadderTypeLexer::new("1234".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Num(1234))) );
            assert_eq!( lex.next(), None );
        }
        {
            let mut lex = LadderTypeLexer::new("123xxx".chars());
            assert_eq!( lex.next(), Some(Err(LexError::InvalidDigit)) );
        }
        {
            let mut lex = LadderTypeLexer::new("'x'".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Char('x'))) );
            assert_eq!( lex.next(), None );
        }
        {
            let mut lex = LadderTypeLexer::new("'xx'".chars());
            assert_eq!( lex.next(), Some(Err(LexError::InvalidChar)) );
        }
        {
            let mut lex = LadderTypeLexer::new("abc~def".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("abc".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("def".into()))) );
            assert_eq!( lex.next(), None );
        }
        {
            let mut lex = LadderTypeLexer::new("abc ~ def".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("abc".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("def".into()))) );
            assert_eq!( lex.next(), None );
        }

        {
            let mut lex = LadderTypeLexer::new("<Seq Char>".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Seq".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Char".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)) );
            assert_eq!( lex.next(), None );
        }
        {
            let mut lex = LadderTypeLexer::new(" <Seq Char >".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Seq".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Char".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)) );
            assert_eq!( lex.next(), None );
        }

        {
            let mut lex = LadderTypeLexer::new("<Seq Date~<TimeSince UnixEpoch>~<Duration Seconds>~ℕ~<PosInt 10 BigEndian>~<Seq <Digit 10>~Unicode>".chars());

            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Seq".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Date".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("TimeSince".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("UnixEpoch".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Duration".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Seconds".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("ℕ".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("PosInt".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Num(10))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("BigEndian".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Seq".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Digit".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Num(10))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Ladder)) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Symbol("Unicode".into()))) );
            assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Close)) );
            assert_eq!( lex.next(), None );
        }
    }

    #[test]
    fn test_parse() {
        // todo
    }

    #[test]
    fn test_normalize() {
        // todo
    }

    #[test]
    fn test_curry() {
        // todo
    }

    #[test]
    fn test_subtype() {
        // todo
    }
}
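
Each case in test_lexer above drives the lexer by hand with repeated next()/assert_eq! pairs. A small helper could express the same checks more compactly; the sketch below is not part of this commit, the name assert_tokens is made up, and it is assumed to live inside the laddertypes crate (for example in the tests module) so that crate::lexer resolves.

    use crate::lexer::{LadderTypeLexer, LadderTypeToken, LexError};

    // hypothetical helper: lex `src` completely and compare the token sequence
    fn assert_tokens(src: &str, expected: &[LadderTypeToken]) {
        let tokens: Vec<LadderTypeToken> = LadderTypeLexer::new(src.chars())
            .collect::<Result<_, LexError>>()
            .expect("unexpected lex error");
        assert_eq!(tokens, expected);
    }

    // the "abc~def" case would then shrink to:
    // assert_tokens("abc~def", &[
    //     LadderTypeToken::Symbol("abc".into()),
    //     LadderTypeToken::Ladder,
    //     LadderTypeToken::Symbol("def".into()),
    // ]);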