From 8fa38ca7d35f61b8f2633b9e3ea8cc6f61fb532b Mon Sep 17 00:00:00 2001
From: Michael Sippel
Date: Mon, 2 Oct 2023 01:28:22 +0200
Subject: [PATCH] initial parser implementation

---
NOTE(review): this patch was rebuilt from a text extraction that had
collapsed all line breaks and dropped every `<...>` span. Generic
parameters, the bracketed test inputs (including the type names used in
test_parser_ladder_large), the indentation and the position of blank
context lines are reconstructed and must be checked against commit
8fa38ca. The author's e-mail address could not be recovered.
The src/parser.rs and src/term.rs hunks additionally carry review changes
(`?` propagation, no clone of a single rung, unused binding removed, doc
comments), so their `index` lines no longer describe the resulting blobs.

 src/lib.rs         |   3 +-
 src/parser.rs      | 133 ++++++++++++++++++++++++++++++
 src/term.rs        |  21 +++++
 src/test/parser.rs | 195 ++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 349 insertions(+), 3 deletions(-)
 create mode 100644 src/parser.rs

diff --git a/src/lib.rs b/src/lib.rs
index c156d89..1f1ffcb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,8 +1,9 @@
 
-pub mod lexer;
 pub mod bimap;
 pub mod dict;
 pub mod term;
+pub mod lexer;
+pub mod parser;
 
 #[cfg(test)]
 mod test;
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..7c7c1fc
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,133 @@
+use {
+    std::iter::Peekable,
+    crate::{
+        dict::*,
+        term::*,
+        lexer::*
+    }
+};
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+/// Errors reported while parsing a ladder-type expression.
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum ParseError {
+    /// The lexer reported an error instead of a token.
+    LexError(LexError),
+    /// A closing token appeared where a term was expected.
+    UnexpectedClose,
+    /// A ladder token appeared where a term was expected.
+    UnexpectedLadder,
+    /// The token stream ended before the term was complete.
+    UnexpectedEnd,
+    /// Tokens were left over after a complete term.
+    UnexpectedToken
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+impl std::str::FromStr for TypeTerm {
+    type Err = ParseError;
+
+    /// Parses `s` with a fresh, empty dictionary.
+    ///
+    /// Every call starts from `TypeDict::new()`, so type IDs are not
+    /// shared between calls.
+    fn from_str(s : &str) -> Result<Self, Self::Err> {
+        // creating a new context every time is not that useful..
+        let mut dict = TypeDict::new();
+        dict.parse(&mut LadderTypeLexer::from(s.chars()).peekable())
+    }
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+impl TypeDict {
+    /// Parses the arguments of an application up to and including the
+    /// closing token; the opening token must already be consumed.
+    fn parse_app<It>( &mut self, tokens: &mut Peekable<LadderTypeLexer<It>> ) -> Result<TypeTerm, ParseError>
+    where It: Iterator<Item = char>
+    {
+        let mut args = Vec::new();
+        while let Some(tok) = tokens.peek() {
+            if matches!(tok, Ok(LadderTypeToken::Close)) {
+                tokens.next();
+                return Ok(TypeTerm::App(args));
+            }
+            args.push(self.parse_partial(tokens)?);
+        }
+        Err(ParseError::UnexpectedEnd)
+    }
+
+    /// Parses one rung: an application, a type name, a character or a number.
+    ///
+    /// A symbol without a type ID is first added to the dictionary.
+    fn parse_rung<It>( &mut self, tokens: &mut Peekable<LadderTypeLexer<It>> ) -> Result<TypeTerm, ParseError>
+    where It: Iterator<Item = char>
+    {
+        match tokens.next() {
+            Some(Ok(LadderTypeToken::Open)) => self.parse_app(tokens),
+            Some(Ok(LadderTypeToken::Close)) => Err(ParseError::UnexpectedClose),
+            Some(Ok(LadderTypeToken::Ladder)) => Err(ParseError::UnexpectedLadder),
+            Some(Ok(LadderTypeToken::Symbol(s))) =>
+                Ok(TypeTerm::TypeID(
+                    if let Some(tyid) = self.get_typeid(&s) {
+                        tyid
+                    } else {
+                        self.add_typename(s)
+                    }
+                )),
+            Some(Ok(LadderTypeToken::Char(c))) => Ok(TypeTerm::Char(c)),
+            Some(Ok(LadderTypeToken::Num(n))) => Ok(TypeTerm::Num(n)),
+            Some(Err(err)) => Err(ParseError::LexError(err)),
+            None => Err(ParseError::UnexpectedEnd)
+        }
+    }
+
+    /// Parses a rung followed by any number of ladder-separated rungs.
+    ///
+    /// A single rung is returned as is; several rungs are wrapped in
+    /// `TypeTerm::Ladder`.
+    fn parse_partial<It>( &mut self, tokens: &mut Peekable<LadderTypeLexer<It>> ) -> Result<TypeTerm, ParseError>
+    where It: Iterator<Item = char>
+    {
+        let mut rungs = vec![ self.parse_rung(tokens)? ];
+
+        while let Some(tok) = tokens.peek() {
+            match tok {
+                Ok(LadderTypeToken::Ladder) => {
+                    tokens.next();
+                    rungs.push(self.parse_rung(tokens)?);
+                }
+                Err(lexerr) => {
+                    return Err(ParseError::LexError(lexerr.clone()));
+                }
+                _ => {
+                    break;
+                }
+            }
+        }
+
+        if rungs.len() > 1 {
+            Ok(TypeTerm::Ladder(rungs))
+        } else {
+            // move the only rung out instead of cloning it
+            rungs.pop().ok_or(ParseError::UnexpectedEnd)
+        }
+    }
+
+    /// Parses a complete term and rejects trailing tokens.
+    pub fn parse<It>( &mut self, tokens: &mut Peekable<LadderTypeLexer<It>> ) -> Result<TypeTerm, ParseError>
+    where It: Iterator<Item = char>
+    {
+        let term = self.parse_partial(tokens)?;
+        if tokens.peek().is_some() {
+            Err(ParseError::UnexpectedToken)
+        } else {
+            Ok(term)
+        }
+    }
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
diff --git a/src/term.rs b/src/term.rs
index 3420218..9b58b69 100644
--- a/src/term.rs
+++ b/src/term.rs
@@ -54,6 +54,27 @@ impl TypeTerm {
         self
     }
 
+    /// Appends `t` as a further rung of this term.
+    ///
+    /// A `Ladder` is extended in place; any other term becomes a
+    /// two-rung `Ladder` of itself and `t`.
+    pub fn repr_as(&mut self, t: impl Into<TypeTerm>) -> &mut Self {
+        match self {
+            TypeTerm::Ladder(rungs) => {
+                rungs.push(t.into());
+            }
+
+            _ => {
+                *self = TypeTerm::Ladder(vec![
+                    self.clone(),
+                    t.into()
+                ])
+            }
+        }
+
+        self
+    }
+
     pub fn num_arg(&mut self, v: i64) -> &mut Self {
         self.arg(TypeTerm::Num(v))
     }
diff --git a/src/test/parser.rs b/src/test/parser.rs
index 996ee06..16c9744 100644
--- a/src/test/parser.rs
+++ b/src/test/parser.rs
@@ -1,7 +1,198 @@
+use {
+    crate::{term::*, dict::*, parser::*},
+    std::str::FromStr
+};
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
 
 #[test]
-fn test_parser() {
-    // todo
+fn test_parser_id() {
+    assert_eq!(
+        Ok(TypeTerm::TypeID(TypeID::Fun(0))),
+        TypeTerm::from_str("A")
+    );
+}
+
+#[test]
+fn test_parser_num() {
+    assert_eq!(
+        Ok(TypeTerm::Num(1234)),
+        TypeTerm::from_str("1234")
+    );
+}
+
+#[test]
+fn test_parser_char() {
+    assert_eq!(
+        Ok(TypeTerm::Char('x')),
+        TypeTerm::from_str("'x'")
+    );
+}
+
+#[test]
+fn test_parser_app() {
+    assert_eq!(
+        TypeTerm::from_str("<A B>"),
+        Ok(TypeTerm::App(vec![
+            TypeTerm::TypeID(TypeID::Fun(0)),
+            TypeTerm::TypeID(TypeID::Fun(1)),
+        ]))
+    );
+    assert_eq!(
+        TypeTerm::from_str("<A B C>"),
+        Ok(TypeTerm::App(vec![
+            TypeTerm::TypeID(TypeID::Fun(0)),
+            TypeTerm::TypeID(TypeID::Fun(1)),
+            TypeTerm::TypeID(TypeID::Fun(2)),
+        ]))
+    );
+}
+
+#[test]
+fn test_parser_unexpected_close() {
+    assert_eq!(
+        TypeTerm::from_str(">"),
+        Err(ParseError::UnexpectedClose)
+    );
+}
+
+#[test]
+fn test_parser_unexpected_token() {
+    assert_eq!(
+        TypeTerm::from_str("A B"),
+        Err(ParseError::UnexpectedToken)
+    );
+}
+
+#[test]
+fn test_parser_ladder() {
+    assert_eq!(
+        TypeTerm::from_str("A~B"),
+        Ok(TypeTerm::Ladder(vec![
+            TypeTerm::TypeID(TypeID::Fun(0)),
+            TypeTerm::TypeID(TypeID::Fun(1)),
+        ]))
+    );
+    assert_eq!(
+        TypeTerm::from_str("A~B~C"),
+        Ok(TypeTerm::Ladder(vec![
+            TypeTerm::TypeID(TypeID::Fun(0)),
+            TypeTerm::TypeID(TypeID::Fun(1)),
+            TypeTerm::TypeID(TypeID::Fun(2)),
+        ]))
+    );
+}
+
+#[test]
+fn test_parser_ladder_outside() {
+    assert_eq!(
+        TypeTerm::from_str("<A B>~C"),
+        Ok(TypeTerm::Ladder(vec![
+            TypeTerm::App(vec![
+                TypeTerm::TypeID(TypeID::Fun(0)),
+                TypeTerm::TypeID(TypeID::Fun(1)),
+            ]),
+            TypeTerm::TypeID(TypeID::Fun(2)),
+        ]))
+    );
+}
+
+#[test]
+fn test_parser_ladder_inside() {
+    assert_eq!(
+        TypeTerm::from_str("<A B~C>"),
+        Ok(TypeTerm::App(vec![
+            TypeTerm::TypeID(TypeID::Fun(0)),
+            TypeTerm::Ladder(vec![
+                TypeTerm::TypeID(TypeID::Fun(1)),
+                TypeTerm::TypeID(TypeID::Fun(2)),
+            ])
+        ]))
+    );
+}
+
+#[test]
+fn test_parser_ladder_between() {
+    assert_eq!(
+        TypeTerm::from_str("<A B~<C D>>"),
+        Ok(TypeTerm::App(vec![
+            TypeTerm::TypeID(TypeID::Fun(0)),
+            TypeTerm::Ladder(vec![
+                TypeTerm::TypeID(TypeID::Fun(1)),
+                TypeTerm::App(vec![
+                    TypeTerm::TypeID(TypeID::Fun(2)),
+                    TypeTerm::TypeID(TypeID::Fun(3)),
+                ])
+            ])
+        ]))
+    );
 }
 
+#[test]
+fn test_parser_ladder_large() {
+    assert_eq!(
+        TypeTerm::from_str(
+            "<Seq Date \
+                  ~<TimeSince UnixEpoch> \
+                  ~<Duration Seconds> \
+                  ~ℕ \
+                  ~<PosInt 10 BigEndian> \
+                  ~< Seq <Digit 10>~Unicode > > \
+              ~<SepSeq Unicode ':'> \
+              ~<Seq Unicode> \
+              ~UTF-8 \
+              ~<Seq Byte>"),
+
+        Ok(
+            TypeTerm::Ladder(vec![
+                TypeTerm::App(vec![
+                    TypeTerm::TypeID(TypeID::Fun(0)),
+                    TypeTerm::Ladder(vec![
+                        TypeTerm::TypeID(TypeID::Fun(1)),
+                        TypeTerm::App(vec![
+                            TypeTerm::TypeID(TypeID::Fun(2)),
+                            TypeTerm::TypeID(TypeID::Fun(3))
+                        ]),
+                        TypeTerm::App(vec![
+                            TypeTerm::TypeID(TypeID::Fun(4)),
+                            TypeTerm::TypeID(TypeID::Fun(5))
+                        ]),
+                        TypeTerm::TypeID(TypeID::Fun(6)),
+                        TypeTerm::App(vec![
+                            TypeTerm::TypeID(TypeID::Fun(7)),
+                            TypeTerm::Num(10),
+                            TypeTerm::TypeID(TypeID::Fun(8))
+                        ]),
+                        TypeTerm::App(vec![
+                            TypeTerm::TypeID(TypeID::Fun(0)),
+                            TypeTerm::Ladder(vec![
+                                TypeTerm::App(vec![
+                                    TypeTerm::TypeID(TypeID::Fun(9)),
+                                    TypeTerm::Num(10)
+                                ]),
+                                TypeTerm::TypeID(TypeID::Fun(10))
+                            ])
+                        ])
+                    ])
+                ]),
+                TypeTerm::App(vec![
+                    TypeTerm::TypeID(TypeID::Fun(11)),
+                    TypeTerm::TypeID(TypeID::Fun(10)),
+                    TypeTerm::Char(':')
+                ]),
+                TypeTerm::App(vec![
+                    TypeTerm::TypeID(TypeID::Fun(0)),
+                    TypeTerm::TypeID(TypeID::Fun(10))
+                ]),
+                TypeTerm::TypeID(TypeID::Fun(12)),
+                TypeTerm::App(vec![
+                    TypeTerm::TypeID(TypeID::Fun(0)),
+                    TypeTerm::TypeID(TypeID::Fun(13))
+                ])
+            ])
+        )
+    );
+}
+
 