From 8fa38ca7d35f61b8f2633b9e3ea8cc6f61fb532b Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Mon, 2 Oct 2023 01:28:22 +0200
Subject: [PATCH] initial parser implementation

---
 src/lib.rs         |   3 +-
 src/parser.rs      | 136 +++++++++++++++++++++++++++++++
 src/term.rs        |  17 ++++
 src/test/parser.rs | 195 ++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 348 insertions(+), 3 deletions(-)
 create mode 100644 src/parser.rs

diff --git a/src/lib.rs b/src/lib.rs
index c156d89..1f1ffcb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,8 +1,9 @@
 
-pub mod lexer;
 pub mod bimap;
 pub mod dict;
 pub mod term;
+pub mod lexer;
+pub mod parser;
 
 #[cfg(test)]
 mod test;
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..7c7c1fc
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,136 @@
+use {
+    std::iter::Peekable,
+    crate::{
+        dict::*,
+        term::*,
+        lexer::*
+    }
+};
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum ParseError { // failure modes reported by the type-term parser
+    LexError(LexError), // the lexer yielded an error instead of a token
+    UnexpectedClose, // a `>` appeared where a term had to start
+    UnexpectedLadder, // a `~` appeared where a term had to start
+    UnexpectedEnd, // the tokens ran out before a term was complete
+    UnexpectedToken // tokens were left over after a complete term
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+impl std::str::FromStr for TypeTerm {
+    type Err = ParseError;
+
+    /// Parses `s` with a throwaway `TypeDict` that is dropped on return.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let mut tokens = LadderTypeLexer::from(s.chars()).peekable();
+        TypeDict::new().parse(&mut tokens)
+    }
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+impl TypeDict {
+    /// Parses the members of an application whose opening `<` has already
+    /// been consumed, up to and including the matching `>`.
+    ///
+    /// Returns `ParseError::UnexpectedEnd` if the tokens run out before `>`.
+    fn parse_app<It>( &mut self, tokens: &mut Peekable<LadderTypeLexer<It>> ) -> Result<TypeTerm, ParseError>
+    where It: Iterator<Item = char>
+    {
+        let mut args = Vec::new();
+        while let Some(tok) = tokens.peek() {
+            // A `>` closes the application.
+            if matches!(tok, Ok(LadderTypeToken::Close)) {
+                tokens.next();
+                return Ok(TypeTerm::App(args));
+            }
+            // Everything else, lexer errors included, is left to the sub-parser.
+            args.push(self.parse_partial(tokens)?);
+        }
+        // The token stream ended before the closing `>` was seen.
+        Err(ParseError::UnexpectedEnd)
+    }
+
+    /// Parses a single rung: a symbol, character, number or `<...>` application.
+    ///
+    /// A symbol resolves to its type ID; a name without one goes to `add_typename`.
+    /// A leading `>` or `~`, a lexer error, or the end of input yields an error.
+    fn parse_rung<It>( &mut self, tokens: &mut Peekable<LadderTypeLexer<It>> ) -> Result<TypeTerm, ParseError>
+    where It: Iterator<Item = char>
+    {
+        match tokens.next() {
+            Some(Ok(LadderTypeToken::Open)) => self.parse_app(tokens),
+            Some(Ok(LadderTypeToken::Close)) => Err(ParseError::UnexpectedClose),
+            Some(Ok(LadderTypeToken::Ladder)) => Err(ParseError::UnexpectedLadder),
+            Some(Ok(LadderTypeToken::Symbol(s))) => {
+                // Reuse the ID of a known name, otherwise register the name.
+                let id = match self.get_typeid(&s) {
+                    Some(id) => id,
+                    None => self.add_typename(s),
+                };
+                Ok(TypeTerm::TypeID(id))
+            }
+            Some(Ok(LadderTypeToken::Char(c))) => Ok(TypeTerm::Char(c)),
+            Some(Ok(LadderTypeToken::Num(n))) => Ok(TypeTerm::Num(n)),
+            Some(Err(err)) => Err(ParseError::LexError(err)),
+            None => Err(ParseError::UnexpectedEnd),
+        }
+    }
+
+    /// Parses one rung followed by any number of `~`-separated rungs.
+    ///
+    /// A lone rung is returned unchanged; two or more are wrapped in
+    /// `TypeTerm::Ladder`. Parsing stops, without consuming it, at the first
+    /// token that is neither `~` nor a lexer error.
+    fn parse_partial<It>( &mut self, tokens: &mut Peekable<LadderTypeLexer<It>> ) -> Result<TypeTerm, ParseError>
+    where It: Iterator<Item = char>
+    {
+        // The first rung is mandatory, so `rungs` is never empty from here on.
+        let mut rungs = vec![self.parse_rung(tokens)?];
+
+        while let Some(tok) = tokens.peek() {
+            match tok {
+                Ok(LadderTypeToken::Ladder) => {
+                    // Consume the `~`; another rung has to follow it.
+                    tokens.next();
+                    rungs.push(self.parse_rung(tokens)?);
+                }
+                Err(lexerr) => return Err(ParseError::LexError(lexerr.clone())),
+                _ => break,
+            }
+        }
+
+        if rungs.len() == 1 {
+            // Move the only rung out instead of cloning it.
+            Ok(rungs.swap_remove(0))
+        } else {
+            Ok(TypeTerm::Ladder(rungs))
+        }
+    }
+
+    /// Parses a complete type term from `tokens`.
+    ///
+    /// Symbols without a type ID are passed to `add_typename` on this dictionary.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ParseError::UnexpectedToken` if any token is left after the term,
+    /// and passes on every error raised while parsing the term itself.
+    pub fn parse<It>( &mut self, tokens: &mut Peekable<LadderTypeLexer<It>> ) -> Result<TypeTerm, ParseError>
+    where It: Iterator<Item = char>
+    {
+        let term = self.parse_partial(tokens)?;
+        // Anything left over means the input held more than a single term.
+        if tokens.peek().is_some() {
+            Err(ParseError::UnexpectedToken)
+        } else {
+            Ok(term)
+        }
+    }
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
diff --git a/src/term.rs b/src/term.rs
index 3420218..9b58b69 100644
--- a/src/term.rs
+++ b/src/term.rs
@@ -54,6 +54,23 @@ impl TypeTerm {
         self
     }
 
+    /// Appends `t` as the last rung of this term.
+    ///
+    /// A `Ladder` is extended in place; any other term becomes the first
+    /// rung of a new two-rung `Ladder`. Returns `self` for chaining.
+    pub fn repr_as(&mut self, t: impl Into<TypeTerm>) -> &mut Self {
+        let rung = t.into();
+        if let TypeTerm::Ladder(rungs) = self {
+            rungs.push(rung);
+        } else {
+            // Move the current term out rather than deep-cloning it; the
+            // empty ladder is only a placeholder until the assignment below.
+            let top = std::mem::replace(self, TypeTerm::Ladder(Vec::new()));
+            *self = TypeTerm::Ladder(vec![top, rung]);
+        }
+        self
+    }
+
     pub fn num_arg(&mut self, v: i64) -> &mut Self {
         self.arg(TypeTerm::Num(v))
     }
diff --git a/src/test/parser.rs b/src/test/parser.rs
index 996ee06..16c9744 100644
--- a/src/test/parser.rs
+++ b/src/test/parser.rs
@@ -1,7 +1,198 @@
 
+use {
+    crate::{term::*, dict::*, parser::*},
+    std::str::FromStr
+};
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
 #[test]
-fn test_parser() {
-    // todo
+fn test_parser_id() {
+    // The first name seen by a fresh dictionary is expected to get `Fun(0)`.
+    let parsed = TypeTerm::from_str("A");
+
+    assert_eq!(parsed, Ok(TypeTerm::TypeID(TypeID::Fun(0))));
+}
+
+#[test]
+fn test_parser_num() {
+    // A bare decimal literal is expected to parse to a number term.
+    let parsed = TypeTerm::from_str("1234");
+
+    assert_eq!(parsed, Ok(TypeTerm::Num(1234)));
+}
+
+#[test]
+fn test_parser_char() {
+    // A quoted character is expected to parse to a character term.
+    let parsed = TypeTerm::from_str("'x'");
+
+    assert_eq!(parsed, Ok(TypeTerm::Char('x')));
+}
+
+#[test]
+fn test_parser_app() {
+    // Each call parses with a fresh dictionary, so the expected IDs follow
+    // the order in which the names first appear in the input.
+    let a = TypeTerm::TypeID(TypeID::Fun(0));
+    let b = TypeTerm::TypeID(TypeID::Fun(1));
+    let c = TypeTerm::TypeID(TypeID::Fun(2));
+
+    // Application with two members.
+    let two = TypeTerm::from_str("<A B>");
+    let expected_two = TypeTerm::App(vec![a.clone(), b.clone()]);
+    assert_eq!(two, Ok(expected_two));
+
+    // Application with three members.
+    let three = TypeTerm::from_str("<A B C>");
+    let expected_three = TypeTerm::App(vec![a, b, c]);
+    assert_eq!(three, Ok(expected_three));
+}
+
+#[test]
+fn test_parser_unexpected_close() {
+    // A `>` with no application open cannot start a term.
+    let parsed = TypeTerm::from_str(">");
+
+    assert_eq!(parsed, Err(ParseError::UnexpectedClose));
+}
+
+#[test]
+fn test_parser_unexpected_token() {
+    // A second term after a complete one is rejected as trailing input.
+    let parsed = TypeTerm::from_str("A B");
+
+    assert_eq!(parsed, Err(ParseError::UnexpectedToken));
+}
+
+#[test]
+fn test_parser_ladder() {
+    // Each call parses with a fresh dictionary, so the expected IDs follow
+    // the order in which the names first appear in the input.
+    let a = TypeTerm::TypeID(TypeID::Fun(0));
+    let b = TypeTerm::TypeID(TypeID::Fun(1));
+    let c = TypeTerm::TypeID(TypeID::Fun(2));
+
+    // Ladder of two rungs.
+    let two = TypeTerm::from_str("A~B");
+    let expected_two = TypeTerm::Ladder(vec![a.clone(), b.clone()]);
+    assert_eq!(two, Ok(expected_two));
+
+    // Ladder of three rungs.
+    let three = TypeTerm::from_str("A~B~C");
+    let expected_three = TypeTerm::Ladder(vec![a, b, c]);
+    assert_eq!(three, Ok(expected_three));
+}
+
+#[test]
+fn test_parser_ladder_outside() {
+    // `~` after a closed application: the whole application is the first rung.
+    let app = TypeTerm::App(vec![
+        TypeTerm::TypeID(TypeID::Fun(0)),
+        TypeTerm::TypeID(TypeID::Fun(1)),
+    ]);
+    let expected = TypeTerm::Ladder(vec![app, TypeTerm::TypeID(TypeID::Fun(2))]);
+
+    let parsed = TypeTerm::from_str("<A B>~C");
+
+    assert_eq!(parsed, Ok(expected));
+}
+
+#[test]
+fn test_parser_ladder_inside() {
+    // `~` inside an application joins only the two neighbouring members.
+    let ladder = TypeTerm::Ladder(vec![
+        TypeTerm::TypeID(TypeID::Fun(1)),
+        TypeTerm::TypeID(TypeID::Fun(2)),
+    ]);
+    let expected = TypeTerm::App(vec![TypeTerm::TypeID(TypeID::Fun(0)), ladder]);
+
+    let parsed = TypeTerm::from_str("<A B~C>");
+
+    assert_eq!(parsed, Ok(expected));
+}
+
+#[test]
+fn test_parser_ladder_between() {
+    // A rung may itself be an application: here the ladder `B~<C D>` is
+    // expected as the second member of the outer application.
+    let inner_app = TypeTerm::App(vec![
+        TypeTerm::TypeID(TypeID::Fun(2)),
+        TypeTerm::TypeID(TypeID::Fun(3)),
+    ]);
+
+    let ladder = TypeTerm::Ladder(vec![TypeTerm::TypeID(TypeID::Fun(1)), inner_app]);
+    let expected = TypeTerm::App(vec![TypeTerm::TypeID(TypeID::Fun(0)), ladder]);
+
+    let parsed = TypeTerm::from_str("<A B~<C D>>");
+
+    assert_eq!(parsed, Ok(expected));
 }
 
 
+#[test]
+fn test_parser_ladder_large() { // end-to-end check on a deeply nested ladder type
+    assert_eq!(
+        TypeTerm::from_str(
+            "<Seq Date \
+                  ~<TimeSince UnixEpoch> \
+                  ~<Duration Seconds> \
+                  ~ℕ \
+                  ~<PosInt 10 BigEndian> \
+                  ~< Seq <Digit 10>~Unicode > > \
+              ~<SepSeq Unicode ':'> \
+              ~<Seq Unicode> \
+              ~UTF-8 \
+              ~<Seq Byte>"),
+
+        Ok( // expected term; each name is numbered in order of first appearance
+            TypeTerm::Ladder(vec![
+                TypeTerm::App(vec![
+                    TypeTerm::TypeID(TypeID::Fun(0)), // Seq
+                    TypeTerm::Ladder(vec![
+                        TypeTerm::TypeID(TypeID::Fun(1)), // Date
+                        TypeTerm::App(vec![
+                            TypeTerm::TypeID(TypeID::Fun(2)), // TimeSince
+                            TypeTerm::TypeID(TypeID::Fun(3)) // UnixEpoch
+                        ]),
+                        TypeTerm::App(vec![
+                            TypeTerm::TypeID(TypeID::Fun(4)), // Duration
+                            TypeTerm::TypeID(TypeID::Fun(5)) // Seconds
+                        ]),
+                        TypeTerm::TypeID(TypeID::Fun(6)), // ℕ
+                        TypeTerm::App(vec![
+                            TypeTerm::TypeID(TypeID::Fun(7)), // PosInt
+                            TypeTerm::Num(10),
+                            TypeTerm::TypeID(TypeID::Fun(8)) // BigEndian
+                        ]),
+                        TypeTerm::App(vec![
+                            TypeTerm::TypeID(TypeID::Fun(0)), // Seq (seen before)
+                            TypeTerm::Ladder(vec![
+                                TypeTerm::App(vec![
+                                    TypeTerm::TypeID(TypeID::Fun(9)), // Digit
+                                    TypeTerm::Num(10)
+                                ]),
+                                TypeTerm::TypeID(TypeID::Fun(10)) // Unicode
+                            ])
+                        ])
+                    ])
+                ]),
+                TypeTerm::App(vec![
+                    TypeTerm::TypeID(TypeID::Fun(11)), // SepSeq
+                    TypeTerm::TypeID(TypeID::Fun(10)), // Unicode (seen before)
+                    TypeTerm::Char(':')
+                ]),
+                TypeTerm::App(vec![
+                    TypeTerm::TypeID(TypeID::Fun(0)), // Seq (seen before)
+                    TypeTerm::TypeID(TypeID::Fun(10)) // Unicode (seen before)
+                ]),
+                TypeTerm::TypeID(TypeID::Fun(12)), // UTF-8
+                TypeTerm::App(vec![
+                    TypeTerm::TypeID(TypeID::Fun(0)), // Seq (seen before)
+                    TypeTerm::TypeID(TypeID::Fun(13)) // Byte
+                ])
+            ])
+        )
+    );
+}
+