diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d5d6d5a --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +target/ +*~ +\#* +.\#* diff --git a/README.md b/README.md index eea0d5f..d4dc395 100644 --- a/README.md +++ b/README.md @@ -5,35 +5,31 @@ Rust Implementation of Ladder-Types (parsing, unification, rewriting, etc) ## Ladder Types -*Ladder Types* provide a way to distinguish between multiple *concrete -representations* of the same *abstract / conceptual type*. +In order to implement complex datastructures and algorithms, usually +many layers of abstraction are built ontop of each other. +Consequently higher-level data types are encoded into lower-level data +types, forming a chain of embeddings from concept to `rock bottom' of +byte streams. While a high-level type makes claims about the +semantics of objects of that type, high-level types are ambiguous in +regard to their concrete syntactical representation or memory +layout. However for compositions to be type-safe, compatibility of +concrete represenations must be ensured. -The intuition is to capture a 'represented-as' relation in the -structure of a type term. Formally, we introduce a new type -constructor, called the *ladder type*, written as `T1 ~ T2`, where -`T1` and `T2` are types. The term `T1 ~ T2` can be read by `T1 is -represented as T2`. +For example in the unix shell, many different tools & utilities +coexist. Depending on the application domain, each of them will +potentially make use of different representational forms for the same +abstract concepts. E.g. for the concept 'natural number', many +representations do exist, e.g. with variation over radices, +endianness, digit encoding etc. - - -This can be particularly useful in contexts where no standard -representational forms exist, e.g. in the unix shell where many -different tools & utilities come into contact, each making use of -potentially different representational forms, depending on the -application domain. - -Trying to introduce a global type system at IPC level would result in -naming conflicts: Abstract concepts like 'natural number' do not have -any standard representational form, as it is the case with other more -contained languages. With a simple type name like this it is not clear -which of the many equivalent representations is required, e.g. for an -*integer number* variations over radices, endianness, digit encoding -etc. exist. - -Usually, many layers of abstraction are built ontop of each other, -where higher-level data types are encoded into lower-level data types, -forming a chain of embeddings from concept to `rock bottom' of byte -streams. +Intuitively, *ladder types* provide a way to distinguish between +multiple *concrete representations* of the same *abstract / conceptual +type*, by capturing the *represented-as* of layered data formats in +the structure of type-terms. Formally, we introduce a new type +constructor, called the *ladder type*, written `T1 ~ T2`, where `T1` +and `T2` are types. The type-term `T1 ~ T2` then expresses the +abstract type of `T1` being represented in terms of the concrete type +`T2`, which can be read by "`T1` represented as `T2`". #### Example @@ -42,7 +38,7 @@ each represented as unix-timestamp written as decimal number in big-endian, encoded as UTF-8 string. ``` -<Seq Timepoint +<Seq TimePoint ~<TimeSince UnixEpoch> ~<Duration Seconds> ~ℕ @@ -66,15 +62,18 @@ this: ```rust use laddertypes::*; -let mut dict = TypeDict::new(); +fn main() { + let mut dict = TypeDict::new(); -let t1 = dict.parse("<A B~X C>").expect("couldnt parse typeterm"); -let t2 = dict.parse("<<A B~X> C>").expect("couldnt parse typeterm"); + let t1 = dict.parse("<A B~X C>").expect("couldnt parse typeterm"); + let t2 = dict.parse("<<A B~X> C>").expect("couldnt parse typeterm"); -assert_eq!( t1.clone().curry(), t2 ); -assert_eq!( t1, t2.clone().decurry() ); + assert_eq!( t1.clone().curry(), t2 ); + assert_eq!( t1, t2.clone().decurry() ); +} ``` -### License + +## License [GPLv3](COPYING) diff --git a/src/lib.rs b/src/lib.rs index 3f725bc..1a270cc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,9 +4,11 @@ pub mod dict; pub mod term; pub mod lexer; pub mod parser; +pub mod unparser; pub mod curry; pub mod lnf; pub mod subtype; +pub mod unification; #[cfg(test)] mod test; @@ -14,5 +16,6 @@ mod test; pub use { dict::*, term::*, + unification::* }; diff --git a/src/lnf.rs b/src/lnf.rs index 9e38aea..b284df5 100644 --- a/src/lnf.rs +++ b/src/lnf.rs @@ -45,7 +45,7 @@ impl TypeTerm { } TypeTerm::App(args) => { - let mut args_iter = args.into_iter(); + let args_iter = args.into_iter(); new_ladder.push( TypeTerm::App(vec![]) ); diff --git a/src/subtype.rs b/src/subtype.rs index 8f398b1..8c46752 100644 --- a/src/subtype.rs +++ b/src/subtype.rs @@ -19,20 +19,20 @@ impl TypeTerm { None } - pub fn is_syntactic_subtype_of(&self, expected_type: &TypeTerm) -> Result<usize, Option<(usize, usize)>> { + pub fn is_syntactic_subtype_of(&self, expected_type: &TypeTerm) -> Result<usize, (usize, usize)> { if let Some((first_match, provided_type)) = self.is_semantic_subtype_of( expected_type ) { let provided_lnf = provided_type.get_lnf_vec(); let expected_lnf = expected_type.clone().get_lnf_vec(); for i in 0 .. usize::min( provided_lnf.len(), expected_lnf.len() ) { if provided_lnf[i] != expected_lnf[i] { - return Err(Some((first_match, first_match+i))) + return Err((first_match, first_match+i)) } } Ok(first_match) } else { - Err(None) + Err((0,0)) } } @@ -43,7 +43,7 @@ impl TypeTerm { t.is_semantic_subtype_of(self) } - pub fn is_syntactic_supertype_of(&self, t: &TypeTerm) -> Result<usize, Option<(usize, usize)>> { + pub fn is_syntactic_supertype_of(&self, t: &TypeTerm) -> Result<usize, (usize, usize)> { t.is_syntactic_subtype_of(self) } } diff --git a/src/term.rs b/src/term.rs index 25fee14..f5e8f5f 100644 --- a/src/term.rs +++ b/src/term.rs @@ -78,6 +78,36 @@ impl TypeTerm { pub fn char_arg(&mut self, c: char) -> &mut Self { self.arg(TypeTerm::Char(c)) } + + /// recursively apply substitution to all subterms, + /// which will replace all occurences of variables which map + /// some type-term in `subst` + pub fn apply_substitution( + &mut self, + subst: &impl Fn(&TypeID) -> Option<TypeTerm> + ) -> &mut Self { + match self { + TypeTerm::TypeID(typid) => { + if let Some(t) = subst(typid) { + *self = t; + } + } + + TypeTerm::Ladder(rungs) => { + for r in rungs.iter_mut() { + r.apply_substitution(subst); + } + } + TypeTerm::App(args) => { + for r in args.iter_mut() { + r.apply_substitution(subst); + } + } + _ => {} + } + + self + } } //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ diff --git a/src/test/mod.rs b/src/test/mod.rs index 9d13c07..d116412 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -4,4 +4,6 @@ pub mod parser; pub mod curry; pub mod lnf; pub mod subtype; +pub mod substitution; +pub mod unification; diff --git a/src/test/parser.rs b/src/test/parser.rs index fe75075..1166229 100644 --- a/src/test/parser.rs +++ b/src/test/parser.rs @@ -7,9 +7,18 @@ use { #[test] fn test_parser_id() { + let mut dict = TypeDict::new(); + + dict.add_varname("T".into()); + + assert_eq!( + Ok(TypeTerm::TypeID(TypeID::Var(0))), + dict.parse("T") + ); + assert_eq!( Ok(TypeTerm::TypeID(TypeID::Fun(0))), - TypeDict::new().parse("A") + dict.parse("A") ); } diff --git a/src/test/substitution.rs b/src/test/substitution.rs new file mode 100644 index 0000000..7959b08 --- /dev/null +++ b/src/test/substitution.rs @@ -0,0 +1,32 @@ + +use { + crate::{dict::*, term::*}, + std::iter::FromIterator +}; + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +#[test] +fn test_subst() { + let mut dict = TypeDict::new(); + + let mut σ = std::collections::HashMap::new(); + + // T --> ℕ + σ.insert + (dict.add_varname(String::from("T")), + dict.parse("ℕ").unwrap()); + + // U --> <Seq Char> + σ.insert + (dict.add_varname(String::from("U")), + dict.parse("<Seq Char>").unwrap()); + + + assert_eq!( + dict.parse("<Seq T~U>").unwrap() + .apply_substitution(&|typid|{ σ.get(typid).cloned() }).clone(), + dict.parse("<Seq ℕ~<Seq Char>>").unwrap() + ); +} + diff --git a/src/test/subtype.rs b/src/test/subtype.rs index 54ad397..08cc5c7 100644 --- a/src/test/subtype.rs +++ b/src/test/subtype.rs @@ -62,7 +62,7 @@ fn test_syntactic_subtype() { .is_syntactic_subtype_of( &dict.parse("C~G").expect("parse errror") ), - Err(Some((2,3))) + Err((2,3)) ); assert_eq!( @@ -70,7 +70,7 @@ fn test_syntactic_subtype() { .is_syntactic_subtype_of( &dict.parse("G~F~K").expect("parse errror") ), - Err(None) + Err((0,0)) ); assert_eq!( diff --git a/src/test/unification.rs b/src/test/unification.rs new file mode 100644 index 0000000..34d355d --- /dev/null +++ b/src/test/unification.rs @@ -0,0 +1,118 @@ + +use { + crate::{dict::*, term::*, unification::*}, + std::iter::FromIterator +}; + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +fn test_unify(ts1: &str, ts2: &str, expect_unificator: bool) { + let mut dict = TypeDict::new(); + dict.add_varname(String::from("T")); + dict.add_varname(String::from("U")); + dict.add_varname(String::from("V")); + dict.add_varname(String::from("W")); + + let mut t1 = dict.parse(ts1).unwrap(); + let mut t2 = dict.parse(ts2).unwrap(); + let σ = crate::unify( &t1, &t2 ); + + if expect_unificator { + assert!(σ.is_ok()); + + let σ = σ.unwrap(); + + assert_eq!( + t1.apply_substitution(&|v| σ.get(v).cloned()), + t2.apply_substitution(&|v| σ.get(v).cloned()) + ); + } else { + assert!(! σ.is_ok()); + } +} + +#[test] +fn test_unification_error() { + let mut dict = TypeDict::new(); + dict.add_varname(String::from("T")); + + assert_eq!( + crate::unify( + &dict.parse("<A T>").unwrap(), + &dict.parse("<B T>").unwrap() + ), + + Err(UnificationError { + addr: vec![0], + t1: dict.parse("A").unwrap(), + t2: dict.parse("B").unwrap() + }) + ); + + assert_eq!( + crate::unify( + &dict.parse("<V <U A> T>").unwrap(), + &dict.parse("<V <U B> T>").unwrap() + ), + + Err(UnificationError { + addr: vec![1, 1], + t1: dict.parse("A").unwrap(), + t2: dict.parse("B").unwrap() + }) + ); +} + +#[test] +fn test_unification() { + test_unify("A", "A", true); + test_unify("A", "B", false); + test_unify("<Seq T>", "<Seq Ascii~Char>", true); + test_unify("<Seq T>", "<U Char>", true); + + test_unify( + "<Seq Path~<Seq Char>>~<SepSeq Char '\n'>~<Seq Char>", + "<Seq T~<Seq Char>>~<SepSeq Char '\n'>~<Seq Char>", + true + ); + + let mut dict = TypeDict::new(); + + dict.add_varname(String::from("T")); + dict.add_varname(String::from("U")); + dict.add_varname(String::from("V")); + dict.add_varname(String::from("W")); + + assert_eq!( + UnificationProblem::new(vec![ + (dict.parse("U").unwrap(), dict.parse("<Seq Char>").unwrap()), + (dict.parse("T").unwrap(), dict.parse("<Seq U>").unwrap()), + ]).solve(), + Ok( + vec![ + // T + (TypeID::Var(0), dict.parse("<Seq <Seq Char>>").unwrap()), + + // U + (TypeID::Var(1), dict.parse("<Seq Char>").unwrap()) + ].into_iter().collect() + ) + ); + + assert_eq!( + UnificationProblem::new(vec![ + (dict.parse("<Seq T>").unwrap(), dict.parse("<Seq W~<Seq Char>>").unwrap()), + (dict.parse("<Seq ℕ>").unwrap(), dict.parse("<Seq W>").unwrap()), + ]).solve(), + Ok( + vec![ + // W + (TypeID::Var(3), dict.parse("ℕ").unwrap()), + + // T + (TypeID::Var(0), dict.parse("ℕ~<Seq Char>").unwrap()) + ].into_iter().collect() + ) + ); +} + diff --git a/src/unification.rs b/src/unification.rs new file mode 100644 index 0000000..abbc1fe --- /dev/null +++ b/src/unification.rs @@ -0,0 +1,100 @@ +use { + std::collections::HashMap, + crate::{term::*, dict::*} +}; + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +#[derive(Clone, Eq, PartialEq, Debug)] +pub struct UnificationError { + pub addr: Vec<usize>, + pub t1: TypeTerm, + pub t2: TypeTerm +} + +pub struct UnificationProblem { + eqs: Vec<(TypeTerm, TypeTerm, Vec<usize>)>, + σ: HashMap<TypeID, TypeTerm> +} + +impl UnificationProblem { + pub fn new(eqs: Vec<(TypeTerm, TypeTerm)>) -> Self { + UnificationProblem { + eqs: eqs.iter().map(|(lhs,rhs)| (lhs.clone(),rhs.clone(),vec![])).collect(), + σ: HashMap::new() + } + } + + pub fn eval_equation(&mut self, lhs: TypeTerm, rhs: TypeTerm, addr: Vec<usize>) -> Result<(), UnificationError> { + match (lhs.clone(), rhs.clone()) { + (TypeTerm::TypeID(TypeID::Var(varid)), t) | + (t, TypeTerm::TypeID(TypeID::Var(varid))) => { + self.σ.insert(TypeID::Var(varid), t.clone()); + + // update all values in substitution + let mut new_σ = HashMap::new(); + for (v, tt) in self.σ.iter() { + let mut tt = tt.clone(); + tt.apply_substitution(&|v| self.σ.get(v).cloned()); + new_σ.insert(v.clone(), tt); + } + self.σ = new_σ; + + Ok(()) + } + + (TypeTerm::TypeID(a1), TypeTerm::TypeID(a2)) => { + if a1 == a2 { Ok(()) } else { Err(UnificationError{ addr, t1: lhs, t2: rhs}) } + } + (TypeTerm::Num(n1), TypeTerm::Num(n2)) => { + if n1 == n2 { Ok(()) } else { Err(UnificationError{ addr, t1: lhs, t2: rhs}) } + } + (TypeTerm::Char(c1), TypeTerm::Char(c2)) => { + if c1 == c2 { Ok(()) } else { Err(UnificationError{ addr, t1: lhs, t2: rhs}) } + } + + (TypeTerm::Ladder(a1), TypeTerm::Ladder(a2)) | + (TypeTerm::App(a1), TypeTerm::App(a2)) => { + if a1.len() == a2.len() { + for (i, (x, y)) in a1.iter().cloned().zip(a2.iter().cloned()).enumerate() { + let mut new_addr = addr.clone(); + new_addr.push(i); + self.eqs.push((x, y, new_addr)); + } + Ok(()) + } else { + Err(UnificationError{ addr, t1: lhs, t2: rhs }) + } + } + + (TypeTerm::Ladder(l1), TypeTerm::Ladder(l2)) => { + Err(UnificationError{ addr, t1: lhs, t2: rhs }) + } + + _ => Err(UnificationError{ addr, t1: lhs, t2: rhs}) + } + } + + pub fn solve(mut self) -> Result<HashMap<TypeID, TypeTerm>, UnificationError> { + while self.eqs.len() > 0 { + while let Some( (mut lhs,mut rhs,addr) ) = self.eqs.pop() { + lhs.apply_substitution(&|v| self.σ.get(v).cloned()); + rhs.apply_substitution(&|v| self.σ.get(v).cloned()); + self.eval_equation(lhs, rhs, addr)?; + } + } + + Ok(self.σ) + } +} + +pub fn unify( + t1: &TypeTerm, + t2: &TypeTerm +) -> Result<HashMap<TypeID, TypeTerm>, UnificationError> { + let mut unification = UnificationProblem::new(vec![ (t1.clone(), t2.clone()) ]); + unification.solve() +} + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + diff --git a/src/unparser.rs b/src/unparser.rs new file mode 100644 index 0000000..ccf754d --- /dev/null +++ b/src/unparser.rs @@ -0,0 +1,47 @@ +use crate::{dict::*, term::*}; + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +impl TypeDict { + pub fn unparse(&self, t: &TypeTerm) -> String { + match t { + TypeTerm::TypeID(id) => self.get_typename(id).unwrap(), + TypeTerm::Num(n) => format!("{}", n), + TypeTerm::Char(c) => match c { + '\0' => "'\\0'".into(), + '\n' => "'\\n'".into(), + '\t' => "'\\t'".into(), + '\'' => "'\\''".into(), + c => format!("'{}'", c) + }, + TypeTerm::Ladder(rungs) => { + let mut s = String::new(); + let mut first = true; + for r in rungs.iter() { + if !first { + s.push('~'); + } + first = false; + s.push_str(&mut self.unparse(r)); + } + s + } + TypeTerm::App(args) => { + let mut s = String::new(); + s.push('<'); + let mut first = true; + for r in args.iter() { + if !first { + s.push(' '); + } + first = false; + s.push_str(&mut self.unparse(r)); + } + s.push('>'); + s + } + } + } +} + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\