Compare commits

...

10 commits

13 changed files with 386 additions and 42 deletions

.gitignore (vendored, new file, +4)

@@ -0,0 +1,4 @@
target/
*~
#*
.#*


@@ -5,35 +5,31 @@ Rust Implementation of Ladder-Types (parsing, unification, rewriting, etc)
 
 ## Ladder Types
 
-*Ladder Types* provide a way to distinguish between multiple *concrete
-representations* of the same *abstract / conceptual type*.
-
-The intuition is to capture a 'represented-as' relation in the
-structure of a type term. Formally, we introduce a new type
-constructor, called the *ladder type*, written as `T1 ~ T2`, where
-`T1` and `T2` are types. The term `T1 ~ T2` can be read by `T1 is
-represented as T2`.
-
-This can be particularly useful in contexts where no standard
-representational forms exist, e.g. in the unix shell where many
-different tools & utilities come into contact, each making use of
-potentially different representational forms, depending on the
-application domain.
-
-Trying to introduce a global type system at IPC level would result in
-naming conflicts: Abstract concepts like 'natural number' do not have
-any standard representational form, as it is the case with other more
-contained languages. With a simple type name like this it is not clear
-which of the many equivalent representations is required, e.g. for an
-*integer number* variations over radices, endianness, digit encoding
-etc. exist.
-
-Usually, many layers of abstraction are built ontop of each other,
-where higher-level data types are encoded into lower-level data types,
-forming a chain of embeddings from concept to `rock bottom' of byte
-streams.
+In order to implement complex data structures and algorithms, many
+layers of abstraction are usually built on top of each other.
+Consequently, higher-level data types are encoded into lower-level
+data types, forming a chain of embeddings from concept to the `rock
+bottom' of byte streams. While a high-level type makes claims about
+the semantics of its objects, it is ambiguous with regard to their
+concrete syntactical representation or memory layout. However, for
+compositions to be type-safe, compatibility of the concrete
+representations must be ensured.
+
+For example, in the unix shell many different tools & utilities
+coexist. Depending on the application domain, each of them will
+potentially make use of different representational forms for the same
+abstract concepts. For the concept 'natural number' alone, many
+representations exist, varying over radices, endianness, digit
+encoding etc.
+
+Intuitively, *ladder types* provide a way to distinguish between
+multiple *concrete representations* of the same *abstract /
+conceptual type*, by capturing the *represented-as* relation of
+layered data formats in the structure of type-terms. Formally, we
+introduce a new type constructor, called the *ladder type*, written
+`T1 ~ T2`, where `T1` and `T2` are types. The type-term `T1 ~ T2`
+expresses the abstract type `T1` being represented in terms of the
+concrete type `T2`, and can be read as "`T1` represented as `T2`".
 
 #### Example
@@ -42,7 +38,7 @@ each represented as unix-timestamp written as decimal number in
 big-endian, encoded as UTF-8 string.
 
 ```
-<Seq Timepoint
+<Seq TimePoint
    ~<TimeSince UnixEpoch>
    ~<Duration Seconds>
    ~
@@ -66,15 +62,18 @@ this:
 ```rust
 use laddertypes::*;
 
-let mut dict = TypeDict::new();
+fn main() {
+    let mut dict = TypeDict::new();
 
-let t1 = dict.parse("<A B~X C>").expect("couldnt parse typeterm");
-let t2 = dict.parse("<<A B~X> C>").expect("couldnt parse typeterm");
+    let t1 = dict.parse("<A B~X C>").expect("couldnt parse typeterm");
+    let t2 = dict.parse("<<A B~X> C>").expect("couldnt parse typeterm");
 
-assert_eq!( t1.clone().curry(), t2 );
-assert_eq!( t1, t2.clone().decurry() );
+    assert_eq!( t1.clone().curry(), t2 );
+    assert_eq!( t1, t2.clone().decurry() );
+}
 ```
 
-### License
+## License
 
 [GPLv3](COPYING)
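To make the "represented-as" reading concrete, here is a minimal sketch of how such a ladder type can be written down and checked against its abstract type. The exact rungs are an illustrative abbreviation of the README example, not a quote of it, and the check uses `is_syntactic_subtype_of`, whose updated signature appears further down in this diff:

```rust
use laddertypes::*;

fn main() {
    let mut dict = TypeDict::new();

    // "a sequence of timepoints, represented as durations since the unix
    //  epoch, in turn represented as a sequence of characters"
    let concrete = dict.parse(
        "<Seq TimePoint~<TimeSince UnixEpoch>~<Duration Seconds>~<Seq Char>>"
    ).expect("couldnt parse typeterm");

    // the abstract type only names the concept, not its encoding
    let abstract_type = dict.parse("<Seq TimePoint>")
        .expect("couldnt parse typeterm");

    // the two terms are compared rung by rung in ladder normal form;
    // on mismatch, the indices of the conflicting rungs are reported
    match concrete.is_syntactic_subtype_of(&abstract_type) {
        Ok(first_match) =>
            println!("compatible, first matching rung: {}", first_match),
        Err((first_match, mismatch)) =>
            println!("incompatible between rungs {} and {}", first_match, mismatch),
    }
}
```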


@@ -4,9 +4,11 @@ pub mod dict;
 pub mod term;
 pub mod lexer;
 pub mod parser;
+pub mod unparser;
 pub mod curry;
 pub mod lnf;
 pub mod subtype;
+pub mod unification;
 
 #[cfg(test)]
 mod test;

@@ -14,5 +16,6 @@ mod test;
 pub use {
     dict::*,
     term::*,
+    unification::*
 };


@@ -45,7 +45,7 @@ impl TypeTerm {
             }
             TypeTerm::App(args) => {
-                let mut args_iter = args.into_iter();
+                let args_iter = args.into_iter();
                 new_ladder.push( TypeTerm::App(vec![]) );


@@ -19,20 +19,20 @@ impl TypeTerm {
         None
     }
 
-    pub fn is_syntactic_subtype_of(&self, expected_type: &TypeTerm) -> Result<usize, Option<(usize, usize)>> {
+    pub fn is_syntactic_subtype_of(&self, expected_type: &TypeTerm) -> Result<usize, (usize, usize)> {
         if let Some((first_match, provided_type)) = self.is_semantic_subtype_of( expected_type ) {
             let provided_lnf = provided_type.get_lnf_vec();
             let expected_lnf = expected_type.clone().get_lnf_vec();
 
             for i in 0 .. usize::min( provided_lnf.len(), expected_lnf.len() ) {
                 if provided_lnf[i] != expected_lnf[i] {
-                    return Err(Some((first_match, first_match+i)))
+                    return Err((first_match, first_match+i))
                 }
             }
 
             Ok(first_match)
         } else {
-            Err(None)
+            Err((0,0))
         }
     }

@@ -43,7 +43,7 @@ impl TypeTerm {
         t.is_semantic_subtype_of(self)
     }
 
-    pub fn is_syntactic_supertype_of(&self, t: &TypeTerm) -> Result<usize, Option<(usize, usize)>> {
+    pub fn is_syntactic_supertype_of(&self, t: &TypeTerm) -> Result<usize, (usize, usize)> {
         t.is_syntactic_subtype_of(self)
     }
 }


@@ -78,6 +78,36 @@ impl TypeTerm {
     pub fn char_arg(&mut self, c: char) -> &mut Self {
         self.arg(TypeTerm::Char(c))
     }
 
+    /// recursively apply substitution to all subterms,
+    /// which will replace all occurrences of variables that map
+    /// to some type-term in `subst`
+    pub fn apply_substitution(
+        &mut self,
+        subst: &impl Fn(&TypeID) -> Option<TypeTerm>
+    ) -> &mut Self {
+        match self {
+            TypeTerm::TypeID(typid) => {
+                if let Some(t) = subst(typid) {
+                    *self = t;
+                }
+            }
+            TypeTerm::Ladder(rungs) => {
+                for r in rungs.iter_mut() {
+                    r.apply_substitution(subst);
+                }
+            }
+            TypeTerm::App(args) => {
+                for r in args.iter_mut() {
+                    r.apply_substitution(subst);
+                }
+            }
+            _ => {}
+        }
+
+        self
+    }
 }
 
 //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
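A minimal sketch of driving `apply_substitution` from a `HashMap`, in the same style as the new `src/test/substitution.rs` below; the variable `U` and the chosen terms are illustrative:

```rust
use laddertypes::*;
use std::collections::HashMap;

fn main() {
    let mut dict = TypeDict::new();

    // map the type variable U to the concrete term <Seq Char>
    let mut subst = HashMap::new();
    subst.insert(
        dict.add_varname(String::from("U")),
        dict.parse("<Seq Char>").unwrap()
    );

    // every occurrence of U inside the term is replaced in place
    let mut term = dict.parse("<Seq U>").unwrap();
    term.apply_substitution(&|id| subst.get(id).cloned());

    assert_eq!( term, dict.parse("<Seq <Seq Char>>").unwrap() );
}
```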


@@ -4,4 +4,6 @@ pub mod parser;
 pub mod curry;
 pub mod lnf;
 pub mod subtype;
+pub mod substitution;
+pub mod unification;


@@ -7,9 +7,18 @@ use {
 #[test]
 fn test_parser_id() {
+    let mut dict = TypeDict::new();
+    dict.add_varname("T".into());
+
+    assert_eq!(
+        Ok(TypeTerm::TypeID(TypeID::Var(0))),
+        dict.parse("T")
+    );
+
     assert_eq!(
         Ok(TypeTerm::TypeID(TypeID::Fun(0))),
-        TypeDict::new().parse("A")
+        dict.parse("A")
     );
 }

src/test/substitution.rs (new file, +32)

@@ -0,0 +1,32 @@
use {
    crate::{dict::*, term::*},
    std::iter::FromIterator
};

//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\

#[test]
fn test_subst() {
    let mut dict = TypeDict::new();

    let mut σ = std::collections::HashMap::new();

    // T --> ℕ
    σ.insert
        (dict.add_varname(String::from("T")),
         dict.parse("ℕ").unwrap());

    // U --> <Seq Char>
    σ.insert
        (dict.add_varname(String::from("U")),
         dict.parse("<Seq Char>").unwrap());

    assert_eq!(
        dict.parse("<Seq T~U>").unwrap()
            .apply_substitution(&|typid|{ σ.get(typid).cloned() }).clone(),
        dict.parse("<Seq ℕ~<Seq Char>>").unwrap()
    );
}


@@ -62,7 +62,7 @@ fn test_syntactic_subtype() {
             .is_syntactic_subtype_of(
                 &dict.parse("C~G").expect("parse errror")
             ),
-        Err(Some((2,3)))
+        Err((2,3))
     );
 
     assert_eq!(

@@ -70,7 +70,7 @@ fn test_syntactic_subtype() {
             .is_syntactic_subtype_of(
                 &dict.parse("G~F~K").expect("parse errror")
             ),
-        Err(None)
+        Err((0,0))
     );
 
     assert_eq!(

src/test/unification.rs (new file, +118)

@@ -0,0 +1,118 @@
use {
    crate::{dict::*, term::*, unification::*},
    std::iter::FromIterator
};

//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\

fn test_unify(ts1: &str, ts2: &str, expect_unificator: bool) {
    let mut dict = TypeDict::new();
    dict.add_varname(String::from("T"));
    dict.add_varname(String::from("U"));
    dict.add_varname(String::from("V"));
    dict.add_varname(String::from("W"));

    let mut t1 = dict.parse(ts1).unwrap();
    let mut t2 = dict.parse(ts2).unwrap();

    let σ = crate::unify( &t1, &t2 );

    if expect_unificator {
        assert!(σ.is_ok());

        let σ = σ.unwrap();

        assert_eq!(
            t1.apply_substitution(&|v| σ.get(v).cloned()),
            t2.apply_substitution(&|v| σ.get(v).cloned())
        );
    } else {
        assert!(! σ.is_ok());
    }
}

#[test]
fn test_unification_error() {
    let mut dict = TypeDict::new();
    dict.add_varname(String::from("T"));

    assert_eq!(
        crate::unify(
            &dict.parse("<A T>").unwrap(),
            &dict.parse("<B T>").unwrap()
        ),
        Err(UnificationError {
            addr: vec![0],
            t1: dict.parse("A").unwrap(),
            t2: dict.parse("B").unwrap()
        })
    );

    assert_eq!(
        crate::unify(
            &dict.parse("<V <U A> T>").unwrap(),
            &dict.parse("<V <U B> T>").unwrap()
        ),
        Err(UnificationError {
            addr: vec![1, 1],
            t1: dict.parse("A").unwrap(),
            t2: dict.parse("B").unwrap()
        })
    );
}

#[test]
fn test_unification() {
    test_unify("A", "A", true);
    test_unify("A", "B", false);
    test_unify("<Seq T>", "<Seq Ascii~Char>", true);
    test_unify("<Seq T>", "<U Char>", true);

    test_unify(
        "<Seq Path~<Seq Char>>~<SepSeq Char '\n'>~<Seq Char>",
        "<Seq T~<Seq Char>>~<SepSeq Char '\n'>~<Seq Char>",
        true
    );

    let mut dict = TypeDict::new();

    dict.add_varname(String::from("T"));
    dict.add_varname(String::from("U"));
    dict.add_varname(String::from("V"));
    dict.add_varname(String::from("W"));

    assert_eq!(
        UnificationProblem::new(vec![
            (dict.parse("U").unwrap(), dict.parse("<Seq Char>").unwrap()),
            (dict.parse("T").unwrap(), dict.parse("<Seq U>").unwrap()),
        ]).solve(),
        Ok(
            vec![
                // T
                (TypeID::Var(0), dict.parse("<Seq <Seq Char>>").unwrap()),

                // U
                (TypeID::Var(1), dict.parse("<Seq Char>").unwrap())
            ].into_iter().collect()
        )
    );

    assert_eq!(
        UnificationProblem::new(vec![
            (dict.parse("<Seq T>").unwrap(), dict.parse("<Seq W~<Seq Char>>").unwrap()),
            (dict.parse("<Seq ℕ>").unwrap(), dict.parse("<Seq W>").unwrap()),
        ]).solve(),
        Ok(
            vec![
                // W
                (TypeID::Var(3), dict.parse("ℕ").unwrap()),

                // T
                (TypeID::Var(0), dict.parse("ℕ~<Seq Char>").unwrap())
            ].into_iter().collect()
        )
    );
}

src/unification.rs (new file, +100)

@@ -0,0 +1,100 @@
use {
    std::collections::HashMap,
    crate::{term::*, dict::*}
};

//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\

#[derive(Clone, Eq, PartialEq, Debug)]
pub struct UnificationError {
    pub addr: Vec<usize>,
    pub t1: TypeTerm,
    pub t2: TypeTerm
}

pub struct UnificationProblem {
    eqs: Vec<(TypeTerm, TypeTerm, Vec<usize>)>,
    σ: HashMap<TypeID, TypeTerm>
}

impl UnificationProblem {
    pub fn new(eqs: Vec<(TypeTerm, TypeTerm)>) -> Self {
        UnificationProblem {
            eqs: eqs.iter().map(|(lhs,rhs)| (lhs.clone(),rhs.clone(),vec![])).collect(),
            σ: HashMap::new()
        }
    }

    pub fn eval_equation(&mut self, lhs: TypeTerm, rhs: TypeTerm, addr: Vec<usize>) -> Result<(), UnificationError> {
        match (lhs.clone(), rhs.clone()) {
            (TypeTerm::TypeID(TypeID::Var(varid)), t) |
            (t, TypeTerm::TypeID(TypeID::Var(varid))) => {
                self.σ.insert(TypeID::Var(varid), t.clone());

                // update all values in substitution
                let mut new_σ = HashMap::new();
                for (v, tt) in self.σ.iter() {
                    let mut tt = tt.clone();
                    tt.apply_substitution(&|v| self.σ.get(v).cloned());
                    new_σ.insert(v.clone(), tt);
                }
                self.σ = new_σ;

                Ok(())
            }

            (TypeTerm::TypeID(a1), TypeTerm::TypeID(a2)) => {
                if a1 == a2 { Ok(()) } else { Err(UnificationError{ addr, t1: lhs, t2: rhs}) }
            }
            (TypeTerm::Num(n1), TypeTerm::Num(n2)) => {
                if n1 == n2 { Ok(()) } else { Err(UnificationError{ addr, t1: lhs, t2: rhs}) }
            }
            (TypeTerm::Char(c1), TypeTerm::Char(c2)) => {
                if c1 == c2 { Ok(()) } else { Err(UnificationError{ addr, t1: lhs, t2: rhs}) }
            }

            (TypeTerm::Ladder(a1), TypeTerm::Ladder(a2)) |
            (TypeTerm::App(a1), TypeTerm::App(a2)) => {
                if a1.len() == a2.len() {
                    for (i, (x, y)) in a1.iter().cloned().zip(a2.iter().cloned()).enumerate() {
                        let mut new_addr = addr.clone();
                        new_addr.push(i);
                        self.eqs.push((x, y, new_addr));
                    }
                    Ok(())
                } else {
                    Err(UnificationError{ addr, t1: lhs, t2: rhs })
                }
            }

            (TypeTerm::Ladder(l1), TypeTerm::Ladder(l2)) => {
                Err(UnificationError{ addr, t1: lhs, t2: rhs })
            }

            _ => Err(UnificationError{ addr, t1: lhs, t2: rhs})
        }
    }

    pub fn solve(mut self) -> Result<HashMap<TypeID, TypeTerm>, UnificationError> {
        while self.eqs.len() > 0 {
            while let Some( (mut lhs,mut rhs,addr) ) = self.eqs.pop() {
                lhs.apply_substitution(&|v| self.σ.get(v).cloned());
                rhs.apply_substitution(&|v| self.σ.get(v).cloned());
                self.eval_equation(lhs, rhs, addr)?;
            }
        }

        Ok(self.σ)
    }
}

pub fn unify(
    t1: &TypeTerm,
    t2: &TypeTerm
) -> Result<HashMap<TypeID, TypeTerm>, UnificationError> {
    let mut unification = UnificationProblem::new(vec![ (t1.clone(), t2.clone()) ]);
    unification.solve()
}

//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
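For reference, a short usage sketch of the public `unify` entry point, mirroring the `test_unify` helper in `src/test/unification.rs` above; the two terms are taken from those test cases:

```rust
use laddertypes::*;

fn main() {
    let mut dict = TypeDict::new();
    dict.add_varname(String::from("T"));

    let mut t1 = dict.parse("<Seq T>").unwrap();
    let mut t2 = dict.parse("<Seq Ascii~Char>").unwrap();

    match unify(&t1, &t2) {
        // on success we get a substitution σ that makes both terms equal
        Ok(σ) => {
            t1.apply_substitution(&|v| σ.get(v).cloned());
            t2.apply_substitution(&|v| σ.get(v).cloned());
            assert_eq!(t1, t2);
        }
        // on failure the error carries the conflicting subterms and their address
        Err(err) => {
            eprintln!("unification failed at {:?}: {:?} vs {:?}", err.addr, err.t1, err.t2);
        }
    }
}
```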

src/unparser.rs (new file, +47)

@@ -0,0 +1,47 @@
use crate::{dict::*, term::*};

//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\

impl TypeDict {
    pub fn unparse(&self, t: &TypeTerm) -> String {
        match t {
            TypeTerm::TypeID(id) => self.get_typename(id).unwrap(),
            TypeTerm::Num(n) => format!("{}", n),
            TypeTerm::Char(c) => match c {
                '\0' => "'\\0'".into(),
                '\n' => "'\\n'".into(),
                '\t' => "'\\t'".into(),
                '\'' => "'\\''".into(),
                c => format!("'{}'", c)
            },
            TypeTerm::Ladder(rungs) => {
                let mut s = String::new();
                let mut first = true;
                for r in rungs.iter() {
                    if !first {
                        s.push('~');
                    }
                    first = false;
                    s.push_str(&mut self.unparse(r));
                }
                s
            }
            TypeTerm::App(args) => {
                let mut s = String::new();
                s.push('<');
                let mut first = true;
                for r in args.iter() {
                    if !first {
                        s.push(' ');
                    }
                    first = false;
                    s.push_str(&mut self.unparse(r));
                }
                s.push('>');
                s
            }
        }
    }
}

//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
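A short usage sketch of the new unparser. It assumes that `unparse` reproduces the same surface syntax that `parse` accepted, which is what the implementation above does for a simple term like this one:

```rust
use laddertypes::*;

fn main() {
    let mut dict = TypeDict::new();

    let t = dict.parse("<Seq Char~Ascii>").expect("couldnt parse typeterm");

    // render the term back into its textual form
    let s = dict.unparse(&t);
    assert_eq!( s, "<Seq Char~Ascii>" );
}
```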