Compare commits

...

10 commits

13 changed files with 386 additions and 42 deletions

.gitignore (vendored, new file, +4)

@@ -0,0 +1,4 @@
target/
*~
\#*
.\#*


@@ -5,35 +5,31 @@ Rust Implementation of Ladder-Types (parsing, unification, rewriting, etc)
## Ladder Types
*Ladder Types* provide a way to distinguish between multiple *concrete
representations* of the same *abstract / conceptual type*.

In order to implement complex data structures and algorithms, many
layers of abstraction are usually built on top of each other.
Consequently, higher-level data types are encoded into lower-level
data types, forming a chain of embeddings from concept down to the
"rock bottom" of byte streams. While a high-level type makes claims
about the semantics of objects of that type, it is ambiguous with
regard to their concrete syntactic representation or memory layout.
However, for compositions to be type-safe, compatibility of concrete
representations must be ensured.

The intuition is to capture a 'represented-as' relation in the
structure of a type term. Formally, we introduce a new type
constructor, called the *ladder type*, written as `T1 ~ T2`, where
`T1` and `T2` are types. The term `T1 ~ T2` can be read as "`T1` is
represented as `T2`".

For example, in the unix shell many different tools & utilities
coexist. Depending on the application domain, each of them will
potentially make use of a different representational form for the
same abstract concept. For the concept 'natural number' alone, many
representations exist, varying in radix, endianness, digit encoding,
etc.

This can be particularly useful in contexts where no standard
representational forms exist, e.g. in the unix shell where many
different tools & utilities come into contact, each making use of
potentially different representational forms, depending on the
application domain.

Trying to introduce a global type system at the IPC level would
result in naming conflicts: abstract concepts like 'natural number'
do not have any standard representational form, as they would in more
self-contained languages. With a simple type name alone it is not
clear which of the many equivalent representations is required; for
an *integer number*, variations over radix, endianness, digit
encoding, etc. exist.

Usually, many layers of abstraction are built on top of each other,
where higher-level data types are encoded into lower-level data
types, forming a chain of embeddings from concept down to the "rock
bottom" of byte streams.

Intuitively, *ladder types* provide a way to distinguish between
multiple *concrete representations* of the same *abstract /
conceptual type*, by capturing the *represented-as* relation of
layered data formats in the structure of type-terms. Formally, we
introduce a new type constructor, called the *ladder type*, written
`T1 ~ T2`, where `T1` and `T2` are types. The type-term `T1 ~ T2`
then expresses the abstract type `T1` being represented in terms of
the concrete type `T2`, which can be read as "`T1` represented as
`T2`".

#### Example
@@ -42,7 +38,7 @@ each represented as unix-timestamp written as decimal number in
big-endian, encoded as UTF-8 string.
```
<Seq Timepoint
<Seq TimePoint
~<TimeSince UnixEpoch>
~<Duration Seconds>
~
@@ -66,15 +62,18 @@ this:
```rust
use laddertypes::*;
let mut dict = TypeDict::new();
fn main() {
    let mut dict = TypeDict::new();
let t1 = dict.parse("<A B~X C>").expect("couldnt parse typeterm");
let t2 = dict.parse("<<A B~X> C>").expect("couldnt parse typeterm");
    let t1 = dict.parse("<A B~X C>").expect("couldnt parse typeterm");
    let t2 = dict.parse("<<A B~X> C>").expect("couldnt parse typeterm");
assert_eq!( t1.clone().curry(), t2 );
assert_eq!( t1, t2.clone().decurry() );
    assert_eq!( t1.clone().curry(), t2 );
    assert_eq!( t1, t2.clone().decurry() );
}
```
### License
## License
[GPLv3](COPYING)


@@ -4,9 +4,11 @@ pub mod dict;
pub mod term;
pub mod lexer;
pub mod parser;
pub mod unparser;
pub mod curry;
pub mod lnf;
pub mod subtype;
pub mod unification;
#[cfg(test)]
mod test;
@@ -14,5 +16,6 @@ mod test;
pub use {
dict::*,
term::*,
unification::*
};


@@ -45,7 +45,7 @@ impl TypeTerm {
}
TypeTerm::App(args) => {
let mut args_iter = args.into_iter();
let args_iter = args.into_iter();
new_ladder.push( TypeTerm::App(vec![]) );


@@ -19,20 +19,20 @@ impl TypeTerm {
None
}
pub fn is_syntactic_subtype_of(&self, expected_type: &TypeTerm) -> Result<usize, Option<(usize, usize)>> {
pub fn is_syntactic_subtype_of(&self, expected_type: &TypeTerm) -> Result<usize, (usize, usize)> {
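// on mismatch, Err((first_match, first_match+i)) points at the first rung
// where the provided and expected normal forms diverge; Err((0,0)) replaces
// the former Err(None) when no semantic match exists at all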
if let Some((first_match, provided_type)) = self.is_semantic_subtype_of( expected_type ) {
let provided_lnf = provided_type.get_lnf_vec();
let expected_lnf = expected_type.clone().get_lnf_vec();
for i in 0 .. usize::min( provided_lnf.len(), expected_lnf.len() ) {
if provided_lnf[i] != expected_lnf[i] {
return Err(Some((first_match, first_match+i)))
return Err((first_match, first_match+i))
}
}
Ok(first_match)
} else {
Err(None)
Err((0,0))
}
}
@@ -43,7 +43,7 @@ impl TypeTerm {
t.is_semantic_subtype_of(self)
}
pub fn is_syntactic_supertype_of(&self, t: &TypeTerm) -> Result<usize, Option<(usize, usize)>> {
pub fn is_syntactic_supertype_of(&self, t: &TypeTerm) -> Result<usize, (usize, usize)> {
t.is_syntactic_subtype_of(self)
}
}


@@ -78,6 +78,36 @@ impl TypeTerm {
pub fn char_arg(&mut self, c: char) -> &mut Self {
self.arg(TypeTerm::Char(c))
}
/// recursively apply a substitution to all subterms,
/// replacing every occurrence of a variable for which
/// `subst` yields some type-term
pub fn apply_substitution(
&mut self,
subst: &impl Fn(&TypeID) -> Option<TypeTerm>
) -> &mut Self {
match self {
TypeTerm::TypeID(typid) => {
if let Some(t) = subst(typid) {
*self = t;
}
}
TypeTerm::Ladder(rungs) => {
for r in rungs.iter_mut() {
r.apply_substitution(subst);
}
}
TypeTerm::App(args) => {
for r in args.iter_mut() {
r.apply_substitution(subst);
}
}
_ => {}
}
self
}
}
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
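
For orientation, a minimal usage sketch (not part of the diff) of the new `apply_substitution`, mirroring the substitution test added below: a variable is registered in the dictionary, mapped to a concrete type, and then replaced inside a parsed term.

```rust
use std::collections::HashMap;
use laddertypes::*;

fn main() {
    let mut dict = TypeDict::new();
    let mut σ = HashMap::new();

    // map the variable U to the concrete type <Seq Char>
    σ.insert(dict.add_varname(String::from("U")),
             dict.parse("<Seq Char>").unwrap());

    let mut t = dict.parse("<Seq U>").unwrap();
    t.apply_substitution(&|id| σ.get(id).cloned());

    assert_eq!(t, dict.parse("<Seq <Seq Char>>").unwrap());
}
```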


@@ -4,4 +4,6 @@ pub mod parser;
pub mod curry;
pub mod lnf;
pub mod subtype;
pub mod substitution;
pub mod unification;


@@ -7,9 +7,18 @@ use {
#[test]
fn test_parser_id() {
let mut dict = TypeDict::new();
dict.add_varname("T".into());
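// "T" was registered as a variable above, so it parses to TypeID::Var(0),
// while unregistered names like "A" are assigned TypeID::Fun ids instead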
assert_eq!(
Ok(TypeTerm::TypeID(TypeID::Var(0))),
dict.parse("T")
);
assert_eq!(
Ok(TypeTerm::TypeID(TypeID::Fun(0))),
TypeDict::new().parse("A")
dict.parse("A")
);
}

src/test/substitution.rs (new file, +32)

@@ -0,0 +1,32 @@
use {
crate::{dict::*, term::*},
std::iter::FromIterator
};
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
#[test]
fn test_subst() {
let mut dict = TypeDict::new();
let mut σ = std::collections::HashMap::new();
// T -->
σ.insert
(dict.add_varname(String::from("T")),
dict.parse("").unwrap());
// U --> <Seq Char>
σ.insert
(dict.add_varname(String::from("U")),
dict.parse("<Seq Char>").unwrap());
assert_eq!(
dict.parse("<Seq T~U>").unwrap()
.apply_substitution(&|typid|{ σ.get(typid).cloned() }).clone(),
dict.parse("<Seq ~<Seq Char>>").unwrap()
);
}


@@ -62,7 +62,7 @@ fn test_syntactic_subtype() {
.is_syntactic_subtype_of(
&dict.parse("C~G").expect("parse error")
),
Err(Some((2,3)))
Err((2,3))
);
assert_eq!(
@@ -70,7 +70,7 @@ fn test_syntactic_subtype() {
.is_syntactic_subtype_of(
&dict.parse("G~F~K").expect("parse error")
),
Err(None)
Err((0,0))
);
assert_eq!(

src/test/unification.rs (new file, +118)

@@ -0,0 +1,118 @@
use {
crate::{dict::*, term::*, unification::*},
std::iter::FromIterator
};
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
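// helper: parse both terms and attempt to unify them; if a unificator is
// expected, applying the resulting σ to both sides must make them equal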
fn test_unify(ts1: &str, ts2: &str, expect_unificator: bool) {
let mut dict = TypeDict::new();
dict.add_varname(String::from("T"));
dict.add_varname(String::from("U"));
dict.add_varname(String::from("V"));
dict.add_varname(String::from("W"));
let mut t1 = dict.parse(ts1).unwrap();
let mut t2 = dict.parse(ts2).unwrap();
let σ = crate::unify( &t1, &t2 );
if expect_unificator {
assert!(σ.is_ok());
let σ = σ.unwrap();
assert_eq!(
t1.apply_substitution(&|v| σ.get(v).cloned()),
t2.apply_substitution(&|v| σ.get(v).cloned())
);
} else {
assert!(! σ.is_ok());
}
}
#[test]
fn test_unification_error() {
let mut dict = TypeDict::new();
dict.add_varname(String::from("T"));
assert_eq!(
crate::unify(
&dict.parse("<A T>").unwrap(),
&dict.parse("<B T>").unwrap()
),
Err(UnificationError {
addr: vec![0],
t1: dict.parse("A").unwrap(),
t2: dict.parse("B").unwrap()
})
);
assert_eq!(
crate::unify(
&dict.parse("<V <U A> T>").unwrap(),
&dict.parse("<V <U B> T>").unwrap()
),
Err(UnificationError {
addr: vec![1, 1],
t1: dict.parse("A").unwrap(),
t2: dict.parse("B").unwrap()
})
);
}
#[test]
fn test_unification() {
test_unify("A", "A", true);
test_unify("A", "B", false);
test_unify("<Seq T>", "<Seq Ascii~Char>", true);
test_unify("<Seq T>", "<U Char>", true);
test_unify(
"<Seq Path~<Seq Char>>~<SepSeq Char '\n'>~<Seq Char>",
"<Seq T~<Seq Char>>~<SepSeq Char '\n'>~<Seq Char>",
true
);
let mut dict = TypeDict::new();
dict.add_varname(String::from("T"));
dict.add_varname(String::from("U"));
dict.add_varname(String::from("V"));
dict.add_varname(String::from("W"));
assert_eq!(
UnificationProblem::new(vec![
(dict.parse("U").unwrap(), dict.parse("<Seq Char>").unwrap()),
(dict.parse("T").unwrap(), dict.parse("<Seq U>").unwrap()),
]).solve(),
Ok(
vec![
// T
(TypeID::Var(0), dict.parse("<Seq <Seq Char>>").unwrap()),
// U
(TypeID::Var(1), dict.parse("<Seq Char>").unwrap())
].into_iter().collect()
)
);
assert_eq!(
UnificationProblem::new(vec![
(dict.parse("<Seq T>").unwrap(), dict.parse("<Seq W~<Seq Char>>").unwrap()),
(dict.parse("<Seq >").unwrap(), dict.parse("<Seq W>").unwrap()),
]).solve(),
Ok(
vec![
// W
(TypeID::Var(3), dict.parse("").unwrap()),
// T
(TypeID::Var(0), dict.parse("~<Seq Char>").unwrap())
].into_iter().collect()
)
);
}

src/unification.rs (new file, +100)

@@ -0,0 +1,100 @@
use {
std::collections::HashMap,
crate::{term::*, dict::*}
};
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct UnificationError {
pub addr: Vec<usize>,
pub t1: TypeTerm,
pub t2: TypeTerm
}
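/// a set of equations between type-terms, together with the
/// substitution σ accumulated while solving them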
pub struct UnificationProblem {
eqs: Vec<(TypeTerm, TypeTerm, Vec<usize>)>,
σ: HashMap<TypeID, TypeTerm>
}
impl UnificationProblem {
pub fn new(eqs: Vec<(TypeTerm, TypeTerm)>) -> Self {
UnificationProblem {
eqs: eqs.iter().map(|(lhs,rhs)| (lhs.clone(),rhs.clone(),vec![])).collect(),
σ: HashMap::new()
}
}
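/// evaluate a single equation: bind a variable in σ, decompose two terms
/// of equal shape into equations over their subterms, or fail at `addr`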
pub fn eval_equation(&mut self, lhs: TypeTerm, rhs: TypeTerm, addr: Vec<usize>) -> Result<(), UnificationError> {
match (lhs.clone(), rhs.clone()) {
(TypeTerm::TypeID(TypeID::Var(varid)), t) |
(t, TypeTerm::TypeID(TypeID::Var(varid))) => {
self.σ.insert(TypeID::Var(varid), t.clone());
// update all values in substitution
let mut new_σ = HashMap::new();
for (v, tt) in self.σ.iter() {
let mut tt = tt.clone();
tt.apply_substitution(&|v| self.σ.get(v).cloned());
new_σ.insert(v.clone(), tt);
}
self.σ = new_σ;
Ok(())
}
(TypeTerm::TypeID(a1), TypeTerm::TypeID(a2)) => {
if a1 == a2 { Ok(()) } else { Err(UnificationError{ addr, t1: lhs, t2: rhs}) }
}
(TypeTerm::Num(n1), TypeTerm::Num(n2)) => {
if n1 == n2 { Ok(()) } else { Err(UnificationError{ addr, t1: lhs, t2: rhs}) }
}
(TypeTerm::Char(c1), TypeTerm::Char(c2)) => {
if c1 == c2 { Ok(()) } else { Err(UnificationError{ addr, t1: lhs, t2: rhs}) }
}
(TypeTerm::Ladder(a1), TypeTerm::Ladder(a2)) |
(TypeTerm::App(a1), TypeTerm::App(a2)) => {
if a1.len() == a2.len() {
for (i, (x, y)) in a1.iter().cloned().zip(a2.iter().cloned()).enumerate() {
let mut new_addr = addr.clone();
new_addr.push(i);
self.eqs.push((x, y, new_addr));
}
Ok(())
} else {
Err(UnificationError{ addr, t1: lhs, t2: rhs })
}
}
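// note: this arm is unreachable, Ladder/Ladder pairs are already
// matched by the arm above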
(TypeTerm::Ladder(l1), TypeTerm::Ladder(l2)) => {
Err(UnificationError{ addr, t1: lhs, t2: rhs })
}
_ => Err(UnificationError{ addr, t1: lhs, t2: rhs})
}
}
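/// pop equations until none remain, applying the current σ to both sides
/// before evaluating each of them; on success the final σ is returned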
pub fn solve(mut self) -> Result<HashMap<TypeID, TypeTerm>, UnificationError> {
while self.eqs.len() > 0 {
while let Some( (mut lhs,mut rhs,addr) ) = self.eqs.pop() {
lhs.apply_substitution(&|v| self.σ.get(v).cloned());
rhs.apply_substitution(&|v| self.σ.get(v).cloned());
self.eval_equation(lhs, rhs, addr)?;
}
}
Ok(self.σ)
}
}
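/// convenience wrapper: unify two type-terms and return the resulting
/// substitution, or the first pair of conflicting subterms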
pub fn unify(
t1: &TypeTerm,
t2: &TypeTerm
) -> Result<HashMap<TypeID, TypeTerm>, UnificationError> {
let mut unification = UnificationProblem::new(vec![ (t1.clone(), t2.clone()) ]);
unification.solve()
}
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\

src/unparser.rs (new file, +47)

@@ -0,0 +1,47 @@
use crate::{dict::*, term::*};
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
impl TypeDict {
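/// render a type-term back into the textual syntax accepted by the parser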
pub fn unparse(&self, t: &TypeTerm) -> String {
match t {
TypeTerm::TypeID(id) => self.get_typename(id).unwrap(),
TypeTerm::Num(n) => format!("{}", n),
TypeTerm::Char(c) => match c {
'\0' => "'\\0'".into(),
'\n' => "'\\n'".into(),
'\t' => "'\\t'".into(),
'\'' => "'\\''".into(),
c => format!("'{}'", c)
},
TypeTerm::Ladder(rungs) => {
let mut s = String::new();
let mut first = true;
for r in rungs.iter() {
if !first {
s.push('~');
}
first = false;
s.push_str(&self.unparse(r));
}
s
}
TypeTerm::App(args) => {
let mut s = String::new();
s.push('<');
let mut first = true;
for r in args.iter() {
if !first {
s.push(' ');
}
first = false;
s.push_str(&self.unparse(r));
}
s.push('>');
s
}
}
}
}
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
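
To round things off, a minimal round-trip sketch (not part of the diff) for the new unparser, assuming the crate is consumed as `laddertypes` like in the README example: a term is parsed, rendered back to a string with `unparse`, and re-parsed to an equal term.

```rust
use laddertypes::*;

fn main() {
    let mut dict = TypeDict::new();

    let t = dict.parse("<Seq Ascii~Char>").expect("parse");
    let s = dict.unparse(&t);

    // the rendered string should parse back to the same term
    assert_eq!(dict.parse(s.as_str()).expect("re-parse"), t);
    println!("{}", s);
}
```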