initial parser implementation

This commit is contained in:
Michael Sippel 2023-10-02 01:28:22 +02:00
parent 8ec95dfbed
commit 8fa38ca7d3
Signed by: senvas
GPG key ID: F96CF119C34B64A6
4 changed files with 348 additions and 3 deletions

View file

@ -1,8 +1,9 @@
pub mod lexer;
pub mod bimap;
pub mod dict;
pub mod term;
pub mod lexer;
pub mod parser;
#[cfg(test)]
mod test;

136
src/parser.rs Normal file
View file

@ -0,0 +1,136 @@
use {
std::iter::Peekable,
crate::{
dict::*,
term::*,
lexer::*
}
};
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
/// Failure modes reported by the type-term parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// The lexer produced an error; the original error is carried along.
    LexError(LexError),

    /// A closing token was found where a rung was expected.
    UnexpectedClose,

    /// A ladder token was found where a rung was expected.
    UnexpectedLadder,

    /// The token stream ended before the term was complete.
    UnexpectedEnd,

    /// Tokens were left over after a complete term had been parsed.
    UnexpectedToken,
}
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
impl std::str::FromStr for TypeTerm {
    type Err = ParseError;

    /// Parses `s` into a `TypeTerm`.
    ///
    /// Every call builds its own `TypeDict`, which is dropped again when
    /// the call returns -- so this is mostly a convenience entry point;
    /// use `TypeDict::parse` directly to keep one dictionary across
    /// several inputs.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let mut tokens = LadderTypeLexer::from(s.chars()).peekable();
        let mut scratch_dict = TypeDict::new();
        scratch_dict.parse(&mut tokens)
    }
}
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
impl TypeDict {
fn parse_app<It>( &mut self, tokens: &mut Peekable<LadderTypeLexer<It>> ) -> Result<TypeTerm, ParseError>
where It: Iterator<Item = char>
{
let mut args = Vec::new();
while let Some(tok) = tokens.peek() {
match tok {
Ok(LadderTypeToken::Close) => {
tokens.next();
return Ok(TypeTerm::App(args));
}
_ => {
match self.parse_partial(tokens) {
Ok(a) => { args.push(a); }
Err(err) => { return Err(err); }
}
}
}
}
Err(ParseError::UnexpectedEnd)
}
fn parse_rung<It>( &mut self, tokens: &mut Peekable<LadderTypeLexer<It>> ) -> Result<TypeTerm, ParseError>
where It: Iterator<Item = char>
{
match tokens.next() {
Some(Ok(LadderTypeToken::Open)) => self.parse_app(tokens),
Some(Ok(LadderTypeToken::Close)) => Err(ParseError::UnexpectedClose),
Some(Ok(LadderTypeToken::Ladder)) => Err(ParseError::UnexpectedLadder),
Some(Ok(LadderTypeToken::Symbol(s))) =>
Ok(TypeTerm::TypeID(
if let Some(tyid) = self.get_typeid(&s) {
tyid
} else {
self.add_typename(s)
}
)),
Some(Ok(LadderTypeToken::Char(c))) => Ok(TypeTerm::Char(c)),
Some(Ok(LadderTypeToken::Num(n))) => Ok(TypeTerm::Num(n)),
Some(Err(err)) => Err(ParseError::LexError(err)),
None => Err(ParseError::UnexpectedEnd)
}
}
fn parse_partial<It>( &mut self, tokens: &mut Peekable<LadderTypeLexer<It>> ) -> Result<TypeTerm, ParseError>
where It: Iterator<Item = char>
{
let mut rungs = Vec::new();
match self.parse_rung(tokens) {
Ok(t) => { rungs.push(t); }
Err(err) => { return Err(err); }
}
while let Some(tok) = tokens.peek() {
match tok {
Ok(LadderTypeToken::Ladder) => {
tokens.next();
if rungs.len() > 0 {
match self.parse_rung(tokens) {
Ok(t) => { rungs.push(t); }
Err(err) => { return Err(err); }
}
} else {
return Err(ParseError::UnexpectedLadder);
}
}
Err(lexerr) => {
return Err(ParseError::LexError(lexerr.clone()));
}
_ => {
break;
}
}
}
match rungs.len() {
0 => Err(ParseError::UnexpectedEnd),
1 => Ok(rungs[0].clone()),
_ => Ok(TypeTerm::Ladder(rungs)),
}
}
pub fn parse<It>( &mut self, tokens: &mut Peekable<LadderTypeLexer<It>> ) -> Result<TypeTerm, ParseError>
where It: Iterator<Item = char>
{
match self.parse_partial(tokens) {
Ok(t) => {
if let Some(tok) = tokens.peek() {
Err(ParseError::UnexpectedToken)
} else {
Ok(t)
}
}
Err(err) => Err(err)
}
}
}
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\

View file

@ -54,6 +54,23 @@ impl TypeTerm {
self
}
/// Adds `t` as a further rung below this term.
///
/// If `self` is already a `TypeTerm::Ladder`, `t` is appended to its
/// rungs. Otherwise `self` is replaced by a two-rung ladder made of a
/// copy of the previous value followed by `t`. Returns `self` so that
/// calls can be chained.
pub fn repr_as(&mut self, t: impl Into<TypeTerm>) -> &mut Self {
    if let TypeTerm::Ladder(rungs) = self {
        rungs.push(t.into());
    } else {
        let upper = self.clone();
        let lower = t.into();
        *self = TypeTerm::Ladder(vec![upper, lower]);
    }

    self
}
/// Shorthand for passing the numeric term `TypeTerm::Num(v)` to `arg`.
pub fn num_arg(&mut self, v: i64) -> &mut Self {
    let number = TypeTerm::Num(v);
    self.arg(number)
}

View file

@ -1,7 +1,198 @@
use {
crate::{term::*, dict::*, parser::*},
std::str::FromStr
};
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
// A single symbol parses to a type id. `from_str` starts from a new
// dictionary, so the first name seen is expected to be `TypeID::Fun(0)`.
#[test]
fn test_parser_id() {
    assert_eq!(
        Ok(TypeTerm::TypeID(TypeID::Fun(0))),
        TypeTerm::from_str("A")
    );
}
// A bare decimal literal is expected to parse into a numeric term.
#[test]
fn test_parser_num() {
    let expected: Result<TypeTerm, ParseError> = Ok(TypeTerm::Num(1234));
    let parsed = TypeTerm::from_str("1234");

    assert_eq!(expected, parsed);
}
// A quoted character is expected to parse into a character term.
#[test]
fn test_parser_char() {
    let expected: Result<TypeTerm, ParseError> = Ok(TypeTerm::Char('x'));
    let parsed = TypeTerm::from_str("'x'");

    assert_eq!(expected, parsed);
}
// Application terms keep their arguments in source order; each new
// symbol is expected to receive the next type id.
#[test]
fn test_parser_app() {
    let sym = |n| TypeTerm::TypeID(TypeID::Fun(n));

    let two_args = TypeTerm::from_str("<A B>");
    assert_eq!(two_args, Ok(TypeTerm::App(vec![sym(0), sym(1)])));

    let three_args = TypeTerm::from_str("<A B C>");
    assert_eq!(three_args, Ok(TypeTerm::App(vec![sym(0), sym(1), sym(2)])));
}
// A close token with nothing to close must be rejected.
#[test]
fn test_parser_unexpected_close() {
    let outcome = TypeTerm::from_str(">");

    assert_eq!(outcome, Err(ParseError::UnexpectedClose));
}
// Two terms next to each other outside of an application are rejected:
// the second one is left over after the first has been parsed.
#[test]
fn test_parser_unexpected_token() {
    let outcome = TypeTerm::from_str("A B");

    assert_eq!(outcome, Err(ParseError::UnexpectedToken));
}
// Rungs joined by `~` form one flat ladder, in source order.
#[test]
fn test_parser_ladder() {
    let sym = |n| TypeTerm::TypeID(TypeID::Fun(n));

    let two_rungs = TypeTerm::from_str("A~B");
    assert_eq!(two_rungs, Ok(TypeTerm::Ladder(vec![sym(0), sym(1)])));

    let three_rungs = TypeTerm::from_str("A~B~C");
    assert_eq!(
        three_rungs,
        Ok(TypeTerm::Ladder(vec![sym(0), sym(1), sym(2)]))
    );
}
// A ladder whose first rung is a whole application term.
#[test]
fn test_parser_ladder_outside() {
    let sym = |n| TypeTerm::TypeID(TypeID::Fun(n));

    let application = TypeTerm::App(vec![sym(0), sym(1)]);
    let expected = TypeTerm::Ladder(vec![application, sym(2)]);

    assert_eq!(TypeTerm::from_str("<A B>~C"), Ok(expected));
}
// Inside an application the ladder binds tighter than the argument
// separator: `B~C` is one argument.
#[test]
fn test_parser_ladder_inside() {
    let sym = |n| TypeTerm::TypeID(TypeID::Fun(n));

    let second_arg = TypeTerm::Ladder(vec![sym(1), sym(2)]);
    let expected = TypeTerm::App(vec![sym(0), second_arg]);

    assert_eq!(TypeTerm::from_str("<A B~C>"), Ok(expected));
}
// A ladder inside an application whose lower rung is itself an
// application term.
#[test]
fn test_parser_ladder_between() {
    let sym = |n| TypeTerm::TypeID(TypeID::Fun(n));

    let inner_app = TypeTerm::App(vec![sym(2), sym(3)]);
    let second_arg = TypeTerm::Ladder(vec![sym(1), inner_app]);
    let expected = TypeTerm::App(vec![sym(0), second_arg]);

    assert_eq!(TypeTerm::from_str("<A B~<C D>>"), Ok(expected));
}
// A larger, nested ladder. The trailing comments name the symbol each
// expected type id stands for, in order of first appearance in the input.
#[test]
fn test_parser_ladder_large() {
    assert_eq!(
        TypeTerm::from_str(
            // The rung between `<Duration Seconds>` and `<PosInt ...>` must
            // be a symbol of its own: the expected term has
            // `TypeID::Fun(6)` at that position, and an empty rung would
            // put two ladder tokens next to each other.
            // NOTE(review): `ℕ` is assumed to be the symbol originally used
            // here -- confirm; any name not used elsewhere in the input
            // yields the same ids.
            "<Seq Date \
              ~<TimeSince UnixEpoch> \
              ~<Duration Seconds> \
              ~ℕ \
              ~<PosInt 10 BigEndian> \
              ~< Seq <Digit 10>~Unicode > > \
              ~<SepSeq Unicode ':'> \
              ~<Seq Unicode> \
              ~UTF-8 \
              ~<Seq Byte>"),
        Ok(
            TypeTerm::Ladder(vec![
                TypeTerm::App(vec![
                    TypeTerm::TypeID(TypeID::Fun(0)), // Seq
                    TypeTerm::Ladder(vec![
                        TypeTerm::TypeID(TypeID::Fun(1)), // Date
                        TypeTerm::App(vec![
                            TypeTerm::TypeID(TypeID::Fun(2)), // TimeSince
                            TypeTerm::TypeID(TypeID::Fun(3))  // UnixEpoch
                        ]),
                        TypeTerm::App(vec![
                            TypeTerm::TypeID(TypeID::Fun(4)), // Duration
                            TypeTerm::TypeID(TypeID::Fun(5))  // Seconds
                        ]),
                        TypeTerm::TypeID(TypeID::Fun(6)), // restored rung
                        TypeTerm::App(vec![
                            TypeTerm::TypeID(TypeID::Fun(7)), // PosInt
                            TypeTerm::Num(10),
                            TypeTerm::TypeID(TypeID::Fun(8))  // BigEndian
                        ]),
                        TypeTerm::App(vec![
                            TypeTerm::TypeID(TypeID::Fun(0)), // Seq
                            TypeTerm::Ladder(vec![
                                TypeTerm::App(vec![
                                    TypeTerm::TypeID(TypeID::Fun(9)), // Digit
                                    TypeTerm::Num(10)
                                ]),
                                TypeTerm::TypeID(TypeID::Fun(10)) // Unicode
                            ])
                        ])
                    ])
                ]),
                TypeTerm::App(vec![
                    TypeTerm::TypeID(TypeID::Fun(11)), // SepSeq
                    TypeTerm::TypeID(TypeID::Fun(10)), // Unicode
                    TypeTerm::Char(':')
                ]),
                TypeTerm::App(vec![
                    TypeTerm::TypeID(TypeID::Fun(0)),  // Seq
                    TypeTerm::TypeID(TypeID::Fun(10))  // Unicode
                ]),
                TypeTerm::TypeID(TypeID::Fun(12)), // UTF-8
                TypeTerm::App(vec![
                    TypeTerm::TypeID(TypeID::Fun(0)),  // Seq
                    TypeTerm::TypeID(TypeID::Fun(13))  // Byte
                ])
            ])
        )
    );
}