lt-core/src/parser.rs

488 lines
17 KiB
Rust
Raw Normal View History

2024-05-09 20:13:10 +02:00
use {
2024-05-12 18:58:39 +02:00
crate::{
expr::{LTExpr, Statement, TypeError, TypeTag},
lexer::{LTIRLexer, LTIRToken, LexError, InputRegionTag},
2024-05-12 18:58:39 +02:00
},
2024-05-12 18:56:10 +02:00
std::{
iter::Peekable,
2024-05-12 18:58:39 +02:00
sync::{Arc, RwLock},
2024-05-12 18:56:10 +02:00
},
2024-05-09 20:13:10 +02:00
};
2024-05-14 09:56:19 +02:00
#[derive(Clone, Debug, PartialEq)]
2024-05-09 20:13:10 +02:00
pub enum ParseError {
LexError(LexError),
UnexpectedClose,
UnexpectedEnd,
2024-05-12 18:58:39 +02:00
UnexpectedToken,
2024-05-14 09:56:19 +02:00
TypeParseError(laddertypes::parser::ParseError)
2024-05-09 20:13:10 +02:00
}
2024-05-11 00:00:20 +02:00
pub fn parse_expect<It>(
tokens: &mut Peekable<It>,
2024-05-12 18:58:39 +02:00
expected_token: LTIRToken,
) -> Result<(), (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
2024-05-11 00:00:20 +02:00
{
match tokens.next() {
2024-05-13 22:55:24 +02:00
Some((region, Ok(t))) => {
2024-05-11 00:00:20 +02:00
if t == expected_token {
Ok(())
} else {
Err((region, ParseError::UnexpectedToken))
2024-05-11 00:00:20 +02:00
}
2024-05-12 18:58:39 +02:00
}
Some((region, Err(err))) => Err((region, ParseError::LexError(err))),
None => Err((InputRegionTag::default(), ParseError::UnexpectedEnd)),
2024-05-11 00:00:20 +02:00
}
}
2024-05-14 09:56:19 +02:00
/* parse symbol name
*/
pub fn parse_symbol<It>(tokens: &mut Peekable<It>) -> Result<String, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
2024-05-11 00:00:20 +02:00
{
match tokens.next() {
2024-05-13 22:55:24 +02:00
Some((region, Ok(LTIRToken::Symbol(name)))) => Ok(name),
Some((region, Ok(_))) => Err((region, ParseError::UnexpectedToken)),
Some((region, Err(err))) => Err((region, ParseError::LexError(err))),
None => Err((InputRegionTag::default(), ParseError::UnexpectedEnd)),
2024-05-11 00:00:20 +02:00
}
}
2024-05-14 09:56:19 +02:00
/* parse an optional type annotation
* `: T`
*/
2024-05-12 18:56:10 +02:00
pub fn parse_type_tag<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result<Option<laddertypes::TypeTerm>, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
2024-05-12 18:56:10 +02:00
{
let peek = { tokens.peek().cloned() };
if let Some((region, peektok)) = peek {
match peektok {
2024-05-12 18:56:10 +02:00
Ok(LTIRToken::AssignType(typeterm_str)) => {
tokens.next();
match typectx.write().unwrap().parse(typeterm_str.as_str()) {
2024-05-14 09:56:19 +02:00
Ok(typeterm) => Ok(Some(typeterm)),
Err(parse_error) => Err((region, ParseError::TypeParseError(parse_error))),
2024-05-12 18:56:10 +02:00
}
}
2024-05-14 09:56:19 +02:00
_ => Ok(None),
2024-05-12 18:56:10 +02:00
}
} else {
2024-05-14 09:56:19 +02:00
Ok(None)
2024-05-12 18:56:10 +02:00
}
}
2024-05-14 09:56:19 +02:00
/// A name-binding pattern, as produced by `parse_binding_expr`.
#[derive(Debug, PartialEq, Eq)]
pub enum VariableBinding {
/// A single symbol with an optional type annotation (`x` or `x : T`).
Atomic {
symbol: String,
typtag: Option<laddertypes::TypeTerm>
},
/// A group of nested bindings with an optional type annotation on the
/// group as a whole (`{ x y } : T`).
Struct {
members: Vec< VariableBinding >,
typtag: Option<laddertypes::TypeTerm>
}
}
impl VariableBinding {
    /// Turns the binding into a flat list of `(symbol, type tag)` pairs.
    ///
    /// Nested `Struct` bindings are walked depth-first in member order. The
    /// type tag attached to a `Struct` itself is dropped; only the tags of
    /// the atomic bindings are kept.
    pub fn flatten(self) -> Vec<(String, Option<laddertypes::TypeTerm>)> {
        match self {
            VariableBinding::Atomic { symbol, typtag } => vec![(symbol, typtag)],
            VariableBinding::Struct { members, .. } => {
                let mut flat = Vec::new();
                for member in members {
                    flat.extend(member.flatten());
                }
                flat
            }
        }
    }
}
/* parse a symbol binding of the form
* `x`
* or `x : T`
*/
pub fn parse_binding_expr<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result< VariableBinding, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
2024-05-14 09:56:19 +02:00
{
if let Some((region, peektok)) = tokens.peek().clone() {
match peektok {
Ok(LTIRToken::BlockOpen) => {
Ok(VariableBinding::Struct {
members: parse_binding_block(typectx, tokens)?,
typtag: parse_type_tag(typectx, tokens)?
})
}
Ok(LTIRToken::Symbol(_)) => {
Ok(VariableBinding::Atomic{
symbol: parse_symbol(tokens)?,
typtag: parse_type_tag(typectx, tokens)?
})
}
Err(err) => Err((*region, ParseError::LexError(err.clone()))),
_ => Err((*region, ParseError::UnexpectedToken))
2024-05-14 09:56:19 +02:00
}
} else {
Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
2024-05-14 09:56:19 +02:00
}
}
/* parse a block of symbol bidnings
* `{ x:T; y:U; ... }`
*/
pub fn parse_binding_block<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result< Vec<VariableBinding>, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
2024-05-14 09:56:19 +02:00
{
let mut last_region = InputRegionTag::default();
2024-05-14 09:56:19 +02:00
let _ = parse_expect(tokens, LTIRToken::BlockOpen)?;
let mut bindings = Vec::new();
while let Some((region, peektok)) = tokens.peek() {
last_region = *region;
2024-05-14 09:56:19 +02:00
match peektok {
Ok(LTIRToken::BlockClose) => {
tokens.next();
return Ok(bindings);
}
Ok(LTIRToken::StatementSep) => {
tokens.next();
}
Ok(_) => {
bindings.push(parse_binding_expr(typectx, tokens)?);
}
Err(err) => {
return Err((last_region, ParseError::LexError(err.clone())));
2024-05-14 09:56:19 +02:00
}
}
}
Err((last_region, ParseError::UnexpectedEnd))
2024-05-14 09:56:19 +02:00
}
2024-05-09 20:13:10 +02:00
pub fn parse_statement<It>(
2024-05-12 18:56:10 +02:00
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result<crate::expr::Statement, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
2024-05-09 20:13:10 +02:00
{
2024-05-13 22:55:24 +02:00
if let Some((region, peektok)) = tokens.peek() {
2024-05-11 00:00:20 +02:00
match peektok {
Ok(LTIRToken::Symbol(sym)) => {
match sym.as_str() {
"!" => {
tokens.next();
2024-05-11 18:07:58 +02:00
// todo accept address-expression instead of symbol
2024-05-11 00:00:20 +02:00
let name = parse_symbol(tokens)?;
2024-05-12 18:56:10 +02:00
let val_expr = parse_expr(typectx, tokens)?;
2024-05-11 00:00:20 +02:00
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Assignment {
var_id: name,
2024-05-12 18:58:39 +02:00
val_expr,
2024-05-11 00:00:20 +02:00
})
}
"let" => {
tokens.next();
let name = parse_symbol(tokens)?;
2024-05-14 09:56:19 +02:00
let typ = parse_type_tag(typectx, tokens)?;
/* todo
let mut variable_bindings = parse_binding_expr(typectx, tokens)?;
*/
let _ = parse_expect(tokens, LTIRToken::AssignValue);
2024-05-12 18:56:10 +02:00
let val_expr = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
2024-05-12 18:58:39 +02:00
Ok(Statement::LetAssign {
2024-05-14 09:56:19 +02:00
typ: match typ {
Some(t) => Some(Ok(t)),
None => None
},
var_id: name,
2024-05-12 18:58:39 +02:00
val_expr,
})
}
2024-05-11 00:00:20 +02:00
"while" => {
tokens.next();
let _ = parse_expect(tokens, LTIRToken::ExprOpen)?;
2024-05-12 18:56:10 +02:00
let cond = parse_expr(typectx, tokens)?;
2024-05-11 00:00:20 +02:00
let _ = parse_expect(tokens, LTIRToken::ExprClose)?;
Ok(Statement::WhileLoop {
condition: cond,
2024-05-14 09:56:19 +02:00
body: parse_statement_block(typectx, tokens)?,
2024-05-11 00:00:20 +02:00
})
}
"return" => {
tokens.next();
2024-05-12 18:56:10 +02:00
let expr = parse_expr(typectx, tokens)?;
2024-05-11 00:00:20 +02:00
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
2024-05-12 18:56:10 +02:00
Ok(Statement::Return(parse_expr(typectx, tokens)?))
2024-05-11 00:00:20 +02:00
}
_ => {
2024-05-12 18:56:10 +02:00
let expr = parse_expr(typectx, tokens)?;
2024-05-11 00:00:20 +02:00
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Expr(expr))
}
}
}
Ok(_) => {
2024-05-12 18:56:10 +02:00
let expr = parse_expr(typectx, tokens)?;
2024-05-11 00:00:20 +02:00
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Expr(expr))
2024-05-12 18:58:39 +02:00
}
Err(err) => Err((*region, ParseError::LexError(err.clone()))),
2024-05-11 00:00:20 +02:00
}
} else {
Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
2024-05-11 00:00:20 +02:00
}
2024-05-09 20:13:10 +02:00
}
2024-05-14 09:56:19 +02:00
pub fn parse_statement_block<It>(
2024-05-12 18:56:10 +02:00
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result<Vec<Statement>, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
2024-05-09 20:13:10 +02:00
{
2024-05-11 00:00:20 +02:00
let _ = parse_expect(tokens, LTIRToken::BlockOpen)?;
let mut statements = Vec::new();
2024-05-13 22:55:24 +02:00
while let Some((region, peektok)) = tokens.peek() {
2024-05-11 00:00:20 +02:00
match peektok {
Ok(LTIRToken::BlockClose) => {
tokens.next();
2024-05-12 18:58:39 +02:00
return Ok(statements);
}
Ok(_) => {
statements.push(parse_statement(typectx, tokens)?);
}
Err(err) => {
return Err((*region, ParseError::LexError(err.clone())));
2024-05-11 00:00:20 +02:00
}
}
}
Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
2024-05-09 20:13:10 +02:00
}
2024-05-11 00:00:20 +02:00
pub fn parse_atom<It>(
tokens: &mut Peekable<It>,
) -> Result<crate::expr::LTExpr, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
2024-05-11 00:00:20 +02:00
{
match tokens.next() {
2024-05-13 22:55:24 +02:00
Some((region, Ok(LTIRToken::Symbol(sym)))) => Ok(LTExpr::symbol(sym.as_str())),
Some((region, Ok(LTIRToken::Char(c)))) => Ok(LTExpr::lit_uint(c as u64)),
Some((region, Ok(LTIRToken::Num(n)))) => Ok(LTExpr::lit_uint(n as u64)),
Some((region, Ok(_))) => Err((region, ParseError::UnexpectedToken)),
Some((region, Err(err))) => Err((region, ParseError::LexError(err))),
None => Err((InputRegionTag::default(), ParseError::UnexpectedEnd)),
2024-05-11 00:00:20 +02:00
}
}
2024-05-09 20:13:10 +02:00
pub fn parse_expr<It>(
2024-05-12 18:56:10 +02:00
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result<crate::expr::LTExpr, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
2024-05-09 20:13:10 +02:00
{
let mut children = Vec::new();
2024-05-13 22:55:24 +02:00
while let Some((region, tok)) = tokens.peek() {
2024-05-11 00:00:20 +02:00
match tok {
Ok(LTIRToken::Lambda) => {
if children.len() == 0 {
tokens.next();
2024-05-14 09:56:19 +02:00
let mut variable_bindings = parse_binding_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::MapsTo);
2024-05-12 18:56:10 +02:00
let body = parse_expr(typectx, tokens)?;
2024-05-11 00:00:20 +02:00
2024-05-12 18:58:39 +02:00
return Ok(LTExpr::Abstraction {
2024-05-14 09:56:19 +02:00
args: variable_bindings.flatten().into_iter().map(|(s,t)| (s,t.map(|t|Ok(t))) ).collect(),
2024-05-12 18:58:39 +02:00
body: Box::new(body),
2024-05-11 00:00:20 +02:00
});
} else {
return Err((*region, ParseError::UnexpectedToken));
2024-05-11 00:00:20 +02:00
}
2024-05-09 20:13:10 +02:00
}
2024-05-11 00:00:20 +02:00
Ok(LTIRToken::ExprOpen) => {
tokens.next();
2024-05-13 22:55:24 +02:00
while let Some((region, peektok)) = tokens.peek() {
2024-05-11 00:00:20 +02:00
match peektok {
Ok(LTIRToken::ExprClose) => {
tokens.next();
break;
}
_ => {}
}
2024-05-12 18:56:10 +02:00
children.push(parse_expr(typectx, tokens)?);
2024-05-11 00:00:20 +02:00
}
2024-05-12 18:58:39 +02:00
}
Ok(LTIRToken::ExprClose) => {
break;
}
2024-05-12 18:56:10 +02:00
Ok(LTIRToken::BlockOpen) => {
2024-05-14 09:56:19 +02:00
children.push(LTExpr::block(parse_statement_block(typectx, tokens)?));
2024-05-12 18:56:10 +02:00
}
2024-05-12 18:58:39 +02:00
Ok(LTIRToken::BlockClose) => {
break;
}
Ok(LTIRToken::StatementSep) => {
break;
}
Ok(LTIRToken::Symbol(name)) => match name.as_str() {
"if" => {
tokens.next();
let _ = parse_expect(tokens, LTIRToken::ExprOpen)?;
let cond = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::ExprClose)?;
2024-05-14 09:56:19 +02:00
let if_expr = LTExpr::block(parse_statement_block(typectx, tokens)?);
2024-05-12 18:58:39 +02:00
let mut else_expr = LTExpr::block(vec![]);
2024-05-11 00:00:20 +02:00
2024-05-13 22:55:24 +02:00
if let Some((region, peektok)) = tokens.peek() {
2024-05-12 18:58:39 +02:00
if let Ok(LTIRToken::Symbol(name)) = peektok {
if name == "else" {
tokens.next();
else_expr = parse_expr(typectx, tokens)?;
2024-05-11 00:00:20 +02:00
}
}
}
2024-05-12 18:58:39 +02:00
children.push(LTExpr::Branch {
condition: Box::new(cond),
if_expr: Box::new(if_expr),
else_expr: Box::new(else_expr),
});
}
name => {
children.push(parse_atom(tokens)?);
2024-05-11 00:00:20 +02:00
}
2024-05-12 18:58:39 +02:00
},
Ok(atom) => {
children.push(parse_atom(tokens)?);
}
Err(err) => {
return Err((*region, ParseError::LexError(err.clone())));
2024-05-11 00:00:20 +02:00
}
2024-05-09 20:13:10 +02:00
}
}
if children.len() > 0 {
let head = children.remove(0);
Ok(LTExpr::Application {
2024-05-12 18:56:10 +02:00
typ: None,
2024-05-09 20:13:10 +02:00
head: Box::new(head),
2024-05-12 18:58:39 +02:00
body: children,
2024-05-09 20:13:10 +02:00
})
} else {
Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
2024-05-09 20:13:10 +02:00
}
}
2024-05-14 09:56:19 +02:00
// Unit tests for `parse_binding_expr`. Gated on `cfg(test)` so the module
// and its imports are not compiled (and warned about) in regular builds.
#[cfg(test)]
mod tests {
    use std::sync::{Arc, RwLock};

    /// A bare symbol parses to an untyped atomic binding.
    #[test]
    fn test_parse_atomic_binding() {
        let mut lexer = crate::lexer::LTIRLexer::from("x".chars()).peekable();
        let typectx = Arc::new(RwLock::new(laddertypes::dict::TypeDict::new()));
        let bindings = crate::parser::parse_binding_expr(&typectx, &mut lexer);

        assert_eq!(
            bindings,
            Ok(crate::parser::VariableBinding::Atomic {
                symbol: "x".into(),
                typtag: None
            })
        );
    }

    /// A symbol followed by `:T` parses to a typed atomic binding.
    #[test]
    fn test_parse_typed_atomic_binding() {
        let mut lexer = crate::lexer::LTIRLexer::from("x:T".chars()).peekable();
        let typectx = Arc::new(RwLock::new(laddertypes::dict::TypeDict::new()));
        let bindings = crate::parser::parse_binding_expr(&typectx, &mut lexer);

        assert_eq!(
            bindings,
            Ok(crate::parser::VariableBinding::Atomic {
                symbol: "x".into(),
                typtag: Some(typectx.write().unwrap().parse("T").unwrap())
            })
        );
    }

    /// A braced list of symbols parses to an untyped struct binding.
    #[test]
    fn test_parse_struct_binding() {
        let mut lexer = crate::lexer::LTIRLexer::from("{x y}".chars()).peekable();
        let typectx = Arc::new(RwLock::new(laddertypes::dict::TypeDict::new()));
        let bindings = crate::parser::parse_binding_expr(&typectx, &mut lexer);

        assert_eq!(
            bindings,
            Ok(crate::parser::VariableBinding::Struct {
                members: vec![
                    crate::parser::VariableBinding::Atomic { symbol: "x".into(), typtag: None },
                    crate::parser::VariableBinding::Atomic { symbol: "y".into(), typtag: None }
                ],
                typtag: None
            })
        );
    }

    /// A type annotation after the closing brace applies to the struct.
    #[test]
    fn test_parse_typed_struct_binding1() {
        let mut lexer = crate::lexer::LTIRLexer::from("{x y}:T".chars()).peekable();
        let typectx = Arc::new(RwLock::new(laddertypes::dict::TypeDict::new()));
        let bindings = crate::parser::parse_binding_expr(&typectx, &mut lexer);

        assert_eq!(
            bindings,
            Ok(crate::parser::VariableBinding::Struct {
                members: vec![
                    crate::parser::VariableBinding::Atomic { symbol: "x".into(), typtag: None },
                    crate::parser::VariableBinding::Atomic { symbol: "y".into(), typtag: None }
                ],
                typtag: Some(typectx.write().unwrap().parse("T").unwrap())
            })
        );
    }

    /// Members may carry their own annotations, independent of the struct's.
    #[test]
    fn test_parse_typed_struct_binding2() {
        let mut lexer = crate::lexer::LTIRLexer::from("{x:U; y}:T".chars()).peekable();
        let typectx = Arc::new(RwLock::new(laddertypes::dict::TypeDict::new()));
        let bindings = crate::parser::parse_binding_expr(&typectx, &mut lexer);

        let type_u = typectx.write().unwrap().parse("U").unwrap();
        let type_t = typectx.write().unwrap().parse("T").unwrap();

        assert_eq!(
            bindings,
            Ok(crate::parser::VariableBinding::Struct {
                members: vec![
                    crate::parser::VariableBinding::Atomic {
                        symbol: "x".into(),
                        typtag: Some(type_u)
                    },
                    crate::parser::VariableBinding::Atomic { symbol: "y".into(), typtag: None }
                ],
                typtag: Some(type_t)
            })
        );
    }
}