ast: input region tag for every node; fixup tests & region calculation in lexer

This commit is contained in:
Michael Sippel 2024-10-01 14:51:45 +02:00
parent bacb3cf519
commit 7441826f58
Signed by: senvas
GPG key ID: F96CF119C34B64A6
5 changed files with 137 additions and 106 deletions

View file

@ -8,7 +8,7 @@ use {
}
};
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub enum Statement {
Assignment {
name_region: InputRegionTag,
@ -28,21 +28,24 @@ pub enum Statement {
Expr(LTExpr),
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
/// Error half of [`TypeTag`]: the reasons a type tag holds no `laddertypes::TypeTerm`.
pub enum TypeError {
/// Wraps an error value produced by the `laddertypes` parser.
ParseError(laddertypes::parser::ParseError),
/// Carries a pair of type terms, one `expected` and one `received`.
Mismatch {
/// The type term that was expected.
expected: laddertypes::TypeTerm,
/// The type term that was received instead.
received: laddertypes::TypeTerm,
},
// NOTE(review): presumably reported when a symbol lookup finds nothing — confirm at the use sites.
NoSymbol,
// NOTE(review): identifier is spelled "Superflous" (sic, "superfluous"); renaming it would break every user of the variant.
SuperflousArgument,
// NOTE(review): looks like a placeholder variant — confirm whether it is still needed.
Todo
}
/// Type annotation stored on AST nodes: `Ok` holds a `laddertypes::TypeTerm`,
/// `Err` holds the [`TypeError`] recorded in its place.
pub type TypeTag = Result<laddertypes::TypeTerm, TypeError>;
#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq)]
pub enum LTExpr {
WordLiteral {
typ: Option<TypeTag>,
region: InputRegionTag,
val: tisc::VM_Word,
},
StringLiteral {
@ -55,65 +58,68 @@ pub enum LTExpr {
symbol: String,
},
Ascend {
region: InputRegionTag,
typ: TypeTag,
expr: Box<LTExpr>
},
Descend {
region: InputRegionTag,
typ: TypeTag,
expr: Box<LTExpr>
},
Application {
region: InputRegionTag,
typ: Option<TypeTag>,
head: Box<LTExpr>,
body: Vec<LTExpr>,
},
Abstraction {
region: InputRegionTag,
args: Vec<(InputRegionTag, String, Option<TypeTag>)>,
body: Box<LTExpr>,
},
Branch {
region: InputRegionTag,
condition: Box<LTExpr>,
if_expr: Box<LTExpr>,
else_expr: Box<LTExpr>,
},
Block {
region: InputRegionTag,
statements: Vec<Statement>,
},
ExportBlock {
region: InputRegionTag,
statements: Vec<Statement>,
}
}
impl LTExpr {
/*
pub fn symbol(str: &str) -> Self {
LTExpr::Symbol {
typ: None, //typectx.write().unwrap().parse("<Ref memory::Word>~Symbol~<Seq Char>").expect("parse typeterm"),
symbol: String::from(str),
}
impl LTExpr {
/// Returns a copy of the input region tag stored on this expression node.
///
/// Every `LTExpr` variant has a `region` field. All other fields are skipped
/// with `..`, so no binding is introduced that is never read, and the match
/// does not have to be touched when a variant gains or loses an unrelated field.
pub fn get_region(&self) -> InputRegionTag {
    match self {
        LTExpr::WordLiteral { region, .. }
        | LTExpr::StringLiteral { region, .. }
        | LTExpr::Symbol { region, .. }
        | LTExpr::Ascend { region, .. }
        | LTExpr::Descend { region, .. }
        | LTExpr::Application { region, .. }
        | LTExpr::Abstraction { region, .. }
        | LTExpr::Branch { region, .. }
        | LTExpr::Block { region, .. }
        | LTExpr::ExportBlock { region, .. } => region.clone(),
    }
}
*/
/// Builds an `LTExpr::WordLiteral` from `val`.
///
/// The node's region is `InputRegionTag::default()`, and `val` is converted to
/// `tisc::VM_Word` with an `as` cast.
// NOTE(review): an `as` cast performs no range check; whether every `u64` fits
// depends on the definition of `tisc::VM_Word`, which is not visible here.
pub fn lit_uint(val: u64) -> Self {
LTExpr::WordLiteral {
typ: None, //typectx.write().unwrap().parse("_2^64~machine::UInt64~machine::Word").expect("parse typeterm"),
region: InputRegionTag::default(),
val: val as tisc::VM_Word,
}
}
/*
pub fn abstraction(args: Vec<(&str, &str)>, body: LTExpr) -> LTExpr {
LTExpr::Abstraction {
args: args
.into_iter()
.map(
|(arg_name, arg_type)| (arg_name.into(), None), //typectx.write().unwrap().parse(t).expect("parse typeterm")
)
.collect(),
body: Box::new(body),
}
}
*/
pub fn application(head: LTExpr, body: Vec<LTExpr>) -> Self {
LTExpr::Application {
region: InputRegionTag::default(),
typ: None,
head: Box::new(head),
body: body,
@ -121,7 +127,7 @@ impl LTExpr {
}
/// Builds an `LTExpr::Block` whose statements are `body`.
pub fn block(body: Vec<Statement>) -> Self {
LTExpr::Block { statements: body }
LTExpr::Block { region: InputRegionTag::default(), statements: body }
}
}
@ -134,45 +140,3 @@ impl Statement {
}
}
/*
impl LTExpr {
fn get_type(&self, dict: &laddertypes::dict::TypeDict) -> laddertypes::TypeTerm {
match self {
LTExpr::StringLiteral{ val:_, typ } => { typ.clone() }
LTExpr::MemoryLiteral{ val:_, typ } => { typ.clone() }
LTExpr::Abstraction{ arg_type, val_expr } => {
laddertypes::TypeTerm::App(vec![
laddertypes::TypeTerm::TypeID(dict.get_typeid(&"Fn".into()).expect("expected function type")),
arg_type.clone(),
val_expr.get_type(dict)
])
}
LTExpr::Application{ head, body } => {
match head.deref() {
LTExpr::Abstraction{ arg_type, val_expr } => {
val_expr.get_type(dict)
}
_ => {
panic!("invalid application");
}
}
}
LTExpr::Block{ statements } => {
if let Some(last_statement) = statements.last() {
match last_statement {
Statement::Return(ret_expr) |
Statement::Expr(ret_expr) => {
ret_expr.get_type(dict)
}
_ => {
laddertypes::TypeTerm::unit()
}
}
} else {
laddertypes::TypeTerm::unit()
}
}
}
}
}
*/

View file

@ -267,6 +267,8 @@ where
break;
}
Some('\\') => {
self.position += 2;
region.end += 2;
match self.chars.next() {
Some('0') => {
val.push('\0');
@ -281,6 +283,8 @@ where
}
}
Some(c) => {
self.position += 1;
region.end += 1;
val.push(c);
}
None => {
@ -339,7 +343,7 @@ where
LexerState::Ascend(s) |
LexerState::Descend(s) => {
if *c == ')' {
if *c == ')' || *c == ';' {
let token = state.clone().into_token().unwrap();
return Some((region, Ok(token)));
} else {
@ -367,11 +371,15 @@ where
match s.as_str(){
"as"=> {
self.chars.next();
self.position += 1;
region.end += 1;
state = LexerState::Ascend(String::new());
continue;
}
"des" => {
self.chars.next();
self.position += 1;
region.end += 1;
state = LexerState::Descend(String::new());
continue;
}

View file

@ -56,7 +56,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
pub fn parse_type_tag<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result<Option<laddertypes::TypeTerm>, (InputRegionTag, ParseError)>
) -> Result<Option<(InputRegionTag, laddertypes::TypeTerm)>, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
{
let peek = { tokens.peek().cloned() };
@ -65,7 +65,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
Ok(LTIRToken::AssignType(typeterm_str)) => {
tokens.next();
match typectx.write().unwrap().parse(typeterm_str.as_str()) {
Ok(typeterm) => Ok(Some(typeterm)),
Ok(typeterm) => Ok(Some((region, typeterm))),
Err(parse_error) => Err((region, ParseError::TypeParseError(parse_error))),
}
}
@ -81,11 +81,11 @@ pub enum VariableBinding {
Atomic {
region: InputRegionTag,
symbol: String,
typtag: Option<laddertypes::TypeTerm>
typtag: Option<(InputRegionTag, laddertypes::TypeTerm)>
},
Struct {
members: Vec< VariableBinding >,
typtag: Option<laddertypes::TypeTerm>
typtag: Option<(InputRegionTag, laddertypes::TypeTerm)>
}
}
@ -93,7 +93,7 @@ impl VariableBinding {
pub fn flatten(self) -> Vec<(InputRegionTag, String, Option<laddertypes::TypeTerm>)> {
match self {
VariableBinding::Atomic{ region, symbol, typtag } =>
vec![ (region, symbol, typtag) ],
vec![ (region, symbol, typtag.map(|t|t.1)) ],
VariableBinding::Struct{ members, typtag } =>
members
.into_iter()
@ -210,7 +210,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
Ok(Statement::LetAssign {
typ: match typ {
Some(t) => Some(Ok(t)),
Some((r,t)) => Some(Ok(t)),
None => None
},
var_id: name,
@ -306,6 +306,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
match tok {
Ok(LTIRToken::Lambda) => {
if children.len() == 0 {
let region = region.clone();
tokens.next();
let mut variable_bindings = parse_binding_expr(typectx, tokens)?;
@ -313,6 +314,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
let body = parse_expr(typectx, tokens)?;
return Ok(LTExpr::Abstraction {
region,
args: variable_bindings.flatten().into_iter().map(|(r,s,t)| (r,s,t.map(|t|Ok(t))) ).collect(),
body: Box::new(body),
});
@ -361,6 +363,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
if let Some(expr) = children.pop() {
children.push(LTExpr::Ascend {
region: region.clone(),
typ,
expr: Box::new(expr)
});
@ -378,6 +381,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
if let Some(expr) = children.pop() {
children.push(LTExpr::Descend {
region,
typ,
expr: Box::new(expr)
});
@ -387,6 +391,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
}
Ok(LTIRToken::Symbol(name)) => match name.as_str() {
"if" => {
let region = region.clone();
tokens.next();
let _ = parse_expect(tokens, LTIRToken::ExprOpen)?;
let cond = parse_expr(typectx, tokens)?;
@ -404,15 +409,18 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
}
children.push(LTExpr::Branch {
region,
condition: Box::new(cond),
if_expr: Box::new(if_expr),
else_expr: Box::new(else_expr),
});
},
"export" => {
let region = region.clone();
tokens.next();
let block = parse_statement_block(typectx, tokens)?;
children.push(LTExpr::ExportBlock {
region,
statements: block
});
},
@ -429,13 +437,16 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
}
}
if children.len() > 0 {
if children.len() > 1 {
let head = children.remove(0);
Ok(LTExpr::Application {
region: InputRegionTag::default(),
typ: None,
head: Box::new(head),
body: children,
})
} else if children.len() == 1 {
Ok(children.pop().unwrap())
} else {
Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
}
@ -444,7 +455,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
mod tests {
use crate::parser::LTExpr;
use crate::parser::{LTExpr, InputRegionTag};
use std::sync::{Arc, RwLock};
#[test]
@ -456,6 +467,7 @@ mod tests {
assert_eq!(
bindings,
Ok(crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 0, end: 1 },
symbol: "x".into(),
typtag: None
})
@ -470,7 +482,32 @@ mod tests {
assert_eq!(
expr,
Ok(LTExpr::DoubleQuote("testlo".into()))
Ok(LTExpr::StringLiteral{
region: InputRegionTag{ begin: 0, end: 6 },
value: "test".into()
})
);
}
#[test]
fn test_parse_ascend() {
    // A string literal followed by an `as` type ascription.
    let source = "\"ff\" as <Seq <Digit 16>>";
    let mut tokens = crate::lexer::LTIRLexer::from(source.chars()).peekable();
    let typectx = Arc::new(RwLock::new(laddertypes::dict::TypeDict::new()));

    let parsed = crate::parser::parse_expr(&typectx, &mut tokens);

    // Parse the expected type only after `parse_expr` has run, as the
    // original assertion did, so the type dictionary sees the same sequence.
    let expected_typ = typectx
        .write()
        .unwrap()
        .parse("<Seq <Digit 16>>")
        .map_err(crate::parser::TypeError::ParseError);

    // The literal `"ff"` covers the first four characters of the input.
    let literal = LTExpr::StringLiteral {
        region: InputRegionTag { begin: 0, end: 4 },
        value: "ff".into(),
    };
    let expected = LTExpr::Ascend {
        region: InputRegionTag { begin: 5, end: 24 },
        typ: expected_typ,
        expr: Box::new(literal),
    };

    assert_eq!(parsed, Ok(expected));
}
@ -484,8 +521,9 @@ mod tests {
assert_eq!(
bindings,
Ok(crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 0, end: 1 },
symbol: "x".into(),
typtag: Some(typectx.write().unwrap().parse("T").unwrap())
typtag: Some((InputRegionTag{begin: 1, end:3}, typectx.write().unwrap().parse("T").unwrap()))
})
);
}
@ -500,8 +538,12 @@ mod tests {
bindings,
Ok(crate::parser::VariableBinding::Struct{
members: vec![
crate::parser::VariableBinding::Atomic{ symbol: "x".into(), typtag: None },
crate::parser::VariableBinding::Atomic{ symbol: "y".into(), typtag: None }
crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 1, end: 2 },
symbol: "x".into(), typtag: None },
crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 3, end: 4 },
symbol: "y".into(), typtag: None }
],
typtag: None
})
@ -518,10 +560,14 @@ mod tests {
bindings,
Ok(crate::parser::VariableBinding::Struct{
members: vec![
crate::parser::VariableBinding::Atomic{ symbol: "x".into(), typtag: None },
crate::parser::VariableBinding::Atomic{ symbol: "y".into(), typtag: None }
crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 1, end: 2 },
symbol: "x".into(), typtag: None },
crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 3, end: 4 },
symbol: "y".into(), typtag: None }
],
typtag: Some(typectx.write().unwrap().parse("T").unwrap())
typtag: Some((InputRegionTag{begin:5, end:7}, typectx.write().unwrap().parse("T").unwrap()))
})
);
}
@ -540,12 +586,16 @@ mod tests {
Ok(crate::parser::VariableBinding::Struct{
members: vec![
crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 1, end: 2 },
symbol: "x".into(),
typtag: Some(type_u)
typtag: Some((InputRegionTag{begin: 2, end:4}, type_u))
},
crate::parser::VariableBinding::Atomic{ symbol: "y".into(), typtag: None }
crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 6, end: 7 },
symbol: "y".into(),
typtag: None }
],
typtag: Some(type_t)
typtag: Some((InputRegionTag{begin: 8, end:10}, type_t))
})
);
}

View file

@ -1,7 +1,7 @@
use {
crate::{
lexer::InputRegionTag,
expr::{LTExpr, Statement},
expr::{LTExpr, Statement, TypeTag, TypeError},
symbols::{Scope, SymbolDef},
},
std::{
@ -142,7 +142,7 @@ impl ProcedureCompiler {
var_id,
val_expr,
} => match val_expr {
LTExpr::Abstraction { args: _, body: _ } => {
LTExpr::Abstraction { region:_, args: _, body: _ } => {
self.symbols
.write()
.unwrap()
@ -231,22 +231,22 @@ impl ProcedureCompiler {
self.asm = self.asm.lit(c as i64);
}
}
LTExpr::WordLiteral { typ, val } => {
LTExpr::WordLiteral { region, val } => {
self.asm = self.asm.lit(*val);
}
LTExpr::Ascend { typ, expr } => {
LTExpr::Ascend { region, typ, expr } => {
self = self.compile(expr);
}
LTExpr::Descend { typ, expr } => {
LTExpr::Descend { region, typ, expr } => {
self = self.compile(expr);
}
LTExpr::Application { typ, head, body } => {
LTExpr::Application { region, typ, head, body } => {
for arg in body.iter().rev() {
self = self.compile(arg);
}
self = self.compile(head);
}
LTExpr::Abstraction { args, body } => {
LTExpr::Abstraction { region, args, body } => {
for (region, arg_name, arg_type) in args.iter() {
if let Some(Ok(typeterm)) = arg_type {
let id = self
@ -265,6 +265,7 @@ impl ProcedureCompiler {
self = self.compile(body);
}
LTExpr::Branch {
region,
condition,
if_expr,
else_expr,
@ -281,12 +282,12 @@ impl ProcedureCompiler {
self.asm = asm;
self.asm = self.asm.branch(if_asm, else_asm);
}
LTExpr::Block { statements } => {
LTExpr::Block { region, statements } => {
for s in statements.iter() {
self = self.compile_statement(s, false);
}
}
LTExpr::ExportBlock{ statements } => {
LTExpr::ExportBlock{ region, statements } => {
for s in statements.iter() {
self = self.compile_statement(s, true);
}

View file

@ -38,15 +38,23 @@ impl SymbolDef {
out_types,
link_addr: _,
export: _,
} => laddertypes::TypeTerm::App(vec![
typectx
} => laddertypes::TypeTerm::App(
std::iter::once(
typectx
.write()
.unwrap()
.parse("Fn")
.expect("parse typeterm"),
laddertypes::TypeTerm::App(in_types.clone()),
laddertypes::TypeTerm::App(out_types.clone()),
]),
.parse("Func")
.expect("parse typeterm")
).chain(
in_types.clone().into_iter()
).chain(
std::iter::once(
typectx.write().unwrap().parse("Struct").expect("parse typeterm")
).chain(
out_types.clone().into_iter()
)
).collect()
),
}
}
}
@ -149,7 +157,7 @@ impl Scope {
for (name, def) in self.symbols.iter_mut() {
if let Some(offset) = linker.get_link_addr( name ) {
match def {
SymbolDef::Procedure {
SymbolDef::Procedure {
in_types:_,out_types:_,
link_addr,
export:_
@ -162,7 +170,7 @@ impl Scope {
_ => {}
}
}
}
}
}