ast: add input region tag to every node; fix up tests & region calculation in lexer

This commit is contained in:
Michael Sippel 2024-10-01 14:51:45 +02:00
parent bacb3cf519
commit 7441826f58
Signed by: senvas
GPG key ID: F96CF119C34B64A6
5 changed files with 137 additions and 106 deletions

View file

@ -8,7 +8,7 @@ use {
} }
}; };
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub enum Statement { pub enum Statement {
Assignment { Assignment {
name_region: InputRegionTag, name_region: InputRegionTag,
@ -28,21 +28,24 @@ pub enum Statement {
Expr(LTExpr), Expr(LTExpr),
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub enum TypeError { pub enum TypeError {
ParseError(laddertypes::parser::ParseError), ParseError(laddertypes::parser::ParseError),
Mismatch { Mismatch {
expected: laddertypes::TypeTerm, expected: laddertypes::TypeTerm,
received: laddertypes::TypeTerm, received: laddertypes::TypeTerm,
}, },
NoSymbol,
SuperflousArgument,
Todo
} }
pub type TypeTag = Result<laddertypes::TypeTerm, TypeError>; pub type TypeTag = Result<laddertypes::TypeTerm, TypeError>;
#[derive(Clone, Debug)] #[derive(Clone, Debug, PartialEq)]
pub enum LTExpr { pub enum LTExpr {
WordLiteral { WordLiteral {
typ: Option<TypeTag>, region: InputRegionTag,
val: tisc::VM_Word, val: tisc::VM_Word,
}, },
StringLiteral { StringLiteral {
@ -55,65 +58,68 @@ pub enum LTExpr {
symbol: String, symbol: String,
}, },
Ascend { Ascend {
region: InputRegionTag,
typ: TypeTag, typ: TypeTag,
expr: Box<LTExpr> expr: Box<LTExpr>
}, },
Descend { Descend {
region: InputRegionTag,
typ: TypeTag, typ: TypeTag,
expr: Box<LTExpr> expr: Box<LTExpr>
}, },
Application { Application {
region: InputRegionTag,
typ: Option<TypeTag>, typ: Option<TypeTag>,
head: Box<LTExpr>, head: Box<LTExpr>,
body: Vec<LTExpr>, body: Vec<LTExpr>,
}, },
Abstraction { Abstraction {
region: InputRegionTag,
args: Vec<(InputRegionTag, String, Option<TypeTag>)>, args: Vec<(InputRegionTag, String, Option<TypeTag>)>,
body: Box<LTExpr>, body: Box<LTExpr>,
}, },
Branch { Branch {
region: InputRegionTag,
condition: Box<LTExpr>, condition: Box<LTExpr>,
if_expr: Box<LTExpr>, if_expr: Box<LTExpr>,
else_expr: Box<LTExpr>, else_expr: Box<LTExpr>,
}, },
Block { Block {
region: InputRegionTag,
statements: Vec<Statement>, statements: Vec<Statement>,
}, },
ExportBlock { ExportBlock {
region: InputRegionTag,
statements: Vec<Statement>, statements: Vec<Statement>,
} }
} }
impl LTExpr { impl LTExpr {
/* pub fn get_region(&self) -> InputRegionTag {
pub fn symbol(str: &str) -> Self { match self {
LTExpr::Symbol { LTExpr::WordLiteral { region, val } => region,
typ: None, //typectx.write().unwrap().parse("<Ref memory::Word>~Symbol~<Seq Char>").expect("parse typeterm"), LTExpr::StringLiteral { region, value } => region,
symbol: String::from(str), LTExpr::Symbol { region, typ, symbol } => region,
} LTExpr::Ascend { region, typ, expr } => region,
LTExpr::Descend{ region, typ, expr } => region,
LTExpr::Application{ region, typ, head, body } => region,
LTExpr::Abstraction{ region, args, body } => region,
LTExpr::Branch{ region, condition, if_expr, else_expr } => region,
LTExpr::Block{ region, statements } => region,
LTExpr::ExportBlock{ region, statements } => region
}.clone()
} }
*/
pub fn lit_uint(val: u64) -> Self { pub fn lit_uint(val: u64) -> Self {
LTExpr::WordLiteral { LTExpr::WordLiteral {
typ: None, //typectx.write().unwrap().parse("_2^64~machine::UInt64~machine::Word").expect("parse typeterm"), region: InputRegionTag::default(),
val: val as tisc::VM_Word, val: val as tisc::VM_Word,
} }
} }
/*
pub fn abstraction(args: Vec<(&str, &str)>, body: LTExpr) -> LTExpr {
LTExpr::Abstraction {
args: args
.into_iter()
.map(
|(arg_name, arg_type)| (arg_name.into(), None), //typectx.write().unwrap().parse(t).expect("parse typeterm")
)
.collect(),
body: Box::new(body),
}
}
*/
pub fn application(head: LTExpr, body: Vec<LTExpr>) -> Self { pub fn application(head: LTExpr, body: Vec<LTExpr>) -> Self {
LTExpr::Application { LTExpr::Application {
region: InputRegionTag::default(),
typ: None, typ: None,
head: Box::new(head), head: Box::new(head),
body: body, body: body,
@ -121,7 +127,7 @@ impl LTExpr {
} }
pub fn block(body: Vec<Statement>) -> Self { pub fn block(body: Vec<Statement>) -> Self {
LTExpr::Block { statements: body } LTExpr::Block { region: InputRegionTag::default(), statements: body }
} }
} }
@ -134,45 +140,3 @@ impl Statement {
} }
} }
/*
impl LTExpr {
fn get_type(&self, dict: &laddertypes::dict::TypeDict) -> laddertypes::TypeTerm {
match self {
LTExpr::StringLiteral{ val:_, typ } => { typ.clone() }
LTExpr::MemoryLiteral{ val:_, typ } => { typ.clone() }
LTExpr::Abstraction{ arg_type, val_expr } => {
laddertypes::TypeTerm::App(vec![
laddertypes::TypeTerm::TypeID(dict.get_typeid(&"Fn".into()).expect("expected function type")),
arg_type.clone(),
val_expr.get_type(dict)
])
}
LTExpr::Application{ head, body } => {
match head.deref() {
LTExpr::Abstraction{ arg_type, val_expr } => {
val_expr.get_type(dict)
}
_ => {
panic!("invalid application");
}
}
}
LTExpr::Block{ statements } => {
if let Some(last_statement) = statements.last() {
match last_statement {
Statement::Return(ret_expr) |
Statement::Expr(ret_expr) => {
ret_expr.get_type(dict)
}
_ => {
laddertypes::TypeTerm::unit()
}
}
} else {
laddertypes::TypeTerm::unit()
}
}
}
}
}
*/

View file

@ -267,6 +267,8 @@ where
break; break;
} }
Some('\\') => { Some('\\') => {
self.position += 2;
region.end += 2;
match self.chars.next() { match self.chars.next() {
Some('0') => { Some('0') => {
val.push('\0'); val.push('\0');
@ -281,6 +283,8 @@ where
} }
} }
Some(c) => { Some(c) => {
self.position += 1;
region.end += 1;
val.push(c); val.push(c);
} }
None => { None => {
@ -339,7 +343,7 @@ where
LexerState::Ascend(s) | LexerState::Ascend(s) |
LexerState::Descend(s) => { LexerState::Descend(s) => {
if *c == ')' { if *c == ')' || *c == ';' {
let token = state.clone().into_token().unwrap(); let token = state.clone().into_token().unwrap();
return Some((region, Ok(token))); return Some((region, Ok(token)));
} else { } else {
@ -367,11 +371,15 @@ where
match s.as_str(){ match s.as_str(){
"as"=> { "as"=> {
self.chars.next(); self.chars.next();
self.position += 1;
region.end += 1;
state = LexerState::Ascend(String::new()); state = LexerState::Ascend(String::new());
continue; continue;
} }
"des" => { "des" => {
self.chars.next(); self.chars.next();
self.position += 1;
region.end += 1;
state = LexerState::Descend(String::new()); state = LexerState::Descend(String::new());
continue; continue;
} }

View file

@ -56,7 +56,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
pub fn parse_type_tag<It>( pub fn parse_type_tag<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>, typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>, tokens: &mut Peekable<It>,
) -> Result<Option<laddertypes::TypeTerm>, (InputRegionTag, ParseError)> ) -> Result<Option<(InputRegionTag, laddertypes::TypeTerm)>, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)> where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
{ {
let peek = { tokens.peek().cloned() }; let peek = { tokens.peek().cloned() };
@ -65,7 +65,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
Ok(LTIRToken::AssignType(typeterm_str)) => { Ok(LTIRToken::AssignType(typeterm_str)) => {
tokens.next(); tokens.next();
match typectx.write().unwrap().parse(typeterm_str.as_str()) { match typectx.write().unwrap().parse(typeterm_str.as_str()) {
Ok(typeterm) => Ok(Some(typeterm)), Ok(typeterm) => Ok(Some((region, typeterm))),
Err(parse_error) => Err((region, ParseError::TypeParseError(parse_error))), Err(parse_error) => Err((region, ParseError::TypeParseError(parse_error))),
} }
} }
@ -81,11 +81,11 @@ pub enum VariableBinding {
Atomic { Atomic {
region: InputRegionTag, region: InputRegionTag,
symbol: String, symbol: String,
typtag: Option<laddertypes::TypeTerm> typtag: Option<(InputRegionTag, laddertypes::TypeTerm)>
}, },
Struct { Struct {
members: Vec< VariableBinding >, members: Vec< VariableBinding >,
typtag: Option<laddertypes::TypeTerm> typtag: Option<(InputRegionTag, laddertypes::TypeTerm)>
} }
} }
@ -93,7 +93,7 @@ impl VariableBinding {
pub fn flatten(self) -> Vec<(InputRegionTag, String, Option<laddertypes::TypeTerm>)> { pub fn flatten(self) -> Vec<(InputRegionTag, String, Option<laddertypes::TypeTerm>)> {
match self { match self {
VariableBinding::Atomic{ region, symbol, typtag } => VariableBinding::Atomic{ region, symbol, typtag } =>
vec![ (region, symbol, typtag) ], vec![ (region, symbol, typtag.map(|t|t.1)) ],
VariableBinding::Struct{ members, typtag } => VariableBinding::Struct{ members, typtag } =>
members members
.into_iter() .into_iter()
@ -210,7 +210,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
Ok(Statement::LetAssign { Ok(Statement::LetAssign {
typ: match typ { typ: match typ {
Some(t) => Some(Ok(t)), Some((r,t)) => Some(Ok(t)),
None => None None => None
}, },
var_id: name, var_id: name,
@ -306,6 +306,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
match tok { match tok {
Ok(LTIRToken::Lambda) => { Ok(LTIRToken::Lambda) => {
if children.len() == 0 { if children.len() == 0 {
let region = region.clone();
tokens.next(); tokens.next();
let mut variable_bindings = parse_binding_expr(typectx, tokens)?; let mut variable_bindings = parse_binding_expr(typectx, tokens)?;
@ -313,6 +314,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
let body = parse_expr(typectx, tokens)?; let body = parse_expr(typectx, tokens)?;
return Ok(LTExpr::Abstraction { return Ok(LTExpr::Abstraction {
region,
args: variable_bindings.flatten().into_iter().map(|(r,s,t)| (r,s,t.map(|t|Ok(t))) ).collect(), args: variable_bindings.flatten().into_iter().map(|(r,s,t)| (r,s,t.map(|t|Ok(t))) ).collect(),
body: Box::new(body), body: Box::new(body),
}); });
@ -361,6 +363,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
if let Some(expr) = children.pop() { if let Some(expr) = children.pop() {
children.push(LTExpr::Ascend { children.push(LTExpr::Ascend {
region: region.clone(),
typ, typ,
expr: Box::new(expr) expr: Box::new(expr)
}); });
@ -378,6 +381,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
if let Some(expr) = children.pop() { if let Some(expr) = children.pop() {
children.push(LTExpr::Descend { children.push(LTExpr::Descend {
region,
typ, typ,
expr: Box::new(expr) expr: Box::new(expr)
}); });
@ -387,6 +391,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
} }
Ok(LTIRToken::Symbol(name)) => match name.as_str() { Ok(LTIRToken::Symbol(name)) => match name.as_str() {
"if" => { "if" => {
let region = region.clone();
tokens.next(); tokens.next();
let _ = parse_expect(tokens, LTIRToken::ExprOpen)?; let _ = parse_expect(tokens, LTIRToken::ExprOpen)?;
let cond = parse_expr(typectx, tokens)?; let cond = parse_expr(typectx, tokens)?;
@ -404,15 +409,18 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
} }
children.push(LTExpr::Branch { children.push(LTExpr::Branch {
region,
condition: Box::new(cond), condition: Box::new(cond),
if_expr: Box::new(if_expr), if_expr: Box::new(if_expr),
else_expr: Box::new(else_expr), else_expr: Box::new(else_expr),
}); });
}, },
"export" => { "export" => {
let region = region.clone();
tokens.next(); tokens.next();
let block = parse_statement_block(typectx, tokens)?; let block = parse_statement_block(typectx, tokens)?;
children.push(LTExpr::ExportBlock { children.push(LTExpr::ExportBlock {
region,
statements: block statements: block
}); });
}, },
@ -429,13 +437,16 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
} }
} }
if children.len() > 0 { if children.len() > 1 {
let head = children.remove(0); let head = children.remove(0);
Ok(LTExpr::Application { Ok(LTExpr::Application {
region: InputRegionTag::default(),
typ: None, typ: None,
head: Box::new(head), head: Box::new(head),
body: children, body: children,
}) })
} else if children.len() == 1 {
Ok(children.pop().unwrap())
} else { } else {
Err((InputRegionTag::default(), ParseError::UnexpectedEnd)) Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
} }
@ -444,7 +455,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
mod tests { mod tests {
use crate::parser::LTExpr; use crate::parser::{LTExpr, InputRegionTag};
use std::sync::{Arc, RwLock}; use std::sync::{Arc, RwLock};
#[test] #[test]
@ -456,6 +467,7 @@ mod tests {
assert_eq!( assert_eq!(
bindings, bindings,
Ok(crate::parser::VariableBinding::Atomic{ Ok(crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 0, end: 1 },
symbol: "x".into(), symbol: "x".into(),
typtag: None typtag: None
}) })
@ -470,7 +482,32 @@ mod tests {
assert_eq!( assert_eq!(
expr, expr,
Ok(LTExpr::DoubleQuote("testlo".into())) Ok(LTExpr::StringLiteral{
region: InputRegionTag{ begin: 0, end: 6 },
value: "test".into()
})
);
}
#[test]
fn test_parse_ascend() {
let mut lexer = crate::lexer::LTIRLexer::from("\"ff\" as <Seq <Digit 16>>".chars()).peekable();
let typectx = Arc::new(RwLock::new(laddertypes::dict::TypeDict::new()));
let expr = crate::parser::parse_expr( &typectx, &mut lexer );
assert_eq!(
expr,
Ok(LTExpr::Ascend {
region: InputRegionTag{ begin: 5, end: 24 },
typ: match typectx.write().unwrap().parse("<Seq <Digit 16>>") {
Ok(t) => Ok(t),
Err(e) => Err(crate::parser::TypeError::ParseError(e))
},
expr: Box::new(LTExpr::StringLiteral {
region: InputRegionTag{ begin: 0, end: 4 },
value: "ff".into()
})
})
); );
} }
@ -484,8 +521,9 @@ mod tests {
assert_eq!( assert_eq!(
bindings, bindings,
Ok(crate::parser::VariableBinding::Atomic{ Ok(crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 0, end: 1 },
symbol: "x".into(), symbol: "x".into(),
typtag: Some(typectx.write().unwrap().parse("T").unwrap()) typtag: Some((InputRegionTag{begin: 1, end:3}, typectx.write().unwrap().parse("T").unwrap()))
}) })
); );
} }
@ -500,8 +538,12 @@ mod tests {
bindings, bindings,
Ok(crate::parser::VariableBinding::Struct{ Ok(crate::parser::VariableBinding::Struct{
members: vec![ members: vec![
crate::parser::VariableBinding::Atomic{ symbol: "x".into(), typtag: None }, crate::parser::VariableBinding::Atomic{
crate::parser::VariableBinding::Atomic{ symbol: "y".into(), typtag: None } region: InputRegionTag{ begin: 1, end: 2 },
symbol: "x".into(), typtag: None },
crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 3, end: 4 },
symbol: "y".into(), typtag: None }
], ],
typtag: None typtag: None
}) })
@ -518,10 +560,14 @@ mod tests {
bindings, bindings,
Ok(crate::parser::VariableBinding::Struct{ Ok(crate::parser::VariableBinding::Struct{
members: vec![ members: vec![
crate::parser::VariableBinding::Atomic{ symbol: "x".into(), typtag: None }, crate::parser::VariableBinding::Atomic{
crate::parser::VariableBinding::Atomic{ symbol: "y".into(), typtag: None } region: InputRegionTag{ begin: 1, end: 2 },
symbol: "x".into(), typtag: None },
crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 3, end: 4 },
symbol: "y".into(), typtag: None }
], ],
typtag: Some(typectx.write().unwrap().parse("T").unwrap()) typtag: Some((InputRegionTag{begin:5, end:7}, typectx.write().unwrap().parse("T").unwrap()))
}) })
); );
} }
@ -540,12 +586,16 @@ mod tests {
Ok(crate::parser::VariableBinding::Struct{ Ok(crate::parser::VariableBinding::Struct{
members: vec![ members: vec![
crate::parser::VariableBinding::Atomic{ crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 1, end: 2 },
symbol: "x".into(), symbol: "x".into(),
typtag: Some(type_u) typtag: Some((InputRegionTag{begin: 2, end:4}, type_u))
}, },
crate::parser::VariableBinding::Atomic{ symbol: "y".into(), typtag: None } crate::parser::VariableBinding::Atomic{
region: InputRegionTag{ begin: 6, end: 7 },
symbol: "y".into(),
typtag: None }
], ],
typtag: Some(type_t) typtag: Some((InputRegionTag{begin: 8, end:10}, type_t))
}) })
); );
} }

View file

@ -1,7 +1,7 @@
use { use {
crate::{ crate::{
lexer::InputRegionTag, lexer::InputRegionTag,
expr::{LTExpr, Statement}, expr::{LTExpr, Statement, TypeTag, TypeError},
symbols::{Scope, SymbolDef}, symbols::{Scope, SymbolDef},
}, },
std::{ std::{
@ -142,7 +142,7 @@ impl ProcedureCompiler {
var_id, var_id,
val_expr, val_expr,
} => match val_expr { } => match val_expr {
LTExpr::Abstraction { args: _, body: _ } => { LTExpr::Abstraction { region:_, args: _, body: _ } => {
self.symbols self.symbols
.write() .write()
.unwrap() .unwrap()
@ -231,22 +231,22 @@ impl ProcedureCompiler {
self.asm = self.asm.lit(c as i64); self.asm = self.asm.lit(c as i64);
} }
} }
LTExpr::WordLiteral { typ, val } => { LTExpr::WordLiteral { region, val } => {
self.asm = self.asm.lit(*val); self.asm = self.asm.lit(*val);
} }
LTExpr::Ascend { typ, expr } => { LTExpr::Ascend { region, typ, expr } => {
self = self.compile(expr); self = self.compile(expr);
} }
LTExpr::Descend { typ, expr } => { LTExpr::Descend { region, typ, expr } => {
self = self.compile(expr); self = self.compile(expr);
} }
LTExpr::Application { typ, head, body } => { LTExpr::Application { region, typ, head, body } => {
for arg in body.iter().rev() { for arg in body.iter().rev() {
self = self.compile(arg); self = self.compile(arg);
} }
self = self.compile(head); self = self.compile(head);
} }
LTExpr::Abstraction { args, body } => { LTExpr::Abstraction { region, args, body } => {
for (region, arg_name, arg_type) in args.iter() { for (region, arg_name, arg_type) in args.iter() {
if let Some(Ok(typeterm)) = arg_type { if let Some(Ok(typeterm)) = arg_type {
let id = self let id = self
@ -265,6 +265,7 @@ impl ProcedureCompiler {
self = self.compile(body); self = self.compile(body);
} }
LTExpr::Branch { LTExpr::Branch {
region,
condition, condition,
if_expr, if_expr,
else_expr, else_expr,
@ -281,12 +282,12 @@ impl ProcedureCompiler {
self.asm = asm; self.asm = asm;
self.asm = self.asm.branch(if_asm, else_asm); self.asm = self.asm.branch(if_asm, else_asm);
} }
LTExpr::Block { statements } => { LTExpr::Block { region, statements } => {
for s in statements.iter() { for s in statements.iter() {
self = self.compile_statement(s, false); self = self.compile_statement(s, false);
} }
} }
LTExpr::ExportBlock{ statements } => { LTExpr::ExportBlock{ region, statements } => {
for s in statements.iter() { for s in statements.iter() {
self = self.compile_statement(s, true); self = self.compile_statement(s, true);
} }

View file

@ -38,15 +38,23 @@ impl SymbolDef {
out_types, out_types,
link_addr: _, link_addr: _,
export: _, export: _,
} => laddertypes::TypeTerm::App(vec![ } => laddertypes::TypeTerm::App(
typectx std::iter::once(
typectx
.write() .write()
.unwrap() .unwrap()
.parse("Fn") .parse("Func")
.expect("parse typeterm"), .expect("parse typeterm")
laddertypes::TypeTerm::App(in_types.clone()), ).chain(
laddertypes::TypeTerm::App(out_types.clone()), in_types.clone().into_iter()
]), ).chain(
std::iter::once(
typectx.write().unwrap().parse("Struct").expect("parse typeterm")
).chain(
out_types.clone().into_iter()
)
).collect()
),
} }
} }
} }
@ -149,7 +157,7 @@ impl Scope {
for (name, def) in self.symbols.iter_mut() { for (name, def) in self.symbols.iter_mut() {
if let Some(offset) = linker.get_link_addr( name ) { if let Some(offset) = linker.get_link_addr( name ) {
match def { match def {
SymbolDef::Procedure { SymbolDef::Procedure {
in_types:_,out_types:_, in_types:_,out_types:_,
link_addr, link_addr,
export:_ export:_
@ -162,7 +170,7 @@ impl Scope {
_ => {} _ => {}
} }
} }
} }
} }