Parse type annotations into the AST

This commit is contained in:
Michael Sippel 2024-05-12 18:56:10 +02:00
parent f54f630b38
commit 49c72e8930
Signed by: senvas
GPG key ID: F96CF119C34B64A6
6 changed files with 207 additions and 64 deletions

View file

@ -13,6 +13,7 @@ pub enum Statement {
val_expr: LTExpr val_expr: LTExpr
}, },
LetAssign { LetAssign {
typ: Option<TypeTag>,
var_id: String, var_id: String,
val_expr: LTExpr, val_expr: LTExpr,
}, },
@ -24,22 +25,34 @@ pub enum Statement {
Expr(LTExpr) Expr(LTExpr)
} }
/// Error recorded for a type annotation that did not yield a usable
/// `laddertypes::TypeTerm`.
#[derive(Clone, Debug)]
pub enum TypeError {
/// The annotation text was rejected by the `laddertypes` parser; wraps
/// the parser's own error value.
ParseError(laddertypes::parser::ParseError),
/// Presumably reports that one type term was required but a different
/// one was found.
/// NOTE(review): nothing in the visible part of this change constructs
/// this variant — confirm its intended producer.
Mismatch {
/// The type term that was required.
expected: laddertypes::TypeTerm,
/// The type term that was actually encountered.
received: laddertypes::TypeTerm
}
}
/// Outcome of reading a type annotation: either the parsed type term or
/// the `TypeError` describing why no type term could be produced.
pub type TypeTag = Result< laddertypes::TypeTerm, TypeError >;
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum LTExpr { pub enum LTExpr {
Literal { Literal {
typ: Option< laddertypes::TypeTerm >, typ: Option<TypeTag>,
val: tisc::VM_Word val: tisc::VM_Word
}, },
Symbol { Symbol {
typ: Option< laddertypes::TypeTerm >, typ: Option<TypeTag>,
symbol: String, symbol: String,
}, },
Application { Application {
typ: Option<TypeTag>,
head: Box<LTExpr>, head: Box<LTExpr>,
body: Vec<LTExpr> body: Vec<LTExpr>
}, },
Abstraction { Abstraction {
args: Vec<(String, Option<laddertypes::TypeTerm>)>, args: Vec<(String, Option<TypeTag>)>,
body: Box<LTExpr> body: Box<LTExpr>
}, },
Branch { Branch {
@ -67,18 +80,19 @@ impl LTExpr {
} }
} }
pub fn abstraction(args: Vec<(&str, &str)>, val_expr: LTExpr) -> LTExpr { pub fn abstraction(args: Vec<(&str, &str)>, body: LTExpr) -> LTExpr {
LTExpr::Abstraction { LTExpr::Abstraction {
args: args.into_iter().map(|(arg_name, arg_type)| args: args.into_iter().map(|(arg_name, arg_type)|
( arg_name.into(), None ) ( arg_name.into(), None )
//typectx.write().unwrap().parse(t).expect("parse typeterm") //typectx.write().unwrap().parse(t).expect("parse typeterm")
).collect(), ).collect(),
body: Box::new(val_expr) body: Box::new(body)
} }
} }
pub fn application(head: LTExpr, body: Vec<LTExpr>) -> Self { pub fn application(head: LTExpr, body: Vec<LTExpr>) -> Self {
LTExpr::Application { LTExpr::Application {
typ: None,
head: Box::new( head ), head: Box::new( head ),
body: body body: body
} }

View file

@ -10,7 +10,8 @@ pub enum LTIRToken {
// TripleQuote(String), // TripleQuote(String),
Lambda, Lambda,
AssignType, LambdaBody,
AssignType( String ),
AssignValue, AssignValue,
ExprOpen, ExprOpen,
@ -30,6 +31,7 @@ pub enum LexError {
#[derive(PartialEq, Eq, Clone, Debug)] #[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexerState { pub enum LexerState {
Any, Any,
TypeTerm( String ),
Sym( String ), Sym( String ),
Num( i64 ), Num( i64 ),
Char( Option<char> ) Char( Option<char> )
@ -39,6 +41,7 @@ impl LexerState {
fn into_token(self) -> Option< LTIRToken > { fn into_token(self) -> Option< LTIRToken > {
match self { match self {
LexerState::Any => None, LexerState::Any => None,
LexerState::TypeTerm(s) => Some(LTIRToken::AssignType(s)),
LexerState::Sym(s) => Some(LTIRToken::Symbol(s)), LexerState::Sym(s) => Some(LTIRToken::Symbol(s)),
LexerState::Num(n) => Some(LTIRToken::Num(n)), LexerState::Num(n) => Some(LTIRToken::Num(n)),
LexerState::Char(c) => Some(LTIRToken::Char(c?)) LexerState::Char(c) => Some(LTIRToken::Char(c?))
@ -86,11 +89,15 @@ where It: Iterator<Item = char>
LexerState::Any => { LexerState::Any => {
match c { match c {
'λ' => { self.chars.next(); return Some(Ok(LTIRToken::Lambda)); }, 'λ' => { self.chars.next(); return Some(Ok(LTIRToken::Lambda)); },
'.' => { self.chars.next(); return Some(Ok(LTIRToken::LambdaBody)); },
'(' => { self.chars.next(); return Some(Ok(LTIRToken::ExprOpen)); }, '(' => { self.chars.next(); return Some(Ok(LTIRToken::ExprOpen)); },
')' => { self.chars.next(); return Some(Ok(LTIRToken::ExprClose)); }, ')' => { self.chars.next(); return Some(Ok(LTIRToken::ExprClose)); },
'{' => { self.chars.next(); return Some(Ok(LTIRToken::BlockOpen)); }, '{' => { self.chars.next(); return Some(Ok(LTIRToken::BlockOpen)); },
'}' => { self.chars.next(); return Some(Ok(LTIRToken::BlockClose)); }, '}' => { self.chars.next(); return Some(Ok(LTIRToken::BlockClose)); },
':' => { self.chars.next(); return Some(Ok(LTIRToken::AssignType)); }, ':' => {
self.chars.next();
state = LexerState::TypeTerm(String::new());
},
'=' => { self.chars.next(); return Some(Ok(LTIRToken::AssignValue)); }, '=' => { self.chars.next(); return Some(Ok(LTIRToken::AssignValue)); },
';' => { self.chars.next(); return Some(Ok(LTIRToken::StatementSep)); }, ';' => { self.chars.next(); return Some(Ok(LTIRToken::StatementSep)); },
'\'' => { self.chars.next(); state = LexerState::Char(None); }, '\'' => { self.chars.next(); state = LexerState::Char(None); },
@ -138,12 +145,25 @@ where It: Iterator<Item = char>
} }
} }
LexerState::TypeTerm(s) => {
if *c == '=' || *c == '.' {
if let Some(token) = state.clone().into_token() {
return Some(Ok(token))
}
} else {
if let Some(c) = self.chars.next() {
s.push(c);
}
}
}
_ => { _ => {
if c.is_whitespace() if c.is_whitespace()
|| *c == '(' || *c == ')' || *c == '(' || *c == ')'
|| *c == '{' || *c == '}' || *c == '{' || *c == '}'
|| *c == ';' || *c == '=' || *c == ':' || *c == ';' || *c == '='
|| *c == ':' || *c == '.'
{ {
// finish the current token // finish the current token
@ -183,4 +203,25 @@ where It: Iterator<Item = char>
} }
} }
// Compile the lexer tests only for test builds.
#[cfg(test)]
mod tests {
    /// Smoke test for the lexer: feeds it a small program that uses `let`
    /// bindings, lambdas and `:` type annotations, and prints every token
    /// (or lex error) to stderr. It makes no assertions about the tokens;
    /// it only checks that lexing the input runs to completion.
    #[test]
    fn test_lexer() {
        // The continuation lines of the literal deliberately start at
        // column 0: the lexer's type-term state collects every character
        // up to the next `=` or `.`, so added indentation would end up
        // inside the lexed type terms.
        let lexer = crate::lexer::LTIRLexer::from(
            "let var1:=123;
let square =λx.* x x;
let sqrt = λx:~machine::Float64~machine::Word.(f64-sqrt x);
let magnitude =
λx:
.λy:
.sqrt (+ (* x x) (* y y));
".chars()
        );

        // The loop takes the lexer by value, so the binding needs no `mut`.
        for token in lexer {
            eprintln!("token = {:?}", token);
        }
    }
}

View file

@ -21,6 +21,7 @@ fn compile(scope: &Arc<RwLock<Scope>>, name: &str, source: &str) -> Vec< tisc::a
ProcedureCompiler::new(scope) ProcedureCompiler::new(scope)
.compile( .compile(
&parser::parse_expr( &parser::parse_expr(
&scope.read().unwrap().typectx,
&mut lexer::LTIRLexer::from( &mut lexer::LTIRLexer::from(
source.chars().peekable() source.chars().peekable()
).peekable() ).peekable()
@ -63,16 +64,30 @@ fn main() {
linker.add_procedure("main", compile(&main_scope, linker.add_procedure("main", compile(&main_scope,
"main", "main",
"{ "{
let print-nullterm = λstr { let print-nullterm =
λ str : <Ref <Seq Char~Ascii~machine::Word>>
~ <Ref <NullTerminatedArray machine::Word>>
~ machine::Address
~ machine::Word
.
{
while (@ str) { while (@ str) {
emit (@ str); emit (@ str);
! str (i+ str 1); ! str (i+ str 1);
} }
}; };
let print-lenprefix = λstr { let print-len =
let len = (@ str); λ len : _2^64
! str (i+ str 1); ~ machine::UInt64
~ machine::Word
.
λ str : <Ref <Seq Char~Ascii~machine::Word>>
~ <Ref <Array machine::Word>>
~ machine::Address
~ machine::Word
.
{
let end = (i+ str len); let end = (i+ str len);
while (i- str end) { while (i- str end) {
emit (@ str); emit (@ str);
@ -80,23 +95,50 @@ fn main() {
} }
}; };
let hello = λ{ let print-lenprefix =
λ str : <Ref <Seq Char~Ascii~machine::Word>>
~ <Ref <LenPrefixArray machine::Word>>
~ <Ref <Struct
<len _2^64
~machine::UInt64
~machine::Word>
<data <Array machine::Word>>
>>
~ machine::Address
~ machine::Word
.
{
let len = (@ str);
! str (i+ str 1);
print-len len str;
};
let hello = λ.{
print-nullterm hello-string; print-nullterm hello-string;
print-lenprefix pfxstr; print-lenprefix pfxstr;
};
hello; let isquare = λx:. i* x x;
let imagnitude2 = λx:.λy:. i+ (isquare x) (isquare y);
let factorial = λn:.
if( n ){ i* n (factorial (i- n 1)); }
else { 1; };
let isquare = λx (i* x x); factorial 20;
let magnitude2 = λx y {
i+ (isquare x) (isquare y); if ( i- (imagnitude2 10 20) 500 ) {
emit '?';
} else {
emit '!';
}; };
magnitude2 8 16;
emit '\n'; emit '\n';
emit (i+ '0' (isquare 3)); emit (i+ '0' (isquare 3));
emit '\n'; emit '\n';
}")); };
hello;
}"
));
linker.add_static("hello-string", linker.add_static("hello-string",
"Hallo Welt!\n\0" "Hallo Welt!\n\0"

View file

@ -1,8 +1,11 @@
use { use {
std::iter::Peekable, std::{
iter::Peekable,
sync::{Arc, RwLock}
},
crate::{ crate::{
lexer::{LTIRLexer, LTIRToken, LexError}, lexer::{LTIRLexer, LTIRToken, LexError},
expr::{LTExpr, Statement} expr::{LTExpr, Statement, TypeTag, TypeError}
} }
}; };
@ -46,7 +49,34 @@ where It: Iterator<Item = char>
} }
} }
/// Parses an optional type annotation at the current token position.
///
/// If the next token is an `LTIRToken::AssignType`, it is consumed and its
/// text is handed to the type dictionary's parser. The result is
/// `Some(Ok(term))` on success, or `Some(Err(TypeError::ParseError(..)))`
/// when the dictionary rejects the text. For any other upcoming token
/// (including a lex error or end of input) nothing is consumed and `None`
/// is returned.
///
/// # Panics
///
/// Panics if the write lock on `typectx` is poisoned.
pub fn parse_type_tag<It>(
    typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
    tokens: &mut Peekable<LTIRLexer<It>>
) -> Option<TypeTag>
where It: Iterator<Item = char>
{
    // Guard: only an upcoming type-annotation token is of interest here.
    let Some(Ok(LTIRToken::AssignType(annotation))) = tokens.peek() else {
        return None;
    };

    // Take an owned copy of the text so the peeked borrow ends before the
    // token is consumed.
    let annotation = annotation.clone();
    tokens.next();

    let parsed = typectx
        .write()
        .unwrap()
        .parse(annotation.as_str())
        .map_err(TypeError::ParseError);

    Some(parsed)
}
pub fn parse_statement<It>( pub fn parse_statement<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<LTIRLexer<It>> tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< crate::expr::Statement, ParseError > ) -> Result< crate::expr::Statement, ParseError >
where It: Iterator<Item = char> where It: Iterator<Item = char>
@ -59,7 +89,7 @@ where It: Iterator<Item = char>
tokens.next(); tokens.next();
// todo accept address-expression instead of symbol // todo accept address-expression instead of symbol
let name = parse_symbol(tokens)?; let name = parse_symbol(tokens)?;
let val_expr = parse_expr(tokens)?; let val_expr = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?; let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Assignment { Ok(Statement::Assignment {
@ -70,11 +100,13 @@ where It: Iterator<Item = char>
"let" => { "let" => {
tokens.next(); tokens.next();
let name = parse_symbol(tokens)?; let name = parse_symbol(tokens)?;
let typ = parse_type_tag(typectx, tokens);
let _ = parse_expect(tokens, LTIRToken::AssignValue); let _ = parse_expect(tokens, LTIRToken::AssignValue);
let val_expr = parse_expr(tokens)?; let val_expr = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?; let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::LetAssign { Ok(Statement::LetAssign {
typ,
var_id: name, var_id: name,
val_expr val_expr
}) })
@ -82,28 +114,28 @@ where It: Iterator<Item = char>
"while" => { "while" => {
tokens.next(); tokens.next();
let _ = parse_expect(tokens, LTIRToken::ExprOpen)?; let _ = parse_expect(tokens, LTIRToken::ExprOpen)?;
let cond = parse_expr(tokens)?; let cond = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::ExprClose)?; let _ = parse_expect(tokens, LTIRToken::ExprClose)?;
Ok(Statement::WhileLoop { Ok(Statement::WhileLoop {
condition: cond, condition: cond,
body: parse_block(tokens)? body: parse_block(typectx, tokens)?
}) })
} }
"return" => { "return" => {
tokens.next(); tokens.next();
let expr = parse_expr(tokens)?; let expr = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?; let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Return(parse_expr(tokens)?)) Ok(Statement::Return(parse_expr(typectx, tokens)?))
} }
_ => { _ => {
let expr = parse_expr(tokens)?; let expr = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?; let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Expr(expr)) Ok(Statement::Expr(expr))
} }
} }
} }
Ok(_) => { Ok(_) => {
let expr = parse_expr(tokens)?; let expr = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?; let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Expr(expr)) Ok(Statement::Expr(expr))
}, },
@ -115,6 +147,7 @@ where It: Iterator<Item = char>
} }
pub fn parse_block<It>( pub fn parse_block<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<LTIRLexer<It>> tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< Vec<Statement>, ParseError > ) -> Result< Vec<Statement>, ParseError >
where It: Iterator<Item = char> where It: Iterator<Item = char>
@ -128,7 +161,7 @@ where It: Iterator<Item = char>
tokens.next(); tokens.next();
return Ok(statements) return Ok(statements)
} }
Ok(_) => { statements.push( parse_statement(tokens)? ); } Ok(_) => { statements.push( parse_statement(typectx, tokens)? ); }
Err(err) => { return Err(ParseError::LexError(err.clone())); } Err(err) => { return Err(ParseError::LexError(err.clone())); }
} }
} }
@ -164,6 +197,7 @@ where It: Iterator<Item = char>
} }
pub fn parse_expr<It>( pub fn parse_expr<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<LTIRLexer<It>> tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< crate::expr::LTExpr, ParseError > ) -> Result< crate::expr::LTExpr, ParseError >
where It: Iterator<Item = char> where It: Iterator<Item = char>
@ -178,9 +212,14 @@ where It: Iterator<Item = char>
let mut args = Vec::new(); let mut args = Vec::new();
while let Some(Ok(LTIRToken::Symbol(_))) = tokens.peek() { while let Some(Ok(LTIRToken::Symbol(_))) = tokens.peek() {
args.push((parse_symbol(tokens)?, None)); args.push((
parse_symbol(tokens)?,
parse_type_tag(typectx, tokens)
));
} }
let body = parse_expr(tokens)?;
let _ = parse_expect(tokens, LTIRToken::LambdaBody);
let body = parse_expr(typectx, tokens)?;
return Ok(LTExpr::Abstraction{ return Ok(LTExpr::Abstraction{
args, args,
@ -200,11 +239,13 @@ where It: Iterator<Item = char>
} }
_ => {} _ => {}
} }
children.push(parse_expr(tokens)?); children.push(parse_expr(typectx, tokens)?);
} }
}, },
Ok(LTIRToken::ExprClose) => { break; } Ok(LTIRToken::ExprClose) => { break; }
Ok(LTIRToken::BlockOpen) => { children.push( LTExpr::block(parse_block(tokens)?)); } Ok(LTIRToken::BlockOpen) => {
children.push( LTExpr::block(parse_block(typectx, tokens)?));
}
Ok(LTIRToken::BlockClose) => { break; } Ok(LTIRToken::BlockClose) => { break; }
Ok(LTIRToken::StatementSep) => { break; } Ok(LTIRToken::StatementSep) => { break; }
Ok(LTIRToken::Symbol(name)) => { Ok(LTIRToken::Symbol(name)) => {
@ -212,16 +253,16 @@ where It: Iterator<Item = char>
"if" => { "if" => {
tokens.next(); tokens.next();
let _ = parse_expect(tokens, LTIRToken::ExprOpen)?; let _ = parse_expect(tokens, LTIRToken::ExprOpen)?;
let cond = parse_expr(tokens)?; let cond = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::ExprClose)?; let _ = parse_expect(tokens, LTIRToken::ExprClose)?;
let if_expr = LTExpr::block(parse_block(tokens)?); let if_expr = LTExpr::block(parse_block(typectx, tokens)?);
let mut else_expr = LTExpr::block(vec![]); let mut else_expr = LTExpr::block(vec![]);
if let Some(peektok) = tokens.peek() { if let Some(peektok) = tokens.peek() {
if let Ok(LTIRToken::Symbol(name)) = peektok { if let Ok(LTIRToken::Symbol(name)) = peektok {
if name == "else" { if name == "else" {
tokens.next(); tokens.next();
else_expr = parse_expr(tokens)?; else_expr = parse_expr(typectx, tokens)?;
} }
} }
} }
@ -245,6 +286,7 @@ where It: Iterator<Item = char>
if children.len() > 0 { if children.len() > 0 {
let head = children.remove(0); let head = children.remove(0);
Ok(LTExpr::Application { Ok(LTExpr::Application {
typ: None,
head: Box::new(head), head: Box::new(head),
body: children body: children
}) })

View file

@ -98,7 +98,7 @@ impl ProcedureCompiler {
} }
} }
} }
Statement::LetAssign{ var_id, val_expr } => { Statement::LetAssign{ typ, var_id, val_expr } => {
match val_expr { match val_expr {
LTExpr::Abstraction { args:_, body:_ } => { LTExpr::Abstraction { args:_, body:_ } => {
self.symbols.write().unwrap() self.symbols.write().unwrap()
@ -180,7 +180,7 @@ impl ProcedureCompiler {
LTExpr::Literal { typ, val } => { LTExpr::Literal { typ, val } => {
self.asm = self.asm.lit( *val ); self.asm = self.asm.lit( *val );
} }
LTExpr::Application { head, body } => { LTExpr::Application { typ, head, body } => {
for arg in body.iter().rev() { for arg in body.iter().rev() {
self = self.compile(arg); self = self.compile(arg);
} }
@ -188,16 +188,19 @@ impl ProcedureCompiler {
} }
LTExpr::Abstraction { args, body } => { LTExpr::Abstraction { args, body } => {
for (arg_name, arg_type) in args.iter() { for (arg_name, arg_type) in args.iter() {
if let Some(Ok(typeterm)) = arg_type {
let id = self.symbols let id = self.symbols
.write().unwrap() .write().unwrap()
.declare_var( .declare_var(
arg_name.clone(), arg_name.clone(),
arg_type.clone().unwrap_or( typeterm.clone()
laddertypes::TypeTerm::unit())
); );
self.asm = self.asm self.asm = self.asm
.lit( id ) .lit( id )
.call("data-frame-set"); .call("data-frame-set");
} else {
eprintln!("invalid type {:?} for argument {}", arg_type, arg_name);
}
} }
self = self.compile(body); self = self.compile(body);
} }

View file

@ -89,6 +89,7 @@ pub fn init_runtime(linker: &mut Linker) -> Arc<RwLock<Scope>> {
linker.add_procedure( linker.add_procedure(
"i-", "i-",
tisc::Assembler::new() tisc::Assembler::new()
.inst(tisc::VM_Instruction::Swap)
.inst(tisc::VM_Instruction::BitwiseNot) .inst(tisc::VM_Instruction::BitwiseNot)
.lit(1) .lit(1)
.inst(tisc::VM_Instruction::Add) .inst(tisc::VM_Instruction::Add)
@ -174,6 +175,7 @@ pub fn init_runtime(linker: &mut Linker) -> Arc<RwLock<Scope>> {
.lit( 1 ) .lit( 1 )
// [ a sum b -- a sum (b-1) ] // [ a sum b -- a sum (b-1) ]
.inst( tisc::VM_Instruction::Swap )
.call( "i-" ) .call( "i-" )
// [ a sum b -- a b sum ] // [ a sum b -- a b sum ]
@ -245,7 +247,6 @@ pub fn init_runtime(linker: &mut Linker) -> Arc<RwLock<Scope>> {
tisc::Assembler::new() tisc::Assembler::new()
.static_ref("data-frame-ptr") .static_ref("data-frame-ptr")
.inst( tisc::VM_Instruction::Fetch ) .inst( tisc::VM_Instruction::Fetch )
.inst( tisc::VM_Instruction::Swap )
.call("i-") .call("i-")
.static_ref("data-frame-ptr") .static_ref("data-frame-ptr")
.inst( tisc::VM_Instruction::Store ) .inst( tisc::VM_Instruction::Store )