parse type annotations to ast

This commit is contained in:
Michael Sippel 2024-05-12 18:56:10 +02:00
parent f54f630b38
commit 49c72e8930
Signed by: senvas
GPG key ID: F96CF119C34B64A6
6 changed files with 207 additions and 64 deletions

View file

@ -13,6 +13,7 @@ pub enum Statement {
val_expr: LTExpr
},
LetAssign {
typ: Option<TypeTag>,
var_id: String,
val_expr: LTExpr,
},
@ -24,22 +25,34 @@ pub enum Statement {
Expr(LTExpr)
}
/// Reasons why a type annotation could not be turned into a usable type term.
#[derive(Debug, Clone)]
pub enum TypeError {
    /// The annotation text was rejected by the `laddertypes` parser;
    /// carries the parser's own error value.
    ParseError(laddertypes::parser::ParseError),

    /// A type term differed from the one that was expected.
    ///
    /// NOTE(review): nothing in the visible part of this change constructs
    /// this variant yet; the field meaning is taken from the field names.
    Mismatch {
        /// The type term that was required.
        expected: laddertypes::TypeTerm,
        /// The type term that was actually found.
        received: laddertypes::TypeTerm,
    },
}
/// Result of resolving a type annotation: the parsed type term on success,
/// or the [`TypeError`] describing why it could not be resolved.
///
/// AST nodes store this as `Option<TypeTag>`, so an absent annotation is
/// distinguishable from an annotation that failed to parse.
pub type TypeTag = Result<laddertypes::TypeTerm, TypeError>;
#[derive(Clone, Debug)]
pub enum LTExpr {
Literal {
typ: Option< laddertypes::TypeTerm >,
typ: Option<TypeTag>,
val: tisc::VM_Word
},
Symbol {
typ: Option< laddertypes::TypeTerm >,
typ: Option<TypeTag>,
symbol: String,
},
Application {
typ: Option<TypeTag>,
head: Box<LTExpr>,
body: Vec<LTExpr>
},
Abstraction {
args: Vec<(String, Option<laddertypes::TypeTerm>)>,
args: Vec<(String, Option<TypeTag>)>,
body: Box<LTExpr>
},
Branch {
@ -67,18 +80,19 @@ impl LTExpr {
}
}
pub fn abstraction(args: Vec<(&str, &str)>, val_expr: LTExpr) -> LTExpr {
pub fn abstraction(args: Vec<(&str, &str)>, body: LTExpr) -> LTExpr {
LTExpr::Abstraction {
args: args.into_iter().map(|(arg_name, arg_type)|
( arg_name.into(), None )
//typectx.write().unwrap().parse(t).expect("parse typeterm")
).collect(),
body: Box::new(val_expr)
body: Box::new(body)
}
}
pub fn application(head: LTExpr, body: Vec<LTExpr>) -> Self {
LTExpr::Application {
typ: None,
head: Box::new( head ),
body: body
}

View file

@ -10,7 +10,8 @@ pub enum LTIRToken {
// TripleQuote(String),
Lambda,
AssignType,
LambdaBody,
AssignType( String ),
AssignValue,
ExprOpen,
@ -30,6 +31,7 @@ pub enum LexError {
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexerState {
Any,
TypeTerm( String ),
Sym( String ),
Num( i64 ),
Char( Option<char> )
@ -39,6 +41,7 @@ impl LexerState {
fn into_token(self) -> Option< LTIRToken > {
match self {
LexerState::Any => None,
LexerState::TypeTerm(s) => Some(LTIRToken::AssignType(s)),
LexerState::Sym(s) => Some(LTIRToken::Symbol(s)),
LexerState::Num(n) => Some(LTIRToken::Num(n)),
LexerState::Char(c) => Some(LTIRToken::Char(c?))
@ -86,11 +89,15 @@ where It: Iterator<Item = char>
LexerState::Any => {
match c {
'λ' => { self.chars.next(); return Some(Ok(LTIRToken::Lambda)); },
'.' => { self.chars.next(); return Some(Ok(LTIRToken::LambdaBody)); },
'(' => { self.chars.next(); return Some(Ok(LTIRToken::ExprOpen)); },
')' => { self.chars.next(); return Some(Ok(LTIRToken::ExprClose)); },
'{' => { self.chars.next(); return Some(Ok(LTIRToken::BlockOpen)); },
'}' => { self.chars.next(); return Some(Ok(LTIRToken::BlockClose)); },
':' => { self.chars.next(); return Some(Ok(LTIRToken::AssignType)); },
':' => {
self.chars.next();
state = LexerState::TypeTerm(String::new());
},
'=' => { self.chars.next(); return Some(Ok(LTIRToken::AssignValue)); },
';' => { self.chars.next(); return Some(Ok(LTIRToken::StatementSep)); },
'\'' => { self.chars.next(); state = LexerState::Char(None); },
@ -138,12 +145,25 @@ where It: Iterator<Item = char>
}
}
LexerState::TypeTerm(s) => {
if *c == '=' || *c == '.' {
if let Some(token) = state.clone().into_token() {
return Some(Ok(token))
}
} else {
if let Some(c) = self.chars.next() {
s.push(c);
}
}
}
_ => {
if c.is_whitespace()
|| *c == '(' || *c == ')'
|| *c == '{' || *c == '}'
|| *c == ';' || *c == '=' || *c == ':'
|| *c == ';' || *c == '='
|| *c == ':' || *c == '.'
{
// finish the current token
@ -183,4 +203,25 @@ where It: Iterator<Item = char>
}
}
/// Lexer smoke tests; only compiled for test builds.
#[cfg(test)]
mod tests {
    /// Runs a small program through the lexer and prints every produced
    /// token (or lexer error) to stderr.
    ///
    /// The input exercises `let` bindings, `λ` abstractions with and without
    /// a `:` type annotation, and the `.` separator in front of a lambda body.
    /// Nothing is asserted about the tokens: the test only checks that the
    /// lexer runs to the end of the input without panicking.
    #[test]
    fn test_lexer() {
        // The literal's continuation lines and closing quote start in
        // column 0 so that no extra whitespace ends up in the lexer input.
        let lexer = crate::lexer::LTIRLexer::from(
            "let var1:=123;
let square =λx.* x x;
let sqrt = λx:~machine::Float64~machine::Word.(f64-sqrt x);
let magnitude =
λx:
.λy:
.sqrt (+ (* x x) (* y y));
".chars()
        );

        // The lexer is consumed by the loop, so the binding need not be `mut`.
        for token in lexer {
            eprintln!("token = {:?}", token);
        }
    }
}

View file

@ -21,6 +21,7 @@ fn compile(scope: &Arc<RwLock<Scope>>, name: &str, source: &str) -> Vec< tisc::a
ProcedureCompiler::new(scope)
.compile(
&parser::parse_expr(
&scope.read().unwrap().typectx,
&mut lexer::LTIRLexer::from(
source.chars().peekable()
).peekable()
@ -63,40 +64,81 @@ fn main() {
linker.add_procedure("main", compile(&main_scope,
"main",
"{
let print-nullterm = λstr {
while (@ str) {
emit (@ str);
! str (i+ str 1);
}
};
let print-nullterm =
λ str : <Ref <Seq Char~Ascii~machine::Word>>
~ <Ref <NullTerminatedArray machine::Word>>
~ machine::Address
~ machine::Word
.
{
while (@ str) {
emit (@ str);
! str (i+ str 1);
}
};
let print-lenprefix = λstr {
let print-len =
λ len : _2^64
~ machine::UInt64
~ machine::Word
.
λ str : <Ref <Seq Char~Ascii~machine::Word>>
~ <Ref <Array machine::Word>>
~ machine::Address
~ machine::Word
.
{
let end = (i+ str len);
while (i- str end) {
emit (@ str);
! str (i+ str 1);
}
};
let print-lenprefix =
λ str : <Ref <Seq Char~Ascii~machine::Word>>
~ <Ref <LenPrefixArray machine::Word>>
~ <Ref <Struct
<len _2^64
~machine::UInt64
~machine::Word>
<data <Array machine::Word>>
>>
~ machine::Address
~ machine::Word
.
{
let len = (@ str);
! str (i+ str 1);
let end = (i+ str len);
while (i- str end) {
emit (@ str);
! str (i+ str 1);
}
print-len len str;
};
let hello = λ{
let hello = λ.{
print-nullterm hello-string;
print-lenprefix pfxstr;
let isquare = λx:. i* x x;
let imagnitude2 = λx:.λy:. i+ (isquare x) (isquare y);
let factorial = λn:.
if( n ){ i* n (factorial (i- n 1)); }
else { 1; };
factorial 20;
if ( i- (imagnitude2 10 20) 500 ) {
emit '?';
} else {
emit '!';
};
emit '\n';
emit (i+ '0' (isquare 3));
emit '\n';
};
hello;
let isquare = λx (i* x x);
let magnitude2 = λx y {
i+ (isquare x) (isquare y);
};
magnitude2 8 16;
emit '\n';
emit (i+ '0' (isquare 3));
emit '\n';
}"));
}"
));
linker.add_static("hello-string",
"Hallo Welt!\n\0"

View file

@ -1,8 +1,11 @@
use {
std::iter::Peekable,
std::{
iter::Peekable,
sync::{Arc, RwLock}
},
crate::{
lexer::{LTIRLexer, LTIRToken, LexError},
expr::{LTExpr, Statement}
expr::{LTExpr, Statement, TypeTag, TypeError}
}
};
@ -46,7 +49,34 @@ where It: Iterator<Item = char>
}
}
/// Reads an optional type annotation from the front of the token stream.
///
/// The next token is only peeked at first. If it is an
/// `LTIRToken::AssignType`, the token is consumed and its text is parsed
/// with the type dictionary in `typectx`.
///
/// # Returns
///
/// * `None` if the stream is exhausted or the next token is anything other
///   than `AssignType` (including a lexer error); nothing is consumed then.
/// * `Some(Ok(term))` if the annotation text parsed into a type term.
/// * `Some(Err(TypeError::ParseError(_)))` if the type parser rejected the
///   annotation text. The token has been consumed in this case as well.
///
/// # Panics
///
/// Panics if the `typectx` lock is poisoned, because the write guard is
/// obtained with `unwrap()`.
pub fn parse_type_tag<It>(
    typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
    tokens: &mut Peekable<LTIRLexer<It>>,
) -> Option<TypeTag>
where
    It: Iterator<Item = char>,
{
    // Guard clause: bail out unless an annotation token is next. Only the
    // annotation text is copied, since the token itself is dropped below.
    let annotation = match tokens.peek()? {
        Ok(LTIRToken::AssignType(text)) => text.clone(),
        _ => return None,
    };

    // Consume the token that was just inspected.
    tokens.next();

    let parsed = typectx
        .write()
        .unwrap()
        .parse(annotation.as_str())
        .map_err(TypeError::ParseError);

    Some(parsed)
}
pub fn parse_statement<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< crate::expr::Statement, ParseError >
where It: Iterator<Item = char>
@ -59,7 +89,7 @@ where It: Iterator<Item = char>
tokens.next();
// todo accept address-expression instead of symbol
let name = parse_symbol(tokens)?;
let val_expr = parse_expr(tokens)?;
let val_expr = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Assignment {
@ -70,11 +100,13 @@ where It: Iterator<Item = char>
"let" => {
tokens.next();
let name = parse_symbol(tokens)?;
let typ = parse_type_tag(typectx, tokens);
let _ = parse_expect(tokens, LTIRToken::AssignValue);
let val_expr = parse_expr(tokens)?;
let val_expr = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::LetAssign {
typ,
var_id: name,
val_expr
})
@ -82,28 +114,28 @@ where It: Iterator<Item = char>
"while" => {
tokens.next();
let _ = parse_expect(tokens, LTIRToken::ExprOpen)?;
let cond = parse_expr(tokens)?;
let cond = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::ExprClose)?;
Ok(Statement::WhileLoop {
condition: cond,
body: parse_block(tokens)?
body: parse_block(typectx, tokens)?
})
}
"return" => {
tokens.next();
let expr = parse_expr(tokens)?;
let expr = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Return(parse_expr(tokens)?))
Ok(Statement::Return(parse_expr(typectx, tokens)?))
}
_ => {
let expr = parse_expr(tokens)?;
let expr = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Expr(expr))
}
}
}
Ok(_) => {
let expr = parse_expr(tokens)?;
let expr = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Expr(expr))
},
@ -115,6 +147,7 @@ where It: Iterator<Item = char>
}
pub fn parse_block<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< Vec<Statement>, ParseError >
where It: Iterator<Item = char>
@ -128,7 +161,7 @@ where It: Iterator<Item = char>
tokens.next();
return Ok(statements)
}
Ok(_) => { statements.push( parse_statement(tokens)? ); }
Ok(_) => { statements.push( parse_statement(typectx, tokens)? ); }
Err(err) => { return Err(ParseError::LexError(err.clone())); }
}
}
@ -164,6 +197,7 @@ where It: Iterator<Item = char>
}
pub fn parse_expr<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< crate::expr::LTExpr, ParseError >
where It: Iterator<Item = char>
@ -178,9 +212,14 @@ where It: Iterator<Item = char>
let mut args = Vec::new();
while let Some(Ok(LTIRToken::Symbol(_))) = tokens.peek() {
args.push((parse_symbol(tokens)?, None));
args.push((
parse_symbol(tokens)?,
parse_type_tag(typectx, tokens)
));
}
let body = parse_expr(tokens)?;
let _ = parse_expect(tokens, LTIRToken::LambdaBody);
let body = parse_expr(typectx, tokens)?;
return Ok(LTExpr::Abstraction{
args,
@ -200,11 +239,13 @@ where It: Iterator<Item = char>
}
_ => {}
}
children.push(parse_expr(tokens)?);
children.push(parse_expr(typectx, tokens)?);
}
},
Ok(LTIRToken::ExprClose) => { break; }
Ok(LTIRToken::BlockOpen) => { children.push( LTExpr::block(parse_block(tokens)?)); }
Ok(LTIRToken::BlockOpen) => {
children.push( LTExpr::block(parse_block(typectx, tokens)?));
}
Ok(LTIRToken::BlockClose) => { break; }
Ok(LTIRToken::StatementSep) => { break; }
Ok(LTIRToken::Symbol(name)) => {
@ -212,16 +253,16 @@ where It: Iterator<Item = char>
"if" => {
tokens.next();
let _ = parse_expect(tokens, LTIRToken::ExprOpen)?;
let cond = parse_expr(tokens)?;
let cond = parse_expr(typectx, tokens)?;
let _ = parse_expect(tokens, LTIRToken::ExprClose)?;
let if_expr = LTExpr::block(parse_block(tokens)?);
let if_expr = LTExpr::block(parse_block(typectx, tokens)?);
let mut else_expr = LTExpr::block(vec![]);
if let Some(peektok) = tokens.peek() {
if let Ok(LTIRToken::Symbol(name)) = peektok {
if name == "else" {
tokens.next();
else_expr = parse_expr(tokens)?;
else_expr = parse_expr(typectx, tokens)?;
}
}
}
@ -245,6 +286,7 @@ where It: Iterator<Item = char>
if children.len() > 0 {
let head = children.remove(0);
Ok(LTExpr::Application {
typ: None,
head: Box::new(head),
body: children
})

View file

@ -98,7 +98,7 @@ impl ProcedureCompiler {
}
}
}
Statement::LetAssign{ var_id, val_expr } => {
Statement::LetAssign{ typ, var_id, val_expr } => {
match val_expr {
LTExpr::Abstraction { args:_, body:_ } => {
self.symbols.write().unwrap()
@ -180,7 +180,7 @@ impl ProcedureCompiler {
LTExpr::Literal { typ, val } => {
self.asm = self.asm.lit( *val );
}
LTExpr::Application { head, body } => {
LTExpr::Application { typ, head, body } => {
for arg in body.iter().rev() {
self = self.compile(arg);
}
@ -188,16 +188,19 @@ impl ProcedureCompiler {
}
LTExpr::Abstraction { args, body } => {
for (arg_name, arg_type) in args.iter() {
let id = self.symbols
.write().unwrap()
.declare_var(
arg_name.clone(),
arg_type.clone().unwrap_or(
laddertypes::TypeTerm::unit())
);
self.asm = self.asm
.lit( id )
.call("data-frame-set");
if let Some(Ok(typeterm)) = arg_type {
let id = self.symbols
.write().unwrap()
.declare_var(
arg_name.clone(),
typeterm.clone()
);
self.asm = self.asm
.lit( id )
.call("data-frame-set");
} else {
eprintln!("invalid type {:?} for argument {}", arg_type, arg_name);
}
}
self = self.compile(body);
}

View file

@ -89,6 +89,7 @@ pub fn init_runtime(linker: &mut Linker) -> Arc<RwLock<Scope>> {
linker.add_procedure(
"i-",
tisc::Assembler::new()
.inst(tisc::VM_Instruction::Swap)
.inst(tisc::VM_Instruction::BitwiseNot)
.lit(1)
.inst(tisc::VM_Instruction::Add)
@ -174,6 +175,7 @@ pub fn init_runtime(linker: &mut Linker) -> Arc<RwLock<Scope>> {
.lit( 1 )
// [ a sum b -- a sum (b-1) ]
.inst( tisc::VM_Instruction::Swap )
.call( "i-" )
// [ a sum b -- a b sum ]
@ -245,7 +247,6 @@ pub fn init_runtime(linker: &mut Linker) -> Arc<RwLock<Scope>> {
tisc::Assembler::new()
.static_ref("data-frame-ptr")
.inst( tisc::VM_Instruction::Fetch )
.inst( tisc::VM_Instruction::Swap )
.call("i-")
.static_ref("data-frame-ptr")
.inst( tisc::VM_Instruction::Store )