first basic parser

This commit is contained in:
Michael Sippel 2024-05-11 00:00:20 +02:00
parent ebc5f720bf
commit d7c06d423e
Signed by: senvas
GPG key ID: F96CF119C34B64A6
6 changed files with 251 additions and 134 deletions

View file

@ -23,11 +23,11 @@ pub enum Statement {
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum LTExpr { pub enum LTExpr {
Literal { Literal {
typ: laddertypes::TypeTerm, typ: Option< laddertypes::TypeTerm >,
val: tisc::VM_Word val: tisc::VM_Word
}, },
Symbol { Symbol {
typ: laddertypes::TypeTerm, typ: Option< laddertypes::TypeTerm >,
symbol: String, symbol: String,
}, },
Application { Application {
@ -36,7 +36,7 @@ pub enum LTExpr {
}, },
Abstraction { Abstraction {
arg_id: String, arg_id: String,
arg_type: laddertypes::TypeTerm, arg_type: Option< laddertypes::TypeTerm >,
val_expr: Box<LTExpr> val_expr: Box<LTExpr>
}, },
Let { Let {
@ -55,29 +55,29 @@ pub enum LTExpr {
} }
impl LTExpr { impl LTExpr {
pub fn symbol(typectx: &Arc<RwLock<laddertypes::TypeDict>>, str: &str) -> Self { pub fn symbol(str: &str) -> Self {
LTExpr::Symbol { LTExpr::Symbol {
typ: typectx.write().unwrap().parse("<Ref memory::Word>~Symbol~<Seq Char>").expect("parse typeterm"), typ: None,//typectx.write().unwrap().parse("<Ref memory::Word>~Symbol~<Seq Char>").expect("parse typeterm"),
symbol: String::from(str) symbol: String::from(str)
} }
} }
pub fn lit_uint(typectx: &Arc<RwLock<laddertypes::TypeDict>>, val: u64) -> Self { pub fn lit_uint(val: u64) -> Self {
LTExpr::Literal { LTExpr::Literal {
typ: typectx.write().unwrap().parse("_2^64~machine::UInt64~machine::Word").expect("parse typeterm"), typ: None,//typectx.write().unwrap().parse("_2^64~machine::UInt64~machine::Word").expect("parse typeterm"),
val: val as tisc::VM_Word val: val as tisc::VM_Word
} }
} }
/// Builds an `LTExpr::Abstraction` binding `arg_id` inside `val_expr`.
///
/// NOTE: `arg_typ` is currently ignored. The argument type is always stored
/// as `None`; the previous revision parsed `arg_typ` through a type context
/// that this constructor no longer receives.
pub fn abstraction(arg_id: &str, arg_typ: &str, val_expr: LTExpr) -> LTExpr {
    let body = Box::new(val_expr);
    LTExpr::Abstraction {
        arg_id: arg_id.to_owned(),
        arg_type: None,
        val_expr: body,
    }
}
pub fn let_expr(typectx: &Arc<RwLock<laddertypes::TypeDict>>, name: &str, val: LTExpr, body: LTExpr) -> Self { pub fn let_expr(name: &str, val: LTExpr, body: LTExpr) -> Self {
LTExpr::Let { LTExpr::Let {
name: String::from(name), name: String::from(name),
val: Box::new(val), val: Box::new(val),

View file

@ -9,6 +9,8 @@ pub enum LTIRToken {
// DoubleQuote(String), // DoubleQuote(String),
// TripleQuote(String), // TripleQuote(String),
Lambda,
ExprOpen, ExprOpen,
ExprClose, ExprClose,
@ -85,6 +87,7 @@ where It: Iterator<Item = char>
')' => { self.chars.next(); return Some(Ok(LTIRToken::ExprClose)); }, ')' => { self.chars.next(); return Some(Ok(LTIRToken::ExprClose)); },
'{' => { self.chars.next(); return Some(Ok(LTIRToken::BlockOpen)); }, '{' => { self.chars.next(); return Some(Ok(LTIRToken::BlockOpen)); },
'}' => { self.chars.next(); return Some(Ok(LTIRToken::BlockClose)); }, '}' => { self.chars.next(); return Some(Ok(LTIRToken::BlockClose)); },
'λ' => { self.chars.next(); return Some(Ok(LTIRToken::Lambda)); },
';' => { self.chars.next(); return Some(Ok(LTIRToken::StatementSep)); }, ';' => { self.chars.next(); return Some(Ok(LTIRToken::StatementSep)); },
'\'' => { self.chars.next(); state = LexerState::Char(None); }, '\'' => { self.chars.next(); state = LexerState::Char(None); },
c => { c => {

View file

@ -17,9 +17,18 @@ use crate::{
procedure_compiler::ProcedureCompiler procedure_compiler::ProcedureCompiler
}; };
fn compile(scope: &Arc<RwLock<Scope>>, source: &str) -> Vec< tisc::assembler::AssemblyWord > {
/* ProcedureCompiler::new(scope)
*/ .compile(
&parser::parse_expr(
&mut lexer::LTIRLexer::from(
source.chars().peekable()
).peekable()
).expect("syntax error")
)
.into_asm()
.build()
}
fn main() { fn main() {
// create virtual machine with 4096 words of memory // create virtual machine with 4096 words of memory
@ -28,28 +37,8 @@ fn main() {
let mut linker = tisc::Linker::new(); let mut linker = tisc::Linker::new();
let root_scope = crate::runtime::init_runtime(&mut linker); let root_scope = crate::runtime::init_runtime(&mut linker);
let main_scope = Scope::with_parent(&root_scope); let main_scope = Scope::with_parent(&root_scope);
let typectx = main_scope.read().unwrap().typectx.clone(); let typectx = main_scope.read().unwrap().typectx.clone();
let mut lexer = lexer::LTIRLexer::from(
"{
emit '*';
let x : _2^64~machine::UInt64~machine::Word = 8;
while (i< x 7) {
= x (i+ x 1);
}
let square = (
(λ x (i* x x))
)
}".chars()
);
let block = parser::parse_block( &mut lexer.peekable() );
eprintln!("parsed block = {:?}", block);
/* define type of the symbol /* define type of the symbol
*/ */
main_scope.write().unwrap() main_scope.write().unwrap()
@ -64,68 +53,21 @@ fn main() {
main_scope.write().unwrap() main_scope.write().unwrap()
.declare_proc_parse( .declare_proc_parse(
"print-nullterm", "print-nullterm",
vec![], vec![],
vec![ vec![
"<Ref <Seq Char~Ascii~machine::Word>~<NullTerminatedSeq machine::Word>> "<Ref <Seq Char~Ascii~machine::Word>~<NullTerminatedSeq machine::Word>>
~machine::Address ~machine::Address
~machine::Word" ~machine::Word"
], ],
vec![]); vec![]);
/* link assembly-program to symbol /* link assembly-program to symbol
*/ */
linker.add_procedure( linker.add_procedure(
"main", "main",
ProcedureCompiler::new(&main_scope) compile(&main_scope, "{
.compile( print-nullterm hello-string;
&LTExpr::block(vec![ }"));
Statement::Expr(
LTExpr::application(
LTExpr::abstraction(
&typectx,
"c",
"Char",
LTExpr::block(vec![
Statement::Expr(LTExpr::application(
LTExpr::symbol(&typectx, "emit"),
vec![
LTExpr::symbol(&typectx, "c")
]
)),
Statement::Assignment{
var_id: "c".into(),
val_expr: LTExpr::application(
LTExpr::symbol(&typectx, "i+"),
vec![
LTExpr::symbol(&typectx, "c"),
LTExpr::lit_uint(&typectx, 1)
]
)
},
Statement::Expr(LTExpr::application(
LTExpr::symbol(&typectx, "emit"),
vec![
LTExpr::symbol(&typectx, "c")
]
))
])
),
vec![
LTExpr::lit_uint(&typectx, 42)
]
)),
Statement::Expr(
LTExpr::application(
LTExpr::symbol(&typectx, "emit"),
vec![
LTExpr::lit_uint(&typectx, 10)
]
))
])
)
.into_asm()
.build()
);
linker.add_static("hello-string", linker.add_static("hello-string",
"Hallo Welt!\n\0" "Hallo Welt!\n\0"
@ -135,21 +77,13 @@ fn main() {
linker.add_procedure( linker.add_procedure(
"print-nullterm", "print-nullterm",
tisc::Assembler::new() compile(&main_scope,
.while_loop( "λ str {
tisc::Assembler::new() while (@ str) {
.inst( tisc::VM_Instruction::Dup ) emit (@ str);
.inst( tisc::VM_Instruction::Fetch ) ! str (i+ str 1);
.inst( tisc::VM_Instruction::Dup ), }
}")
tisc::Assembler::new()
.inst( tisc::VM_Instruction::Emit )
.lit( 1 )
.inst( tisc::VM_Instruction::Add )
)
.inst( tisc::VM_Instruction::Drop )
.inst( tisc::VM_Instruction::Drop )
.build()
); );
main_scope.write().unwrap().declare_proc_parse( main_scope.write().unwrap().declare_proc_parse(
@ -161,9 +95,23 @@ fn main() {
~machine::Word" ~machine::Word"
], ],
vec![]); vec![]);
/*
linker.add_procedure( linker.add_procedure(
"print-lenprefix", "print-lenprefix",
compile(&main_scope,
"λ str {
let len = (@ str);
! str = (i+ str 1);
let end = (i+ str len);
while (i- str end) {
emit (@ str);
! str (i+ str 1);
}
}"
)
);
*/
/*
tisc::Assembler::new() tisc::Assembler::new()
// calculate stop address // calculate stop address
.inst( tisc::VM_Instruction::Dup ) .inst( tisc::VM_Instruction::Dup )
@ -193,7 +141,7 @@ fn main() {
.inst( tisc::VM_Instruction::Drop ) .inst( tisc::VM_Instruction::Drop )
.inst( tisc::VM_Instruction::Drop ) .inst( tisc::VM_Instruction::Drop )
.build() .build()
); */
let main_addr = linker.get_link_addr(&"main".into()).expect("'main' not linked"); let main_addr = linker.get_link_addr(&"main".into()).expect("'main' not linked");
vm.load( linker.link_total().expect("could not link") ); vm.load( linker.link_total().expect("could not link") );

View file

@ -2,7 +2,7 @@ use {
std::iter::Peekable, std::iter::Peekable,
crate::{ crate::{
lexer::{LTIRLexer, LTIRToken, LexError}, lexer::{LTIRLexer, LTIRToken, LexError},
expr::LTExpr expr::{LTExpr, Statement}
} }
}; };
@ -14,20 +14,140 @@ pub enum ParseError {
UnexpectedToken UnexpectedToken
} }
/// Consumes the next token and checks that it equals `expected_token`.
///
/// The token is consumed even when it does not match.
///
/// # Errors
/// - `ParseError::UnexpectedEnd` if the token stream is exhausted,
/// - `ParseError::LexError` if the lexer reported an error,
/// - `ParseError::UnexpectedToken` if a different token was read.
pub fn parse_expect<It>(
    tokens: &mut Peekable<LTIRLexer<It>>,
    expected_token: LTIRToken
) -> Result< (), ParseError >
where It: Iterator<Item = char>
{
    let token = tokens
        .next()
        .ok_or(ParseError::UnexpectedEnd)?
        .map_err(ParseError::LexError)?;

    if token != expected_token {
        return Err(ParseError::UnexpectedToken);
    }
    Ok(())
}
/// Consumes the next token and returns its name if it is a `LTIRToken::Symbol`.
///
/// # Errors
/// - `ParseError::UnexpectedEnd` if the token stream is exhausted,
/// - `ParseError::LexError` if the lexer reported an error,
/// - `ParseError::UnexpectedToken` if the token is not a symbol.
pub fn parse_symbol<It>(
    tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< String, ParseError >
where It: Iterator<Item = char>
{
    let token = tokens
        .next()
        .ok_or(ParseError::UnexpectedEnd)?
        .map_err(ParseError::LexError)?;

    if let LTIRToken::Symbol(name) = token {
        Ok(name)
    } else {
        Err(ParseError::UnexpectedToken)
    }
}
pub fn parse_statement<It>( pub fn parse_statement<It>(
tokens: &mut Peekable<LTIRLexer<It>> tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< crate::expr::Statement, ParseError > ) -> Result< crate::expr::Statement, ParseError >
where It: Iterator<Item = char> where It: Iterator<Item = char>
{ {
Err(ParseError::UnexpectedEnd) if let Some(peektok) = tokens.peek() {
match peektok {
Ok(LTIRToken::Symbol(sym)) => {
match sym.as_str() {
"!" => {
tokens.next();
let name = parse_symbol(tokens)?;
let val_expr = parse_expr(tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Assignment {
var_id: name,
val_expr
})
}
"while" => {
tokens.next();
let _ = parse_expect(tokens, LTIRToken::ExprOpen)?;
let cond = parse_expr(tokens)?;
let _ = parse_expect(tokens, LTIRToken::ExprClose)?;
Ok(Statement::WhileLoop {
condition: cond,
body: parse_block(tokens)?
})
}
"return" => {
tokens.next();
let expr = parse_expr(tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Return(parse_expr(tokens)?))
}
_ => {
let expr = parse_expr(tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Expr(expr))
}
}
}
Ok(_) => {
let expr = parse_expr(tokens)?;
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Expr(expr))
},
Err(err) => Err(ParseError::LexError(err.clone()))
}
} else {
Err(ParseError::UnexpectedEnd)
}
} }
pub fn parse_block<It>( pub fn parse_block<It>(
tokens: &mut Peekable<LTIRLexer<It>> tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< Vec<Statement>, ParseError >
where It: Iterator<Item = char>
{
let _ = parse_expect(tokens, LTIRToken::BlockOpen)?;
let mut statements = Vec::new();
while let Some(peektok) = tokens.peek() {
match peektok {
Ok(LTIRToken::BlockClose) => {
tokens.next();
return Ok(statements)
}
Ok(_) => { statements.push( parse_statement(tokens)? ); }
Err(err) => { return Err(ParseError::LexError(err.clone())); }
}
}
Err(ParseError::UnexpectedEnd)
}
pub fn parse_atom<It>(
tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< crate::expr::LTExpr, ParseError > ) -> Result< crate::expr::LTExpr, ParseError >
where It: Iterator<Item = char> where It: Iterator<Item = char>
{ {
Err(ParseError::UnexpectedEnd) match tokens.next() {
Some(Ok(LTIRToken::Symbol(sym))) => {
Ok(LTExpr::symbol(sym.as_str()))
}
Some(Ok(LTIRToken::Char(c))) => {
Ok(LTExpr::lit_uint(c as u64))
}
Some(Ok(LTIRToken::Num(n))) => {
Ok(LTExpr::lit_uint(n as u64))
}
Some(Ok(_)) => {
Err(ParseError::UnexpectedToken)
}
Some(Err(err)) => {
Err(ParseError::LexError(err))
}
None => {
Err(ParseError::UnexpectedEnd)
}
}
} }
pub fn parse_expr<It>( pub fn parse_expr<It>(
@ -37,27 +157,73 @@ where It: Iterator<Item = char>
{ {
let mut children = Vec::new(); let mut children = Vec::new();
match tokens.next() { while let Some(tok) = tokens.peek() {
Some(Ok(LTIRToken::ExprOpen)) => { match tok {
if let Ok(subexpr) = parse_expr( tokens ) { Ok(LTIRToken::Lambda) => {
if children.len() == 0 {
} else { tokens.next();
let name = parse_symbol(tokens)?;
let body = parse_expr(tokens)?;
return Ok(LTExpr::Abstraction{
arg_id: name,
arg_type: None,
val_expr: Box::new(body)
});
} else {
return Err(ParseError::UnexpectedToken);
}
} }
/* Ok(LTIRToken::ExprOpen) => {
Err(ParseError::UnexpectedEnd) tokens.next();
*/ while let Some(peektok) = tokens.peek() {
}, match peektok {
Some(Ok(LTIRToken::BlockOpen)) => { Ok(LTIRToken::ExprClose) => {
/* tokens.next();
Err(ParseError::UnexpectedEnd) break;
*/ }
_ => {}
}
children.push(parse_expr(tokens)?);
}
},
Ok(LTIRToken::ExprClose) => { break; }
Ok(LTIRToken::BlockOpen) => { children.push( LTExpr::block(parse_block(tokens)?)); }
Ok(LTIRToken::BlockClose) => { break; }
Ok(LTIRToken::StatementSep) => { break; }
Ok(LTIRToken::Symbol(name)) => {
match name.as_str() {
"if" => {
tokens.next();
let _ = parse_expect(tokens, LTIRToken::ExprOpen)?;
let cond = parse_expr(tokens)?;
let _ = parse_expect(tokens, LTIRToken::ExprClose)?;
let if_expr = LTExpr::block(parse_block(tokens)?);
let mut else_expr = LTExpr::block(vec![]);
if let Some(peektok) = tokens.peek() {
if let Ok(LTIRToken::Symbol(name)) = peektok {
if name == "else" {
tokens.next();
else_expr = parse_expr(tokens)?;
}
}
}
children.push(LTExpr::Branch{
condition: Box::new(cond),
if_expr: Box::new(if_expr),
else_expr: Box::new(else_expr)
});
}
name => {
children.push(parse_atom(tokens)?);
}
}
}
Ok(atom) => { children.push(parse_atom(tokens)?); }
Err(err) => { return Err(ParseError::LexError(err.clone())); }
} }
/*
_ => Err(ParseError::UnexpectedToken),
None => Err(ParseError::UnexpectedEnd)
*/
_ => {}
} }
if children.len() > 0 { if children.len() > 0 {

View file

@ -44,7 +44,8 @@ impl ProcedureCompiler {
pub fn compile_statement(mut self, statement: &Statement) -> Self { pub fn compile_statement(mut self, statement: &Statement) -> Self {
match statement { match statement {
Statement::Assignment{ var_id, val_expr } => { Statement::Assignment{ var_id, val_expr } => {
self = self.compile(val_expr); self = self.compile(val_expr);
match self.symbols.read().unwrap().get(var_id) { match self.symbols.read().unwrap().get(var_id) {
Some(SymbolDef::FrameRef{ typ, stack_ref }) => { Some(SymbolDef::FrameRef{ typ, stack_ref }) => {
self.asm = self.asm self.asm = self.asm

View file

@ -208,15 +208,14 @@ pub fn init_runtime(linker: &mut Linker) -> Arc<RwLock<Scope>> {
ProcedureCompiler::new(&symbols) ProcedureCompiler::new(&symbols)
.compile( .compile(
&LTExpr::abstraction( &LTExpr::abstraction(
&typectx,
"x", "x",
"_2^64~machine::UInt64~machine::Word", "_2^64~machine::UInt64~machine::Word",
LTExpr::application( LTExpr::application(
LTExpr::symbol(&typectx, "i*"), LTExpr::symbol("i*"),
vec![ vec![
LTExpr::symbol(&typectx, "x"), LTExpr::symbol("x"),
LTExpr::symbol(&typectx, "x") LTExpr::symbol("x")
] ]
) )
) )