From d7c06d423e4271b7797b0c61fa41e8f0ae46f8cc Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Sat, 11 May 2024 00:00:20 +0200
Subject: [PATCH] first basic parser

---
 src/expr.rs               |  20 ++--
 src/lexer.rs              |   3 +
 src/main.rs               | 142 ++++++++------------------
 src/parser.rs             | 210 ++++++++++++++++++++++++++++++++++----
 src/procedure_compiler.rs |   3 +-
 src/runtime.rs            |   7 +-
 6 files changed, 251 insertions(+), 134 deletions(-)

diff --git a/src/expr.rs b/src/expr.rs
index 9afc91e..bef3d0c 100644
--- a/src/expr.rs
+++ b/src/expr.rs
@@ -23,11 +23,11 @@ pub enum Statement {
 #[derive(Clone, Debug)]
 pub enum LTExpr {
     Literal {
-        typ: laddertypes::TypeTerm,
+        typ: Option< laddertypes::TypeTerm >,
         val: tisc::VM_Word
     },
     Symbol {
-        typ: laddertypes::TypeTerm,
+        typ: Option< laddertypes::TypeTerm >,
         symbol: String,
     },
     Application {
@@ -36,7 +36,7 @@ pub enum LTExpr {
     },
     Abstraction {
         arg_id: String,
-        arg_type: laddertypes::TypeTerm,
+        arg_type: Option< laddertypes::TypeTerm >,
         val_expr: Box<LTExpr>
     },
     Let {
@@ -55,29 +55,29 @@ pub enum LTExpr {
 }
 
 impl LTExpr {
-    pub fn symbol(typectx: &Arc<RwLock<laddertypes::TypeDict>>, str: &str) -> Self {
+    pub fn symbol(str: &str) -> Self { // builds a `Symbol` node named `str`
         LTExpr::Symbol {
-            typ: typectx.write().unwrap().parse("<Ref memory::Word>~Symbol~<Seq Char>").expect("parse typeterm"),
+            typ: None, // type is left unset here; previously: typectx.write().unwrap().parse("<Ref memory::Word>~Symbol~<Seq Char>").expect("parse typeterm")
             symbol: String::from(str)
         }
     }
 
-    pub fn lit_uint(typectx: &Arc<RwLock<laddertypes::TypeDict>>, val: u64) -> Self {
+    pub fn lit_uint(val: u64) -> Self { // builds a `Literal` node holding `val`
         LTExpr::Literal {
-            typ: typectx.write().unwrap().parse("ℤ_2^64~machine::UInt64~machine::Word").expect("parse typeterm"),
+            typ: None, // type is left unset here; previously: typectx.write().unwrap().parse("ℤ_2^64~machine::UInt64~machine::Word").expect("parse typeterm")
             val: val as tisc::VM_Word
         }
     }
 
-    pub fn abstraction(typectx: &Arc<RwLock<laddertypes::TypeDict>>, arg_id: &str, arg_typ: &str, val_expr: LTExpr) -> LTExpr {
+    pub fn abstraction(arg_id: &str, arg_typ: &str, val_expr: LTExpr) -> LTExpr { // builds an `Abstraction` binding `arg_id` in `val_expr`
         LTExpr::Abstraction {
             arg_id: String::from(arg_id),
-            arg_type: typectx.write().unwrap().parse(arg_typ).expect("parse typeterm"),
+            arg_type: None, // NOTE: `arg_typ` is accepted but currently unused; previously: typectx.write().unwrap().parse(arg_typ).expect("parse typeterm")
             val_expr: Box::new(val_expr)
         }
     }
 
-    pub fn let_expr(typectx: &Arc<RwLock<laddertypes::TypeDict>>, name: &str, val: LTExpr, body: LTExpr) -> Self {
+    pub fn let_expr(name: &str, val: LTExpr, body: LTExpr) -> Self {
         LTExpr::Let {
             name: String::from(name),
             val: Box::new(val),
diff --git a/src/lexer.rs b/src/lexer.rs
index 49db092..e2c53b9 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -9,6 +9,8 @@ pub enum LTIRToken {
     // DoubleQuote(String),
     // TripleQuote(String),
 
+    Lambda,
+
     ExprOpen,
     ExprClose,
 
@@ -85,6 +87,7 @@ where It: Iterator<Item = char>
                         ')' => { self.chars.next(); return Some(Ok(LTIRToken::ExprClose)); },
                         '{' => { self.chars.next(); return Some(Ok(LTIRToken::BlockOpen)); },
                         '}' => { self.chars.next(); return Some(Ok(LTIRToken::BlockClose)); },
+                        'λ' => { self.chars.next(); return Some(Ok(LTIRToken::Lambda)); },
                         ';' => { self.chars.next(); return Some(Ok(LTIRToken::StatementSep)); },
                         '\'' => { self.chars.next(); state = LexerState::Char(None); },
                         c => {
diff --git a/src/main.rs b/src/main.rs
index 6194e6e..5aee775 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -17,9 +17,18 @@ use crate::{
     procedure_compiler::ProcedureCompiler
 };
 
-
-/*
-*/
+/// Parses `source` as a single expression and compiles it within `scope`,
+/// returning the assembled words. Panics with "syntax error" if parsing fails.
+fn compile(scope: &Arc<RwLock<Scope>>, source: &str) -> Vec< tisc::assembler::AssemblyWord > {
+    let compiler = ProcedureCompiler::new(scope);
+
+    // `parse_expr` wants a peekable token stream; the lexer itself is built
+    // from a peekable char stream.
+    let mut tokens = lexer::LTIRLexer::from(source.chars().peekable()).peekable();
+    let ast = parser::parse_expr(&mut tokens).expect("syntax error");
+
+    compiler.compile(&ast).into_asm().build()
+}
 
 fn main() {
     // create virtual machine with 4096 words of memory
@@ -28,28 +37,8 @@ fn main() {
     let mut linker = tisc::Linker::new();
     let root_scope = crate::runtime::init_runtime(&mut linker);
     let main_scope = Scope::with_parent(&root_scope);
-
     let typectx = main_scope.read().unwrap().typectx.clone();
 
-
-    let mut lexer = lexer::LTIRLexer::from(
-        "{
-            emit '*';
-
-            let x : ℤ_2^64~machine::UInt64~machine::Word = 8;
-            while (i< x 7) {
-                = x (i+ x 1);
-            }
-
-            let square = (
-                (λ x (i* x x))
-            )
-        }".chars()
-    );
-
-    let block = parser::parse_block( &mut lexer.peekable() );
-    eprintln!("parsed block = {:?}", block);
-
     /* define type of the symbol
      */
     main_scope.write().unwrap()
@@ -64,68 +53,21 @@ fn main() {
     main_scope.write().unwrap()
         .declare_proc_parse(
             "print-nullterm",
-        vec![],
-        vec![
-            "<Ref <Seq Char~Ascii~machine::Word>~<NullTerminatedSeq machine::Word>>
-            ~machine::Address
-            ~machine::Word"
-        ],
-        vec![]);
+            vec![],
+            vec![
+                "<Ref <Seq Char~Ascii~machine::Word>~<NullTerminatedSeq machine::Word>>
+                ~machine::Address
+                ~machine::Word"
+            ],
+            vec![]);
 
     /* link assembly-program to symbol
      */
     linker.add_procedure(
         "main",
-        ProcedureCompiler::new(&main_scope)
-            .compile(
-                &LTExpr::block(vec![
-                    Statement::Expr(
-                        LTExpr::application(
-                            LTExpr::abstraction(
-                                &typectx,
-                                "c",
-                                "Char",
-                                LTExpr::block(vec![
-                                    Statement::Expr(LTExpr::application(
-                                        LTExpr::symbol(&typectx, "emit"),
-                                        vec![
-                                            LTExpr::symbol(&typectx, "c")
-                                        ]
-                                    )),
-                                    Statement::Assignment{
-                                        var_id: "c".into(),
-                                        val_expr: LTExpr::application(
-                                            LTExpr::symbol(&typectx, "i+"),
-                                            vec![
-                                                LTExpr::symbol(&typectx, "c"),
-                                                LTExpr::lit_uint(&typectx, 1)
-                                            ]
-                                        )
-                                    },
-                                    Statement::Expr(LTExpr::application(
-                                        LTExpr::symbol(&typectx, "emit"),
-                                        vec![
-                                            LTExpr::symbol(&typectx, "c")
-                                        ]
-                                    ))
-                                ])
-                            ),
-                            vec![
-                                LTExpr::lit_uint(&typectx, 42)
-                            ]
-                        )),
-                    Statement::Expr(
-                        LTExpr::application(
-                            LTExpr::symbol(&typectx, "emit"),
-                            vec![
-                                LTExpr::lit_uint(&typectx, 10)
-                            ]
-                        ))
-                ])
-            )
-            .into_asm()
-            .build()
-    );
+        compile(&main_scope, "{
+            print-nullterm hello-string;
+        }"));
 
     linker.add_static("hello-string",
         "Hallo Welt!\n\0"
@@ -135,21 +77,13 @@ fn main() {
 
     linker.add_procedure(
         "print-nullterm",
-        tisc::Assembler::new()
-            .while_loop(
-                tisc::Assembler::new()
-                    .inst( tisc::VM_Instruction::Dup )
-                    .inst( tisc::VM_Instruction::Fetch )
-                    .inst( tisc::VM_Instruction::Dup ),
-
-                tisc::Assembler::new()
-                    .inst( tisc::VM_Instruction::Emit )
-                    .lit( 1 )
-                    .inst( tisc::VM_Instruction::Add )
-            )
-            .inst( tisc::VM_Instruction::Drop )
-            .inst( tisc::VM_Instruction::Drop )
-            .build()
+        compile(&main_scope,
+            "λ str {    
+                while (@ str) {
+                    emit (@ str);
+                    ! str (i+ str 1);
+                }
+            }")
     );
 
     main_scope.write().unwrap().declare_proc_parse(
@@ -161,9 +95,23 @@ fn main() {
             ~machine::Word"
         ],
         vec![]);
-
+/*
     linker.add_procedure(
         "print-lenprefix",
+        compile(&main_scope,
+            "λ str {
+                let len = (@ str);
+                ! str (i+ str 1);
+                let end = (i+ str len);
+                while (i- str end) {
+                    emit (@ str);
+                    ! str (i+ str 1);
+                }
+            }"
+        )
+    );
+    */
+    /*
         tisc::Assembler::new()
             // calculate stop address
             .inst( tisc::VM_Instruction::Dup )
@@ -193,7 +141,7 @@ fn main() {
             .inst( tisc::VM_Instruction::Drop )
             .inst( tisc::VM_Instruction::Drop )
             .build()
-    );
+    */
 
     let main_addr = linker.get_link_addr(&"main".into()).expect("'main' not linked"); 
     vm.load( linker.link_total().expect("could not link") );
diff --git a/src/parser.rs b/src/parser.rs
index ffeb44d..dce3bd6 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -2,7 +2,7 @@ use {
     std::iter::Peekable,
     crate::{
         lexer::{LTIRLexer, LTIRToken, LexError},
-        expr::LTExpr
+        expr::{LTExpr, Statement}
     }
 };
 
@@ -14,20 +14,140 @@ pub enum ParseError {
     UnexpectedToken
 }
 
+/// Consume the next token and succeed only if it equals `expected_token`.
+///
+/// A different token yields `UnexpectedToken`, a lexer failure is wrapped in
+/// `LexError`, and an exhausted stream yields `UnexpectedEnd`.
+pub fn parse_expect<It>(
+    tokens: &mut Peekable<LTIRLexer<It>>,
+    expected_token: LTIRToken
+) -> Result< (), ParseError >
+where It: Iterator<Item = char>
+{
+    let found = match tokens.next() {
+        None => return Err(ParseError::UnexpectedEnd),
+        Some(Err(err)) => return Err(ParseError::LexError(err)),
+        Some(Ok(tok)) => tok,
+    };
+
+    if found == expected_token { Ok(()) } else { Err(ParseError::UnexpectedToken) }
+}
+
+/// Consume the next token and return its name if it is a `Symbol`.
+pub fn parse_symbol<It>(
+    tokens: &mut Peekable<LTIRLexer<It>>
+) -> Result< String, ParseError >
+where It: Iterator<Item = char>
+{
+    match tokens.next().ok_or(ParseError::UnexpectedEnd)? {
+        Ok(LTIRToken::Symbol(name)) => Ok(name),
+        Ok(_) => Err(ParseError::UnexpectedToken),
+        Err(err) => Err(ParseError::LexError(err)),
+    }
+}
+
 pub fn parse_statement<It>(
     tokens: &mut Peekable<LTIRLexer<It>>
 ) -> Result< crate::expr::Statement, ParseError >
 where It: Iterator<Item = char>
 {
-    Err(ParseError::UnexpectedEnd)
+    if let Some(peektok) = tokens.peek() {
+        match peektok {
+            Ok(LTIRToken::Symbol(sym)) => {
+                match sym.as_str() {
+                    "!" => {
+                        tokens.next();
+                        let name = parse_symbol(tokens)?;
+                        let val_expr = parse_expr(tokens)?;
+                        let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
+
+                        Ok(Statement::Assignment {
+                            var_id: name,
+                            val_expr
+                        })
+                    }
+                    "while" => {
+                        tokens.next();
+                        let _ = parse_expect(tokens, LTIRToken::ExprOpen)?;
+                        let cond = parse_expr(tokens)?;
+                        let _ = parse_expect(tokens, LTIRToken::ExprClose)?;
+                        Ok(Statement::WhileLoop {
+                            condition: cond,
+                            body: parse_block(tokens)?
+                        })
+                    }
+                    "return" => {
+                        tokens.next();
+                        let expr = parse_expr(tokens)?;
+                        let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
+                        Ok(Statement::Return(parse_expr(tokens)?))
+                    }
+                    _ => {
+                        let expr = parse_expr(tokens)?;
+                        let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
+                        Ok(Statement::Expr(expr))
+                    }
+                }
+            }
+            Ok(_) => {
+                let expr = parse_expr(tokens)?;
+                let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
+                Ok(Statement::Expr(expr))
+            },
+            Err(err) => Err(ParseError::LexError(err.clone()))
+        }
+    } else {
+        Err(ParseError::UnexpectedEnd)
+    }
 }
 
 pub fn parse_block<It>(
     tokens: &mut Peekable<LTIRLexer<It>>
+) -> Result< Vec<Statement>, ParseError >
+where It: Iterator<Item = char>
+{
+    let _ = parse_expect(tokens, LTIRToken::BlockOpen)?;
+
+    let mut statements = Vec::new();
+    while let Some(peektok) = tokens.peek() {
+        match peektok {
+            Ok(LTIRToken::BlockClose) => {
+                tokens.next();
+                return Ok(statements)
+            }
+            Ok(_) => { statements.push( parse_statement(tokens)? ); }
+            Err(err) => { return Err(ParseError::LexError(err.clone())); }
+        }
+    }
+
+    Err(ParseError::UnexpectedEnd)
+}
+
+/// Parse a single atom: a symbol, a character literal or a number literal.
+///
+/// Characters and numbers are both turned into unsigned integer literals
+/// via `LTExpr::lit_uint`. Any other token is rejected with
+/// `UnexpectedToken`; a lexer error is wrapped in `LexError` and end of
+/// input is reported as `UnexpectedEnd`.
+pub fn parse_atom<It>(
+    tokens: &mut Peekable<LTIRLexer<It>>
 ) -> Result< crate::expr::LTExpr, ParseError >
 where It: Iterator<Item = char>
 {
-    Err(ParseError::UnexpectedEnd)
+    // Pull the next token, bailing out on a lexer error or end of input.
+    let token = match tokens.next() {
+        Some(Ok(token)) => token,
+        Some(Err(err)) => return Err(ParseError::LexError(err)),
+        None => return Err(ParseError::UnexpectedEnd),
+    };
+
+    // Map the token onto the matching expression node.
+    match token {
+        LTIRToken::Symbol(sym) => Ok(LTExpr::symbol(&sym)),
+        LTIRToken::Char(c) => Ok(LTExpr::lit_uint(c as u64)),
+        LTIRToken::Num(n) => Ok(LTExpr::lit_uint(n as u64)),
+        _ => Err(ParseError::UnexpectedToken),
+    }
 }
 
 pub fn parse_expr<It>(
@@ -37,27 +157,73 @@ where It: Iterator<Item = char>
 {
     let mut children = Vec::new();
 
-    match tokens.next() {
-        Some(Ok(LTIRToken::ExprOpen)) => {
-            if let Ok(subexpr) = parse_expr( tokens ) {
-                
-            } else {
-                
+    while let Some(tok) = tokens.peek() {
+        match tok {
+            Ok(LTIRToken::Lambda) => {
+                if children.len() == 0 {
+                    tokens.next();
+                    let name = parse_symbol(tokens)?;
+                    let body = parse_expr(tokens)?;
+
+                    return Ok(LTExpr::Abstraction{
+                        arg_id: name,
+                        arg_type: None,
+                        val_expr: Box::new(body)
+                    });
+                } else {
+                    return Err(ParseError::UnexpectedToken);
+                }
             }
-/*
-            Err(ParseError::UnexpectedEnd)
-        */
-        },
-        Some(Ok(LTIRToken::BlockOpen)) => {
-            /*
-            Err(ParseError::UnexpectedEnd)
-            */
+            Ok(LTIRToken::ExprOpen) => {
+                tokens.next();
+                while let Some(peektok) = tokens.peek() {
+                    match peektok {
+                        Ok(LTIRToken::ExprClose) => {
+                            tokens.next();
+                            break;
+                        }
+                        _ => {}
+                    }
+                    children.push(parse_expr(tokens)?);
+                }
+            },
+            Ok(LTIRToken::ExprClose) => { break; }
+            Ok(LTIRToken::BlockOpen) => { children.push( LTExpr::block(parse_block(tokens)?)); }
+            Ok(LTIRToken::BlockClose) => { break; }
+            Ok(LTIRToken::StatementSep) => { break; }
+            Ok(LTIRToken::Symbol(name)) => {
+                match name.as_str() {
+                    "if" => {
+                        tokens.next();
+                        let _ = parse_expect(tokens, LTIRToken::ExprOpen)?;
+                        let cond = parse_expr(tokens)?;
+                        let _ = parse_expect(tokens, LTIRToken::ExprClose)?;
+                        let if_expr = LTExpr::block(parse_block(tokens)?);
+                        let mut else_expr = LTExpr::block(vec![]);
+
+                        if let Some(peektok) = tokens.peek() {
+                            if let Ok(LTIRToken::Symbol(name)) = peektok {
+                                if name == "else" {
+                                    tokens.next();
+                                    else_expr = parse_expr(tokens)?;
+                                }
+                            }
+                        }
+
+                        children.push(LTExpr::Branch{
+                            condition: Box::new(cond),
+                            if_expr: Box::new(if_expr),
+                            else_expr: Box::new(else_expr)
+                        });
+                    }
+                    name => {
+                        children.push(parse_atom(tokens)?);
+                    }
+                }
+            }
+            Ok(atom) => { children.push(parse_atom(tokens)?); }
+            Err(err) => { return Err(ParseError::LexError(err.clone())); }
         }
-        /*
-        _ => Err(ParseError::UnexpectedToken),
-        None => Err(ParseError::UnexpectedEnd)
-        */
-        _ => {}
     }
 
     if children.len() > 0 {
diff --git a/src/procedure_compiler.rs b/src/procedure_compiler.rs
index 730b6a9..9c75334 100644
--- a/src/procedure_compiler.rs
+++ b/src/procedure_compiler.rs
@@ -44,7 +44,8 @@ impl ProcedureCompiler {
     pub fn compile_statement(mut self, statement: &Statement) -> Self {
         match statement {
             Statement::Assignment{ var_id, val_expr } => {
-                self = self.compile(val_expr);    
+                self = self.compile(val_expr);
+
                 match self.symbols.read().unwrap().get(var_id) {
                     Some(SymbolDef::FrameRef{ typ, stack_ref }) => {
                         self.asm = self.asm
diff --git a/src/runtime.rs b/src/runtime.rs
index 40328e1..fe43f21 100644
--- a/src/runtime.rs
+++ b/src/runtime.rs
@@ -208,15 +208,14 @@ pub fn init_runtime(linker: &mut Linker) -> Arc<RwLock<Scope>> {
         ProcedureCompiler::new(&symbols)
             .compile(
                 &LTExpr::abstraction(
-                    &typectx,
                     "x",
                     "ℤ_2^64~machine::UInt64~machine::Word",
 
                     LTExpr::application(
-                        LTExpr::symbol(&typectx, "i*"),
+                        LTExpr::symbol("i*"),
                         vec![
-                           LTExpr::symbol(&typectx, "x"),
-                           LTExpr::symbol(&typectx, "x")
+                           LTExpr::symbol("x"),
+                           LTExpr::symbol("x")
                         ]
                     )
                 )