From ebc5f720bf9f820bfec031aa80ff6782c16008d6 Mon Sep 17 00:00:00 2001
From: Michael Sippel
Date: Thu, 9 May 2024 20:13:10 +0200
Subject: [PATCH] parser wip

---
 src/lexer.rs  | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.rs   |  25 +++++++
 src/parser.rs |  73 ++++++++++++++++++++
 3 files changed, 277 insertions(+)
 create mode 100644 src/lexer.rs
 create mode 100644 src/parser.rs

diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..49db092
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,179 @@
+
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub enum LTIRToken {
+    Symbol( String ),
+    Char( char ),
+    Num( i64 ),
+
+    // SingleQuote(String),
+    // DoubleQuote(String),
+    // TripleQuote(String),
+
+    ExprOpen,
+    ExprClose,
+
+    BlockOpen,
+    BlockClose,
+    StatementSep,
+}
+
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub enum LexError {
+    InvalidDigit,
+    InvalidChar
+}
+
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub enum LexerState {
+    Any,
+    Sym( String ),
+    Num( i64 ),
+    Char( Option<char> )
+}
+
+impl LexerState {
+    fn into_token(self) -> Option< LTIRToken > {
+        match self {
+            LexerState::Any => None,
+            LexerState::Sym(s) => Some(LTIRToken::Symbol(s)),
+            LexerState::Num(n) => Some(LTIRToken::Num(n)),
+            LexerState::Char(c) => Some(LTIRToken::Char(c?))
+        }
+    }
+}
+
+
+pub struct LTIRLexer<It>
+where It: std::iter::Iterator<Item = char>
+{
+    chars: std::iter::Peekable<It>,
+}
+
+impl<It> LTIRLexer<It>
+where It: Iterator<Item = char>
+{
+    pub fn into_inner(self) -> std::iter::Peekable<It> {
+        self.chars
+    }
+}
+
+impl<It> From<It> for LTIRLexer<It>
+where It: Iterator<Item = char>
+{
+    fn from(chars: It) -> Self {
+        LTIRLexer {
+            chars: chars.peekable()
+        }
+    }
+}
+
+impl<It> Iterator for LTIRLexer<It>
+where It: Iterator<Item = char>
+{
+    type Item = Result<LTIRToken, LexError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut state = LexerState::Any;
+
+        while let Some(c) = self.chars.peek() {
+            match &mut state {
+
+                // determine token type
+                LexerState::Any => {
+                    match c {
+                        '(' => { self.chars.next(); return Some(Ok(LTIRToken::ExprOpen)); },
+                        ')' => { self.chars.next(); return Some(Ok(LTIRToken::ExprClose)); },
+                        '{' => { self.chars.next(); return Some(Ok(LTIRToken::BlockOpen)); },
+                        '}' => { self.chars.next(); return Some(Ok(LTIRToken::BlockClose)); },
+                        ';' => { self.chars.next(); return Some(Ok(LTIRToken::StatementSep)); },
+                        '\'' => { self.chars.next(); state = LexerState::Char(None); },
+                        c => {
+                            if c.is_whitespace() {
+                                self.chars.next();
+                            } else if c.is_digit(10) {
+                                state = LexerState::Num( 0 );
+                            } else {
+                                state = LexerState::Sym( String::new() );
+                            }
+                        }
+                    }
+                }
+
+                LexerState::Char(val) => {
+                    *val = Some(
+                        match self.chars.next() {
+                            Some('\\') => {
+                                match self.chars.next() {
+                                    Some('0') => '\0',
+                                    Some('n') => '\n',
+                                    Some('t') => '\t',
+                                    Some(c) => c,
+                                    None => {
+                                        return Some(Err(LexError::InvalidChar));
+                                    }
+                                }
+                            }
+                            Some(c) => c,
+                            None => {
+                                return Some(Err(LexError::InvalidChar));
+                            }
+                        });
+
+                    match self.chars.next() {
+                        Some('\'') => {
+                            if let Some(token) = state.clone().into_token() {
+                                return Some(Ok(token));
+                            }
+                        }
+                        _ => {
+                            return Some(Err(LexError::InvalidChar));
+                        }
+                    }
+                }
+
+                _ => {
+
+                    if c.is_whitespace()
+                        || *c == '(' || *c == ')'
+                        || *c == '{' || *c == '}'
+                        || *c == ';'
+                    {
+                        // finish the current token
+
+                        if let Some(token) = state.clone().into_token() {
+                            return Some(Ok(token));
+                        }
+                    } else {
+                        // append to the current token
+
+                        let c = self.chars.next().unwrap();
+
+                        match &mut state {
+                            LexerState::Sym(s) => {
+                                s.push(c);
+                            }
+
+                            LexerState::Num(n) => {
+                                if let Some(d) = c.to_digit(10) {
+                                    *n = (*n) * 10 + d as i64;
+                                } else {
+                                    return Some(Err(LexError::InvalidDigit));
+                                }
+                            }
+
+                            _ => {}
+                        }
+                    }
+                }
+            }
+        }
+
+        if let Some(token) = state.into_token() {
+            Some(Ok(token))
+        } else {
+            None
+        }
+    }
+}
+
+
diff --git a/src/main.rs b/src/main.rs
index 794d7c9..6194e6e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,6 +8,8 @@ mod expr;
 mod symbols;
 mod procedure_compiler;
 mod runtime;
+mod lexer;
+mod parser;
 
 use crate::{
     expr::{LTExpr, Statement},
@@ -15,6 +17,10 @@ use crate::{
     procedure_compiler::ProcedureCompiler
 };
 
+
+/*
+*/
+
 fn main() {
     // create virtual machine with 4096 words of memory
     let mut vm = tisc::VM::new(0x1000);
@@ -25,6 +31,25 @@ fn main() {
 
     let typectx = main_scope.read().unwrap().typectx.clone();
 
+
+    let mut lexer = lexer::LTIRLexer::from( "{
+        emit '*';
+
+        let x : ℤ_2^64~machine::UInt64~machine::Word = 8;
+        while (i< x 7) {
+            = x (i+ x 1);
+        }
+
+        let square = (
+            (λ x (i* x x))
+        )
+    }".chars() );
+
+    let block = parser::parse_block( &mut lexer.peekable() );
+    eprintln!("parsed block = {:?}", block);
+
     /* define type of the symbol */
     main_scope.write().unwrap()
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..ffeb44d
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,73 @@
+use {
+    std::iter::Peekable,
+    crate::{
+        lexer::{LTIRLexer, LTIRToken, LexError},
+        expr::LTExpr
+    }
+};
+
+#[derive(Clone, Debug)]
+pub enum ParseError {
+    LexError(LexError),
+    UnexpectedClose,
+    UnexpectedEnd,
+    UnexpectedToken
+}
+
+pub fn parse_statement<It>(
+    tokens: &mut Peekable<LTIRLexer<It>>
+) -> Result< crate::expr::Statement, ParseError >
+where It: Iterator<Item = char>
+{
+    Err(ParseError::UnexpectedEnd)
+}
+
+pub fn parse_block<It>(
+    tokens: &mut Peekable<LTIRLexer<It>>
+) -> Result< crate::expr::LTExpr, ParseError >
+where It: Iterator<Item = char>
+{
+    Err(ParseError::UnexpectedEnd)
+}
+
+pub fn parse_expr<It>(
+    tokens: &mut Peekable<LTIRLexer<It>>
+) -> Result< crate::expr::LTExpr, ParseError >
+where It: Iterator<Item = char>
+{
+    let mut children = Vec::new();
+
+    match tokens.next() {
+        Some(Ok(LTIRToken::ExprOpen)) => {
+            if let Ok(subexpr) = parse_expr( tokens ) {
+
+            } else {
+
+            }
+/*
+            Err(ParseError::UnexpectedEnd)
+*/
+        },
+        Some(Ok(LTIRToken::BlockOpen)) => {
+            /*
+            Err(ParseError::UnexpectedEnd)
+            */
+        }
+        /*
+        _ => Err(ParseError::UnexpectedToken),
+        None => Err(ParseError::UnexpectedEnd)
+        */
+        _ => {}
+    }
+
+    if children.len() > 0 {
+        let head = children.remove(0);
+        Ok(LTExpr::Application {
+            head: Box::new(head),
+            body: children
+        })
+    } else {
+        Err(ParseError::UnexpectedEnd)
+    }
+}
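
Usage sketch (not part of the patch): the lexer added above can be exercised on its
own, since LTIRLexer is built from any char iterator and yields Result<LTIRToken,
LexError> items. The test below only uses LTIRLexer and LTIRToken from src/lexer.rs;
the module and test names are made up for illustration. It shows the token stream
produced for the statement "(i+ x 1);" that also appears in the main.rs example input.

    #[cfg(test)]
    mod tests {
        use crate::lexer::{LTIRLexer, LTIRToken};

        #[test]
        fn tokenize_simple_expr() {
            // collect the Result<LTIRToken, LexError> items into a single Result
            let tokens = LTIRLexer::from("(i+ x 1);".chars())
                .collect::<Result<Vec<_>, _>>()
                .expect("lexing should succeed");

            assert_eq!(
                tokens,
                vec![
                    LTIRToken::ExprOpen,
                    LTIRToken::Symbol("i+".into()),
                    LTIRToken::Symbol("x".into()),
                    LTIRToken::Num(1),
                    LTIRToken::ExprClose,
                    LTIRToken::StatementSep,
                ]
            );
        }
    }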
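
Parser sketch (an assumption, not the final implementation): parse_expr is still a stub
in this patch. One possible shape for the missing loop is to peek for the matching
ExprClose and recurse for everything in between. The helper name skip_expr below is
hypothetical; it only checks bracket structure using the token and error types from
this patch, and building real LTExpr nodes would additionally need the constructors
defined in src/expr.rs, which are not shown here.

    use std::iter::Peekable;
    use crate::lexer::{LTIRLexer, LTIRToken};
    use crate::parser::ParseError;

    // Skip over one balanced expression in the token stream.
    pub fn skip_expr<It>(
        tokens: &mut Peekable<LTIRLexer<It>>
    ) -> Result<(), ParseError>
    where It: Iterator<Item = char>
    {
        match tokens.next() {
            Some(Ok(LTIRToken::ExprOpen)) => {
                // consume sub-expressions until the matching ')'
                loop {
                    match tokens.peek() {
                        Some(Ok(LTIRToken::ExprClose)) => {
                            tokens.next();
                            return Ok(());
                        }
                        None => return Err(ParseError::UnexpectedEnd),
                        _ => { skip_expr(tokens)?; }
                    }
                }
            }
            Some(Ok(LTIRToken::ExprClose)) => Err(ParseError::UnexpectedClose),
            // any other single token is treated as an atom in this sketch
            Some(Ok(_)) => Ok(()),
            Some(Err(e)) => Err(ParseError::LexError(e)),
            None => Err(ParseError::UnexpectedEnd),
        }
    }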