parser wip

This commit is contained in:
Michael Sippel 2024-05-09 20:13:10 +02:00
parent 09e9e063ae
commit ebc5f720bf
Signed by: senvas
GPG key ID: F96CF119C34B64A6
3 changed files with 277 additions and 0 deletions

179
src/lexer.rs Normal file
View file

@@ -0,0 +1,179 @@
/// Tokens emitted by the LTIR lexer.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LTIRToken {
    /// A bare word: anything that is not a delimiter, digit-led number,
    /// or quoted character (e.g. `let`, `while`, `i+`).
    Symbol( String ),
    /// A single-quoted character literal, e.g. `'*'`
    /// (supports `\0`, `\n`, `\t` escapes).
    Char( char ),
    /// A decimal (base-10) integer literal.
    Num( i64 ),

    // String-literal tokens — not implemented yet:
    // SingleQuote(String),
    // DoubleQuote(String),
    // TripleQuote(String),

    /// `(`
    ExprOpen,
    /// `)`
    ExprClose,
    /// `{`
    BlockOpen,
    /// `}`
    BlockClose,
    /// `;`
    StatementSep,
}
/// Errors the lexer can report.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexError {
    /// A non-digit character appeared while scanning a number token.
    InvalidDigit,
    /// A malformed or unterminated character literal.
    InvalidChar
}
/// Internal accumulator state of the lexer while scanning one token.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexerState {
    /// No token started yet.
    Any,
    /// Collecting a symbol; holds the characters read so far.
    Sym( String ),
    /// Collecting a number; holds the value accumulated so far.
    Num( i64 ),
    /// Inside a character literal; `None` until the payload char is read.
    Char( Option<char> )
}
impl LexerState {
    /// Consumes the accumulator state and converts it into the finished
    /// token. Returns `None` when no complete token has been collected
    /// (`Any`, or a character literal whose payload was never read).
    fn into_token(self) -> Option< LTIRToken > {
        match self {
            LexerState::Any => None,
            LexerState::Sym(name) => Some(LTIRToken::Symbol(name)),
            LexerState::Num(value) => Some(LTIRToken::Num(value)),
            LexerState::Char(Some(chr)) => Some(LTIRToken::Char(chr)),
            LexerState::Char(None) => None,
        }
    }
}
/// Tokenizer for LTIR source text, driven by a peekable character stream.
pub struct LTIRLexer<It: std::iter::Iterator<Item = char>> {
    chars: std::iter::Peekable<It>,
}
impl<It> LTIRLexer<It>
where It: Iterator<Item = char>
{
pub fn into_inner(self) -> std::iter::Peekable<It> {
self.chars
}
}
impl<It> From<It> for LTIRLexer<It>
where It: Iterator<Item = char>
{
fn from(chars: It) -> Self {
LTIRLexer {
chars: chars.peekable()
}
}
}
impl<It> Iterator for LTIRLexer<It>
where It: Iterator<Item = char>
{
    type Item = Result<LTIRToken, LexError>;

    /// Scans and returns the next token.
    ///
    /// Returns `None` when the input is exhausted, `Some(Ok(..))` for a
    /// token, or `Some(Err(..))` for a malformed number/char literal.
    fn next(&mut self) -> Option<Self::Item> {
        // Accumulator for the token currently being scanned.
        let mut state = LexerState::Any;

        while let Some(c) = self.chars.peek() {
            match &mut state {
                // determine token type
                LexerState::Any => {
                    match c {
                        // single-character tokens: consume and return immediately
                        '(' => { self.chars.next(); return Some(Ok(LTIRToken::ExprOpen)); },
                        ')' => { self.chars.next(); return Some(Ok(LTIRToken::ExprClose)); },
                        '{' => { self.chars.next(); return Some(Ok(LTIRToken::BlockOpen)); },
                        '}' => { self.chars.next(); return Some(Ok(LTIRToken::BlockClose)); },
                        ';' => { self.chars.next(); return Some(Ok(LTIRToken::StatementSep)); },
                        // opening quote of a character literal
                        '\'' => { self.chars.next(); state = LexerState::Char(None); },
                        c => {
                            if c.is_whitespace() {
                                // skip whitespace between tokens
                                self.chars.next();
                            } else if c.is_digit(10) {
                                // a leading digit starts a number token; the digit
                                // itself is not consumed here — it is appended by
                                // the accumulating arm on the next loop iteration
                                state = LexerState::Num( 0 );
                            } else {
                                // anything else starts a symbol token
                                state = LexerState::Sym( String::new() );
                            }
                        }
                    }
                }

                // inside a character literal: read the payload character
                // (handling backslash escapes), then require a closing quote
                LexerState::Char(val) => {
                    *val = Some(
                        match self.chars.next() {
                            Some('\\') => {
                                // escapes: \0, \n, \t; any other escaped
                                // character stands for itself
                                match self.chars.next() {
                                    Some('0') => '\0',
                                    Some('n') => '\n',
                                    Some('t') => '\t',
                                    Some(c) => c,
                                    None => {
                                        // input ended right after the backslash
                                        return Some(Err(LexError::InvalidChar));
                                    }
                                }
                            }
                            Some(c) => c,
                            None => {
                                // input ended right after the opening quote
                                return Some(Err(LexError::InvalidChar));
                            }
                        });

                    match self.chars.next() {
                        Some('\'') => {
                            // properly terminated literal
                            if let Some(token) = state.clone().into_token() {
                                return Some(Ok(token));
                            }
                        }
                        _ => {
                            // missing closing quote
                            return Some(Err(LexError::InvalidChar));
                        }
                    }
                }

                // accumulating a symbol (Sym) or number (Num)
                _ => {
                    if c.is_whitespace()
                        || *c == '(' || *c == ')'
                        || *c == '{' || *c == '}'
                        || *c == ';'
                    {
                        // finish the current token; the delimiter stays in
                        // the stream and is handled by the next call
                        if let Some(token) = state.clone().into_token() {
                            return Some(Ok(token));
                        }
                    } else {
                        // append to the current token
                        let c = self.chars.next().unwrap();
                        match &mut state {
                            LexerState::Sym(s) => {
                                s.push(c);
                            }
                            LexerState::Num(n) => {
                                // decimal digits only
                                // NOTE(review): no overflow check — a very long
                                // numeral wraps/panics depending on build mode
                                if let Some(d) = c.to_digit(10) {
                                    *n = (*n) * 10 + d as i64;
                                } else {
                                    return Some(Err(LexError::InvalidDigit));
                                }
                            }
                            _ => {}
                        }
                    }
                }
            }
        }

        // end of input: emit whatever token was still being accumulated
        if let Some(token) = state.into_token() {
            Some(Ok(token))
        } else {
            None
        }
    }
}

View file

@@ -8,6 +8,8 @@ mod expr;
mod symbols;
mod procedure_compiler;
mod runtime;
mod lexer;
mod parser;
use crate::{
expr::{LTExpr, Statement},
@@ -15,6 +17,10 @@ use crate::{
procedure_compiler::ProcedureCompiler
};
/*
*/
fn main() {
// create virtual machine with 4096 words of memory
let mut vm = tisc::VM::new(0x1000);
@@ -25,6 +31,25 @@ fn main() {
let typectx = main_scope.read().unwrap().typectx.clone();
let mut lexer = lexer::LTIRLexer::from(
"{
emit '*';
let x : _2^64~machine::UInt64~machine::Word = 8;
while (i< x 7) {
= x (i+ x 1);
}
let square = (
(λ x (i* x x))
)
}".chars()
);
let block = parser::parse_block( &mut lexer.peekable() );
eprintln!("parsed block = {:?}", block);
/* define type of the symbol
*/
main_scope.write().unwrap()

73
src/parser.rs Normal file
View file

@@ -0,0 +1,73 @@
use {
std::iter::Peekable,
crate::{
lexer::{LTIRLexer, LTIRToken, LexError},
expr::LTExpr
}
};
/// Errors reported by the parser.
///
/// NOTE(review): only `UnexpectedEnd` is produced by the current stubs;
/// the intended meaning of the other variants (inferred from their names)
/// should be confirmed once the parser is implemented.
#[derive(Clone, Debug)]
pub enum ParseError {
    /// Wraps an error coming from the underlying lexer.
    LexError(LexError),
    /// Presumably: a closing `)`/`}` without a matching opener — TODO confirm.
    UnexpectedClose,
    /// Input ended before a construct was complete.
    UnexpectedEnd,
    /// Presumably: a token that does not fit the grammar here — TODO confirm.
    UnexpectedToken
}
/// Parses a single statement from the token stream.
///
/// NOTE(review): work-in-progress stub — currently always returns
/// `Err(ParseError::UnexpectedEnd)` without consuming any tokens.
pub fn parse_statement<It>(
    tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< crate::expr::Statement, ParseError >
where It: Iterator<Item = char>
{
    Err(ParseError::UnexpectedEnd)
}
/// Parses a `{ ... }` block from the token stream.
///
/// NOTE(review): work-in-progress stub — currently always returns
/// `Err(ParseError::UnexpectedEnd)` without consuming any tokens.
pub fn parse_block<It>(
    tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< crate::expr::LTExpr, ParseError >
where It: Iterator<Item = char>
{
    Err(ParseError::UnexpectedEnd)
}
/// Parses an expression, intended to build an `LTExpr::Application`
/// from a head expression and its argument expressions.
///
/// NOTE(review): work-in-progress. `children` is never pushed to, so the
/// `children.len() > 0` branch is unreachable and this function currently
/// always returns `Err(ParseError::UnexpectedEnd)` (after consuming one
/// token, plus whatever the recursive call consumes on `ExprOpen`).
pub fn parse_expr<It>(
    tokens: &mut Peekable<LTIRLexer<It>>
) -> Result< crate::expr::LTExpr, ParseError >
where It: Iterator<Item = char>
{
    // Sub-expressions collected so far: head first, then arguments.
    let mut children = Vec::new();

    match tokens.next() {
        Some(Ok(LTIRToken::ExprOpen)) => {
            // TODO: the parsed sub-expression is currently discarded
            if let Ok(subexpr) = parse_expr( tokens ) {
            } else {
            }
            /*
            Err(ParseError::UnexpectedEnd)
            */
        },
        Some(Ok(LTIRToken::BlockOpen)) => {
            // TODO: delegate to block parsing
            /*
            Err(ParseError::UnexpectedEnd)
            */
        }
        /*
        _ => Err(ParseError::UnexpectedToken),
        None => Err(ParseError::UnexpectedEnd)
        */
        _ => {}
    }

    if children.len() > 0 {
        // First child is the function position, the rest are arguments.
        let head = children.remove(0);
        Ok(LTExpr::Application {
            head: Box::new(head),
            body: children
        })
    } else {
        Err(ParseError::UnexpectedEnd)
    }
}