parser: pass through token ranges in errors & add simple diagnostics output that cites the source file

This commit is contained in:
Michael Sippel 2024-05-16 13:14:00 +02:00
parent f8a967abbf
commit f06bf14b52
Signed by: senvas
GPG key ID: F96CF119C34B64A6
4 changed files with 153 additions and 74 deletions

View file

@ -7,4 +7,5 @@ edition = "2021"
laddertypes = { path = "../lib-laddertypes" }
tisc = { path = "../lib-tisc" }
iterate-text = "0.0.1"
tiny-ansi = "0.1.0"

View file

@ -79,11 +79,19 @@ where
}
}
#[derive(Clone, Debug)]
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct InputRegionTag {
begin: usize,
end: usize
pub begin: usize,
pub end: usize
}
impl Default for InputRegionTag {
fn default() -> Self {
InputRegionTag {
begin: 0,
end: 0
}
}
}
impl InputRegionTag {
@ -271,9 +279,8 @@ where
LexerState::TypeTerm(s) => {
if *c == '=' || *c == '↦' || *c == ';' {
if let Some(token) = state.clone().into_token() {
return Some((region, Ok(token)));
}
let token = state.clone().into_token().unwrap();
return Some((region, Ok(token)));
} else {
if let Some(c) = self.chars.next() {
self.position += 1;

View file

@ -2,6 +2,7 @@ use {
std::collections::HashMap,
std::sync::{Arc, RwLock},
std::{boxed::Box, ops::Deref},
tiny_ansi::TinyAnsi
};
mod expr;
@ -12,36 +13,71 @@ mod runtime;
mod symbols;
use crate::{
lexer::InputRegionTag,
expr::{LTExpr, Statement},
procedure_compiler::ProcedureCompiler,
symbols::Scope,
};
fn compile(
scope: &Arc<RwLock<Scope>>,
name: &str,
source: impl Iterator<Item = char>,
) -> Vec<tisc::assembler::AssemblyWord> {
ProcedureCompiler::new(scope)
.compile(
&parser::parse_expr(
&scope.read().unwrap().typectx,
&mut lexer::LTIRLexer::from(source.peekable())
.filter(|tok| match tok {
(_, Ok(lexer::LTIRToken::Comment(_))) => false,
_ => true
})
.peekable(),
)
.expect("syntax error"),
)
.into_asm(&name.into())
fn print_diagnostic(
path: &str,
region: InputRegionTag,
message: String
) {
let lines = iterate_text::file::lines::IterateFileLines::new(path);
let mut line_region = InputRegionTag::default();
let n_before = 3;
let n_after = 3;
let mut last_lines = Vec::new();
let mut next_lines = 0;
println!("\n{}:", path.green());
for (i, l) in lines.enumerate() {
line_region.end += l.chars().count();
last_lines.push((i+1, l.clone()));
if last_lines.len() > n_before {
last_lines.remove(0);
}
if region.begin >= line_region.begin &&
region.begin < line_region.end {
next_lines = n_after;
let column_begin = region.begin - line_region.begin;
let column_end = region.end - line_region.begin;
// display the source line
for (j,ll) in last_lines.iter() {
print!("{}\t{}{}",
format!("{}",j).to_string().bright_white(),
"|".yellow().bold(),
ll.white());
}
print!("\t{}", "|".yellow().bold());
for _ in 0..column_begin { print!("{}", ".".magenta().bold()); }
for _ in column_begin..column_end { print!("{}", "^".magenta().bold()); }
print!("\n");
print!("{} [{}-{}]: {}\n", "error".bright_red(), column_begin, column_end, message);
}
else if next_lines > 0 {
next_lines -= 1;
print!("{}:\t{}{}", format!("{}", i+1).to_string().bright_white(), "|".yellow().bold(), l.white());
}
line_region.begin = line_region.end;
}
}
/* TODO:
* - Parser error reporting
* - Compiler error reporting
* - parse float literals
* - write to address resulting from expression
* - sized objects
* - Typecheck for LTExpr::Application
@ -57,24 +93,55 @@ fn main() {
let main_scope = Scope::with_parent(&root_scope);
let typectx = main_scope.read().unwrap().typectx.clone();
/* open source file
*/
let args: Vec<String> = std::env::args().collect();
let path = &args[1];
let iter_chars = iterate_text::file::characters::IterateFileCharacters::new(path);
/* link assembly-program to symbols
/* compile source file
*/
linker.add_procedure("main", compile(&main_scope, "main", iter_chars));
let mut lexer = lexer::LTIRLexer::from( iter_chars.peekable() );
let mut program_tokens = lexer.filter(|tok| match tok {
(_, Ok(lexer::LTIRToken::Comment(_))) => false,
_ => true
})
.peekable();
/* load & run compiled bytecode
*/
let main_addr = linker
.get_link_addr(&"main".into())
.expect("'main' not linked");
vm.load(linker.link_total().expect("could not link"));
vm.execute(main_addr);
match parser::parse_expr( &typectx, &mut program_tokens ) {
Ok( ast ) => {
let bytecode = ProcedureCompiler::new(&main_scope)
.compile( &ast )
.into_asm(&"main".into());
eprintln!(
"\n====\nVM execution finished\ndatastack = {:?}\n====",
vm.data_stack
);
eprintln!("{}", "Compiled successfully.\n======================\n".green());
/* link assembly-program to symbols
*/
linker.add_procedure("main", bytecode);
/* load & run compiled bytecode
*/
let main_addr = linker
.get_link_addr(&"main".into())
.expect("'main' not linked");
vm.load(linker.link_total().expect("could not link"));
vm.execute(main_addr);
eprintln!(
"\n====\nVM execution finished\ndatastack = {:?}\n====",
vm.data_stack
);
}
Err( (region, parse_error) ) => {
print_diagnostic(
path,
region,
format!("{:?}", parse_error)
);
eprintln!("=======\nerror: Parse Error\n");
}
}
}

View file

@ -21,7 +21,7 @@ pub enum ParseError {
pub fn parse_expect<It>(
tokens: &mut Peekable<It>,
expected_token: LTIRToken,
) -> Result<(), ParseError>
) -> Result<(), (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
{
match tokens.next() {
@ -29,24 +29,24 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
if t == expected_token {
Ok(())
} else {
Err(ParseError::UnexpectedToken)
Err((region, ParseError::UnexpectedToken))
}
}
Some((region, Err(err))) => Err(ParseError::LexError(err)),
None => Err(ParseError::UnexpectedEnd),
Some((region, Err(err))) => Err((region, ParseError::LexError(err))),
None => Err((InputRegionTag::default(), ParseError::UnexpectedEnd)),
}
}
/* parse symbol name
*/
pub fn parse_symbol<It>(tokens: &mut Peekable<It>) -> Result<String, ParseError>
pub fn parse_symbol<It>(tokens: &mut Peekable<It>) -> Result<String, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
{
match tokens.next() {
Some((region, Ok(LTIRToken::Symbol(name)))) => Ok(name),
Some((region, Ok(_))) => Err(ParseError::UnexpectedToken),
Some((region, Err(err))) => Err(ParseError::LexError(err)),
None => Err(ParseError::UnexpectedEnd),
Some((region, Ok(_))) => Err((region, ParseError::UnexpectedToken)),
Some((region, Err(err))) => Err((region, ParseError::LexError(err))),
None => Err((InputRegionTag::default(), ParseError::UnexpectedEnd)),
}
}
@ -56,16 +56,17 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
pub fn parse_type_tag<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result<Option<laddertypes::TypeTerm>, ParseError>
) -> Result<Option<laddertypes::TypeTerm>, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
{
if let Some((region, peektok)) = tokens.peek().clone() {
match peektok.clone() {
let peek = { tokens.peek().cloned() };
if let Some((region, peektok)) = peek {
match peektok {
Ok(LTIRToken::AssignType(typeterm_str)) => {
tokens.next();
match typectx.write().unwrap().parse(typeterm_str.as_str()) {
Ok(typeterm) => Ok(Some(typeterm)),
Err(parse_error) => Err(ParseError::TypeParseError(parse_error)),
Err(parse_error) => Err((region, ParseError::TypeParseError(parse_error))),
}
}
_ => Ok(None),
@ -109,7 +110,7 @@ impl VariableBinding {
pub fn parse_binding_expr<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result< VariableBinding, ParseError>
) -> Result< VariableBinding, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
{
if let Some((region, peektok)) = tokens.peek().clone() {
@ -126,11 +127,11 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
typtag: parse_type_tag(typectx, tokens)?
})
}
Err(err) => Err(ParseError::LexError(err.clone())),
_ => Err(ParseError::UnexpectedToken)
Err(err) => Err((*region, ParseError::LexError(err.clone()))),
_ => Err((*region, ParseError::UnexpectedToken))
}
} else {
Err(ParseError::UnexpectedEnd)
Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
}
}
@ -140,13 +141,16 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
pub fn parse_binding_block<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result< Vec<VariableBinding>, ParseError>
) -> Result< Vec<VariableBinding>, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
{
let mut last_region = InputRegionTag::default();
let _ = parse_expect(tokens, LTIRToken::BlockOpen)?;
let mut bindings = Vec::new();
while let Some((region, peektok)) = tokens.peek() {
last_region = *region;
match peektok {
Ok(LTIRToken::BlockClose) => {
tokens.next();
@ -159,18 +163,18 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
bindings.push(parse_binding_expr(typectx, tokens)?);
}
Err(err) => {
return Err(ParseError::LexError(err.clone()));
return Err((last_region, ParseError::LexError(err.clone())));
}
}
}
Err(ParseError::UnexpectedEnd)
Err((last_region, ParseError::UnexpectedEnd))
}
pub fn parse_statement<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result<crate::expr::Statement, ParseError>
) -> Result<crate::expr::Statement, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
{
if let Some((region, peektok)) = tokens.peek() {
@ -237,17 +241,17 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
Ok(Statement::Expr(expr))
}
Err(err) => Err(ParseError::LexError(err.clone())),
Err(err) => Err((*region, ParseError::LexError(err.clone()))),
}
} else {
Err(ParseError::UnexpectedEnd)
Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
}
}
pub fn parse_statement_block<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result<Vec<Statement>, ParseError>
) -> Result<Vec<Statement>, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
{
let _ = parse_expect(tokens, LTIRToken::BlockOpen)?;
@ -263,33 +267,33 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
statements.push(parse_statement(typectx, tokens)?);
}
Err(err) => {
return Err(ParseError::LexError(err.clone()));
return Err((*region, ParseError::LexError(err.clone())));
}
}
}
Err(ParseError::UnexpectedEnd)
Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
}
pub fn parse_atom<It>(
tokens: &mut Peekable<It>,
) -> Result<crate::expr::LTExpr, ParseError>
) -> Result<crate::expr::LTExpr, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
{
match tokens.next() {
Some((region, Ok(LTIRToken::Symbol(sym)))) => Ok(LTExpr::symbol(sym.as_str())),
Some((region, Ok(LTIRToken::Char(c)))) => Ok(LTExpr::lit_uint(c as u64)),
Some((region, Ok(LTIRToken::Num(n)))) => Ok(LTExpr::lit_uint(n as u64)),
Some((region, Ok(_))) => Err(ParseError::UnexpectedToken),
Some((region, Err(err))) => Err(ParseError::LexError(err)),
None => Err(ParseError::UnexpectedEnd),
Some((region, Ok(_))) => Err((region, ParseError::UnexpectedToken)),
Some((region, Err(err))) => Err((region, ParseError::LexError(err))),
None => Err((InputRegionTag::default(), ParseError::UnexpectedEnd)),
}
}
pub fn parse_expr<It>(
typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
tokens: &mut Peekable<It>,
) -> Result<crate::expr::LTExpr, ParseError>
) -> Result<crate::expr::LTExpr, (InputRegionTag, ParseError)>
where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
{
let mut children = Vec::new();
@ -309,7 +313,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
body: Box::new(body),
});
} else {
return Err(ParseError::UnexpectedToken);
return Err((*region, ParseError::UnexpectedToken));
}
}
Ok(LTIRToken::ExprOpen) => {
@ -369,7 +373,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
children.push(parse_atom(tokens)?);
}
Err(err) => {
return Err(ParseError::LexError(err.clone()));
return Err((*region, ParseError::LexError(err.clone())));
}
}
}
@ -382,7 +386,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
body: children,
})
} else {
Err(ParseError::UnexpectedEnd)
Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
}
}