parser: pass through token ranges in errors & add simple diagnostics output that cites the source file

Michael Sippel 2024-05-16 13:14:00 +02:00
parent f8a967abbf
commit f06bf14b52
Signed by: senvas
GPG key ID: F96CF119C34B64A6
4 changed files with 153 additions and 74 deletions
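In short: every parser entry point now returns the source region of the offending token alongside the error, and main() uses that region to print a diagnostic that cites the source file. A minimal sketch of the resulting calling pattern (illustrative only; the report helper is hypothetical, but InputRegionTag, ParseError and print_diagnostic match the diff below):

    // Illustrative sketch, not part of the commit: parser entry points now
    // return Result<T, (InputRegionTag, ParseError)>, so a caller can turn
    // the region into a source-cited diagnostic.
    fn report<T>(path: &str, result: Result<T, (InputRegionTag, ParseError)>) -> Option<T> {
        match result {
            Ok(value) => Some(value),
            Err((region, parse_error)) => {
                // region.begin / region.end are character offsets into the source file
                print_diagnostic(path, region, format!("{:?}", parse_error));
                None
            }
        }
    }

main() below does the same thing inline for parser::parse_expr.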

Cargo.toml

@@ -7,4 +7,5 @@ edition = "2021"
 laddertypes = { path = "../lib-laddertypes" }
 tisc = { path = "../lib-tisc" }
 iterate-text = "0.0.1"
+tiny-ansi = "0.1.0"
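The new tiny-ansi dependency provides the terminal-coloring helpers used by the diagnostics printer in main.rs. A minimal sketch of how it is used there, assuming the TinyAnsi trait supplies the string-coloring methods that appear in this diff (.green(), .bright_red(), .yellow(), .bold(), ...):

    // Sketch only: coloring strings the way the new print_diagnostic does.
    use tiny_ansi::TinyAnsi;

    fn demo(path: &str) {
        println!("{}:", path.green());
        println!("{}: {}", "error".bright_red(), "unexpected token");
        print!("{}", "|".yellow().bold());
    }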

src/lexer.rs

@@ -79,11 +79,19 @@ where
     }
 }

-#[derive(Clone, Copy, PartialEq, Debug)]
+#[derive(Clone, Debug)]
 pub struct InputRegionTag {
-    begin: usize,
-    end: usize
+    pub begin: usize,
+    pub end: usize
+}
+
+impl Default for InputRegionTag {
+    fn default() -> Self {
+        InputRegionTag {
+            begin: 0,
+            end: 0
+        }
+    }
 }

 impl InputRegionTag {
@@ -271,9 +279,8 @@ where
             LexerState::TypeTerm(s) => {
                 if *c == '=' || *c == '↦' || *c == ';' {
-                    if let Some(token) = state.clone().into_token() {
-                        return Some((region, Ok(token)));
-                    }
+                    let token = state.clone().into_token().unwrap();
+                    return Some((region, Ok(token)));
                 } else {
                     if let Some(c) = self.chars.next() {
                         self.position += 1;
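For context, the parser signatures in this commit imply that the lexer yields every token together with the input region it was read from. A small illustrative sketch (the LexItem alias and span_of helper are hypothetical, not part of the commit):

    // Hypothetical helper, for illustration only.
    // Each lexer item pairs the consumed input region with the token (or lex error):
    type LexItem = (InputRegionTag, Result<LTIRToken, LexError>);

    fn span_of(item: &LexItem) -> (usize, usize) {
        // begin/end are character offsets into the source stream
        (item.0.begin, item.0.end)
    }

Since both fields are now public and zero is the natural starting value, the hand-written Default impl above could equivalently be replaced by adding Default to the derive list.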

src/main.rs

@@ -2,6 +2,7 @@ use {
     std::collections::HashMap,
     std::sync::{Arc, RwLock},
    std::{boxed::Box, ops::Deref},
+    tiny_ansi::TinyAnsi
 };

 mod expr;
@@ -12,36 +13,71 @@ mod runtime;
 mod symbols;

 use crate::{
+    lexer::InputRegionTag,
     expr::{LTExpr, Statement},
     procedure_compiler::ProcedureCompiler,
     symbols::Scope,
 };

-fn compile(
-    scope: &Arc<RwLock<Scope>>,
-    name: &str,
-    source: impl Iterator<Item = char>,
-) -> Vec<tisc::assembler::AssemblyWord> {
-    ProcedureCompiler::new(scope)
-        .compile(
-            &parser::parse_expr(
-                &scope.read().unwrap().typectx,
-                &mut lexer::LTIRLexer::from(source.peekable())
-                    .filter(|tok| match tok {
-                        (_, Ok(lexer::LTIRToken::Comment(_))) => false,
-                        _ => true
-                    })
-                    .peekable(),
-            )
-            .expect("syntax error"),
-        )
-        .into_asm(&name.into())
-}
+fn print_diagnostic(
+    path: &str,
+    region: InputRegionTag,
+    message: String
+) {
+    let lines = iterate_text::file::lines::IterateFileLines::new(path);
+
+    let mut line_region = InputRegionTag::default();
+
+    let n_before = 3;
+    let n_after = 3;
+
+    let mut last_lines = Vec::new();
+    let mut next_lines = 0;
+
+    println!("\n{}:", path.green());
+    for (i, l) in lines.enumerate() {
+        line_region.end += l.chars().count();
+
+        last_lines.push((i+1, l.clone()));
+        if last_lines.len() > n_before {
+            last_lines.remove(0);
+        }
+
+        if region.begin >= line_region.begin &&
+           region.begin < line_region.end {
+            next_lines = n_after;
+
+            let column_begin = region.begin - line_region.begin;
+            let column_end = region.end - line_region.begin;
+
+            // display the source line
+            for (j, ll) in last_lines.iter() {
+                print!("{}\t{}{}",
+                    format!("{}", j).to_string().bright_white(),
+                    "|".yellow().bold(),
+                    ll.white());
+            }
+
+            print!("\t{}", "|".yellow().bold());
+            for _ in 0..column_begin { print!("{}", ".".magenta().bold()); }
+            for _ in column_begin..column_end { print!("{}", "^".magenta().bold()); }
+            print!("\n");
+            print!("{} [{}-{}]: {}\n", "error".bright_red(), column_begin, column_end, message);
+        }
+        else if next_lines > 0 {
+            next_lines -= 1;
+            print!("{}:\t{}{}", format!("{}", i+1).to_string().bright_white(), "|".yellow().bold(), l.white());
+        }
+
+        line_region.begin = line_region.end;
+    }
+}

 /* TODO:
- * - Parser error reporting
  * - Compiler error reporting
+ * - parse float literals
  * - write to address resulting from expression
  * - sized objects
  * - Typecheck for LTExpr::Application
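To make the offset arithmetic in print_diagnostic above concrete, here is a small worked example; the numbers are hypothetical and only illustrate the begin/end bookkeeping:

    fn column_demo() {
        // Hypothetical offsets, for illustration only (not from the commit).
        // Suppose the offending token spans global character offsets 57..61 and
        // the line containing it covers offsets 50..75 (line_region above):
        let (region_begin, region_end) = (57usize, 61usize);
        let (line_begin, line_end) = (50usize, 75usize);

        assert!(region_begin >= line_begin && region_begin < line_end);
        assert!(region_end <= line_end);

        let column_begin = region_begin - line_begin; // 7
        let column_end = region_end - line_begin;     // 11

        // print_diagnostic would emit 7 dots followed by 4 carets,
        // underlining columns 7..11 of that source line.
        assert_eq!((column_begin, column_end), (7, 11));
    }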
@@ -57,19 +93,39 @@ fn main() {
     let main_scope = Scope::with_parent(&root_scope);
     let typectx = main_scope.read().unwrap().typectx.clone();

+    /* open source file
+     */
     let args: Vec<String> = std::env::args().collect();
     let path = &args[1];
     let iter_chars = iterate_text::file::characters::IterateFileCharacters::new(path);

+    /* compile source file
+     */
+    let mut lexer = lexer::LTIRLexer::from( iter_chars.peekable() );
+    let mut program_tokens = lexer.filter(|tok| match tok {
+            (_, Ok(lexer::LTIRToken::Comment(_))) => false,
+            _ => true
+        })
+        .peekable();
+
+    match parser::parse_expr( &typectx, &mut program_tokens ) {
+        Ok( ast ) => {
+            let bytecode = ProcedureCompiler::new(&main_scope)
+                .compile( &ast )
+                .into_asm(&"main".into());
+
+            eprintln!("{}", "Compiled successfully.\n======================\n".green());
+
             /* link assembly-program to symbols
              */
-            linker.add_procedure("main", compile(&main_scope, "main", iter_chars));
+            linker.add_procedure("main", bytecode);

             /* load & run compiled bytecode
              */
             let main_addr = linker
                 .get_link_addr(&"main".into())
                 .expect("'main' not linked");

             vm.load(linker.link_total().expect("could not link"));
             vm.execute(main_addr);
@@ -78,3 +134,14 @@ fn main() {
                 vm.data_stack
             );
         }
+        Err( (region, parse_error) ) => {
+            print_diagnostic(
+                path,
+                region,
+                format!("{:?}", parse_error)
+            );
+            eprintln!("=======\nerror: Parse Error\n");
+        }
+    }
+}

src/parser.rs

@@ -21,7 +21,7 @@ pub enum ParseError {
 pub fn parse_expect<It>(
     tokens: &mut Peekable<It>,
     expected_token: LTIRToken,
-) -> Result<(), ParseError>
+) -> Result<(), (InputRegionTag, ParseError)>
 where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
 {
     match tokens.next() {
@@ -29,24 +29,24 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
             if t == expected_token {
                 Ok(())
             } else {
-                Err(ParseError::UnexpectedToken)
+                Err((region, ParseError::UnexpectedToken))
             }
         }
-        Some((region, Err(err))) => Err(ParseError::LexError(err)),
-        None => Err(ParseError::UnexpectedEnd),
+        Some((region, Err(err))) => Err((region, ParseError::LexError(err))),
+        None => Err((InputRegionTag::default(), ParseError::UnexpectedEnd)),
     }
 }

 /* parse symbol name
  */
-pub fn parse_symbol<It>(tokens: &mut Peekable<It>) -> Result<String, ParseError>
+pub fn parse_symbol<It>(tokens: &mut Peekable<It>) -> Result<String, (InputRegionTag, ParseError)>
 where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
 {
     match tokens.next() {
         Some((region, Ok(LTIRToken::Symbol(name)))) => Ok(name),
-        Some((region, Ok(_))) => Err(ParseError::UnexpectedToken),
-        Some((region, Err(err))) => Err(ParseError::LexError(err)),
-        None => Err(ParseError::UnexpectedEnd),
+        Some((region, Ok(_))) => Err((region, ParseError::UnexpectedToken)),
+        Some((region, Err(err))) => Err((region, ParseError::LexError(err))),
+        None => Err((InputRegionTag::default(), ParseError::UnexpectedEnd)),
     }
 }
@@ -56,16 +56,17 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
 pub fn parse_type_tag<It>(
     typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
     tokens: &mut Peekable<It>,
-) -> Result<Option<laddertypes::TypeTerm>, ParseError>
+) -> Result<Option<laddertypes::TypeTerm>, (InputRegionTag, ParseError)>
 where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
 {
-    if let Some((region, peektok)) = tokens.peek().clone() {
-        match peektok.clone() {
+    let peek = { tokens.peek().cloned() };
+    if let Some((region, peektok)) = peek {
+        match peektok {
             Ok(LTIRToken::AssignType(typeterm_str)) => {
                 tokens.next();
                 match typectx.write().unwrap().parse(typeterm_str.as_str()) {
                     Ok(typeterm) => Ok(Some(typeterm)),
-                    Err(parse_error) => Err(ParseError::TypeParseError(parse_error)),
+                    Err(parse_error) => Err((region, ParseError::TypeParseError(parse_error))),
                 }
             }
             _ => Ok(None),
@@ -109,7 +110,7 @@ impl VariableBinding {
 pub fn parse_binding_expr<It>(
     typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
     tokens: &mut Peekable<It>,
-) -> Result< VariableBinding, ParseError>
+) -> Result< VariableBinding, (InputRegionTag, ParseError)>
 where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
 {
     if let Some((region, peektok)) = tokens.peek().clone() {
@@ -126,11 +127,11 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
                     typtag: parse_type_tag(typectx, tokens)?
                 })
             }
-            Err(err) => Err(ParseError::LexError(err.clone())),
-            _ => Err(ParseError::UnexpectedToken)
+            Err(err) => Err((*region, ParseError::LexError(err.clone()))),
+            _ => Err((*region, ParseError::UnexpectedToken))
         }
     } else {
-        Err(ParseError::UnexpectedEnd)
+        Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
     }
 }
@@ -140,13 +141,16 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
 pub fn parse_binding_block<It>(
     typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
     tokens: &mut Peekable<It>,
-) -> Result< Vec<VariableBinding>, ParseError>
+) -> Result< Vec<VariableBinding>, (InputRegionTag, ParseError)>
 where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
 {
+    let mut last_region = InputRegionTag::default();
     let _ = parse_expect(tokens, LTIRToken::BlockOpen)?;

     let mut bindings = Vec::new();
     while let Some((region, peektok)) = tokens.peek() {
+        last_region = *region;
         match peektok {
             Ok(LTIRToken::BlockClose) => {
                 tokens.next();
@@ -159,18 +163,18 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
                 bindings.push(parse_binding_expr(typectx, tokens)?);
             }
             Err(err) => {
-                return Err(ParseError::LexError(err.clone()));
+                return Err((last_region, ParseError::LexError(err.clone())));
             }
         }
     }

-    Err(ParseError::UnexpectedEnd)
+    Err((last_region, ParseError::UnexpectedEnd))
 }

 pub fn parse_statement<It>(
     typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
     tokens: &mut Peekable<It>,
-) -> Result<crate::expr::Statement, ParseError>
+) -> Result<crate::expr::Statement, (InputRegionTag, ParseError)>
 where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
 {
     if let Some((region, peektok)) = tokens.peek() {
@@ -237,17 +241,17 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
                 let _ = parse_expect(tokens, LTIRToken::StatementSep)?;
                 Ok(Statement::Expr(expr))
             }
-            Err(err) => Err(ParseError::LexError(err.clone())),
+            Err(err) => Err((*region, ParseError::LexError(err.clone()))),
         }
     } else {
-        Err(ParseError::UnexpectedEnd)
+        Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
     }
 }

 pub fn parse_statement_block<It>(
     typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
     tokens: &mut Peekable<It>,
-) -> Result<Vec<Statement>, ParseError>
+) -> Result<Vec<Statement>, (InputRegionTag, ParseError)>
 where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
 {
     let _ = parse_expect(tokens, LTIRToken::BlockOpen)?;
@@ -263,33 +267,33 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
                 statements.push(parse_statement(typectx, tokens)?);
             }
             Err(err) => {
-                return Err(ParseError::LexError(err.clone()));
+                return Err((*region, ParseError::LexError(err.clone())));
             }
         }
     }

-    Err(ParseError::UnexpectedEnd)
+    Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
 }

 pub fn parse_atom<It>(
     tokens: &mut Peekable<It>,
-) -> Result<crate::expr::LTExpr, ParseError>
+) -> Result<crate::expr::LTExpr, (InputRegionTag, ParseError)>
 where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
 {
     match tokens.next() {
         Some((region, Ok(LTIRToken::Symbol(sym)))) => Ok(LTExpr::symbol(sym.as_str())),
         Some((region, Ok(LTIRToken::Char(c)))) => Ok(LTExpr::lit_uint(c as u64)),
         Some((region, Ok(LTIRToken::Num(n)))) => Ok(LTExpr::lit_uint(n as u64)),
-        Some((region, Ok(_))) => Err(ParseError::UnexpectedToken),
-        Some((region, Err(err))) => Err(ParseError::LexError(err)),
-        None => Err(ParseError::UnexpectedEnd),
+        Some((region, Ok(_))) => Err((region, ParseError::UnexpectedToken)),
+        Some((region, Err(err))) => Err((region, ParseError::LexError(err))),
+        None => Err((InputRegionTag::default(), ParseError::UnexpectedEnd)),
     }
 }

 pub fn parse_expr<It>(
     typectx: &Arc<RwLock<laddertypes::dict::TypeDict>>,
     tokens: &mut Peekable<It>,
-) -> Result<crate::expr::LTExpr, ParseError>
+) -> Result<crate::expr::LTExpr, (InputRegionTag, ParseError)>
 where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
 {
     let mut children = Vec::new();
@@ -309,7 +313,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
                         body: Box::new(body),
                     });
                 } else {
-                    return Err(ParseError::UnexpectedToken);
+                    return Err((*region, ParseError::UnexpectedToken));
                 }
             }
             Ok(LTIRToken::ExprOpen) => {
@@ -369,7 +373,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
                 children.push(parse_atom(tokens)?);
             }
             Err(err) => {
-                return Err(ParseError::LexError(err.clone()));
+                return Err((*region, ParseError::LexError(err.clone())));
             }
         }
     }
@@ -382,7 +386,7 @@ where It: Iterator<Item = (InputRegionTag, Result<LTIRToken, LexError>)>
             body: children,
         })
     } else {
-        Err(ParseError::UnexpectedEnd)
+        Err((InputRegionTag::default(), ParseError::UnexpectedEnd))
     }
 }
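Finally, a usage sketch of the new error shape at the level of a single helper; this snippet is illustrative only and assumes LTIRLexer::from accepts any peekable char iterator, as the call sites in main.rs suggest:

    // Illustrative only, not part of the commit.
    fn expect_block_open_demo() {
        let mut tokens = lexer::LTIRLexer::from("foo".chars().peekable())
            .filter(|tok| !matches!(tok, (_, Ok(lexer::LTIRToken::Comment(_)))))
            .peekable();

        match parser::parse_expect(&mut tokens, lexer::LTIRToken::BlockOpen) {
            Ok(()) => {}
            Err((region, err)) => {
                // the region locates the unexpected token in the input
                eprintln!("{:?} at characters {}..{}", err, region.begin, region.end);
            }
        }
    }

Note that end-of-input errors still carry InputRegionTag::default() (0..0), so they point at the start of the file rather than its end.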