lexer: add input region for each token
This commit is contained in:
parent
c910265531
commit
34a129d101
2 changed files with 99 additions and 43 deletions
108
src/lexer.rs
108
src/lexer.rs
|
@ -52,6 +52,7 @@ where
|
|||
It: std::iter::Iterator<Item = char>,
|
||||
{
|
||||
chars: std::iter::Peekable<It>,
|
||||
position: usize
|
||||
}
|
||||
|
||||
impl<It> LTIRLexer<It>
|
||||
|
@ -70,6 +71,23 @@ where
|
|||
fn from(chars: It) -> Self {
|
||||
LTIRLexer {
|
||||
chars: chars.peekable(),
|
||||
position: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Span of the lexer input that a single token was read from.
///
/// Both bounds are positions counted in `char`s taken from the input
/// iterator. The lexer sets `begin` to its position before the first
/// character of the token and `end` to its position after the last one,
/// so the span is half-open: `begin..end`.
///
/// The type is `Copy` because it is just two `usize` values; this lets a
/// region be merged with [`InputRegionTag::max`] and still be used afterwards
/// without an explicit clone.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct InputRegionTag {
    /// Position of the first character covered by the region.
    begin: usize,
    /// Position one past the last character covered by the region.
    end: usize,
}

impl InputRegionTag {
    /// Returns the smallest region that covers both `a` and `b`.
    ///
    /// The result starts at the lower of the two `begin` positions and ends
    /// at the higher of the two `end` positions. Despite the name this is a
    /// covering span, not an ordering maximum: if `a` and `b` do not touch,
    /// the gap between them is included in the result.
    pub fn max(a: InputRegionTag, b: InputRegionTag) -> InputRegionTag {
        InputRegionTag {
            begin: a.begin.min(b.begin),
            end: a.end.max(b.end),
        }
    }
}
|
||||
|
@ -78,10 +96,14 @@ impl<It> Iterator for LTIRLexer<It>
|
|||
where
|
||||
It: Iterator<Item = char>,
|
||||
{
|
||||
type Item = Result<LTIRToken, LexError>;
|
||||
type Item = (InputRegionTag, Result<LTIRToken, LexError>);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let mut state = LexerState::Any;
|
||||
let mut region = InputRegionTag{
|
||||
begin: self.position,
|
||||
end: self.position
|
||||
};
|
||||
|
||||
while let Some(c) = self.chars.peek() {
|
||||
match &mut state {
|
||||
|
@ -89,47 +111,70 @@ where
|
|||
LexerState::Any => match c {
|
||||
'λ' => {
|
||||
self.chars.next();
|
||||
return Some(Ok(LTIRToken::Lambda));
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
return Some((region, Ok(LTIRToken::Lambda)));
|
||||
}
|
||||
'.' => {
|
||||
'.' | '↦' => {
|
||||
self.chars.next();
|
||||
return Some(Ok(LTIRToken::LambdaBody));
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
return Some((region, Ok(LTIRToken::LambdaBody)));
|
||||
}
|
||||
'(' => {
|
||||
self.chars.next();
|
||||
return Some(Ok(LTIRToken::ExprOpen));
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
return Some((region, Ok(LTIRToken::ExprOpen)));
|
||||
}
|
||||
')' => {
|
||||
self.chars.next();
|
||||
return Some(Ok(LTIRToken::ExprClose));
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
return Some((region, Ok(LTIRToken::ExprClose)));
|
||||
}
|
||||
'{' => {
|
||||
self.chars.next();
|
||||
return Some(Ok(LTIRToken::BlockOpen));
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
return Some((region, Ok(LTIRToken::BlockOpen)));
|
||||
}
|
||||
'}' => {
|
||||
self.chars.next();
|
||||
return Some(Ok(LTIRToken::BlockClose));
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
return Some((region, Ok(LTIRToken::BlockClose)));
|
||||
}
|
||||
':' => {
|
||||
self.chars.next();
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
state = LexerState::TypeTerm(String::new());
|
||||
}
|
||||
'=' => {
|
||||
self.chars.next();
|
||||
return Some(Ok(LTIRToken::AssignValue));
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
return Some((region, Ok(LTIRToken::AssignValue)));
|
||||
}
|
||||
';' => {
|
||||
self.chars.next();
|
||||
return Some(Ok(LTIRToken::StatementSep));
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
return Some((region, Ok(LTIRToken::StatementSep)));
|
||||
}
|
||||
'\'' => {
|
||||
self.chars.next();
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
state = LexerState::Char(None);
|
||||
}
|
||||
c => {
|
||||
if c.is_whitespace() {
|
||||
self.chars.next();
|
||||
self.position += 1;
|
||||
region.begin += 1;
|
||||
region.end += 1;
|
||||
} else if c.is_digit(10) {
|
||||
state = LexerState::Num(0);
|
||||
} else {
|
||||
|
@ -139,30 +184,36 @@ where
|
|||
},
|
||||
|
||||
LexerState::Char(val) => {
|
||||
self.position += 2;
|
||||
region.end += 2;
|
||||
*val = Some(match self.chars.next() {
|
||||
Some('\\') => match self.chars.next() {
|
||||
Some('0') => '\0',
|
||||
Some('n') => '\n',
|
||||
Some('t') => '\t',
|
||||
Some(c) => c,
|
||||
None => {
|
||||
return Some(Err(LexError::InvalidChar));
|
||||
Some('\\') => {
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
match self.chars.next() {
|
||||
Some('0') => '\0',
|
||||
Some('n') => '\n',
|
||||
Some('t') => '\t',
|
||||
Some(c) => c,
|
||||
None => {
|
||||
return Some((region, Err(LexError::InvalidChar)));
|
||||
}
|
||||
}
|
||||
},
|
||||
Some(c) => c,
|
||||
None => {
|
||||
return Some(Err(LexError::InvalidChar));
|
||||
return Some((region, Err(LexError::InvalidChar)));
|
||||
}
|
||||
});
|
||||
|
||||
match self.chars.next() {
|
||||
Some('\'') => {
|
||||
if let Some(token) = state.clone().into_token() {
|
||||
return Some(Ok(token));
|
||||
return Some((region, Ok(token)));
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Some(Err(LexError::InvalidChar));
|
||||
return Some((region, Err(LexError::InvalidChar)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -170,10 +221,12 @@ where
|
|||
LexerState::TypeTerm(s) => {
|
||||
if *c == '=' || *c == '.' {
|
||||
if let Some(token) = state.clone().into_token() {
|
||||
return Some(Ok(token));
|
||||
return Some((region, Ok(token)));
|
||||
}
|
||||
} else {
|
||||
if let Some(c) = self.chars.next() {
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
s.push(c);
|
||||
}
|
||||
}
|
||||
|
@ -189,16 +242,19 @@ where
|
|||
|| *c == '='
|
||||
|| *c == ':'
|
||||
|| *c == '.'
|
||||
|| *c == '↦'
|
||||
{
|
||||
// finish the current token
|
||||
|
||||
if let Some(token) = state.clone().into_token() {
|
||||
return Some(Ok(token));
|
||||
return Some((region, Ok(token)));
|
||||
}
|
||||
} else {
|
||||
// append to the current token
|
||||
|
||||
let c = self.chars.next().unwrap();
|
||||
self.position += 1;
|
||||
region.end += 1;
|
||||
|
||||
match &mut state {
|
||||
LexerState::Sym(s) => {
|
||||
|
@ -209,7 +265,7 @@ where
|
|||
if let Some(d) = c.to_digit(10) {
|
||||
*n = (*n) * 10 + d as i64;
|
||||
} else {
|
||||
return Some(Err(LexError::InvalidDigit));
|
||||
return Some((region, Err(LexError::InvalidDigit)));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -221,7 +277,7 @@ where
|
|||
}
|
||||
|
||||
if let Some(token) = state.into_token() {
|
||||
Some(Ok(token))
|
||||
Some((region, Ok(token)))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
@ -244,8 +300,8 @@ mod tests {
|
|||
.chars(),
|
||||
);
|
||||
|
||||
for token in lexer {
|
||||
eprintln!("token = {:?}", token);
|
||||
for (range, token) in lexer {
|
||||
eprintln!("[{:?}] {:?}", range, token);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,14 +25,14 @@ where
|
|||
It: Iterator<Item = char>,
|
||||
{
|
||||
match tokens.next() {
|
||||
Some(Ok(t)) => {
|
||||
Some((region, Ok(t))) => {
|
||||
if t == expected_token {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(ParseError::UnexpectedToken)
|
||||
}
|
||||
}
|
||||
Some(Err(err)) => Err(ParseError::LexError(err)),
|
||||
Some((region, Err(err))) => Err(ParseError::LexError(err)),
|
||||
None => Err(ParseError::UnexpectedEnd),
|
||||
}
|
||||
}
|
||||
|
@ -42,9 +42,9 @@ where
|
|||
It: Iterator<Item = char>,
|
||||
{
|
||||
match tokens.next() {
|
||||
Some(Ok(LTIRToken::Symbol(name))) => Ok(name),
|
||||
Some(Ok(_)) => Err(ParseError::UnexpectedToken),
|
||||
Some(Err(err)) => Err(ParseError::LexError(err)),
|
||||
Some((region, Ok(LTIRToken::Symbol(name)))) => Ok(name),
|
||||
Some((region, Ok(_))) => Err(ParseError::UnexpectedToken),
|
||||
Some((region, Err(err))) => Err(ParseError::LexError(err)),
|
||||
None => Err(ParseError::UnexpectedEnd),
|
||||
}
|
||||
}
|
||||
|
@ -56,7 +56,7 @@ pub fn parse_type_tag<It>(
|
|||
where
|
||||
It: Iterator<Item = char>,
|
||||
{
|
||||
if let Some(peektok) = tokens.peek().clone() {
|
||||
if let Some((region, peektok)) = tokens.peek().clone() {
|
||||
match peektok.clone() {
|
||||
Ok(LTIRToken::AssignType(typeterm_str)) => {
|
||||
tokens.next();
|
||||
|
@ -79,7 +79,7 @@ pub fn parse_statement<It>(
|
|||
where
|
||||
It: Iterator<Item = char>,
|
||||
{
|
||||
if let Some(peektok) = tokens.peek() {
|
||||
if let Some((region, peektok)) = tokens.peek() {
|
||||
match peektok {
|
||||
Ok(LTIRToken::Symbol(sym)) => {
|
||||
match sym.as_str() {
|
||||
|
@ -154,7 +154,7 @@ where
|
|||
let _ = parse_expect(tokens, LTIRToken::BlockOpen)?;
|
||||
|
||||
let mut statements = Vec::new();
|
||||
while let Some(peektok) = tokens.peek() {
|
||||
while let Some((region, peektok)) = tokens.peek() {
|
||||
match peektok {
|
||||
Ok(LTIRToken::BlockClose) => {
|
||||
tokens.next();
|
||||
|
@ -179,11 +179,11 @@ where
|
|||
It: Iterator<Item = char>,
|
||||
{
|
||||
match tokens.next() {
|
||||
Some(Ok(LTIRToken::Symbol(sym))) => Ok(LTExpr::symbol(sym.as_str())),
|
||||
Some(Ok(LTIRToken::Char(c))) => Ok(LTExpr::lit_uint(c as u64)),
|
||||
Some(Ok(LTIRToken::Num(n))) => Ok(LTExpr::lit_uint(n as u64)),
|
||||
Some(Ok(_)) => Err(ParseError::UnexpectedToken),
|
||||
Some(Err(err)) => Err(ParseError::LexError(err)),
|
||||
Some((region, Ok(LTIRToken::Symbol(sym)))) => Ok(LTExpr::symbol(sym.as_str())),
|
||||
Some((region, Ok(LTIRToken::Char(c)))) => Ok(LTExpr::lit_uint(c as u64)),
|
||||
Some((region, Ok(LTIRToken::Num(n)))) => Ok(LTExpr::lit_uint(n as u64)),
|
||||
Some((region, Ok(_))) => Err(ParseError::UnexpectedToken),
|
||||
Some((region, Err(err))) => Err(ParseError::LexError(err)),
|
||||
None => Err(ParseError::UnexpectedEnd),
|
||||
}
|
||||
}
|
||||
|
@ -197,14 +197,14 @@ where
|
|||
{
|
||||
let mut children = Vec::new();
|
||||
|
||||
while let Some(tok) = tokens.peek() {
|
||||
while let Some((region, tok)) = tokens.peek() {
|
||||
match tok {
|
||||
Ok(LTIRToken::Lambda) => {
|
||||
if children.len() == 0 {
|
||||
tokens.next();
|
||||
|
||||
let mut args = Vec::new();
|
||||
while let Some(Ok(LTIRToken::Symbol(_))) = tokens.peek() {
|
||||
while let Some((region, Ok(LTIRToken::Symbol(_)))) = tokens.peek() {
|
||||
args.push((parse_symbol(tokens)?, parse_type_tag(typectx, tokens)));
|
||||
}
|
||||
|
||||
|
@ -221,7 +221,7 @@ where
|
|||
}
|
||||
Ok(LTIRToken::ExprOpen) => {
|
||||
tokens.next();
|
||||
while let Some(peektok) = tokens.peek() {
|
||||
while let Some((region, peektok)) = tokens.peek() {
|
||||
match peektok {
|
||||
Ok(LTIRToken::ExprClose) => {
|
||||
tokens.next();
|
||||
|
@ -253,7 +253,7 @@ where
|
|||
let if_expr = LTExpr::block(parse_block(typectx, tokens)?);
|
||||
let mut else_expr = LTExpr::block(vec![]);
|
||||
|
||||
if let Some(peektok) = tokens.peek() {
|
||||
if let Some((region, peektok)) = tokens.peek() {
|
||||
if let Ok(LTIRToken::Symbol(name)) = peektok {
|
||||
if name == "else" {
|
||||
tokens.next();
|
||||
|
|
Loading…
Reference in a new issue