/// Tokens produced by the LTIR lexer.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LTIRToken {
    Comment(String),
    Symbol(String),
    Char(char),
    Num(i64),

    // SingleQuote(String),
    // DoubleQuote(String),
    // TripleQuote(String),
    Lambda,
    MapsTo,
    AssignType(String),
    AssignValue,

    ExprOpen,
    ExprClose,

    BlockOpen,
    BlockClose,
    StatementSep,
}
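
/// Errors that the lexer can yield in place of a token.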
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexError {
    InvalidDigit,
    InvalidChar,
}
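
/// Intermediate state of the lexer while a token is being accumulated.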
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum LexerState {
    Any,
    Comment(String),
    TypeTerm(String),
    Sym(String),
    Num(i64),
    Char(Option<char>),
}

impl LexerState {
    /// Convert the accumulated state into a finished token, if it represents one.
    fn into_token(self) -> Option<LTIRToken> {
        match self {
            LexerState::Any => None,
            LexerState::Comment(s) => Some(LTIRToken::Comment(s)),
            LexerState::TypeTerm(s) => Some(LTIRToken::AssignType(s)),
            LexerState::Sym(s) => Some(LTIRToken::Symbol(s)),
            LexerState::Num(n) => Some(LTIRToken::Num(n)),
            LexerState::Char(c) => Some(LTIRToken::Char(c?)),
        }
    }
}
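
/// Lexer over a character iterator; produces tokens tagged with the
/// input region they were read from.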
pub struct LTIRLexer<It>
where
    It: std::iter::Iterator<Item = char>,
{
    chars: std::iter::Peekable<It>,
    position: usize,
}

impl<It> LTIRLexer<It>
where
    It: Iterator<Item = char>,
{
    /// Consume the lexer and return the underlying character stream.
    pub fn into_inner(self) -> std::iter::Peekable<It> {
        self.chars
    }
}

impl<It> From<It> for LTIRLexer<It>
where
    It: Iterator<Item = char>,
{
    fn from(chars: It) -> Self {
        LTIRLexer {
            chars: chars.peekable(),
            position: 0,
        }
    }
}
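
/// Character range (`begin..end`) in the input stream that a token
/// or error originated from.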
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct InputRegionTag {
    pub begin: usize,
    pub end: usize,
}

impl Default for InputRegionTag {
    fn default() -> Self {
        InputRegionTag {
            begin: 0,
            end: 0,
        }
    }
}

impl InputRegionTag {
    /// Returns the smallest region that covers both `a` and `b`.
    pub fn max(a: InputRegionTag, b: InputRegionTag) -> InputRegionTag {
        InputRegionTag {
            begin: usize::min(a.begin, b.begin),
            end: usize::max(a.end, b.end),
        }
    }
}
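
/// The lexer is driven through the `Iterator` interface: each call to
/// `next()` skips leading whitespace, returns single-character tokens
/// directly, and otherwise accumulates a comment, character literal,
/// type term, symbol or number until a delimiter is reached.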
impl<It> Iterator for LTIRLexer<It>
where
    It: Iterator<Item = char>,
{
    type Item = (InputRegionTag, Result<LTIRToken, LexError>);

    fn next(&mut self) -> Option<Self::Item> {
        let mut state = LexerState::Any;
        let mut region = InputRegionTag {
            begin: self.position,
            end: self.position,
        };
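
        // Consume characters until a complete token (or an error) can be returned.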
        while let Some(c) = self.chars.peek() {
            match &mut state {
                // determine token type
                LexerState::Any => match c {
                    'λ' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::Lambda)));
                    }
                    '↦' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::MapsTo)));
                    }
                    '(' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::ExprOpen)));
                    }
                    ')' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::ExprClose)));
                    }
                    '{' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::BlockOpen)));
                    }
                    '}' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::BlockClose)));
                    }
                    ':' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        state = LexerState::TypeTerm(String::new());
                    }
                    '=' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::AssignValue)));
                    }
                    ';' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        return Some((region, Ok(LTIRToken::StatementSep)));
                    }
                    '\'' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;
                        state = LexerState::Char(None);
                    }
                    '/' => {
                        self.chars.next();
                        self.position += 1;
                        region.end += 1;

                        match self.chars.next() {
                            Some('*') => {
                                self.position += 1;
                                region.end += 1;
                                state = LexerState::Comment(String::new());
                            }
                            _ => {
                                return Some((region, Err(LexError::InvalidChar)));
                            }
                        }
                    }
                    c => {
                        if c.is_whitespace() {
                            self.chars.next();
                            self.position += 1;
                            region.begin += 1;
                            region.end += 1;
                        } else if c.is_digit(10) {
                            state = LexerState::Num(0);
                        } else {
                            state = LexerState::Sym(String::new());
                        }
                    }
                },
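
                // Inside a `/* ... */` comment: accumulate characters until the closing `*/`.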
                LexerState::Comment(s) => {
                    match self.chars.next() {
                        Some('*') => {
                            match self.chars.peek() {
                                Some('/') => {
                                    self.chars.next();
                                    self.position += 2;
                                    region.end += 2;

                                    if let Some(token) = state.clone().into_token() {
                                        return Some((region, Ok(token)));
                                    }
                                }
                                _ => {
                                    s.push('*');
                                    self.position += 1;
                                    region.end += 1;
                                }
                            }
                        }
                        Some(c) => {
                            s.push(c);
                            self.position += 1;
                            region.end += 1;
                        }
                        None => {
                            return Some((region, Err(LexError::InvalidChar)));
                        }
                    }
                }
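
                // Character literal: read one (possibly escaped) character and expect a closing quote.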
                LexerState::Char(val) => {
                    self.position += 2;
                    region.end += 2;
                    *val = Some(match self.chars.next() {
                        Some('\\') => {
                            self.position += 1;
                            region.end += 1;
                            match self.chars.next() {
                                Some('0') => '\0',
                                Some('n') => '\n',
                                Some('t') => '\t',
                                Some(c) => c,
                                None => {
                                    return Some((region, Err(LexError::InvalidChar)));
                                }
                            }
                        },
                        Some(c) => c,
                        None => {
                            return Some((region, Err(LexError::InvalidChar)));
                        }
                    });

                    match self.chars.next() {
                        Some('\'') => {
                            if let Some(token) = state.clone().into_token() {
                                return Some((region, Ok(token)));
                            }
                        }
                        _ => {
                            return Some((region, Err(LexError::InvalidChar)));
                        }
                    }
                }
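
                // Type annotation after ':': collect everything up to '=', '↦' or ';'.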
                LexerState::TypeTerm(s) => {
                    if *c == '=' || *c == '↦' || *c == ';' {
                        let token = state.clone().into_token().unwrap();
                        return Some((region, Ok(token)));
                    } else {
                        if let Some(c) = self.chars.next() {
                            self.position += 1;
                            region.end += 1;
                            s.push(c);
                        }
                    }
                }
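
                // Symbol / number states: whitespace or a delimiter finishes the
                // current token, any other character is appended to it.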
                _ => {
                    if c.is_whitespace()
                        || *c == '('
                        || *c == ')'
                        || *c == '{'
                        || *c == '}'
                        || *c == ';'
                        || *c == '='
                        || *c == ':'
                        || *c == '↦'
                    {
                        // finish the current token
                        if let Some(token) = state.clone().into_token() {
                            return Some((region, Ok(token)));
                        }
                    } else {
                        // append to the current token
                        let c = self.chars.next().unwrap();
                        self.position += 1;
                        region.end += 1;

                        match &mut state {
                            LexerState::Sym(s) => {
                                s.push(c);
                            }
                            LexerState::Num(n) => {
                                if let Some(d) = c.to_digit(10) {
                                    *n = (*n) * 10 + d as i64;
                                } else {
                                    return Some((region, Err(LexError::InvalidDigit)));
                                }
                            }
                            _ => {}
                        }
                    }
                }
            }
        }

        if let Some(token) = state.into_token() {
            Some((region, Ok(token)))
        } else {
            None
        }
    }
}

#[cfg(test)]
mod tests {
    #[test]
    fn test_lexer() {
        let lexer = crate::lexer::LTIRLexer::from(
            "let var1:ℕ=123;
            /* comment */
            let square =λx.* x x;

            let sqrt = λx:ℝ~machine::Float64~machine::Word.(f64-sqrt x);
            let magnitude =
                λx:ℝ
                .λy:ℝ
                .sqrt (+ (* x x) (* y y));
            "
            .chars(),
        );

        for (range, token) in lexer {
            eprintln!("[{:?}] {:?}", range, token);
        }
    }
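
    /* A small additional check (a sketch added here, not part of the original
     * test suite): it assumes the statement `x = 7;` lexes to the four tokens
     * asserted below, following the `next()` logic above. */
    #[test]
    fn test_lexer_small_statement() {
        use crate::lexer::{LTIRLexer, LTIRToken, LexError};

        let tokens: Vec<Result<LTIRToken, LexError>> = LTIRLexer::from("x = 7;".chars())
            .map(|(_region, token)| token)
            .collect();

        assert_eq!(
            tokens,
            vec![
                Ok(LTIRToken::Symbol("x".into())),
                Ok(LTIRToken::AssignValue),
                Ok(LTIRToken::Num(7)),
                Ok(LTIRToken::StatementSep),
            ]
        );
    }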
}