lexer: improve whitespace handling

This commit is contained in:
Michael Sippel 2023-10-02 01:30:10 +02:00
parent 8fa38ca7d3
commit da899192f5
Signed by: senvas
GPG key ID: F96CF119C34B64A6
4 changed files with 22 additions and 21 deletions

View file

@@ -77,9 +77,10 @@ where It: Iterator<Item = char>
'>' => { self.chars.next(); return Some(Ok(LadderTypeToken::Close)); },
'~' => { self.chars.next(); return Some(Ok(LadderTypeToken::Ladder)); },
'\'' => { self.chars.next(); state = LexerState::Char(None); },
' ' => { self.chars.next(); },
c => {
if c.is_alphabetic() {
if c.is_whitespace() {
self.chars.next();
} else if c.is_alphabetic() {
state = LexerState::Sym( String::new() );
} else if c.is_digit(10) {
state = LexerState::Num( 0 );

View file

@@ -121,7 +121,7 @@ impl TypeDict {
{
match self.parse_partial(tokens) {
Ok(t) => {
if let Some(tok) = tokens.peek() {
if let Some(_tok) = tokens.peek() {
Err(ParseError::UnexpectedToken)
} else {
Ok(t)

View file

@@ -83,15 +83,15 @@ fn test_lexer_app_space() {
#[test]
fn test_lexer_large() {
let mut lex = LadderTypeLexer::from(
"<Seq Date \
~<TimeSince UnixEpoch> \
~<Duration Seconds> \
~ \
~<PosInt 10 BigEndian> \
~< Seq <Digit 10>~Unicode > > \
~<SepSeq Unicode ':'> \
~<Seq Unicode> \
~UTF-8 \
"<Seq Date
~<TimeSince UnixEpoch>
~<Duration Seconds>
~
~<PosInt 10 BigEndian>
~< Seq <Digit 10>~Unicode > >
~<SepSeq Unicode ':'>
~<Seq Unicode>
~UTF-8
~<Seq Byte>".chars());
assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)));

View file

@@ -134,15 +134,15 @@ fn test_parser_ladder_between() {
fn test_parser_ladder_large() {
assert_eq!(
TypeTerm::from_str(
"<Seq Date \
~<TimeSince UnixEpoch> \
~<Duration Seconds> \
~ \
~<PosInt 10 BigEndian> \
~< Seq <Digit 10>~Unicode > > \
~<SepSeq Unicode ':'> \
~<Seq Unicode> \
~UTF-8 \
"<Seq Date
~<TimeSince UnixEpoch>
~<Duration Seconds>
~
~<PosInt 10 BigEndian>
~< Seq <Digit 10>~Unicode > >
~<SepSeq Unicode ':'>
~<Seq Unicode>
~UTF-8
~<Seq Byte>"),
Ok(