lexer: improve whitespace handling

2023-10-02 01:30:10 +02:00 · 2023-10-02 01:30:10 +02:00 · da899192f5
commit da899192f5
parent 8fa38ca7d3
4 changed files with 22 additions and 21 deletions
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -77,9 +77,10 @@ where It: Iterator<Item = char>
                        '>' => { self.chars.next(); return Some(Ok(LadderTypeToken::Close)); },
                        '~' => { self.chars.next(); return Some(Ok(LadderTypeToken::Ladder)); },
                        '\'' => { self.chars.next(); state = LexerState::Char(None); },
-                        ' ' => { self.chars.next(); },
                        c => {
-                            if c.is_alphabetic() {
+                            if c.is_whitespace() {
+                                self.chars.next();
+                            } else if c.is_alphabetic() {
                                state = LexerState::Sym( String::new() );
                            } else if c.is_digit(10) {
                                state = LexerState::Num( 0 );
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -121,7 +121,7 @@ impl TypeDict {
    {
        match self.parse_partial(tokens) {
            Ok(t) => {
-                if let Some(tok) = tokens.peek() {
+                if let Some(_tok) = tokens.peek() {
                    Err(ParseError::UnexpectedToken)
                } else {
                    Ok(t)
--- a/src/test/lexer.rs
+++ b/src/test/lexer.rs
@@ -83,15 +83,15 @@ fn test_lexer_app_space() {
 #[test]
 fn test_lexer_large() {
    let mut lex = LadderTypeLexer::from(
-        "<Seq Date \
-              ~<TimeSince UnixEpoch> \
-              ~<Duration Seconds> \
-              ~ℕ \
-              ~<PosInt 10 BigEndian> \
-              ~< Seq <Digit 10>~Unicode > > \
-         ~<SepSeq Unicode ':'> \
-         ~<Seq Unicode> \
-         ~UTF-8 \
+        "<Seq Date
+              ~<TimeSince UnixEpoch>
+              ~<Duration Seconds>
+              ~ℕ
+              ~<PosInt 10 BigEndian>
+              ~< Seq <Digit 10>~Unicode > >
+         ~<SepSeq Unicode ':'>
+         ~<Seq Unicode>
+         ~UTF-8
         ~<Seq Byte>".chars());

    assert_eq!( lex.next(), Some(Ok(LadderTypeToken::Open)));
--- a/src/test/parser.rs
+++ b/src/test/parser.rs
@@ -134,15 +134,15 @@ fn test_parser_ladder_between() {
 fn test_parser_ladder_large() {
    assert_eq!(
        TypeTerm::from_str(
-            "<Seq Date \
-                  ~<TimeSince UnixEpoch> \
-                  ~<Duration Seconds> \
-                  ~ℕ \
-                  ~<PosInt 10 BigEndian> \
-                  ~< Seq <Digit 10>~Unicode > > \
-              ~<SepSeq Unicode ':'> \
-              ~<Seq Unicode> \
-              ~UTF-8 \
+            "<Seq Date
+                  ~<TimeSince UnixEpoch>
+                  ~<Duration Seconds>
+                  ~ℕ
+                  ~<PosInt 10 BigEndian>
+                  ~< Seq <Digit 10>~Unicode > >
+              ~<SepSeq Unicode ':'>
+              ~<Seq Unicode>
+              ~UTF-8
              ~<Seq Byte>"),

        Ok(