From 73817529d600e36887ebb364623d55e07d8dc84a Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Sat, 7 Oct 2023 13:07:18 +0200
Subject: [PATCH 1/9] first samples for a potential command-type description
 language
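
The typeDB files below are first sketches of a notation that describes,
per command, how the types of stdin, stdout and the arguments depend on
the options that were passed. The intended reading, as used in these
samples, is roughly:

  - `>0 : T ;`                 data expected on fd 0 (stdin) has type T
  - `<1 : T ;` / `<2 : T ;`    data written to fd 1 (stdout) / fd 2 (stderr)
  - `@NAME : T ;`              contents of the file named by argument NAME
  - `A~B`                      a ladder type: A represented in terms of B
  - `match OPTION... { .. }`   case analysis over the given options

For example, dirname is described as expecting nothing on stdin and
writing a delimiter-separated sequence of paths to stdout.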

---
 typeDB/cut     | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++
 typeDB/date    | 15 ++++++++++++
 typeDB/dirname | 56 +++++++++++++++++++++++++++++++++++++++++++
 typeDB/echo    | 16 +++++++++++++
 typeDB/seq     | 25 +++++++++++++++++++
 typeDB/xargs   | 38 +++++++++++++++++++++++++++++
 6 files changed, 215 insertions(+)
 create mode 100644 typeDB/cut
 create mode 100644 typeDB/date
 create mode 100644 typeDB/dirname
 create mode 100644 typeDB/echo
 create mode 100644 typeDB/seq
 create mode 100644 typeDB/xargs

diff --git a/typeDB/cut b/typeDB/cut
new file mode 100644
index 0000000..e860067
--- /dev/null
+++ b/typeDB/cut
@@ -0,0 +1,65 @@
+::cut OPTION... [FILE]... {
+
+    match OPTION... {
+        --help { <1 : Help~<Seq Char> ; }
+	--version { <1 : VersionInfo~<Seq Char> ; }
+
+        * {
+	    match OPTION... {
+		-f--fields LIST:<Seq ℕ>~CutFieldList~<Seq Char> {
+
+                    ROW-DELIM =
+                        match OPTION... {
+                            -z--zero-terminated { '\0' }
+                            * { '\n' }
+			};
+
+                    IN-COL-DELIM =
+                        match OPTION... {
+                            -d--delimiter DELIM:Char { DELIM }
+                            * { '\t' }
+                        };
+
+                    OUT-COL-DELIM =
+                        match OPTION... {
+			    --output-delimiter STRING:<Seq Char> { STRING }
+			    * { IN-COL-DELIM }
+			};
+
+                    FILE : Path
+                           ~<Seq PathSegment
+                                 ~<Seq Char>>
+                           ~<SepSeq Char '/'>
+                           ~<Seq Char> ;
+
+                    @FILE : <Seq <* < ITEM~<Seq Char> >... >
+                                 ~<Seq <Seq Char>>
+                                 ~<SepSeq Char IN-COL-DELIM>
+                                 ~<Seq Char>>
+                            ~<SepSeq Char ROW-DELIM>
+                            ~<Seq Char> ;
+
+                    >0 : <Seq <* <ITEM~<Seq Char>>... >
+                              ~<Seq <Seq Char>>
+                              ~<SepSeq Char IN-COL-DELIM>
+                              ~<Seq Char>>
+                         ~<SepSeq Char ROW-DELIM>
+                         ~<Seq Char> ;
+
+                    # `[LIST]` means `select all fields contained in LIST from parameter pack`
+                    <1 : <Seq <*  <ITEM~<Seq Char>>[LIST]... >
+                              ~<Seq <Seq Char>>
+                              ~<SepSeq Char OUT-COL-DELIM>
+                              ~<Seq Char>>
+                         ~<SepSeq Char ROW-DELIM>
+                         ~<Seq Char> ;
+                }
+
+                * {
+                    <1 : Nothing ;
+                    <2 : MissingFields~ErrorMessage~<Seq Char> ;
+                }
+            }
+        }
+    }
+}
diff --git a/typeDB/date b/typeDB/date
new file mode 100644
index 0000000..84edf45
--- /dev/null
+++ b/typeDB/date
@@ -0,0 +1,15 @@
+::date [OPTION]... [+FORMAT] {
+    match FORMAT {
+        "+%s" {
+            <1 : TimePoint
+                 ~<TimeSince UnixEpoch>
+                 ~<Duration Seconds>
+                 ~ℕ
+                 ~<PosInt 10 BigEndian>
+                 ~<Seq <Digit 10>~Char> ;
+        }
+        * {
+            <1 : TimePoint ;
+        }
+    }
+}
diff --git a/typeDB/dirname b/typeDB/dirname
new file mode 100644
index 0000000..c94d46a
--- /dev/null
+++ b/typeDB/dirname
@@ -0,0 +1,56 @@
+::dirname [OPTION] NAME... {
+
+    # don't expect any data on stdin
+    #
+
+    >0 : None ;
+
+
+    # each (non-option) argument must be a path
+    #
+
+    NAME : Path
+           ~<Seq PathSegment
+                 ~<Seq Char>>
+           ~<SepSeq Char '/'>
+           ~<Seq Char>
+	   ~UTF-8
+	   ~<Seq Byte> ;
+
+
+    matchopt OPTION {
+
+        # help and version flags will bypass any data processing
+	#
+
+        --help    { <1 : Help~<Seq Char> ; }
+	--version { <1 : VersionInfo~<Seq Char> ; }
+
+
+        # in case neither --help nor --version is set
+	# ...
+
+        * {
+
+	    # if zero-flag is set, use '\0' instead of '\n'
+	    # as delimiter in output sequence
+
+            DELIM =
+	        match OPTION {
+                    -z--zero { '\0' }
+                    * { '\n' }
+                };
+
+
+            # output a sequence of paths to stdout
+	    #
+
+            <1 : <Seq Path
+                      ~<Seq PathSegment~<Seq Char>>
+                      ~<SepSeq Char '/'>
+                      ~<Seq Char>>
+                 ~<SepSeq Char DELIM>
+		 ~<Seq Char> ;
+        }
+    }
+}
diff --git a/typeDB/echo b/typeDB/echo
new file mode 100644
index 0000000..6a6b9b6
--- /dev/null
+++ b/typeDB/echo
@@ -0,0 +1,16 @@
+::echo [OPTION]... [STRING]... {
+    >0 : None ;
+
+    matchopt OPTION {
+        --help    { <1 : Help~<Seq Char> ; }
+	--version { <1 : VersionInfo~<Seq Char> ; }
+	-n {
+            <1 : <* STRING~<Seq Char>... >
+                 ~<Seq Char> ;
+        }
+	* {
+            <1 : <* STRING~<Seq Char>... '\n' >
+                 ~<Seq Char> ;
+        }
+    }
+}
diff --git a/typeDB/seq b/typeDB/seq
new file mode 100644
index 0000000..b585a9a
--- /dev/null
+++ b/typeDB/seq
@@ -0,0 +1,25 @@
+::seq [OPTION]... [FIRST [INCREMENT]] LAST {
+    >0 : None ;
+
+    match OPTION... {
+        --help { <1 : Help~<Seq Char> ; }
+	--version { <1 : VersionInfo~<Seq Char> ; }
+        * {
+            NUM = match OPTION... {
+                -f--format FORMAT { ℝ~<Float 10 FORMAT>~<Seq Char> }
+		* { ℤ~<PosInt 10 BigEndian>~<Seq <Digit 10>~Char> }
+	    };
+
+            SEP = match OPTION... {
+                -s--separator SEP { SEP }
+                * { '\n' }
+            };
+
+            $FIRST     : NUM ;
+            $INCREMENT : NUM ;
+            $LAST      : NUM ;
+
+	    <1 : <Seq NUM~<Seq Char>>~<SepSeq Char SEP>~<Seq Char> ;
+        }	
+    }
+}
diff --git a/typeDB/xargs b/typeDB/xargs
new file mode 100644
index 0000000..6380730
--- /dev/null
+++ b/typeDB/xargs
@@ -0,0 +1,38 @@
+::xargs [OPTION]... [CMD [INITIAL-ARGS]...]  {
+
+    DELIM =
+        match OPTION... {
+            -0--null { '\0' }
+            -d--delimiter D:Char { D }
+        };
+
+    match OPTION... {
+        -a--argfile PATH {
+            # type of data read from file at `path`
+
+            @PATH : ARGS~<Seq Char>...
+                    ~<Seq <Seq Char>>
+                    ~<SepSeq Char DELIM>
+                    ~<Seq Char> ;
+        }
+        * {
+            # type of data read from stdin
+
+            >0 : ARGS~<Seq Char>...
+                 ~<Seq <Seq Char>>
+                 ~<SepSeq Char DELIM>
+                 ~<Seq Char> ;
+        }
+    }
+
+    match OPTION... {
+        -I REPLACE-STR {
+	    # TODO : how to handle replacement ?
+	    <1 : cmdtype <1 { CMD { INITIAL-ARGS where REPLACE-STR -> ARGS... } } ;
+        }
+        * {
+            <1 : cmdtype <1 { CMD INITIAL-ARGS... ARGS... } ;
+        }
+    }
+
+}

From 7988c8a2e1220ffbf806a533e83c591ec3e1f891 Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Sun, 15 Oct 2023 11:47:06 +0200
Subject: [PATCH 2/9] wip parser
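
Adds a first AST for shell commands (src/ast.rs), an environment stub
(src/env.rs), the beginnings of word expansion (src/expand.rs, still
disabled in main.rs), and a lexer that splits an input line into word
segments (src/parse.rs). For now, main() only feeds each line from stdin
through the lexer and dumps the segments, roughly:

    let mut lex = parse::WordLexer::from( line.chars() );
    for word in lex {
        eprintln!("word-segment: {:?}", word);
    }

Parsing whole commands on top of these segments is still missing.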

---
 src/ast.rs    | 123 +++++++++++++++++++++++++++++++++++++++++++++++
 src/env.rs    |   9 ++++
 src/expand.rs |  44 +++++++++++++++++
 src/main.rs   |  19 +++++++-
 src/parse.rs  | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 324 insertions(+), 1 deletion(-)
 create mode 100644 src/ast.rs
 create mode 100644 src/env.rs
 create mode 100644 src/expand.rs
 create mode 100644 src/parse.rs

diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..5a20bbc
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,123 @@
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+#[derive(Debug)]
+pub enum Command {
+    Simple {
+        assignments: Vec<(String, Word)>,
+        command_word: Word,
+        redirections: Vec<Redirection>
+    },
+    Pipeline(Vec<Command>),
+    Sequence(Vec<Command>),
+    ShortCircuitConjection(Vec<Command>),
+    ShortCircuitDisjunction(Vec<Command>),
+    Negation(Command),
+    While {
+        condition: Command,
+        loop_body: Command
+    },
+    For {
+        varname: String,
+        sequence: Word,
+        loop_body: Command
+    }
+    If {
+        condition: Command,
+        then_branch: Command,
+        else_branch: Command
+    },
+    Case {
+        expr: Word,
+        cases: Vec<(Word, Command)>
+    },
+    Function {
+        name: String,
+        body: Command
+    }
+}
+
+/*
+ * We are all luminous beings.
+ * Why then, do we not appear before each
+ * other radiant in our illumination ?
+ */
+
+/*
+ * Between the idea
+ * And the reality
+ * Between the motion
+ * And the act
+ * Falls the Shadow
+ * (T.S. Eliot)
+ */
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+#[derive(Debug)]
+pub struct Word {
+    pub segments: Vec<WordSegment>
+}
+
+#[derive(Debug)]
+pub enum WordSegment {
+    FieldSeparator,
+    Tilde(String),
+    Literal(String),
+    Parameter(String, ParameterFormat),
+    Subshell(Command),
+    DoubleQuote(Word),
+}
+
+#[derive(Debug)]
+pub enum ParameterFormat {
+    Normal,
+    Length,
+    Default(Word),
+    Assign(Word),
+    Error(Word),
+    Alt(Word),
+    Sub(ParamSubSide, ParamSubMode, Word),
+}
+
+#[derive(Debug)]
+pub enum ParamSubMode {
+    Shortest, Longest
+}
+
+#[derive(Debug)]
+pub enum ParamSubSide {
+    Prefix, Suffix
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+#[derive(Debug)]
+pub struct Redirection {
+    redirection_type: RedirectionType,
+    fd: u64,
+    target: Word
+}
+
+#[derive(Debug)]
+pub enum RedirectionType {
+    File(FileRedirectionType),
+    Dup(DupRedirectionType),
+    Heredoc // '<<'
+}
+
+#[derive(Debug)]
+pub enum FileRedirectionType {
+    In,         // '<'
+    InOut,      // '<>'
+    Out,        // '>'
+    OutReplace, // '>|'
+    OutAppend,  // '>|'
+}
+
+#[derive(Debug)]
+pub enum DupRedirectionType {
+    In,  // '<&'
+    Out  // '>&'
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
diff --git a/src/env.rs b/src/env.rs
new file mode 100644
index 0000000..091e68d
--- /dev/null
+++ b/src/env.rs
@@ -0,0 +1,9 @@
+use std::collections::HashMap;
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+struct Environment {
+    variables: HashMap<String, String>
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
diff --git a/src/expand.rs b/src/expand.rs
new file mode 100644
index 0000000..412fc5f
--- /dev/null
+++ b/src/expand.rs
@@ -0,0 +1,44 @@
+use crate::ast::*;
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+impl Word {
+    pub fn (&mut self, env: &Environment) {
+        for x in self.0.iter_mut() {
+            x.expand_tilde();
+            match x {
+                Word::Tilde => Word::Literal( env.get_home() ),
+                other => other,
+            }
+        }
+    }
+
+    pub fn expand(&self) -> Vec<String> {
+        let mut fields = Vec::new();
+
+        for seg in self.segments.iter() {
+            //
+        }
+
+        fields
+    }
+
+    pub fn split_field(&mut self) {
+        
+    }
+}
+
+impl WordSegment {
+    pub fn split_field(&self) -> Word {
+        
+        match self {
+            
+        }
+    }
+
+    pub fn expand(&self) -> Word {
+        match 
+    }
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
diff --git a/src/main.rs b/src/main.rs
index 5533bc1..d6025bd 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,6 +4,11 @@ use {
     tiny_ansi::TinyAnsi
 };
 
+mod ast;
+mod env;
+mod parse;
+//mod expand;
+
 //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
 
 pub fn get_type_str(cmd: &str, item: &str) -> Option<String> {
@@ -31,7 +36,19 @@ fn main() {
     let mut dict = TypeDict::new();
 
     let stdin = std::io::stdin();
-    for pipeline in std::io::BufReader::new(stdin).lines() {        
+    for line in std::io::BufReader::new(stdin).lines() {
+        if let Ok(line) = line {
+            let mut lex = parse::WordLexer::from( line.chars() );
+            for word in lex {
+                eprintln!("word-segment: {:?}", word);
+            }
+        }
+    }
+
+    return;
+
+    let stdin = std::io::stdin();
+    for pipeline in std::io::BufReader::new(stdin).lines() {
         let mut last_cmd = String::new();
         let mut last_stdout_type : Option<TypeTerm> = None;
 
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..9a54df4
--- /dev/null
+++ b/src/parse.rs
@@ -0,0 +1,130 @@
+use {
+    crate::ast::*,
+    std::iter::{Peekable, FromIterator},
+};
+
+pub struct WordLexer<It>
+where It: Iterator<Item = char> {
+    chars: Peekable<It>
+}
+
+impl<It> From<It> for WordLexer<It>
+where It: Iterator<Item = char> {
+    fn from(iter: It) -> Self {
+        WordLexer {
+            chars: iter.into_iter().peekable()
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum LexError {
+    UnexpectedEnd(char)
+}
+
+impl<It> WordLexer<It>
+where It: Iterator<Item = char> {
+    fn collect_until(&mut self, close: char) -> Result<String, LexError> {
+        let mut val = String::new();
+        while let Some(c) = self.chars.peek().cloned() {
+            if c == close {
+                return Ok(val)
+            } else {
+                self.chars.next();
+                val.push(c);
+            }
+        }
+
+        if close.is_whitespace() {
+            Ok(val)
+        } else {
+            Err(LexError::UnexpectedEnd(close))
+        }
+    }
+}
+
+impl<It> Iterator for WordLexer<It>
+where It: Iterator<Item = char> {
+    type Item = Result<WordSegment, LexError>;
+
+    fn next(&mut self) -> Option<Result<WordSegment, LexError>> {
+        match self.chars.peek().cloned() {
+            Some('~') => {
+                self.chars.next();
+                match self.collect_until(' ') {
+                    Ok(s) => Some(Ok(WordSegment::Tilde(s))),
+                    Err(e) => Some(Err(e))
+                }
+            }
+            Some('"') => {
+                self.chars.next();
+                match self.collect_until('"') {
+                    Ok(s) => {
+                        self.chars.next();
+
+                        let word = Word {
+                            segments: WordLexer { chars: s.chars().peekable() }
+                            .scan((), |_, x| x.ok())
+                                .collect::<Vec<_>>()
+                        };
+
+                        Some(Ok(WordSegment::DoubleQuote(word)))
+                    },
+                    Err(e) => Some(Err(e))
+                }
+            },
+            Some('\'') => {
+                self.chars.next();
+                match self.collect_until('\'') {
+                    Ok(s) => {
+                        self.chars.next();
+                        Some(Ok(WordSegment::Literal(s)))
+                    },
+                    Err(e) => Some(Err(e))
+                }
+            },
+            Some('$') => {
+                self.chars.next();
+                match self.chars.peek() {
+                    Some('{') => {
+                        self.chars.next();
+                        match self.collect_until('}') {
+                            Ok(s) => {
+                                self.chars.next();
+                                Some(Ok(WordSegment::Variable(s)))
+                            }
+                            Err(e) => Some(Err(e))
+                        }
+                    }
+                    _ => {
+                        match self.collect_until(' ') {
+                            Ok(s) => {
+                                Some(Ok(WordSegment::Variable(s)))
+                            }
+                            Err(e) => Some(Err(e))
+                        }
+                    }
+                }
+            }
+            Some(c) => {
+                while let Some(c) = self.chars.peek() {
+                    if c.is_whitespace() {
+                        self.chars.next();
+                    } else {
+                        return match self.collect_until(' ') {
+                            Ok(s) => {
+                                Some(Ok(WordSegment::Literal(s)))
+                            }
+                            Err(e) => Some(Err(e))
+                        };
+                    }
+                }
+                None
+            }
+            None => {
+                None
+            }
+        }
+    }
+}
+

From 695cbb24f1d27c347fe21bbb98e61d5028551ac8 Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Thu, 26 Oct 2023 20:25:56 +0200
Subject: [PATCH 3/9] basic parser
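
Makes the AST from the previous patch compile (boxed recursive command
variants, derived PartialEq) and rewrites the lexer around DelimIter plus
a recursive parse_cmd() that understands `;`, `|`, `||`, `&&`, `!`,
quoting, `${..}` parameters and `$(..)` subshells. Rough usage sketch
(not part of the patch):

    let mut chars = "ls | wc".chars().peekable();
    let cmd = parse_cmd(&mut chars);
    // expected shape:
    //   Ok(Some(Command::Pipeline(vec![ <simple "ls">, <simple "wc"> ])))

Redirections are not handled yet (parse_redirection() is a stub), and a
trailing `&` does not spawn a background job yet.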

---
 src/ast.rs   |  68 ++++----
 src/main.rs  |   8 +-
 src/parse.rs | 429 ++++++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 392 insertions(+), 113 deletions(-)

diff --git a/src/ast.rs b/src/ast.rs
index 5a20bbc..87e7aae 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -1,30 +1,32 @@
+use std::boxed::Box;
+
 //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum Command {
     Simple {
-        assignments: Vec<(String, Word)>,
+        assignments: Vec<Assignment>,
         command_word: Word,
         redirections: Vec<Redirection>
     },
     Pipeline(Vec<Command>),
     Sequence(Vec<Command>),
-    ShortCircuitConjection(Vec<Command>),
+    ShortCircuitConjunction(Vec<Command>),
     ShortCircuitDisjunction(Vec<Command>),
-    Negation(Command),
+    Negation(Box<Command>),
     While {
-        condition: Command,
-        loop_body: Command
+        condition: Box<Command>,
+        loop_body: Box<Command>
     },
     For {
         varname: String,
         sequence: Word,
-        loop_body: Command
-    }
+        loop_body: Box<Command>
+    },
     If {
-        condition: Command,
-        then_branch: Command,
-        else_branch: Command
+        condition: Box<Command>,
+        then_branch: Box<Command>,
+        else_branch: Box<Command>
     },
     Case {
         expr: Word,
@@ -32,35 +34,25 @@ pub enum Command {
     },
     Function {
         name: String,
-        body: Command
+        body: Box<Command>
     }
 }
 
-/*
- * We are all luminous beings.
- * Why then, do we not appear before each
- * other radiant in our illumination ?
- */
-
-/*
- * Between the idea
- * And the reality
- * Between the motion
- * And the act
- * Falls the Shadow
- * (T.S. Eliot)
- */
-
 //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
+pub struct Assignment {
+    pub name: String,
+    pub value: Word
+}
+
+#[derive(Debug, PartialEq)]
 pub struct Word {
     pub segments: Vec<WordSegment>
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum WordSegment {
-    FieldSeparator,
     Tilde(String),
     Literal(String),
     Parameter(String, ParameterFormat),
@@ -68,7 +60,7 @@ pub enum WordSegment {
     DoubleQuote(Word),
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum ParameterFormat {
     Normal,
     Length,
@@ -79,42 +71,42 @@ pub enum ParameterFormat {
     Sub(ParamSubSide, ParamSubMode, Word),
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum ParamSubMode {
     Shortest, Longest
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum ParamSubSide {
     Prefix, Suffix
 }
 
 //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub struct Redirection {
     redirection_type: RedirectionType,
     fd: u64,
     target: Word
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum RedirectionType {
     File(FileRedirectionType),
     Dup(DupRedirectionType),
     Heredoc // '<<'
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum FileRedirectionType {
     In,         // '<'
     InOut,      // '<>'
     Out,        // '>'
     OutReplace, // '>|'
-    OutAppend,  // '>|'
+    OutAppend,  // '>>'
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum DupRedirectionType {
     In,  // '<&'
     Out  // '>&'
diff --git a/src/main.rs b/src/main.rs
index d6025bd..3752690 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,5 @@
+#![feature(iterator_try_collect)]
+
 use {
     laddertypes::*,
     std::io::BufRead,
@@ -38,10 +40,14 @@ fn main() {
     let stdin = std::io::stdin();
     for line in std::io::BufReader::new(stdin).lines() {
         if let Ok(line) = line {
+            let cmd = parse::parse_cmd( &mut line.chars().peekable() );
+            eprintln!("parsed cmd: {:?}", cmd);
+            /*
             let mut lex = parse::WordLexer::from( line.chars() );
             for word in lex {
                 eprintln!("word-segment: {:?}", word);
-            }
+        }
+            */
         }
     }
 
diff --git a/src/parse.rs b/src/parse.rs
index 9a54df4..af691f9 100644
--- a/src/parse.rs
+++ b/src/parse.rs
@@ -1,105 +1,368 @@
 use {
     crate::ast::*,
-    std::iter::{Peekable, FromIterator},
+    std::iter::{Peekable},
 };
 
-pub struct WordLexer<It>
-where It: Iterator<Item = char> {
-    chars: Peekable<It>
+
+#[derive(Debug, PartialEq)]
+pub enum LexError {
+    UnexpectedEnd(Vec<Option<char>>),
+    UnexpectedToken(char),
+    InvalidFileRedirectionType
 }
 
-impl<It> From<It> for WordLexer<It>
+
+/// iterates chars until it finds some char in `delim`
+pub struct DelimIter<'a, It>
 where It: Iterator<Item = char> {
-    fn from(iter: It) -> Self {
-        WordLexer {
-            chars: iter.into_iter().peekable()
-        }
+    chars: &'a mut Peekable<It>,
+    delim: Vec<(Option<char>, bool)>
+}
+
+impl<'a, It> DelimIter<'a, It>
+where It: Iterator<Item = char> {
+    fn new(chars: &'a mut Peekable<It>, delim: Vec<(Option<char>, bool)>) -> Self {
+        DelimIter { chars, delim }
+    }
+
+    fn new_whitespace(chars: &'a mut Peekable<It>) -> Self {
+        DelimIter::new(chars, vec![
+            (None, true),
+            (Some(' '), true),
+            (Some('\t'), true),
+            (Some('\n'), true)
+        ])
+    }
+
+    fn new_shell_word(chars: &'a mut Peekable<It>) -> Self {
+        DelimIter::new(chars, vec![
+            (None, true),
+            (Some(' '), true),
+            (Some('\t'), true),
+            (Some('\n'), true),
+            (Some('|'), false),
+            (Some('&'), false),
+            (Some(';'), false),
+            (Some('\"'), false),
+            (Some('\''), false)
+        ])
     }
 }
 
-#[derive(Debug)]
-pub enum LexError {
-    UnexpectedEnd(char)
-}
+impl<'a, It> Iterator for DelimIter<'a, It>
+where It: 'a + Iterator<Item = char> {
+    type Item = Result<char, LexError>;
 
-impl<It> WordLexer<It>
-where It: Iterator<Item = char> {
-    fn collect_until(&mut self, close: char) -> Result<String, LexError> {
-        let mut val = String::new();
-        while let Some(c) = self.chars.peek().cloned() {
-            if c == close {
-                return Ok(val)
-            } else {
-                self.chars.next();
-                val.push(c);
+    fn next(&mut self) -> Option<Result<char, LexError>> {
+        for (delim, consume) in self.delim.iter() {
+            if self.chars.peek().cloned() == *delim {
+                if *consume {
+                    self.chars.next();
+                }
+                return None;
             }
         }
 
-        if close.is_whitespace() {
-            Ok(val)
-        } else {
-            Err(LexError::UnexpectedEnd(close))
+        match self.chars.next() {
+            Some(c) => Some(Ok(c)),
+            None => Some(Err(LexError::UnexpectedEnd(vec![])))
         }
     }
 }
 
-impl<It> Iterator for WordLexer<It>
+
+pub struct WordLexer<'a, It>
+where It: 'a + Iterator<Item = char> {
+    chars: &'a mut Peekable<It>
+}
+
+impl<'a, It> WordLexer<'a, It>
 where It: Iterator<Item = char> {
+    fn collect_until(&mut self, close: Option<char>) -> Result<String, LexError> {
+        DelimIter::new(&mut self.chars, vec![(close, true)])
+            .try_collect::<String>()
+    }
+}
+
+pub fn skip_whitespace<It>(chars: &mut Peekable<It>)
+where It: Iterator<Item = char>
+{
+    while let Some(c) = chars.peek() {
+        if c.is_whitespace() {
+            chars.next();
+        } else {
+            break;
+        }
+    }
+}
+
+pub fn parse_quoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError>
+where It: Iterator<Item = char>
+{
+    assert_eq!( chars.next(), Some('\''));
+    let quoted = DelimIter::new(chars, vec![(Some('\''), true)]).try_collect::<String>();
+    match quoted {
+        Ok(s) => {
+            Ok(WordSegment::Literal(s))
+        },
+        Err(e) => Err(e)
+    }
+}
+
+pub fn parse_doublequoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError>
+where It: Iterator<Item = char>
+{
+    assert_eq!( chars.next(), Some('\"'));
+    let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::<String>();
+    match quoted {
+        Ok(s) => {
+            let word = Word {
+                segments: // fixme: handle spaces correctly -> create QuoteLexer
+                WordLexer { chars: &mut s.chars().peekable() }
+                .scan((), |_, x| x.ok())
+                    .collect::<Vec<_>>()
+            };
+
+            Ok(WordSegment::DoubleQuote(word))
+        },
+        Err(e) => Err(e)
+    }    
+}
+
+pub fn parse_word<It>(chars: &mut Peekable<It>) -> Result<Word, LexError>
+where It: Iterator<Item = char>
+{
+    Ok(Word {
+        segments: WordLexer{ chars }.try_collect::<Vec<_>>()?
+    })
+}
+
+pub fn parse_assignment<It>(chars: &mut Peekable<It>) -> Result<Assignment, LexError>
+where It: Iterator<Item = char>
+{
+    let name = DelimIter::new(chars, vec![(Some('='), true)]).try_collect::<String>()?;
+    let value_str = DelimIter::new_whitespace(chars).try_collect::<String>()?;
+    let value = parse_word(&mut value_str.chars().peekable())?;
+    Ok(Assignment{ name, value })
+}
+
+impl std::str::FromStr for FileRedirectionType {
+    type Err = LexError;
+
+    fn from_str(s: &str) -> Result<FileRedirectionType, LexError> {
+        match s {
+            "<" => Ok(FileRedirectionType::In),
+            "<>" => Ok(FileRedirectionType::InOut),
+            ">" => Ok(FileRedirectionType::Out),
+            ">|" => Ok(FileRedirectionType::OutReplace),
+            ">>" => Ok(FileRedirectionType::OutAppend),
+            _ => Err(LexError::InvalidFileRedirectionType)
+        }
+    }
+}
+
+pub fn parse_redirection<It>(chars: &mut Peekable<It>) -> Result<Redirection, LexError>
+where It: Iterator<Item = char>
+{
+    Err(LexError::InvalidFileRedirectionType)
+    //    let name = DelimIterator::new(chars, vec!['<', '>']).collect::<String>();
+}
+
+pub fn parse_simple_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError>
+where It: Iterator<Item = char>
+{
+    let mut assignments = Vec::new();
+    let mut redirections = Vec::new();
+
+    if chars.peek() == None {
+        return Ok(None);
+    }
+
+    let mut first = DelimIter::new_shell_word(chars).try_collect::<String>()?;
+
+    while first.contains('=') {
+        assignments.push( parse_assignment(chars)? );
+        first = DelimIter::new_shell_word(chars).try_collect::<String>()?;
+    }
+
+    let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?;
+    cmd_segments.insert(0, WordSegment::Literal(first));
+
+    Ok(Some(Command::Simple {
+        assignments,
+        command_word: Word { segments: cmd_segments },
+        redirections,
+    }))
+}
+
+pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError>
+where It: Iterator<Item = char>
+{
+    skip_whitespace(chars);
+    match chars.peek() {
+        Some('!') => {
+            chars.next();
+            if let Some(cmd) = parse_cmd(chars)? {
+                Ok(Some(Command::Negation(Box::new(cmd))))
+            } else {
+                Err(LexError::UnexpectedEnd(vec![]))
+            }
+        }
+        _ => {
+            if let Some(head) = parse_simple_cmd(chars)? {
+                skip_whitespace(chars);
+
+                match chars.peek() {
+                    Some(';') => {
+                        chars.next();
+
+                        let tail = parse_cmd( chars ) ?;
+                        match tail {
+                            Some(Command::Sequence(mut s)) => {
+                                s.insert(0, head);
+                                Ok(Some(Command::Sequence(s)))
+                            }
+                            Some(tail) => {
+                                Ok(Some(Command::Sequence(vec![ head, tail ])))
+                            }
+                            None => {
+                                Ok(Some(head))
+                            }
+                        }
+                    }
+                    Some('|') => {
+                        chars.next();
+                        match chars.peek() {
+                            Some('|') => {
+                                chars.next();
+
+                                let tail = parse_cmd( chars ) ?;
+                                match tail {
+                                    Some(Command::ShortCircuitDisjunction(mut s)) => {
+                                        s.insert(0, head);
+                                        Ok(Some(Command::ShortCircuitDisjunction(s)))
+                                    }
+                                    Some(tail) => {
+                                        Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ])))
+                                    }
+                                    None => {
+                                        Err(LexError::UnexpectedEnd(vec![Some('|')]))
+                                    }
+                                }
+                            }
+                            _ => {
+                                let tail = parse_cmd( chars ) ?;
+                                match tail {
+                                    Some(Command::Pipeline(mut s)) => {
+                                        s.insert(0, head);
+                                        Ok(Some(Command::Pipeline(s)))
+                                    }
+                                    Some(tail) => {
+                                        Ok(Some(Command::Pipeline(vec![ head, tail ])))
+                                    }
+                                    None => {
+                                        Err(LexError::UnexpectedEnd(vec![]))
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    Some('&') => {
+                        chars.next();
+                        match chars.peek() {
+                            Some('&') => {
+                                chars.next();
+
+                                let tail = parse_cmd( chars ) ?;
+                                match tail {
+                                    Some(Command::ShortCircuitConjunction(mut s)) => {
+                                        s.insert(0, head);
+                                        Ok(Some(Command::ShortCircuitConjunction(s)))
+                                    }
+                                    Some(tail) => {
+                                        Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ])))
+                                    }
+                                    None => {
+                                        Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')]))
+                                    }
+                                }
+                            }
+                            Some(c) => {
+                                Err(LexError::UnexpectedToken(*c))
+                            }
+                            None => {
+                                // todo:
+                                // background job
+                                Ok(Some(head))
+                            }
+                        }
+                    }
+                    Some(c) => {
+                        Err(LexError::UnexpectedToken(*c))
+                    }
+                    None => {
+                        Ok(Some(head))
+                    }
+                }
+            } else {
+                Ok(None)
+            }
+        }
+    }
+}
+
+impl<'a, It> Iterator for WordLexer<'a, It>
+where It: 'a + Iterator<Item = char> {
     type Item = Result<WordSegment, LexError>;
 
     fn next(&mut self) -> Option<Result<WordSegment, LexError>> {
+        skip_whitespace(self.chars);
         match self.chars.peek().cloned() {
+            Some('|') => { None }
+            Some('&') => { None }
+            Some(';') => { None }
             Some('~') => {
                 self.chars.next();
-                match self.collect_until(' ') {
-                    Ok(s) => Some(Ok(WordSegment::Tilde(s))),
+                let user = DelimIter::new_whitespace(self.chars).collect();
+                match user {
+                    Ok(user) => Some(Ok(WordSegment::Tilde(user))),
                     Err(e) => Some(Err(e))
                 }
             }
-            Some('"') => {
-                self.chars.next();
-                match self.collect_until('"') {
-                    Ok(s) => {
-                        self.chars.next();
-
-                        let word = Word {
-                            segments: WordLexer { chars: s.chars().peekable() }
-                            .scan((), |_, x| x.ok())
-                                .collect::<Vec<_>>()
-                        };
-
-                        Some(Ok(WordSegment::DoubleQuote(word)))
-                    },
-                    Err(e) => Some(Err(e))
-                }
-            },
-            Some('\'') => {
-                self.chars.next();
-                match self.collect_until('\'') {
-                    Ok(s) => {
-                        self.chars.next();
-                        Some(Ok(WordSegment::Literal(s)))
-                    },
-                    Err(e) => Some(Err(e))
-                }
-            },
+            Some('"') => { Some(parse_doublequoted(self.chars)) },
+            Some('\'') => { Some(parse_quoted(self.chars)) },
             Some('$') => {
                 self.chars.next();
                 match self.chars.peek() {
                     Some('{') => {
                         self.chars.next();
-                        match self.collect_until('}') {
+                        match DelimIter::new(&mut self.chars, vec![(Some('}'), true)]).try_collect::<String>() {
                             Ok(s) => {
-                                self.chars.next();
-                                Some(Ok(WordSegment::Variable(s)))
+                                Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal)))
                             }
                             Err(e) => Some(Err(e))
                         }
                     }
+                    Some('(') => {
+                        self.chars.next();
+                        let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>();
+                        match subcmd_str {
+                            Ok(subcmd_str) => {
+                                match parse_cmd(&mut subcmd_str.chars().peekable()) {
+                                    Ok(Some(subcmd)) => {
+                                        Some(Ok(WordSegment::Subshell(subcmd)))        
+                                    }
+                                    Ok(None) => None,
+                                    Err(err) => Some(Err(err))
+                                }
+                            }
+                            Err(err) => Some(Err(err))
+                        }
+                    }
                     _ => {
-                        match self.collect_until(' ') {
+                        match DelimIter::new_whitespace(self.chars).collect() {
                             Ok(s) => {
-                                Some(Ok(WordSegment::Variable(s)))
+                                Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal)))
                             }
                             Err(e) => Some(Err(e))
                         }
@@ -107,19 +370,11 @@ where It: Iterator<Item = char> {
                 }
             }
             Some(c) => {
-                while let Some(c) = self.chars.peek() {
-                    if c.is_whitespace() {
-                        self.chars.next();
-                    } else {
-                        return match self.collect_until(' ') {
-                            Ok(s) => {
-                                Some(Ok(WordSegment::Literal(s)))
-                            }
-                            Err(e) => Some(Err(e))
-                        };
-                    }
+                let s : Result<String, LexError> = DelimIter::new_shell_word(self.chars).collect();
+                match s {
+                    Ok(s) => Some(Ok(WordSegment::Literal(s))),
+                    Err(e) => Some(Err(e))
                 }
-                None
             }
             None => {
                 None
@@ -128,3 +383,29 @@ where It: Iterator<Item = char> {
     }
 }
 
+
+mod test {
+    use crate::parse::*;
+
+    #[test]
+    fn test_delim_iter() {
+        let mut cs = "test 1234".chars().peekable();
+        let mut lexer = DelimIter::new_shell_word(&mut cs);
+        assert_eq!(lexer.try_collect::<String>(), Ok(String::from("test")));
+    }
+
+    #[test]
+    fn test_word_lexer() {
+        let mut cs = "test   1234|test".chars().peekable();
+
+        {
+            let mut lexer = WordLexer{ chars: &mut cs };
+            assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test")))));
+            assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234")))));
+            assert_eq!(lexer.next(), None);
+        }
+
+        assert_eq!(cs.next(), Some('|'));
+    }
+}
+

From 21aa45d189b556acce8478c62dd107e81a32c14b Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Fri, 27 Oct 2023 02:11:28 +0200
Subject: [PATCH 4/9] parse assignments
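
parse_simple_cmd() now consumes any number of leading NAME=WORD
assignments before the command word, replacing the earlier
`first.contains('=')` heuristic. Rough sketch of the resulting shape
(not part of the patch):

    let mut chars = "PATH=/bin ls".chars().peekable();
    let cmd = parse_simple_cmd(&mut chars);
    // expected shape:
    //   Ok(Some(Command::Simple {
    //       assignments: vec![ Assignment { name: "PATH", value: <word "/bin"> } ],
    //       command_word: <word "ls">,
    //       redirections: vec![]
    //   }))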

---
 src/parse.rs | 73 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 50 insertions(+), 23 deletions(-)

diff --git a/src/parse.rs b/src/parse.rs
index af691f9..b4db237 100644
--- a/src/parse.rs
+++ b/src/parse.rs
@@ -47,6 +47,21 @@ where It: Iterator<Item = char> {
             (Some('\''), false)
         ])
     }
+
+    fn new_shell_word_or_assignment(chars: &'a mut Peekable<It>) -> Self {
+        DelimIter::new(chars, vec![
+            (None, true),
+            (Some(' '), true),
+            (Some('\t'), true),
+            (Some('\n'), true),
+            (Some('='), false),
+            (Some('|'), false),
+            (Some('&'), false),
+            (Some(';'), false),
+            (Some('\"'), false),
+            (Some('\''), false)
+        ])
+    }
 }
 
 impl<'a, It> Iterator for DelimIter<'a, It>
@@ -137,15 +152,6 @@ where It: Iterator<Item = char>
     })
 }
 
-pub fn parse_assignment<It>(chars: &mut Peekable<It>) -> Result<Assignment, LexError>
-where It: Iterator<Item = char>
-{
-    let name = DelimIter::new(chars, vec![(Some('='), true)]).try_collect::<String>()?;
-    let value_str = DelimIter::new_whitespace(chars).try_collect::<String>()?;
-    let value = parse_word(&mut value_str.chars().peekable())?;
-    Ok(Assignment{ name, value })
-}
-
 impl std::str::FromStr for FileRedirectionType {
     type Err = LexError;
 
@@ -178,21 +184,38 @@ where It: Iterator<Item = char>
         return Ok(None);
     }
 
-    let mut first = DelimIter::new_shell_word(chars).try_collect::<String>()?;
+    loop {
+        skip_whitespace(chars);
+        let mut name = DelimIter::new_shell_word_or_assignment(chars).try_collect::<String>()?;
 
-    while first.contains('=') {
-        assignments.push( parse_assignment(chars)? );
-        first = DelimIter::new_shell_word(chars).try_collect::<String>()?;
+        match chars.peek().clone() {
+            Some('=') => {
+                chars.next();
+                let mut lex = WordLexer{ chars };
+                match lex.next() {
+                    Some(Ok(value)) => {
+                        assignments.push(Assignment { name, value: Word{ segments: vec![ value ] } });
+                    },
+                    Some(Err(e)) => {
+                        return Err(e);
+                    },
+                    None => {
+                        return Err(LexError::UnexpectedEnd(vec![]));
+                    }
+                }
+            }
+            _ => {
+                let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?;
+                cmd_segments.insert(0, WordSegment::Literal(name));
+
+                return Ok(Some(Command::Simple {
+                    assignments,
+                    command_word: Word { segments: cmd_segments },
+                    redirections,
+                }));
+            }
+        }
     }
-
-    let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?;
-    cmd_segments.insert(0, WordSegment::Literal(first));
-
-    Ok(Some(Command::Simple {
-        assignments,
-        command_word: Word { segments: cmd_segments },
-        redirections,
-    }))
 }
 
 pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError>
@@ -404,8 +427,12 @@ mod test {
             assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234")))));
             assert_eq!(lexer.next(), None);
         }
-
         assert_eq!(cs.next(), Some('|'));
+        {
+            let mut lexer = WordLexer{ chars: &mut cs };
+            assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test")))));
+            assert_eq!(lexer.next(), None);
+        }
     }
 }
 

From 63e06a247211731f3d0d83b2f0a4c16947d08f2f Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Sun, 29 Oct 2023 14:53:16 +0100
Subject: [PATCH 5/9] wip cmd-type-dsl
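
First Rust-side model for the command type annotations from typeDB/:
CommandPattern is meant to match a concrete invocation, CommandTypeStatement
mirrors the statements of the description language, and evaluating a
statement yields a CommandType, i.e. a list of (Selector, TypeTerm) pairs.
A hedged sketch of the intended correspondence (not in the patch; the
TypeTerm is left as todo!() since its construction belongs to the
laddertypes crate):

    // `<1 : Help~<Seq Char> ;`  would roughly become
    let stmt = CommandTypeStatement::TypAssign(
        Selector::Pipe(FileDescriptor(1), PipeDirection::Out),
        /* TypeTerm for Help~<Seq Char> */ todo!()
    );

annotation.rs sketches how annotations would be looked up for a parsed
Command (AnnotationContext::get_type), but is not part of the build yet.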

---
 src/cmd_type_dsl/annotation.rs |  68 +++++++++++++++++++++
 src/cmd_type_dsl/mod.rs        | 107 +++++++++++++++++++++++++++++++++
 src/main.rs                    |   2 +
 3 files changed, 177 insertions(+)
 create mode 100644 src/cmd_type_dsl/annotation.rs
 create mode 100644 src/cmd_type_dsl/mod.rs

diff --git a/src/cmd_type_dsl/annotation.rs b/src/cmd_type_dsl/annotation.rs
new file mode 100644
index 0000000..24c6467
--- /dev/null
+++ b/src/cmd_type_dsl/annotation.rs
@@ -0,0 +1,68 @@
+
+
+    pub enum AnnotationContext {
+        Cached( Vec<(CommandPattern, CommandTypeStatement)> ),
+        Load( String ),
+        FindIn( String ),
+    }
+    impl AnnotationContext {
+        /* loads & parses any given context
+         */
+        /*
+        pub fn into_cached(self) -> AnnotationContext {
+            match self {
+                AnnotationContext::Load( path ) => {
+                    
+                }
+            }
+        }
+*/
+        pub fn get_type(&self, cmd: &Command) -> Result<CommandType, UnificationError> {
+            match cmd {
+                Command::Simple{ assignments, command_word, redirections } => {
+                    match self {
+                        AnnotationContext::Cached( annotations ) => {
+                            // find matching command pattern...
+                            for (cmd_pat, typ) in annotations.iter() {
+                                if let Ok(unificator) = cmd_pat.match_cmd(cmd) {
+                                    return Ok( typ.substitute(unificator).eval() );
+                                }
+                            }
+
+                            Err(UnificationError::NoPattern)
+                        },
+
+                        AnnotationContext::Load( path ) => {
+                            /* todo:
+                             *   - open file at `path`
+                             *   - parse CommandPattern + CommandTypeStatement
+                             *   - get_type on AnnotationContext::Cached()
+                             */
+                            
+                        }
+                        AnnotationContext::FindIn( path ) => {
+    //                    if let Some(command_name) = command_word.segments.get(0) {
+                            /* todo:
+                             * - use command_name to lookup file
+                             * - forward to AnnotationContext::Load()
+                             */
+/*
+                            let mut err = UnificationError( vec![] );
+                            for file in path.direntries {
+                                if let Ok(typ) = AnnotationContext::Load( path ).get_type() => {
+                                    
+                                }
+                            }
+*/
+    //                    }
+                        }
+                    }
+                }
+
+                _ => {
+                    Err(UnificationError::NoPattern)
+                }
+            }
+        }
+    }
+
diff --git a/src/cmd_type_dsl/mod.rs b/src/cmd_type_dsl/mod.rs
new file mode 100644
index 0000000..4d7ec13
--- /dev/null
+++ b/src/cmd_type_dsl/mod.rs
@@ -0,0 +1,107 @@
+use std::{
+    collections::HashMap,
+    boxed::Box
+};
+
+use crate::ast::Command;
+use laddertypes::*;
+
+
+pub struct Substitution(HashMap< String, CommandTypeExpr >);
+impl Substitution {
+   pub fn apply(&self, expr: &mut CommandTypeExpr) {
+        
+   }
+}
+
+pub enum CommandArgPattern {
+    Literal(String),
+    Variable(String),
+    VariablePack(Box<CommandArgPattern>),
+    Optional(Box<CommandArgPattern>),
+    Conjunction(Vec<CommandArgPattern>),
+    Disjunction(Vec<CommandArgPattern>)
+}
+
+pub struct CommandPattern {
+    name: String,
+    args: Vec<CommandArgPattern>,
+    env: Vec<(String, CommandTypeExpr)>,
+}
+
+impl CommandArgPattern {
+    pub fn match_cmd(&self, cmd: &Command) -> Result<Substitution, UnificationError> {
+        Err(UnificationError(vec![]))
+    }
+}
+
+pub struct MatchCandidate {
+    at: usize,
+    expected: CommandPattern,
+    found: CommandTypeExpr,
+}
+
+pub struct UnificationError( Vec<MatchCandidate> );
+
+
+
+pub enum CommandTypeExpr {
+    Parameter(String),
+    ParameterPack(String),
+    Char(char),
+    Match(Box<CommandTypeExpr>, Vec<(CommandArgPattern, CommandTypeExpr)>)
+}
+
+impl CommandTypeExpr {
+    pub fn eval(self) -> CommandTypeExpr {
+        match self {
+            s=>s
+        }
+    }
+}
+
+pub struct FileDescriptor(u32);
+pub enum PipeDirection { In, Out }
+
+pub enum Selector {
+    Pipe(FileDescriptor, PipeDirection),
+    Parameter(String),
+    ParameterPack(String),
+    File(String)
+}
+
+pub enum CommandTypeStatement {
+    TypAssign(Selector, TypeTerm),
+    ValAssign(String, CommandTypeExpr),
+    Block(Vec<CommandTypeStatement>),
+    Match(Box<CommandTypeExpr>, Vec<(CommandArgPattern, CommandTypeStatement)>)
+}
+
+pub struct CommandType(Vec<(Selector, TypeTerm)>);
+
+impl CommandTypeStatement {
+    pub fn eval(self) -> CommandType {
+       match self {
+            CommandTypeStatement::Block(stmnts) => {
+                CommandType( stmnts.into_iter().map(|stmnt| stmnt.eval().0.into_iter()).flatten().collect() )
+            }
+            CommandTypeStatement::TypAssign(selector, typ) => {
+                CommandType( vec![ (selector, typ) ])
+            }
+            CommandTypeStatement::ValAssign(variable, expr) => {
+                CommandType(vec![])
+            }
+            CommandTypeStatement::Match(pattern, cases) => {
+                    /*
+                    for (case,stmnt) in cases.into_iter() {
+                        if let Ok(unificator) = pattern
+                        if let Ok() = case.match_expr()
+                        CommandType( vec![] )
+                    }
+                        */
+                CommandType(vec![])
+            }
+        }
+    }
+}
+
diff --git a/src/main.rs b/src/main.rs
index 3752690..f81dca8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -11,6 +11,8 @@ mod env;
 mod parse;
 //mod expand;
 
+mod cmd_type_dsl;
+
 //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
 
 pub fn get_type_str(cmd: &str, item: &str) -> Option<String> {

From 1d860d1811bf172a0f036f5d4e2d4e00ccaddfe3 Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Mon, 30 Oct 2023 15:30:42 +0100
Subject: [PATCH 6/9] wip cmd-type-dsl
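
Removes the hand-rolled ast/env/expand/parse modules again; main.rs now
expects them under a `sh` module (sh::ast, sh::parse). On the DSL side,
CommandTypeExpr is reworked around laddertypes::TypeTerm: an expression is
now either a concrete TypeTerm or a pending match, eval() is meant to
reduce it to a TypeTerm, and apply_subst() is sketched for substituting
matched variables. Both are still unfinished (their match arms are left
as todos).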

---
 src/ast.rs              | 115 -----------
 src/cmd_type_dsl/mod.rs |  66 +++---
 src/env.rs              |   9 -
 src/expand.rs           |  44 ----
 src/main.rs             |   8 +-
 src/parse.rs            | 438 ----------------------------------------
 typeDB/cut              |  14 +-
 typeDB/seq              |   2 +-
 8 files changed, 50 insertions(+), 646 deletions(-)
 delete mode 100644 src/ast.rs
 delete mode 100644 src/env.rs
 delete mode 100644 src/expand.rs
 delete mode 100644 src/parse.rs

diff --git a/src/ast.rs b/src/ast.rs
deleted file mode 100644
index 87e7aae..0000000
--- a/src/ast.rs
+++ /dev/null
@@ -1,115 +0,0 @@
-use std::boxed::Box;
-
-//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
-
-#[derive(Debug, PartialEq)]
-pub enum Command {
-    Simple {
-        assignments: Vec<Assignment>,
-        command_word: Word,
-        redirections: Vec<Redirection>
-    },
-    Pipeline(Vec<Command>),
-    Sequence(Vec<Command>),
-    ShortCircuitConjunction(Vec<Command>),
-    ShortCircuitDisjunction(Vec<Command>),
-    Negation(Box<Command>),
-    While {
-        condition: Box<Command>,
-        loop_body: Box<Command>
-    },
-    For {
-        varname: String,
-        sequence: Word,
-        loop_body: Box<Command>
-    },
-    If {
-        condition: Box<Command>,
-        then_branch: Box<Command>,
-        else_branch: Box<Command>
-    },
-    Case {
-        expr: Word,
-        cases: Vec<(Word, Command)>
-    },
-    Function {
-        name: String,
-        body: Box<Command>
-    }
-}
-
-//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
-
-#[derive(Debug, PartialEq)]
-pub struct Assignment {
-    pub name: String,
-    pub value: Word
-}
-
-#[derive(Debug, PartialEq)]
-pub struct Word {
-    pub segments: Vec<WordSegment>
-}
-
-#[derive(Debug, PartialEq)]
-pub enum WordSegment {
-    Tilde(String),
-    Literal(String),
-    Parameter(String, ParameterFormat),
-    Subshell(Command),
-    DoubleQuote(Word),
-}
-
-#[derive(Debug, PartialEq)]
-pub enum ParameterFormat {
-    Normal,
-    Length,
-    Default(Word),
-    Assign(Word),
-    Error(Word),
-    Alt(Word),
-    Sub(ParamSubSide, ParamSubMode, Word),
-}
-
-#[derive(Debug, PartialEq)]
-pub enum ParamSubMode {
-    Shortest, Longest
-}
-
-#[derive(Debug, PartialEq)]
-pub enum ParamSubSide {
-    Prefix, Suffix
-}
-
-//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
-
-#[derive(Debug, PartialEq)]
-pub struct Redirection {
-    redirection_type: RedirectionType,
-    fd: u64,
-    target: Word
-}
-
-#[derive(Debug, PartialEq)]
-pub enum RedirectionType {
-    File(FileRedirectionType),
-    Dup(DupRedirectionType),
-    Heredoc // '<<'
-}
-
-#[derive(Debug, PartialEq)]
-pub enum FileRedirectionType {
-    In,         // '<'
-    InOut,      // '<>'
-    Out,        // '>'
-    OutReplace, // '>|'
-    OutAppend,  // '>>'
-}
-
-#[derive(Debug, PartialEq)]
-pub enum DupRedirectionType {
-    In,  // '<&'
-    Out  // '>&'
-}
-
-//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
diff --git a/src/cmd_type_dsl/mod.rs b/src/cmd_type_dsl/mod.rs
index 4d7ec13..1b55ab2 100644
--- a/src/cmd_type_dsl/mod.rs
+++ b/src/cmd_type_dsl/mod.rs
@@ -3,17 +3,12 @@ use std::{
     boxed::Box
 };
 
-use crate::ast::Command;
+use crate::sh::ast::Command;
 use laddertypes::*;
 
-
 pub struct Substitution(HashMap< String, CommandTypeExpr >);
-impl Substitution {
-   pub fn apply(&self, expr: &mut CommandTypeExpr) {
-        
-   }
-}
 
+#[derive(Clone)]
 pub enum CommandArgPattern {
     Literal(String),
     Variable(String),
@@ -23,39 +18,58 @@ pub enum CommandArgPattern {
     Disjunction(Vec<CommandArgPattern>)
 }
 
+#[derive(Clone)]
 pub struct CommandPattern {
     name: String,
     args: Vec<CommandArgPattern>,
     env: Vec<(String, CommandTypeExpr)>,
 }
 
+#[derive(Clone)]
+pub struct MatchCandidate {
+    at: usize,
+    expected: CommandPattern,
+    found: CommandTypeExpr,
+}
+
+#[derive(Clone)]
+pub struct UnificationError( Vec<MatchCandidate> );
+
+#[derive(Clone)]
+pub enum CommandTypeExpr {
+    Type(TypeTerm),
+    Match(Box<CommandTypeExpr>, Vec<(CommandArgPattern, CommandTypeExpr)>)
+}
+
 impl CommandArgPattern {
     pub fn match_cmd(&self, cmd: &Command) -> Result<Substitution, UnificationError> {
         Err(UnificationError(vec![]))
     }
 }
 
-pub struct MatchCandidate {
-    at: usize,
-    expected: CommandPattern,
-    found: CommandTypeExpr,
-}
-
-pub struct UnificationError( Vec<MatchCandidate> );
-
-
-
-pub enum CommandTypeExpr {
-    Parameter(String),
-    ParameterPack(String),
-    Char(char),
-    Match(Box<CommandTypeExpr>, Vec<(CommandArgPattern, CommandTypeExpr)>)
-}
-
 impl CommandTypeExpr {
-    pub fn eval(self) -> CommandTypeExpr {
+    pub fn eval(self) -> Result<TypeTerm, CommandTypeExpr> {
         match self {
-            s=>s
+            CommandTypeExpr::Type(typ) => Ok(typ),
+            CommandTypeExpr::Match(pattern, cases) => {
+                
+            }
+            s=> Ok(s)
+        }
+    }
+
+    pub fn apply_subst(&mut self, subst: &Substitution) {
+        match self {
+            CommandTypeExpr::Type(typ) => {
+                self = CommandTypeExpr::Type(
+                    typ.apply_substitution(|v: String| subst.get(v))
+                );
+            }
+            CommandTypeExpr::Match( pattern, cases ) => {
+                
+                // todo
+            }
+            _ => {}
         }
     }
 }
diff --git a/src/env.rs b/src/env.rs
deleted file mode 100644
index 091e68d..0000000
--- a/src/env.rs
+++ /dev/null
@@ -1,9 +0,0 @@
-use std::collections::HashMap;
-
-//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
-
-struct Environment {
-    variables: HashMap<String, String>
-}
-
-//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
diff --git a/src/expand.rs b/src/expand.rs
deleted file mode 100644
index 412fc5f..0000000
--- a/src/expand.rs
+++ /dev/null
@@ -1,44 +0,0 @@
-use crate::ast::*;
-
-//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
-
-impl Word {
-    pub fn (&mut self, env: &Environment) {
-        for x in self.0.iter_mut() {
-            x.expand_tilde();
-            match x {
-                Word::Tilde => Word::Literal( env.get_home() ),
-                other => other,
-            }
-        }
-    }
-
-    pub fn expand(&self) -> Vec<String> {
-        let mut fields = Vec::new();
-
-        for seg in self.segments.iter() {
-            //
-        }
-
-        fields
-    }
-
-    pub fn split_field(&mut self) {
-        
-    }
-}
-
-impl WordSegment {
-    pub fn split_field(&self) -> Word {
-        
-        match self {
-            
-        }
-    }
-
-    pub fn expand(&self) -> Word {
-        match 
-    }
-}
-
-//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
diff --git a/src/main.rs b/src/main.rs
index f81dca8..02e6b77 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -6,11 +6,7 @@ use {
     tiny_ansi::TinyAnsi
 };
 
-mod ast;
-mod env;
-mod parse;
-//mod expand;
-
+mod sh;
 mod cmd_type_dsl;
 
 //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
@@ -42,7 +38,7 @@ fn main() {
     let stdin = std::io::stdin();
     for line in std::io::BufReader::new(stdin).lines() {
         if let Ok(line) = line {
-            let cmd = parse::parse_cmd( &mut line.chars().peekable() );
+            let cmd = sh::parse::parse_cmd( &mut line.chars().peekable() );
             eprintln!("parsed cmd: {:?}", cmd);
             /*
             let mut lex = parse::WordLexer::from( line.chars() );
diff --git a/src/parse.rs b/src/parse.rs
deleted file mode 100644
index b4db237..0000000
--- a/src/parse.rs
+++ /dev/null
@@ -1,438 +0,0 @@
-use {
-    crate::ast::*,
-    std::iter::{Peekable},
-};
-
-
-#[derive(Debug, PartialEq)]
-pub enum LexError {
-    UnexpectedEnd(Vec<Option<char>>),
-    UnexpectedToken(char),
-    InvalidFileRedirectionType
-}
-
-
-///! iterates chars until it finds some char in `delim`
-pub struct DelimIter<'a, It>
-where It: Iterator<Item = char> {
-    chars: &'a mut Peekable<It>,
-    delim: Vec<(Option<char>, bool)>
-}
-
-impl<'a, It> DelimIter<'a, It>
-where It: Iterator<Item = char> {
-    fn new(chars: &'a mut Peekable<It>, delim: Vec<(Option<char>, bool)>) -> Self {
-        DelimIter { chars, delim }
-    }
-
-    fn new_whitespace(chars: &'a mut Peekable<It>) -> Self {
-        DelimIter::new(chars, vec![
-            (None, true),
-            (Some(' '), true),
-            (Some('\t'), true),
-            (Some('\n'), true)
-        ])
-    }
-
-    fn new_shell_word(chars: &'a mut Peekable<It>) -> Self {
-        DelimIter::new(chars, vec![
-            (None, true),
-            (Some(' '), true),
-            (Some('\t'), true),
-            (Some('\n'), true),
-            (Some('|'), false),
-            (Some('&'), false),
-            (Some(';'), false),
-            (Some('\"'), false),
-            (Some('\''), false)
-        ])
-    }
-
-    fn new_shell_word_or_assignment(chars: &'a mut Peekable<It>) -> Self {
-        DelimIter::new(chars, vec![
-            (None, true),
-            (Some(' '), true),
-            (Some('\t'), true),
-            (Some('\n'), true),
-            (Some('='), false),
-            (Some('|'), false),
-            (Some('&'), false),
-            (Some(';'), false),
-            (Some('\"'), false),
-            (Some('\''), false)
-        ])
-    }
-}
-
-impl<'a, It> Iterator for DelimIter<'a, It>
-where It: 'a + Iterator<Item = char> {
-    type Item = Result<char, LexError>;
-
-    fn next(&mut self) -> Option<Result<char, LexError>> {
-        for (delim, consume) in self.delim.iter() {
-            if self.chars.peek().cloned() == *delim {
-                if *consume {
-                    self.chars.next();
-                }
-                return None;
-            }
-        }
-
-        match self.chars.next() {
-            Some(c) => Some(Ok(c)),
-            None => Some(Err(LexError::UnexpectedEnd(vec![])))
-        }
-    }
-}
-
-
-pub struct WordLexer<'a, It>
-where It: 'a + Iterator<Item = char> {
-    chars: &'a mut Peekable<It>
-}
-
-impl<'a, It> WordLexer<'a, It>
-where It: Iterator<Item = char> {
-    fn collect_until(&mut self, close: Option<char>) -> Result<String, LexError> {
-        DelimIter::new(&mut self.chars, vec![(close, true)])
-            .try_collect::<String>()
-    }
-}
-
-pub fn skip_whitespace<It>(chars: &mut Peekable<It>)
-where It: Iterator<Item = char>
-{
-    while let Some(c) = chars.peek() {
-        if c.is_whitespace() {
-            chars.next();
-        } else {
-            break;
-        }
-    }
-}
-
-pub fn parse_quoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError>
-where It: Iterator<Item = char>
-{
-    assert_eq!( chars.next(), Some('\''));
-    let quoted = DelimIter::new(chars, vec![(Some('\''), true)]).try_collect::<String>();
-    match quoted {
-        Ok(s) => {
-            Ok(WordSegment::Literal(s))
-        },
-        Err(e) => Err(e)
-    }
-}
-
-pub fn parse_doublequoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError>
-where It: Iterator<Item = char>
-{
-    assert_eq!( chars.next(), Some('\"'));
-    let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::<String>();
-    match quoted {
-        Ok(s) => {
-            let word = Word {
-                segments: // fixme: handle spaces correctly -> create QuoteLexer
-                WordLexer { chars: &mut s.chars().peekable() }
-                .scan((), |_, x| x.ok())
-                    .collect::<Vec<_>>()
-            };
-
-            Ok(WordSegment::DoubleQuote(word))
-        },
-        Err(e) => Err(e)
-    }    
-}
-
-pub fn parse_word<It>(chars: &mut Peekable<It>) -> Result<Word, LexError>
-where It: Iterator<Item = char>
-{
-    Ok(Word {
-        segments: WordLexer{ chars }.try_collect::<Vec<_>>()?
-    })
-}
-
-impl std::str::FromStr for FileRedirectionType {
-    type Err = LexError;
-
-    fn from_str(s: &str) -> Result<FileRedirectionType, LexError> {
-        match s {
-            "<" => Ok(FileRedirectionType::In),
-            "<>" => Ok(FileRedirectionType::InOut),
-            ">" => Ok(FileRedirectionType::Out),
-            ">|" => Ok(FileRedirectionType::OutReplace),
-            ">>" => Ok(FileRedirectionType::OutAppend),
-            _ => Err(LexError::InvalidFileRedirectionType)
-        }
-    }
-}
-
-pub fn parse_redirection<It>(chars: &mut Peekable<It>) -> Result<Redirection, LexError>
-where It: Iterator<Item = char>
-{
-    Err(LexError::InvalidFileRedirectionType)
-    //    let name = DelimIterator::new(chars, vec!['<', '>']).collect::<String>();
-}
-
-pub fn parse_simple_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError>
-where It: Iterator<Item = char>
-{
-    let mut assignments = Vec::new();
-    let mut redirections = Vec::new();
-
-    if chars.peek() == None {
-        return Ok(None);
-    }
-
-    loop {
-        skip_whitespace(chars);
-        let mut name = DelimIter::new_shell_word_or_assignment(chars).try_collect::<String>()?;
-
-        match chars.peek().clone() {
-            Some('=') => {
-                chars.next();
-                let mut lex = WordLexer{ chars };
-                match lex.next() {
-                    Some(Ok(value)) => {
-                        assignments.push(Assignment { name, value: Word{ segments: vec![ value ] } });
-                    },
-                    Some(Err(e)) => {
-                        return Err(e);
-                    },
-                    None => {
-                        return Err(LexError::UnexpectedEnd(vec![]));
-                    }
-                }
-            }
-            _ => {
-                let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?;
-                cmd_segments.insert(0, WordSegment::Literal(name));
-
-                return Ok(Some(Command::Simple {
-                    assignments,
-                    command_word: Word { segments: cmd_segments },
-                    redirections,
-                }));
-            }
-        }
-    }
-}
-
-pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError>
-where It: Iterator<Item = char>
-{
-    skip_whitespace(chars);
-    match chars.peek() {
-        Some('!') => {
-            chars.next();
-            if let Some(cmd) = parse_cmd(chars)? {
-                Ok(Some(Command::Negation(Box::new(cmd))))
-            } else {
-                Err(LexError::UnexpectedEnd(vec![]))
-            }
-        }
-        _ => {
-            if let Some(head) = parse_simple_cmd(chars)? {
-                skip_whitespace(chars);
-
-                match chars.peek() {
-                    Some(';') => {
-                        chars.next();
-
-                        let tail = parse_cmd( chars ) ?;
-                        match tail {
-                            Some(Command::Sequence(mut s)) => {
-                                s.insert(0, head);
-                                Ok(Some(Command::Sequence(s)))
-                            }
-                            Some(tail) => {
-                                Ok(Some(Command::Sequence(vec![ head, tail ])))
-                            }
-                            None => {
-                                Ok(Some(head))
-                            }
-                        }
-                    }
-                    Some('|') => {
-                        chars.next();
-                        match chars.peek() {
-                            Some('|') => {
-                                chars.next();
-
-                                let tail = parse_cmd( chars ) ?;
-                                match tail {
-                                    Some(Command::ShortCircuitDisjunction(mut s)) => {
-                                        s.insert(0, head);
-                                        Ok(Some(Command::ShortCircuitDisjunction(s)))
-                                    }
-                                    Some(tail) => {
-                                        Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ])))
-                                    }
-                                    None => {
-                                        Err(LexError::UnexpectedEnd(vec![Some('|')]))
-                                    }
-                                }
-                            }
-                            _ => {
-                                let tail = parse_cmd( chars ) ?;
-                                match tail {
-                                    Some(Command::Pipeline(mut s)) => {
-                                        s.insert(0, head);
-                                        Ok(Some(Command::Pipeline(s)))
-                                    }
-                                    Some(tail) => {
-                                        Ok(Some(Command::Pipeline(vec![ head, tail ])))
-                                    }
-                                    None => {
-                                        Err(LexError::UnexpectedEnd(vec![]))
-                                    }
-                                }
-                            }
-                        }
-                    }
-                    Some('&') => {
-                        chars.next();
-                        match chars.peek() {
-                            Some('&') => {
-                                chars.next();
-
-                                let tail = parse_cmd( chars ) ?;
-                                match tail {
-                                    Some(Command::ShortCircuitConjunction(mut s)) => {
-                                        s.insert(0, head);
-                                        Ok(Some(Command::ShortCircuitConjunction(s)))
-                                    }
-                                    Some(tail) => {
-                                        Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ])))
-                                    }
-                                    None => {
-                                        Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')]))
-                                    }
-                                }
-                            }
-                            Some(c) => {
-                                Err(LexError::UnexpectedToken(*c))
-                            }
-                            None => {
-                                // todo:
-                                // background job
-                                Ok(Some(head))
-                            }
-                        }
-                    }
-                    Some(c) => {
-                        Err(LexError::UnexpectedToken(*c))
-                    }
-                    None => {
-                        Ok(Some(head))
-                    }
-                }
-            } else {
-                Ok(None)
-            }
-        }
-    }
-}
-
-impl<'a, It> Iterator for WordLexer<'a, It>
-where It: 'a + Iterator<Item = char> {
-    type Item = Result<WordSegment, LexError>;
-
-    fn next(&mut self) -> Option<Result<WordSegment, LexError>> {
-        skip_whitespace(self.chars);
-        match self.chars.peek().cloned() {
-            Some('|') => { None }
-            Some('&') => { None }
-            Some(';') => { None }
-            Some('~') => {
-                self.chars.next();
-                let user = DelimIter::new_whitespace(self.chars).collect();
-                match user {
-                    Ok(user) => Some(Ok(WordSegment::Tilde(user))),
-                    Err(e) => Some(Err(e))
-                }
-            }
-            Some('"') => { Some(parse_doublequoted(self.chars)) },
-            Some('\'') => { Some(parse_quoted(self.chars)) },
-            Some('$') => {
-                self.chars.next();
-                match self.chars.peek() {
-                    Some('{') => {
-                        self.chars.next();
-                        match DelimIter::new(&mut self.chars, vec![(Some('}'), true)]).try_collect::<String>() {
-                            Ok(s) => {
-                                Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal)))
-                            }
-                            Err(e) => Some(Err(e))
-                        }
-                    }
-                    Some('(') => {
-                        self.chars.next();
-                        let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>();
-                        match subcmd_str {
-                            Ok(subcmd_str) => {
-                                match parse_cmd(&mut subcmd_str.chars().peekable()) {
-                                    Ok(Some(subcmd)) => {
-                                        Some(Ok(WordSegment::Subshell(subcmd)))        
-                                    }
-                                    Ok(None) => None,
-                                    Err(err) => Some(Err(err))
-                                }
-                            }
-                            Err(err) => Some(Err(err))
-                        }
-                    }
-                    _ => {
-                        match DelimIter::new_whitespace(self.chars).collect() {
-                            Ok(s) => {
-                                Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal)))
-                            }
-                            Err(e) => Some(Err(e))
-                        }
-                    }
-                }
-            }
-            Some(c) => {
-                let s : Result<String, LexError> = DelimIter::new_shell_word(self.chars).collect();
-                match s {
-                    Ok(s) => Some(Ok(WordSegment::Literal(s))),
-                    Err(e) => Some(Err(e))
-                }
-            }
-            None => {
-                None
-            }
-        }
-    }
-}
-
-
-mod test {
-    use crate::parse::*;
-
-    #[test]
-    fn test_delim_iter() {
-        let mut cs = "test 1234".chars().peekable();
-        let mut lexer = DelimIter::new_shell_word(&mut cs);
-        assert_eq!(lexer.try_collect::<String>(), Ok(String::from("test")));
-    }
-
-    #[test]
-    fn test_word_lexer() {
-        let mut cs = "test   1234|test".chars().peekable();
-
-        {
-            let mut lexer = WordLexer{ chars: &mut cs };
-            assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test")))));
-            assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234")))));
-            assert_eq!(lexer.next(), None);
-        }
-        assert_eq!(cs.next(), Some('|'));
-        {
-            let mut lexer = WordLexer{ chars: &mut cs };
-            assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test")))));
-            assert_eq!(lexer.next(), None);
-        }
-    }
-}
-
diff --git a/typeDB/cut b/typeDB/cut
index e860067..e9443b2 100644
--- a/typeDB/cut
+++ b/typeDB/cut
@@ -2,17 +2,17 @@
 
     match OPTION... {
         --help { <1 : Help~<Seq Char> ; }
-	--version { <1 : VersionInfo~<Seq Char> ; }
+        --version { <1 : VersionInfo~<Seq Char> ; }
 
         * {
-	    match OPTION... {
-		-f--fields LIST:<Seq ℕ>~CutFieldList~<Seq Char> {
+            match OPTION... {
+                -f--fields LIST:<Seq ℕ>~CutFieldList~<Seq Char> {
 
                     ROW-DELIM =
                         match OPTION... {
                             -z--zero-terminated { '\0' }
                             * { '\n' }
-			};
+                        };
 
                     IN-COL-DELIM =
                         match OPTION... {
@@ -22,9 +22,9 @@
 
                     OUT-COL-DELIM =
                         match OPTION... {
-			    --output-delimiter STRING:<Seq Char> { STRING }
-			    * { IN-COL-DELIM }
-			};
+                            --output-delimiter STRING:<Seq Char> { STRING }
+                            * { IN-COL-DELIM }
+                        };
 
                     FILE : Path
                            ~<Seq PathSegment
diff --git a/typeDB/seq b/typeDB/seq
index b585a9a..1119ab5 100644
--- a/typeDB/seq
+++ b/typeDB/seq
@@ -19,7 +19,7 @@
             $INCREMENT : NUM ;
             $LAST      : NUM ;
 
-	    <1 : <Seq NUM~<Seq Char>>~<SepSeq Char SEP>~<Seq Char> ;
+            <1 : <Seq NUM~<Seq Char>>~<SepSeq Char SEP>~<Seq Char> ;
         }	
     }
 }

From e0c3acab63a771645282b667b2f161f71e393da8 Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Mon, 30 Oct 2023 23:23:45 +0100
Subject: [PATCH 7/9] parser: correct handling of spaces in double quotes

---
 src/sh/ast.rs    | 115 +++++++++++
 src/sh/mod.rs    |   7 +
 src/sh/parse.rs  | 484 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 606 insertions(+)
 create mode 100644 src/sh/ast.rs
 create mode 100644 src/sh/mod.rs
 create mode 100644 src/sh/parse.rs

diff --git a/src/sh/ast.rs b/src/sh/ast.rs
new file mode 100644
index 0000000..87e7aae
--- /dev/null
+++ b/src/sh/ast.rs
@@ -0,0 +1,115 @@
+use std::boxed::Box;
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+#[derive(Debug, PartialEq)]
+pub enum Command {
+    Simple {
+        assignments: Vec<Assignment>,
+        command_word: Word,
+        redirections: Vec<Redirection>
+    },
+    Pipeline(Vec<Command>),
+    Sequence(Vec<Command>),
+    ShortCircuitConjunction(Vec<Command>),
+    ShortCircuitDisjunction(Vec<Command>),
+    Negation(Box<Command>),
+    While {
+        condition: Box<Command>,
+        loop_body: Box<Command>
+    },
+    For {
+        varname: String,
+        sequence: Word,
+        loop_body: Box<Command>
+    },
+    If {
+        condition: Box<Command>,
+        then_branch: Box<Command>,
+        else_branch: Box<Command>
+    },
+    Case {
+        expr: Word,
+        cases: Vec<(Word, Command)>
+    },
+    Function {
+        name: String,
+        body: Box<Command>
+    }
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+#[derive(Debug, PartialEq)]
+pub struct Assignment {
+    pub name: String,
+    pub value: Word
+}
+
+#[derive(Debug, PartialEq)]
+pub struct Word {
+    pub segments: Vec<WordSegment>
+}
+
+#[derive(Debug, PartialEq)]
+pub enum WordSegment {
+    Tilde(String),
+    Literal(String),
+    Parameter(String, ParameterFormat),
+    Subshell(Command),
+    DoubleQuote(Word),
+}
+
+#[derive(Debug, PartialEq)]
+pub enum ParameterFormat {
+    Normal,
+    Length,
+    Default(Word),
+    Assign(Word),
+    Error(Word),
+    Alt(Word),
+    Sub(ParamSubSide, ParamSubMode, Word),
+}
+
+#[derive(Debug, PartialEq)]
+pub enum ParamSubMode {
+    Shortest, Longest
+}
+
+#[derive(Debug, PartialEq)]
+pub enum ParamSubSide {
+    Prefix, Suffix
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
+
+#[derive(Debug, PartialEq)]
+pub struct Redirection {
+    redirection_type: RedirectionType,
+    fd: u64,
+    target: Word
+}
+
+#[derive(Debug, PartialEq)]
+pub enum RedirectionType {
+    File(FileRedirectionType),
+    Dup(DupRedirectionType),
+    Heredoc // '<<'
+}
+
+#[derive(Debug, PartialEq)]
+pub enum FileRedirectionType {
+    In,         // '<'
+    InOut,      // '<>'
+    Out,        // '>'
+    OutReplace, // '>|'
+    OutAppend,  // '>>'
+}
+
+#[derive(Debug, PartialEq)]
+pub enum DupRedirectionType {
+    In,  // '<&'
+    Out  // '>&'
+}
+
+//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
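
To make the shape of these types concrete, a small illustrative value (not
part of the patch): the Command the parser below is expected to produce for
the input `ls "$HOME"`, with the double-quoted argument kept as a single
DoubleQuote segment.

    use crate::sh::ast::*;

    // Illustrative only: AST value for the input `ls "$HOME"`.
    fn example_ast() -> Command {
        Command::Simple {
            assignments: vec![],
            command_word: Word {
                segments: vec![
                    WordSegment::Literal("ls".into()),
                    WordSegment::DoubleQuote(Word {
                        segments: vec![
                            WordSegment::Parameter("HOME".into(), ParameterFormat::Normal),
                        ],
                    }),
                ],
            },
            redirections: vec![],
        }
    }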
diff --git a/src/sh/mod.rs b/src/sh/mod.rs
new file mode 100644
index 0000000..0c08c79
--- /dev/null
+++ b/src/sh/mod.rs
@@ -0,0 +1,7 @@
+
+pub mod ast;
+pub mod parse;
+
+
+pub use ast::*;
+pub use parse::*;
diff --git a/src/sh/parse.rs b/src/sh/parse.rs
new file mode 100644
index 0000000..6b5691a
--- /dev/null
+++ b/src/sh/parse.rs
@@ -0,0 +1,484 @@
+use {
+    crate::sh::ast::*,
+    std::iter::{Peekable},
+};
+
+
+#[derive(Debug, PartialEq)]
+pub enum LexError {
+    UnexpectedEnd(Vec<Option<char>>),
+    UnexpectedToken(char),
+    InvalidFileRedirectionType
+}
+
+
+/// Iterates chars until it finds some char in `delim`.
+pub struct DelimIter<'a, It>
+where It: Iterator<Item = char> {
+    chars: &'a mut Peekable<It>,
+    delim: Vec<(Option<char>, bool)>
+}
+
+impl<'a, It> DelimIter<'a, It>
+where It: Iterator<Item = char> {
+    fn new(chars: &'a mut Peekable<It>, delim: Vec<(Option<char>, bool)>) -> Self {
+        DelimIter { chars, delim }
+    }
+
+    fn new_whitespace(chars: &'a mut Peekable<It>) -> Self {
+        DelimIter::new(chars, vec![
+            (None, true),
+            (Some(' '), true),
+            (Some('\t'), true),
+            (Some('\n'), true)
+        ])
+    }
+
+    fn new_shell_word(chars: &'a mut Peekable<It>) -> Self {
+        DelimIter::new(chars, vec![
+            (None, true),
+            (Some(' '), true),
+            (Some('\t'), true),
+            (Some('\n'), true),
+            (Some('|'), false),
+            (Some('&'), false),
+            (Some(';'), false),
+            (Some('\"'), false),
+            (Some('\''), false)
+        ])
+    }
+
+    fn new_shell_word_or_assignment(chars: &'a mut Peekable<It>) -> Self {
+        DelimIter::new(chars, vec![
+            (None, true),
+            (Some(' '), true),
+            (Some('\t'), true),
+            (Some('\n'), true),
+            (Some('='), false),
+            (Some('|'), false),
+            (Some('&'), false),
+            (Some(';'), false),
+            (Some('\"'), false),
+            (Some('\''), false)
+        ])
+    }
+}
+
+impl<'a, It> Iterator for DelimIter<'a, It>
+where It: 'a + Iterator<Item = char> {
+    type Item = Result<char, LexError>;
+
+    fn next(&mut self) -> Option<Result<char, LexError>> {
+        for (delim, consume) in self.delim.iter() {
+            if self.chars.peek().cloned() == *delim {
+                if *consume {
+                    self.chars.next();
+                }
+                return None;
+            }
+        }
+
+        match self.chars.next() {
+            Some(c) => Some(Ok(c)),
+            None => Some(Err(LexError::UnexpectedEnd(vec![])))
+        }
+    }
+}
+
+
+pub struct WordLexer<'a, It>
+where It: 'a + Iterator<Item = char> {
+    chars: &'a mut Peekable<It>
+}
+
+impl<'a, It> WordLexer<'a, It>
+where It: Iterator<Item = char> {
+    fn collect_until(&mut self, close: Option<char>) -> Result<String, LexError> {
+        DelimIter::new(&mut self.chars, vec![(close, true)])
+            .try_collect::<String>()
+    }
+}
+
+pub struct SubstLexer<'a, It>
+where It: 'a + Iterator<Item = char> {
+    chars: &'a mut Peekable<It>
+}
+
+pub fn skip_whitespace<It>(chars: &mut Peekable<It>)
+where It: Iterator<Item = char>
+{
+    while let Some(c) = chars.peek() {
+        if c.is_whitespace() {
+            chars.next();
+        } else {
+            break;
+        }
+    }
+}
+
+pub fn parse_quoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError>
+where It: Iterator<Item = char>
+{
+    assert_eq!( chars.next(), Some('\''));
+    let quoted = DelimIter::new(chars, vec![(Some('\''), true)]).try_collect::<String>();
+    match quoted {
+        Ok(s) => {
+            Ok(WordSegment::Literal(s))
+        },
+        Err(e) => Err(e)
+    }
+}
+
+pub fn parse_doublequoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError>
+where It: Iterator<Item = char>
+{
+    assert_eq!( chars.next(), Some('\"'));
+    let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::<String>();
+
+    match quoted {
+        Ok(s) => {
+            let word = Word {
+                segments: SubstLexer { chars: &mut s.chars().peekable() }
+                .try_collect::<Vec<_>>()?
+//                .scan((), |_, x| x.ok())
+//                    .collect::<Vec<_>>()
+            };
+
+            Ok(WordSegment::DoubleQuote(word))
+        },
+        Err(e) => Err(e)
+    }    
+}
+
+pub fn parse_word<It>(chars: &mut Peekable<It>) -> Result<Word, LexError>
+where It: Iterator<Item = char>
+{
+    Ok(Word {
+        segments: WordLexer{ chars }.try_collect::<Vec<_>>()?
+    })
+}
+
+impl std::str::FromStr for FileRedirectionType {
+    type Err = LexError;
+
+    fn from_str(s: &str) -> Result<FileRedirectionType, LexError> {
+        match s {
+            "<" => Ok(FileRedirectionType::In),
+            "<>" => Ok(FileRedirectionType::InOut),
+            ">" => Ok(FileRedirectionType::Out),
+            ">|" => Ok(FileRedirectionType::OutReplace),
+            ">>" => Ok(FileRedirectionType::OutAppend),
+            _ => Err(LexError::InvalidFileRedirectionType)
+        }
+    }
+}
+
+pub fn parse_redirection<It>(chars: &mut Peekable<It>) -> Result<Redirection, LexError>
+where It: Iterator<Item = char>
+{
+    Err(LexError::InvalidFileRedirectionType)
+    //    let name = DelimIterator::new(chars, vec!['<', '>']).collect::<String>();
+}
+
+pub fn parse_simple_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError>
+where It: Iterator<Item = char>
+{
+    let mut assignments = Vec::new();
+    let mut redirections = Vec::new();
+
+    if chars.peek() == None {
+        return Ok(None);
+    }
+
+    loop {
+        skip_whitespace(chars);
+        let mut name = DelimIter::new_shell_word_or_assignment(chars).try_collect::<String>()?;
+
+        match chars.peek().clone() {
+            Some('=') => {
+                chars.next();
+                let mut lex = WordLexer{ chars };
+                match lex.next() {
+                    Some(Ok(value)) => {
+                        assignments.push(Assignment { name, value: Word{ segments: vec![ value ] } });
+                    },
+                    Some(Err(e)) => {
+                        return Err(e);
+                    },
+                    None => {
+                        return Err(LexError::UnexpectedEnd(vec![]));
+                    }
+                }
+            }
+            _ => {
+                let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?;
+                cmd_segments.insert(0, WordSegment::Literal(name));
+
+                return Ok(Some(Command::Simple {
+                    assignments,
+                    command_word: Word { segments: cmd_segments },
+                    redirections,
+                }));
+            }
+        }
+    }
+}
+
+pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError>
+where It: Iterator<Item = char>
+{
+    skip_whitespace(chars);
+    match chars.peek() {
+        Some('!') => {
+            chars.next();
+            if let Some(cmd) = parse_cmd(chars)? {
+                Ok(Some(Command::Negation(Box::new(cmd))))
+            } else {
+                Err(LexError::UnexpectedEnd(vec![]))
+            }
+        }
+        _ => {
+            if let Some(head) = parse_simple_cmd(chars)? {
+                skip_whitespace(chars);
+
+                match chars.peek() {
+                    Some(';') => {
+                        chars.next();
+
+                        let tail = parse_cmd( chars ) ?;
+                        match tail {
+                            Some(Command::Sequence(mut s)) => {
+                                s.insert(0, head);
+                                Ok(Some(Command::Sequence(s)))
+                            }
+                            Some(tail) => {
+                                Ok(Some(Command::Sequence(vec![ head, tail ])))
+                            }
+                            None => {
+                                Ok(Some(head))
+                            }
+                        }
+                    }
+                    Some('|') => {
+                        chars.next();
+                        match chars.peek() {
+                            Some('|') => {
+                                chars.next();
+
+                                let tail = parse_cmd( chars ) ?;
+                                match tail {
+                                    Some(Command::ShortCircuitDisjunction(mut s)) => {
+                                        s.insert(0, head);
+                                        Ok(Some(Command::ShortCircuitDisjunction(s)))
+                                    }
+                                    Some(tail) => {
+                                        Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ])))
+                                    }
+                                    None => {
+                                        Err(LexError::UnexpectedEnd(vec![Some('|')]))
+                                    }
+                                }
+                            }
+                            _ => {
+                                let tail = parse_cmd( chars ) ?;
+                                match tail {
+                                    Some(Command::Pipeline(mut s)) => {
+                                        s.insert(0, head);
+                                        Ok(Some(Command::Pipeline(s)))
+                                    }
+                                    Some(tail) => {
+                                        Ok(Some(Command::Pipeline(vec![ head, tail ])))
+                                    }
+                                    None => {
+                                        Err(LexError::UnexpectedEnd(vec![]))
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    Some('&') => {
+                        chars.next();
+                        match chars.peek() {
+                            Some('&') => {
+                                chars.next();
+
+                                let tail = parse_cmd( chars ) ?;
+                                match tail {
+                                    Some(Command::ShortCircuitConjunction(mut s)) => {
+                                        s.insert(0, head);
+                                        Ok(Some(Command::ShortCircuitConjunction(s)))
+                                    }
+                                    Some(tail) => {
+                                        Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ])))
+                                    }
+                                    None => {
+                                        Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')]))
+                                    }
+                                }
+                            }
+                            Some(c) => {
+                                Err(LexError::UnexpectedToken(*c))
+                            }
+                            None => {
+                                // todo:
+                                // background job
+                                Ok(Some(head))
+                            }
+                        }
+                    }
+                    Some(c) => {
+                        Err(LexError::UnexpectedToken(*c))
+                    }
+                    None => {
+                        Ok(Some(head))
+                    }
+                }
+            } else {
+                Ok(None)
+            }
+        }
+    }
+}
+impl<'a, It> Iterator for SubstLexer<'a, It>
+where It: 'a + Iterator<Item = char> {
+    type Item = Result<WordSegment, LexError>;
+
+    fn next(&mut self) -> Option<Result<WordSegment, LexError>> {
+        match self.chars.peek().cloned() {
+            Some('$') => {
+                self.chars.next();
+                match self.chars.peek() {
+                    // curly-braced parameter e.g. `${PARAM}`
+                    Some('{') => {
+                        self.chars.next();
+                        match DelimIter::new(&mut self.chars, vec![(Some('}'), true)]).try_collect::<String>() {
+                            Ok(s) => {
+                                Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal)))
+                            }
+                            Err(e) => Some(Err(e))
+                        }
+                    }
+
+                    // Subshell
+                    Some('(') => {
+                        self.chars.next();
+                        let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>();
+                        match subcmd_str {
+                            Ok(subcmd_str) => {
+                                match parse_cmd(&mut subcmd_str.chars().peekable()) {
+                                    Ok(Some(subcmd)) => {
+                                        Some(Ok(WordSegment::Subshell(subcmd)))        
+                                    }
+                                    Ok(None) => None,
+                                    Err(err) => Some(Err(err))
+                                }
+                            }
+                            Err(err) => Some(Err(err))
+                        }
+                    }
+
+                    // plain parameter name e.g. `$PARAM`
+                    _ => {
+                        match DelimIter::new_whitespace(self.chars).collect() {
+                            Ok(s) => {
+                                Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal)))
+                            }
+                            Err(e) => Some(Err(e))
+                        }
+                    }
+                }
+            }
+
+            // not a substitution,
+            // take as literal until next $.
+            _ => {
+                let lit_str = DelimIter::new(
+                    &mut self.chars,
+                    vec![
+                        (None, true),
+                        (Some('$'), false)
+                    ]
+                ).try_collect::<String>();
+
+                match lit_str {
+                    Ok(s) => {
+                        if s.len() > 0 {
+                            Some(Ok(WordSegment::Literal(s)))
+                        } else {
+                            None
+                        }
+                    }
+                    Err(e) => Some(Err(e))
+                }
+            }
+        }
+    }
+}
+
+impl<'a, It> Iterator for WordLexer<'a, It>
+where It: 'a + Iterator<Item = char> {
+    type Item = Result<WordSegment, LexError>;
+
+    fn next(&mut self) -> Option<Result<WordSegment, LexError>> {
+        skip_whitespace(self.chars);
+        match self.chars.peek().cloned() {
+            Some('|') => { None }
+            Some('&') => { None }
+            Some(';') => { None }
+            Some('~') => {
+                self.chars.next();
+                let user = DelimIter::new_whitespace(self.chars).collect();
+                match user {
+                    Ok(user) => Some(Ok(WordSegment::Tilde(user))),
+                    Err(e) => Some(Err(e))
+                }
+            }
+            Some('"') => { Some(parse_doublequoted(self.chars)) },
+            Some('\'') => { Some(parse_quoted(self.chars)) },
+            Some('$') => {
+                SubstLexer{ chars: &mut self.chars }.next()
+            }
+            Some(c) => {
+                let s : Result<String, LexError> = DelimIter::new_shell_word(self.chars).collect();
+                match s {
+                    Ok(s) => Some(Ok(WordSegment::Literal(s))),
+                    Err(e) => Some(Err(e))
+                }
+            }
+            None => {
+                None
+            }
+        }
+    }
+}
+
+
+mod test {
+    use crate::sh::parse::*;
+
+    #[test]
+    fn test_delim_iter() {
+        let mut cs = "test 1234".chars().peekable();
+        let mut lexer = DelimIter::new_shell_word(&mut cs);
+        assert_eq!(lexer.try_collect::<String>(), Ok(String::from("test")));
+    }
+
+    #[test]
+    fn test_word_lexer() {
+        let mut cs = "test   1234|test".chars().peekable();
+
+        {
+            let mut lexer = WordLexer{ chars: &mut cs };
+            assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test")))));
+            assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234")))));
+            assert_eq!(lexer.next(), None);
+        }
+        assert_eq!(cs.next(), Some('|'));
+        {
+            let mut lexer = WordLexer{ chars: &mut cs };
+            assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test")))));
+            assert_eq!(lexer.next(), None);
+        }
+    }
+}
+
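
A sketch of a test for the behavior this patch is after: spaces inside double
quotes survive as part of a single DoubleQuote segment while $-substitutions
are still split out. The test is hypothetical (not part of the patch) and
only relies on parse_doublequoted and the derived PartialEq impls above.

    #[test]
    fn doublequote_preserves_spaces() {
        let mut cs = "\"a  b $X\"".chars().peekable();
        assert_eq!(
            parse_doublequoted(&mut cs),
            Ok(WordSegment::DoubleQuote(Word {
                segments: vec![
                    // literal run, including its inner and trailing spaces
                    WordSegment::Literal(String::from("a  b ")),
                    // parameter substitution split out of the quoted string
                    WordSegment::Parameter(String::from("X"), ParameterFormat::Normal),
                ]
            }))
        );
    }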

From 903d6dd64fc938f6027853560111ba6edfbcdf52 Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Tue, 31 Oct 2023 16:30:08 +0100
Subject: [PATCH 8/9] parser: remove deprecated collect_until

---
 src/sh/parse.rs | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/sh/parse.rs b/src/sh/parse.rs
index 6b5691a..41a7a52 100644
--- a/src/sh/parse.rs
+++ b/src/sh/parse.rs
@@ -85,20 +85,11 @@ where It: 'a + Iterator<Item = char> {
     }
 }
 
-
 pub struct WordLexer<'a, It>
 where It: 'a + Iterator<Item = char> {
     chars: &'a mut Peekable<It>
 }
 
-impl<'a, It> WordLexer<'a, It>
-where It: Iterator<Item = char> {
-    fn collect_until(&mut self, close: Option<char>) -> Result<String, LexError> {
-        DelimIter::new(&mut self.chars, vec![(close, true)])
-            .try_collect::<String>()
-    }
-}
-
 pub struct SubstLexer<'a, It>
 where It: 'a + Iterator<Item = char> {
     chars: &'a mut Peekable<It>
@@ -140,8 +131,6 @@ where It: Iterator<Item = char>
             let word = Word {
                 segments: SubstLexer { chars: &mut s.chars().peekable() }
                 .try_collect::<Vec<_>>()?
-//                .scan((), |_, x| x.ok())
-//                    .collect::<Vec<_>>()
             };
 
             Ok(WordSegment::DoubleQuote(word))
@@ -362,6 +351,7 @@ where It: 'a + Iterator<Item = char> {
                     // Subshell
                     Some('(') => {
                         self.chars.next();
+
                         let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>();
                         match subcmd_str {
                             Ok(subcmd_str) => {

From c49577e9247ed2325a97f48cae93c70b6823c5b7 Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Wed, 8 Nov 2023 04:22:08 +0100
Subject: [PATCH 9/9] parser: fix recursive subshells

---
 src/main.rs     |  2 +-
 src/sh/parse.rs | 88 ++++++++++++++++++++++++-------------------------
 2 files changed, 44 insertions(+), 46 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 02e6b77..9b45a18 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -38,7 +38,7 @@ fn main() {
     let stdin = std::io::stdin();
     for line in std::io::BufReader::new(stdin).lines() {
         if let Ok(line) = line {
-            let cmd = sh::parse::parse_cmd( &mut line.chars().peekable() );
+            let cmd = sh::parse::parse_cmd( &mut line.chars().peekable(), 0 );
             eprintln!("parsed cmd: {:?}", cmd);
             /*
             let mut lex = parse::WordLexer::from( line.chars() );
diff --git a/src/sh/parse.rs b/src/sh/parse.rs
index 41a7a52..3a30250 100644
--- a/src/sh/parse.rs
+++ b/src/sh/parse.rs
@@ -30,7 +30,8 @@ where It: Iterator<Item = char> {
             (None, true),
             (Some(' '), true),
             (Some('\t'), true),
-            (Some('\n'), true)
+            (Some('\n'), true),
+            (Some(')'), false),
         ])
     }
 
@@ -43,6 +44,8 @@ where It: Iterator<Item = char> {
             (Some('|'), false),
             (Some('&'), false),
             (Some(';'), false),
+            (Some(')'), false),
+            (Some('$'), false),
             (Some('\"'), false),
             (Some('\''), false)
         ])
@@ -57,7 +60,8 @@ where It: Iterator<Item = char> {
             (Some('='), false),
             (Some('|'), false),
             (Some('&'), false),
-            (Some(';'), false),
+            (Some(';'), false),            
+            (Some(')'), false),
             (Some('\"'), false),
             (Some('\''), false)
         ])
@@ -92,7 +96,8 @@ where It: 'a + Iterator<Item = char> {
 
 pub struct SubstLexer<'a, It>
 where It: 'a + Iterator<Item = char> {
-    chars: &'a mut Peekable<It>
+    chars: &'a mut Peekable<It>,
+    depth: usize
 }
 
 pub fn skip_whitespace<It>(chars: &mut Peekable<It>)
@@ -124,12 +129,14 @@ pub fn parse_doublequoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, L
 where It: Iterator<Item = char>
 {
     assert_eq!( chars.next(), Some('\"'));
+
+    // todo: allow escaped \"
     let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::<String>();
 
     match quoted {
         Ok(s) => {
             let word = Word {
-                segments: SubstLexer { chars: &mut s.chars().peekable() }
+                segments: SubstLexer { chars: &mut s.chars().peekable(), depth: 1 }
                 .try_collect::<Vec<_>>()?
             };
 
@@ -213,14 +220,14 @@ where It: Iterator<Item = char>
     }
 }
 
-pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError>
+pub fn parse_cmd<It>(chars: &mut Peekable<It>, depth: usize) -> Result<Option<Command>, LexError>
 where It: Iterator<Item = char>
 {
     skip_whitespace(chars);
     match chars.peek() {
         Some('!') => {
             chars.next();
-            if let Some(cmd) = parse_cmd(chars)? {
+            if let Some(cmd) = parse_cmd(chars, depth)? {
                 Ok(Some(Command::Negation(Box::new(cmd))))
             } else {
                 Err(LexError::UnexpectedEnd(vec![]))
@@ -234,18 +241,14 @@ where It: Iterator<Item = char>
                     Some(';') => {
                         chars.next();
 
-                        let tail = parse_cmd( chars ) ?;
+                        let tail = parse_cmd( chars, depth ) ?;
                         match tail {
                             Some(Command::Sequence(mut s)) => {
                                 s.insert(0, head);
                                 Ok(Some(Command::Sequence(s)))
                             }
-                            Some(tail) => {
-                                Ok(Some(Command::Sequence(vec![ head, tail ])))
-                            }
-                            None => {
-                                Ok(Some(head))
-                            }
+                            Some(tail) => Ok(Some(Command::Sequence(vec![ head, tail ]))),
+                            None => Ok(Some(head))
                         }
                     }
                     Some('|') => {
@@ -254,33 +257,25 @@ where It: Iterator<Item = char>
                             Some('|') => {
                                 chars.next();
 
-                                let tail = parse_cmd( chars ) ?;
+                                let tail = parse_cmd( chars, depth ) ?;
                                 match tail {
                                     Some(Command::ShortCircuitDisjunction(mut s)) => {
                                         s.insert(0, head);
                                         Ok(Some(Command::ShortCircuitDisjunction(s)))
                                     }
-                                    Some(tail) => {
-                                        Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ])))
-                                    }
-                                    None => {
-                                        Err(LexError::UnexpectedEnd(vec![Some('|')]))
-                                    }
+                                    Some(tail) => Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ]))),
+                                    None => Err(LexError::UnexpectedEnd(vec![Some('|')]))
                                 }
                             }
                             _ => {
-                                let tail = parse_cmd( chars ) ?;
+                                let tail = parse_cmd( chars, depth ) ?;
                                 match tail {
                                     Some(Command::Pipeline(mut s)) => {
                                         s.insert(0, head);
                                         Ok(Some(Command::Pipeline(s)))
                                     }
-                                    Some(tail) => {
-                                        Ok(Some(Command::Pipeline(vec![ head, tail ])))
-                                    }
-                                    None => {
-                                        Err(LexError::UnexpectedEnd(vec![]))
-                                    }
+                                    Some(tail) => Ok(Some(Command::Pipeline(vec![ head, tail ]))),
+                                    None => Err(LexError::UnexpectedEnd(vec![]))
                                 }
                             }
                         }
@@ -291,23 +286,17 @@ where It: Iterator<Item = char>
                             Some('&') => {
                                 chars.next();
 
-                                let tail = parse_cmd( chars ) ?;
+                                let tail = parse_cmd( chars, depth ) ?;
                                 match tail {
                                     Some(Command::ShortCircuitConjunction(mut s)) => {
                                         s.insert(0, head);
                                         Ok(Some(Command::ShortCircuitConjunction(s)))
                                     }
-                                    Some(tail) => {
-                                        Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ])))
-                                    }
-                                    None => {
-                                        Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')]))
-                                    }
+                                    Some(tail) => Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ]))),
+                                    None => Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')]))
                                 }
                             }
-                            Some(c) => {
-                                Err(LexError::UnexpectedToken(*c))
-                            }
+                            Some(c) => Err(LexError::UnexpectedToken(*c)),
                             None => {
                                 // todo:
                                 // background job
@@ -315,12 +304,17 @@ where It: Iterator<Item = char>
                             }
                         }
                     }
-                    Some(c) => {
-                        Err(LexError::UnexpectedToken(*c))
-                    }
-                    None => {
-                        Ok(Some(head))
+                    Some(')') => {
+                        // a ')' ends this command; it is only valid inside a subshell
+                        chars.next();
+                        if depth > 0 {
+                            Ok(Some(head))
+                        } else {
+                            Err(LexError::UnexpectedToken(')'))
+                        }
                     }
+                    Some(c) => Err(LexError::UnexpectedToken(*c)),
+                    None => Ok(Some(head))
                 }
             } else {
                 Ok(None)
@@ -351,20 +345,23 @@ where It: 'a + Iterator<Item = char> {
                     // Subshell
                     Some('(') => {
                         self.chars.next();
-
+/*
                         let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>();
                         match subcmd_str {
                             Ok(subcmd_str) => {
-                                match parse_cmd(&mut subcmd_str.chars().peekable()) {
+                                */
+                                match parse_cmd(&mut self.chars, 1) {
                                     Ok(Some(subcmd)) => {
                                         Some(Ok(WordSegment::Subshell(subcmd)))        
                                     }
                                     Ok(None) => None,
                                     Err(err) => Some(Err(err))
                                 }
+                        /*
                             }
                             Err(err) => Some(Err(err))
                         }
+                */
                     }
 
                     // plain parameter name e.g. `$PARAM`
@@ -426,8 +423,9 @@ where It: 'a + Iterator<Item = char> {
             Some('"') => { Some(parse_doublequoted(self.chars)) },
             Some('\'') => { Some(parse_quoted(self.chars)) },
             Some('$') => {
-                SubstLexer{ chars: &mut self.chars }.next()
+                SubstLexer{ chars: &mut self.chars, depth: 1 }.next()
             }
+            Some(')') => { None }
             Some(c) => {
                 let s : Result<String, LexError> = DelimIter::new_shell_word(self.chars).collect();
                 match s {