From 73817529d600e36887ebb364623d55e07d8dc84a Mon Sep 17 00:00:00 2001 From: Michael Sippel <micha@fragmental.art> Date: Sat, 7 Oct 2023 13:07:18 +0200 Subject: [PATCH 1/9] first samples for a potential command-type description language --- typeDB/cut | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++ typeDB/date | 15 ++++++++++++ typeDB/dirname | 56 +++++++++++++++++++++++++++++++++++++++++++ typeDB/echo | 16 +++++++++++++ typeDB/seq | 25 +++++++++++++++++++ typeDB/xargs | 38 +++++++++++++++++++++++++++++ 6 files changed, 215 insertions(+) create mode 100644 typeDB/cut create mode 100644 typeDB/date create mode 100644 typeDB/dirname create mode 100644 typeDB/echo create mode 100644 typeDB/seq create mode 100644 typeDB/xargs diff --git a/typeDB/cut b/typeDB/cut new file mode 100644 index 0000000..e860067 --- /dev/null +++ b/typeDB/cut @@ -0,0 +1,65 @@ +::cut OPTION... [FILE]... { + + match OPTION... { + --help { <1 : Help~<Seq Char> ; } + --version { <1 : VersionInfo~<Seq Char> ; } + + * { + match OPTION... { + -f--fields LIST:<Seq ℕ>~CutFieldList~<Seq Char> { + + ROW-DELIM = + match OPTION... { + -z--zero-terminated { '\0' } + * { '\n' } + }; + + IN-COL-DELIM = + match OPTION... { + -d--delimiter DELIM:Char { DELIM } + * { '\t' } + }; + + OUT-COL-DELIM = + match OPTION... { + --output-delimiter STRING:<Seq Char> { STRING } + * { IN-COL-DELIM } + }; + + FILE : Path + ~<Seq PathSegment + ~<Seq Char>> + ~<SepSeq Char '/'> + ~<Seq Char> ; + + @FILE : <Seq <* < ITEM~<Seq Char> >... > + ~<Seq <Seq Char>> + ~<SepSeq Char IN-COL-DELIM> + ~<Seq Char>> + ~<SepSeq Char ROW-DELIM> + ~<Seq Char> ; + + >0 : <Seq <* <ITEM~<Seq Char>>... > + ~<Seq <Seq Char>> + ~<SepSeq Char IN-COL-DELIM> + ~<Seq Char>> + ~<SepSeq Char ROW-DELIM> + ~<Seq Char> ; + + # `[LIST]` means `select all fields contained in LIST from parameter pack` + <1 : <Seq <* <ITEM~<Seq Char>>[LIST]... > + ~<Seq <Seq Char>> + ~<SepSeq Char OUT-COL-DELIM> + ~<Seq Char>> + ~<SepSeq Char ROW-DELIM> + ~<Seq Char> ; + } + + * { + <1 : Nothing ; + <2 : MissingFields~ErrorMessage~<Seq Char> ; + } + } + } + } +} diff --git a/typeDB/date b/typeDB/date new file mode 100644 index 0000000..84edf45 --- /dev/null +++ b/typeDB/date @@ -0,0 +1,15 @@ +::date [OPTION]... [+FORMAT] { + match FORMAT { + "+%s" { + <1 : TimePoint + ~<TimeSince UnixEpoch> + ~<Duration Seconds> + ~ℕ + ~<PosInt 10 BigEndian> + ~<Seq <Digit 10>~Char> + } + * { + <1 : TimePoint ; + } + } +} diff --git a/typeDB/dirname b/typeDB/dirname new file mode 100644 index 0000000..c94d46a --- /dev/null +++ b/typeDB/dirname @@ -0,0 +1,56 @@ +::dirname [OPTION] NAME... { + + # dont expect any data on stdin + # + + >0 : None ; + + + # each (non-option) argument must be a path + # + + NAME : Path + ~<Seq PathSegment + ~<Seq Char>> + ~<SepSeq Char '/'> + ~<Seq Char> + ~UTF-8 + ~<Seq Byte> ; + + + matchopt OPTION { + + # help and version flags will bypass any data processing + # + + --help { <1 : Help~<Seq Char> ; } + --version { <1 : VersionInfo~<Seq Char> ; } + + + # in case neither --help nor --version is set + # ... + + * { + + # if zero-flag is set, use '\0' instead of '\n' + # as delimiter in output sequence + + DELIM = + match OPTION { + -z--zero { '\0' } + * { '\n' } + }; + + + # output a sequence of paths to stdout + # + + <1 : <Seq Path + ~<Seq PathSegment~<Seq Char>> + ~<SepSeq Char '/'> + ~<Seq Char>> + ~<SepSeq Char DELIM> + ~<Seq Char> ; + } + } +} diff --git a/typeDB/echo b/typeDB/echo new file mode 100644 index 0000000..6a6b9b6 --- /dev/null +++ b/typeDB/echo @@ -0,0 +1,16 @@ +::echo [OPTIONS]... [STRING]... { + >0 : None ; + + matchopt OPTION { + --help { <1 : Help~<Seq Char> ; } + --version { <1 : VersionInfo~<Seq Char> ; } + -n { + <1 : <* STRING~<Seq Char>... > + ~<Seq Char> ; + } + * { + <1 : <* STRING~<Seq Char>... '\n' > + ~<Seq Char> ; + } + } +} diff --git a/typeDB/seq b/typeDB/seq new file mode 100644 index 0000000..b585a9a --- /dev/null +++ b/typeDB/seq @@ -0,0 +1,25 @@ +::seq [OPTION]... [FIRST [INCREMENT]] LAST { + >0 : None ; + + match OPTION... { + --help { <1 : Help~<Seq Char> ; } + --version { <1 : VersionInfo~<Seq Char> ; } + * { + NUM = match OPTION... { + -f--format FORMAT { ℝ~<Float 10 FORMAT>~<Seq Char> } + * { ℤ~<PosInt 10 BigEndian>~<Seq <Digit 10>~Char> } + }; + + SEP = match OPTION... { + -s--separator SEP { SEP } + * { '\n' } + }; + + $FIRST : NUM ; + $INCREMENT : NUM ; + $LAST : NUM ; + + <1 : <Seq NUM~<Seq Char>>~<SepSeq Char SEP>~<Seq Char> ; + } + } +} diff --git a/typeDB/xargs b/typeDB/xargs new file mode 100644 index 0000000..6380730 --- /dev/null +++ b/typeDB/xargs @@ -0,0 +1,38 @@ +::xargs [OPTION]... [CMD [INITIAL-ARGS]...] { + + DELIM = + match OPTION... { + -0--null { '\0' } + -d--delimiter D:Char { D } + }; + + match OPTION... { + -a--argfile PATH => { + # type of data read from file at `path` + + @PATH : ARGS~<Seq Char>... + ~<Seq <Seq Char>> + ~<SepSeq Char DELIM> + ~<Seq Char> ; + } + * { + # type of data read from stdin + + >0 : ARGS~<Seq Char>... + ~<Seq <Seq Char>> + ~<SepSeq Char DELIM> + ~<Seq Char> ; + } + } + + match OPTION... { + -I REPLACE-STR { + # TODO : how to handle replacement ? + <1 : cmdtype <1 { CMD { INITIAL-ARGS where REPLACE-STR -> ARGS... } } ; + } + * { + <1 : cmdtype <1 { CMD INITIAL-ARGS... ARGS... } ; + } + } + +} From 7988c8a2e1220ffbf806a533e83c591ec3e1f891 Mon Sep 17 00:00:00 2001 From: Michael Sippel <micha@fragmental.art> Date: Sun, 15 Oct 2023 11:47:06 +0200 Subject: [PATCH 2/9] wip parser --- src/ast.rs | 123 +++++++++++++++++++++++++++++++++++++++++++++++ src/env.rs | 9 ++++ src/expand.rs | 44 +++++++++++++++++ src/main.rs | 19 +++++++- src/parse.rs | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 324 insertions(+), 1 deletion(-) create mode 100644 src/ast.rs create mode 100644 src/env.rs create mode 100644 src/expand.rs create mode 100644 src/parse.rs diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..5a20bbc --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,123 @@ +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +#[derive(Debug)] +pub enum Command { + Simple { + assignments: Vec<(String, Word)>, + command_word: Word, + redirections: Vec<Redirection> + }, + Pipeline(Vec<Command>), + Sequence(Vec<Command>), + ShortCircuitConjection(Vec<Command>), + ShortCircuitDisjunction(Vec<Command>), + Negation(Command), + While { + condition: Command, + loop_body: Command + }, + For { + varname: String, + sequence: Word, + loop_body: Command + } + If { + condition: Command, + then_branch: Command, + else_branch: Command + }, + Case { + expr: Word, + cases: Vec<(Word, Command)> + }, + Function { + name: String, + body: Command + } +} + +/* + * We are all luminous beings. + * Why then, do we not appear before each + * other radiant in our illumination ? + */ + +/* + * Bewteen the idea + * And the reality + * Between the motion + * And the act + * Falls the Shadow + * (T.S. Eliot) + */ + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +#[derive(Debug)] +pub struct Word { + pub segments: Vec<WordSegment> +} + +#[derive(Debug)] +pub enum WordSegment { + FieldSeparator, + Tilde(String), + Literal(String), + Parameter(String, ParameterFormat), + Subshell(Command), + DoubleQuote(Word), +} + +#[derive(Debug)] +pub enum ParameterFormat { + Normal, + Length, + Default(Word), + Assign(Word), + Error(Word), + Alt(Word), + Sub(ParamSubSide, ParamSubMode, Word), +} + +#[derive(Debug)] +pub enum ParamSubMode { + Shortest, Longest +} + +#[derive(Debug)] +pub enum ParamSubSide { + Prefix, Suffix +} + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +#[derive(Debug)] +pub struct Redirection { + redirection_type: RedirectionType, + fd: u64, + target: Word +} + +#[derive(Debug)] +pub enum RedirectionType { + File(FileRedirectionType), + Dup(DupRedirectionType), + Heredoc // '<<' +} + +#[derive(Debug)] +pub enum FileRedirectionType { + In, // '<' + InOut, // '<>' + Out, // '>' + OutReplace, // '>|' + OutAppend, // '>|' +} + +#[derive(Debug)] +pub enum DupRedirectionType { + In, // '<&' + Out // '>&' +} + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ diff --git a/src/env.rs b/src/env.rs new file mode 100644 index 0000000..091e68d --- /dev/null +++ b/src/env.rs @@ -0,0 +1,9 @@ +use std::collections::HashMap; + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +struct Environment { + variables: HashMap<String, String> +} + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ diff --git a/src/expand.rs b/src/expand.rs new file mode 100644 index 0000000..412fc5f --- /dev/null +++ b/src/expand.rs @@ -0,0 +1,44 @@ +use crate::ast::*; + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +impl Word { + pub fn (&mut self, env: &Environment) { + for x in self.0.iter_mut() { + x.expand_tilde(); + match x { + Word::Tilde => Word::Literal( env.get_home() ), + other => other, + } + } + } + + pub fn expand(&self) -> Vec<String> { + let mut fields = Vec::new(); + + for seg in self.segments.iter() { + // + } + + fields + } + + pub fn split_field(&mut self) { + + } +} + +impl WordSegment { + pub fn split_field(&self) -> Word { + + match self { + + } + } + + pub fn expand(&self) -> Word { + match + } +} + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ diff --git a/src/main.rs b/src/main.rs index 5533bc1..d6025bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,11 @@ use { tiny_ansi::TinyAnsi }; +mod ast; +mod env; +mod parse; +//mod expand; + //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ pub fn get_type_str(cmd: &str, item: &str) -> Option<String> { @@ -31,7 +36,19 @@ fn main() { let mut dict = TypeDict::new(); let stdin = std::io::stdin(); - for pipeline in std::io::BufReader::new(stdin).lines() { + for line in std::io::BufReader::new(stdin).lines() { + if let Ok(line) = line { + let mut lex = parse::WordLexer::from( line.chars() ); + for word in lex { + eprintln!("word-segment: {:?}", word); + } + } + } + + return; + + let stdin = std::io::stdin(); + for pipeline in std::io::BufReader::new(stdin).lines() { let mut last_cmd = String::new(); let mut last_stdout_type : Option<TypeTerm> = None; diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..9a54df4 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,130 @@ +use { + crate::ast::*, + std::iter::{Peekable, FromIterator}, +}; + +pub struct WordLexer<It> +where It: Iterator<Item = char> { + chars: Peekable<It> +} + +impl<It> From<It> for WordLexer<It> +where It: Iterator<Item = char> { + fn from(iter: It) -> Self { + WordLexer { + chars: iter.into_iter().peekable() + } + } +} + +#[derive(Debug)] +pub enum LexError { + UnexpectedEnd(char) +} + +impl<It> WordLexer<It> +where It: Iterator<Item = char> { + fn collect_until(&mut self, close: char) -> Result<String, LexError> { + let mut val = String::new(); + while let Some(c) = self.chars.peek().cloned() { + if c == close { + return Ok(val) + } else { + self.chars.next(); + val.push(c); + } + } + + if close.is_whitespace() { + Ok(val) + } else { + Err(LexError::UnexpectedEnd(close)) + } + } +} + +impl<It> Iterator for WordLexer<It> +where It: Iterator<Item = char> { + type Item = Result<WordSegment, LexError>; + + fn next(&mut self) -> Option<Result<WordSegment, LexError>> { + match self.chars.peek().cloned() { + Some('~') => { + self.chars.next(); + match self.collect_until(' ') { + Ok(s) => Some(Ok(WordSegment::Tilde(s))), + Err(e) => Some(Err(e)) + } + } + Some('"') => { + self.chars.next(); + match self.collect_until('"') { + Ok(s) => { + self.chars.next(); + + let word = Word { + segments: WordLexer { chars: s.chars().peekable() } + .scan((), |_, x| x.ok()) + .collect::<Vec<_>>() + }; + + Some(Ok(WordSegment::DoubleQuote(word))) + }, + Err(e) => Some(Err(e)) + } + }, + Some('\'') => { + self.chars.next(); + match self.collect_until('\'') { + Ok(s) => { + self.chars.next(); + Some(Ok(WordSegment::Literal(s))) + }, + Err(e) => Some(Err(e)) + } + }, + Some('$') => { + self.chars.next(); + match self.chars.peek() { + Some('{') => { + self.chars.next(); + match self.collect_until('}') { + Ok(s) => { + self.chars.next(); + Some(Ok(WordSegment::Variable(s))) + } + Err(e) => Some(Err(e)) + } + } + _ => { + match self.collect_until(' ') { + Ok(s) => { + Some(Ok(WordSegment::Variable(s))) + } + Err(e) => Some(Err(e)) + } + } + } + } + Some(c) => { + while let Some(c) = self.chars.peek() { + if c.is_whitespace() { + self.chars.next(); + } else { + return match self.collect_until(' ') { + Ok(s) => { + Some(Ok(WordSegment::Literal(s))) + } + Err(e) => Some(Err(e)) + }; + } + } + None + } + None => { + None + } + } + } +} + From 695cbb24f1d27c347fe21bbb98e61d5028551ac8 Mon Sep 17 00:00:00 2001 From: Michael Sippel <micha@fragmental.art> Date: Thu, 26 Oct 2023 20:25:56 +0200 Subject: [PATCH 3/9] basic parser --- src/ast.rs | 68 ++++---- src/main.rs | 8 +- src/parse.rs | 429 ++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 392 insertions(+), 113 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 5a20bbc..87e7aae 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,30 +1,32 @@ +use std::boxed::Box; + //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum Command { Simple { - assignments: Vec<(String, Word)>, + assignments: Vec<Assignment>, command_word: Word, redirections: Vec<Redirection> }, Pipeline(Vec<Command>), Sequence(Vec<Command>), - ShortCircuitConjection(Vec<Command>), + ShortCircuitConjunction(Vec<Command>), ShortCircuitDisjunction(Vec<Command>), - Negation(Command), + Negation(Box<Command>), While { - condition: Command, - loop_body: Command + condition: Box<Command>, + loop_body: Box<Command> }, For { varname: String, sequence: Word, - loop_body: Command - } + loop_body: Box<Command> + }, If { - condition: Command, - then_branch: Command, - else_branch: Command + condition: Box<Command>, + then_branch: Box<Command>, + else_branch: Box<Command> }, Case { expr: Word, @@ -32,35 +34,25 @@ pub enum Command { }, Function { name: String, - body: Command + body: Box<Command> } } -/* - * We are all luminous beings. - * Why then, do we not appear before each - * other radiant in our illumination ? - */ - -/* - * Bewteen the idea - * And the reality - * Between the motion - * And the act - * Falls the Shadow - * (T.S. Eliot) - */ - //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ -#[derive(Debug)] +#[derive(Debug, PartialEq)] +pub struct Assignment { + pub name: String, + pub value: Word +} + +#[derive(Debug, PartialEq)] pub struct Word { pub segments: Vec<WordSegment> } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum WordSegment { - FieldSeparator, Tilde(String), Literal(String), Parameter(String, ParameterFormat), @@ -68,7 +60,7 @@ pub enum WordSegment { DoubleQuote(Word), } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum ParameterFormat { Normal, Length, @@ -79,42 +71,42 @@ pub enum ParameterFormat { Sub(ParamSubSide, ParamSubMode, Word), } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum ParamSubMode { Shortest, Longest } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum ParamSubSide { Prefix, Suffix } //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct Redirection { redirection_type: RedirectionType, fd: u64, target: Word } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum RedirectionType { File(FileRedirectionType), Dup(DupRedirectionType), Heredoc // '<<' } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum FileRedirectionType { In, // '<' InOut, // '<>' Out, // '>' OutReplace, // '>|' - OutAppend, // '>|' + OutAppend, // '>>' } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum DupRedirectionType { In, // '<&' Out // '>&' diff --git a/src/main.rs b/src/main.rs index d6025bd..3752690 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +#![feature(iterator_try_collect)] + use { laddertypes::*, std::io::BufRead, @@ -38,10 +40,14 @@ fn main() { let stdin = std::io::stdin(); for line in std::io::BufReader::new(stdin).lines() { if let Ok(line) = line { + let cmd = parse::parse_cmd( &mut line.chars().peekable() ); + eprintln!("parsed cmd: {:?}", cmd); + /* let mut lex = parse::WordLexer::from( line.chars() ); for word in lex { eprintln!("word-segment: {:?}", word); - } + } + */ } } diff --git a/src/parse.rs b/src/parse.rs index 9a54df4..af691f9 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1,105 +1,368 @@ use { crate::ast::*, - std::iter::{Peekable, FromIterator}, + std::iter::{Peekable}, }; -pub struct WordLexer<It> -where It: Iterator<Item = char> { - chars: Peekable<It> + +#[derive(Debug, PartialEq)] +pub enum LexError { + UnexpectedEnd(Vec<Option<char>>), + UnexpectedToken(char), + InvalidFileRedirectionType } -impl<It> From<It> for WordLexer<It> + +///! iterates chars until it finds some char in `delim` +pub struct DelimIter<'a, It> where It: Iterator<Item = char> { - fn from(iter: It) -> Self { - WordLexer { - chars: iter.into_iter().peekable() - } + chars: &'a mut Peekable<It>, + delim: Vec<(Option<char>, bool)> +} + +impl<'a, It> DelimIter<'a, It> +where It: Iterator<Item = char> { + fn new(chars: &'a mut Peekable<It>, delim: Vec<(Option<char>, bool)>) -> Self { + DelimIter { chars, delim } + } + + fn new_whitespace(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true) + ]) + } + + fn new_shell_word(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true), + (Some('|'), false), + (Some('&'), false), + (Some(';'), false), + (Some('\"'), false), + (Some('\''), false) + ]) } } -#[derive(Debug)] -pub enum LexError { - UnexpectedEnd(char) -} +impl<'a, It> Iterator for DelimIter<'a, It> +where It: 'a + Iterator<Item = char> { + type Item = Result<char, LexError>; -impl<It> WordLexer<It> -where It: Iterator<Item = char> { - fn collect_until(&mut self, close: char) -> Result<String, LexError> { - let mut val = String::new(); - while let Some(c) = self.chars.peek().cloned() { - if c == close { - return Ok(val) - } else { - self.chars.next(); - val.push(c); + fn next(&mut self) -> Option<Result<char, LexError>> { + for (delim, consume) in self.delim.iter() { + if self.chars.peek().cloned() == *delim { + if *consume { + self.chars.next(); + } + return None; } } - if close.is_whitespace() { - Ok(val) - } else { - Err(LexError::UnexpectedEnd(close)) + match self.chars.next() { + Some(c) => Some(Ok(c)), + None => Some(Err(LexError::UnexpectedEnd(vec![]))) } } } -impl<It> Iterator for WordLexer<It> + +pub struct WordLexer<'a, It> +where It: 'a + Iterator<Item = char> { + chars: &'a mut Peekable<It> +} + +impl<'a, It> WordLexer<'a, It> where It: Iterator<Item = char> { + fn collect_until(&mut self, close: Option<char>) -> Result<String, LexError> { + DelimIter::new(&mut self.chars, vec![(close, true)]) + .try_collect::<String>() + } +} + +pub fn skip_whitespace<It>(chars: &mut Peekable<It>) +where It: Iterator<Item = char> +{ + while let Some(c) = chars.peek() { + if c.is_whitespace() { + chars.next(); + } else { + break; + } + } +} + +pub fn parse_quoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError> +where It: Iterator<Item = char> +{ + assert_eq!( chars.next(), Some('\'')); + let quoted = DelimIter::new(chars, vec![(Some('\''), true)]).try_collect::<String>(); + match quoted { + Ok(s) => { + Ok(WordSegment::Literal(s)) + }, + Err(e) => Err(e) + } +} + +pub fn parse_doublequoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError> +where It: Iterator<Item = char> +{ + assert_eq!( chars.next(), Some('\"')); + let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::<String>(); + match quoted { + Ok(s) => { + let word = Word { + segments: // fixme: handle spaces correctly -> create QuoteLexer + WordLexer { chars: &mut s.chars().peekable() } + .scan((), |_, x| x.ok()) + .collect::<Vec<_>>() + }; + + Ok(WordSegment::DoubleQuote(word)) + }, + Err(e) => Err(e) + } +} + +pub fn parse_word<It>(chars: &mut Peekable<It>) -> Result<Word, LexError> +where It: Iterator<Item = char> +{ + Ok(Word { + segments: WordLexer{ chars }.try_collect::<Vec<_>>()? + }) +} + +pub fn parse_assignment<It>(chars: &mut Peekable<It>) -> Result<Assignment, LexError> +where It: Iterator<Item = char> +{ + let name = DelimIter::new(chars, vec![(Some('='), true)]).try_collect::<String>()?; + let value_str = DelimIter::new_whitespace(chars).try_collect::<String>()?; + let value = parse_word(&mut value_str.chars().peekable())?; + Ok(Assignment{ name, value }) +} + +impl std::str::FromStr for FileRedirectionType { + type Err = LexError; + + fn from_str(s: &str) -> Result<FileRedirectionType, LexError> { + match s { + "<" => Ok(FileRedirectionType::In), + "<>" => Ok(FileRedirectionType::InOut), + ">" => Ok(FileRedirectionType::Out), + ">|" => Ok(FileRedirectionType::OutReplace), + ">>" => Ok(FileRedirectionType::OutAppend), + _ => Err(LexError::InvalidFileRedirectionType) + } + } +} + +pub fn parse_redirection<It>(chars: &mut Peekable<It>) -> Result<Redirection, LexError> +where It: Iterator<Item = char> +{ + Err(LexError::InvalidFileRedirectionType) + // let name = DelimIterator::new(chars, vec!['<', '>']).collect::<String>(); +} + +pub fn parse_simple_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> +where It: Iterator<Item = char> +{ + let mut assignments = Vec::new(); + let mut redirections = Vec::new(); + + if chars.peek() == None { + return Ok(None); + } + + let mut first = DelimIter::new_shell_word(chars).try_collect::<String>()?; + + while first.contains('=') { + assignments.push( parse_assignment(chars)? ); + first = DelimIter::new_shell_word(chars).try_collect::<String>()?; + } + + let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?; + cmd_segments.insert(0, WordSegment::Literal(first)); + + Ok(Some(Command::Simple { + assignments, + command_word: Word { segments: cmd_segments }, + redirections, + })) +} + +pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> +where It: Iterator<Item = char> +{ + skip_whitespace(chars); + match chars.peek() { + Some('!') => { + chars.next(); + if let Some(cmd) = parse_cmd(chars)? { + Ok(Some(Command::Negation(Box::new(cmd)))) + } else { + Err(LexError::UnexpectedEnd(vec![])) + } + } + _ => { + if let Some(head) = parse_simple_cmd(chars)? { + skip_whitespace(chars); + + match chars.peek() { + Some(';') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::Sequence(mut s)) => { + s.insert(0, head); + Ok(Some(Command::Sequence(s))) + } + Some(tail) => { + Ok(Some(Command::Sequence(vec![ head, tail ]))) + } + None => { + Ok(Some(head)) + } + } + } + Some('|') => { + chars.next(); + match chars.peek() { + Some('|') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::ShortCircuitDisjunction(mut s)) => { + s.insert(0, head); + Ok(Some(Command::ShortCircuitDisjunction(s))) + } + Some(tail) => { + Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![Some('|')])) + } + } + } + _ => { + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::Pipeline(mut s)) => { + s.insert(0, head); + Ok(Some(Command::Pipeline(s))) + } + Some(tail) => { + Ok(Some(Command::Pipeline(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![])) + } + } + } + } + } + Some('&') => { + chars.next(); + match chars.peek() { + Some('&') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::ShortCircuitConjunction(mut s)) => { + s.insert(0, head); + Ok(Some(Command::ShortCircuitConjunction(s))) + } + Some(tail) => { + Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')])) + } + } + } + Some(c) => { + Err(LexError::UnexpectedToken(*c)) + } + None => { + // todo: + // background job + Ok(Some(head)) + } + } + } + Some(c) => { + Err(LexError::UnexpectedToken(*c)) + } + None => { + Ok(Some(head)) + } + } + } else { + Ok(None) + } + } + } +} + +impl<'a, It> Iterator for WordLexer<'a, It> +where It: 'a + Iterator<Item = char> { type Item = Result<WordSegment, LexError>; fn next(&mut self) -> Option<Result<WordSegment, LexError>> { + skip_whitespace(self.chars); match self.chars.peek().cloned() { + Some('|') => { None } + Some('&') => { None } + Some(';') => { None } Some('~') => { self.chars.next(); - match self.collect_until(' ') { - Ok(s) => Some(Ok(WordSegment::Tilde(s))), + let user = DelimIter::new_whitespace(self.chars).collect(); + match user { + Ok(user) => Some(Ok(WordSegment::Tilde(user))), Err(e) => Some(Err(e)) } } - Some('"') => { - self.chars.next(); - match self.collect_until('"') { - Ok(s) => { - self.chars.next(); - - let word = Word { - segments: WordLexer { chars: s.chars().peekable() } - .scan((), |_, x| x.ok()) - .collect::<Vec<_>>() - }; - - Some(Ok(WordSegment::DoubleQuote(word))) - }, - Err(e) => Some(Err(e)) - } - }, - Some('\'') => { - self.chars.next(); - match self.collect_until('\'') { - Ok(s) => { - self.chars.next(); - Some(Ok(WordSegment::Literal(s))) - }, - Err(e) => Some(Err(e)) - } - }, + Some('"') => { Some(parse_doublequoted(self.chars)) }, + Some('\'') => { Some(parse_quoted(self.chars)) }, Some('$') => { self.chars.next(); match self.chars.peek() { Some('{') => { self.chars.next(); - match self.collect_until('}') { + match DelimIter::new(&mut self.chars, vec![(Some('}'), true)]).try_collect::<String>() { Ok(s) => { - self.chars.next(); - Some(Ok(WordSegment::Variable(s))) + Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) } Err(e) => Some(Err(e)) } } + Some('(') => { + self.chars.next(); + let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>(); + match subcmd_str { + Ok(subcmd_str) => { + match parse_cmd(&mut subcmd_str.chars().peekable()) { + Ok(Some(subcmd)) => { + Some(Ok(WordSegment::Subshell(subcmd))) + } + Ok(None) => None, + Err(err) => Some(Err(err)) + } + } + Err(err) => Some(Err(err)) + } + } _ => { - match self.collect_until(' ') { + match DelimIter::new_whitespace(self.chars).collect() { Ok(s) => { - Some(Ok(WordSegment::Variable(s))) + Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) } Err(e) => Some(Err(e)) } @@ -107,19 +370,11 @@ where It: Iterator<Item = char> { } } Some(c) => { - while let Some(c) = self.chars.peek() { - if c.is_whitespace() { - self.chars.next(); - } else { - return match self.collect_until(' ') { - Ok(s) => { - Some(Ok(WordSegment::Literal(s))) - } - Err(e) => Some(Err(e)) - }; - } + let s : Result<String, LexError> = DelimIter::new_shell_word(self.chars).collect(); + match s { + Ok(s) => Some(Ok(WordSegment::Literal(s))), + Err(e) => Some(Err(e)) } - None } None => { None @@ -128,3 +383,29 @@ where It: Iterator<Item = char> { } } + +mod test { + use crate::parse::*; + + #[test] + fn test_delim_iter() { + let mut cs = "test 1234".chars().peekable(); + let mut lexer = DelimIter::new_shell_word(&mut cs); + assert_eq!(lexer.try_collect::<String>(), Ok(String::from("test"))); + } + + #[test] + fn test_word_lexer() { + let mut cs = "test 1234|test".chars().peekable(); + + { + let mut lexer = WordLexer{ chars: &mut cs }; + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234"))))); + assert_eq!(lexer.next(), None); + } + + assert_eq!(cs.next(), Some('|')); + } +} + From 21aa45d189b556acce8478c62dd107e81a32c14b Mon Sep 17 00:00:00 2001 From: Michael Sippel <micha@fragmental.art> Date: Fri, 27 Oct 2023 02:11:28 +0200 Subject: [PATCH 4/9] parse assignments --- src/parse.rs | 73 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 23 deletions(-) diff --git a/src/parse.rs b/src/parse.rs index af691f9..b4db237 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -47,6 +47,21 @@ where It: Iterator<Item = char> { (Some('\''), false) ]) } + + fn new_shell_word_or_assignment(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true), + (Some('='), false), + (Some('|'), false), + (Some('&'), false), + (Some(';'), false), + (Some('\"'), false), + (Some('\''), false) + ]) + } } impl<'a, It> Iterator for DelimIter<'a, It> @@ -137,15 +152,6 @@ where It: Iterator<Item = char> }) } -pub fn parse_assignment<It>(chars: &mut Peekable<It>) -> Result<Assignment, LexError> -where It: Iterator<Item = char> -{ - let name = DelimIter::new(chars, vec![(Some('='), true)]).try_collect::<String>()?; - let value_str = DelimIter::new_whitespace(chars).try_collect::<String>()?; - let value = parse_word(&mut value_str.chars().peekable())?; - Ok(Assignment{ name, value }) -} - impl std::str::FromStr for FileRedirectionType { type Err = LexError; @@ -178,21 +184,38 @@ where It: Iterator<Item = char> return Ok(None); } - let mut first = DelimIter::new_shell_word(chars).try_collect::<String>()?; + loop { + skip_whitespace(chars); + let mut name = DelimIter::new_shell_word_or_assignment(chars).try_collect::<String>()?; - while first.contains('=') { - assignments.push( parse_assignment(chars)? ); - first = DelimIter::new_shell_word(chars).try_collect::<String>()?; + match chars.peek().clone() { + Some('=') => { + chars.next(); + let mut lex = WordLexer{ chars }; + match lex.next() { + Some(Ok(value)) => { + assignments.push(Assignment { name, value: Word{ segments: vec![ value ] } }); + }, + Some(Err(e)) => { + return Err(e); + }, + None => { + return Err(LexError::UnexpectedEnd(vec![])); + } + } + } + _ => { + let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?; + cmd_segments.insert(0, WordSegment::Literal(name)); + + return Ok(Some(Command::Simple { + assignments, + command_word: Word { segments: cmd_segments }, + redirections, + })); + } + } } - - let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?; - cmd_segments.insert(0, WordSegment::Literal(first)); - - Ok(Some(Command::Simple { - assignments, - command_word: Word { segments: cmd_segments }, - redirections, - })) } pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> @@ -404,8 +427,12 @@ mod test { assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234"))))); assert_eq!(lexer.next(), None); } - assert_eq!(cs.next(), Some('|')); + { + let mut lexer = WordLexer{ chars: &mut cs }; + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); + assert_eq!(lexer.next(), None); + } } } From 63e06a247211731f3d0d83b2f0a4c16947d08f2f Mon Sep 17 00:00:00 2001 From: Michael Sippel <micha@fragmental.art> Date: Sun, 29 Oct 2023 14:53:16 +0100 Subject: [PATCH 5/9] wip cmd-type-dsl --- src/cmd_type_dsl/annotation.rs | 68 +++++++++++++++++++++ src/cmd_type_dsl/mod.rs | 107 +++++++++++++++++++++++++++++++++ src/main.rs | 2 + 3 files changed, 177 insertions(+) create mode 100644 src/cmd_type_dsl/annotation.rs create mode 100644 src/cmd_type_dsl/mod.rs diff --git a/src/cmd_type_dsl/annotation.rs b/src/cmd_type_dsl/annotation.rs new file mode 100644 index 0000000..24c6467 --- /dev/null +++ b/src/cmd_type_dsl/annotation.rs @@ -0,0 +1,68 @@ + + + pub enum AnnotationContext { + Cached( Vec<(CommandPattern, CommandTypeStatement)> ), + Load( String ), + FindIn( String ), + } + impl AnnotationContext { + /* loads & parses any given context + */ + /* + pub fn into_cached(self) -> AnnotationContext { + match self { + AnnotationContext::Load( path ) => { + + } + } + } +*/ + pub fn get_type(&self, cmd: &Command) -> Result<CommandType, UnificationError> { + match cmd { + Command::Simple{ assignments, command_word, redirections } => { + match self { + AnnotationContext::Cached( annotations ) => { + // find matching command pattern... + for (cmd_pat, typ) in annotations.iter() { + if let Ok(unificator) = cmd_pat.match_cmd(cmd) { + return Ok( typ.substitute(unificator).eval() ); + } + } + + Err(UnificationError::NoPattern) + }, + + AnnotationContext::Load( path ) => { + /* todo: + * - open file at `path` + * - parse CommandPattern + CommandTypeStatement + * - get_type on AnnotationContext::Cached() + */ + + } + AnnotationContext::FindIn( path ) => { + // if let Some(command_name) = command_word.segments.get(0) { + /* todo: + * - use command_name to lookup file + * - forward to AnnotationContext::Load() + */ +/* + let mut err = UnificationError( vec![] ); + for file in path.direntries { + if let Ok(typ) = AnnotationContext::Load( path ).get_type() => { + + } + } +*/ + // } + } + } + } + + _ => { + Err(UnificationError::NoPattern) + } + } + } + } + diff --git a/src/cmd_type_dsl/mod.rs b/src/cmd_type_dsl/mod.rs new file mode 100644 index 0000000..4d7ec13 --- /dev/null +++ b/src/cmd_type_dsl/mod.rs @@ -0,0 +1,107 @@ +use std::{ + collections::HashMap, + boxed::Box +}; + +use crate::ast::Command; +use laddertypes::*; + + +pub struct Substitution(HashMap< String, CommandTypeExpr >); +impl Substitution { + pub fn apply(&self, expr: &mut CommandTypeExpr) { + + } +} + +pub enum CommandArgPattern { + Literal(String), + Variable(String), + VariablePack(Box<CommandArgPattern>), + Optional(Box<CommandArgPattern>), + Conjunction(Vec<CommandArgPattern>), + Disjunction(Vec<CommandArgPattern>) +} + +pub struct CommandPattern { + name: String, + args: Vec<CommandArgPattern>, + env: Vec<(String, CommandTypeExpr)>, +} + +impl CommandArgPattern { + pub fn match_cmd(&self, cmd: &Command) -> Result<Substitution, UnificationError> { + Err(UnificationError(vec![])) + } +} + +pub struct MatchCandidate { + at: usize, + expected: CommandPattern, + found: CommandTypeExpr, +} + +pub struct UnificationError( Vec<MatchCandidate> ); + + + +pub enum CommandTypeExpr { + Parameter(String), + ParameterPack(String), + Char(char), + Match(Box<CommandTypeExpr>, Vec<(CommandArgPattern, CommandTypeExpr)>) +} + +impl CommandTypeExpr { + pub fn eval(self) -> CommandTypeExpr { + match self { + s=>s + } + } +} + +pub struct FileDescriptor(u32); +pub enum PipeDirection { In, Out } + +pub enum Selector { + Pipe(FileDescriptor, PipeDirection), + Parameter(String), + ParameterPack(String), + File(String) +} + +pub enum CommandTypeStatement { + TypAssign(Selector, TypeTerm), + ValAssign(String, CommandTypeExpr), + Block(Vec<CommandTypeStatement>), + Match(Box<CommandTypeExpr>, Vec<(CommandArgPattern, CommandTypeStatement)>) +} + +pub struct CommandType(Vec<(Selector, TypeTerm)>); + +impl CommandTypeStatement { + pub fn eval(self) -> CommandType { + match self { + CommandTypeStatement::Block(stmnts) => { + CommandType( stmnts.into_iter().map(|stmnt| stmnt.eval().0.into_iter()).flatten().collect() ) + } + CommandTypeStatement::TypAssign(selector, typ) => { + CommandType( vec![ (selector, typ) ]) + } + CommandTypeStatement::ValAssign(variable, expr) => { + CommandType(vec![]) + } + CommandTypeStatement::Match(pattern, cases) => { + /* + for (case,stmnt) in cases.into_iter() { + if let Ok(unificator) = pattern + if let Ok() = case.match_expr() + CommandType( vec![] ) + } + */ + CommandType(vec![]) + } + } + } +} + diff --git a/src/main.rs b/src/main.rs index 3752690..f81dca8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,6 +11,8 @@ mod env; mod parse; //mod expand; +mod cmd_type_dsl; + //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ pub fn get_type_str(cmd: &str, item: &str) -> Option<String> { From 1d860d1811bf172a0f036f5d4e2d4e00ccaddfe3 Mon Sep 17 00:00:00 2001 From: Michael Sippel <micha@fragmental.art> Date: Mon, 30 Oct 2023 15:30:42 +0100 Subject: [PATCH 6/9] wip cmd-type-dsl --- src/ast.rs | 115 ----------- src/cmd_type_dsl/mod.rs | 66 +++--- src/env.rs | 9 - src/expand.rs | 44 ---- src/main.rs | 8 +- src/parse.rs | 438 ---------------------------------------- typeDB/cut | 14 +- typeDB/seq | 2 +- 8 files changed, 50 insertions(+), 646 deletions(-) delete mode 100644 src/ast.rs delete mode 100644 src/env.rs delete mode 100644 src/expand.rs delete mode 100644 src/parse.rs diff --git a/src/ast.rs b/src/ast.rs deleted file mode 100644 index 87e7aae..0000000 --- a/src/ast.rs +++ /dev/null @@ -1,115 +0,0 @@ -use std::boxed::Box; - -//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ - -#[derive(Debug, PartialEq)] -pub enum Command { - Simple { - assignments: Vec<Assignment>, - command_word: Word, - redirections: Vec<Redirection> - }, - Pipeline(Vec<Command>), - Sequence(Vec<Command>), - ShortCircuitConjunction(Vec<Command>), - ShortCircuitDisjunction(Vec<Command>), - Negation(Box<Command>), - While { - condition: Box<Command>, - loop_body: Box<Command> - }, - For { - varname: String, - sequence: Word, - loop_body: Box<Command> - }, - If { - condition: Box<Command>, - then_branch: Box<Command>, - else_branch: Box<Command> - }, - Case { - expr: Word, - cases: Vec<(Word, Command)> - }, - Function { - name: String, - body: Box<Command> - } -} - -//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ - -#[derive(Debug, PartialEq)] -pub struct Assignment { - pub name: String, - pub value: Word -} - -#[derive(Debug, PartialEq)] -pub struct Word { - pub segments: Vec<WordSegment> -} - -#[derive(Debug, PartialEq)] -pub enum WordSegment { - Tilde(String), - Literal(String), - Parameter(String, ParameterFormat), - Subshell(Command), - DoubleQuote(Word), -} - -#[derive(Debug, PartialEq)] -pub enum ParameterFormat { - Normal, - Length, - Default(Word), - Assign(Word), - Error(Word), - Alt(Word), - Sub(ParamSubSide, ParamSubMode, Word), -} - -#[derive(Debug, PartialEq)] -pub enum ParamSubMode { - Shortest, Longest -} - -#[derive(Debug, PartialEq)] -pub enum ParamSubSide { - Prefix, Suffix -} - -//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ - -#[derive(Debug, PartialEq)] -pub struct Redirection { - redirection_type: RedirectionType, - fd: u64, - target: Word -} - -#[derive(Debug, PartialEq)] -pub enum RedirectionType { - File(FileRedirectionType), - Dup(DupRedirectionType), - Heredoc // '<<' -} - -#[derive(Debug, PartialEq)] -pub enum FileRedirectionType { - In, // '<' - InOut, // '<>' - Out, // '>' - OutReplace, // '>|' - OutAppend, // '>>' -} - -#[derive(Debug, PartialEq)] -pub enum DupRedirectionType { - In, // '<&' - Out // '>&' -} - -//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ diff --git a/src/cmd_type_dsl/mod.rs b/src/cmd_type_dsl/mod.rs index 4d7ec13..1b55ab2 100644 --- a/src/cmd_type_dsl/mod.rs +++ b/src/cmd_type_dsl/mod.rs @@ -3,17 +3,12 @@ use std::{ boxed::Box }; -use crate::ast::Command; +use crate::sh::ast::Command; use laddertypes::*; - pub struct Substitution(HashMap< String, CommandTypeExpr >); -impl Substitution { - pub fn apply(&self, expr: &mut CommandTypeExpr) { - - } -} +#[derive(Clone)] pub enum CommandArgPattern { Literal(String), Variable(String), @@ -23,39 +18,58 @@ pub enum CommandArgPattern { Disjunction(Vec<CommandArgPattern>) } +#[derive(Clone)] pub struct CommandPattern { name: String, args: Vec<CommandArgPattern>, env: Vec<(String, CommandTypeExpr)>, } +#[derive(Clone)] +pub struct MatchCandidate { + at: usize, + expected: CommandPattern, + found: CommandTypeExpr, +} + +#[derive(Clone)] +pub struct UnificationError( Vec<MatchCandidate> ); + +#[derive(Clone)] +pub enum CommandTypeExpr { + Type(TypeTerm), + Match(Box<CommandTypeExpr>, Vec<(CommandArgPattern, CommandTypeExpr)>) +} + impl CommandArgPattern { pub fn match_cmd(&self, cmd: &Command) -> Result<Substitution, UnificationError> { Err(UnificationError(vec![])) } } -pub struct MatchCandidate { - at: usize, - expected: CommandPattern, - found: CommandTypeExpr, -} - -pub struct UnificationError( Vec<MatchCandidate> ); - - - -pub enum CommandTypeExpr { - Parameter(String), - ParameterPack(String), - Char(char), - Match(Box<CommandTypeExpr>, Vec<(CommandArgPattern, CommandTypeExpr)>) -} - impl CommandTypeExpr { - pub fn eval(self) -> CommandTypeExpr { + pub fn eval(self) -> Result<TypeTerm, CommandTypeExpr> { match self { - s=>s + CommandTypeExpr::Type(typ) => Ok(typ), + CommandTypeExpr::Match(pattern, cases) => { + + } + s=> Ok(s) + } + } + + pub fn apply_subst(&mut self, subst: &Substitution) { + match self { + CommandTypeExpr::Type(typ) => { + self = CommandTypeExpr::Type( + typ.apply_substitution(|v: String| subst.get(v)) + ); + } + CommandTypeExpr::Match( pattern, cases ) => { + + // todo + } + _ => {} } } } diff --git a/src/env.rs b/src/env.rs deleted file mode 100644 index 091e68d..0000000 --- a/src/env.rs +++ /dev/null @@ -1,9 +0,0 @@ -use std::collections::HashMap; - -//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ - -struct Environment { - variables: HashMap<String, String> -} - -//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ diff --git a/src/expand.rs b/src/expand.rs deleted file mode 100644 index 412fc5f..0000000 --- a/src/expand.rs +++ /dev/null @@ -1,44 +0,0 @@ -use crate::ast::*; - -//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ - -impl Word { - pub fn (&mut self, env: &Environment) { - for x in self.0.iter_mut() { - x.expand_tilde(); - match x { - Word::Tilde => Word::Literal( env.get_home() ), - other => other, - } - } - } - - pub fn expand(&self) -> Vec<String> { - let mut fields = Vec::new(); - - for seg in self.segments.iter() { - // - } - - fields - } - - pub fn split_field(&mut self) { - - } -} - -impl WordSegment { - pub fn split_field(&self) -> Word { - - match self { - - } - } - - pub fn expand(&self) -> Word { - match - } -} - -//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ diff --git a/src/main.rs b/src/main.rs index f81dca8..02e6b77 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,11 +6,7 @@ use { tiny_ansi::TinyAnsi }; -mod ast; -mod env; -mod parse; -//mod expand; - +mod sh; mod cmd_type_dsl; //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ @@ -42,7 +38,7 @@ fn main() { let stdin = std::io::stdin(); for line in std::io::BufReader::new(stdin).lines() { if let Ok(line) = line { - let cmd = parse::parse_cmd( &mut line.chars().peekable() ); + let cmd = sh::parse::parse_cmd( &mut line.chars().peekable() ); eprintln!("parsed cmd: {:?}", cmd); /* let mut lex = parse::WordLexer::from( line.chars() ); diff --git a/src/parse.rs b/src/parse.rs deleted file mode 100644 index b4db237..0000000 --- a/src/parse.rs +++ /dev/null @@ -1,438 +0,0 @@ -use { - crate::ast::*, - std::iter::{Peekable}, -}; - - -#[derive(Debug, PartialEq)] -pub enum LexError { - UnexpectedEnd(Vec<Option<char>>), - UnexpectedToken(char), - InvalidFileRedirectionType -} - - -///! iterates chars until it finds some char in `delim` -pub struct DelimIter<'a, It> -where It: Iterator<Item = char> { - chars: &'a mut Peekable<It>, - delim: Vec<(Option<char>, bool)> -} - -impl<'a, It> DelimIter<'a, It> -where It: Iterator<Item = char> { - fn new(chars: &'a mut Peekable<It>, delim: Vec<(Option<char>, bool)>) -> Self { - DelimIter { chars, delim } - } - - fn new_whitespace(chars: &'a mut Peekable<It>) -> Self { - DelimIter::new(chars, vec![ - (None, true), - (Some(' '), true), - (Some('\t'), true), - (Some('\n'), true) - ]) - } - - fn new_shell_word(chars: &'a mut Peekable<It>) -> Self { - DelimIter::new(chars, vec![ - (None, true), - (Some(' '), true), - (Some('\t'), true), - (Some('\n'), true), - (Some('|'), false), - (Some('&'), false), - (Some(';'), false), - (Some('\"'), false), - (Some('\''), false) - ]) - } - - fn new_shell_word_or_assignment(chars: &'a mut Peekable<It>) -> Self { - DelimIter::new(chars, vec![ - (None, true), - (Some(' '), true), - (Some('\t'), true), - (Some('\n'), true), - (Some('='), false), - (Some('|'), false), - (Some('&'), false), - (Some(';'), false), - (Some('\"'), false), - (Some('\''), false) - ]) - } -} - -impl<'a, It> Iterator for DelimIter<'a, It> -where It: 'a + Iterator<Item = char> { - type Item = Result<char, LexError>; - - fn next(&mut self) -> Option<Result<char, LexError>> { - for (delim, consume) in self.delim.iter() { - if self.chars.peek().cloned() == *delim { - if *consume { - self.chars.next(); - } - return None; - } - } - - match self.chars.next() { - Some(c) => Some(Ok(c)), - None => Some(Err(LexError::UnexpectedEnd(vec![]))) - } - } -} - - -pub struct WordLexer<'a, It> -where It: 'a + Iterator<Item = char> { - chars: &'a mut Peekable<It> -} - -impl<'a, It> WordLexer<'a, It> -where It: Iterator<Item = char> { - fn collect_until(&mut self, close: Option<char>) -> Result<String, LexError> { - DelimIter::new(&mut self.chars, vec![(close, true)]) - .try_collect::<String>() - } -} - -pub fn skip_whitespace<It>(chars: &mut Peekable<It>) -where It: Iterator<Item = char> -{ - while let Some(c) = chars.peek() { - if c.is_whitespace() { - chars.next(); - } else { - break; - } - } -} - -pub fn parse_quoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError> -where It: Iterator<Item = char> -{ - assert_eq!( chars.next(), Some('\'')); - let quoted = DelimIter::new(chars, vec![(Some('\''), true)]).try_collect::<String>(); - match quoted { - Ok(s) => { - Ok(WordSegment::Literal(s)) - }, - Err(e) => Err(e) - } -} - -pub fn parse_doublequoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError> -where It: Iterator<Item = char> -{ - assert_eq!( chars.next(), Some('\"')); - let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::<String>(); - match quoted { - Ok(s) => { - let word = Word { - segments: // fixme: handle spaces correctly -> create QuoteLexer - WordLexer { chars: &mut s.chars().peekable() } - .scan((), |_, x| x.ok()) - .collect::<Vec<_>>() - }; - - Ok(WordSegment::DoubleQuote(word)) - }, - Err(e) => Err(e) - } -} - -pub fn parse_word<It>(chars: &mut Peekable<It>) -> Result<Word, LexError> -where It: Iterator<Item = char> -{ - Ok(Word { - segments: WordLexer{ chars }.try_collect::<Vec<_>>()? - }) -} - -impl std::str::FromStr for FileRedirectionType { - type Err = LexError; - - fn from_str(s: &str) -> Result<FileRedirectionType, LexError> { - match s { - "<" => Ok(FileRedirectionType::In), - "<>" => Ok(FileRedirectionType::InOut), - ">" => Ok(FileRedirectionType::Out), - ">|" => Ok(FileRedirectionType::OutReplace), - ">>" => Ok(FileRedirectionType::OutAppend), - _ => Err(LexError::InvalidFileRedirectionType) - } - } -} - -pub fn parse_redirection<It>(chars: &mut Peekable<It>) -> Result<Redirection, LexError> -where It: Iterator<Item = char> -{ - Err(LexError::InvalidFileRedirectionType) - // let name = DelimIterator::new(chars, vec!['<', '>']).collect::<String>(); -} - -pub fn parse_simple_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> -where It: Iterator<Item = char> -{ - let mut assignments = Vec::new(); - let mut redirections = Vec::new(); - - if chars.peek() == None { - return Ok(None); - } - - loop { - skip_whitespace(chars); - let mut name = DelimIter::new_shell_word_or_assignment(chars).try_collect::<String>()?; - - match chars.peek().clone() { - Some('=') => { - chars.next(); - let mut lex = WordLexer{ chars }; - match lex.next() { - Some(Ok(value)) => { - assignments.push(Assignment { name, value: Word{ segments: vec![ value ] } }); - }, - Some(Err(e)) => { - return Err(e); - }, - None => { - return Err(LexError::UnexpectedEnd(vec![])); - } - } - } - _ => { - let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?; - cmd_segments.insert(0, WordSegment::Literal(name)); - - return Ok(Some(Command::Simple { - assignments, - command_word: Word { segments: cmd_segments }, - redirections, - })); - } - } - } -} - -pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> -where It: Iterator<Item = char> -{ - skip_whitespace(chars); - match chars.peek() { - Some('!') => { - chars.next(); - if let Some(cmd) = parse_cmd(chars)? { - Ok(Some(Command::Negation(Box::new(cmd)))) - } else { - Err(LexError::UnexpectedEnd(vec![])) - } - } - _ => { - if let Some(head) = parse_simple_cmd(chars)? { - skip_whitespace(chars); - - match chars.peek() { - Some(';') => { - chars.next(); - - let tail = parse_cmd( chars ) ?; - match tail { - Some(Command::Sequence(mut s)) => { - s.insert(0, head); - Ok(Some(Command::Sequence(s))) - } - Some(tail) => { - Ok(Some(Command::Sequence(vec![ head, tail ]))) - } - None => { - Ok(Some(head)) - } - } - } - Some('|') => { - chars.next(); - match chars.peek() { - Some('|') => { - chars.next(); - - let tail = parse_cmd( chars ) ?; - match tail { - Some(Command::ShortCircuitDisjunction(mut s)) => { - s.insert(0, head); - Ok(Some(Command::ShortCircuitDisjunction(s))) - } - Some(tail) => { - Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ]))) - } - None => { - Err(LexError::UnexpectedEnd(vec![Some('|')])) - } - } - } - _ => { - let tail = parse_cmd( chars ) ?; - match tail { - Some(Command::Pipeline(mut s)) => { - s.insert(0, head); - Ok(Some(Command::Pipeline(s))) - } - Some(tail) => { - Ok(Some(Command::Pipeline(vec![ head, tail ]))) - } - None => { - Err(LexError::UnexpectedEnd(vec![])) - } - } - } - } - } - Some('&') => { - chars.next(); - match chars.peek() { - Some('&') => { - chars.next(); - - let tail = parse_cmd( chars ) ?; - match tail { - Some(Command::ShortCircuitConjunction(mut s)) => { - s.insert(0, head); - Ok(Some(Command::ShortCircuitConjunction(s))) - } - Some(tail) => { - Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ]))) - } - None => { - Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')])) - } - } - } - Some(c) => { - Err(LexError::UnexpectedToken(*c)) - } - None => { - // todo: - // background job - Ok(Some(head)) - } - } - } - Some(c) => { - Err(LexError::UnexpectedToken(*c)) - } - None => { - Ok(Some(head)) - } - } - } else { - Ok(None) - } - } - } -} - -impl<'a, It> Iterator for WordLexer<'a, It> -where It: 'a + Iterator<Item = char> { - type Item = Result<WordSegment, LexError>; - - fn next(&mut self) -> Option<Result<WordSegment, LexError>> { - skip_whitespace(self.chars); - match self.chars.peek().cloned() { - Some('|') => { None } - Some('&') => { None } - Some(';') => { None } - Some('~') => { - self.chars.next(); - let user = DelimIter::new_whitespace(self.chars).collect(); - match user { - Ok(user) => Some(Ok(WordSegment::Tilde(user))), - Err(e) => Some(Err(e)) - } - } - Some('"') => { Some(parse_doublequoted(self.chars)) }, - Some('\'') => { Some(parse_quoted(self.chars)) }, - Some('$') => { - self.chars.next(); - match self.chars.peek() { - Some('{') => { - self.chars.next(); - match DelimIter::new(&mut self.chars, vec![(Some('}'), true)]).try_collect::<String>() { - Ok(s) => { - Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) - } - Err(e) => Some(Err(e)) - } - } - Some('(') => { - self.chars.next(); - let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>(); - match subcmd_str { - Ok(subcmd_str) => { - match parse_cmd(&mut subcmd_str.chars().peekable()) { - Ok(Some(subcmd)) => { - Some(Ok(WordSegment::Subshell(subcmd))) - } - Ok(None) => None, - Err(err) => Some(Err(err)) - } - } - Err(err) => Some(Err(err)) - } - } - _ => { - match DelimIter::new_whitespace(self.chars).collect() { - Ok(s) => { - Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) - } - Err(e) => Some(Err(e)) - } - } - } - } - Some(c) => { - let s : Result<String, LexError> = DelimIter::new_shell_word(self.chars).collect(); - match s { - Ok(s) => Some(Ok(WordSegment::Literal(s))), - Err(e) => Some(Err(e)) - } - } - None => { - None - } - } - } -} - - -mod test { - use crate::parse::*; - - #[test] - fn test_delim_iter() { - let mut cs = "test 1234".chars().peekable(); - let mut lexer = DelimIter::new_shell_word(&mut cs); - assert_eq!(lexer.try_collect::<String>(), Ok(String::from("test"))); - } - - #[test] - fn test_word_lexer() { - let mut cs = "test 1234|test".chars().peekable(); - - { - let mut lexer = WordLexer{ chars: &mut cs }; - assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); - assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234"))))); - assert_eq!(lexer.next(), None); - } - assert_eq!(cs.next(), Some('|')); - { - let mut lexer = WordLexer{ chars: &mut cs }; - assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); - assert_eq!(lexer.next(), None); - } - } -} - diff --git a/typeDB/cut b/typeDB/cut index e860067..e9443b2 100644 --- a/typeDB/cut +++ b/typeDB/cut @@ -2,17 +2,17 @@ match OPTION... { --help { <1 : Help~<Seq Char> ; } - --version { <1 : VersionInfo~<Seq Char> ; } + --version { <1 : VersionInfo~<Seq Char> ; } * { - match OPTION... { - -f--fields LIST:<Seq ℕ>~CutFieldList~<Seq Char> { + match OPTION... { + -f--fields LIST:<Seq ℕ>~CutFieldList~<Seq Char> { ROW-DELIM = match OPTION... { -z--zero-terminated { '\0' } * { '\n' } - }; + }; IN-COL-DELIM = match OPTION... { @@ -22,9 +22,9 @@ OUT-COL-DELIM = match OPTION... { - --output-delimiter STRING:<Seq Char> { STRING } - * { IN-COL-DELIM } - }; + --output-delimiter STRING:<Seq Char> { STRING } + * { IN-COL-DELIM } + }; FILE : Path ~<Seq PathSegment diff --git a/typeDB/seq b/typeDB/seq index b585a9a..1119ab5 100644 --- a/typeDB/seq +++ b/typeDB/seq @@ -19,7 +19,7 @@ $INCREMENT : NUM ; $LAST : NUM ; - <1 : <Seq NUM~<Seq Char>>~<SepSeq Char SEP>~<Seq Char> ; + <1 : <Seq NUM~<Seq Char>>~<SepSeq Char SEP>~<Seq Char> ; } } } From e0c3acab63a771645282b667b2f161f71e393da8 Mon Sep 17 00:00:00 2001 From: Michael Sippel <micha@fragmental.art> Date: Mon, 30 Oct 2023 23:23:45 +0100 Subject: [PATCH 7/9] parser: correct handling of spaces in double quotes --- src/sh/ast.rs | 115 +++++++++++ src/sh/mod.rs | 7 + src/sh/mod.rs~ | 3 + src/sh/parse.rs | 484 +++++++++++++++++++++++++++++++++++++++++++++++ src/sh/parse.rs~ | 438 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1047 insertions(+) create mode 100644 src/sh/ast.rs create mode 100644 src/sh/mod.rs create mode 100644 src/sh/mod.rs~ create mode 100644 src/sh/parse.rs create mode 100644 src/sh/parse.rs~ diff --git a/src/sh/ast.rs b/src/sh/ast.rs new file mode 100644 index 0000000..87e7aae --- /dev/null +++ b/src/sh/ast.rs @@ -0,0 +1,115 @@ +use std::boxed::Box; + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +#[derive(Debug, PartialEq)] +pub enum Command { + Simple { + assignments: Vec<Assignment>, + command_word: Word, + redirections: Vec<Redirection> + }, + Pipeline(Vec<Command>), + Sequence(Vec<Command>), + ShortCircuitConjunction(Vec<Command>), + ShortCircuitDisjunction(Vec<Command>), + Negation(Box<Command>), + While { + condition: Box<Command>, + loop_body: Box<Command> + }, + For { + varname: String, + sequence: Word, + loop_body: Box<Command> + }, + If { + condition: Box<Command>, + then_branch: Box<Command>, + else_branch: Box<Command> + }, + Case { + expr: Word, + cases: Vec<(Word, Command)> + }, + Function { + name: String, + body: Box<Command> + } +} + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +#[derive(Debug, PartialEq)] +pub struct Assignment { + pub name: String, + pub value: Word +} + +#[derive(Debug, PartialEq)] +pub struct Word { + pub segments: Vec<WordSegment> +} + +#[derive(Debug, PartialEq)] +pub enum WordSegment { + Tilde(String), + Literal(String), + Parameter(String, ParameterFormat), + Subshell(Command), + DoubleQuote(Word), +} + +#[derive(Debug, PartialEq)] +pub enum ParameterFormat { + Normal, + Length, + Default(Word), + Assign(Word), + Error(Word), + Alt(Word), + Sub(ParamSubSide, ParamSubMode, Word), +} + +#[derive(Debug, PartialEq)] +pub enum ParamSubMode { + Shortest, Longest +} + +#[derive(Debug, PartialEq)] +pub enum ParamSubSide { + Prefix, Suffix +} + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +#[derive(Debug, PartialEq)] +pub struct Redirection { + redirection_type: RedirectionType, + fd: u64, + target: Word +} + +#[derive(Debug, PartialEq)] +pub enum RedirectionType { + File(FileRedirectionType), + Dup(DupRedirectionType), + Heredoc // '<<' +} + +#[derive(Debug, PartialEq)] +pub enum FileRedirectionType { + In, // '<' + InOut, // '<>' + Out, // '>' + OutReplace, // '>|' + OutAppend, // '>>' +} + +#[derive(Debug, PartialEq)] +pub enum DupRedirectionType { + In, // '<&' + Out // '>&' +} + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ diff --git a/src/sh/mod.rs b/src/sh/mod.rs new file mode 100644 index 0000000..0c08c79 --- /dev/null +++ b/src/sh/mod.rs @@ -0,0 +1,7 @@ + +pub mod ast; +pub mod parse; + + +pub use ast::*; +pub use parse::*; diff --git a/src/sh/mod.rs~ b/src/sh/mod.rs~ new file mode 100644 index 0000000..cf2b51c --- /dev/null +++ b/src/sh/mod.rs~ @@ -0,0 +1,3 @@ + +pub mod ast; +pub mod parse; diff --git a/src/sh/parse.rs b/src/sh/parse.rs new file mode 100644 index 0000000..6b5691a --- /dev/null +++ b/src/sh/parse.rs @@ -0,0 +1,484 @@ +use { + crate::sh::ast::*, + std::iter::{Peekable}, +}; + + +#[derive(Debug, PartialEq)] +pub enum LexError { + UnexpectedEnd(Vec<Option<char>>), + UnexpectedToken(char), + InvalidFileRedirectionType +} + + +///! iterates chars until it finds some char in `delim` +pub struct DelimIter<'a, It> +where It: Iterator<Item = char> { + chars: &'a mut Peekable<It>, + delim: Vec<(Option<char>, bool)> +} + +impl<'a, It> DelimIter<'a, It> +where It: Iterator<Item = char> { + fn new(chars: &'a mut Peekable<It>, delim: Vec<(Option<char>, bool)>) -> Self { + DelimIter { chars, delim } + } + + fn new_whitespace(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true) + ]) + } + + fn new_shell_word(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true), + (Some('|'), false), + (Some('&'), false), + (Some(';'), false), + (Some('\"'), false), + (Some('\''), false) + ]) + } + + fn new_shell_word_or_assignment(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true), + (Some('='), false), + (Some('|'), false), + (Some('&'), false), + (Some(';'), false), + (Some('\"'), false), + (Some('\''), false) + ]) + } +} + +impl<'a, It> Iterator for DelimIter<'a, It> +where It: 'a + Iterator<Item = char> { + type Item = Result<char, LexError>; + + fn next(&mut self) -> Option<Result<char, LexError>> { + for (delim, consume) in self.delim.iter() { + if self.chars.peek().cloned() == *delim { + if *consume { + self.chars.next(); + } + return None; + } + } + + match self.chars.next() { + Some(c) => Some(Ok(c)), + None => Some(Err(LexError::UnexpectedEnd(vec![]))) + } + } +} + + +pub struct WordLexer<'a, It> +where It: 'a + Iterator<Item = char> { + chars: &'a mut Peekable<It> +} + +impl<'a, It> WordLexer<'a, It> +where It: Iterator<Item = char> { + fn collect_until(&mut self, close: Option<char>) -> Result<String, LexError> { + DelimIter::new(&mut self.chars, vec![(close, true)]) + .try_collect::<String>() + } +} + +pub struct SubstLexer<'a, It> +where It: 'a + Iterator<Item = char> { + chars: &'a mut Peekable<It> +} + +pub fn skip_whitespace<It>(chars: &mut Peekable<It>) +where It: Iterator<Item = char> +{ + while let Some(c) = chars.peek() { + if c.is_whitespace() { + chars.next(); + } else { + break; + } + } +} + +pub fn parse_quoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError> +where It: Iterator<Item = char> +{ + assert_eq!( chars.next(), Some('\'')); + let quoted = DelimIter::new(chars, vec![(Some('\''), true)]).try_collect::<String>(); + match quoted { + Ok(s) => { + Ok(WordSegment::Literal(s)) + }, + Err(e) => Err(e) + } +} + +pub fn parse_doublequoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError> +where It: Iterator<Item = char> +{ + assert_eq!( chars.next(), Some('\"')); + let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::<String>(); + + match quoted { + Ok(s) => { + let word = Word { + segments: SubstLexer { chars: &mut s.chars().peekable() } + .try_collect::<Vec<_>>()? +// .scan((), |_, x| x.ok()) +// .collect::<Vec<_>>() + }; + + Ok(WordSegment::DoubleQuote(word)) + }, + Err(e) => Err(e) + } +} + +pub fn parse_word<It>(chars: &mut Peekable<It>) -> Result<Word, LexError> +where It: Iterator<Item = char> +{ + Ok(Word { + segments: WordLexer{ chars }.try_collect::<Vec<_>>()? + }) +} + +impl std::str::FromStr for FileRedirectionType { + type Err = LexError; + + fn from_str(s: &str) -> Result<FileRedirectionType, LexError> { + match s { + "<" => Ok(FileRedirectionType::In), + "<>" => Ok(FileRedirectionType::InOut), + ">" => Ok(FileRedirectionType::Out), + ">|" => Ok(FileRedirectionType::OutReplace), + ">>" => Ok(FileRedirectionType::OutAppend), + _ => Err(LexError::InvalidFileRedirectionType) + } + } +} + +pub fn parse_redirection<It>(chars: &mut Peekable<It>) -> Result<Redirection, LexError> +where It: Iterator<Item = char> +{ + Err(LexError::InvalidFileRedirectionType) + // let name = DelimIterator::new(chars, vec!['<', '>']).collect::<String>(); +} + +pub fn parse_simple_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> +where It: Iterator<Item = char> +{ + let mut assignments = Vec::new(); + let mut redirections = Vec::new(); + + if chars.peek() == None { + return Ok(None); + } + + loop { + skip_whitespace(chars); + let mut name = DelimIter::new_shell_word_or_assignment(chars).try_collect::<String>()?; + + match chars.peek().clone() { + Some('=') => { + chars.next(); + let mut lex = WordLexer{ chars }; + match lex.next() { + Some(Ok(value)) => { + assignments.push(Assignment { name, value: Word{ segments: vec![ value ] } }); + }, + Some(Err(e)) => { + return Err(e); + }, + None => { + return Err(LexError::UnexpectedEnd(vec![])); + } + } + } + _ => { + let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?; + cmd_segments.insert(0, WordSegment::Literal(name)); + + return Ok(Some(Command::Simple { + assignments, + command_word: Word { segments: cmd_segments }, + redirections, + })); + } + } + } +} + +pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> +where It: Iterator<Item = char> +{ + skip_whitespace(chars); + match chars.peek() { + Some('!') => { + chars.next(); + if let Some(cmd) = parse_cmd(chars)? { + Ok(Some(Command::Negation(Box::new(cmd)))) + } else { + Err(LexError::UnexpectedEnd(vec![])) + } + } + _ => { + if let Some(head) = parse_simple_cmd(chars)? { + skip_whitespace(chars); + + match chars.peek() { + Some(';') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::Sequence(mut s)) => { + s.insert(0, head); + Ok(Some(Command::Sequence(s))) + } + Some(tail) => { + Ok(Some(Command::Sequence(vec![ head, tail ]))) + } + None => { + Ok(Some(head)) + } + } + } + Some('|') => { + chars.next(); + match chars.peek() { + Some('|') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::ShortCircuitDisjunction(mut s)) => { + s.insert(0, head); + Ok(Some(Command::ShortCircuitDisjunction(s))) + } + Some(tail) => { + Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![Some('|')])) + } + } + } + _ => { + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::Pipeline(mut s)) => { + s.insert(0, head); + Ok(Some(Command::Pipeline(s))) + } + Some(tail) => { + Ok(Some(Command::Pipeline(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![])) + } + } + } + } + } + Some('&') => { + chars.next(); + match chars.peek() { + Some('&') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::ShortCircuitConjunction(mut s)) => { + s.insert(0, head); + Ok(Some(Command::ShortCircuitConjunction(s))) + } + Some(tail) => { + Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')])) + } + } + } + Some(c) => { + Err(LexError::UnexpectedToken(*c)) + } + None => { + // todo: + // background job + Ok(Some(head)) + } + } + } + Some(c) => { + Err(LexError::UnexpectedToken(*c)) + } + None => { + Ok(Some(head)) + } + } + } else { + Ok(None) + } + } + } +} +impl<'a, It> Iterator for SubstLexer<'a, It> +where It: 'a + Iterator<Item = char> { + type Item = Result<WordSegment, LexError>; + + fn next(&mut self) -> Option<Result<WordSegment, LexError>> { + match self.chars.peek().cloned() { + Some('$') => { + self.chars.next(); + match self.chars.peek() { + // curly-braced parameter e.g. `${PARAM}` + Some('{') => { + self.chars.next(); + match DelimIter::new(&mut self.chars, vec![(Some('}'), true)]).try_collect::<String>() { + Ok(s) => { + Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) + } + Err(e) => Some(Err(e)) + } + } + + // Subshell + Some('(') => { + self.chars.next(); + let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>(); + match subcmd_str { + Ok(subcmd_str) => { + match parse_cmd(&mut subcmd_str.chars().peekable()) { + Ok(Some(subcmd)) => { + Some(Ok(WordSegment::Subshell(subcmd))) + } + Ok(None) => None, + Err(err) => Some(Err(err)) + } + } + Err(err) => Some(Err(err)) + } + } + + // plain parameter name e.g. `$PARAM` + _ => { + match DelimIter::new_whitespace(self.chars).collect() { + Ok(s) => { + Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) + } + Err(e) => Some(Err(e)) + } + } + } + } + + // not a substitution, + // take as literal until next $. + _ => { + let lit_str = DelimIter::new( + &mut self.chars, + vec![ + (None, true), + (Some('$'), false) + ] + ).try_collect::<String>(); + + match lit_str { + Ok(s) => { + if s.len() > 0 { + Some(Ok(WordSegment::Literal(s))) + } else { + None + } + } + Err(e) => Some(Err(e)) + } + } + } + } +} + +impl<'a, It> Iterator for WordLexer<'a, It> +where It: 'a + Iterator<Item = char> { + type Item = Result<WordSegment, LexError>; + + fn next(&mut self) -> Option<Result<WordSegment, LexError>> { + skip_whitespace(self.chars); + match self.chars.peek().cloned() { + Some('|') => { None } + Some('&') => { None } + Some(';') => { None } + Some('~') => { + self.chars.next(); + let user = DelimIter::new_whitespace(self.chars).collect(); + match user { + Ok(user) => Some(Ok(WordSegment::Tilde(user))), + Err(e) => Some(Err(e)) + } + } + Some('"') => { Some(parse_doublequoted(self.chars)) }, + Some('\'') => { Some(parse_quoted(self.chars)) }, + Some('$') => { + SubstLexer{ chars: &mut self.chars }.next() + } + Some(c) => { + let s : Result<String, LexError> = DelimIter::new_shell_word(self.chars).collect(); + match s { + Ok(s) => Some(Ok(WordSegment::Literal(s))), + Err(e) => Some(Err(e)) + } + } + None => { + None + } + } + } +} + + +mod test { + use crate::sh::parse::*; + + #[test] + fn test_delim_iter() { + let mut cs = "test 1234".chars().peekable(); + let mut lexer = DelimIter::new_shell_word(&mut cs); + assert_eq!(lexer.try_collect::<String>(), Ok(String::from("test"))); + } + + #[test] + fn test_word_lexer() { + let mut cs = "test 1234|test".chars().peekable(); + + { + let mut lexer = WordLexer{ chars: &mut cs }; + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234"))))); + assert_eq!(lexer.next(), None); + } + assert_eq!(cs.next(), Some('|')); + { + let mut lexer = WordLexer{ chars: &mut cs }; + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); + assert_eq!(lexer.next(), None); + } + } +} + diff --git a/src/sh/parse.rs~ b/src/sh/parse.rs~ new file mode 100644 index 0000000..b4db237 --- /dev/null +++ b/src/sh/parse.rs~ @@ -0,0 +1,438 @@ +use { + crate::ast::*, + std::iter::{Peekable}, +}; + + +#[derive(Debug, PartialEq)] +pub enum LexError { + UnexpectedEnd(Vec<Option<char>>), + UnexpectedToken(char), + InvalidFileRedirectionType +} + + +///! iterates chars until it finds some char in `delim` +pub struct DelimIter<'a, It> +where It: Iterator<Item = char> { + chars: &'a mut Peekable<It>, + delim: Vec<(Option<char>, bool)> +} + +impl<'a, It> DelimIter<'a, It> +where It: Iterator<Item = char> { + fn new(chars: &'a mut Peekable<It>, delim: Vec<(Option<char>, bool)>) -> Self { + DelimIter { chars, delim } + } + + fn new_whitespace(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true) + ]) + } + + fn new_shell_word(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true), + (Some('|'), false), + (Some('&'), false), + (Some(';'), false), + (Some('\"'), false), + (Some('\''), false) + ]) + } + + fn new_shell_word_or_assignment(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true), + (Some('='), false), + (Some('|'), false), + (Some('&'), false), + (Some(';'), false), + (Some('\"'), false), + (Some('\''), false) + ]) + } +} + +impl<'a, It> Iterator for DelimIter<'a, It> +where It: 'a + Iterator<Item = char> { + type Item = Result<char, LexError>; + + fn next(&mut self) -> Option<Result<char, LexError>> { + for (delim, consume) in self.delim.iter() { + if self.chars.peek().cloned() == *delim { + if *consume { + self.chars.next(); + } + return None; + } + } + + match self.chars.next() { + Some(c) => Some(Ok(c)), + None => Some(Err(LexError::UnexpectedEnd(vec![]))) + } + } +} + + +pub struct WordLexer<'a, It> +where It: 'a + Iterator<Item = char> { + chars: &'a mut Peekable<It> +} + +impl<'a, It> WordLexer<'a, It> +where It: Iterator<Item = char> { + fn collect_until(&mut self, close: Option<char>) -> Result<String, LexError> { + DelimIter::new(&mut self.chars, vec![(close, true)]) + .try_collect::<String>() + } +} + +pub fn skip_whitespace<It>(chars: &mut Peekable<It>) +where It: Iterator<Item = char> +{ + while let Some(c) = chars.peek() { + if c.is_whitespace() { + chars.next(); + } else { + break; + } + } +} + +pub fn parse_quoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError> +where It: Iterator<Item = char> +{ + assert_eq!( chars.next(), Some('\'')); + let quoted = DelimIter::new(chars, vec![(Some('\''), true)]).try_collect::<String>(); + match quoted { + Ok(s) => { + Ok(WordSegment::Literal(s)) + }, + Err(e) => Err(e) + } +} + +pub fn parse_doublequoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError> +where It: Iterator<Item = char> +{ + assert_eq!( chars.next(), Some('\"')); + let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::<String>(); + match quoted { + Ok(s) => { + let word = Word { + segments: // fixme: handle spaces correctly -> create QuoteLexer + WordLexer { chars: &mut s.chars().peekable() } + .scan((), |_, x| x.ok()) + .collect::<Vec<_>>() + }; + + Ok(WordSegment::DoubleQuote(word)) + }, + Err(e) => Err(e) + } +} + +pub fn parse_word<It>(chars: &mut Peekable<It>) -> Result<Word, LexError> +where It: Iterator<Item = char> +{ + Ok(Word { + segments: WordLexer{ chars }.try_collect::<Vec<_>>()? + }) +} + +impl std::str::FromStr for FileRedirectionType { + type Err = LexError; + + fn from_str(s: &str) -> Result<FileRedirectionType, LexError> { + match s { + "<" => Ok(FileRedirectionType::In), + "<>" => Ok(FileRedirectionType::InOut), + ">" => Ok(FileRedirectionType::Out), + ">|" => Ok(FileRedirectionType::OutReplace), + ">>" => Ok(FileRedirectionType::OutAppend), + _ => Err(LexError::InvalidFileRedirectionType) + } + } +} + +pub fn parse_redirection<It>(chars: &mut Peekable<It>) -> Result<Redirection, LexError> +where It: Iterator<Item = char> +{ + Err(LexError::InvalidFileRedirectionType) + // let name = DelimIterator::new(chars, vec!['<', '>']).collect::<String>(); +} + +pub fn parse_simple_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> +where It: Iterator<Item = char> +{ + let mut assignments = Vec::new(); + let mut redirections = Vec::new(); + + if chars.peek() == None { + return Ok(None); + } + + loop { + skip_whitespace(chars); + let mut name = DelimIter::new_shell_word_or_assignment(chars).try_collect::<String>()?; + + match chars.peek().clone() { + Some('=') => { + chars.next(); + let mut lex = WordLexer{ chars }; + match lex.next() { + Some(Ok(value)) => { + assignments.push(Assignment { name, value: Word{ segments: vec![ value ] } }); + }, + Some(Err(e)) => { + return Err(e); + }, + None => { + return Err(LexError::UnexpectedEnd(vec![])); + } + } + } + _ => { + let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?; + cmd_segments.insert(0, WordSegment::Literal(name)); + + return Ok(Some(Command::Simple { + assignments, + command_word: Word { segments: cmd_segments }, + redirections, + })); + } + } + } +} + +pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> +where It: Iterator<Item = char> +{ + skip_whitespace(chars); + match chars.peek() { + Some('!') => { + chars.next(); + if let Some(cmd) = parse_cmd(chars)? { + Ok(Some(Command::Negation(Box::new(cmd)))) + } else { + Err(LexError::UnexpectedEnd(vec![])) + } + } + _ => { + if let Some(head) = parse_simple_cmd(chars)? { + skip_whitespace(chars); + + match chars.peek() { + Some(';') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::Sequence(mut s)) => { + s.insert(0, head); + Ok(Some(Command::Sequence(s))) + } + Some(tail) => { + Ok(Some(Command::Sequence(vec![ head, tail ]))) + } + None => { + Ok(Some(head)) + } + } + } + Some('|') => { + chars.next(); + match chars.peek() { + Some('|') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::ShortCircuitDisjunction(mut s)) => { + s.insert(0, head); + Ok(Some(Command::ShortCircuitDisjunction(s))) + } + Some(tail) => { + Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![Some('|')])) + } + } + } + _ => { + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::Pipeline(mut s)) => { + s.insert(0, head); + Ok(Some(Command::Pipeline(s))) + } + Some(tail) => { + Ok(Some(Command::Pipeline(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![])) + } + } + } + } + } + Some('&') => { + chars.next(); + match chars.peek() { + Some('&') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::ShortCircuitConjunction(mut s)) => { + s.insert(0, head); + Ok(Some(Command::ShortCircuitConjunction(s))) + } + Some(tail) => { + Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')])) + } + } + } + Some(c) => { + Err(LexError::UnexpectedToken(*c)) + } + None => { + // todo: + // background job + Ok(Some(head)) + } + } + } + Some(c) => { + Err(LexError::UnexpectedToken(*c)) + } + None => { + Ok(Some(head)) + } + } + } else { + Ok(None) + } + } + } +} + +impl<'a, It> Iterator for WordLexer<'a, It> +where It: 'a + Iterator<Item = char> { + type Item = Result<WordSegment, LexError>; + + fn next(&mut self) -> Option<Result<WordSegment, LexError>> { + skip_whitespace(self.chars); + match self.chars.peek().cloned() { + Some('|') => { None } + Some('&') => { None } + Some(';') => { None } + Some('~') => { + self.chars.next(); + let user = DelimIter::new_whitespace(self.chars).collect(); + match user { + Ok(user) => Some(Ok(WordSegment::Tilde(user))), + Err(e) => Some(Err(e)) + } + } + Some('"') => { Some(parse_doublequoted(self.chars)) }, + Some('\'') => { Some(parse_quoted(self.chars)) }, + Some('$') => { + self.chars.next(); + match self.chars.peek() { + Some('{') => { + self.chars.next(); + match DelimIter::new(&mut self.chars, vec![(Some('}'), true)]).try_collect::<String>() { + Ok(s) => { + Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) + } + Err(e) => Some(Err(e)) + } + } + Some('(') => { + self.chars.next(); + let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>(); + match subcmd_str { + Ok(subcmd_str) => { + match parse_cmd(&mut subcmd_str.chars().peekable()) { + Ok(Some(subcmd)) => { + Some(Ok(WordSegment::Subshell(subcmd))) + } + Ok(None) => None, + Err(err) => Some(Err(err)) + } + } + Err(err) => Some(Err(err)) + } + } + _ => { + match DelimIter::new_whitespace(self.chars).collect() { + Ok(s) => { + Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) + } + Err(e) => Some(Err(e)) + } + } + } + } + Some(c) => { + let s : Result<String, LexError> = DelimIter::new_shell_word(self.chars).collect(); + match s { + Ok(s) => Some(Ok(WordSegment::Literal(s))), + Err(e) => Some(Err(e)) + } + } + None => { + None + } + } + } +} + + +mod test { + use crate::parse::*; + + #[test] + fn test_delim_iter() { + let mut cs = "test 1234".chars().peekable(); + let mut lexer = DelimIter::new_shell_word(&mut cs); + assert_eq!(lexer.try_collect::<String>(), Ok(String::from("test"))); + } + + #[test] + fn test_word_lexer() { + let mut cs = "test 1234|test".chars().peekable(); + + { + let mut lexer = WordLexer{ chars: &mut cs }; + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234"))))); + assert_eq!(lexer.next(), None); + } + assert_eq!(cs.next(), Some('|')); + { + let mut lexer = WordLexer{ chars: &mut cs }; + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); + assert_eq!(lexer.next(), None); + } + } +} + From 903d6dd64fc938f6027853560111ba6edfbcdf52 Mon Sep 17 00:00:00 2001 From: Michael Sippel <micha@fragmental.art> Date: Tue, 31 Oct 2023 16:30:08 +0100 Subject: [PATCH 8/9] parser: remove deprecated collect_until --- src/sh/parse.rs | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/sh/parse.rs b/src/sh/parse.rs index 6b5691a..41a7a52 100644 --- a/src/sh/parse.rs +++ b/src/sh/parse.rs @@ -85,20 +85,11 @@ where It: 'a + Iterator<Item = char> { } } - pub struct WordLexer<'a, It> where It: 'a + Iterator<Item = char> { chars: &'a mut Peekable<It> } -impl<'a, It> WordLexer<'a, It> -where It: Iterator<Item = char> { - fn collect_until(&mut self, close: Option<char>) -> Result<String, LexError> { - DelimIter::new(&mut self.chars, vec![(close, true)]) - .try_collect::<String>() - } -} - pub struct SubstLexer<'a, It> where It: 'a + Iterator<Item = char> { chars: &'a mut Peekable<It> @@ -140,8 +131,6 @@ where It: Iterator<Item = char> let word = Word { segments: SubstLexer { chars: &mut s.chars().peekable() } .try_collect::<Vec<_>>()? -// .scan((), |_, x| x.ok()) -// .collect::<Vec<_>>() }; Ok(WordSegment::DoubleQuote(word)) @@ -362,6 +351,7 @@ where It: 'a + Iterator<Item = char> { // Subshell Some('(') => { self.chars.next(); + let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>(); match subcmd_str { Ok(subcmd_str) => { From c49577e9247ed2325a97f48cae93c70b6823c5b7 Mon Sep 17 00:00:00 2001 From: Michael Sippel <micha@fragmental.art> Date: Wed, 8 Nov 2023 04:22:08 +0100 Subject: [PATCH 9/9] parser: fix recursive subshells --- src/main.rs | 2 +- src/sh/parse.rs | 88 ++++++++++++++++++++++++------------------------- 2 files changed, 44 insertions(+), 46 deletions(-) diff --git a/src/main.rs b/src/main.rs index 02e6b77..9b45a18 100644 --- a/src/main.rs +++ b/src/main.rs @@ -38,7 +38,7 @@ fn main() { let stdin = std::io::stdin(); for line in std::io::BufReader::new(stdin).lines() { if let Ok(line) = line { - let cmd = sh::parse::parse_cmd( &mut line.chars().peekable() ); + let cmd = sh::parse::parse_cmd( &mut line.chars().peekable(), 0 ); eprintln!("parsed cmd: {:?}", cmd); /* let mut lex = parse::WordLexer::from( line.chars() ); diff --git a/src/sh/parse.rs b/src/sh/parse.rs index 41a7a52..3a30250 100644 --- a/src/sh/parse.rs +++ b/src/sh/parse.rs @@ -30,7 +30,8 @@ where It: Iterator<Item = char> { (None, true), (Some(' '), true), (Some('\t'), true), - (Some('\n'), true) + (Some('\n'), true), + (Some(')'), false), ]) } @@ -43,6 +44,8 @@ where It: Iterator<Item = char> { (Some('|'), false), (Some('&'), false), (Some(';'), false), + (Some(')'), false), + (Some('$'), false), (Some('\"'), false), (Some('\''), false) ]) @@ -57,7 +60,8 @@ where It: Iterator<Item = char> { (Some('='), false), (Some('|'), false), (Some('&'), false), - (Some(';'), false), + (Some(';'), false), + (Some(')'), false), (Some('\"'), false), (Some('\''), false) ]) @@ -92,7 +96,8 @@ where It: 'a + Iterator<Item = char> { pub struct SubstLexer<'a, It> where It: 'a + Iterator<Item = char> { - chars: &'a mut Peekable<It> + chars: &'a mut Peekable<It>, + depth: usize } pub fn skip_whitespace<It>(chars: &mut Peekable<It>) @@ -124,12 +129,14 @@ pub fn parse_doublequoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, L where It: Iterator<Item = char> { assert_eq!( chars.next(), Some('\"')); + + // todo: allow escaped \" let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::<String>(); match quoted { Ok(s) => { let word = Word { - segments: SubstLexer { chars: &mut s.chars().peekable() } + segments: SubstLexer { chars: &mut s.chars().peekable(), depth: 1 } .try_collect::<Vec<_>>()? }; @@ -213,14 +220,14 @@ where It: Iterator<Item = char> } } -pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> +pub fn parse_cmd<It>(chars: &mut Peekable<It>, depth: usize) -> Result<Option<Command>, LexError> where It: Iterator<Item = char> { skip_whitespace(chars); match chars.peek() { Some('!') => { chars.next(); - if let Some(cmd) = parse_cmd(chars)? { + if let Some(cmd) = parse_cmd(chars, depth)? { Ok(Some(Command::Negation(Box::new(cmd)))) } else { Err(LexError::UnexpectedEnd(vec![])) @@ -234,18 +241,14 @@ where It: Iterator<Item = char> Some(';') => { chars.next(); - let tail = parse_cmd( chars ) ?; + let tail = parse_cmd( chars, depth ) ?; match tail { Some(Command::Sequence(mut s)) => { s.insert(0, head); Ok(Some(Command::Sequence(s))) } - Some(tail) => { - Ok(Some(Command::Sequence(vec![ head, tail ]))) - } - None => { - Ok(Some(head)) - } + Some(tail) => Ok(Some(Command::Sequence(vec![ head, tail ]))), + None => Ok(Some(head)) } } Some('|') => { @@ -254,33 +257,25 @@ where It: Iterator<Item = char> Some('|') => { chars.next(); - let tail = parse_cmd( chars ) ?; + let tail = parse_cmd( chars, depth ) ?; match tail { Some(Command::ShortCircuitDisjunction(mut s)) => { s.insert(0, head); Ok(Some(Command::ShortCircuitDisjunction(s))) } - Some(tail) => { - Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ]))) - } - None => { - Err(LexError::UnexpectedEnd(vec![Some('|')])) - } + Some(tail) => Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ]))), + None => Err(LexError::UnexpectedEnd(vec![Some('|')])) } } _ => { - let tail = parse_cmd( chars ) ?; + let tail = parse_cmd( chars, depth ) ?; match tail { Some(Command::Pipeline(mut s)) => { s.insert(0, head); Ok(Some(Command::Pipeline(s))) } - Some(tail) => { - Ok(Some(Command::Pipeline(vec![ head, tail ]))) - } - None => { - Err(LexError::UnexpectedEnd(vec![])) - } + Some(tail) => Ok(Some(Command::Pipeline(vec![ head, tail ]))), + None => Err(LexError::UnexpectedEnd(vec![])) } } } @@ -291,23 +286,17 @@ where It: Iterator<Item = char> Some('&') => { chars.next(); - let tail = parse_cmd( chars ) ?; + let tail = parse_cmd( chars, depth ) ?; match tail { Some(Command::ShortCircuitConjunction(mut s)) => { s.insert(0, head); Ok(Some(Command::ShortCircuitConjunction(s))) } - Some(tail) => { - Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ]))) - } - None => { - Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')])) - } + Some(tail) => Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ]))), + None => Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')])) } } - Some(c) => { - Err(LexError::UnexpectedToken(*c)) - } + Some(c) => Err(LexError::UnexpectedToken(*c)), None => { // todo: // background job @@ -315,12 +304,17 @@ where It: Iterator<Item = char> } } } - Some(c) => { - Err(LexError::UnexpectedToken(*c)) - } - None => { - Ok(Some(head)) + Some(')') => { + eprintln!("got )"); + chars.next(); + if depth > 0 { + Ok(Some(head)) + } else { + Err(LexError::UnexpectedToken(')')) + } } + Some(c) => Err(LexError::UnexpectedToken(*c)), + None => Ok(Some(head)) } } else { Ok(None) @@ -351,20 +345,23 @@ where It: 'a + Iterator<Item = char> { // Subshell Some('(') => { self.chars.next(); - +/* let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>(); match subcmd_str { Ok(subcmd_str) => { - match parse_cmd(&mut subcmd_str.chars().peekable()) { + */ + match parse_cmd(&mut self.chars, 1) { Ok(Some(subcmd)) => { Some(Ok(WordSegment::Subshell(subcmd))) } Ok(None) => None, Err(err) => Some(Err(err)) } + /* } Err(err) => Some(Err(err)) } + */ } // plain parameter name e.g. `$PARAM` @@ -426,8 +423,9 @@ where It: 'a + Iterator<Item = char> { Some('"') => { Some(parse_doublequoted(self.chars)) }, Some('\'') => { Some(parse_quoted(self.chars)) }, Some('$') => { - SubstLexer{ chars: &mut self.chars }.next() + SubstLexer{ chars: &mut self.chars, depth: 1 }.next() } + Some(')') => { None } Some(c) => { let s : Result<String, LexError> = DelimIter::new_shell_word(self.chars).collect(); match s {