From 695cbb24f1d27c347fe21bbb98e61d5028551ac8 Mon Sep 17 00:00:00 2001 From: Michael Sippel Date: Thu, 26 Oct 2023 20:25:56 +0200 Subject: [PATCH] basic parser --- src/ast.rs | 68 ++++---- src/main.rs | 8 +- src/parse.rs | 429 ++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 392 insertions(+), 113 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 5a20bbc..87e7aae 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,30 +1,32 @@ +use std::boxed::Box; + //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum Command { Simple { - assignments: Vec<(String, Word)>, + assignments: Vec, command_word: Word, redirections: Vec }, Pipeline(Vec), Sequence(Vec), - ShortCircuitConjection(Vec), + ShortCircuitConjunction(Vec), ShortCircuitDisjunction(Vec), - Negation(Command), + Negation(Box), While { - condition: Command, - loop_body: Command + condition: Box, + loop_body: Box }, For { varname: String, sequence: Word, - loop_body: Command - } + loop_body: Box + }, If { - condition: Command, - then_branch: Command, - else_branch: Command + condition: Box, + then_branch: Box, + else_branch: Box }, Case { expr: Word, @@ -32,35 +34,25 @@ pub enum Command { }, Function { name: String, - body: Command + body: Box } } -/* - * We are all luminous beings. - * Why then, do we not appear before each - * other radiant in our illumination ? - */ - -/* - * Bewteen the idea - * And the reality - * Between the motion - * And the act - * Falls the Shadow - * (T.S. Eliot) - */ - //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ -#[derive(Debug)] +#[derive(Debug, PartialEq)] +pub struct Assignment { + pub name: String, + pub value: Word +} + +#[derive(Debug, PartialEq)] pub struct Word { pub segments: Vec } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum WordSegment { - FieldSeparator, Tilde(String), Literal(String), Parameter(String, ParameterFormat), @@ -68,7 +60,7 @@ pub enum WordSegment { DoubleQuote(Word), } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum ParameterFormat { Normal, Length, @@ -79,42 +71,42 @@ pub enum ParameterFormat { Sub(ParamSubSide, ParamSubMode, Word), } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum ParamSubMode { Shortest, Longest } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum ParamSubSide { Prefix, Suffix } //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct Redirection { redirection_type: RedirectionType, fd: u64, target: Word } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum RedirectionType { File(FileRedirectionType), Dup(DupRedirectionType), Heredoc // '<<' } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum FileRedirectionType { In, // '<' InOut, // '<>' Out, // '>' OutReplace, // '>|' - OutAppend, // '>|' + OutAppend, // '>>' } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum DupRedirectionType { In, // '<&' Out // '>&' diff --git a/src/main.rs b/src/main.rs index d6025bd..3752690 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +#![feature(iterator_try_collect)] + use { laddertypes::*, std::io::BufRead, @@ -38,10 +40,14 @@ fn main() { let stdin = std::io::stdin(); for line in std::io::BufReader::new(stdin).lines() { if let Ok(line) = line { + let cmd = parse::parse_cmd( &mut line.chars().peekable() ); + eprintln!("parsed cmd: {:?}", cmd); + /* let mut lex = parse::WordLexer::from( line.chars() ); for word in lex { eprintln!("word-segment: {:?}", word); - } + } + */ } } diff --git a/src/parse.rs b/src/parse.rs index 9a54df4..af691f9 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -1,105 +1,368 @@ use { crate::ast::*, - std::iter::{Peekable, FromIterator}, + std::iter::{Peekable}, }; -pub struct WordLexer -where It: Iterator { - chars: Peekable + +#[derive(Debug, PartialEq)] +pub enum LexError { + UnexpectedEnd(Vec>), + UnexpectedToken(char), + InvalidFileRedirectionType } -impl From for WordLexer + +///! iterates chars until it finds some char in `delim` +pub struct DelimIter<'a, It> where It: Iterator { - fn from(iter: It) -> Self { - WordLexer { - chars: iter.into_iter().peekable() - } + chars: &'a mut Peekable, + delim: Vec<(Option, bool)> +} + +impl<'a, It> DelimIter<'a, It> +where It: Iterator { + fn new(chars: &'a mut Peekable, delim: Vec<(Option, bool)>) -> Self { + DelimIter { chars, delim } + } + + fn new_whitespace(chars: &'a mut Peekable) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true) + ]) + } + + fn new_shell_word(chars: &'a mut Peekable) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true), + (Some('|'), false), + (Some('&'), false), + (Some(';'), false), + (Some('\"'), false), + (Some('\''), false) + ]) } } -#[derive(Debug)] -pub enum LexError { - UnexpectedEnd(char) -} +impl<'a, It> Iterator for DelimIter<'a, It> +where It: 'a + Iterator { + type Item = Result; -impl WordLexer -where It: Iterator { - fn collect_until(&mut self, close: char) -> Result { - let mut val = String::new(); - while let Some(c) = self.chars.peek().cloned() { - if c == close { - return Ok(val) - } else { - self.chars.next(); - val.push(c); + fn next(&mut self) -> Option> { + for (delim, consume) in self.delim.iter() { + if self.chars.peek().cloned() == *delim { + if *consume { + self.chars.next(); + } + return None; } } - if close.is_whitespace() { - Ok(val) - } else { - Err(LexError::UnexpectedEnd(close)) + match self.chars.next() { + Some(c) => Some(Ok(c)), + None => Some(Err(LexError::UnexpectedEnd(vec![]))) } } } -impl Iterator for WordLexer + +pub struct WordLexer<'a, It> +where It: 'a + Iterator { + chars: &'a mut Peekable +} + +impl<'a, It> WordLexer<'a, It> where It: Iterator { + fn collect_until(&mut self, close: Option) -> Result { + DelimIter::new(&mut self.chars, vec![(close, true)]) + .try_collect::() + } +} + +pub fn skip_whitespace(chars: &mut Peekable) +where It: Iterator +{ + while let Some(c) = chars.peek() { + if c.is_whitespace() { + chars.next(); + } else { + break; + } + } +} + +pub fn parse_quoted(chars: &mut Peekable) -> Result +where It: Iterator +{ + assert_eq!( chars.next(), Some('\'')); + let quoted = DelimIter::new(chars, vec![(Some('\''), true)]).try_collect::(); + match quoted { + Ok(s) => { + Ok(WordSegment::Literal(s)) + }, + Err(e) => Err(e) + } +} + +pub fn parse_doublequoted(chars: &mut Peekable) -> Result +where It: Iterator +{ + assert_eq!( chars.next(), Some('\"')); + let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::(); + match quoted { + Ok(s) => { + let word = Word { + segments: // fixme: handle spaces correctly -> create QuoteLexer + WordLexer { chars: &mut s.chars().peekable() } + .scan((), |_, x| x.ok()) + .collect::>() + }; + + Ok(WordSegment::DoubleQuote(word)) + }, + Err(e) => Err(e) + } +} + +pub fn parse_word(chars: &mut Peekable) -> Result +where It: Iterator +{ + Ok(Word { + segments: WordLexer{ chars }.try_collect::>()? + }) +} + +pub fn parse_assignment(chars: &mut Peekable) -> Result +where It: Iterator +{ + let name = DelimIter::new(chars, vec![(Some('='), true)]).try_collect::()?; + let value_str = DelimIter::new_whitespace(chars).try_collect::()?; + let value = parse_word(&mut value_str.chars().peekable())?; + Ok(Assignment{ name, value }) +} + +impl std::str::FromStr for FileRedirectionType { + type Err = LexError; + + fn from_str(s: &str) -> Result { + match s { + "<" => Ok(FileRedirectionType::In), + "<>" => Ok(FileRedirectionType::InOut), + ">" => Ok(FileRedirectionType::Out), + ">|" => Ok(FileRedirectionType::OutReplace), + ">>" => Ok(FileRedirectionType::OutAppend), + _ => Err(LexError::InvalidFileRedirectionType) + } + } +} + +pub fn parse_redirection(chars: &mut Peekable) -> Result +where It: Iterator +{ + Err(LexError::InvalidFileRedirectionType) + // let name = DelimIterator::new(chars, vec!['<', '>']).collect::(); +} + +pub fn parse_simple_cmd(chars: &mut Peekable) -> Result, LexError> +where It: Iterator +{ + let mut assignments = Vec::new(); + let mut redirections = Vec::new(); + + if chars.peek() == None { + return Ok(None); + } + + let mut first = DelimIter::new_shell_word(chars).try_collect::()?; + + while first.contains('=') { + assignments.push( parse_assignment(chars)? ); + first = DelimIter::new_shell_word(chars).try_collect::()?; + } + + let mut cmd_segments = WordLexer{ chars }.try_collect::>()?; + cmd_segments.insert(0, WordSegment::Literal(first)); + + Ok(Some(Command::Simple { + assignments, + command_word: Word { segments: cmd_segments }, + redirections, + })) +} + +pub fn parse_cmd(chars: &mut Peekable) -> Result, LexError> +where It: Iterator +{ + skip_whitespace(chars); + match chars.peek() { + Some('!') => { + chars.next(); + if let Some(cmd) = parse_cmd(chars)? { + Ok(Some(Command::Negation(Box::new(cmd)))) + } else { + Err(LexError::UnexpectedEnd(vec![])) + } + } + _ => { + if let Some(head) = parse_simple_cmd(chars)? { + skip_whitespace(chars); + + match chars.peek() { + Some(';') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::Sequence(mut s)) => { + s.insert(0, head); + Ok(Some(Command::Sequence(s))) + } + Some(tail) => { + Ok(Some(Command::Sequence(vec![ head, tail ]))) + } + None => { + Ok(Some(head)) + } + } + } + Some('|') => { + chars.next(); + match chars.peek() { + Some('|') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::ShortCircuitDisjunction(mut s)) => { + s.insert(0, head); + Ok(Some(Command::ShortCircuitDisjunction(s))) + } + Some(tail) => { + Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![Some('|')])) + } + } + } + _ => { + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::Pipeline(mut s)) => { + s.insert(0, head); + Ok(Some(Command::Pipeline(s))) + } + Some(tail) => { + Ok(Some(Command::Pipeline(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![])) + } + } + } + } + } + Some('&') => { + chars.next(); + match chars.peek() { + Some('&') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::ShortCircuitConjunction(mut s)) => { + s.insert(0, head); + Ok(Some(Command::ShortCircuitConjunction(s))) + } + Some(tail) => { + Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')])) + } + } + } + Some(c) => { + Err(LexError::UnexpectedToken(*c)) + } + None => { + // todo: + // background job + Ok(Some(head)) + } + } + } + Some(c) => { + Err(LexError::UnexpectedToken(*c)) + } + None => { + Ok(Some(head)) + } + } + } else { + Ok(None) + } + } + } +} + +impl<'a, It> Iterator for WordLexer<'a, It> +where It: 'a + Iterator { type Item = Result; fn next(&mut self) -> Option> { + skip_whitespace(self.chars); match self.chars.peek().cloned() { + Some('|') => { None } + Some('&') => { None } + Some(';') => { None } Some('~') => { self.chars.next(); - match self.collect_until(' ') { - Ok(s) => Some(Ok(WordSegment::Tilde(s))), + let user = DelimIter::new_whitespace(self.chars).collect(); + match user { + Ok(user) => Some(Ok(WordSegment::Tilde(user))), Err(e) => Some(Err(e)) } } - Some('"') => { - self.chars.next(); - match self.collect_until('"') { - Ok(s) => { - self.chars.next(); - - let word = Word { - segments: WordLexer { chars: s.chars().peekable() } - .scan((), |_, x| x.ok()) - .collect::>() - }; - - Some(Ok(WordSegment::DoubleQuote(word))) - }, - Err(e) => Some(Err(e)) - } - }, - Some('\'') => { - self.chars.next(); - match self.collect_until('\'') { - Ok(s) => { - self.chars.next(); - Some(Ok(WordSegment::Literal(s))) - }, - Err(e) => Some(Err(e)) - } - }, + Some('"') => { Some(parse_doublequoted(self.chars)) }, + Some('\'') => { Some(parse_quoted(self.chars)) }, Some('$') => { self.chars.next(); match self.chars.peek() { Some('{') => { self.chars.next(); - match self.collect_until('}') { + match DelimIter::new(&mut self.chars, vec![(Some('}'), true)]).try_collect::() { Ok(s) => { - self.chars.next(); - Some(Ok(WordSegment::Variable(s))) + Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) } Err(e) => Some(Err(e)) } } + Some('(') => { + self.chars.next(); + let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::(); + match subcmd_str { + Ok(subcmd_str) => { + match parse_cmd(&mut subcmd_str.chars().peekable()) { + Ok(Some(subcmd)) => { + Some(Ok(WordSegment::Subshell(subcmd))) + } + Ok(None) => None, + Err(err) => Some(Err(err)) + } + } + Err(err) => Some(Err(err)) + } + } _ => { - match self.collect_until(' ') { + match DelimIter::new_whitespace(self.chars).collect() { Ok(s) => { - Some(Ok(WordSegment::Variable(s))) + Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) } Err(e) => Some(Err(e)) } @@ -107,19 +370,11 @@ where It: Iterator { } } Some(c) => { - while let Some(c) = self.chars.peek() { - if c.is_whitespace() { - self.chars.next(); - } else { - return match self.collect_until(' ') { - Ok(s) => { - Some(Ok(WordSegment::Literal(s))) - } - Err(e) => Some(Err(e)) - }; - } + let s : Result = DelimIter::new_shell_word(self.chars).collect(); + match s { + Ok(s) => Some(Ok(WordSegment::Literal(s))), + Err(e) => Some(Err(e)) } - None } None => { None @@ -128,3 +383,29 @@ where It: Iterator { } } + +mod test { + use crate::parse::*; + + #[test] + fn test_delim_iter() { + let mut cs = "test 1234".chars().peekable(); + let mut lexer = DelimIter::new_shell_word(&mut cs); + assert_eq!(lexer.try_collect::(), Ok(String::from("test"))); + } + + #[test] + fn test_word_lexer() { + let mut cs = "test 1234|test".chars().peekable(); + + { + let mut lexer = WordLexer{ chars: &mut cs }; + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234"))))); + assert_eq!(lexer.next(), None); + } + + assert_eq!(cs.next(), Some('|')); + } +} +