diff --git a/src/cmd_type_dsl/annotation.rs b/src/cmd_type_dsl/annotation.rs new file mode 100644 index 0000000..24c6467 --- /dev/null +++ b/src/cmd_type_dsl/annotation.rs @@ -0,0 +1,68 @@ + + + pub enum AnnotationContext { + Cached( Vec<(CommandPattern, CommandTypeStatement)> ), + Load( String ), + FindIn( String ), + } + impl AnnotationContext { + /* loads & parses any given context + */ + /* + pub fn into_cached(self) -> AnnotationContext { + match self { + AnnotationContext::Load( path ) => { + + } + } + } +*/ + pub fn get_type(&self, cmd: &Command) -> Result<CommandType, UnificationError> { + match cmd { + Command::Simple{ assignments, command_word, redirections } => { + match self { + AnnotationContext::Cached( annotations ) => { + // find matching command pattern... + for (cmd_pat, typ) in annotations.iter() { + if let Ok(unificator) = cmd_pat.match_cmd(cmd) { + return Ok( typ.substitute(unificator).eval() ); + } + } + + Err(UnificationError::NoPattern) + }, + + AnnotationContext::Load( path ) => { + /* todo: + * - open file at `path` + * - parse CommandPattern + CommandTypeStatement + * - get_type on AnnotationContext::Cached() + */ + + } + AnnotationContext::FindIn( path ) => { + // if let Some(command_name) = command_word.segments.get(0) { + /* todo: + * - use command_name to lookup file + * - forward to AnnotationContext::Load() + */ +/* + let mut err = UnificationError( vec![] ); + for file in path.direntries { + if let Ok(typ) = AnnotationContext::Load( path ).get_type() => { + + } + } +*/ + // } + } + } + } + + _ => { + Err(UnificationError::NoPattern) + } + } + } + } + diff --git a/src/cmd_type_dsl/mod.rs b/src/cmd_type_dsl/mod.rs new file mode 100644 index 0000000..1b55ab2 --- /dev/null +++ b/src/cmd_type_dsl/mod.rs @@ -0,0 +1,121 @@ +use std::{ + collections::HashMap, + boxed::Box +}; + +use crate::sh::ast::Command; +use laddertypes::*; + +pub struct Substitution(HashMap< String, CommandTypeExpr >); + +#[derive(Clone)] +pub enum CommandArgPattern { + Literal(String), + Variable(String), + VariablePack(Box<CommandArgPattern>), + Optional(Box<CommandArgPattern>), + Conjunction(Vec<CommandArgPattern>), + Disjunction(Vec<CommandArgPattern>) +} + +#[derive(Clone)] +pub struct CommandPattern { + name: String, + args: Vec<CommandArgPattern>, + env: Vec<(String, CommandTypeExpr)>, +} + +#[derive(Clone)] +pub struct MatchCandidate { + at: usize, + expected: CommandPattern, + found: CommandTypeExpr, +} + +#[derive(Clone)] +pub struct UnificationError( Vec<MatchCandidate> ); + +#[derive(Clone)] +pub enum CommandTypeExpr { + Type(TypeTerm), + Match(Box<CommandTypeExpr>, Vec<(CommandArgPattern, CommandTypeExpr)>) +} + +impl CommandArgPattern { + pub fn match_cmd(&self, cmd: &Command) -> Result<Substitution, UnificationError> { + Err(UnificationError(vec![])) + } +} + +impl CommandTypeExpr { + pub fn eval(self) -> Result<TypeTerm, CommandTypeExpr> { + match self { + CommandTypeExpr::Type(typ) => Ok(typ), + CommandTypeExpr::Match(pattern, cases) => { + + } + s=> Ok(s) + } + } + + pub fn apply_subst(&mut self, subst: &Substitution) { + match self { + CommandTypeExpr::Type(typ) => { + self = CommandTypeExpr::Type( + typ.apply_substitution(|v: String| subst.get(v)) + ); + } + CommandTypeExpr::Match( pattern, cases ) => { + + // todo + } + _ => {} + } + } +} + +pub struct FileDescriptor(u32); +pub enum PipeDirection { In, Out } + +pub enum Selector { + Pipe(FileDescriptor, PipeDirection), + Parameter(String), + ParameterPack(String), + File(String) +} + +pub enum CommandTypeStatement { + TypAssign(Selector, TypeTerm), + ValAssign(String, CommandTypeExpr), + Block(Vec<CommandTypeStatement>), + Match(Box<CommandTypeExpr>, Vec<(CommandArgPattern, CommandTypeStatement)>) +} + +pub struct CommandType(Vec<(Selector, TypeTerm)>); + +impl CommandTypeStatement { + pub fn eval(self) -> CommandType { + match self { + CommandTypeStatement::Block(stmnts) => { + CommandType( stmnts.into_iter().map(|stmnt| stmnt.eval().0.into_iter()).flatten().collect() ) + } + CommandTypeStatement::TypAssign(selector, typ) => { + CommandType( vec![ (selector, typ) ]) + } + CommandTypeStatement::ValAssign(variable, expr) => { + CommandType(vec![]) + } + CommandTypeStatement::Match(pattern, cases) => { + /* + for (case,stmnt) in cases.into_iter() { + if let Ok(unificator) = pattern + if let Ok() = case.match_expr() + CommandType( vec![] ) + } + */ + CommandType(vec![]) + } + } + } +} + diff --git a/src/main.rs b/src/main.rs index 5533bc1..9b45a18 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,14 @@ +#![feature(iterator_try_collect)] + use { laddertypes::*, std::io::BufRead, tiny_ansi::TinyAnsi }; +mod sh; +mod cmd_type_dsl; + //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ pub fn get_type_str(cmd: &str, item: &str) -> Option<String> { @@ -31,7 +36,23 @@ fn main() { let mut dict = TypeDict::new(); let stdin = std::io::stdin(); - for pipeline in std::io::BufReader::new(stdin).lines() { + for line in std::io::BufReader::new(stdin).lines() { + if let Ok(line) = line { + let cmd = sh::parse::parse_cmd( &mut line.chars().peekable(), 0 ); + eprintln!("parsed cmd: {:?}", cmd); + /* + let mut lex = parse::WordLexer::from( line.chars() ); + for word in lex { + eprintln!("word-segment: {:?}", word); + } + */ + } + } + + return; + + let stdin = std::io::stdin(); + for pipeline in std::io::BufReader::new(stdin).lines() { let mut last_cmd = String::new(); let mut last_stdout_type : Option<TypeTerm> = None; diff --git a/src/sh/ast.rs b/src/sh/ast.rs new file mode 100644 index 0000000..87e7aae --- /dev/null +++ b/src/sh/ast.rs @@ -0,0 +1,115 @@ +use std::boxed::Box; + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +#[derive(Debug, PartialEq)] +pub enum Command { + Simple { + assignments: Vec<Assignment>, + command_word: Word, + redirections: Vec<Redirection> + }, + Pipeline(Vec<Command>), + Sequence(Vec<Command>), + ShortCircuitConjunction(Vec<Command>), + ShortCircuitDisjunction(Vec<Command>), + Negation(Box<Command>), + While { + condition: Box<Command>, + loop_body: Box<Command> + }, + For { + varname: String, + sequence: Word, + loop_body: Box<Command> + }, + If { + condition: Box<Command>, + then_branch: Box<Command>, + else_branch: Box<Command> + }, + Case { + expr: Word, + cases: Vec<(Word, Command)> + }, + Function { + name: String, + body: Box<Command> + } +} + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +#[derive(Debug, PartialEq)] +pub struct Assignment { + pub name: String, + pub value: Word +} + +#[derive(Debug, PartialEq)] +pub struct Word { + pub segments: Vec<WordSegment> +} + +#[derive(Debug, PartialEq)] +pub enum WordSegment { + Tilde(String), + Literal(String), + Parameter(String, ParameterFormat), + Subshell(Command), + DoubleQuote(Word), +} + +#[derive(Debug, PartialEq)] +pub enum ParameterFormat { + Normal, + Length, + Default(Word), + Assign(Word), + Error(Word), + Alt(Word), + Sub(ParamSubSide, ParamSubMode, Word), +} + +#[derive(Debug, PartialEq)] +pub enum ParamSubMode { + Shortest, Longest +} + +#[derive(Debug, PartialEq)] +pub enum ParamSubSide { + Prefix, Suffix +} + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ + +#[derive(Debug, PartialEq)] +pub struct Redirection { + redirection_type: RedirectionType, + fd: u64, + target: Word +} + +#[derive(Debug, PartialEq)] +pub enum RedirectionType { + File(FileRedirectionType), + Dup(DupRedirectionType), + Heredoc // '<<' +} + +#[derive(Debug, PartialEq)] +pub enum FileRedirectionType { + In, // '<' + InOut, // '<>' + Out, // '>' + OutReplace, // '>|' + OutAppend, // '>>' +} + +#[derive(Debug, PartialEq)] +pub enum DupRedirectionType { + In, // '<&' + Out // '>&' +} + +//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ diff --git a/src/sh/mod.rs b/src/sh/mod.rs new file mode 100644 index 0000000..0c08c79 --- /dev/null +++ b/src/sh/mod.rs @@ -0,0 +1,7 @@ + +pub mod ast; +pub mod parse; + + +pub use ast::*; +pub use parse::*; diff --git a/src/sh/mod.rs~ b/src/sh/mod.rs~ new file mode 100644 index 0000000..cf2b51c --- /dev/null +++ b/src/sh/mod.rs~ @@ -0,0 +1,3 @@ + +pub mod ast; +pub mod parse; diff --git a/src/sh/parse.rs b/src/sh/parse.rs new file mode 100644 index 0000000..3a30250 --- /dev/null +++ b/src/sh/parse.rs @@ -0,0 +1,472 @@ +use { + crate::sh::ast::*, + std::iter::{Peekable}, +}; + + +#[derive(Debug, PartialEq)] +pub enum LexError { + UnexpectedEnd(Vec<Option<char>>), + UnexpectedToken(char), + InvalidFileRedirectionType +} + + +///! iterates chars until it finds some char in `delim` +pub struct DelimIter<'a, It> +where It: Iterator<Item = char> { + chars: &'a mut Peekable<It>, + delim: Vec<(Option<char>, bool)> +} + +impl<'a, It> DelimIter<'a, It> +where It: Iterator<Item = char> { + fn new(chars: &'a mut Peekable<It>, delim: Vec<(Option<char>, bool)>) -> Self { + DelimIter { chars, delim } + } + + fn new_whitespace(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true), + (Some(')'), false), + ]) + } + + fn new_shell_word(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true), + (Some('|'), false), + (Some('&'), false), + (Some(';'), false), + (Some(')'), false), + (Some('$'), false), + (Some('\"'), false), + (Some('\''), false) + ]) + } + + fn new_shell_word_or_assignment(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true), + (Some('='), false), + (Some('|'), false), + (Some('&'), false), + (Some(';'), false), + (Some(')'), false), + (Some('\"'), false), + (Some('\''), false) + ]) + } +} + +impl<'a, It> Iterator for DelimIter<'a, It> +where It: 'a + Iterator<Item = char> { + type Item = Result<char, LexError>; + + fn next(&mut self) -> Option<Result<char, LexError>> { + for (delim, consume) in self.delim.iter() { + if self.chars.peek().cloned() == *delim { + if *consume { + self.chars.next(); + } + return None; + } + } + + match self.chars.next() { + Some(c) => Some(Ok(c)), + None => Some(Err(LexError::UnexpectedEnd(vec![]))) + } + } +} + +pub struct WordLexer<'a, It> +where It: 'a + Iterator<Item = char> { + chars: &'a mut Peekable<It> +} + +pub struct SubstLexer<'a, It> +where It: 'a + Iterator<Item = char> { + chars: &'a mut Peekable<It>, + depth: usize +} + +pub fn skip_whitespace<It>(chars: &mut Peekable<It>) +where It: Iterator<Item = char> +{ + while let Some(c) = chars.peek() { + if c.is_whitespace() { + chars.next(); + } else { + break; + } + } +} + +pub fn parse_quoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError> +where It: Iterator<Item = char> +{ + assert_eq!( chars.next(), Some('\'')); + let quoted = DelimIter::new(chars, vec![(Some('\''), true)]).try_collect::<String>(); + match quoted { + Ok(s) => { + Ok(WordSegment::Literal(s)) + }, + Err(e) => Err(e) + } +} + +pub fn parse_doublequoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError> +where It: Iterator<Item = char> +{ + assert_eq!( chars.next(), Some('\"')); + + // todo: allow escaped \" + let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::<String>(); + + match quoted { + Ok(s) => { + let word = Word { + segments: SubstLexer { chars: &mut s.chars().peekable(), depth: 1 } + .try_collect::<Vec<_>>()? + }; + + Ok(WordSegment::DoubleQuote(word)) + }, + Err(e) => Err(e) + } +} + +pub fn parse_word<It>(chars: &mut Peekable<It>) -> Result<Word, LexError> +where It: Iterator<Item = char> +{ + Ok(Word { + segments: WordLexer{ chars }.try_collect::<Vec<_>>()? + }) +} + +impl std::str::FromStr for FileRedirectionType { + type Err = LexError; + + fn from_str(s: &str) -> Result<FileRedirectionType, LexError> { + match s { + "<" => Ok(FileRedirectionType::In), + "<>" => Ok(FileRedirectionType::InOut), + ">" => Ok(FileRedirectionType::Out), + ">|" => Ok(FileRedirectionType::OutReplace), + ">>" => Ok(FileRedirectionType::OutAppend), + _ => Err(LexError::InvalidFileRedirectionType) + } + } +} + +pub fn parse_redirection<It>(chars: &mut Peekable<It>) -> Result<Redirection, LexError> +where It: Iterator<Item = char> +{ + Err(LexError::InvalidFileRedirectionType) + // let name = DelimIterator::new(chars, vec!['<', '>']).collect::<String>(); +} + +pub fn parse_simple_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> +where It: Iterator<Item = char> +{ + let mut assignments = Vec::new(); + let mut redirections = Vec::new(); + + if chars.peek() == None { + return Ok(None); + } + + loop { + skip_whitespace(chars); + let mut name = DelimIter::new_shell_word_or_assignment(chars).try_collect::<String>()?; + + match chars.peek().clone() { + Some('=') => { + chars.next(); + let mut lex = WordLexer{ chars }; + match lex.next() { + Some(Ok(value)) => { + assignments.push(Assignment { name, value: Word{ segments: vec![ value ] } }); + }, + Some(Err(e)) => { + return Err(e); + }, + None => { + return Err(LexError::UnexpectedEnd(vec![])); + } + } + } + _ => { + let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?; + cmd_segments.insert(0, WordSegment::Literal(name)); + + return Ok(Some(Command::Simple { + assignments, + command_word: Word { segments: cmd_segments }, + redirections, + })); + } + } + } +} + +pub fn parse_cmd<It>(chars: &mut Peekable<It>, depth: usize) -> Result<Option<Command>, LexError> +where It: Iterator<Item = char> +{ + skip_whitespace(chars); + match chars.peek() { + Some('!') => { + chars.next(); + if let Some(cmd) = parse_cmd(chars, depth)? { + Ok(Some(Command::Negation(Box::new(cmd)))) + } else { + Err(LexError::UnexpectedEnd(vec![])) + } + } + _ => { + if let Some(head) = parse_simple_cmd(chars)? { + skip_whitespace(chars); + + match chars.peek() { + Some(';') => { + chars.next(); + + let tail = parse_cmd( chars, depth ) ?; + match tail { + Some(Command::Sequence(mut s)) => { + s.insert(0, head); + Ok(Some(Command::Sequence(s))) + } + Some(tail) => Ok(Some(Command::Sequence(vec![ head, tail ]))), + None => Ok(Some(head)) + } + } + Some('|') => { + chars.next(); + match chars.peek() { + Some('|') => { + chars.next(); + + let tail = parse_cmd( chars, depth ) ?; + match tail { + Some(Command::ShortCircuitDisjunction(mut s)) => { + s.insert(0, head); + Ok(Some(Command::ShortCircuitDisjunction(s))) + } + Some(tail) => Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ]))), + None => Err(LexError::UnexpectedEnd(vec![Some('|')])) + } + } + _ => { + let tail = parse_cmd( chars, depth ) ?; + match tail { + Some(Command::Pipeline(mut s)) => { + s.insert(0, head); + Ok(Some(Command::Pipeline(s))) + } + Some(tail) => Ok(Some(Command::Pipeline(vec![ head, tail ]))), + None => Err(LexError::UnexpectedEnd(vec![])) + } + } + } + } + Some('&') => { + chars.next(); + match chars.peek() { + Some('&') => { + chars.next(); + + let tail = parse_cmd( chars, depth ) ?; + match tail { + Some(Command::ShortCircuitConjunction(mut s)) => { + s.insert(0, head); + Ok(Some(Command::ShortCircuitConjunction(s))) + } + Some(tail) => Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ]))), + None => Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')])) + } + } + Some(c) => Err(LexError::UnexpectedToken(*c)), + None => { + // todo: + // background job + Ok(Some(head)) + } + } + } + Some(')') => { + eprintln!("got )"); + chars.next(); + if depth > 0 { + Ok(Some(head)) + } else { + Err(LexError::UnexpectedToken(')')) + } + } + Some(c) => Err(LexError::UnexpectedToken(*c)), + None => Ok(Some(head)) + } + } else { + Ok(None) + } + } + } +} +impl<'a, It> Iterator for SubstLexer<'a, It> +where It: 'a + Iterator<Item = char> { + type Item = Result<WordSegment, LexError>; + + fn next(&mut self) -> Option<Result<WordSegment, LexError>> { + match self.chars.peek().cloned() { + Some('$') => { + self.chars.next(); + match self.chars.peek() { + // curly-braced parameter e.g. `${PARAM}` + Some('{') => { + self.chars.next(); + match DelimIter::new(&mut self.chars, vec![(Some('}'), true)]).try_collect::<String>() { + Ok(s) => { + Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) + } + Err(e) => Some(Err(e)) + } + } + + // Subshell + Some('(') => { + self.chars.next(); +/* + let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>(); + match subcmd_str { + Ok(subcmd_str) => { + */ + match parse_cmd(&mut self.chars, 1) { + Ok(Some(subcmd)) => { + Some(Ok(WordSegment::Subshell(subcmd))) + } + Ok(None) => None, + Err(err) => Some(Err(err)) + } + /* + } + Err(err) => Some(Err(err)) + } + */ + } + + // plain parameter name e.g. `$PARAM` + _ => { + match DelimIter::new_whitespace(self.chars).collect() { + Ok(s) => { + Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) + } + Err(e) => Some(Err(e)) + } + } + } + } + + // not a substitution, + // take as literal until next $. + _ => { + let lit_str = DelimIter::new( + &mut self.chars, + vec![ + (None, true), + (Some('$'), false) + ] + ).try_collect::<String>(); + + match lit_str { + Ok(s) => { + if s.len() > 0 { + Some(Ok(WordSegment::Literal(s))) + } else { + None + } + } + Err(e) => Some(Err(e)) + } + } + } + } +} + +impl<'a, It> Iterator for WordLexer<'a, It> +where It: 'a + Iterator<Item = char> { + type Item = Result<WordSegment, LexError>; + + fn next(&mut self) -> Option<Result<WordSegment, LexError>> { + skip_whitespace(self.chars); + match self.chars.peek().cloned() { + Some('|') => { None } + Some('&') => { None } + Some(';') => { None } + Some('~') => { + self.chars.next(); + let user = DelimIter::new_whitespace(self.chars).collect(); + match user { + Ok(user) => Some(Ok(WordSegment::Tilde(user))), + Err(e) => Some(Err(e)) + } + } + Some('"') => { Some(parse_doublequoted(self.chars)) }, + Some('\'') => { Some(parse_quoted(self.chars)) }, + Some('$') => { + SubstLexer{ chars: &mut self.chars, depth: 1 }.next() + } + Some(')') => { None } + Some(c) => { + let s : Result<String, LexError> = DelimIter::new_shell_word(self.chars).collect(); + match s { + Ok(s) => Some(Ok(WordSegment::Literal(s))), + Err(e) => Some(Err(e)) + } + } + None => { + None + } + } + } +} + + +mod test { + use crate::sh::parse::*; + + #[test] + fn test_delim_iter() { + let mut cs = "test 1234".chars().peekable(); + let mut lexer = DelimIter::new_shell_word(&mut cs); + assert_eq!(lexer.try_collect::<String>(), Ok(String::from("test"))); + } + + #[test] + fn test_word_lexer() { + let mut cs = "test 1234|test".chars().peekable(); + + { + let mut lexer = WordLexer{ chars: &mut cs }; + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234"))))); + assert_eq!(lexer.next(), None); + } + assert_eq!(cs.next(), Some('|')); + { + let mut lexer = WordLexer{ chars: &mut cs }; + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); + assert_eq!(lexer.next(), None); + } + } +} + diff --git a/src/sh/parse.rs~ b/src/sh/parse.rs~ new file mode 100644 index 0000000..b4db237 --- /dev/null +++ b/src/sh/parse.rs~ @@ -0,0 +1,438 @@ +use { + crate::ast::*, + std::iter::{Peekable}, +}; + + +#[derive(Debug, PartialEq)] +pub enum LexError { + UnexpectedEnd(Vec<Option<char>>), + UnexpectedToken(char), + InvalidFileRedirectionType +} + + +///! iterates chars until it finds some char in `delim` +pub struct DelimIter<'a, It> +where It: Iterator<Item = char> { + chars: &'a mut Peekable<It>, + delim: Vec<(Option<char>, bool)> +} + +impl<'a, It> DelimIter<'a, It> +where It: Iterator<Item = char> { + fn new(chars: &'a mut Peekable<It>, delim: Vec<(Option<char>, bool)>) -> Self { + DelimIter { chars, delim } + } + + fn new_whitespace(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true) + ]) + } + + fn new_shell_word(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true), + (Some('|'), false), + (Some('&'), false), + (Some(';'), false), + (Some('\"'), false), + (Some('\''), false) + ]) + } + + fn new_shell_word_or_assignment(chars: &'a mut Peekable<It>) -> Self { + DelimIter::new(chars, vec![ + (None, true), + (Some(' '), true), + (Some('\t'), true), + (Some('\n'), true), + (Some('='), false), + (Some('|'), false), + (Some('&'), false), + (Some(';'), false), + (Some('\"'), false), + (Some('\''), false) + ]) + } +} + +impl<'a, It> Iterator for DelimIter<'a, It> +where It: 'a + Iterator<Item = char> { + type Item = Result<char, LexError>; + + fn next(&mut self) -> Option<Result<char, LexError>> { + for (delim, consume) in self.delim.iter() { + if self.chars.peek().cloned() == *delim { + if *consume { + self.chars.next(); + } + return None; + } + } + + match self.chars.next() { + Some(c) => Some(Ok(c)), + None => Some(Err(LexError::UnexpectedEnd(vec![]))) + } + } +} + + +pub struct WordLexer<'a, It> +where It: 'a + Iterator<Item = char> { + chars: &'a mut Peekable<It> +} + +impl<'a, It> WordLexer<'a, It> +where It: Iterator<Item = char> { + fn collect_until(&mut self, close: Option<char>) -> Result<String, LexError> { + DelimIter::new(&mut self.chars, vec![(close, true)]) + .try_collect::<String>() + } +} + +pub fn skip_whitespace<It>(chars: &mut Peekable<It>) +where It: Iterator<Item = char> +{ + while let Some(c) = chars.peek() { + if c.is_whitespace() { + chars.next(); + } else { + break; + } + } +} + +pub fn parse_quoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError> +where It: Iterator<Item = char> +{ + assert_eq!( chars.next(), Some('\'')); + let quoted = DelimIter::new(chars, vec![(Some('\''), true)]).try_collect::<String>(); + match quoted { + Ok(s) => { + Ok(WordSegment::Literal(s)) + }, + Err(e) => Err(e) + } +} + +pub fn parse_doublequoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError> +where It: Iterator<Item = char> +{ + assert_eq!( chars.next(), Some('\"')); + let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::<String>(); + match quoted { + Ok(s) => { + let word = Word { + segments: // fixme: handle spaces correctly -> create QuoteLexer + WordLexer { chars: &mut s.chars().peekable() } + .scan((), |_, x| x.ok()) + .collect::<Vec<_>>() + }; + + Ok(WordSegment::DoubleQuote(word)) + }, + Err(e) => Err(e) + } +} + +pub fn parse_word<It>(chars: &mut Peekable<It>) -> Result<Word, LexError> +where It: Iterator<Item = char> +{ + Ok(Word { + segments: WordLexer{ chars }.try_collect::<Vec<_>>()? + }) +} + +impl std::str::FromStr for FileRedirectionType { + type Err = LexError; + + fn from_str(s: &str) -> Result<FileRedirectionType, LexError> { + match s { + "<" => Ok(FileRedirectionType::In), + "<>" => Ok(FileRedirectionType::InOut), + ">" => Ok(FileRedirectionType::Out), + ">|" => Ok(FileRedirectionType::OutReplace), + ">>" => Ok(FileRedirectionType::OutAppend), + _ => Err(LexError::InvalidFileRedirectionType) + } + } +} + +pub fn parse_redirection<It>(chars: &mut Peekable<It>) -> Result<Redirection, LexError> +where It: Iterator<Item = char> +{ + Err(LexError::InvalidFileRedirectionType) + // let name = DelimIterator::new(chars, vec!['<', '>']).collect::<String>(); +} + +pub fn parse_simple_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> +where It: Iterator<Item = char> +{ + let mut assignments = Vec::new(); + let mut redirections = Vec::new(); + + if chars.peek() == None { + return Ok(None); + } + + loop { + skip_whitespace(chars); + let mut name = DelimIter::new_shell_word_or_assignment(chars).try_collect::<String>()?; + + match chars.peek().clone() { + Some('=') => { + chars.next(); + let mut lex = WordLexer{ chars }; + match lex.next() { + Some(Ok(value)) => { + assignments.push(Assignment { name, value: Word{ segments: vec![ value ] } }); + }, + Some(Err(e)) => { + return Err(e); + }, + None => { + return Err(LexError::UnexpectedEnd(vec![])); + } + } + } + _ => { + let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?; + cmd_segments.insert(0, WordSegment::Literal(name)); + + return Ok(Some(Command::Simple { + assignments, + command_word: Word { segments: cmd_segments }, + redirections, + })); + } + } + } +} + +pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError> +where It: Iterator<Item = char> +{ + skip_whitespace(chars); + match chars.peek() { + Some('!') => { + chars.next(); + if let Some(cmd) = parse_cmd(chars)? { + Ok(Some(Command::Negation(Box::new(cmd)))) + } else { + Err(LexError::UnexpectedEnd(vec![])) + } + } + _ => { + if let Some(head) = parse_simple_cmd(chars)? { + skip_whitespace(chars); + + match chars.peek() { + Some(';') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::Sequence(mut s)) => { + s.insert(0, head); + Ok(Some(Command::Sequence(s))) + } + Some(tail) => { + Ok(Some(Command::Sequence(vec![ head, tail ]))) + } + None => { + Ok(Some(head)) + } + } + } + Some('|') => { + chars.next(); + match chars.peek() { + Some('|') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::ShortCircuitDisjunction(mut s)) => { + s.insert(0, head); + Ok(Some(Command::ShortCircuitDisjunction(s))) + } + Some(tail) => { + Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![Some('|')])) + } + } + } + _ => { + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::Pipeline(mut s)) => { + s.insert(0, head); + Ok(Some(Command::Pipeline(s))) + } + Some(tail) => { + Ok(Some(Command::Pipeline(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![])) + } + } + } + } + } + Some('&') => { + chars.next(); + match chars.peek() { + Some('&') => { + chars.next(); + + let tail = parse_cmd( chars ) ?; + match tail { + Some(Command::ShortCircuitConjunction(mut s)) => { + s.insert(0, head); + Ok(Some(Command::ShortCircuitConjunction(s))) + } + Some(tail) => { + Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ]))) + } + None => { + Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')])) + } + } + } + Some(c) => { + Err(LexError::UnexpectedToken(*c)) + } + None => { + // todo: + // background job + Ok(Some(head)) + } + } + } + Some(c) => { + Err(LexError::UnexpectedToken(*c)) + } + None => { + Ok(Some(head)) + } + } + } else { + Ok(None) + } + } + } +} + +impl<'a, It> Iterator for WordLexer<'a, It> +where It: 'a + Iterator<Item = char> { + type Item = Result<WordSegment, LexError>; + + fn next(&mut self) -> Option<Result<WordSegment, LexError>> { + skip_whitespace(self.chars); + match self.chars.peek().cloned() { + Some('|') => { None } + Some('&') => { None } + Some(';') => { None } + Some('~') => { + self.chars.next(); + let user = DelimIter::new_whitespace(self.chars).collect(); + match user { + Ok(user) => Some(Ok(WordSegment::Tilde(user))), + Err(e) => Some(Err(e)) + } + } + Some('"') => { Some(parse_doublequoted(self.chars)) }, + Some('\'') => { Some(parse_quoted(self.chars)) }, + Some('$') => { + self.chars.next(); + match self.chars.peek() { + Some('{') => { + self.chars.next(); + match DelimIter::new(&mut self.chars, vec![(Some('}'), true)]).try_collect::<String>() { + Ok(s) => { + Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) + } + Err(e) => Some(Err(e)) + } + } + Some('(') => { + self.chars.next(); + let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>(); + match subcmd_str { + Ok(subcmd_str) => { + match parse_cmd(&mut subcmd_str.chars().peekable()) { + Ok(Some(subcmd)) => { + Some(Ok(WordSegment::Subshell(subcmd))) + } + Ok(None) => None, + Err(err) => Some(Err(err)) + } + } + Err(err) => Some(Err(err)) + } + } + _ => { + match DelimIter::new_whitespace(self.chars).collect() { + Ok(s) => { + Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal))) + } + Err(e) => Some(Err(e)) + } + } + } + } + Some(c) => { + let s : Result<String, LexError> = DelimIter::new_shell_word(self.chars).collect(); + match s { + Ok(s) => Some(Ok(WordSegment::Literal(s))), + Err(e) => Some(Err(e)) + } + } + None => { + None + } + } + } +} + + +mod test { + use crate::parse::*; + + #[test] + fn test_delim_iter() { + let mut cs = "test 1234".chars().peekable(); + let mut lexer = DelimIter::new_shell_word(&mut cs); + assert_eq!(lexer.try_collect::<String>(), Ok(String::from("test"))); + } + + #[test] + fn test_word_lexer() { + let mut cs = "test 1234|test".chars().peekable(); + + { + let mut lexer = WordLexer{ chars: &mut cs }; + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234"))))); + assert_eq!(lexer.next(), None); + } + assert_eq!(cs.next(), Some('|')); + { + let mut lexer = WordLexer{ chars: &mut cs }; + assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test"))))); + assert_eq!(lexer.next(), None); + } + } +} + diff --git a/typeDB/cut b/typeDB/cut new file mode 100644 index 0000000..e9443b2 --- /dev/null +++ b/typeDB/cut @@ -0,0 +1,65 @@ +::cut OPTION... [FILE]... { + + match OPTION... { + --help { <1 : Help~<Seq Char> ; } + --version { <1 : VersionInfo~<Seq Char> ; } + + * { + match OPTION... { + -f--fields LIST:<Seq ℕ>~CutFieldList~<Seq Char> { + + ROW-DELIM = + match OPTION... { + -z--zero-terminated { '\0' } + * { '\n' } + }; + + IN-COL-DELIM = + match OPTION... { + -d--delimiter DELIM:Char { DELIM } + * { '\t' } + }; + + OUT-COL-DELIM = + match OPTION... { + --output-delimiter STRING:<Seq Char> { STRING } + * { IN-COL-DELIM } + }; + + FILE : Path + ~<Seq PathSegment + ~<Seq Char>> + ~<SepSeq Char '/'> + ~<Seq Char> ; + + @FILE : <Seq <* < ITEM~<Seq Char> >... > + ~<Seq <Seq Char>> + ~<SepSeq Char IN-COL-DELIM> + ~<Seq Char>> + ~<SepSeq Char ROW-DELIM> + ~<Seq Char> ; + + >0 : <Seq <* <ITEM~<Seq Char>>... > + ~<Seq <Seq Char>> + ~<SepSeq Char IN-COL-DELIM> + ~<Seq Char>> + ~<SepSeq Char ROW-DELIM> + ~<Seq Char> ; + + # `[LIST]` means `select all fields contained in LIST from parameter pack` + <1 : <Seq <* <ITEM~<Seq Char>>[LIST]... > + ~<Seq <Seq Char>> + ~<SepSeq Char OUT-COL-DELIM> + ~<Seq Char>> + ~<SepSeq Char ROW-DELIM> + ~<Seq Char> ; + } + + * { + <1 : Nothing ; + <2 : MissingFields~ErrorMessage~<Seq Char> ; + } + } + } + } +} diff --git a/typeDB/date b/typeDB/date new file mode 100644 index 0000000..84edf45 --- /dev/null +++ b/typeDB/date @@ -0,0 +1,15 @@ +::date [OPTION]... [+FORMAT] { + match FORMAT { + "+%s" { + <1 : TimePoint + ~<TimeSince UnixEpoch> + ~<Duration Seconds> + ~ℕ + ~<PosInt 10 BigEndian> + ~<Seq <Digit 10>~Char> + } + * { + <1 : TimePoint ; + } + } +} diff --git a/typeDB/dirname b/typeDB/dirname new file mode 100644 index 0000000..c94d46a --- /dev/null +++ b/typeDB/dirname @@ -0,0 +1,56 @@ +::dirname [OPTION] NAME... { + + # dont expect any data on stdin + # + + >0 : None ; + + + # each (non-option) argument must be a path + # + + NAME : Path + ~<Seq PathSegment + ~<Seq Char>> + ~<SepSeq Char '/'> + ~<Seq Char> + ~UTF-8 + ~<Seq Byte> ; + + + matchopt OPTION { + + # help and version flags will bypass any data processing + # + + --help { <1 : Help~<Seq Char> ; } + --version { <1 : VersionInfo~<Seq Char> ; } + + + # in case neither --help nor --version is set + # ... + + * { + + # if zero-flag is set, use '\0' instead of '\n' + # as delimiter in output sequence + + DELIM = + match OPTION { + -z--zero { '\0' } + * { '\n' } + }; + + + # output a sequence of paths to stdout + # + + <1 : <Seq Path + ~<Seq PathSegment~<Seq Char>> + ~<SepSeq Char '/'> + ~<Seq Char>> + ~<SepSeq Char DELIM> + ~<Seq Char> ; + } + } +} diff --git a/typeDB/echo b/typeDB/echo new file mode 100644 index 0000000..6a6b9b6 --- /dev/null +++ b/typeDB/echo @@ -0,0 +1,16 @@ +::echo [OPTIONS]... [STRING]... { + >0 : None ; + + matchopt OPTION { + --help { <1 : Help~<Seq Char> ; } + --version { <1 : VersionInfo~<Seq Char> ; } + -n { + <1 : <* STRING~<Seq Char>... > + ~<Seq Char> ; + } + * { + <1 : <* STRING~<Seq Char>... '\n' > + ~<Seq Char> ; + } + } +} diff --git a/typeDB/seq b/typeDB/seq new file mode 100644 index 0000000..1119ab5 --- /dev/null +++ b/typeDB/seq @@ -0,0 +1,25 @@ +::seq [OPTION]... [FIRST [INCREMENT]] LAST { + >0 : None ; + + match OPTION... { + --help { <1 : Help~<Seq Char> ; } + --version { <1 : VersionInfo~<Seq Char> ; } + * { + NUM = match OPTION... { + -f--format FORMAT { ℝ~<Float 10 FORMAT>~<Seq Char> } + * { ℤ~<PosInt 10 BigEndian>~<Seq <Digit 10>~Char> } + }; + + SEP = match OPTION... { + -s--separator SEP { SEP } + * { '\n' } + }; + + $FIRST : NUM ; + $INCREMENT : NUM ; + $LAST : NUM ; + + <1 : <Seq NUM~<Seq Char>>~<SepSeq Char SEP>~<Seq Char> ; + } + } +} diff --git a/typeDB/xargs b/typeDB/xargs new file mode 100644 index 0000000..6380730 --- /dev/null +++ b/typeDB/xargs @@ -0,0 +1,38 @@ +::xargs [OPTION]... [CMD [INITIAL-ARGS]...] { + + DELIM = + match OPTION... { + -0--null { '\0' } + -d--delimiter D:Char { D } + }; + + match OPTION... { + -a--argfile PATH => { + # type of data read from file at `path` + + @PATH : ARGS~<Seq Char>... + ~<Seq <Seq Char>> + ~<SepSeq Char DELIM> + ~<Seq Char> ; + } + * { + # type of data read from stdin + + >0 : ARGS~<Seq Char>... + ~<Seq <Seq Char>> + ~<SepSeq Char DELIM> + ~<Seq Char> ; + } + } + + match OPTION... { + -I REPLACE-STR { + # TODO : how to handle replacement ? + <1 : cmdtype <1 { CMD { INITIAL-ARGS where REPLACE-STR -> ARGS... } } ; + } + * { + <1 : cmdtype <1 { CMD INITIAL-ARGS... ARGS... } ; + } + } + +}