basic parser

This commit is contained in:
Michael Sippel 2023-10-26 20:25:56 +02:00
parent 7988c8a2e1
commit 695cbb24f1
Signed by: senvas
GPG key ID: 060F22F65102F95C
3 changed files with 392 additions and 113 deletions

View file

@ -1,30 +1,32 @@
use std::boxed::Box;
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub enum Command { pub enum Command {
Simple { Simple {
assignments: Vec<(String, Word)>, assignments: Vec<Assignment>,
command_word: Word, command_word: Word,
redirections: Vec<Redirection> redirections: Vec<Redirection>
}, },
Pipeline(Vec<Command>), Pipeline(Vec<Command>),
Sequence(Vec<Command>), Sequence(Vec<Command>),
ShortCircuitConjection(Vec<Command>), ShortCircuitConjunction(Vec<Command>),
ShortCircuitDisjunction(Vec<Command>), ShortCircuitDisjunction(Vec<Command>),
Negation(Command), Negation(Box<Command>),
While { While {
condition: Command, condition: Box<Command>,
loop_body: Command loop_body: Box<Command>
}, },
For { For {
varname: String, varname: String,
sequence: Word, sequence: Word,
loop_body: Command loop_body: Box<Command>
} },
If { If {
condition: Command, condition: Box<Command>,
then_branch: Command, then_branch: Box<Command>,
else_branch: Command else_branch: Box<Command>
}, },
Case { Case {
expr: Word, expr: Word,
@ -32,35 +34,25 @@ pub enum Command {
}, },
Function { Function {
name: String, name: String,
body: Command body: Box<Command>
} }
} }
/*
* We are all luminous beings.
* Why then, do we not appear before each
* other radiant in our illumination ?
*/
/*
* Between the idea
* And the reality
* Between the motion
* And the act
* Falls the Shadow
* (T.S. Eliot)
*/
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub struct Assignment {
pub name: String,
pub value: Word
}
#[derive(Debug, PartialEq)]
pub struct Word { pub struct Word {
pub segments: Vec<WordSegment> pub segments: Vec<WordSegment>
} }
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub enum WordSegment { pub enum WordSegment {
FieldSeparator,
Tilde(String), Tilde(String),
Literal(String), Literal(String),
Parameter(String, ParameterFormat), Parameter(String, ParameterFormat),
@ -68,7 +60,7 @@ pub enum WordSegment {
DoubleQuote(Word), DoubleQuote(Word),
} }
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub enum ParameterFormat { pub enum ParameterFormat {
Normal, Normal,
Length, Length,
@ -79,42 +71,42 @@ pub enum ParameterFormat {
Sub(ParamSubSide, ParamSubMode, Word), Sub(ParamSubSide, ParamSubMode, Word),
} }
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub enum ParamSubMode { pub enum ParamSubMode {
Shortest, Longest Shortest, Longest
} }
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub enum ParamSubSide { pub enum ParamSubSide {
Prefix, Suffix Prefix, Suffix
} }
//<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\ //<<<<>>>><<>><><<>><<<*>>><<>><><<>><<<<>>>>\\
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub struct Redirection { pub struct Redirection {
redirection_type: RedirectionType, redirection_type: RedirectionType,
fd: u64, fd: u64,
target: Word target: Word
} }
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub enum RedirectionType { pub enum RedirectionType {
File(FileRedirectionType), File(FileRedirectionType),
Dup(DupRedirectionType), Dup(DupRedirectionType),
Heredoc // '<<' Heredoc // '<<'
} }
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub enum FileRedirectionType { pub enum FileRedirectionType {
In, // '<' In, // '<'
InOut, // '<>' InOut, // '<>'
Out, // '>' Out, // '>'
OutReplace, // '>|' OutReplace, // '>|'
OutAppend, // '>|' OutAppend, // '>>'
} }
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub enum DupRedirectionType { pub enum DupRedirectionType {
In, // '<&' In, // '<&'
Out // '>&' Out // '>&'

View file

@ -1,3 +1,5 @@
#![feature(iterator_try_collect)]
use { use {
laddertypes::*, laddertypes::*,
std::io::BufRead, std::io::BufRead,
@ -38,10 +40,14 @@ fn main() {
let stdin = std::io::stdin(); let stdin = std::io::stdin();
for line in std::io::BufReader::new(stdin).lines() { for line in std::io::BufReader::new(stdin).lines() {
if let Ok(line) = line { if let Ok(line) = line {
let cmd = parse::parse_cmd( &mut line.chars().peekable() );
eprintln!("parsed cmd: {:?}", cmd);
/*
let mut lex = parse::WordLexer::from( line.chars() ); let mut lex = parse::WordLexer::from( line.chars() );
for word in lex { for word in lex {
eprintln!("word-segment: {:?}", word); eprintln!("word-segment: {:?}", word);
} }
*/
} }
} }

View file

@ -1,105 +1,368 @@
use { use {
crate::ast::*, crate::ast::*,
std::iter::{Peekable, FromIterator}, std::iter::{Peekable},
}; };
pub struct WordLexer<It>
where It: Iterator<Item = char> {
chars: Peekable<It>
}
impl<It> From<It> for WordLexer<It> #[derive(Debug, PartialEq)]
where It: Iterator<Item = char> {
fn from(iter: It) -> Self {
WordLexer {
chars: iter.into_iter().peekable()
}
}
}
#[derive(Debug)]
pub enum LexError { pub enum LexError {
UnexpectedEnd(char) UnexpectedEnd(Vec<Option<char>>),
UnexpectedToken(char),
InvalidFileRedirectionType
} }
impl<It> WordLexer<It>
///! iterates chars until it finds some char in `delim`
pub struct DelimIter<'a, It>
where It: Iterator<Item = char> { where It: Iterator<Item = char> {
fn collect_until(&mut self, close: char) -> Result<String, LexError> { chars: &'a mut Peekable<It>,
let mut val = String::new(); delim: Vec<(Option<char>, bool)>
while let Some(c) = self.chars.peek().cloned() {
if c == close {
return Ok(val)
} else {
self.chars.next();
val.push(c);
}
} }
if close.is_whitespace() { impl<'a, It> DelimIter<'a, It>
Ok(val)
} else {
Err(LexError::UnexpectedEnd(close))
}
}
}
impl<It> Iterator for WordLexer<It>
where It: Iterator<Item = char> { where It: Iterator<Item = char> {
type Item = Result<WordSegment, LexError>; fn new(chars: &'a mut Peekable<It>, delim: Vec<(Option<char>, bool)>) -> Self {
DelimIter { chars, delim }
}
fn next(&mut self) -> Option<Result<WordSegment, LexError>> { fn new_whitespace(chars: &'a mut Peekable<It>) -> Self {
match self.chars.peek().cloned() { DelimIter::new(chars, vec![
Some('~') => { (None, true),
self.chars.next(); (Some(' '), true),
match self.collect_until(' ') { (Some('\t'), true),
Ok(s) => Some(Ok(WordSegment::Tilde(s))), (Some('\n'), true)
Err(e) => Some(Err(e)) ])
}
fn new_shell_word(chars: &'a mut Peekable<It>) -> Self {
DelimIter::new(chars, vec![
(None, true),
(Some(' '), true),
(Some('\t'), true),
(Some('\n'), true),
(Some('|'), false),
(Some('&'), false),
(Some(';'), false),
(Some('\"'), false),
(Some('\''), false)
])
} }
} }
Some('"') => {
impl<'a, It> Iterator for DelimIter<'a, It>
where It: 'a + Iterator<Item = char> {
type Item = Result<char, LexError>;
fn next(&mut self) -> Option<Result<char, LexError>> {
for (delim, consume) in self.delim.iter() {
if self.chars.peek().cloned() == *delim {
if *consume {
self.chars.next(); self.chars.next();
match self.collect_until('"') { }
return None;
}
}
match self.chars.next() {
Some(c) => Some(Ok(c)),
None => Some(Err(LexError::UnexpectedEnd(vec![])))
}
}
}
/// Streaming lexer that yields `WordSegment`s from a borrowed, peekable
/// character stream.
pub struct WordLexer<'a, It>
where It: 'a + Iterator<Item = char> {
    // Borrowed (not owned) so the caller keeps the underlying iterator
    // and can continue consuming characters after the lexer stops.
    chars: &'a mut Peekable<It>
}

impl<'a, It> WordLexer<'a, It>
where It: Iterator<Item = char> {
    /// Collects characters up to (and consuming) `close`.
    /// Passing `None` as the delimiter means "collect until end of input".
    /// Errors propagate from `DelimIter` (nightly `iterator_try_collect`).
    fn collect_until(&mut self, close: Option<char>) -> Result<String, LexError> {
        DelimIter::new(&mut self.chars, vec![(close, true)])
            .try_collect::<String>()
    }
}
/// Advances `chars` past any leading run of whitespace characters,
/// leaving the first non-whitespace character (if any) still unconsumed.
pub fn skip_whitespace<It>(chars: &mut Peekable<It>)
where It: Iterator<Item = char>
{
    // Peek first so the non-whitespace character that ends the run
    // stays in the stream for the caller.
    while chars.peek().map_or(false, |c| c.is_whitespace()) {
        chars.next();
    }
}
pub fn parse_quoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError>
where It: Iterator<Item = char>
{
assert_eq!( chars.next(), Some('\''));
let quoted = DelimIter::new(chars, vec![(Some('\''), true)]).try_collect::<String>();
match quoted {
Ok(s) => { Ok(s) => {
self.chars.next(); Ok(WordSegment::Literal(s))
},
Err(e) => Err(e)
}
}
pub fn parse_doublequoted<It>(chars: &mut Peekable<It>) -> Result<WordSegment, LexError>
where It: Iterator<Item = char>
{
assert_eq!( chars.next(), Some('\"'));
let quoted = DelimIter::new(chars, vec![(Some('\"'), true)]).try_collect::<String>();
match quoted {
Ok(s) => {
let word = Word { let word = Word {
segments: WordLexer { chars: s.chars().peekable() } segments: // fixme: handle spaces correctly -> create QuoteLexer
WordLexer { chars: &mut s.chars().peekable() }
.scan((), |_, x| x.ok()) .scan((), |_, x| x.ok())
.collect::<Vec<_>>() .collect::<Vec<_>>()
}; };
Some(Ok(WordSegment::DoubleQuote(word))) Ok(WordSegment::DoubleQuote(word))
}, },
Err(e) => Err(e)
}
}
/// Lexes a complete shell word from `chars`, gathering every
/// `WordSegment` the `WordLexer` produces until it terminates.
///
/// Returns the first `LexError` the lexer reports, if any.
pub fn parse_word<It>(chars: &mut Peekable<It>) -> Result<Word, LexError>
where It: Iterator<Item = char>
{
    Ok(Word {
        // `try_collect` (nightly `iterator_try_collect`) short-circuits
        // on the first Err yielded by the lexer.
        segments: WordLexer{ chars }.try_collect::<Vec<_>>()?
    })
}
/// Parses a shell `name=value` assignment from the stream.
///
/// Reads the variable name up to (and consuming) the `=`, then takes the
/// rest of the word up to whitespace as the raw value text and lexes it
/// into a `Word`.
pub fn parse_assignment<It>(chars: &mut Peekable<It>) -> Result<Assignment, LexError>
where It: Iterator<Item = char>
{
    // Everything before the first '=' is the name; the '=' itself is consumed.
    let name = DelimIter::new(chars, vec![(Some('='), true)]).try_collect::<String>()?;
    // The raw value runs until the next whitespace character (or end of input).
    let value_str = DelimIter::new_whitespace(chars).try_collect::<String>()?;
    // Re-lex the value so it can contain quotes, parameters, etc.
    let value = parse_word(&mut value_str.chars().peekable())?;
    Ok(Assignment{ name, value })
}
impl std::str::FromStr for FileRedirectionType {
type Err = LexError;
fn from_str(s: &str) -> Result<FileRedirectionType, LexError> {
match s {
"<" => Ok(FileRedirectionType::In),
"<>" => Ok(FileRedirectionType::InOut),
">" => Ok(FileRedirectionType::Out),
">|" => Ok(FileRedirectionType::OutReplace),
">>" => Ok(FileRedirectionType::OutAppend),
_ => Err(LexError::InvalidFileRedirectionType)
}
}
}
/// Parses an I/O redirection (e.g. `2>file`, `<&3`) from the stream.
///
/// NOTE(review): currently a stub — it consumes no input and always
/// returns `Err(InvalidFileRedirectionType)`. Callers relying on this
/// will never see a `Redirection` until it is implemented.
pub fn parse_redirection<It>(chars: &mut Peekable<It>) -> Result<Redirection, LexError>
where It: Iterator<Item = char>
{
    Err(LexError::InvalidFileRedirectionType)
    // let name = DelimIterator::new(chars, vec!['<', '>']).collect::<String>();
}
/// Parses a simple command: optional leading `name=value` assignments
/// followed by the command word. Returns `Ok(None)` on empty input.
///
/// NOTE(review): two suspected defects to confirm against intent:
/// 1. When `first` contains '=', `parse_assignment(chars)` parses the
///    *next* word from the stream — the word already consumed into
///    `first` appears to be discarded, so the first assignment's text
///    is lost. TODO verify.
/// 2. `redirections` is created but never populated (parse_redirection
///    is a stub), so `Command::Simple` always carries an empty list.
pub fn parse_simple_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError>
where It: Iterator<Item = char>
{
    let mut assignments = Vec::new();
    let mut redirections = Vec::new();

    // Empty input: no command at all.
    if chars.peek() == None {
        return Ok(None);
    }

    // Read the first shell word; words containing '=' are treated as
    // assignment prefixes until a plain word (the command name) is found.
    let mut first = DelimIter::new_shell_word(chars).try_collect::<String>()?;
    while first.contains('=') {
        assignments.push( parse_assignment(chars)? );
        first = DelimIter::new_shell_word(chars).try_collect::<String>()?;
    }

    // Lex the remaining segments of the command word and prepend the
    // already-consumed first word as a literal segment.
    let mut cmd_segments = WordLexer{ chars }.try_collect::<Vec<_>>()?;
    cmd_segments.insert(0, WordSegment::Literal(first));

    Ok(Some(Command::Simple {
        assignments,
        command_word: Word { segments: cmd_segments },
        redirections,
    }))
}
/// Recursive-descent parser for a full command line.
///
/// Handles, in order: leading `!` negation, a simple command head, then
/// an optional trailing operator — `;` (sequence), `|` / `||`
/// (pipeline / short-circuit disjunction), `&&` (short-circuit
/// conjunction). The right-hand side is parsed recursively; when the
/// tail is already the same list-shaped variant, the head is prepended
/// so chains flatten into a single Vec instead of nesting.
///
/// Returns `Ok(None)` for empty input, `Err` on a dangling operator or
/// an unexpected token after the command.
pub fn parse_cmd<It>(chars: &mut Peekable<It>) -> Result<Option<Command>, LexError>
where It: Iterator<Item = char>
{
    skip_whitespace(chars);
    match chars.peek() {
        Some('!') => {
            // Negation prefix: `! cmd` — requires a command to follow.
            chars.next();
            if let Some(cmd) = parse_cmd(chars)? {
                Ok(Some(Command::Negation(Box::new(cmd))))
            } else {
                Err(LexError::UnexpectedEnd(vec![]))
            }
        }
        _ => {
            if let Some(head) = parse_simple_cmd(chars)? {
                skip_whitespace(chars);
                match chars.peek() {
                    Some(';') => {
                        chars.next();
                        let tail = parse_cmd( chars ) ?;
                        match tail {
                            // Flatten: prepend head onto an existing sequence.
                            Some(Command::Sequence(mut s)) => {
                                s.insert(0, head);
                                Ok(Some(Command::Sequence(s)))
                            }
                            Some(tail) => {
                                Ok(Some(Command::Sequence(vec![ head, tail ])))
                            }
                            // Trailing ';' with nothing after it is legal.
                            None => {
                                Ok(Some(head))
                            }
                        }
                    }
                    Some('|') => {
                        chars.next();
                        match chars.peek() {
                            Some('|') => {
                                // `||` — short-circuit disjunction.
                                chars.next();
                                let tail = parse_cmd( chars ) ?;
                                match tail {
                                    Some(Command::ShortCircuitDisjunction(mut s)) => {
                                        s.insert(0, head);
                                        Ok(Some(Command::ShortCircuitDisjunction(s)))
                                    }
                                    Some(tail) => {
                                        Ok(Some(Command::ShortCircuitDisjunction(vec![ head, tail ])))
                                    }
                                    // `cmd ||` with no right-hand side is an error.
                                    None => {
                                        Err(LexError::UnexpectedEnd(vec![Some('|')]))
                                    }
                                }
                            }
                            _ => {
                                // Single `|` — pipeline.
                                let tail = parse_cmd( chars ) ?;
                                match tail {
                                    Some(Command::Pipeline(mut s)) => {
                                        s.insert(0, head);
                                        Ok(Some(Command::Pipeline(s)))
                                    }
                                    Some(tail) => {
                                        Ok(Some(Command::Pipeline(vec![ head, tail ])))
                                    }
                                    // `cmd |` with no right-hand side is an error.
                                    None => {
                                        Err(LexError::UnexpectedEnd(vec![]))
                                    }
                                }
                            }
                        }
                    }
                    Some('&') => {
                        chars.next();
                        match chars.peek() {
                            Some('&') => {
                                // `&&` — short-circuit conjunction.
                                chars.next();
                                let tail = parse_cmd( chars ) ?;
                                match tail {
                                    Some(Command::ShortCircuitConjunction(mut s)) => {
                                        s.insert(0, head);
                                        Ok(Some(Command::ShortCircuitConjunction(s)))
                                    }
                                    Some(tail) => {
                                        Ok(Some(Command::ShortCircuitConjunction(vec![ head, tail ])))
                                    }
                                    // `cmd &&` with no right-hand side is an error.
                                    None => {
                                        Err(LexError::UnexpectedEnd(vec![Some('&'), Some('&')]))
                                    }
                                }
                            }
                            // A lone '&' followed by anything else is rejected.
                            Some(c) => {
                                Err(LexError::UnexpectedToken(*c))
                            }
                            None => {
                                // todo:
                                // background job
                                Ok(Some(head))
                            }
                        }
                    }
                    // Any other trailing character after the command is an error.
                    Some(c) => {
                        Err(LexError::UnexpectedToken(*c))
                    }
                    None => {
                        Ok(Some(head))
                    }
                }
            } else {
                Ok(None)
            }
        }
    }
}
impl<'a, It> Iterator for WordLexer<'a, It>
where It: 'a + Iterator<Item = char> {
type Item = Result<WordSegment, LexError>;
fn next(&mut self) -> Option<Result<WordSegment, LexError>> {
skip_whitespace(self.chars);
match self.chars.peek().cloned() {
Some('|') => { None }
Some('&') => { None }
Some(';') => { None }
Some('~') => {
self.chars.next();
let user = DelimIter::new_whitespace(self.chars).collect();
match user {
Ok(user) => Some(Ok(WordSegment::Tilde(user))),
Err(e) => Some(Err(e)) Err(e) => Some(Err(e))
} }
},
Some('\'') => {
self.chars.next();
match self.collect_until('\'') {
Ok(s) => {
self.chars.next();
Some(Ok(WordSegment::Literal(s)))
},
Err(e) => Some(Err(e))
} }
}, Some('"') => { Some(parse_doublequoted(self.chars)) },
Some('\'') => { Some(parse_quoted(self.chars)) },
Some('$') => { Some('$') => {
self.chars.next(); self.chars.next();
match self.chars.peek() { match self.chars.peek() {
Some('{') => { Some('{') => {
self.chars.next(); self.chars.next();
match self.collect_until('}') { match DelimIter::new(&mut self.chars, vec![(Some('}'), true)]).try_collect::<String>() {
Ok(s) => { Ok(s) => {
self.chars.next(); Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal)))
Some(Ok(WordSegment::Variable(s)))
} }
Err(e) => Some(Err(e)) Err(e) => Some(Err(e))
} }
} }
Some('(') => {
self.chars.next();
let subcmd_str = DelimIter::new(&mut self.chars, vec![(Some(')'), true)]).try_collect::<String>();
match subcmd_str {
Ok(subcmd_str) => {
match parse_cmd(&mut subcmd_str.chars().peekable()) {
Ok(Some(subcmd)) => {
Some(Ok(WordSegment::Subshell(subcmd)))
}
Ok(None) => None,
Err(err) => Some(Err(err))
}
}
Err(err) => Some(Err(err))
}
}
_ => { _ => {
match self.collect_until(' ') { match DelimIter::new_whitespace(self.chars).collect() {
Ok(s) => { Ok(s) => {
Some(Ok(WordSegment::Variable(s))) Some(Ok(WordSegment::Parameter(s, ParameterFormat::Normal)))
} }
Err(e) => Some(Err(e)) Err(e) => Some(Err(e))
} }
@ -107,20 +370,12 @@ where It: Iterator<Item = char> {
} }
} }
Some(c) => { Some(c) => {
while let Some(c) = self.chars.peek() { let s : Result<String, LexError> = DelimIter::new_shell_word(self.chars).collect();
if c.is_whitespace() { match s {
self.chars.next(); Ok(s) => Some(Ok(WordSegment::Literal(s))),
} else {
return match self.collect_until(' ') {
Ok(s) => {
Some(Ok(WordSegment::Literal(s)))
}
Err(e) => Some(Err(e)) Err(e) => Some(Err(e))
};
} }
} }
None
}
None => { None => {
None None
} }
@ -128,3 +383,29 @@ where It: Iterator<Item = char> {
} }
} }
// Unit tests for the lexer helpers.
// `#[cfg(test)]` keeps this module (and its `use crate::parse::*`)
// out of non-test builds; without it the module was compiled into
// every build.
#[cfg(test)]
mod test {
    use crate::parse::*;

    // DelimIter stops at the first shell-word delimiter (here: space)
    // and leaves the rest of the input untouched.
    #[test]
    fn test_delim_iter() {
        let mut cs = "test 1234".chars().peekable();
        let mut lexer = DelimIter::new_shell_word(&mut cs);
        assert_eq!(lexer.try_collect::<String>(), Ok(String::from("test")));
    }

    // WordLexer yields one literal per word and stops (without consuming)
    // at the '|' operator, so the caller can still read it.
    #[test]
    fn test_word_lexer() {
        let mut cs = "test 1234|test".chars().peekable();
        {
            let mut lexer = WordLexer{ chars: &mut cs };
            assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("test")))));
            assert_eq!(lexer.next(), Some(Ok(WordSegment::Literal(String::from("1234")))));
            assert_eq!(lexer.next(), None);
        }
        assert_eq!(cs.next(), Some('|'));
    }
}