diff --git a/on-host/arpist-traject/src/main.rs b/on-host/arpist-traject/src/main.rs
index 9818c45a37a8292b6435c1fe0748bacd0415d5e7..06f321f39fe60812dfcb423fb93c8ee027dfb949 100644
--- a/on-host/arpist-traject/src/main.rs
+++ b/on-host/arpist-traject/src/main.rs
@@ -1,3 +1,4 @@
+mod syntax;
 mod tokens;
 
 use logos::Logos;
diff --git a/on-host/arpist-traject/src/syntax.rs b/on-host/arpist-traject/src/syntax.rs
new file mode 100644
index 0000000000000000000000000000000000000000..0387a875f60a8e35cc9acb5f7b993ac292614e61
--- /dev/null
+++ b/on-host/arpist-traject/src/syntax.rs
@@ -0,0 +1,86 @@
+//! Abstract syntax tree for trajectory programs, plus the parser submodule.
+
+use std::ops::Range;
+
+mod parser;
+
+/// A whole parsed trajectory program: its lines, in source order.
+#[derive(Debug, Clone)]
+pub struct Program(Vec<Lines>);
+
+/// One logical line of a trajectory program.
+// Fixed: the payload types below are now `pub` as well — a `pub` enum whose
+// variants carry private types trips the `private_interfaces` lint.
+#[derive(Debug, Clone)]
+pub enum Lines {
+    TimeDefinition(TimeRef),
+    Filler,
+    Generator(Generator),
+    Parameter(Parameter),
+}
+
+/// A `name: expr` parameter assignment (e.g. a coordinate definition).
+#[derive(Debug, Clone)]
+pub struct Parameter {
+    span: Range<usize>,
+    name: String,
+    value: Expr,
+}
+
+/// An arithmetic expression tree.
+#[derive(Debug, Clone)]
+enum Expr {
+    Op(Box<Operation>),
+    Num(Number),
+    TimeVar(Range<usize>),
+    Group(GroupExpr),
+}
+
+impl Expr {
+    /// Byte span of this expression in the source text.
+    fn span(&self) -> Range<usize> {
+        match self {
+            Expr::Op(op) => op.span.clone(),
+            Expr::Num(num) => num.span.clone(),
+            Expr::TimeVar(ix) => ix.clone(),
+            Expr::Group(expr) => expr.span.clone(),
+        }
+    }
+}
+
+/// A parenthesised sub-expression.
+#[derive(Debug, Clone)]
+struct GroupExpr {
+    span: Range<usize>,
+    value: Box<Expr>,
+}
+
+/// A binary operation node.
+#[derive(Debug, Clone)]
+struct Operation {
+    span: Range<usize>,
+    op: Operator,
+    lhs: Box<Expr>,
+    rhs: Box<Expr>,
+}
+
+#[derive(Debug, Clone)]
+enum Operator {
+    Add,
+    Sub,
+    Mul,
+    Div,
+    Pow,
+}
+
+/// A numeric literal with its source span.
+#[derive(Debug, Clone)]
+struct Number {
+    span: Range<usize>,
+    value: f64,
+}
+
+/// A generator declaration (`[...]` line).
+#[derive(Debug, Clone)]
+pub struct Generator {
+    span: Range<usize>,
+    kind: GeneratorKind,
+}
+
+#[derive(Debug, Clone)]
+enum GeneratorKind {
+    Parametric,
+}
+
+/// A time reference with a signed offset.
+// NOTE(review): the unit of `offset` is not established by this file — confirm.
+#[derive(Debug, Clone)]
+pub struct TimeRef {
+    span: Range<usize>,
+    offset: f64,
+}
diff --git a/on-host/arpist-traject/src/syntax/parser.rs b/on-host/arpist-traject/src/syntax/parser.rs
new file mode 100644
index 0000000000000000000000000000000000000000..194c1719d67764599b14aee6e6afd851d9f5aaab
--- /dev/null
+++ b/on-host/arpist-traject/src/syntax/parser.rs
@@ -0,0 +1,433 @@
+//! Recursive-descent expression parser over the `logos` token stream.
+//!
+//! Parsing is transactional: a parse function receives a [`LexerView`],
+//! consumes tokens speculatively, and only `commit`s them to the parent on
+//! success; dropping the view without committing backtracks for free.
+
+// Fixed: removed unused imports (`iter::Peekable`, `ops::Deref`,
+// `logos::Lexer`) — nothing in this file uses them.
+use std::{collections::VecDeque, ops::Range};
+
+use logos::{Logos, SpannedIter};
+
+use super::*;
+use crate::tokens::Token;
+
+/// An iterator that can peek arbitrarily far ahead and discard consumed items.
+trait Lookahead: Iterator {
+    type LookedItem;
+
+    /// Returns the item `times` positions ahead without consuming anything.
+    fn lookahead(&mut self, times: usize) -> Option<Self::LookedItem>;
+    /// Irrevocably consumes `times` items.
+    fn digest(&mut self, times: usize);
+}
+
+/// A lexer wrapper that caches looked-ahead tokens so parsers can backtrack.
+#[derive(Clone)]
+struct CachedLexer<'a> {
+    iter: SpannedIter<'a, Token>,
+    cached: VecDeque<(Token, Range<usize>)>,
+}
+
+impl Iterator for CachedLexer<'_> {
+    type Item = (Token, Range<usize>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Fixed: return the *current* token, then advance. The previous
+        // `digest(1); lookahead(0)` order silently discarded the first token
+        // of a fresh lexer.
+        let item = self.lookahead(0);
+        self.digest(1);
+        item
+    }
+}
+
+impl Lookahead for CachedLexer<'_> {
+    type LookedItem = Self::Item;
+
+    fn digest(&mut self, times: usize) {
+        // Drop already-cached items first, then advance the underlying lexer
+        // for whatever remains.
+        let to_pop = times.min(self.cached.len());
+        for _ in 0..to_pop {
+            self.cached.pop_front();
+        }
+        for _ in to_pop..times {
+            self.iter.next();
+        }
+    }
+
+    fn lookahead(&mut self, times: usize) -> Option<Self::LookedItem> {
+        while self.cached.len() <= times {
+            let (token, range) = self.iter.next()?;
+            // NOTE(review): a lex error (`Err` from logos) panics here —
+            // consider surfacing it as a parse failure instead.
+            self.cached.push_back((token.unwrap(), range));
+        }
+        Some(self.cached[times].clone())
+    }
+}
+
+impl<'a> CachedLexer<'a> {
+    /// Creates a lexer over `input` with an empty lookahead cache.
+    fn new(input: &'a str) -> Self {
+        // Fixed: removed a leftover debug statement
+        // (`let s = lexer.clone().collect::<Vec<_>>();`) that eagerly ran the
+        // whole lexer over a clone and threw the result away.
+        CachedLexer {
+            iter: Token::lexer(input).spanned(),
+            cached: VecDeque::new(),
+        }
+    }
+
+    /// Opens a transactional view; consumption takes effect only on `commit`.
+    fn view(&mut self) -> LexerView {
+        LexerView {
+            ix: 0,
+            iter: Box::new(self),
+        }
+    }
+}
+
+/// Boxed handle to the parent `Lookahead` a view delegates to.
+type NextView<'a> =
+    Box<&'a mut dyn Lookahead<Item = (Token, Range<usize>), LookedItem = (Token, Range<usize>)>>;
+
+/// A speculative cursor over a parent lexer/view. Reads advance only the
+/// local offset `ix`; `commit` folds that offset back into the parent, while
+/// dropping the view discards it (backtracking).
+struct LexerView<'a> {
+    /// offset from iter position
+    ix: usize,
+    /// parent lookahead iterator
+    iter: NextView<'a>,
+}
+
+impl Iterator for LexerView<'_> {
+    type Item = (Token, Range<usize>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Peek at the current offset and advance the local cursor only;
+        // nothing is consumed from the parent until `commit`.
+        let res = self.iter.lookahead(self.ix);
+        self.ix += 1;
+        res
+    }
+}
+
+impl Lookahead for LexerView<'_> {
+    type LookedItem = (Token, Range<usize>);
+
+    fn lookahead(&mut self, times: usize) -> Option<Self::LookedItem> {
+        self.iter.lookahead(self.ix + times)
+    }
+
+    fn digest(&mut self, times: usize) {
+        self.ix += times;
+    }
+}
+
+impl<'a, 'b> LexerView<'a> {
+    /// Commits everything this view consumed to the parent.
+    fn commit(self) {
+        self.iter.digest(self.ix);
+    }
+
+    /// Peeks at the next unconsumed token.
+    fn peek(&mut self) -> Option<(Token, Range<usize>)> {
+        self.lookahead(0)
+    }
+
+    /// Opens a nested speculative view on top of this one.
+    fn view(&'b mut self) -> LexerView {
+        LexerView {
+            ix: 0,
+            iter: Box::new(self),
+        }
+    }
+}
+
+// TODO(review): the line-level productions from the previous, pre-LexerView
+// draft (which targeted a `ParserView` type that no longer exists) were
+// removed here rather than kept as commented-out dead code. Re-implement
+// them on top of `LexerView`:
+//   program   := line*
+//   line      := AtTimeLine time-ref | GeneratorLine generator
+//              | ParameterLine parameter | FillerLine
+//   time-ref  := TimeRef (OpAdd | OpSub) number
+//   generator := LBracket ParametricGenerator RBracket
+//   parameter := (PositionX | PositionY | PositionZ) Colon expr
else { +// return None; +// }; + +// // get the coordinate +// let coordinate = match token { +// Token::PositionX => "x", +// Token::PositionY => "y", +// Token::PositionZ => "z", +// _ => return None, +// }; + +// // need the semi-colon +// consume_any_space(lex); +// let (semi_token, _) = lex.next()?; +// let Ok(Token::Colon) = semi_token else { +// return None; +// }; +// consume_any_space(lex); + +// // get the expression +// let expr = parse_expr(lex)?; + +// Some(Parameter { +// span: name_range.start..expr.span().end, +// name: coordinate.to_string(), +// value: expr, +// }) +// } + +fn parse_expr(mut view: LexerView<'_>) -> Option<Expr> { + let res = parse_sum(view.view()); + view.commit(); + res +} + +fn parse_sum(mut view: LexerView<'_>) -> Option<Expr> { + let lhs = parse_product(view.view())?; + consume_any_space(view.view()); + let res = match view.peek() { + Some((Token::OpAdd, _)) => { + view.next(); + consume_any_space(view.view()); + let rhs = parse_sum(view.view())?; + Expr::Op(Box::new(Operation { + span: lhs.span().start..rhs.span().end, + op: Operator::Add, + lhs: Box::new(lhs), + rhs: Box::new(rhs), + })) + } + Some((Token::OpSub, _)) => { + view.next(); + consume_any_space(view.view()); + let rhs = parse_sum(view.view())?; + Expr::Op(Box::new(Operation { + span: lhs.span().start..rhs.span().end, + op: Operator::Sub, + lhs: Box::new(lhs), + rhs: Box::new(rhs), + })) + } + _ => lhs, + }; + view.commit(); + Some(res) +} + +fn parse_product(mut view: LexerView<'_>) -> Option<Expr> { + let lhs = parse_power(view.view())?; + consume_any_space(view.view()); + let res = match view.peek() { + Some((Token::OpMul, _)) => { + view.next(); + consume_any_space(view.view()); + let rhs = parse_product(view.view())?; + Expr::Op(Box::new(Operation { + span: lhs.span().start..rhs.span().end, + op: Operator::Mul, + lhs: Box::new(lhs), + rhs: Box::new(rhs), + })) + } + Some((Token::OpDiv, _)) => { + view.next(); + consume_any_space(view.view()); + let rhs = 
parse_product(view.view())?; + Expr::Op(Box::new(Operation { + span: lhs.span().start..rhs.span().end, + op: Operator::Div, + lhs: Box::new(lhs), + rhs: Box::new(rhs), + })) + } + _ => lhs, + }; + view.commit(); + Some(res) +} + +fn parse_power(mut view: LexerView<'_>) -> Option<Expr> { + let lhs = parse_expr_value(view.view())?; + consume_any_space(view.view()); + let res = if let Some((Token::OpPow, _)) = view.peek() { + view.next(); + consume_any_space(view.view()); + let rhs = parse_power(view.view())?; + Expr::Op(Box::new(Operation { + span: lhs.span().start..rhs.span().end, + op: Operator::Pow, + lhs: Box::new(lhs), + rhs: Box::new(rhs), + })) + } else { + lhs + }; + view.commit(); + Some(res) +} + +fn parse_expr_value(mut view: LexerView<'_>) -> Option<Expr> { + if let Some(number) = parse_number(view.view()) { + view.commit(); + return Some(Expr::Num(number)); + } + if let Some(group) = parse_group(view.view()) { + view.commit(); + return Some(Expr::Group(group)); + } + + None +} + +fn parse_number(mut view: LexerView<'_>) -> Option<Number> { + let (token, range) = view.next()?; + let res = match token { + Token::Num(n) => Number { + span: range, + value: n, + }, + Token::OpSub => { + let n = parse_number(view.view())?; + Number { + span: range.start..n.span.end, + value: -n.value, + } + } + _ => return None, + }; + view.commit(); + Some(res) +} + +fn parse_group(mut view: LexerView<'_>) -> Option<GroupExpr> { + if let (Token::LParen, s) = view.next()? { + let expr = parse_expr(view.view())?; + if let (Token::RParen, e) = view.next()? 
{ + view.commit(); + return Some(GroupExpr { + span: s.start..e.end, + value: Box::new(expr), + }); + } + } + None +} + +fn consume_any_space(mut view: LexerView<'_>) { + while let Some((Token::Spaces, _)) = view.peek() { + view.next(); + } + view.commit(); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_num() { + let mut lex = CachedLexer::new("10.23"); + assert_eq!(parse_number(lex.view()).unwrap().value, 10.23); + + let mut lex = CachedLexer::new("-123"); + assert_eq!(parse_number(lex.view()).unwrap().value, -123.0); + + let mut lex = CachedLexer::new("0"); + assert_eq!(parse_number(lex.view()).unwrap().value, 0.0); + + let mut lex = CachedLexer::new("-0.0"); + assert_eq!(parse_number(lex.view()).unwrap().value, 0.0); + + let mut lex = CachedLexer::new("42 12.2"); + assert_eq!(parse_number(lex.view()).unwrap().value, 42.0); + lex.next(); + assert_eq!(parse_number(lex.view()).unwrap().value, 12.2); + + let mut lex = CachedLexer::new("T12.2"); + assert!(parse_number(lex.view()).is_none()); + lex.next(); + assert_eq!(parse_number(lex.view()).unwrap().value, 12.2); + } + + #[test] + fn test_parse_expr() { + let mut lex = CachedLexer::new("(3.4+2)*3.1"); + let expr = parse_expr(lex.view()).unwrap(); + + let mut lex = CachedLexer::new("3.4+2*3.1"); + let expr = parse_expr(lex.view()).unwrap(); + + let mut lex = CachedLexer::new("(3^2+1) / 2"); + let expr = parse_expr(lex.view()).unwrap(); + } +} diff --git a/on-host/arpist-traject/src/tokens.rs b/on-host/arpist-traject/src/tokens.rs index 2654f3103c00acfdccd4980c56f119cbeef39664..38775805f50f052322bba5e5a219c267e9befc65 100644 --- a/on-host/arpist-traject/src/tokens.rs +++ b/on-host/arpist-traject/src/tokens.rs @@ -1,6 +1,6 @@ use logos::Logos; -#[derive(Debug, PartialEq, Logos)] +#[derive(Clone, Debug, PartialEq, Logos)] pub enum Token { #[regex("[ \t]+")] Spaces, diff --git a/on-host/arpist-traject/grammar.txt b/on-host/grammar.txt similarity index 100% rename from 
on-host/arpist-traject/grammar.txt rename to on-host/grammar.txt diff --git a/on-host/arpist-traject/test.txt b/on-host/test.txt similarity index 100% rename from on-host/arpist-traject/test.txt rename to on-host/test.txt