From f3616604f8119ed3a69a34e257de301146f70dd5 Mon Sep 17 00:00:00 2001 From: Federico Lolli <federico.lolli@skywarder.eu> Date: Sun, 17 Mar 2024 00:05:56 +0100 Subject: [PATCH] First AST built --- on-host/arpist-traject/src/main.rs | 20 +- on-host/arpist-traject/src/syntax.rs | 63 ++++- on-host/arpist-traject/src/syntax/parser.rs | 291 +++++++++++--------- on-host/arpist-traject/src/tokens.rs | 14 +- 4 files changed, 227 insertions(+), 161 deletions(-) diff --git a/on-host/arpist-traject/src/main.rs b/on-host/arpist-traject/src/main.rs index 06f321f..5f289aa 100644 --- a/on-host/arpist-traject/src/main.rs +++ b/on-host/arpist-traject/src/main.rs @@ -1,16 +1,20 @@ +use syntax::CachedLexer; + mod syntax; mod tokens; -use logos::Logos; +// use logos::Logos; -use self::tokens::Token; +// use self::tokens::Token; fn main() { let input = std::fs::read_to_string("test.txt").unwrap(); - let lex = Token::lexer(&input); - let tokens = lex.spanned().collect::<Vec<_>>(); - for token in &tokens { - println!("{:?}", token.0.as_ref().unwrap()); - } - // dbg!(tokens); + // let lex = Token::lexer(&input); + // let tokens = lex.spanned().collect::<Vec<_>>(); + // for token in &tokens { + // println!("{:?}", token.0.as_ref().unwrap()); + // } + + let p = CachedLexer::new(&input).parse_program(); + dbg!(p); } diff --git a/on-host/arpist-traject/src/syntax.rs b/on-host/arpist-traject/src/syntax.rs index dcedcc9..8f6a1cf 100644 --- a/on-host/arpist-traject/src/syntax.rs +++ b/on-host/arpist-traject/src/syntax.rs @@ -2,26 +2,48 @@ use std::ops::Range; mod parser; +pub use parser::CachedLexer; + #[derive(Debug, Clone)] -struct Program(Vec<Lines>); +pub struct Program(Vec<Blocks>); #[derive(Debug, Clone)] -pub enum Lines { +pub enum Blocks { TimeDefinition(TimeRef), - Filler, - Generator(Generator), - Parameter(Parameter), + Generator(GeneratorBlock), + Setting(SettingBlock), +} + +#[derive(Debug, Clone)] +pub struct SettingBlock { + span: Range<usize>, + name: Setting, + settings: Vec<NumberParameter>, } #[derive(Debug, Clone)] -struct Parameter { +pub struct GeneratorBlock { + span: Range<usize>, + generator: Generator, + parameters: Vec<ExprParameter>, +} + +#[derive(Debug, Clone)] +pub struct NumberParameter { + span: Range<usize>, + name: String, + value: Number, +} + +#[derive(Debug, Clone)] +pub struct ExprParameter { span: Range<usize>, name: String, value: TimeExpr, } #[derive(Debug, Clone)] -enum TimeExpr { +pub enum TimeExpr { Op(Box<Operation>), Num(Number), TimeVar(TimeVar), @@ -40,13 +62,13 @@ impl TimeExpr { } #[derive(Debug, Clone)] -struct GroupExpr { +pub struct GroupExpr { span: Range<usize>, value: Box<TimeExpr>, } #[derive(Debug, Clone)] -struct Operation { +pub struct Operation { span: Range<usize>, op: Operator, lhs: Box<TimeExpr>, @@ -54,7 +76,7 @@ struct Operation { } #[derive(Debug, Clone)] -enum Operator { +pub enum Operator { Add, Sub, Mul, @@ -63,29 +85,40 @@ enum Operator { } #[derive(Debug, Clone)] -struct Number { +pub struct Number { span: Range<usize>, value: f64, } #[derive(Debug, Clone)] -struct Generator { +pub struct Generator { span: Range<usize>, kind: GeneratorKind, } #[derive(Debug, Clone)] -enum GeneratorKind { +pub enum GeneratorKind { PositionParametric, } #[derive(Debug, Clone)] -struct TimeRef { +pub struct Setting { + span: Range<usize>, + name: SettingKind, +} + +#[derive(Debug, Clone)] +pub enum SettingKind { + GPS, +} + +#[derive(Debug, Clone)] +pub struct TimeRef { span: Range<usize>, offset: f64, } #[derive(Debug, Clone)] -struct TimeVar { +pub struct TimeVar { span: Range<usize>, } diff --git a/on-host/arpist-traject/src/syntax/parser.rs b/on-host/arpist-traject/src/syntax/parser.rs index 892dcba..58d024c 100644 --- a/on-host/arpist-traject/src/syntax/parser.rs +++ b/on-host/arpist-traject/src/syntax/parser.rs @@ -17,7 +17,7 @@ trait Lookahead: Iterator { } #[derive(Clone)] -struct CachedLexer<'a> { +pub struct CachedLexer<'a> { iter: SpannedIter<'a, Token>, cached: VecDeque<(Token, Range<usize>)>, } @@ -54,7 +54,7 @@ impl<'a> Lookahead for CachedLexer<'a> { } impl<'a> CachedLexer<'a> { - fn new(input: &'a str) -> Self { + pub fn new(input: &'a str) -> Self { let lexer = Token::lexer(input).spanned(); let s = lexer.clone().collect::<Vec<_>>(); CachedLexer { @@ -69,6 +69,10 @@ impl<'a> CachedLexer<'a> { iter: Box::new(self), } } + + pub fn parse_program(&mut self) -> Option<Program> { + parse_program(self.view()) + } } type NextView<'a> = @@ -120,131 +124,142 @@ impl<'a, 'b> LexerView<'a> { } } -// fn parse_program<L: Lookahead>(lex: &mut ParserView<'_, L>) -> Option<Program> -// where -// L::LookedItem: std::fmt::Debug, -// { -// println!("{:?}", lex.peek()); -// println!("parse_program"); -// let mut lines = Vec::new(); -// while let Some(line) = parse_line(lex) { -// lines.push(line); -// } -// Some(Program(lines)) -// } - -// fn parse_line<L: Lookahead>(lex: &mut ParserView<'_, L>) -> Option<Lines> -// where -// L::LookedItem: std::fmt::Debug, -// { -// println!("{:?}", lex.peek()); -// println!("parse_line"); -// consume_any_space(lex); -// let (token, range) = lex.next()?; -// match token { -// Ok(Token::AtTimeLine) => { -// consume_any_space(lex); -// let time_ref = parse_time_ref(lex)?; -// Some(Lines::TimeDefinition(time_ref)) -// } -// Ok(Token::GeneratorLine) => { -// consume_any_space(lex); -// let generator = parse_generator(lex)?; -// Some(Lines::Generator(generator)) -// } -// Ok(Token::ParameterLine) => { -// consume_any_space(lex); -// let param = parse_parameter(lex)?; -// Some(Lines::Parameter(param)) -// } -// Ok(Token::FillerLine) => Some(Lines::Filler), -// _ => None, -// } -// } - -// fn parse_time_ref<L: Lookahead>(lex: &mut ParserView<'_, L>) -> Option<TimeRef> -// where -// L::LookedItem: std::fmt::Debug, -// { -// println!("{:?}", lex.peek()); -// println!("parse_time_ref"); -// let (token, range) = lex.next()?; -// if let Ok(Token::TimeRef) = token { -// let (sign_token, sign_range) = lex.next()?; -// let sign = match sign_token { -// Ok(Token::OpSub) => -1.0, -// Ok(Token::OpAdd) => 1.0, -// _ => return None, -// }; -// let number = parse_number(lex)?; -// Some(TimeRef { -// span: range.start..number.span.end, -// offset: sign * number.value, -// }) -// } else { -// None -// } -// } - -// fn parse_generator<L: Lookahead>(lex: &mut ParserView<'_, L>) -> Option<Generator> -// where -// L::LookedItem: std::fmt::Debug, -// { -// println!("{:?}", lex.peek()); -// println!("parse_generator"); -// let (bracket_token, s_range) = lex.next()?; -// let Ok(Token::LBracket) = bracket_token else { -// return None; -// }; -// let (gen_token, _) = lex.next()?; -// let Ok(Token::ParametricGenerator) = gen_token else { -// return None; -// }; -// let (bracket_token, e_range) = lex.next()?; -// let Ok(Token::RBracket) = bracket_token else { -// return None; -// }; -// Some(Generator { -// span: s_range.start..e_range.end, -// kind: GeneratorKind::Parametric, -// }) -// } - -// fn parse_parameter<L: Lookahead>(lex: &mut ParserView<'_, L>) -> Option<Parameter> -// where -// L::LookedItem: std::fmt::Debug, -// { -// println!("{:?}", lex.peek()); -// println!("parse_parameter"); -// let (Ok(token), name_range) = lex.next()? else { -// return None; -// }; - -// // get the coordinate -// let coordinate = match token { -// Token::PositionX => "x", -// Token::PositionY => "y", -// Token::PositionZ => "z", -// _ => return None, -// }; - -// // need the semi-colon -// consume_any_space(lex); -// let (semi_token, _) = lex.next()?; -// let Ok(Token::Colon) = semi_token else { -// return None; -// }; -// consume_any_space(lex); - -// // get the expression -// let expr = parse_expr(lex)?; - -// Some(Parameter { -// span: name_range.start..expr.span().end, -// name: coordinate.to_string(), -// value: expr, -// }) -// } +fn parse_program(mut view: LexerView<'_>) -> Option<Program> { + let mut blocks = Vec::new(); + while let Some((token, _)) = view.peek() { + match token { + Token::GeneratorLine => { + blocks.push(Blocks::Generator(parse_generator_block(view.view())?)); + } + Token::SettingLine => { + blocks.push(Blocks::Setting(parse_setting_block(view.view())?)); + } + Token::AtTimeLine => { + blocks.push(Blocks::TimeDefinition(parse_timedef_block(view.view())?)); + } + Token::FillerLine => { + consume_filler_lines(view.view()); + } + _ => return None, + } + } + view.commit(); + Some(Program(blocks)) +} + +fn parse_timedef_block(mut view: LexerView<'_>) -> Option<TimeRef> { + let (Token::AtTimeLine, s) = view.next()? else { + return None; + }; + let timeref = parse_time_ref(view.view())?; + consume_new_line(view.view()); + view.commit(); + Some(TimeRef { + span: s.start..timeref.span.end, + offset: timeref.offset, + }) +} + +fn parse_generator_block(mut view: LexerView<'_>) -> Option<GeneratorBlock> { + let (Token::GeneratorLine, s) = view.next()? else { + return None; + }; + let generator = parse_generator(view.view())?; + consume_new_line(view.view()); + let mut parameters = Vec::new(); + while let (Token::ParameterLine, _) = view.peek()? { + view.next(); + parameters.push(parse_expr_parameter(view.view())?); + consume_new_line(view.view()); + } + view.commit(); + Some(GeneratorBlock { + span: s.start..parameters.last().unwrap().span.end, + generator, + parameters, + }) +} + +fn parse_setting_block(mut view: LexerView<'_>) -> Option<SettingBlock> { + let (Token::SettingLine, s) = view.next()? else { + return None; + }; + let setting = parse_setting(view.view())?; + consume_new_line(view.view()); + let mut settings = Vec::new(); + while let (Token::ParameterLine, _) = view.peek()? { + view.next(); + settings.push(parse_num_parameter(view.view())?); + consume_new_line(view.view()); + } + view.commit(); + Some(SettingBlock { + span: s.start..settings.last().unwrap().span.end, + name: setting, + settings, + }) +} + +fn parse_time_ref(mut view: LexerView<'_>) -> Option<TimeRef> { + if let (Token::TimeRef(t), r) = view.next()? { + view.commit(); + Some(TimeRef { span: r, offset: t }) + } else { + None + } +} + +fn parse_setting(mut view: LexerView<'_>) -> Option<Setting> { + if let (Token::BracketRef(set), span) = view.next()? { + if set == "GPS" { + view.commit(); + return Some(Setting { + span, + name: SettingKind::GPS, + }); + } + } + None +} + +fn parse_generator(mut view: LexerView<'_>) -> Option<Generator> { + if let (Token::BracketRef(gen), span) = view.next()? { + if gen == "position.parametric" { + view.commit(); + return Some(Generator { + span, + kind: GeneratorKind::PositionParametric, + }); + } + }; + None +} + +fn parse_num_parameter(mut view: LexerView<'_>) -> Option<NumberParameter> { + let (Token::Parameter(x), r) = view.next()? else { + return None; + }; + let number = parse_number(view.view())?; + view.commit(); + Some(NumberParameter { + span: r.start..number.span.end, + name: x, + value: number, + }) +} + +fn parse_expr_parameter(mut view: LexerView<'_>) -> Option<ExprParameter> { + let (Token::Parameter(x), r) = view.next()? else { + return None; + }; + let expr = parse_expr(view.view())?; + view.commit(); + Some(ExprParameter { + span: r.start..expr.span().end, + name: x, + value: expr, + }) +} fn parse_expr(mut view: LexerView<'_>) -> Option<TimeExpr> { let res = parse_sum(view.view()); @@ -346,8 +361,6 @@ fn parse_power(mut view: LexerView<'_>) -> Option<TimeExpr> { } fn parse_expr_value(mut view: LexerView<'_>) -> Option<TimeExpr> { - println!("{:?}", view.peek()); - println!("parse_expr_value"); if let Some(number) = parse_number(view.view()) { view.commit(); return Some(TimeExpr::Num(number)); @@ -399,8 +412,6 @@ fn parse_group(mut view: LexerView<'_>) -> Option<GroupExpr> { } fn parse_time_var(mut view: LexerView<'_>) -> Option<TimeVar> { - println!("{:?}", view.peek()); - println!("parse_time_var"); if let (Token::Time, r) = view.next()? { view.commit(); Some(TimeVar { span: r }) @@ -409,6 +420,14 @@ fn parse_time_var(mut view: LexerView<'_>) -> Option<TimeVar> { } } +fn consume_filler_lines(mut view: LexerView<'_>) { + while let Some((Token::FillerLine, _)) = view.peek() { + view.next(); + consume_new_line(view.view()); + } + view.commit(); +} + fn consume_any_space(mut view: LexerView<'_>) { while let Some((Token::Spaces, _)) = view.peek() { view.next(); @@ -416,6 +435,16 @@ fn consume_any_space(mut view: LexerView<'_>) { view.commit(); } +fn consume_new_line(mut view: LexerView<'_>) -> Option<()> { + match view.next()? { + (Token::Newline, _) => { + view.commit(); + Some(()) + } + _ => None, + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/on-host/arpist-traject/src/tokens.rs b/on-host/arpist-traject/src/tokens.rs index e0216d3..9275350 100644 --- a/on-host/arpist-traject/src/tokens.rs +++ b/on-host/arpist-traject/src/tokens.rs @@ -38,24 +38,24 @@ pub enum Token { #[regex("T[+-]?[0-9]+(\\.[0-9]+)?", |lex| lex.slice()[1..].parse::<f64>().ok())] TimeRef(f64), - #[regex("[a-z]+ *:", |lex| lex.slice().trim_end_matches(':').trim().to_string())] + #[regex("[a-z]+ *: *", |lex| lex.slice().trim().trim_end_matches(':').trim().to_string())] Parameter(String), #[regex("\\[[a-zA-Z.]+\\]", |lex| lex.slice().trim_matches('[').trim_matches(']').to_string())] - Generator(String), + BracketRef(String), - #[token("|")] + #[regex("\\| *")] FillerLine, - #[token("@")] + #[regex("@ +")] AtTimeLine, - #[token("=")] + #[regex("= +")] SettingLine, - #[token("#")] + #[regex("# +")] GeneratorLine, - #[token("\\")] + #[regex("\\\\ +")] ParameterLine, } -- GitLab