diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs index a4e3efaeb52..fb531340108 100644 --- a/crates/mbe/src/syntax_bridge.rs +++ b/crates/mbe/src/syntax_bridge.rs @@ -95,7 +95,9 @@ pub fn token_tree_to_syntax_node( parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => { tree_sink.token(kind, n_raw_tokens) } - parser::Step::FloatSplit { has_pseudo_dot } => tree_sink.float_split(has_pseudo_dot), + parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => { + tree_sink.float_split(has_pseudo_dot) + } parser::Step::Enter { kind } => tree_sink.start_node(kind), parser::Step::Exit => tree_sink.finish_node(), parser::Step::Error { msg } => tree_sink.error(msg.to_string()), @@ -797,6 +799,8 @@ fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> { } impl<'a> TtTreeSink<'a> { + /// Parses a float literal as if it was a one to two name ref nodes with a dot inbetween. + /// This occurs when a float literal is used as a field access. fn float_split(&mut self, has_pseudo_dot: bool) { let (text, _span) = match self.cursor.token_tree() { Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Literal(lit), _)) => { diff --git a/crates/mbe/src/to_parser_input.rs b/crates/mbe/src/to_parser_input.rs index 6d20998bb4d..051e20b3a3f 100644 --- a/crates/mbe/src/to_parser_input.rs +++ b/crates/mbe/src/to_parser_input.rs @@ -47,6 +47,9 @@ pub(crate) fn to_parser_input(buffer: &TokenBuffer<'_>) -> parser::Input { res.push(kind); if kind == FLOAT_NUMBER && !inner_text.ends_with('.') { + // Tag the token as joint if it is float with a fractional part + // we use this jointness to inform the parser about what token split + // event to emit when we encounter a float literal in a field access res.was_joint(); } } diff --git a/crates/parser/src/event.rs b/crates/parser/src/event.rs index fb2616cf013..577eb0967b4 100644 --- a/crates/parser/src/event.rs +++ b/crates/parser/src/event.rs @@ -72,11 +72,14 @@ pub(crate) enum Event { /// `n_raw_tokens = 2` is used to produced a single `>>`. Token { kind: SyntaxKind, - // Consider custom enum here? n_raw_tokens: u8, }, + /// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a float literal + /// instead of an integer literal followed by a dot as the lexer has no contextual knowledge. + /// This event instructs whatever consumes the events to split the float literal into + /// the corresponding parts. FloatSplitHack { - has_pseudo_dot: bool, + ends_in_dot: bool, }, Error { msg: String, @@ -128,8 +131,8 @@ pub(super) fn process(mut events: Vec) -> Output { Event::Token { kind, n_raw_tokens } => { res.token(kind, n_raw_tokens); } - Event::FloatSplitHack { has_pseudo_dot } => { - res.float_split_hack(has_pseudo_dot); + Event::FloatSplitHack { ends_in_dot } => { + res.float_split_hack(ends_in_dot); let ev = mem::replace(&mut events[i + 1], Event::tombstone()); assert!(matches!(ev, Event::Finish), "{ev:?}"); } diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 9b895ff3ca6..8c5aed0232b 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -102,7 +102,9 @@ pub fn parse(&self, input: &Input) -> Output { match step { Step::Enter { .. } => depth += 1, Step::Exit => depth -= 1, - Step::FloatSplit { has_pseudo_dot } => depth -= 1 + !has_pseudo_dot as usize, + Step::FloatSplit { ends_in_dot: has_pseudo_dot } => { + depth -= 1 + !has_pseudo_dot as usize + } Step::Token { .. } | Step::Error { .. } => (), } } diff --git a/crates/parser/src/output.rs b/crates/parser/src/output.rs index 9587c8cb1ba..41d4c68b2d7 100644 --- a/crates/parser/src/output.rs +++ b/crates/parser/src/output.rs @@ -25,7 +25,7 @@ pub struct Output { #[derive(Debug)] pub enum Step<'a> { Token { kind: SyntaxKind, n_input_tokens: u8 }, - FloatSplit { has_pseudo_dot: bool }, + FloatSplit { ends_in_dot: bool }, Enter { kind: SyntaxKind }, Exit, Error { msg: &'a str }, @@ -70,7 +70,7 @@ pub fn iter(&self) -> impl Iterator> { } Self::EXIT_EVENT => Step::Exit, Self::SPLIT_EVENT => { - Step::FloatSplit { has_pseudo_dot: event & Self::N_INPUT_TOKEN_MASK != 0 } + Step::FloatSplit { ends_in_dot: event & Self::N_INPUT_TOKEN_MASK != 0 } } _ => unreachable!(), } @@ -84,9 +84,9 @@ pub(crate) fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { self.event.push(e) } - pub(crate) fn float_split_hack(&mut self, has_pseudo_dot: bool) { + pub(crate) fn float_split_hack(&mut self, ends_in_dot: bool) { let e = (Self::SPLIT_EVENT as u32) << Self::TAG_SHIFT - | ((has_pseudo_dot as u32) << Self::N_INPUT_TOKEN_SHIFT) + | ((ends_in_dot as u32) << Self::N_INPUT_TOKEN_SHIFT) | Self::EVENT_MASK; self.event.push(e); } diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index 0f4fa602291..280416ae7c9 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -182,7 +182,7 @@ pub(crate) fn bump_any(&mut self) { } /// Advances the parser by one token - pub(crate) fn split_float(&mut self, marker: Marker) -> (bool, Marker) { + pub(crate) fn split_float(&mut self, mut marker: Marker) -> (bool, Marker) { assert!(self.at(SyntaxKind::FLOAT_NUMBER)); // we have parse `.` // ``.0.1 @@ -191,26 +191,23 @@ pub(crate) fn split_float(&mut self, marker: Marker) -> (bool, Marker) { // ``. 0. 1; // here we need to change the follow up parse, the return value will cause us to emulate a dot // the actual splitting happens later - let has_pseudo_dot = !self.inp.is_joint(self.pos); - let marker = if !has_pseudo_dot { - let new_pos = self.start(); + let ends_in_dot = !self.inp.is_joint(self.pos); + if !ends_in_dot { + let new_marker = self.start(); let idx = marker.pos as usize; match &mut self.events[idx] { Event::Start { forward_parent, kind } => { *kind = SyntaxKind::FIELD_EXPR; - *forward_parent = Some(new_pos.pos - marker.pos); + *forward_parent = Some(new_marker.pos - marker.pos); } _ => unreachable!(), } - // NOTE: This brings the start / finish pairs out of balance! - std::mem::forget(marker); - new_pos - } else { - marker + marker.bomb.defuse(); + marker = new_marker; }; self.pos += 1 as usize; - self.push_event(Event::FloatSplitHack { has_pseudo_dot }); - (has_pseudo_dot, marker) + self.push_event(Event::FloatSplitHack { ends_in_dot }); + (ends_in_dot, marker) } /// Advances the parser by one token, remapping its kind. diff --git a/crates/parser/src/shortcuts.rs b/crates/parser/src/shortcuts.rs index 21939c34943..47e4adcbbe6 100644 --- a/crates/parser/src/shortcuts.rs +++ b/crates/parser/src/shortcuts.rs @@ -43,10 +43,11 @@ pub fn to_input(&self) -> crate::Input { res.was_joint(); } res.push(kind); - // we set jointness for floating point numbers as a hack to inform the - // parser about whether we have a `0.` or `0.1` style float + // Tag the token as joint if it is float with a fractional part + // we use this jointness to inform the parser about what token split + // event to emit when we encounter a float literal in a field access if kind == SyntaxKind::FLOAT_NUMBER { - if !self.text(i).split_once('.').map_or(true, |(_, it)| it.is_empty()) { + if !self.text(i).ends_with('.') { res.was_joint(); } } @@ -71,7 +72,9 @@ pub fn intersperse_trivia( Step::Token { kind, n_input_tokens: n_raw_tokens } => { builder.token(kind, n_raw_tokens) } - Step::FloatSplit { has_pseudo_dot } => builder.float_split(has_pseudo_dot), + Step::FloatSplit { ends_in_dot: has_pseudo_dot } => { + builder.float_split(has_pseudo_dot) + } Step::Enter { kind } => builder.enter(kind), Step::Exit => builder.exit(), Step::Error { msg } => {