From 0241884cf24674534277e80f297f9f4e109c9b51 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Sun, 1 Jun 2025 12:24:16 -0400 Subject: [PATCH] highlighter: Consider any skipped injection layers to be modified This fixes an issue where an edit to a document which removes an injection layer could eventually cause a panic that `parent_ranges` is empty in `intersect_ranges`: `Option::unwrap` on a `None` value. The test case edits a markdown block where a codefence is created between two HTML comments. Before the edit, the document has a combined injection for the two HTML comments. After the edit, the second HTML comment is reinterpreted by the markdown parser as the content of the codefence - not an HTML comment anymore. In this case the HTML layer must be re-parsed because its included ranges have been changed the edit: before it was a range for each comment and after it is only the first comment. Previously we did not mark the HTML layer as modified since it was not directly edited. But this meant that, when running the HTML injection query over this layer after the edit, we got query matches for the second HTML comment since the node still existed according to the layer's tree. This second injection capture did not intersect with any parent ranges since the layer's ranges no longer included the second HTML comment, causing `parent_ranges` to be empty and an unwrap of `None`. The fix is to mark any layers which are not reused as modified. If a layer is a combined injection and not all layers are reused then the layer must be re-parsed because its included ranges have changed. All modified layers are re-parsed. To do this we look at the old injections for each layer and any that are skipped or discarded rather than reused have their layer marked as modified. Note that this does not cause layers which have been removed from the tree to stick around. The layers are only marked as modified (meaning that they need to be re-parsed) and not marked as touched (meaning that they are still valid). Also note that this is not an issue for adding injection ranges to a combined injection since we always consider a layer to be modified if we cannot reuse an injection. A practical reproduction of this (and the source of the test) is mentioned in helix-editor/helix#13544. --- highlighter/src/injections_query.rs | 21 ++++++--- highlighter/src/tests.rs | 66 +++++++++++++++++++++++++---- 2 files changed, 73 insertions(+), 14 deletions(-) diff --git a/highlighter/src/injections_query.rs b/highlighter/src/injections_query.rs index f587581..15c022e 100644 --- a/highlighter/src/injections_query.rs +++ b/highlighter/src/injections_query.rs @@ -472,6 +472,13 @@ impl Syntax { } } + // Any remaining injections which were not reused should have their layers marked as + // modified. These layers might have a new set of ranges (if they were visited) and so + // their trees need to be re-parsed. + for old_injection in old_injections { + self.layer_mut(old_injection.layer).flags.modified = true; + } + let layer_data = &mut self.layer_mut(layer); layer_data.ranges = parent_ranges; layer_data.parse_tree = Some(parse_tree); @@ -612,11 +619,15 @@ impl Syntax { new_range: Range, injections: &mut Peekable>, ) -> Option { - loop { - let skip = injections.next_if(|injection| injection.range.end <= new_range.start); - if skip.is_none() { - break; - } + while let Some(skipped) = + injections.next_if(|injection| injection.range.end <= new_range.start) + { + // If the layer had an injection and now does not have the injection, consider the + // skipped layer to be modified so that its tree is re-parsed. It must be re-parsed + // since the skipped layer now has a different set of ranges than it used to. Note + // that the layer isn't marked as `touched` so it could be discarded if the layer + // is not ever visited. + self.layer_mut(skipped.layer).flags.modified = true; } injections .next_if(|injection| { diff --git a/highlighter/src/tests.rs b/highlighter/src/tests.rs index 3f660bc..1f0464d 100644 --- a/highlighter/src/tests.rs +++ b/highlighter/src/tests.rs @@ -7,13 +7,15 @@ use indexmap::{IndexMap, IndexSet}; use once_cell::sync::Lazy; use once_cell::unsync::OnceCell; use skidder::Repo; -use tree_sitter::Grammar; +use tree_sitter::{Grammar, InputEdit, Point}; use crate::config::{LanguageConfig, LanguageLoader}; use crate::fixtures::{check_highlighter_fixture, check_injection_fixture}; use crate::highlighter::Highlight; use crate::injections_query::InjectionLanguageMarker; -use crate::{Language, Layer}; +use crate::{Language, Layer, Syntax}; + +const PARSE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(1); static GRAMMARS: Lazy> = Lazy::new(|| { let skidder_config = skidder_config(); @@ -248,13 +250,7 @@ fn layers() { /// ``` pub fn hello() {}"; - let syntax = crate::Syntax::new( - ropey::RopeSlice::from(input), - loader.get("rust"), - std::time::Duration::from_secs(60), - &loader, - ) - .unwrap(); + let syntax = Syntax::new(input.into(), loader.get("rust"), PARSE_TIMEOUT, &loader).unwrap(); let assert_injection = |snippet: &str, expected: &[&str]| { assert!(!expected.is_empty(), "all layers have at least 1 injection"); @@ -452,3 +448,55 @@ fn injection_precedence() { ); injection_fixture(&loader, "injections/overlapping_injection.rs"); } + +#[test] +fn edit_remove_and_add_injection_layer() { + let loader = TestLanguageLoader::new(); + // Add another backtick, causing the double old backtick to become a codefence and the second + // HTML comment to become the codefence's body. + // When we reuse the injection for the HTML comments, we need to be sure to re-parse the HTML + // layer so that it recognizes that the second comment is no longer valid. + let before_text = "\n``\n"; + let after_text = "\n```\n"; + let edit = InputEdit { + start_byte: 10, + old_end_byte: 10, + new_end_byte: 11, + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO, + }; + let mut syntax = Syntax::new( + before_text.into(), + loader.get("markdown"), + PARSE_TIMEOUT, + &loader, + ) + .unwrap(); + // The test here is that `Syntax::update` can apply the edit `Ok(_)` without panicking. + syntax + .update(after_text.into(), PARSE_TIMEOUT, &[edit], &loader) + .unwrap(); + + // Now test the inverse. Start with the after text and edit it to be the before text. In this + // case an injection is added for the HTML comment. + let edit = InputEdit { + start_byte: 10, + old_end_byte: 11, + new_end_byte: 10, + start_point: Point::ZERO, + old_end_point: Point::ZERO, + new_end_point: Point::ZERO, + }; + let mut syntax = Syntax::new( + after_text.into(), + loader.get("markdown"), + PARSE_TIMEOUT, + &loader, + ) + .unwrap(); + // The test here is that `Syntax::update` can apply the edit `Ok(_)` without panicking. + syntax + .update(before_text.into(), PARSE_TIMEOUT, &[edit], &loader) + .unwrap(); +}