highlighter: Consider any skipped injection layers to be modified

This fixes an issue where an edit to a document which removes an injection layer could eventually cause a panic that `parent_ranges` is empty in `intersect_ranges`: `Option::unwrap` on a `None` value. The test case edits a markdown block where a codefence is created between two HTML comments. Before the edit, the document has a combined injection for the two HTML comments. After the edit, the second HTML comment is reinterpreted by the markdown parser as the content of the codefence - not an HTML comment anymore. In this case the HTML layer must be re-parsed because its included ranges have been changed the edit: before it was a range for each comment and after it is only the first comment. Previously we did not mark the HTML layer as modified since it was not directly edited. But this meant that, when running the HTML injection query over this layer after the edit, we got query matches for the second HTML comment since the node still existed according to the layer's tree. This second injection capture did not intersect with any parent ranges since the layer's ranges no longer included the second HTML comment, causing `parent_ranges` to be empty and an unwrap of `None`. The fix is to mark any layers which are not reused as modified. If a layer is a combined injection and not all layers are reused then the layer must be re-parsed because its included ranges have changed. All modified layers are re-parsed. To do this we look at the old injections for each layer and any that are skipped or discarded rather than reused have their layer marked as modified. Note that this does not cause layers which have been removed from the tree to stick around. The layers are only marked as modified (meaning that they need to be re-parsed) and not marked as touched (meaning that they are still valid). Also note that this is not an issue for adding injection ranges to a combined injection since we always consider a layer to be modified if we cannot reuse an injection. A practical reproduction of this (and the source of the test) is mentioned in helix-editor/helix#13544.
2025-10-06 00:12:46 +02:00 · 2025-06-01 12:24:16 -04:00
parent 7acb839083
commit 0241884cf2
2 changed files with 73 additions and 14 deletions
--- a/highlighter/src/injections_query.rs
+++ b/highlighter/src/injections_query.rs
@@ -472,6 +472,13 @@ impl Syntax {
            }
        }

+        // Any remaining injections which were not reused should have their layers marked as
+        // modified. These layers might have a new set of ranges (if they were visited) and so
+        // their trees need to be re-parsed.
+        for old_injection in old_injections {
+            self.layer_mut(old_injection.layer).flags.modified = true;
+        }
+
        let layer_data = &mut self.layer_mut(layer);
        layer_data.ranges = parent_ranges;
        layer_data.parse_tree = Some(parse_tree);
@@ -612,11 +619,15 @@ impl Syntax {
        new_range: Range,
        injections: &mut Peekable<impl Iterator<Item = Injection>>,
    ) -> Option<Injection> {
-        loop {
-            let skip = injections.next_if(|injection| injection.range.end <= new_range.start);
-            if skip.is_none() {
-                break;
-            }
+        while let Some(skipped) =
+            injections.next_if(|injection| injection.range.end <= new_range.start)
+        {
+            // If the layer had an injection and now does not have the injection, consider the
+            // skipped layer to be modified so that its tree is re-parsed. It must be re-parsed
+            // since the skipped layer now has a different set of ranges than it used to. Note
+            // that the layer isn't marked as `touched` so it could be discarded if the layer
+            // is not ever visited.
+            self.layer_mut(skipped.layer).flags.modified = true;
        }
        injections
            .next_if(|injection| {
--- a/highlighter/src/tests.rs
+++ b/highlighter/src/tests.rs
@@ -7,13 +7,15 @@ use indexmap::{IndexMap, IndexSet};
 use once_cell::sync::Lazy;
 use once_cell::unsync::OnceCell;
 use skidder::Repo;
-use tree_sitter::Grammar;
+use tree_sitter::{Grammar, InputEdit, Point};

 use crate::config::{LanguageConfig, LanguageLoader};
 use crate::fixtures::{check_highlighter_fixture, check_injection_fixture};
 use crate::highlighter::Highlight;
 use crate::injections_query::InjectionLanguageMarker;
-use crate::{Language, Layer};
+use crate::{Language, Layer, Syntax};
+
+const PARSE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(1);

 static GRAMMARS: Lazy<Vec<PathBuf>> = Lazy::new(|| {
    let skidder_config = skidder_config();
@@ -248,13 +250,7 @@ fn layers() {
 /// ```
 pub fn hello() {}";

-    let syntax = crate::Syntax::new(
-        ropey::RopeSlice::from(input),
-        loader.get("rust"),
-        std::time::Duration::from_secs(60),
-        &loader,
-    )
-    .unwrap();
+    let syntax = Syntax::new(input.into(), loader.get("rust"), PARSE_TIMEOUT, &loader).unwrap();

    let assert_injection = |snippet: &str, expected: &[&str]| {
        assert!(!expected.is_empty(), "all layers have at least 1 injection");
@@ -452,3 +448,55 @@ fn injection_precedence() {
    );
    injection_fixture(&loader, "injections/overlapping_injection.rs");
 }
+
+#[test]
+fn edit_remove_and_add_injection_layer() {
+    let loader = TestLanguageLoader::new();
+    // Add another backtick, causing the double old backtick to become a codefence and the second
+    // HTML comment to become the codefence's body.
+    // When we reuse the injection for the HTML comments, we need to be sure to re-parse the HTML
+    // layer so that it recognizes that the second comment is no longer valid.
+    let before_text = "<!---->\n``\n<!---->";
+    let after_text = "<!---->\n```\n<!---->";
+    let edit = InputEdit {
+        start_byte: 10,
+        old_end_byte: 10,
+        new_end_byte: 11,
+        start_point: Point::ZERO,
+        old_end_point: Point::ZERO,
+        new_end_point: Point::ZERO,
+    };
+    let mut syntax = Syntax::new(
+        before_text.into(),
+        loader.get("markdown"),
+        PARSE_TIMEOUT,
+        &loader,
+    )
+    .unwrap();
+    // The test here is that `Syntax::update` can apply the edit `Ok(_)` without panicking.
+    syntax
+        .update(after_text.into(), PARSE_TIMEOUT, &[edit], &loader)
+        .unwrap();
+
+    // Now test the inverse. Start with the after text and edit it to be the before text. In this
+    // case an injection is added for the HTML comment.
+    let edit = InputEdit {
+        start_byte: 10,
+        old_end_byte: 11,
+        new_end_byte: 10,
+        start_point: Point::ZERO,
+        old_end_point: Point::ZERO,
+        new_end_point: Point::ZERO,
+    };
+    let mut syntax = Syntax::new(
+        after_text.into(),
+        loader.get("markdown"),
+        PARSE_TIMEOUT,
+        &loader,
+    )
+    .unwrap();
+    // The test here is that `Syntax::update` can apply the edit `Ok(_)` without panicking.
+    syntax
+        .update(before_text.into(), PARSE_TIMEOUT, &[edit], &loader)
+        .unwrap();
+}