Compare commits

...

8 Commits

Author SHA1 Message Date
Pascal Kuthe
ae0d4189e1 tmp 2024-07-27 15:43:44 +02:00
Pascal Kuthe
f7686d7af2 getting somewhere 2024-07-20 04:37:47 +02:00
Pascal Kuthe
29ffc20290 progress 2024-07-15 18:26:26 +02:00
Pascal Kuthe
1bd36f528a tmp 2024-07-15 18:25:11 +02:00
Pascal Kuthe
0cd8817d06 tmp 2024-07-15 18:25:08 +02:00
Pascal Kuthe
782a34941e vendor tree-sitter lib 2024-07-15 18:23:41 +02:00
Pascal Kuthe
37397ecc6d tmp 2024-07-15 18:23:41 +02:00
Pascal Kuthe
c4b7b08809 move syntax highlighting to separate crate 2024-07-15 18:23:24 +02:00
81 changed files with 25064 additions and 1871 deletions

22
Cargo.lock generated
View File

@@ -1311,6 +1311,7 @@ dependencies = [
"hashbrown 0.14.5",
"helix-loader",
"helix-stdx",
"helix-syntax",
"imara-diff",
"indoc",
"log",
@@ -1425,6 +1426,27 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "helix-syntax"
version = "24.7.0"
dependencies = [
"ahash",
"arc-swap",
"bitflags 2.6.0",
"cc",
"hashbrown 0.14.5",
"helix-stdx",
"libloading",
"log",
"once_cell",
"regex",
"regex-cursor",
"ropey",
"slotmap",
"thiserror",
"tree-sitter",
]
[[package]]
name = "helix-term"
version = "24.7.0"

View File

@@ -12,6 +12,7 @@ members = [
"helix-vcs",
"helix-parsec",
"helix-stdx",
"helix-syntax",
"xtask",
]

View File

@@ -17,6 +17,7 @@ integration = []
[dependencies]
helix-stdx = { path = "../helix-stdx" }
helix-syntax = { path = "../helix-syntax" }
helix-loader = { path = "../helix-loader" }
ropey = { version = "1.6.1", default-features = false, features = ["simd"] }

View File

@@ -738,18 +738,9 @@ fn init_indent_query<'a, 'b>(
.map(|prec| prec.byte_range().end - 1..byte_pos + 1)
.unwrap_or(byte_pos..byte_pos + 1);
crate::syntax::PARSER.with(|ts_parser| {
let mut ts_parser = ts_parser.borrow_mut();
let mut cursor = ts_parser.cursors.pop().unwrap_or_default();
let query_result = query_indents(
query,
syntax,
&mut cursor,
text,
query_range,
new_line_byte_pos,
);
ts_parser.cursors.push(cursor);
crate::syntax::with_cursor(|cursor| {
let query_result =
query_indents(query, syntax, cursor, text, query_range, new_line_byte_pos);
(query_result, deepest_preceding)
})
};

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,3 @@
use arc_swap::ArcSwap;
use helix_core::{
indent::{indent_level_for_line, treesitter_indent_for_pos, IndentStyle},
syntax::{Configuration, Loader},
@@ -6,7 +5,7 @@ use helix_core::{
};
use helix_stdx::rope::RopeSliceExt;
use ropey::Rope;
use std::{ops::Range, path::PathBuf, process::Command, sync::Arc};
use std::{ops::Range, path::PathBuf, process::Command};
#[test]
fn test_treesitter_indent_rust() {
@@ -200,12 +199,7 @@ fn test_treesitter_indent(
let indent_style = IndentStyle::from_str(&language_config.indent.as_ref().unwrap().unit);
let highlight_config = language_config.highlight_config(&[]).unwrap();
let text = doc.slice(..);
let syntax = Syntax::new(
text,
highlight_config,
Arc::new(ArcSwap::from_pointee(loader)),
)
.unwrap();
let syntax = Syntax::new(text, highlight_config, |_| None).unwrap();
let indent_query = language_config.indent_query().unwrap();
for i in 0..doc.len_lines() {

34
helix-syntax/Cargo.toml Normal file
View File

@@ -0,0 +1,34 @@
[package]
name = "helix-syntax"
description = "Helix syntax highlighting "
include = ["src/**/*", "README.md"]
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true
rust-version.workspace = true
categories.workspace = true
repository.workspace = true
homepage.workspace = true
[features]
[dependencies]
helix-stdx = { path = "../helix-stdx" }
ropey = { version = "1.6.1", default-features = false, features = ["simd"] }
slotmap = "1.0"
tree-sitter.workspace = true
once_cell = "1.19"
arc-swap = "1"
regex = "1"
bitflags = "2.4"
ahash = "0.8.9"
hashbrown = { version = "0.14.3", features = ["raw"] }
log = "0.4"
regex-cursor = "0.1.4"
libloading = "0.8.3"
thiserror = "1.0.59"
[build-dependencies]
cc = "1.0.95"

28
helix-syntax/build.rs Normal file
View File

@@ -0,0 +1,28 @@
use std::path::PathBuf;
use std::{env, fs};
/// Build script entry point: compiles the vendored tree-sitter C sources
/// (`../vendor/tree-sitter/src/lib.c`) into a static library named `tree-sitter`.
///
/// Setting the `DISABLED_TS_BUILD` environment variable (to any value) skips
/// the C build entirely.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is not set or the vendored source directory
/// cannot be read.
fn main() {
    // Make cargo re-run this script when the opt-out switch is toggled.
    println!("cargo:rerun-if-env-changed=DISABLED_TS_BUILD");
    if env::var_os("DISABLED_TS_BUILD").is_some() {
        return;
    }

    let manifest_path = PathBuf::from(
        env::var_os("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR must be set by cargo"),
    );
    let include_path = manifest_path.join("../vendor/tree-sitter/include");
    let src_path = manifest_path.join("../vendor/tree-sitter/src");

    // Rebuild whenever any of the vendored source files changes.
    let entries = fs::read_dir(&src_path)
        .unwrap_or_else(|err| panic!("failed to read {}: {err}", src_path.display()));
    for entry in entries {
        let entry = entry
            .unwrap_or_else(|err| panic!("failed to read {}: {err}", src_path.display()));
        // `display()` instead of `to_str().unwrap()`: a non-UTF-8 path must not
        // abort the build.
        println!("cargo:rerun-if-changed={}", entry.path().display());
    }

    cc::Build::new()
        .flag_if_supported("-std=c11")
        .flag_if_supported("-fvisibility=hidden")
        .flag_if_supported("-Wshadow")
        .flag_if_supported("-Wno-unused-parameter")
        .include(&src_path)
        .include(&include_path)
        .file(src_path.join("lib.c"))
        .compile("tree-sitter");
}

346
helix-syntax/src/config.rs Normal file
View File

@@ -0,0 +1,346 @@
use std::borrow::Cow;
use std::path::Path;
use std::sync::Arc;
use crate::tree_sitter::query::{Capture, Pattern, QueryStr, UserPredicate};
use crate::tree_sitter::{query, Grammar, Query, QueryMatch, SyntaxTreeNode};
use arc_swap::ArcSwap;
use helix_stdx::rope::{self, RopeSliceExt};
use once_cell::sync::Lazy;
use regex::Regex;
use ropey::RopeSlice;
use crate::byte_range_to_str;
use crate::highlighter::Highlight;
/// Contains the data needed to highlight code written in a particular language.
///
/// This struct is immutable and can be shared between threads.
#[derive(Debug)]
pub struct HighlightConfiguration {
/// The grammar that was passed to `new`.
pub grammar: Grammar,
/// Query parsed from the locals query text followed by the highlights query text.
pub query: Query,
/// Query parsed from the injection query text.
pub(crate) injections_query: Query,
/// Patterns of the injections query that set the `injection.combined` property.
pub(crate) combined_injections_patterns: Vec<Pattern>,
/// First pattern of `query` that starts at or after the highlights text;
/// `Pattern::SENTINEL` if there is none.
first_highlights_pattern: Pattern,
/// One entry per capture of `query`. Initialised to `Highlight::NONE` and
/// replaced as a whole by `configure`.
pub(crate) highlight_indices: ArcSwap<Vec<Highlight>>,
/// Indexed by pattern index; `true` for patterns that carry the negated
/// `local` property predicate.
pub(crate) non_local_variable_patterns: Vec<bool>,
/// Capture named `injection.content`, if present.
pub(crate) injection_content_capture: Option<Capture>,
/// Capture named `injection.language`, if present.
pub(crate) injection_language_capture: Option<Capture>,
/// Capture named `injection.filename`, if present.
pub(crate) injection_filename_capture: Option<Capture>,
/// Capture named `injection.shebang`, if present.
pub(crate) injection_shebang_capture: Option<Capture>,
/// Capture named `local.scope`, if present.
pub(crate) local_scope_capture: Option<Capture>,
/// Capture named `local.definition`, if present.
pub(crate) local_def_capture: Option<Capture>,
/// Capture named `local.definition-value`, if present.
pub(crate) local_def_value_capture: Option<Capture>,
/// Capture named `local.reference`, if present.
pub(crate) local_ref_capture: Option<Capture>,
}
impl HighlightConfiguration {
/// Creates a `HighlightConfiguration` for a given `Grammar` and set of highlighting
/// queries.
///
/// # Parameters
///
/// * `grammar` - The Tree-sitter `Grammar` that should be used for parsing.
/// * `path` - Path forwarded to `Query::new` for both queries
///   (presumably used for error reporting - TODO confirm).
/// * `highlights_query` - A string containing tree patterns for syntax highlighting. This
///   should be non-empty, otherwise no syntax highlights will be added.
/// * `injection_query` - A string containing tree patterns for injecting other languages
///   into the document. This can be empty if no injections are desired.
/// * `locals_query` - A string containing tree patterns for tracking local variable
///   definitions and references. This can be empty if local variable tracking is not needed.
///
/// # Errors
///
/// Returns a `query::ParseError` if either query fails to parse or contains a
/// predicate that is not supported here.
///
/// Returns a `HighlightConfiguration` that can then be used with the `highlight` method.
pub fn new(
    grammar: Grammar,
    path: impl AsRef<Path>,
    highlights_query: &str,
    injection_query: &str,
    locals_query: &str,
) -> Result<Self, query::ParseError> {
    // Borrow once so the path can be handed to both `Query::new` calls.
    let path = path.as_ref();

    // Concatenate the locals and highlights queries, keeping track of where the
    // highlights section starts.
    let mut query_source = String::with_capacity(locals_query.len() + highlights_query.len());
    query_source.push_str(locals_query);
    let highlights_query_offset = query_source.len();
    query_source.push_str(highlights_query);

    // NOTE(review): this only grows up to the highest flagged pattern index, so it
    // may be shorter than the number of patterns - confirm consumers handle that.
    let mut non_local_variable_patterns = Vec::with_capacity(32);
    // Construct a single query from the two concatenated query strings.
    let query = Query::new(grammar, &query_source, path, |pattern, predicate| {
        match predicate {
            UserPredicate::IsPropertySet {
                negate: true,
                key: "local",
                val: None,
            } => {
                let idx = pattern.idx();
                // The vector must hold `idx + 1` entries before `idx` can be written.
                if non_local_variable_patterns.len() <= idx {
                    non_local_variable_patterns.resize(idx + 1, false);
                }
                non_local_variable_patterns[idx] = true;
            }
            predicate => {
                return Err(format!("unsupported predicate {predicate}").into());
            }
        }
        Ok(())
    })?;

    let mut combined_injections_patterns = Vec::new();
    let injections_query = Query::new(grammar, injection_query, path, |pattern, predicate| {
        match predicate {
            UserPredicate::SetProperty {
                key: "injection.combined",
                val: None,
            } => combined_injections_patterns.push(pattern),
            predicate => {
                return Err(format!("unsupported predicate {predicate}").into());
            }
        }
        Ok(())
    })?;

    let first_highlights_pattern = query
        .patterns()
        .find(|pattern| query.start_byte_for_pattern(*pattern) >= highlights_query_offset)
        .unwrap_or(Pattern::SENTINEL);

    // Injection captures belong to the injections query: they are compared against
    // the captures of matches produced by that query.
    let injection_content_capture = injections_query.get_capture("injection.content");
    let injection_language_capture = injections_query.get_capture("injection.language");
    let injection_filename_capture = injections_query.get_capture("injection.filename");
    let injection_shebang_capture = injections_query.get_capture("injection.shebang");

    // Local-variable captures live in the combined locals + highlights query.
    let local_def_capture = query.get_capture("local.definition");
    let local_def_value_capture = query.get_capture("local.definition-value");
    let local_ref_capture = query.get_capture("local.reference");
    let local_scope_capture = query.get_capture("local.scope");

    // No highlight is assigned to any capture until `configure` is called.
    let highlight_indices =
        ArcSwap::from_pointee(vec![Highlight::NONE; query.num_captures() as usize]);

    Ok(Self {
        grammar,
        query,
        injections_query,
        combined_injections_patterns,
        first_highlights_pattern,
        highlight_indices,
        non_local_variable_patterns,
        injection_content_capture,
        injection_language_capture,
        injection_filename_capture,
        injection_shebang_capture,
        local_scope_capture,
        local_def_capture,
        local_def_value_capture,
        local_ref_capture,
    })
}
/// Set the list of recognized highlight names.
///
/// Capture names in highlight queries are dot-separated (`punctuation.bracket`,
/// `function.method.builtin`). A recognized name matches a capture when every one
/// of its dot-separated parts equals the capture's part at the same position, so
/// `function.builtin` matches `function.builtin.constructor` but neither
/// `function.method.builtin` nor `function.method`.
///
/// For every capture the matching recognized name with the most parts wins (the
/// earliest one on ties); its position in `recognized_names` is stored as the
/// capture's `Highlight`. Captures without any match get `u32::MAX`.
pub fn configure(&self, recognized_names: &[String]) {
    let mut capture_segments: Vec<&str> = Vec::new();
    let mut resolved = Vec::new();
    for (_, capture_name) in self.query.captures() {
        capture_segments.clear();
        capture_segments.extend(capture_name.split('.'));

        let mut winner = u32::MAX;
        let mut winner_len = 0;
        for (name_idx, candidate) in recognized_names.iter().enumerate() {
            // `Some(number of parts)` when `candidate` is a part-wise prefix of the
            // capture name, `None` otherwise.
            let prefix_len =
                candidate
                    .split('.')
                    .enumerate()
                    .try_fold(0usize, |len, (pos, segment)| {
                        (capture_segments.get(pos) == Some(&segment)).then_some(len + 1)
                    });
            if let Some(len) = prefix_len {
                if len > winner_len {
                    winner = name_idx as u32;
                    winner_len = len;
                }
            }
        }
        resolved.push(Highlight(winner));
    }
    self.highlight_indices.store(Arc::new(resolved));
}
/// Scans the nodes matched by `query_match` and extracts the injection language
/// marker and the injection content node.
///
/// Each matched node's capture is compared against the injection captures stored
/// on `self`: a language capture yields a `Name` marker from the node text, a
/// filename capture yields a `Filename` marker, a shebang capture yields a
/// `Shebang` marker (or `None` when the regex does not match), and a content
/// capture records the node. Later matched nodes overwrite earlier results.
fn injection_pair<'a>(
&self,
query_match: &QueryMatch<'a, 'a>,
source: RopeSlice<'a>,
) -> (
Option<InjectionLanguageMarker<'a>>,
Option<SyntaxTreeNode<'a>>,
) {
let mut injection_capture = None;
let mut content_node = None;
for matched_node in query_match.matched_nodes() {
// Wrapped in `Some` so it can be compared directly with the optional captures.
let capture = Some(matched_node.capture);
if capture == self.injection_language_capture {
let name = byte_range_to_str(matched_node.syntax_node.byte_range(), source);
injection_capture = Some(InjectionLanguageMarker::Name(name));
} else if capture == self.injection_filename_capture {
let name = byte_range_to_str(matched_node.syntax_node.byte_range(), source);
let path = Path::new(name.as_ref()).to_path_buf();
injection_capture = Some(InjectionLanguageMarker::Filename(path.into()));
} else if capture == self.injection_shebang_capture {
let node_slice = source.byte_slice(matched_node.syntax_node.byte_range());
// some languages allow space and newlines before the actual string content
// so a shebang could be on either the first or second line
let lines = if let Ok(end) = node_slice.try_line_to_byte(2) {
node_slice.byte_slice(..end)
} else {
node_slice
};
// Only the first regex match is used; capture group 1 is the interpreter name.
injection_capture = SHEBANG_REGEX
.captures_iter(lines.regex_input())
.map(|cap| {
let cap = lines.byte_slice(cap.get_group(1).unwrap().range());
InjectionLanguageMarker::Shebang(cap.into())
})
.next()
} else if capture == self.injection_content_capture {
content_node = Some(matched_node.syntax_node.clone());
}
}
(injection_capture, content_node)
}
/// Resolves the injection described by `query_match`.
///
/// Returns the language marker and content node found by `injection_pair`,
/// refined by the `#set!` properties of the matched pattern, together with the
/// children-inclusion mode for the content node.
pub(super) fn injection_for_match<'a>(
    &self,
    query: &'a Query,
    query_match: &QueryMatch<'a, 'a>,
    source: RopeSlice<'a>,
) -> (
    Option<InjectionLanguageMarker<'a>>,
    Option<SyntaxTreeNode<'a>>,
    IncludedChildren,
) {
    let (mut language_marker, content_node) = self.injection_pair(query_match, source);
    let mut children = IncludedChildren::default();

    for setting in query.property_settings(query_match.pattern_index) {
        let key: &str = setting.key.as_ref();
        if key == "injection.language" {
            // A hard-coded `injection.language` property is only a fallback: a
            // language taken from a captured node wins.
            if language_marker.is_none() {
                language_marker = setting
                    .value
                    .as_ref()
                    .map(|lang| InjectionLanguageMarker::Name(lang.as_ref().into()));
            }
        } else if key == "injection.include-children" {
            // By default only the ranges belonging to the `injection.content`
            // node itself are injected, not those of its children.
            children = IncludedChildren::All;
        } else if key == "injection.include-unnamed-children" {
            // Include unnamed children while still excluding named ones.
            children = IncludedChildren::Unnamed;
        }
    }

    (language_marker, content_node, children)
}
// pub fn load_query(
// &self,
// language: &str,
// filename: &str,
// read_query_text: impl FnMut(&str, &str) -> String,
// ) -> Result<Option<Query>, QueryError> {
// let query_text = read_query(language, filename, read_query_text);
// if query_text.is_empty() {
// return Ok(None);
// }
// Query::new(&self.grammar, &query_text, ).map(Some)
// }
}
/// Reads a query by invoking `read_query_text` and resolves any `inherits` directives.
///
/// Every `; inherits: <language>(,<language>)*` directive in the text is replaced
/// by the queries of the listed languages for the same `filename`, each wrapped
/// in newlines. Inherited queries are resolved recursively.
pub fn read_query(
    language: &str,
    filename: &str,
    mut read_query_text: impl FnMut(&str, &str) -> String,
) -> String {
    fn read_query_impl(
        language: &str,
        filename: &str,
        read_query_text: &mut impl FnMut(&str, &str) -> String,
    ) -> String {
        static INHERITS_REGEX: Lazy<Regex> =
            Lazy::new(|| Regex::new(r";+\s*inherits\s*:?\s*([a-z_,()-]+)\s*").unwrap());

        let query = read_query_text(language, filename);
        let expanded = INHERITS_REGEX.replace_all(&query, |captures: &regex::Captures| {
            // Capture group 1 is the comma-separated list of parent languages.
            let mut inherited = String::new();
            for parent in captures[1].split(',') {
                let parent_query = read_query_impl(parent, filename, &mut *read_query_text);
                inherited.push('\n');
                inherited.push_str(&parent_query);
                inherited.push('\n');
            }
            inherited
        });
        expanded.into_owned()
    }

    read_query_impl(language, filename, &mut read_query_text)
}
/// Pattern for a shebang line: `#!`, optional whitespace, an optional path
/// prefix ending in `/` or `\` (optionally followed by `env` and its `-flags`),
/// then capture group 1: the interpreter name up to the first whitespace, `.`
/// or digit.
const SHEBANG: &str = r"#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
/// Lazily compiled form of `SHEBANG`.
static SHEBANG_REGEX: Lazy<rope::Regex> = Lazy::new(|| rope::Regex::new(SHEBANG).unwrap());
/// Per-pattern injection settings.
// NOTE(review): not referenced anywhere in the visible part of this file -
// confirm whether it is still needed.
struct InjectionSettings {
include_children: IncludedChildren,
language: Option<QueryStr>,
}
/// Describes how the language of an injection was specified.
#[derive(Debug, Clone)]
pub enum InjectionLanguageMarker<'a> {
/// A language name, taken from a captured node or an `injection.language` property.
Name(Cow<'a, str>),
/// A file name taken from the text of an `injection.filename` capture.
Filename(Cow<'a, Path>),
/// The interpreter name extracted from a shebang line.
Shebang(String),
}
/// Which children of an `injection.content` node are part of the injection.
#[derive(Clone, Default)]
enum IncludedChildren {
    /// Only the ranges belonging to the content node itself (the default).
    #[default]
    None,
    /// All children are included.
    All,
    /// Only unnamed children are included.
    Unnamed,
}

View File

@@ -0,0 +1,438 @@
pub use super::highlighter2::*;
// use std::borrow::Cow;
// use std::cell::RefCell;
// use std::sync::atomic::{self, AtomicUsize};
// use std::{fmt, iter, mem, ops};
// use ropey::RopeSlice;
// use tree_sitter::{QueryCaptures, QueryCursor, Tree};
// use crate::{byte_range_to_str, Error, HighlightConfiguration, Syntax, TREE_SITTER_MATCH_LIMIT};
// const CANCELLATION_CHECK_INTERVAL: usize = 100;
// /// Indicates which highlight should be applied to a region of source code.
// #[derive(Copy, Clone, Debug, PartialEq, Eq)]
// pub struct Highlight(pub usize);
// /// Represents a single step in rendering a syntax-highlighted document.
// #[derive(Copy, Clone, Debug)]
// pub enum HighlightEvent {
// Source { start: usize, end: usize },
// HighlightStart(Highlight),
// HighlightEnd,
// }
// #[derive(Debug)]
// struct LocalDef<'a> {
// name: Cow<'a, str>,
// value_range: ops::Range<usize>,
// highlight: Option<Highlight>,
// }
// #[derive(Debug)]
// struct LocalScope<'a> {
// inherits: bool,
// range: ops::Range<usize>,
// local_defs: Vec<LocalDef<'a>>,
// }
// #[derive(Debug)]
// struct HighlightIter<'a> {
// source: RopeSlice<'a>,
// byte_offset: usize,
// cancellation_flag: Option<&'a AtomicUsize>,
// layers: Vec<HighlightIterLayer<'a>>,
// iter_count: usize,
// next_event: Option<HighlightEvent>,
// last_highlight_range: Option<(usize, usize, u32)>,
// }
// struct HighlightIterLayer<'a> {
// _tree: Option<Tree>,
// cursor: QueryCursor,
// captures: RefCell<iter::Peekable<QueryCaptures<'a, 'a, RopeProvider<'a>, &'a [u8]>>>,
// config: &'a HighlightConfiguration,
// highlight_end_stack: Vec<usize>,
// scope_stack: Vec<LocalScope<'a>>,
// depth: u32,
// }
// impl<'a> fmt::Debug for HighlightIterLayer<'a> {
// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// f.debug_struct("HighlightIterLayer").finish()
// }
// }
// impl<'a> HighlightIterLayer<'a> {
// // First, sort scope boundaries by their byte offset in the document. At a
// // given position, emit scope endings before scope beginnings. Finally, emit
// // scope boundaries from deeper layers first.
// fn sort_key(&self) -> Option<(usize, bool, isize)> {
// let depth = -(self.depth as isize);
// let next_start = self
// .captures
// .borrow_mut()
// .peek()
// .map(|(m, i)| m.captures[*i].node.start_byte());
// let next_end = self.highlight_end_stack.last().cloned();
// match (next_start, next_end) {
// (Some(start), Some(end)) => {
// if start < end {
// Some((start, true, depth))
// } else {
// Some((end, false, depth))
// }
// }
// (Some(i), None) => Some((i, true, depth)),
// (None, Some(j)) => Some((j, false, depth)),
// _ => None,
// }
// }
// }
// impl<'a> HighlightIter<'a> {
// fn emit_event(
// &mut self,
// offset: usize,
// event: Option<HighlightEvent>,
// ) -> Option<Result<HighlightEvent, Error>> {
// let result;
// if self.byte_offset < offset {
// result = Some(Ok(HighlightEvent::Source {
// start: self.byte_offset,
// end: offset,
// }));
// self.byte_offset = offset;
// self.next_event = event;
// } else {
// result = event.map(Ok);
// }
// self.sort_layers();
// result
// }
// fn sort_layers(&mut self) {
// while !self.layers.is_empty() {
// if let Some(sort_key) = self.layers[0].sort_key() {
// let mut i = 0;
// while i + 1 < self.layers.len() {
// if let Some(next_offset) = self.layers[i + 1].sort_key() {
// if next_offset < sort_key {
// i += 1;
// continue;
// }
// } else {
// let layer = self.layers.remove(i + 1);
// PARSER.with(|ts_parser| {
// let highlighter = &mut ts_parser.borrow_mut();
// highlighter.cursors.push(layer.cursor);
// });
// }
// break;
// }
// if i > 0 {
// self.layers[0..(i + 1)].rotate_left(1);
// }
// break;
// } else {
// let layer = self.layers.remove(0);
// PARSER.with(|ts_parser| {
// let highlighter = &mut ts_parser.borrow_mut();
// highlighter.cursors.push(layer.cursor);
// });
// }
// }
// }
// }
// impl<'a> Iterator for HighlightIter<'a> {
// type Item = Result<HighlightEvent, Error>;
// fn next(&mut self) -> Option<Self::Item> {
// 'main: loop {
// // If we've already determined the next highlight boundary, just return it.
// if let Some(e) = self.next_event.take() {
// return Some(Ok(e));
// }
// // Periodically check for cancellation, returning `Cancelled` error if the
// // cancellation flag was flipped.
// if let Some(cancellation_flag) = self.cancellation_flag {
// self.iter_count += 1;
// if self.iter_count >= CANCELLATION_CHECK_INTERVAL {
// self.iter_count = 0;
// if cancellation_flag.load(atomic::Ordering::Relaxed) != 0 {
// return Some(Err(Error::Cancelled));
// }
// }
// }
// // If none of the layers have any more highlight boundaries, terminate.
// if self.layers.is_empty() {
// let len = self.source.len_bytes();
// return if self.byte_offset < len {
// let result = Some(Ok(HighlightEvent::Source {
// start: self.byte_offset,
// end: len,
// }));
// self.byte_offset = len;
// result
// } else {
// None
// };
// }
// // Get the next capture from whichever layer has the earliest highlight boundary.
// let range;
// let layer = &mut self.layers[0];
// let captures = layer.captures.get_mut();
// if let Some((next_match, capture_index)) = captures.peek() {
// let next_capture = next_match.captures[*capture_index];
// range = next_capture.node.byte_range();
// // If any previous highlight ends before this node starts, then before
// // processing this capture, emit the source code up until the end of the
// // previous highlight, and an end event for that highlight.
// if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
// if end_byte <= range.start {
// layer.highlight_end_stack.pop();
// return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
// }
// }
// }
// // If there are no more captures, then emit any remaining highlight end events.
// // And if there are none of those, then just advance to the end of the document.
// else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
// layer.highlight_end_stack.pop();
// return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
// } else {
// return self.emit_event(self.source.len_bytes(), None);
// };
// let (mut match_, capture_index) = captures.next().unwrap();
// let mut capture = match_.captures[capture_index];
// // Remove from the local scope stack any local scopes that have already ended.
// while range.start > layer.scope_stack.last().unwrap().range.end {
// layer.scope_stack.pop();
// }
// // If this capture is for tracking local variables, then process the
// // local variable info.
// let mut reference_highlight = None;
// let mut definition_highlight = None;
// while match_.pattern_index < layer.config.highlights_pattern_index {
// // If the node represents a local scope, push a new local scope onto
// // the scope stack.
// if Some(capture.index) == layer.config.local_scope_capture_index {
// definition_highlight = None;
// let mut scope = LocalScope {
// inherits: true,
// range: range.clone(),
// local_defs: Vec::new(),
// };
// for prop in layer.config.query.property_settings(match_.pattern_index) {
// if let "local.scope-inherits" = prop.key.as_ref() {
// scope.inherits =
// prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
// }
// }
// layer.scope_stack.push(scope);
// }
// // If the node represents a definition, add a new definition to the
// // local scope at the top of the scope stack.
// else if Some(capture.index) == layer.config.local_def_capture_index {
// reference_highlight = None;
// let scope = layer.scope_stack.last_mut().unwrap();
// let mut value_range = 0..0;
// for capture in match_.captures {
// if Some(capture.index) == layer.config.local_def_value_capture_index {
// value_range = capture.node.byte_range();
// }
// }
// let name = byte_range_to_str(range.clone(), self.source);
// scope.local_defs.push(LocalDef {
// name,
// value_range,
// highlight: None,
// });
// definition_highlight = scope.local_defs.last_mut().map(|s| &mut s.highlight);
// }
// // If the node represents a reference, then try to find the corresponding
// // definition in the scope stack.
// else if Some(capture.index) == layer.config.local_ref_capture_index
// && definition_highlight.is_none()
// {
// definition_highlight = None;
// let name = byte_range_to_str(range.clone(), self.source);
// for scope in layer.scope_stack.iter().rev() {
// if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
// if def.name == name && range.start >= def.value_range.end {
// Some(def.highlight)
// } else {
// None
// }
// }) {
// reference_highlight = highlight;
// break;
// }
// if !scope.inherits {
// break;
// }
// }
// }
// // Continue processing any additional matches for the same node.
// if let Some((next_match, next_capture_index)) = captures.peek() {
// let next_capture = next_match.captures[*next_capture_index];
// if next_capture.node == capture.node {
// capture = next_capture;
// match_ = captures.next().unwrap().0;
// continue;
// }
// }
// self.sort_layers();
// continue 'main;
// }
// // Otherwise, this capture must represent a highlight.
// // If this exact range has already been highlighted by an earlier pattern, or by
// // a different layer, then skip over this one.
// if let Some((last_start, last_end, last_depth)) = self.last_highlight_range {
// if range.start == last_start && range.end == last_end && layer.depth < last_depth {
// self.sort_layers();
// continue 'main;
// }
// }
// // If the current node was found to be a local variable, then skip over any
// // highlighting patterns that are disabled for local variables.
// if definition_highlight.is_some() || reference_highlight.is_some() {
// while layer.config.non_local_variable_patterns[match_.pattern_index] {
// match_.remove();
// if let Some((next_match, next_capture_index)) = captures.peek() {
// let next_capture = next_match.captures[*next_capture_index];
// if next_capture.node == capture.node {
// capture = next_capture;
// match_ = captures.next().unwrap().0;
// continue;
// }
// }
// self.sort_layers();
// continue 'main;
// }
// }
// // Once a highlighting pattern is found for the current node, skip over
// // any later highlighting patterns that also match this node. Captures
// // for a given node are ordered by pattern index, so these subsequent
// // captures are guaranteed to be for highlighting, not injections or
// // local variables.
// while let Some((next_match, next_capture_index)) = captures.peek() {
// let next_capture = next_match.captures[*next_capture_index];
// if next_capture.node == capture.node {
// captures.next();
// } else {
// break;
// }
// }
// let current_highlight = layer.config.highlight_indices.load()[capture.index as usize];
// // If this node represents a local definition, then store the current
// // highlight value on the local scope entry representing this node.
// if let Some(definition_highlight) = definition_highlight {
// *definition_highlight = current_highlight;
// }
// // Emit a scope start event and push the node's end position to the stack.
// if let Some(highlight) = reference_highlight.or(current_highlight) {
// self.last_highlight_range = Some((range.start, range.end, layer.depth));
// layer.highlight_end_stack.push(range.end);
// return self
// .emit_event(range.start, Some(HighlightEvent::HighlightStart(highlight)));
// }
// self.sort_layers();
// }
// }
// }
// impl Syntax {
// /// Iterate over the highlighted regions for a given slice of source code.
// pub fn highlight_iter<'a>(
// &'a self,
// source: RopeSlice<'a>,
// range: Option<std::ops::Range<usize>>,
// cancellation_flag: Option<&'a AtomicUsize>,
// ) -> impl Iterator<Item = Result<HighlightEvent, Error>> + 'a {
// let mut layers = self
// .layers
// .iter()
// .filter_map(|(_, layer)| {
// // TODO: if range doesn't overlap layer range, skip it
// // Reuse a cursor from the pool if available.
// let mut cursor = PARSER.with(|ts_parser| {
// let highlighter = &mut ts_parser.borrow_mut();
// highlighter.cursors.pop().unwrap_or_else(QueryCursor::new)
// });
// // The `captures` iterator borrows the `Tree` and the `QueryCursor`, which
// // prevents them from being moved. But both of these values are really just
// // pointers, so it's actually ok to move them.
// let cursor_ref =
// unsafe { mem::transmute::<_, &'static mut QueryCursor>(&mut cursor) };
// // if reusing cursors & no range this resets to whole range
// cursor_ref.set_byte_range(range.clone().unwrap_or(0..usize::MAX));
// cursor_ref.set_match_limit(TREE_SITTER_MATCH_LIMIT);
// let mut captures = cursor_ref
// .captures(
// &layer.config.query,
// layer.tree().root_node(),
// RopeProvider(source),
// )
// .peekable();
// // If there's no captures, skip the layer
// captures.peek()?;
// Some(HighlightIterLayer {
// highlight_end_stack: Vec::new(),
// scope_stack: vec![LocalScope {
// inherits: false,
// range: 0..usize::MAX,
// local_defs: Vec::new(),
// }],
// cursor,
// _tree: None,
// captures: RefCell::new(captures),
// config: layer.config.as_ref(), // TODO: just reuse `layer`
// depth: layer.depth, // TODO: just reuse `layer`
// })
// })
// .collect::<Vec<_>>();
// layers.sort_unstable_by_key(|layer| layer.sort_key());
// let mut result = HighlightIter {
// source,
// byte_offset: range.map_or(0, |r| r.start),
// cancellation_flag,
// iter_count: 0,
// layers,
// next_event: None,
// last_highlight_range: None,
// };
// result.sort_layers();
// result
// }
// }

View File

@@ -0,0 +1,206 @@
use std::borrow::Cow;
use std::iter::{self, Peekable};
use std::mem::{replace, take};
use std::slice;
use hashbrown::HashMap;
use crate::query_iter::{MatchedNode, QueryIter, QueryIterEvent};
use crate::{Injection, LayerId, Range, Syntax};
/// Indicates which highlight should be applied to a region of source code.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Highlight(pub u32);

impl Highlight {
    /// Sentinel meaning "no highlight assigned"; wraps `u32::MAX`.
    pub(crate) const NONE: Highlight = Highlight(u32::MAX);
}
/// A local definition record.
// NOTE(review): field meanings are inferred from names only; no visible code
// reads them - confirm against the local-variable tracking implementation.
#[derive(Debug)]
struct LocalDef<'a> {
name: Cow<'a, str>,
value_range: Range,
highlight: Option<Highlight>,
}
/// A local scope holding a list of `LocalDef`s.
// NOTE(review): not referenced by any visible code in this file - confirm
// whether `LayerData::scope_stack` was meant to hold these.
#[derive(Debug)]
struct LocalScope<'a> {
inherits: bool,
range: Range,
local_defs: Vec<LocalDef<'a>>,
}
/// An entry of the highlight stack: a highlight and the offset at which it ends.
#[derive(Debug)]
struct HighlightedNode {
/// End offset of the highlighted node; compared against `next_highlight_end`.
end: u32,
/// The highlight applied to the node.
highlight: Highlight,
}
/// Per-layer state the highlighter attaches to each layer of the query iterator.
#[derive(Debug, Default)]
struct LayerData<'a> {
/// Length of the active highlight stack when the layer's injection was entered.
parent_highlights: usize,
/// Highlights moved off the active stack when the layer was deactivated.
dormant_highlights: Vec<HighlightedNode>,
// NOTE(review): not used by any visible code - confirm the intended element type.
scope_stack: Vec<LocalDef<'a>>,
}
/// Settings that control how the highlighter resolves matches.
struct HighlighterConfig<'a> {
/// When several matches end at the same offset as the previous one in a single
/// step: if `true` the later match replaces it, otherwise the later one is dropped.
new_precedance: bool,
/// Highlight for each capture, indexed by capture index.
highlight_indices: &'a [Highlight],
}
/// Walks the events of a `QueryIter` and maintains the stack of highlights that
/// are active at the current position.
pub struct Highligther<'a> {
/// Source of query events.
query: QueryIter<'a, LayerData<'a>>,
/// The event fetched from `query` but not yet processed (one-event lookahead).
next_query_event: Option<QueryIterEvent<LayerData<'a>>>,
/// Stack of currently active highlights.
active_highlights: Vec<HighlightedNode>,
/// End of the topmost active highlight, or `u32::MAX` if the stack is empty.
next_highlight_end: u32,
/// Start of `next_query_event`, or `u32::MAX` if there is none.
next_highlight_start: u32,
config: HighlighterConfig<'a>,
}
/// Iterator over the `Highlight`s of a slice of highlighted nodes.
pub struct HighlightList<'a>(slice::Iter<'a, HighlightedNode>);

impl<'a> Iterator for HighlightList<'a> {
    type Item = Highlight;

    /// Yields the highlight of the next node, or `None` once the slice is exhausted.
    fn next(&mut self) -> Option<Highlight> {
        let node = self.0.next()?;
        Some(node.highlight)
    }
}
/// Result of `Highligther::advance`.
pub enum HighlighEvent<'a> {
/// Highlights were removed; carries the complete list of active highlights.
RefreshHiglights(HighlightList<'a>),
/// Highlights were only added; carries just the ones added by this step.
PushHighlights(HighlightList<'a>),
}
impl<'a> Highligther<'a> {
/// Returns an iterator over all currently active highlights, bottom of the stack first.
pub fn active_highlights(&self) -> HighlightList<'_> {
    let nodes = self.active_highlights.iter();
    HighlightList(nodes)
}
/// Returns the offset of the next event: the smaller of the next highlight
/// start and the next highlight end.
pub fn next_event_offset(&self) -> u32 {
    u32::min(self.next_highlight_start, self.next_highlight_end)
}
/// Processes every event located at `next_event_offset()` and reports how the
/// set of active highlights changed.
///
/// Returns `RefreshHiglights` with the full stack if any highlight was removed,
/// otherwise `PushHighlights` with only the highlights added by this call.
pub fn advance(&mut self) -> HighlighEvent<'_> {
let mut refresh = false;
// Remember the stack size so newly pushed highlights can be reported on their own.
let prev_stack_size = self.active_highlights.len();
let pos = self.next_event_offset();
// Highlights ending here are removed before any new ones are started.
if self.next_highlight_end == pos {
self.process_injection_ends();
self.process_higlight_end();
refresh = true;
}
let mut first_highlight = true;
// Consume all query events that start at this position.
while self.next_highlight_start == pos {
let Some(query_event) = self.adance_query_iter() else {
break;
};
match query_event {
QueryIterEvent::EnterInjection(_) => self.enter_injection(),
QueryIterEvent::Match(node) => self.start_highlight(node, &mut first_highlight),
QueryIterEvent::ExitInjection { injection, state } => {
// state is returned if the layer is finished, if it isn't we have
// a combined injection and need to deactivate its highlights
if state.is_none() {
self.deactive_layer(injection.layer);
refresh = true;
}
}
}
}
// The topmost active highlight determines where the next highlight ends.
self.next_highlight_end = self
.active_highlights
.last()
.map_or(u32::MAX, |node| node.end);
if refresh {
HighlighEvent::RefreshHiglights(HighlightList(self.active_highlights.iter()))
} else {
HighlighEvent::PushHighlights(HighlightList(
self.active_highlights[prev_stack_size..].iter(),
))
}
}
fn adance_query_iter(&mut self) -> Option<QueryIterEvent<LayerData<'a>>> {
let event = replace(&mut self.next_query_event, self.query.next());
self.next_highlight_start = self
.next_query_event
.as_ref()
.map_or(u32::MAX, |event| event.start());
event
}
fn process_higlight_end(&mut self) {
let i = self
.active_highlights
.iter()
.rposition(|highlight| highlight.end != self.next_highlight_end)
.unwrap();
self.active_highlights.truncate(i);
}
/// processes injections that end at the same position as highlights first.
fn process_injection_ends(&mut self) {
while self.next_highlight_end == self.next_highlight_start {
match self.next_query_event.as_ref() {
Some(QueryIterEvent::ExitInjection { injection, state }) => {
if state.is_none() {
self.deactive_layer(injection.layer);
}
}
Some(QueryIterEvent::Match(matched_node)) if matched_node.byte_range.is_empty() => {
}
_ => break,
}
}
}
fn enter_injection(&mut self) {
self.query.current_layer_state().parent_highlights = self.active_highlights.len();
}
fn deactive_layer(&mut self, layer: LayerId) {
let LayerData {
parent_highlights,
ref mut dormant_highlights,
..
} = *self.query.layer_state(layer);
let i = self.active_highlights[parent_highlights..]
.iter()
.rposition(|highlight| highlight.end != self.next_highlight_end)
.unwrap();
self.active_highlights.truncate(parent_highlights + i);
dormant_highlights.extend(self.active_highlights.drain(parent_highlights..))
}
fn start_highlight(&mut self, node: MatchedNode, first_highlight: &mut bool) {
if node.byte_range.is_empty() {
return;
}
// if there are multiple matches for the exact same node
// only use one of the (the last with new/nvim precedance)
if !*first_highlight
&& self.active_highlights.last().map_or(false, |prev_node| {
prev_node.end == node.byte_range.end as u32
})
{
if self.config.new_precedance {
self.active_highlights.pop();
} else {
return;
}
}
let highlight = self.config.highlight_indices[node.capture.idx()];
if highlight.0 == u32::MAX {
return;
}
self.active_highlights.push(HighlightedNode {
end: node.byte_range.end as u32,
highlight,
});
*first_highlight = false;
}
}

250
helix-syntax/src/lib.rs Normal file
View File

@@ -0,0 +1,250 @@
use ::ropey::RopeSlice;
use slotmap::{new_key_type, HopSlotMap};
use std::borrow::Cow;
use std::hash::{Hash, Hasher};
use std::path::Path;
use std::str;
use std::sync::Arc;
use crate::parse::LayerUpdateFlags;
pub use crate::config::{read_query, HighlightConfiguration};
use crate::tree_sitter::{SyntaxTree, SyntaxTreeNode};
pub use pretty_print::pretty_print_tree;
pub use tree_cursor::TreeCursor;
mod config;
pub mod highlighter;
pub mod highlighter2;
mod parse;
mod pretty_print;
mod query_iter;
pub mod text_object;
mod tree_cursor;
pub mod tree_sitter;
new_key_type! {
/// Slot map key that identifies a single `LanguageLayer` inside the
/// `layers` map of a `Syntax`.
pub struct LayerId;
}
/// The maximum number of in-progress matches a TS cursor can consider at once.
/// This is set to a constant in order to avoid performance problems for medium to large files. Set with `set_match_limit`.
/// Using such a limit means that we lose valid captures, so there is fundamentally a tradeoff here.
///
/// Old tree-sitter versions used a limit of 32 by default until this limit was removed in version `0.19.5` (must now be set manually).
/// However, having no limit causes performance issues for medium to large files.
/// In helix, this problem caused tree-sitter motions to take multiple seconds to complete in medium-sized rust files (3k loc).
///
/// Neovim also encountered this problem and reintroduced this limit after it was removed upstream
/// (see <https://github.com/neovim/neovim/issues/14897> and <https://github.com/neovim/neovim/pull/14915>).
/// The number used here is fundamentally a tradeoff between breaking some obscure edge cases and performance.
///
/// Neovim chose 64 for this value somewhat arbitrarily (<https://github.com/neovim/neovim/pull/18397>).
/// 64 is too low for some languages though. In particular, it breaks some highlighting for record fields in Erlang record definitions.
/// This number can be increased if new syntax highlight breakages are found, as long as the performance penalty is not too high.
pub const TREE_SITTER_MATCH_LIMIT: u32 = 256;
// TODO(perf): replace std::ops::Range<usize> with helix_stdx::Range<u32> once added
/// Half-open `start..end` range used for the byte ranges in this crate
/// (for example `Injection::byte_range`).
type Range = std::ops::Range<usize>;
/// The tree-sitter syntax tree for a single language.
/// This is really multiple nested different syntax trees due to tree-sitter
/// injections. A single syntax tree/parser is called a layer. Each layer
/// is parsed as a single "file" by tree-sitter. There can be multiple layers
/// for the same language. A layer corresponds to one of three things:
/// * the root layer
/// * a singular injection limited to a single node in its parent layer
/// * multiple injections (multiple disjoint nodes in the parent layer) that are
/// parsed as though they are a single uninterrupted file.
///
/// An injection always refers to a single node into which another layer is
/// injected. As injections only correspond to syntax tree nodes, injections in
/// the same layer do not intersect. However, the syntax tree in an injected
/// layer can have nodes that intersect with nodes from the parent layer. For
/// example:
/// ```text
/// layer2: | Sibling A | Sibling B (layer3) | Sibling C |
/// layer1: | Sibling A (layer2) | Sibling B | Sibling C (layer2) |
/// ```
/// In this case Sibling B really spans across a "GAP" in layer2. While the syntax
/// node can not be split up by tree-sitter directly, we can treat Sibling B as two
/// separate injections. That is done while parsing/running the query capture. As
/// a result the injections form a tree. Note that other queries must account for
/// such multi-injection nodes.
#[derive(Debug)]
pub struct Syntax {
/// All layers of the document, including the root layer.
layers: HopSlotMap<LayerId, LanguageLayer>,
/// Key of the root layer inside `layers`.
root: LayerId,
}
impl Syntax {
    /// Creates the syntax state for `source` with `config` as the language of
    /// the root layer and runs an initial `update`.
    ///
    /// `injection_callback` resolves an injection marker to the configuration
    /// of the injected language. Returns `None` (after logging an error) if the
    /// initial update fails.
    pub fn new(
        source: RopeSlice,
        config: Arc<HighlightConfiguration>,
        injection_callback: impl Fn(&InjectionLanguageMarker) -> Option<Arc<HighlightConfiguration>>,
    ) -> Option<Self> {
        // the root layer always covers the entire document
        let whole_document = tree_sitter::Range {
            start_byte: 0,
            end_byte: u32::MAX,
            start_point: tree_sitter::Point { row: 0, col: 0 },
            end_point: tree_sitter::Point {
                row: u32::MAX,
                col: u32::MAX,
            },
        };

        // track scope_descriptor: a Vec of scopes for item in tree
        let mut layers = HopSlotMap::default();
        let root = layers.insert(LanguageLayer {
            parse_tree: None,
            config,
            flags: LayerUpdateFlags::empty(),
            ranges: vec![whole_document].into_boxed_slice(),
            injections: Box::new([]),
            parent: None,
        });

        let mut syntax = Self { root, layers };
        let res = syntax.update(source, Vec::new(), injection_callback);
        if res.is_ok() {
            return Some(syntax);
        }
        log::error!("TS parser failed, disabling TS for the current buffer: {res:?}");
        None
    }

    /// Returns the syntax tree of the root layer.
    pub fn tree(&self) -> &SyntaxTree {
        let root_layer = &self.layers[self.root];
        root_layer.tree()
    }

    /// Returns the syntax tree of the layer selected by
    /// [`Self::layer_for_byte_range`] for `start` and `end`.
    pub fn tree_for_byte_range(&self, start: usize, end: usize) -> &SyntaxTree {
        let id = self.layer_for_byte_range(start, end);
        self.layers[id].tree()
    }

    /// Looks up the named descendant for `start..end` in the tree returned by
    /// [`Self::tree_for_byte_range`].
    pub fn named_descendant_for_byte_range(
        &self,
        start: usize,
        end: usize,
    ) -> Option<SyntaxTreeNode<'_>> {
        let tree = self.tree_for_byte_range(start, end);
        tree.root_node().named_descendant_for_byte_range(start, end)
    }

    /// Looks up the descendant for `start..end` in the tree returned by
    /// [`Self::tree_for_byte_range`].
    pub fn descendant_for_byte_range(
        &self,
        start: usize,
        end: usize,
    ) -> Option<SyntaxTreeNode<'_>> {
        let tree = self.tree_for_byte_range(start, end);
        tree.root_node().descendant_for_byte_range(start, end)
    }

    /// Descends from the root layer for as long as `start` and `end` are both
    /// found in injections that belong to the same child layer, and returns
    /// the last layer reached.
    pub fn layer_for_byte_range(&self, start: usize, end: usize) -> LayerId {
        let mut current = self.root;
        loop {
            let layer = &self.layers[current];
            let at_start = layer.injection_at_byte_idx(start);
            let at_end = layer.injection_at_byte_idx(end);
            match (at_start, at_end) {
                (Some(first), Some(last)) if first.layer == last.layer => current = first.layer,
                _ => return current,
            }
        }
    }

    /// Creates a tree cursor that starts at the root layer.
    pub fn walk(&self) -> TreeCursor<'_> {
        TreeCursor::new(&self.layers, self.root)
    }
}
/// A byte range of a layer whose content belongs to another (child) layer.
#[derive(Debug, Clone)]
pub(crate) struct Injection {
/// The byte range that is covered by the injection.
pub byte_range: Range,
/// The layer that is injected into `byte_range`.
pub layer: LayerId,
}
/// A single syntax tree (and the language it is parsed with) inside a `Syntax`.
#[derive(Debug)]
pub struct LanguageLayer {
/// Configuration of the language of this layer.
pub config: Arc<HighlightConfiguration>,
/// The parsed tree, `None` as long as the layer has not been parsed.
parse_tree: Option<SyntaxTree>,
/// The document ranges that make up this layer.
ranges: Box<[tree_sitter::Range]>,
/// a list of **sorted** non-overlapping injection ranges. Note that
/// injection ranges are not relative to the start of this layer but the
/// start of the root layer
injections: Box<[Injection]>,
/// internal flags used during parsing to track incremental invalidation
flags: LayerUpdateFlags,
/// The layer this layer is injected into, `None` for the root layer.
parent: Option<LayerId>,
}
/// This `PartialEq` implementation only checks whether two layers are
/// theoretically identical (meaning they have the same parent and highlight
/// the same text ranges with the same grammar).
/// It does not check whether the layers have the same internal tree-sitter
/// state.
impl PartialEq for LanguageLayer {
    fn eq(&self, other: &Self) -> bool {
        if self.parent != other.parent {
            return false;
        }
        if self.config.grammar != other.config.grammar {
            return false;
        }
        self.ranges == other.ranges
    }
}
/// `Hash` counterpart of the `PartialEq` implementation of `LanguageLayer`:
/// exactly the fields compared there (parent, grammar and ranges) are hashed,
/// in that order.
impl Hash for LanguageLayer {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // a tuple hashes its elements one after another
        (&self.parent, &self.config.grammar, &self.ranges).hash(state);
    }
}
impl LanguageLayer {
    /// Returns the parsed syntax tree of this layer.
    ///
    /// # Panics
    ///
    /// Panics if the layer has not been parsed yet (`parse_tree` is `None`).
    pub fn tree(&self) -> &SyntaxTree {
        // TODO: no unwrap
        self.parse_tree.as_ref().unwrap()
    }

    /// Returns the injection range **within this layer** that contains `idx`
    /// (`start <= idx < end`), or `None` if `idx` is not inside any injection.
    /// This function will not descend into nested injections
    pub(crate) fn injection_at_byte_idx(&self, idx: usize) -> Option<&Injection> {
        // `injections` is sorted and non-overlapping, so the ends are sorted
        // too: the only injection that can contain `idx` is the first one that
        // ends after it.
        let i = self
            .injections
            .partition_point(|injection| injection.byte_range.end <= idx);
        self.injections
            .get(i)
            .filter(|injection| injection.byte_range.start <= idx)
    }
}
/// Represents the reason why syntax highlighting failed.
// NOTE(review): the only code in view that produces these variants is the
// currently commented-out parser in `parse.rs`; the variant notes below follow
// that code and should be confirmed once it is re-enabled.
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
/// Presumably: the parser returned no tree (e.g. it was cancelled or timed out).
Cancelled,
/// Presumably: the parser rejected the language of a layer.
InvalidLanguage,
/// Presumably: the parser rejected the included ranges of a layer.
InvalidRanges,
Unknown,
}
/// Returns the text of the byte `range` of `source` as a `Cow<str>`.
fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> {
    let slice = source.byte_slice(range);
    slice.into()
}

135
helix-syntax/src/merge.rs Normal file
View File

@@ -0,0 +1,135 @@
use crate::highlighter::{Highlight, HighlightEvent};
/// Iterator adaptor that weaves a list of highlighted spans into a stream of
/// `HighlightEvent`s. Created by `merge`.
pub struct Merge<I> {
/// The underlying highlight event stream.
iter: I,
/// The spans `(highlight index, range)` that still have to be merged in.
spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>>,
/// Lookahead: the next (possibly already partially emitted) event of `iter`.
next_event: Option<HighlightEvent>,
/// Lookahead: the next (possibly already partially emitted) span.
next_span: Option<(usize, std::ops::Range<usize>)>,
/// Events that are emitted before anything else, stored in reverse order
/// (the last element is emitted first).
queue: Vec<HighlightEvent>,
}
/// Merge a list of spans into the highlight event stream.
///
/// Each span is a `(highlight index, range)` pair. The returned iterator is
/// primed with the first event of `iter` and the first span.
pub fn merge<I: Iterator<Item = HighlightEvent>>(
    iter: I,
    spans: Vec<(usize, std::ops::Range<usize>)>,
) -> Merge<I> {
    let mut iter = iter;
    let mut spans: Box<dyn Iterator<Item = (usize, std::ops::Range<usize>)>> =
        Box::new(spans.into_iter());
    // fill both lookahead slots, events first
    let next_event = iter.next();
    let next_span = spans.next();
    Merge {
        iter,
        spans,
        next_event,
        next_span,
        queue: Vec::new(),
    }
}
impl<I: Iterator<Item = HighlightEvent>> Iterator for Merge<I> {
type Item = HighlightEvent;
/// Produces the next event of the merged stream.
///
/// Events of the underlying iterator are passed through. Wherever a span
/// overlaps a `Source` event, the `Source` event is split at the span
/// boundaries and the overlapping part is wrapped in
/// `HighlightStart(span highlight)` / `HighlightEnd`.
fn next(&mut self) -> Option<Self::Item> {
use HighlightEvent::*;
// Events queued by an earlier call always go first.
if let Some(event) = self.queue.pop() {
return Some(event);
}
// Skip or trim spans that start before the upcoming `Source` event.
loop {
match (self.next_event, &self.next_span) {
// this happens when range is partially or fully offscreen
(Some(Source { start, .. }), Some((span, range))) if start > range.start => {
if start > range.end {
// the span lies completely before the event: drop it
self.next_span = self.spans.next();
} else {
// cut off the part of the span that lies before the event
self.next_span = Some((*span, start..range.end));
};
}
_ => break,
}
}
match (self.next_event, &self.next_span) {
// Highlight starts/ends of the underlying stream are passed through.
(Some(HighlightStart(i)), _) => {
self.next_event = self.iter.next();
Some(HighlightStart(i))
}
(Some(HighlightEnd), _) => {
self.next_event = self.iter.next();
Some(HighlightEnd)
}
// The event starts before the span: emit the part in front of the span.
(Some(Source { start, end }), Some((_, range))) if start < range.start => {
let intersect = range.start.min(end);
let event = Source {
start,
end: intersect,
};
if end == intersect {
// the event is complete
self.next_event = self.iter.next();
} else {
// subslice the event
self.next_event = Some(Source {
start: intersect,
end,
});
};
Some(event)
}
// The event and the span start together: emit the highlight start now
// and queue the overlapping source text followed by the highlight end.
(Some(Source { start, end }), Some((span, range))) if start == range.start => {
let intersect = range.end.min(end);
let event = HighlightStart(Highlight(*span));
// enqueue in reverse order
self.queue.push(HighlightEnd);
self.queue.push(Source {
start,
end: intersect,
});
if end == intersect {
// the event is complete
self.next_event = self.iter.next();
} else {
// subslice the event
self.next_event = Some(Source {
start: intersect,
end,
});
};
if intersect == range.end {
// the span is complete
self.next_span = self.spans.next();
} else {
// the rest of the span continues into the following events
self.next_span = Some((*span, intersect..range.end));
}
Some(event)
}
// No spans left: pass the remaining events through.
(Some(event), None) => {
self.next_event = self.iter.next();
Some(event)
}
// Can happen if cursor at EOF and/or diagnostic reaches past the end.
// We need to actually emit events for the cursor-at-EOF situation,
// even though the range is past the end of the text. This needs to be
// handled appropriately by the drawing code by not assuming that
// all `Source` events point to valid indices in the rope.
(None, Some((span, range))) => {
let event = HighlightStart(Highlight(*span));
self.queue.push(HighlightEnd);
self.queue.push(Source {
start: range.start,
end: range.end,
});
self.next_span = self.spans.next();
Some(event)
}
(None, None) => None,
// The trimming loop above leaves no `Source` event that starts after
// its span, so no other combination is expected here.
e => unreachable!("{:?}", e),
}
}
}

429
helix-syntax/src/parse.rs Normal file
View File

@@ -0,0 +1,429 @@
// use std::collections::VecDeque;
// use std::mem::replace;
// use std::sync::Arc;
// use ahash::RandomState;
use bitflags::bitflags;
// use hashbrown::raw::RawTable;
// use ropey::RopeSlice;
// use tree_sitter::{Node, Parser, Point, QueryCursor, Range};
// use crate::ropey::RopeProvider;
// use crate::{
// Error, HighlightConfiguration, IncludedChildren, InjectionLanguageMarker, LanguageLayer,
// Syntax, PARSER, TREE_SITTER_MATCH_LIMIT,
// };
bitflags! {
/// Flags that track the status of a layer
/// in the `Syntax::update` function
// NOTE(review): the meaning of the individual flags below is taken from the
// currently commented-out `Syntax::update` implementation - confirm once it
// is re-enabled.
#[derive(Debug)]
pub(crate) struct LayerUpdateFlags : u32{
/// An edit touched the ranges of the layer, it has to be re-parsed.
const MODIFIED = 0b001;
/// The ranges of the layer were only shifted by an edit.
const MOVED = 0b010;
/// The layer was visited during the update; untouched layers are removed.
const TOUCHED = 0b100;
}
}
// impl Syntax {
// pub fn update(
// &mut self,
// source: RopeSlice,
// edits: Vec<tree_sitter::InputEdit>,
// injection_callback: impl Fn(&InjectionLanguageMarker) -> Option<Arc<HighlightConfiguration>>,
// ) -> Result<(), Error> {
// let mut queue = VecDeque::new();
// queue.push_back(self.root);
// // This table allows inverse indexing of `layers`.
// // That is by hashing a `Layer` you can find
// // the `LayerId` of an existing equivalent `Layer` in `layers`.
// //
// // It is used to determine if a new layer exists for an injection
// // or if an existing layer needs to be updated.
// let mut layers_table = RawTable::with_capacity(self.layers.len());
// let layers_hasher = RandomState::new();
// // Use the edits to update all layers markers
// fn point_add(a: Point, b: Point) -> Point {
// if b.row > 0 {
// Point::new(a.row.saturating_add(b.row), b.column)
// } else {
// Point::new(0, a.column.saturating_add(b.column))
// }
// }
// fn point_sub(a: Point, b: Point) -> Point {
// if a.row > b.row {
// Point::new(a.row.saturating_sub(b.row), a.column)
// } else {
// Point::new(0, a.column.saturating_sub(b.column))
// }
// }
// for (layer_id, layer) in self.layers.iter_mut() {
// // The root layer always covers the whole range (0..usize::MAX)
// if layer.depth == 0 {
// layer.flags = LayerUpdateFlags::MODIFIED;
// continue;
// }
// if !edits.is_empty() {
// for range in &mut layer.ranges {
// // Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
// for edit in edits.iter().rev() {
// let is_pure_insertion = edit.old_end_byte == edit.start_byte;
// // if edit is after range, skip
// if edit.start_byte > range.end_byte {
// // TODO: || (is_noop && edit.start_byte == range.end_byte)
// continue;
// }
// // if edit is before range, shift entire range by len
// if edit.old_end_byte < range.start_byte {
// range.start_byte =
// edit.new_end_byte + (range.start_byte - edit.old_end_byte);
// range.start_point = point_add(
// edit.new_end_position,
// point_sub(range.start_point, edit.old_end_position),
// );
// range.end_byte = edit
// .new_end_byte
// .saturating_add(range.end_byte - edit.old_end_byte);
// range.end_point = point_add(
// edit.new_end_position,
// point_sub(range.end_point, edit.old_end_position),
// );
// layer.flags |= LayerUpdateFlags::MOVED;
// }
// // if the edit starts in the space before and extends into the range
// else if edit.start_byte < range.start_byte {
// range.start_byte = edit.new_end_byte;
// range.start_point = edit.new_end_position;
// range.end_byte = range
// .end_byte
// .saturating_sub(edit.old_end_byte)
// .saturating_add(edit.new_end_byte);
// range.end_point = point_add(
// edit.new_end_position,
// point_sub(range.end_point, edit.old_end_position),
// );
// layer.flags = LayerUpdateFlags::MODIFIED;
// }
// // If the edit is an insertion at the start of the tree, shift
// else if edit.start_byte == range.start_byte && is_pure_insertion {
// range.start_byte = edit.new_end_byte;
// range.start_point = edit.new_end_position;
// layer.flags |= LayerUpdateFlags::MOVED;
// } else {
// range.end_byte = range
// .end_byte
// .saturating_sub(edit.old_end_byte)
// .saturating_add(edit.new_end_byte);
// range.end_point = point_add(
// edit.new_end_position,
// point_sub(range.end_point, edit.old_end_position),
// );
// layer.flags = LayerUpdateFlags::MODIFIED;
// }
// }
// }
// }
// let hash = layers_hasher.hash_one(layer);
// // Safety: insert_no_grow is unsafe because it assumes that the table
// // has enough capacity to hold additional elements.
// // This is always the case as we reserved enough capacity above.
// unsafe { layers_table.insert_no_grow(hash, layer_id) };
// }
// PARSER.with(|ts_parser| {
// let ts_parser = &mut ts_parser.borrow_mut();
// ts_parser.parser.set_timeout_micros(1000 * 500); // half a second is pretty generours
// let mut cursor = ts_parser.cursors.pop().unwrap_or_else(QueryCursor::new);
// // TODO: might need to set cursor range
// cursor.set_byte_range(0..usize::MAX);
// cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
// let source_slice = source.slice(..);
// while let Some(layer_id) = queue.pop_front() {
// let layer = &mut self.layers[layer_id];
// // Mark the layer as touched
// layer.flags |= LayerUpdateFlags::TOUCHED;
// // If a tree already exists, notify it of changes.
// if let Some(tree) = &mut layer.parse_tree {
// if layer
// .flags
// .intersects(LayerUpdateFlags::MODIFIED | LayerUpdateFlags::MOVED)
// {
// for edit in edits.iter().rev() {
// // Apply the edits in reverse.
// // If we applied them in order then edit 1 would disrupt the positioning of edit 2.
// tree.edit(edit);
// }
// }
// if layer.flags.contains(LayerUpdateFlags::MODIFIED) {
// // Re-parse the tree.
// layer.parse(&mut ts_parser.parser, source)?;
// }
// } else {
// // always parse if this layer has never been parsed before
// layer.parse(&mut ts_parser.parser, source)?;
// }
// // Switch to an immutable borrow.
// let layer = &self.layers[layer_id];
// // Process injections.
// let matches = cursor.matches(
// &layer.config.injections_query,
// layer.tree().root_node(),
// RopeProvider(source_slice),
// );
// let mut combined_injections = vec![
// (None, Vec::new(), IncludedChildren::default());
// layer.config.combined_injections_patterns.len()
// ];
// let mut injections = Vec::new();
// let mut last_injection_end = 0;
// for mat in matches {
// let (injection_capture, content_node, included_children) = layer
// .config
// .injection_for_match(&layer.config.injections_query, &mat, source_slice);
// // in case this is a combined injection save it for more processing later
// if let Some(combined_injection_idx) = layer
// .config
// .combined_injections_patterns
// .iter()
// .position(|&pattern| pattern == mat.pattern_index)
// {
// let entry = &mut combined_injections[combined_injection_idx];
// if injection_capture.is_some() {
// entry.0 = injection_capture;
// }
// if let Some(content_node) = content_node {
// if content_node.start_byte() >= last_injection_end {
// entry.1.push(content_node);
// last_injection_end = content_node.end_byte();
// }
// }
// entry.2 = included_children;
// continue;
// }
// // Explicitly remove this match so that none of its other captures will remain
// // in the stream of captures.
// mat.remove();
// // If a language is found with the given name, then add a new language layer
// // to the highlighted document.
// if let (Some(injection_capture), Some(content_node)) =
// (injection_capture, content_node)
// {
// if let Some(config) = (injection_callback)(&injection_capture) {
// let ranges =
// intersect_ranges(&layer.ranges, &[content_node], included_children);
// if !ranges.is_empty() {
// if content_node.start_byte() < last_injection_end {
// continue;
// }
// last_injection_end = content_node.end_byte();
// injections.push((config, ranges));
// }
// }
// }
// }
// for (lang_name, content_nodes, included_children) in combined_injections {
// if let (Some(lang_name), false) = (lang_name, content_nodes.is_empty()) {
// if let Some(config) = (injection_callback)(&lang_name) {
// let ranges =
// intersect_ranges(&layer.ranges, &content_nodes, included_children);
// if !ranges.is_empty() {
// injections.push((config, ranges));
// }
// }
// }
// }
// let depth = layer.depth + 1;
// // TODO: can't inline this since matches borrows self.layers
// for (config, ranges) in injections {
// let parent = Some(layer_id);
// let new_layer = LanguageLayer {
// parse_tree: None,
// config,
// depth,
// ranges,
// flags: LayerUpdateFlags::empty(),
// parent: None,
// };
// // Find an identical existing layer
// let layer = layers_table
// .get(layers_hasher.hash_one(&new_layer), |&it| {
// self.layers[it] == new_layer
// })
// .copied();
// // ...or insert a new one.
// let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
// self.layers[layer_id].parent = parent;
// queue.push_back(layer_id);
// }
// // TODO: pre-process local scopes at this time, rather than highlight?
// // would solve problems with locals not working across boundaries
// }
// // Return the cursor back in the pool.
// ts_parser.cursors.push(cursor);
// // Reset all `LayerUpdateFlags` and remove all untouched layers
// self.layers.retain(|_, layer| {
// replace(&mut layer.flags, LayerUpdateFlags::empty())
// .contains(LayerUpdateFlags::TOUCHED)
// });
// Ok(())
// })
// }
// }
// /// Compute the ranges that should be included when parsing an injection.
// /// This takes into account three things:
// /// * `parent_ranges` - The ranges must all fall within the *current* layer's ranges.
// /// * `nodes` - Every injection takes place within a set of nodes. The injection ranges
// /// are the ranges of those nodes.
// /// * `includes_children` - For some injections, the content nodes' children should be
// /// excluded from the nested document, so that only the content nodes' *own* content
// /// is reparsed. For other injections, the content nodes' entire ranges should be
// /// reparsed, including the ranges of their children.
// fn intersect_ranges(
// parent_ranges: &[Range],
// nodes: &[Node],
// included_children: IncludedChildren,
// ) -> Vec<Range> {
// let mut cursor = nodes[0].walk();
// let mut result = Vec::new();
// let mut parent_range_iter = parent_ranges.iter();
// let mut parent_range = parent_range_iter
// .next()
// .expect("Layers should only be constructed with non-empty ranges vectors");
// for node in nodes.iter() {
// let mut preceding_range = Range {
// start_byte: 0,
// start_point: Point::new(0, 0),
// end_byte: node.start_byte(),
// end_point: node.start_position(),
// };
// let following_range = Range {
// start_byte: node.end_byte(),
// start_point: node.end_position(),
// end_byte: usize::MAX,
// end_point: Point::new(usize::MAX, usize::MAX),
// };
// for excluded_range in node
// .children(&mut cursor)
// .filter_map(|child| match included_children {
// IncludedChildren::None => Some(child.range()),
// IncludedChildren::All => None,
// IncludedChildren::Unnamed => {
// if child.is_named() {
// Some(child.range())
// } else {
// None
// }
// }
// })
// .chain([following_range].iter().cloned())
// {
// let mut range = Range {
// start_byte: preceding_range.end_byte,
// start_point: preceding_range.end_point,
// end_byte: excluded_range.start_byte,
// end_point: excluded_range.start_point,
// };
// preceding_range = excluded_range;
// if range.end_byte < parent_range.start_byte {
// continue;
// }
// while parent_range.start_byte <= range.end_byte {
// if parent_range.end_byte > range.start_byte {
// if range.start_byte < parent_range.start_byte {
// range.start_byte = parent_range.start_byte;
// range.start_point = parent_range.start_point;
// }
// if parent_range.end_byte < range.end_byte {
// if range.start_byte < parent_range.end_byte {
// result.push(Range {
// start_byte: range.start_byte,
// start_point: range.start_point,
// end_byte: parent_range.end_byte,
// end_point: parent_range.end_point,
// });
// }
// range.start_byte = parent_range.end_byte;
// range.start_point = parent_range.end_point;
// } else {
// if range.start_byte < range.end_byte {
// result.push(range);
// }
// break;
// }
// }
// if let Some(next_range) = parent_range_iter.next() {
// parent_range = next_range;
// } else {
// return result;
// }
// }
// }
// }
// result
// }
// impl LanguageLayer {
// fn parse(&mut self, parser: &mut Parser, source: RopeSlice) -> Result<(), Error> {
// parser
// .set_included_ranges(&self.ranges)
// .map_err(|_| Error::InvalidRanges)?;
// parser
// .set_language(&self.config.language)
// .map_err(|_| Error::InvalidLanguage)?;
// // unsafe { syntax.parser.set_cancellation_flag(cancellation_flag) };
// let tree = parser
// .parse_with(
// &mut |byte, _| {
// if byte <= source.len_bytes() {
// let (chunk, start_byte, _, _) = source.chunk_at_byte(byte);
// &chunk.as_bytes()[byte - start_byte..]
// } else {
// // out of range
// &[]
// }
// },
// self.parse_tree.as_ref(),
// )
// .ok_or(Error::Cancelled)?;
// // unsafe { ts_parser.parser.set_cancellation_flag(None) };
// self.parse_tree = Some(tree);
// Ok(())
// }
// }

View File

@@ -0,0 +1,65 @@
use std::fmt;
use tree_sitter::{Node, TreeCursor};
/// Writes an S-expression like representation of `node` and its descendants
/// to `fmt`.
///
/// A node without children is written on its own: `(kind)` if it is visible,
/// `"kind"` otherwise. Everything else is printed recursively, starting at
/// depth 0.
pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result {
    if node.child_count() != 0 {
        return pretty_print_tree_impl(fmt, &mut node.walk(), 0);
    }

    if node_is_visible(&node) {
        write!(fmt, "({})", node.kind())
    } else {
        write!(fmt, "\"{}\"", node.kind())
    }
}
/// Recursively prints the node under `cursor` and all of its children.
///
/// Visible nodes are indented by two columns per `depth` level, prefixed with
/// their field name (if any) and wrapped in parentheses. Invisible nodes
/// print nothing themselves, but their children are still visited. The cursor
/// points at the same node again when the function returns.
fn pretty_print_tree_impl<W: fmt::Write>(
    fmt: &mut W,
    cursor: &mut TreeCursor,
    depth: usize,
) -> fmt::Result {
    let node = cursor.node();
    let visible = node_is_visible(&node);

    if visible {
        let indentation_columns = depth * 2;
        write!(fmt, "{:indentation_columns$}", "")?;
        if let Some(field_name) = cursor.field_name() {
            write!(fmt, "{}: ", field_name)?;
        }
        write!(fmt, "({}", node.kind())?;
    }

    // Handle children.
    if cursor.goto_first_child() {
        let mut has_more_children = true;
        while has_more_children {
            // every visible child starts on a new line
            if node_is_visible(&cursor.node()) {
                fmt.write_char('\n')?;
            }
            pretty_print_tree_impl(fmt, cursor, depth + 1)?;
            has_more_children = cursor.goto_next_sibling();
        }

        let moved = cursor.goto_parent();
        // The parent of the first child must exist, and must be `node`.
        debug_assert!(moved);
        debug_assert!(cursor.node() == node);
    }

    if !visible {
        return Ok(());
    }
    fmt.write_char(')')
}
/// Returns whether `node` is printed by the pretty printer: missing nodes
/// always are, other nodes only if they are named and their kind is marked
/// as visible by the language.
fn node_is_visible(node: &Node) -> bool {
    if node.is_missing() {
        return true;
    }
    node.is_named() && node.language().node_kind_is_visible(node.kind_id())
}

View File

@@ -0,0 +1,236 @@
use core::slice;
use std::iter::Peekable;
use std::mem::replace;
use hashbrown::HashMap;
use ropey::RopeSlice;
use crate::tree_sitter::{
Capture, InactiveQueryCursor, Query, QueryCursor, RopeTsInput, SyntaxTreeNode,
};
use crate::{Injection, LayerId, Range, Syntax};
/// A syntax node that was captured by a query, reduced to the data the
/// query iterator hands out.
#[derive(Clone)]
pub struct MatchedNode {
/// The capture that matched the node.
pub capture: Capture,
/// The byte range of the matched syntax node.
pub byte_range: Range,
}
/// Query cursor of a single layer with one matched node of lookahead.
struct LayerQueryIter<'a> {
cursor: QueryCursor<'a, 'a, RopeTsInput<'a>>,
/// The next matched node if it was already fetched by `peek`.
peeked: Option<MatchedNode>,
}
impl<'a> LayerQueryIter<'a> {
    /// Returns the next matched node without consuming it, fetching it from
    /// the cursor if necessary. Returns `None` once the cursor is exhausted.
    fn peek(&mut self) -> Option<&MatchedNode> {
        if self.peeked.is_none() {
            let (query_match, node_idx) = self.cursor.next_matched_node()?;
            let matched = query_match.matched_node(node_idx);
            let lookahead = MatchedNode {
                capture: matched.capture,
                byte_range: matched.syntax_node.byte_range(),
            };
            self.peeked = Some(lookahead);
        }
        self.peeked.as_ref()
    }

    /// Takes the node that was fetched by the last `peek`.
    ///
    /// Panics if there is no peeked node.
    fn consume(&mut self) -> MatchedNode {
        let lookahead = self.peeked.take();
        lookahead.unwrap()
    }
}
/// Iteration state of a layer that has been entered by a `QueryIter`.
struct ActiveLayer<'a, S> {
/// Caller defined per-layer state, created with `S::default()`.
state: S,
/// The matches of the query within this layer.
query_iter: LayerQueryIter<'a>,
/// The injections of this layer that have not been entered yet.
injections: Peekable<slice::Iter<'a, Injection>>,
}
// data only needed when entering and exiting injections
// separate struct to keep the QueryIter reasonably small
struct QueryIterLayerManager<'a, S> {
/// The query that is executed for every layer.
query: &'a Query,
/// The node the query is executed on.
node: SyntaxTreeNode<'a>,
/// The source text the query cursors read from.
src: RopeSlice<'a>,
syntax: &'a Syntax,
/// Layers that were entered but are not the current layer right now: the
/// layers of the enclosing injections and unfinished layers that were exited.
active_layers: HashMap<LayerId, Box<ActiveLayer<'a, S>>>,
/// Stack of the injections that enclose the current injection.
active_injections: Vec<Injection>,
}
impl<'a, S: Default> QueryIterLayerManager<'a, S> {
    /// Returns the iteration state for the layer of `injection`.
    ///
    /// A layer that is already stored in `active_layers` is taken out of the
    /// map and resumed. Otherwise a fresh state is created: the query is
    /// executed and the layer's injections are skipped up to the first one
    /// that does not start before `injection`.
    fn init_layer(&mut self, injection: Injection) -> Box<ActiveLayer<'a, S>> {
        if let Some(suspended) = self.active_layers.remove(&injection.layer) {
            return suspended;
        }

        let layer = &self.syntax.layers[injection.layer];
        let first_child = layer
            .injections
            .partition_point(|child| child.byte_range.start < injection.byte_range.start);
        // NOTE(review): the query runs on `self.node` for every layer, not on
        // the root node of the layer's own tree - confirm that this is intended.
        let cursor = InactiveQueryCursor::new().execute_query(
            self.query,
            &self.node,
            RopeTsInput::new(self.src),
        );
        let query_iter = LayerQueryIter {
            cursor,
            peeked: None,
        };
        Box::new(ActiveLayer {
            state: S::default(),
            query_iter,
            injections: layer.injections[first_child..].iter().peekable(),
        })
    }
}
/// Iterator that runs a query across the layers of a `Syntax` and yields
/// matches interleaved with enter/exit events for injections.
///
/// `LayerState` is caller defined data that is kept for every entered layer.
pub struct QueryIter<'a, LayerState: Default = ()> {
/// Data that is only needed when entering and exiting injections.
layer_manager: Box<QueryIterLayerManager<'a, LayerState>>,
/// The layer whose matches are currently produced.
current_layer: Box<ActiveLayer<'a, LayerState>>,
/// The injection that `current_layer` was entered through.
current_injection: Injection,
}
impl<'a, LayerState: Default> QueryIter<'a, LayerState> {
    /// Creates an iterator that runs `query` on the root node of the root
    /// layer of `syntax`.
    pub fn new(syntax: &'a Syntax, src: RopeSlice<'a>, query: &'a Query) -> Self {
        Self::at(syntax, src, query, syntax.tree().root_node(), syntax.root)
    }

    /// Creates an iterator that runs `query` on `node`, which belongs to
    /// `layer`.
    pub fn at(
        syntax: &'a Syntax,
        src: RopeSlice<'a>,
        query: &'a Query,
        node: SyntaxTreeNode<'a>,
        layer: LayerId,
    ) -> Self {
        // create fake injection for query root
        let injection = Injection {
            byte_range: node.byte_range(),
            layer,
        };
        let mut layer_manager = Box::new(QueryIterLayerManager {
            query,
            node,
            src,
            syntax,
            // TODO: reuse allocations with an allocation pool
            active_layers: HashMap::with_capacity(8),
            active_injections: Vec::with_capacity(8),
        });
        Self {
            // `Injection` is not `Copy`: hand a copy to `init_layer` so the
            // original can be stored as the current injection.
            current_layer: layer_manager.init_layer(injection.clone()),
            current_injection: injection,
            layer_manager,
        }
    }

    /// Returns the state of the layer that is currently iterated.
    pub fn current_layer_state(&mut self) -> &mut LayerState {
        &mut self.current_layer.state
    }

    /// Returns the state of `layer`.
    ///
    /// # Panics
    ///
    /// Panics if `layer` is neither the current layer nor stored in the set of
    /// active layers.
    pub fn layer_state(&mut self, layer: LayerId) -> &mut LayerState {
        if layer == self.current_injection.layer {
            self.current_layer_state()
        } else {
            &mut self
                .layer_manager
                .active_layers
                .get_mut(&layer)
                .unwrap()
                .state
        }
    }

    /// Makes the layer of `injection` the current layer. The previous layer
    /// and injection are parked in the layer manager.
    fn enter_injection(&mut self, injection: Injection) {
        let active_layer = self.layer_manager.init_layer(injection.clone());
        let old_injection = replace(&mut self.current_injection, injection);
        let old_layer = replace(&mut self.current_layer, active_layer);
        self.layer_manager
            .active_layers
            .insert(old_injection.layer, old_layer);
        self.layer_manager.active_injections.push(old_injection);
    }

    /// Leaves the current injection and resumes the enclosing one.
    ///
    /// Returns the injection that was left together with its layer state if
    /// the layer is finished. A layer that still has a pending match (a
    /// combined injection that continues later) is parked again and `None` is
    /// returned for the state. Returns `None` if there is no enclosing
    /// injection to return to.
    fn exit_injection(&mut self) -> Option<(Injection, Option<LayerState>)> {
        let injection = replace(
            &mut self.current_injection,
            self.layer_manager.active_injections.pop()?,
        );
        let layer = replace(
            &mut self.current_layer,
            self.layer_manager
                .active_layers
                .remove(&self.current_injection.layer)?,
        );
        let layer_unfinished = layer.query_iter.peeked.is_some();
        if layer_unfinished {
            // The layer was taken out of the map when it was entered, so
            // `insert` has no previous value to return here.
            self.layer_manager
                .active_layers
                .insert(injection.layer, layer);
            Some((injection, None))
        } else {
            Some((injection, Some(layer.state)))
        }
    }
}
impl<'a, S: Default> Iterator for QueryIter<'a, S> {
    type Item = QueryIterEvent<S>;

    /// Yields the next match or injection boundary.
    ///
    /// Only matches and injections that start before the end of the current
    /// injection are considered. Once neither is left the current injection is
    /// exited; the iterator ends when there is no enclosing injection left.
    fn next(&mut self) -> Option<QueryIterEvent<S>> {
        loop {
            let next_injection = self.current_layer.injections.peek().filter(|injection| {
                injection.byte_range.start < self.current_injection.byte_range.end
            });
            let next_match = self.current_layer.query_iter.peek().filter(|matched_node| {
                matched_node.byte_range.start < self.current_injection.byte_range.end
            });
            match (next_match, next_injection) {
                (None, None) => {
                    return self.exit_injection().map(|(injection, state)| {
                        QueryIterEvent::ExitInjection { injection, state }
                    });
                }
                (Some(_), None) => {
                    // consume match
                    let matched_node = self.current_layer.query_iter.consume();
                    return Some(QueryIterEvent::Match(matched_node));
                }
                // Byte ranges are half-open: a match that starts exactly at the
                // end of the injection is not overlapped by it and must be
                // kept for after the injection, hence `<` and not `<=`.
                (Some(matched_node), Some(injection))
                    if matched_node.byte_range.start < injection.byte_range.end =>
                {
                    // consume match
                    let matched_node = self.current_layer.query_iter.consume();
                    // ignore nodes that are overlapped by the injection
                    if matched_node.byte_range.start <= injection.byte_range.start {
                        return Some(QueryIterEvent::Match(matched_node));
                    }
                }
                (Some(_), Some(_)) | (None, Some(_)) => {
                    // consume injection
                    let injection = self.current_layer.injections.next().unwrap();
                    self.enter_injection(injection.clone());
                    return Some(QueryIterEvent::EnterInjection(injection.clone()));
                }
            }
        }
    }
}
/// An event yielded by the query iterator.
pub enum QueryIterEvent<State = ()> {
/// The iterator entered the given injection.
EnterInjection(Injection),
/// A query match inside the current layer.
Match(MatchedNode),
/// The iterator left `injection`.
ExitInjection {
/// The injection that was left.
injection: Injection,
/// State of the layer that was left; `None` when the layer was kept
/// around because its query iterator still had a pending match.
state: Option<State>,
},
}
impl<S> QueryIterEvent<S> {
    /// Returns the byte offset at which this event takes place.
    ///
    /// * `EnterInjection`: the start of the injection's byte range.
    /// * `Match`: the start of the matched node's byte range.
    /// * `ExitInjection`: the end of the injection's byte range.
    pub fn start(&self) -> u32 {
        match self {
            QueryIterEvent::EnterInjection(injection) => injection.byte_range.start as u32,
            QueryIterEvent::Match(mat) => mat.byte_range.start as u32,
            // An injection is left where it ends. Reporting its start here
            // would place the exit event at the same position as the
            // matching enter event.
            QueryIterEvent::ExitInjection { injection, .. } => injection.byte_range.end as u32,
        }
    }
}

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,93 @@
// TODO: rework using query iter
use std::iter;
use ropey::RopeSlice;
use crate::tree_sitter::{InactiveQueryCursor, Query, RopeTsInput, SyntaxTreeNode};
use crate::TREE_SITTER_MATCH_LIMIT;
/// The node(s) matched by a single capture of a text object query.
#[derive(Debug)]
pub enum CapturedNode<'a> {
/// The capture matched exactly one node.
Single(SyntaxTreeNode<'a>),
/// Guaranteed to be not empty
Grouped(Vec<SyntaxTreeNode<'a>>),
}
impl<'a> CapturedNode<'a> {
    /// Byte offset at which the capture starts: the start of the single
    /// node, or the start of the first node of a group.
    ///
    /// # Panics
    ///
    /// Panics if a `Grouped` capture is empty.
    pub fn start_byte(&self) -> usize {
        let first = match self {
            Self::Single(node) => node,
            Self::Grouped(nodes) => &nodes[0],
        };
        first.start_byte()
    }

    /// Byte offset at which the capture ends: the end of the single node,
    /// or the end of the last node of a group.
    ///
    /// # Panics
    ///
    /// Panics if a `Grouped` capture is empty.
    pub fn end_byte(&self) -> usize {
        let last = match self {
            Self::Single(node) => node,
            Self::Grouped(nodes) => nodes.last().unwrap(),
        };
        last.end_byte()
    }
}
/// Wrapper around the [`Query`] used to look up text object captures.
#[derive(Debug)]
pub struct TextObjectQuery {
/// The compiled query whose captures are searched.
pub query: Query,
}
impl TextObjectQuery {
    /// Run the query on the given node and return sub nodes which match given
    /// capture ("function.inside", "class.around", etc).
    ///
    /// Captures may contain multiple nodes by using quantifiers (+, *, etc),
    /// and support for this is partial and could use improvement.
    ///
    /// ```query
    /// (comment)+ @capture
    ///
    /// ; OR
    /// (
    ///   (comment)*
    ///   .
    ///   (function)
    /// ) @capture
    /// ```
    ///
    /// Returns `None` when the query has no capture called `capture_name`.
    pub fn capture_nodes<'a>(
        &'a self,
        capture_name: &str,
        node: SyntaxTreeNode<'a>,
        slice: RopeSlice<'a>,
        cursor: InactiveQueryCursor,
    ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
        let names = [capture_name];
        self.capture_nodes_any(&names, node, slice, cursor)
    }

    /// Find the first capture that exists out of all given `capture_names`
    /// and return sub nodes that match this capture.
    ///
    /// Returns `None` when none of `capture_names` is a capture of the query.
    /// Matches that contain no node for the capture are skipped; a match with
    /// one node yields [`CapturedNode::Single`], a match with several nodes
    /// yields [`CapturedNode::Grouped`].
    pub fn capture_nodes_any<'a>(
        &'a self,
        capture_names: &[&str],
        node: SyntaxTreeNode<'a>,
        slice: RopeSlice<'a>,
        mut cursor: InactiveQueryCursor,
    ) -> Option<impl Iterator<Item = CapturedNode<'a>>> {
        let capture = capture_names
            .iter()
            .find_map(|cap| self.query.get_capture(cap))?;

        cursor.set_match_limit(TREE_SITTER_MATCH_LIMIT);
        let mut active = cursor.execute_query(&self.query, &node, RopeTsInput::new(slice));

        // One `Vec` of captured nodes per match reported by the cursor.
        let nodes_per_match = iter::from_fn(move || {
            let (mat, _) = active.next_matched_node()?;
            let nodes: Vec<_> = mat.nodes_for_capture(capture).cloned().collect();
            Some(nodes)
        });

        let captured = nodes_per_match.filter_map(|mut nodes| match nodes.len() {
            0 => None,
            1 => nodes.pop().map(CapturedNode::Single),
            _ => Some(CapturedNode::Grouped(nodes)),
        });
        Some(captured)
    }
}

View File

@@ -0,0 +1,264 @@
use std::{cmp::Reverse, ops::Range};
use super::{LanguageLayer, LayerId};
use slotmap::HopSlotMap;
use tree_sitter::Node;
/// The byte range of an injection layer.
///
/// Injection ranges may overlap, but all overlapping parts are subsets of their parent ranges.
/// This allows us to sort the ranges ahead of time in order to efficiently find a range that
/// contains a point with maximum depth.
#[derive(Debug)]
struct InjectionRange {
/// First byte covered by the range.
start: usize,
/// End byte of the range; the list of ranges is sorted by this value first.
end: usize,
/// The layer this range belongs to.
layer_id: LayerId,
/// Depth of the layer; used (in reverse) as secondary sort key.
depth: u32,
}
/// A cursor for walking syntax trees that can move between a layer and the
/// injection layers nested in it.
pub struct TreeCursor<'a> {
/// All language layers the cursor may visit.
layers: &'a HopSlotMap<LayerId, LanguageLayer>,
/// The layer the cursor was created on; ascending stops here.
root: LayerId,
/// The layer the cursor is currently positioned in.
current: LayerId,
/// Ranges of all non-root layers, sorted by `(end, Reverse(depth))`.
injection_ranges: Vec<InjectionRange>,
// TODO: Ideally this would be a `tree_sitter::TreeCursor<'a>` but
// that returns very surprising results in testing.
cursor: Node<'a>,
}
impl<'a> TreeCursor<'a> {
pub(super) fn new(layers: &'a HopSlotMap<LayerId, LanguageLayer>, root: LayerId) -> Self {
let mut injection_ranges = Vec::new();
for (layer_id, layer) in layers.iter() {
// Skip the root layer
if layer.parent.is_none() {
continue;
}
for byte_range in layer.ranges.iter() {
let range = InjectionRange {
start: byte_range.start_byte,
end: byte_range.end_byte,
layer_id,
depth: layer.depth,
};
injection_ranges.push(range);
}
}
injection_ranges.sort_unstable_by_key(|range| (range.end, Reverse(range.depth)));
let cursor = layers[root].tree().root_node();
Self {
layers,
root,
current: root,
injection_ranges,
cursor,
}
}
pub fn node(&self) -> Node<'a> {
self.cursor
}
pub fn goto_parent(&mut self) -> bool {
if let Some(parent) = self.node().parent() {
self.cursor = parent;
return true;
}
// If we are already on the root layer, we cannot ascend.
if self.current == self.root {
return false;
}
// Ascend to the parent layer.
let range = self.node().byte_range();
let parent_id = self.layers[self.current]
.parent
.expect("non-root layers have a parent");
self.current = parent_id;
let root = self.layers[self.current].tree().root_node();
self.cursor = root
.descendant_for_byte_range(range.start, range.end)
.unwrap_or(root);
true
}
pub fn goto_parent_with<P>(&mut self, predicate: P) -> bool
where
P: Fn(&Node) -> bool,
{
while self.goto_parent() {
if predicate(&self.node()) {
return true;
}
}
false
}
/// Finds the injection layer that has exactly the same range as the given `range`.
fn layer_id_of_byte_range(&self, search_range: Range<usize>) -> Option<LayerId> {
let start_idx = self
.injection_ranges
.partition_point(|range| range.end < search_range.end);
self.injection_ranges[start_idx..]
.iter()
.take_while(|range| range.end == search_range.end)
.find_map(|range| (range.start == search_range.start).then_some(range.layer_id))
}
fn goto_first_child_impl(&mut self, named: bool) -> bool {
// Check if the current node's range is an exact injection layer range.
if let Some(layer_id) = self
.layer_id_of_byte_range(self.node().byte_range())
.filter(|&layer_id| layer_id != self.current)
{
// Switch to the child layer.
self.current = layer_id;
self.cursor = self.layers[self.current].tree().root_node();
return true;
}
let child = if named {
self.cursor.named_child(0)
} else {
self.cursor.child(0)
};
if let Some(child) = child {
// Otherwise descend in the current tree.
self.cursor = child;
true
} else {
false
}
}
pub fn goto_first_child(&mut self) -> bool {
self.goto_first_child_impl(false)
}
pub fn goto_first_named_child(&mut self) -> bool {
self.goto_first_child_impl(true)
}
fn goto_next_sibling_impl(&mut self, named: bool) -> bool {
let sibling = if named {
self.cursor.next_named_sibling()
} else {
self.cursor.next_sibling()
};
if let Some(sibling) = sibling {
self.cursor = sibling;
true
} else {
false
}
}
pub fn goto_next_sibling(&mut self) -> bool {
self.goto_next_sibling_impl(false)
}
pub fn goto_next_named_sibling(&mut self) -> bool {
self.goto_next_sibling_impl(true)
}
fn goto_prev_sibling_impl(&mut self, named: bool) -> bool {
let sibling = if named {
self.cursor.prev_named_sibling()
} else {
self.cursor.prev_sibling()
};
if let Some(sibling) = sibling {
self.cursor = sibling;
true
} else {
false
}
}
pub fn goto_prev_sibling(&mut self) -> bool {
self.goto_prev_sibling_impl(false)
}
pub fn goto_prev_named_sibling(&mut self) -> bool {
self.goto_prev_sibling_impl(true)
}
/// Finds the injection layer that contains the given start-end range.
fn layer_id_containing_byte_range(&self, start: usize, end: usize) -> LayerId {
let start_idx = self
.injection_ranges
.partition_point(|range| range.end < end);
self.injection_ranges[start_idx..]
.iter()
.take_while(|range| range.start < end)
.find_map(|range| (range.start <= start).then_some(range.layer_id))
.unwrap_or(self.root)
}
pub fn reset_to_byte_range(&mut self, start: usize, end: usize) {
self.current = self.layer_id_containing_byte_range(start, end);
let root = self.layers[self.current].tree().root_node();
self.cursor = root.descendant_for_byte_range(start, end).unwrap_or(root);
}
/// Returns an iterator over the children of the node the TreeCursor is on
/// at the time this is called.
pub fn children(&'a mut self) -> ChildIter {
let parent = self.node();
ChildIter {
cursor: self,
parent,
named: false,
}
}
/// Returns an iterator over the named children of the node the TreeCursor is on
/// at the time this is called.
pub fn named_children(&'a mut self) -> ChildIter {
let parent = self.node();
ChildIter {
cursor: self,
parent,
named: true,
}
}
}
/// Iterator over the (named) children of the node a [`TreeCursor`] was on
/// when the iterator was created. It moves the borrowed cursor as it goes.
pub struct ChildIter<'n> {
/// The cursor that is advanced on every step.
cursor: &'n mut TreeCursor<'n>,
/// The node whose children are visited.
parent: Node<'n>,
/// Whether only named children are visited.
named: bool,
}
impl<'n> Iterator for ChildIter<'n> {
    type Item = Node<'n>;

    /// Moves the cursor to the next child and returns it, or returns `None`
    /// when the cursor could not be moved.
    fn next(&mut self) -> Option<Self::Item> {
        // first iteration, just visit the first child
        let on_parent = self.cursor.node() == self.parent;
        let moved = if on_parent {
            self.cursor.goto_first_child_impl(self.named)
        } else {
            self.cursor.goto_next_sibling_impl(self.named)
        };
        moved.then(|| self.cursor.node())
    }
}

View File

@@ -0,0 +1,45 @@
mod grammar;
mod parser;
pub mod query;
mod query_cursor;
mod query_match;
mod ropey;
mod syntax_tree;
mod syntax_tree_node;
use std::ops;
pub use grammar::Grammar;
pub use parser::{Parser, ParserInputRaw};
pub use query::{Capture, Pattern, Query, QueryStr};
pub use query_cursor::{InactiveQueryCursor, MatchedNode, MatchedNodeIdx, QueryCursor, QueryMatch};
pub use ropey::RopeTsInput;
pub use syntax_tree::{InputEdit, SyntaxTree};
pub use syntax_tree_node::SyntaxTreeNode;
/// A row/column position. `#[repr(C)]` because values of this type are passed
/// across the C FFI boundary (see the parser read callback).
// NOTE(review): presumably mirrors tree-sitter's `TSPoint` — confirm the layout against the vendored headers.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Point {
/// Row of the position.
pub row: u32,
/// Column of the position.
pub col: u32,
}
/// A range of text described both as points and as byte offsets. `#[repr(C)]`
/// because slices of this type are handed to `ts_parser_set_included_ranges`.
// NOTE(review): presumably mirrors tree-sitter's `TSRange` — confirm the layout against the vendored headers.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Range {
/// Position at which the range starts.
pub start_point: Point,
/// Position at which the range ends.
pub end_point: Point,
/// Byte offset at which the range starts.
pub start_byte: u32,
/// Byte offset at which the range ends.
pub end_byte: u32,
}
/// A source of text that can be read by the parser (and query predicates).
pub trait TsInput {
/// Cursor type used to access the text chunk by chunk.
type Cursor: regex_cursor::Cursor;
/// Returns a cursor for reading the text at byte `offset`.
// NOTE(review): the parser reads `cursor.chunk()` right after this call; whether the
// chunk is guaranteed to start exactly at `offset` is not specified here — confirm.
fn cursor_at(&mut self, offset: usize) -> &mut Self::Cursor;
/// Compares the text in the two byte ranges.
// NOTE(review): presumably returns `true` when both ranges contain identical bytes — confirm with implementors.
fn eq(&mut self, range1: ops::Range<usize>, range2: ops::Range<usize>) -> bool;
}
/// Conversion into a [`TsInput`]; accepted by `Parser::parse`.
pub trait IntoTsInput {
/// The input type produced by the conversion.
type TsInput: TsInput;
/// Consumes `self` and returns the input.
fn into_ts_input(self) -> Self::TsInput;
}

View File

@@ -0,0 +1,107 @@
use std::fmt;
use std::path::{Path, PathBuf};
use std::ptr::NonNull;
use libloading::{Library, Symbol};
/// Oldest grammar ABI version accepted by [`Grammar::new`].
///
/// supported TS versions, WARNING: update when updating vendored c sources
pub const MIN_COMPATIBLE_ABI_VERSION: u32 = 13;
/// Newest grammar ABI version accepted by [`Grammar::new`].
///
/// WARNING: update when updating vendored c sources
pub const ABI_VERSION: u32 = 14;
// Uninhabited marker type: a `Grammar` only ever holds a pointer to it.
enum GrammarData {}

/// Handle to a loaded tree-sitter grammar.
///
/// The handle is a plain pointer (`#[repr(transparent)]`), so it is cheap to
/// copy and can be passed to the C API directly.
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct Grammar {
    ptr: NonNull<GrammarData>,
}

// NOTE(review): these impls assert that the grammar data behind the pointer
// may be shared between threads — confirm against the tree-sitter docs.
unsafe impl Send for Grammar {}
unsafe impl Sync for Grammar {}

impl std::fmt::Debug for Grammar {
    /// Prints `Grammar { .. }`; the raw pointer is deliberately not shown.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("Grammar");
        builder.finish_non_exhaustive()
    }
}
impl Grammar {
    /// Loads a shared library containing a tree sitter grammar with name `name`
    /// from `library_path`.
    ///
    /// The symbol looked up in the library is `tree_sitter_<name>`, with every
    /// `-` in `name` replaced by `_`. On success the library handle is
    /// intentionally leaked (`mem::forget`), so the library is never unloaded
    /// by this code.
    ///
    /// # Errors
    ///
    /// * [`Error::DlOpen`] if the library cannot be opened.
    /// * [`Error::DlSym`] if the library does not export the grammar symbol.
    /// * [`Error::IncompatibleVersion`] if the grammar's ABI version is
    ///   outside `MIN_COMPATIBLE_ABI_VERSION..=ABI_VERSION`.
    ///
    /// # Safety
    ///
    /// `library_path` must be a valid tree sitter grammar
    pub unsafe fn new(name: &str, library_path: &Path) -> Result<Grammar, Error> {
        let library = unsafe {
            Library::new(library_path).map_err(|err| Error::DlOpen {
                err,
                path: library_path.to_owned(),
            })?
        };
        let language_fn_name = format!("tree_sitter_{}", name.replace('-', "_"));
        let grammar = unsafe {
            let language_fn: Symbol<unsafe extern "C" fn() -> NonNull<GrammarData>> = library
                .get(language_fn_name.as_bytes())
                .map_err(|err| Error::DlSym {
                    err,
                    // Report the symbol that was actually looked up, not the
                    // bare grammar name.
                    symbol: language_fn_name.clone(),
                })?;
            Grammar { ptr: language_fn() }
        };
        let version = grammar.version();
        if (MIN_COMPATIBLE_ABI_VERSION..=ABI_VERSION).contains(&version) {
            // Keep the library loaded: the grammar pointer was obtained from it.
            std::mem::forget(library);
            Ok(grammar)
        } else {
            Err(Error::IncompatibleVersion { version })
        }
    }

    /// Returns the ABI version reported by the grammar.
    pub fn version(self) -> u32 {
        unsafe { ts_language_version(self) }
    }
}
/// Errors returned by [`Grammar::new`].
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// The shared library at `path` could not be opened.
#[error("Error opening dynamic library {path:?}")]
DlOpen {
#[source]
err: libloading::Error,
path: PathBuf,
},
/// The symbol lookup in the opened library failed.
#[error("Failed to load symbol {symbol}")]
DlSym {
#[source]
err: libloading::Error,
symbol: String,
},
/// The grammar reported an ABI version outside the supported range.
#[error("Tried to load grammar with incompatible ABI {version}.")]
IncompatibleVersion { version: u32 },
}
/// An error that occurred when trying to assign an incompatible [`Grammar`] to
/// a [`Parser`].
#[derive(Debug, PartialEq, Eq)]
pub struct IncompatibleGrammarError {
    /// The ABI version of the rejected grammar.
    version: u32,
}

impl fmt::Display for IncompatibleGrammarError {
    /// Writes a one-line message that names the offending ABI version.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self { version } = self;
        write!(f, "Tried to load grammar with incompatible ABI {}.", version)
    }
}

impl std::error::Error for IncompatibleGrammarError {}
// Binding to the tree-sitter C library.
extern "C" {
/// Get the ABI version number for this language. This version number
/// is used to ensure that languages were generated by a compatible version of
/// Tree-sitter. See also `ts_parser_set_language`.
pub fn ts_language_version(grammar: Grammar) -> u32;
}

View File

@@ -0,0 +1,200 @@
use std::os::raw::c_void;
use std::panic::{catch_unwind, AssertUnwindSafe};
use std::ptr::NonNull;
use std::{fmt, ptr};
use regex_cursor::Cursor;
use crate::tree_sitter::syntax_tree::{SyntaxTree, SyntaxTreeData};
use crate::tree_sitter::{Grammar, IntoTsInput, Point, Range, TsInput};
// opaque data
enum ParserData {}
/// A stateful object that is used to produce a [`SyntaxTree`] based on some
/// source code.
pub struct Parser {
// Pointer obtained from `ts_parser_new`; released by `ts_parser_delete` on drop.
ptr: NonNull<ParserData>,
}
impl Parser {
    /// Create a new parser.
    #[must_use]
    pub fn new() -> Parser {
        Parser {
            ptr: unsafe { ts_parser_new() },
        }
    }

    /// Set the language that the parser should use for parsing.
    ///
    /// The success flag returned by the C API is not inspected here.
    pub fn set_language(&mut self, grammar: Grammar) {
        unsafe { ts_parser_set_language(self.ptr, grammar) };
    }

    /// Set the ranges of text that the parser should include when parsing. By default, the parser
    /// will always include entire documents. This function allows you to parse only a *portion*
    /// of a document but still return a syntax tree whose ranges match up with the document as a
    /// whole. You can also pass multiple disjoint ranges.
    ///
    /// `ranges` must be non-overlapping and sorted.
    ///
    /// # Errors
    ///
    /// Returns [`InvalidRangesErrror`] if tree-sitter rejects the ranges, or
    /// if there are more ranges than fit into the `u32` count of the C API.
    pub fn set_included_ranges(&mut self, ranges: &[Range]) -> Result<(), InvalidRangesErrror> {
        // TODO: save some memory by only storing byte ranges and converting them to TS ranges in an
        // internal buffer here. Points are not used by TS. Alternatively we can path the TS C code
        // to accept a simple pair (struct with two fields) of byte positions here instead of a full
        // tree sitter range
        //
        // A plain `as u32` cast would silently truncate the count and hand
        // tree-sitter a shorter list than the caller supplied.
        let Ok(count) = u32::try_from(ranges.len()) else {
            return Err(InvalidRangesErrror);
        };
        let success = unsafe { ts_parser_set_included_ranges(self.ptr, ranges.as_ptr(), count) };
        if success {
            Ok(())
        } else {
            Err(InvalidRangesErrror)
        }
    }

    /// Parses `input` and returns the resulting syntax tree.
    ///
    /// `old_tree` is forwarded to tree-sitter so that unchanged parts of a
    /// previous parse can be reused. Returns `None` when tree-sitter does not
    /// produce a tree.
    #[must_use]
    pub fn parse<I: TsInput>(
        &mut self,
        input: impl IntoTsInput<TsInput = I>,
        old_tree: Option<&SyntaxTree>,
    ) -> Option<SyntaxTree> {
        let mut input = input.into_ts_input();

        /// Read callback handed to tree-sitter: returns a pointer to the text
        /// starting at `byte_index` and writes the number of readable bytes to
        /// `bytes_read`. A length of zero signals the end of the document.
        /// Panics raised while reading are caught, because unwinding must not
        /// cross the C boundary, and are reported as end of input.
        unsafe extern "C" fn read<C: TsInput>(
            payload: NonNull<c_void>,
            byte_index: u32,
            _position: Point,
            bytes_read: *mut u32,
        ) -> *const u8 {
            let chunk = catch_unwind(AssertUnwindSafe(move || {
                let input: &mut C = payload.cast().as_mut();
                let requested = byte_index as usize;
                let cursor = input.cursor_at(requested);
                // `chunk()` is the whole chunk the cursor sits on and
                // `offset()` is where that chunk starts in the document, so
                // the part of the chunk before `byte_index` must be skipped.
                // Otherwise tree-sitter is handed text that belongs to an
                // earlier position.
                let skip = requested.saturating_sub(cursor.offset());
                let rest = cursor.chunk().get(skip..).unwrap_or(&[]);
                let len: u32 = rest.len().try_into().unwrap();
                (rest.as_ptr(), len)
            }));
            match chunk {
                Ok((ptr, len)) => {
                    *bytes_read = len;
                    ptr
                }
                Err(_) => {
                    *bytes_read = 0;
                    ptr::null()
                }
            }
        }

        let input = ParserInputRaw {
            // `input` (the `TsInput`) lives on this stack frame until after
            // `ts_parser_parse` returns below.
            payload: NonNull::from(&mut input).cast(),
            read: read::<I>,
            // utf8
            encoding: 0,
        };
        unsafe {
            let old_tree = old_tree.map(|tree| tree.as_raw());
            let new_tree = ts_parser_parse(self.ptr, old_tree, input);
            new_tree.map(|raw| SyntaxTree::from_raw(raw))
        }
    }
}
/// `Parser::default()` is the same as [`Parser::new`].
impl Default for Parser {
fn default() -> Self {
Self::new()
}
}
// NOTE(review): these impls assert that a parser may be moved to and shared
// between threads — confirm against the tree-sitter docs.
unsafe impl Sync for Parser {}
unsafe impl Send for Parser {}
/// Frees the underlying tree-sitter parser.
impl Drop for Parser {
fn drop(&mut self) {
unsafe { ts_parser_delete(self.ptr) }
}
}
/// The error returned by [`Parser::set_included_ranges`] when the included
/// ranges were not accepted.
#[derive(Debug, PartialEq, Eq)]
pub struct InvalidRangesErrror;

impl fmt::Display for InvalidRangesErrror {
    /// Writes a fixed, single-line description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("include ranges are overlap or are not sorted")
    }
}

impl std::error::Error for InvalidRangesErrror {}
/// Signature of the read callback invoked by tree-sitter: given the `payload`
/// pointer and a byte index/position, it returns a pointer to text and writes
/// the number of readable bytes to `bytes_read`.
type TreeSitterReadFn = unsafe extern "C" fn(
payload: NonNull<c_void>,
byte_index: u32,
position: Point,
bytes_read: *mut u32,
) -> *const u8;
/// The input description passed by value to `ts_parser_parse`.
#[repr(C)]
#[derive(Debug)]
pub struct ParserInputRaw {
/// Pointer handed back to `read` on every invocation.
pub payload: NonNull<c_void>,
/// Callback used to fetch text.
pub read: TreeSitterReadFn,
/// Text encoding; `Parser::parse` passes `0` for UTF-8.
pub encoding: u32,
}
// Bindings to the parser part of the tree-sitter C library.
extern "C" {
/// Create a new parser
fn ts_parser_new() -> NonNull<ParserData>;
/// Delete the parser, freeing all of the memory that it used.
fn ts_parser_delete(parser: NonNull<ParserData>);
/// Set the language that the parser should use for parsing. Returns a boolean indicating
/// whether or not the language was successfully assigned. True means assignment
/// succeeded. False means there was a version mismatch: the language was generated with
/// an incompatible version of the Tree-sitter CLI. Check the language's version using
/// `ts_language_version` and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION`
/// and `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants.
fn ts_parser_set_language(parser: NonNull<ParserData>, language: Grammar) -> bool;
/// Set the ranges of text that the parser should include when parsing. By default, the parser
/// will always include entire documents. This function allows you to parse only a *portion*
/// of a document but still return a syntax tree whose ranges match up with the document as a
/// whole. You can also pass multiple disjoint ranges. The second and third parameters specify
/// the location and length of an array of ranges. The parser does *not* take ownership of
/// these ranges; it copies the data, so it doesn't matter how these ranges are allocated.
/// If `count` is zero, then the entire document will be parsed. Otherwise, the given ranges
/// must be ordered from earliest to latest in the document, and they must not overlap. That
/// is, the following must hold for all: `i < count - 1`: `ranges[i].end_byte <= ranges[i +
/// 1].start_byte` If this requirement is not satisfied, the operation will fail, the ranges
/// will not be assigned, and this function will return `false`. On success, this function
/// returns `true`
fn ts_parser_set_included_ranges(
parser: NonNull<ParserData>,
ranges: *const Range,
count: u32,
) -> bool;
/// Use the parser to parse some source code and create a syntax tree. If you are parsing this
/// document for the first time, pass `NULL` for the `old_tree` parameter. Otherwise, if you
/// have already parsed an earlier version of this document and the document has since been
/// edited, pass the previous syntax tree so that the unchanged parts of it can be reused.
/// This will save time and memory. For this to work correctly, you must have already edited
/// the old syntax tree using the `ts_tree_edit` function in a way that exactly matches
/// the source code changes.
///
/// The `TSInput` parameter lets you specify how to read the text. It has the following
/// three fields:
///
/// 1. `read`: A function to retrieve a chunk of text at a given byte offset and
///    (row, column) position. The function should return a pointer to the text and write
///    its length to the `bytes_read` pointer. The parser does not take ownership of this
///    buffer; it just borrows it until it has finished reading it. The function should
///    write a zero value to the `bytes_read` pointer to indicate the end of the document.
/// 2. `payload`: An arbitrary pointer that will be passed to each invocation of the
///    `read` function.
/// 3. `encoding`: An indication of how the text is encoded. Either `TSInputEncodingUTF8`
///    or `TSInputEncodingUTF16`.
///
/// This function returns a syntax tree on success, and `NULL` on failure. There are three
/// possible reasons for failure:
///
/// 1. The parser does not have a language assigned. Check for this using the
///    `ts_parser_language` function.
/// 2. Parsing was cancelled due to a timeout that was set by an earlier call to the
///    `ts_parser_set_timeout_micros` function. You can resume parsing from where the
///    parser left out by calling `ts_parser_parse` again with the same arguments. Or you
///    can start parsing from scratch by first calling `ts_parser_reset`.
/// 3. Parsing was cancelled using a cancellation flag that was set by an earlier call to
///    `ts_parser_set_cancellation_flag`. You can resume parsing from where the parser left
///    out by calling `ts_parser_parse` again with the same arguments.
fn ts_parser_parse(
parser: NonNull<ParserData>,
old_tree: Option<NonNull<SyntaxTreeData>>,
input: ParserInputRaw,
) -> Option<NonNull<SyntaxTreeData>>;
}

View File

@@ -0,0 +1,451 @@
use std::fmt::{self, Display};
use std::ops::Range;
use std::path::{Path, PathBuf};
use std::ptr::NonNull;
use std::{slice, str};
use crate::tree_sitter::query::predicate::{InvalidPredicateError, Predicate, TextPredicate};
use crate::tree_sitter::Grammar;
mod predicate;
mod property;
/// A predicate that is handed to the `custom_predicate` callback of
/// [`Query::new`].
pub enum UserPredicate<'a> {
/// An `is?` predicate (`negate == false`) or an `is-not?` predicate
/// (`negate == true`) on property `key`, with an optional value.
IsPropertySet {
negate: bool,
key: &'a str,
val: Option<&'a str>,
},
/// A `set!` predicate assigning the optional `val` to property `key`.
SetProperty {
key: &'a str,
val: Option<&'a str>,
},
/// Any other predicate.
Other(Predicate<'a>),
}
impl Display for UserPredicate<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
UserPredicate::IsPropertySet { negate, key, val } => {
let predicate = if negate { "is-not?" } else { "is?" };
write!(f, " ({predicate} {key} {})", val.unwrap_or(""))
}
UserPredicate::SetProperty { key, val } => {
write!(f, "(set! {key} {})", val.unwrap_or(""))
}
UserPredicate::Other(ref predicate) => {
write!(f, "{}", predicate.name())
}
}
}
}
/// Index of a pattern inside a query.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Pattern(pub(crate) u32);

impl Pattern {
    /// Reserved value (`u32::MAX`) that can be used as a placeholder.
    pub const SENTINEL: Pattern = Pattern(u32::MAX);

    /// The pattern index as a `usize`, e.g. for indexing into slices.
    pub fn idx(&self) -> usize {
        let Pattern(raw) = *self;
        raw as usize
    }
}
/// Opaque type behind the raw query pointer; never constructed in Rust.
pub enum QueryData {}
/// Per-pattern data collected while parsing the predicates of a query.
#[derive(Debug)]
pub(super) struct PatternData {
/// Range of this pattern's entries inside `Query::text_predicates`.
text_predicates: Range<u32>,
}
/// A compiled tree-sitter query together with its parsed predicates.
#[derive(Debug)]
pub struct Query {
/// Pointer returned by `ts_query_new`; freed by `ts_query_delete` on drop.
pub(crate) raw: NonNull<QueryData>,
/// Value of `ts_query_capture_count` at construction.
num_captures: u32,
/// Value of `ts_query_string_count` at construction.
num_strings: u32,
/// Text predicates of all patterns; sliced per pattern via `PatternData`.
text_predicates: Vec<TextPredicate>,
/// One entry per pattern of the query.
patterns: Box<[PatternData]>,
}
impl Query {
/// Create a new query from a string containing one or more S-expression
/// patterns.
///
/// The query is associated with a particular grammar, and can only be run
/// on syntax nodes parsed with that grammar. References to Queries can be
/// shared between multiple threads.
pub fn new(
grammar: Grammar,
source: &str,
path: impl AsRef<Path>,
mut custom_predicate: impl FnMut(Pattern, UserPredicate) -> Result<(), InvalidPredicateError>,
) -> Result<Self, ParseError> {
assert!(
source.len() <= i32::MAX as usize,
"TreeSitter queries must be smaller then 2 GiB (is {})",
source.len() as f64 / 1024.0 / 1024.0 / 1024.0
);
let mut error_offset = 0u32;
let mut error_kind = RawQueryError::None;
let bytes = source.as_bytes();
// Compile the query.
let ptr = unsafe {
ts_query_new(
grammar,
bytes.as_ptr(),
bytes.len() as u32,
&mut error_offset,
&mut error_kind,
)
};
let Some(raw) = ptr else {
let offset = error_offset as usize;
let error_word = || {
source[offset..]
.chars()
.take_while(|&c| c.is_alphanumeric() || matches!(c, '_' | '-'))
.collect()
};
let err = match error_kind {
RawQueryError::NodeType => {
let node: String = error_word();
ParseError::InvalidNodeType {
location: ParserErrorLocation::new(
source,
path.as_ref(),
offset,
node.chars().count(),
),
node,
}
}
RawQueryError::Field => {
let field = error_word();
ParseError::InvalidFieldName {
location: ParserErrorLocation::new(
source,
path.as_ref(),
offset,
field.chars().count(),
),
field,
}
}
RawQueryError::Capture => {
let capture = error_word();
ParseError::InvalidCaptureName {
location: ParserErrorLocation::new(
source,
path.as_ref(),
offset,
capture.chars().count(),
),
capture,
}
}
RawQueryError::Syntax => ParseError::SyntaxError(ParserErrorLocation::new(
source,
path.as_ref(),
offset,
0,
)),
RawQueryError::Structure => ParseError::ImpossiblePattern(
ParserErrorLocation::new(source, path.as_ref(), offset, 0),
),
RawQueryError::None => {
unreachable!("tree-sitter returned a null pointer but did not set an error")
}
RawQueryError::Language => unreachable!("should be handled at grammar load"),
};
return Err(err);
};
// I am not going to bother with safety comments here, all of these are
// safe as long as TS is not buggy because raw is a properly constructed query
let num_captures = unsafe { ts_query_capture_count(raw) };
let num_strings = unsafe { ts_query_string_count(raw) };
let num_patterns = unsafe { ts_query_pattern_count(raw) };
let mut query = Query {
raw,
num_captures,
num_strings,
text_predicates: Vec::new(),
patterns: Box::default(),
};
let patterns: Result<_, ParseError> = (0..num_patterns)
.map(|pattern| {
query
.parse_pattern_predicates(Pattern(pattern), &mut custom_predicate)
.map_err(|err| ParseError::InvalidPredicate {
message: err.msg.into(),
location: ParserErrorLocation::new(
source,
path.as_ref(),
unsafe { ts_query_start_byte_for_pattern(query.raw, pattern) as usize },
0,
),
})
})
.collect();
query.patterns = patterns?;
Ok(query)
}
#[inline]
fn get_string(&self, str: QueryStr) -> &str {
let value_id = str.0;
// need an assertions because the ts c api does not do bounds check
assert!(value_id <= self.num_captures, "invalid value index");
unsafe {
let mut len = 0;
let ptr = ts_query_string_value_for_id(self.raw, value_id, &mut len);
let data = slice::from_raw_parts(ptr, len as usize);
// safety: we only allow passing valid str(ings) as arguments to query::new
// name is always a substring of that. Treesitter does proper utf8 segmentation
// so any substrings it produces are codepoint aligned and therefore valid utf8
str::from_utf8_unchecked(data)
}
}
#[inline]
pub fn capture_name(&self, capture_idx: Capture) -> &str {
let capture_idx = capture_idx.0;
// need an assertions because the ts c api does not do bounds check
assert!(capture_idx <= self.num_captures, "invalid capture index");
let mut length = 0;
unsafe {
let ptr = ts_query_capture_name_for_id(self.raw, capture_idx, &mut length);
let name = slice::from_raw_parts(ptr, length as usize);
// safety: we only allow passing valid str(ings) as arguments to query::new
// name is always a substring of that. Treesitter does proper utf8 segmentation
// so any substrings it produces are codepoint aligned and therefore valid utf8
str::from_utf8_unchecked(name)
}
}
#[inline]
pub fn captures(&self) -> impl ExactSizeIterator<Item = (Capture, &str)> {
(0..self.num_captures).map(|cap| (Capture(cap), self.capture_name(Capture(cap))))
}
#[inline]
pub fn num_captures(&self) -> u32 {
self.num_captures
}
#[inline]
pub fn get_capture(&self, capture_name: &str) -> Option<Capture> {
for capture in 0..self.num_captures {
if capture_name == self.capture_name(Capture(capture)) {
return Some(Capture(capture));
}
}
None
}
pub(crate) fn pattern_text_predicates(&self, pattern_idx: u16) -> &[TextPredicate] {
let range = self.patterns[pattern_idx as usize].text_predicates.clone();
&self.text_predicates[range.start as usize..range.end as usize]
}
/// Get the byte offset where the given pattern starts in the query's
/// source.
#[doc(alias = "ts_query_start_byte_for_pattern")]
#[must_use]
pub fn start_byte_for_pattern(&self, pattern: Pattern) -> usize {
assert!(
pattern.0 < self.text_predicates.len() as u32,
"Pattern index is {pattern_index} but the pattern count is {}",
self.text_predicates.len(),
);
unsafe { ts_query_start_byte_for_pattern(self.raw, pattern.0) as usize }
}
/// Get the number of patterns in the query.
#[must_use]
pub fn pattern_count(&self) -> usize {
unsafe { ts_query_pattern_count(self.raw) as usize }
}
/// Get the number of patterns in the query.
#[must_use]
pub fn patterns(&self) -> impl ExactSizeIterator<Item = Pattern> {
(0..self.pattern_count() as u32).map(Pattern)
}
}
/// Frees the underlying tree-sitter query.
impl Drop for Query {
fn drop(&mut self) {
unsafe { ts_query_delete(self.raw) }
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Capture(u32);
impl Capture {
pub fn name(self, query: &Query) -> &str {
query.capture_name(self)
}
pub fn idx(self) -> usize {
self.0 as usize
}
}
/// A reference to a string stored in a query
#[derive(Clone, Copy, Debug)]
pub struct QueryStr(u32);
impl QueryStr {
/// Resolves this reference to the string stored in `query`.
pub fn get(self, query: &Query) -> &str {
query.get_string(self)
}
}
/// Where in a query source an error was found, plus the text of that line so
/// the error can be rendered with a marker underneath it.
#[derive(Debug, PartialEq, Eq)]
pub struct ParserErrorLocation {
    pub path: PathBuf,
    /// at which line the error occurred (zero-based)
    pub line: usize,
    /// at which codepoints/columns the errors starts in the line
    pub column: usize,
    /// how many codepoints/columns the error takes up
    pub len: usize,
    /// the full text of the line the error is on, without its line ending
    line_content: String,
}

impl ParserErrorLocation {
    /// Describes the location `offset` (a byte offset into `source`) of an
    /// error that spans `len` codepoints.
    ///
    /// `line` is the number of `\n` before `offset`, and `column` is the
    /// number of codepoints between the start of that line and `offset`. An
    /// offset past the end of `source` is clamped to the end, and an offset
    /// inside a multi-byte character is moved back to its start.
    pub fn new(source: &str, path: &Path, offset: usize, len: usize) -> ParserErrorLocation {
        let mut offset = offset.min(source.len());
        while !source.is_char_boundary(offset) {
            offset -= 1;
        }

        let before = &source[..offset];
        let line = before.matches('\n').count();
        let line_start = before.rfind('\n').map_or(0, |newline| newline + 1);
        // Extend to the end of the line so that the rendered line contains
        // the text the marker points at, not only the text in front of it.
        let line_end = source[offset..]
            .find('\n')
            .map_or(source.len(), |newline| offset + newline);

        let strip_cr = |text: &'_ str| -> usize { text.strip_suffix('\r').unwrap_or(text).len() };
        let prefix = &source[line_start..offset];
        let column = prefix[..strip_cr(prefix)].chars().count();
        let full_line = &source[line_start..line_end];
        let line_content = &full_line[..strip_cr(full_line)];

        ParserErrorLocation {
            path: path.to_owned(),
            line,
            column,
            len,
            line_content: line_content.to_owned(),
        }
    }
}

impl Display for ParserErrorLocation {
    /// Renders the location as a small code frame: a `-->` header with path,
    /// line and column, the offending line, and a row of `^` markers under
    /// the error (at least one marker is always printed).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(
            f,
            "  --> {}:{}:{}",
            self.path.display(),
            self.line,
            self.column
        )?;
        let line = self.line.to_string();
        let prefix = format!(" {:width$} |", "", width = line.len());
        writeln!(f, "{prefix}")?;
        writeln!(f, " {line} | {}", self.line_content)?;
        // The source line above is separated from the gutter by one space;
        // the marker line needs the same space to line up with it.
        writeln!(
            f,
            "{prefix} {:width$}{:^<len$}",
            "",
            "^",
            width = self.column,
            len = self.len
        )?;
        writeln!(f, "{prefix}")
    }
}
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum ParseError {
#[error("unexpected EOF")]
UnexpectedEof,
#[error("invalid query syntax\n{0}")]
SyntaxError(ParserErrorLocation),
#[error("invalid node type {node:?}\n{location}")]
InvalidNodeType {
node: String,
location: ParserErrorLocation,
},
#[error("invalid field name {field:?}\n{location}")]
InvalidFieldName {
field: String,
location: ParserErrorLocation,
},
#[error("invalid capture name {capture:?}\n{location}")]
InvalidCaptureName {
capture: String,
location: ParserErrorLocation,
},
#[error("{message}\n{location}")]
InvalidPredicate {
message: String,
location: ParserErrorLocation,
},
#[error("invalid predicate\n{0}")]
ImpossiblePattern(ParserErrorLocation),
}
/// Error kind written by `ts_query_new` through its `error_type` parameter.
// NOTE(review): presumably mirrors tree-sitter's `TSQueryError` enum — confirm the
// discriminants against the vendored headers.
#[repr(C)]
enum RawQueryError {
None = 0,
Syntax = 1,
NodeType = 2,
Field = 3,
Capture = 4,
Structure = 5,
Language = 6,
}
// Bindings to the query part of the tree-sitter C library.
extern "C" {
/// Create a new query from a string containing one or more S-expression
/// patterns. The query is associated with a particular language, and can
/// only be run on syntax nodes parsed with that language. If all of the
/// given patterns are valid, this returns a `TSQuery`. If a pattern is
/// invalid, this returns `NULL`, and provides two pieces of information
/// about the problem: 1. The byte offset of the error is written to
/// the `error_offset` parameter. 2. The type of error is written to the
/// `error_type` parameter.
fn ts_query_new(
grammar: Grammar,
source: *const u8,
source_len: u32,
error_offset: &mut u32,
error_type: &mut RawQueryError,
) -> Option<NonNull<QueryData>>;
/// Delete a query, freeing all of the memory that it used.
fn ts_query_delete(query: NonNull<QueryData>);
/// Get the number of patterns, captures, or string literals in the query.
fn ts_query_pattern_count(query: NonNull<QueryData>) -> u32;
/// Get the number of captures in the query.
fn ts_query_capture_count(query: NonNull<QueryData>) -> u32;
/// Get the number of string literals in the query.
fn ts_query_string_count(query: NonNull<QueryData>) -> u32;
/// Get the byte offset where the given pattern starts in the query's
/// source. This can be useful when combining queries by concatenating their
/// source code strings.
fn ts_query_start_byte_for_pattern(query: NonNull<QueryData>, pattern_index: u32) -> u32;
// fn ts_query_is_pattern_rooted(query: NonNull<QueryData>, pattern_index: u32) -> bool;
// fn ts_query_is_pattern_non_local(query: NonNull<QueryData>, pattern_index: u32) -> bool;
// fn ts_query_is_pattern_guaranteed_at_step(query: NonNull<QueryData>, byte_offset: u32) -> bool;
/// Get the name and length of one of the query's captures, or one of the
/// query's string literals. Each capture and string is associated with a
/// numeric id based on the order that it appeared in the query's source.
fn ts_query_capture_name_for_id(
query: NonNull<QueryData>,
index: u32,
length: &mut u32,
) -> *const u8;
/// Get the value and length of one of the query's string literals; see
/// `ts_query_capture_name_for_id`.
fn ts_query_string_value_for_id(
self_: NonNull<QueryData>,
index: u32,
length: &mut u32,
) -> *const u8;
}

View File

@@ -0,0 +1,448 @@
use std::error::Error;
use std::iter::zip;
use std::ops::Range;
use std::ptr::NonNull;
use std::{fmt, slice};
use crate::tree_sitter::query::property::QueryProperty;
use crate::tree_sitter::query::{
Capture, Pattern, PatternData, Query, QueryData, QueryStr, UserPredicate,
};
use crate::tree_sitter::query_cursor::MatchedNode;
use crate::tree_sitter::TsInput;
use regex_cursor::engines::meta::Regex;
use regex_cursor::Cursor;
/// Returns early from the enclosing function with an `InvalidPredicateError`
/// whose message is built from the given `format!`-style arguments.
macro_rules! bail {
($($args:tt)*) => {{
return Err(InvalidPredicateError {msg: format!($($args)*).into() })
}}
}
/// Returns early from the enclosing function with an `InvalidPredicateError`
/// (message built from the `format!`-style arguments) unless `$cond` holds.
macro_rules! ensure {
($cond: expr, $($args:tt)*) => {{
if !$cond {
return Err(InvalidPredicateError { msg: format!($($args)*).into() })
}
}}
}
/// The comparison a [`TextPredicate`] performs on the text of a captured node.
#[derive(Debug)]
pub(super) enum TextPredicateKind {
/// The node text must equal a string literal of the query.
EqString(QueryStr),
/// The node text must equal the text of a node matched by another capture.
EqCapture(Capture),
/// The node text must match a regular expression.
MatchString(Regex),
/// The node text must equal one of several string literals of the query.
AnyString(Box<[QueryStr]>),
}
/// A text predicate (`#eq?`, `#match?`, `#any-of?`, ...) attached to a pattern.
#[derive(Debug)]
pub(crate) struct TextPredicate {
/// The capture whose matched nodes are tested.
capture: Capture,
/// The comparison applied to each tested node.
kind: TextPredicateKind,
/// When set, the result of each per-node comparison is inverted.
negated: bool,
/// When set, every tested node must pass; otherwise a single passing node
/// is enough.
match_all: bool,
}
/// Checks whether the bytes of `input` in `range` are exactly `str`.
///
/// The input is read chunk by chunk through the cursor returned by
/// `input.cursor_at(range.start)`, so the text never has to be contiguous in
/// memory. Returns `false` if the lengths differ, if any byte differs, or if
/// the cursor runs out of chunks before `range.end` is reached.
fn input_matches_str<I: TsInput>(str: &str, range: Range<usize>, input: &mut I) -> bool {
    if str.len() != range.len() {
        return false;
    }
    let mut rest = str.as_bytes();
    let cursor = input.cursor_at(range.start);
    let start_in_chunk = range.start - cursor.offset();
    if range.end - cursor.offset() <= cursor.chunk().len() {
        // hotpath: the whole range lies inside the current chunk
        return &cursor.chunk()[start_in_chunk..range.end - cursor.offset()] == rest;
    }
    // The range continues past this chunk: compare the tail of the chunk with
    // the head of the expected bytes ...
    let first_len = cursor.chunk().len() - start_in_chunk;
    if cursor.chunk()[start_in_chunk..] != rest[..first_len] {
        return false;
    }
    // ... and continue with the bytes that have NOT been compared yet.
    rest = &rest[first_len..];
    while cursor.advance() {
        if rest.len() <= cursor.chunk().len() {
            // final chunk: only its prefix up to `range.end` is relevant
            return &cursor.chunk()[..range.end - cursor.offset()] == rest;
        }
        if &rest[..cursor.chunk().len()] != cursor.chunk() {
            return false;
        }
        rest = &rest[cursor.chunk().len()..]
    }
    // buggy cursor/invalid range
    false
}
/// Checks whether the bytes of `input` in `range` are exactly `str`.
///
/// NOTE(review): despite its name this takes the same arguments and performs
/// the same comparison as `input_matches_str`; it looks like a leftover
/// duplicate - confirm whether it is still needed.
///
/// Returns `false` if the lengths differ, if any byte differs, or if the
/// cursor runs out of chunks before `range.end` is reached.
fn inputs_match<I: TsInput>(str: &str, range: Range<usize>, input: &mut I) -> bool {
    if str.len() != range.len() {
        return false;
    }
    let mut rest = str.as_bytes();
    let cursor = input.cursor_at(range.start);
    let start_in_chunk = range.start - cursor.offset();
    if range.end - cursor.offset() <= cursor.chunk().len() {
        // hotpath: the whole range lies inside the current chunk
        return &cursor.chunk()[start_in_chunk..range.end - cursor.offset()] == rest;
    }
    // Compare the tail of the first chunk with the head of the expected bytes
    let first_len = cursor.chunk().len() - start_in_chunk;
    if cursor.chunk()[start_in_chunk..] != rest[..first_len] {
        return false;
    }
    // continue with the bytes that have NOT been compared yet
    rest = &rest[first_len..];
    while cursor.advance() {
        if rest.len() <= cursor.chunk().len() {
            // final chunk: only its prefix up to `range.end` is relevant
            return &cursor.chunk()[..range.end - cursor.offset()] == rest;
        }
        if &rest[..cursor.chunk().len()] != cursor.chunk() {
            return false;
        }
        rest = &rest[cursor.chunk().len()..]
    }
    // buggy cursor/invalid range
    false
}
impl TextPredicate {
    /// Combines per-node results while handling `match_all` and `negated`.
    ///
    /// Each result is inverted when `negated` is set. With `match_all` every
    /// node must pass (vacuously true for no nodes); otherwise one passing
    /// node is enough (false for no nodes).
    fn satisfied_helper(&self, mut nodes: impl Iterator<Item = bool>) -> bool {
        if self.match_all {
            nodes.all(|matched| matched != self.negated)
        } else {
            nodes.any(|matched| matched != self.negated)
        }
    }

    /// Evaluates this predicate for one query match.
    ///
    /// `matched_nodes` are the nodes captured by the match, `input` provides
    /// the source text and `query` resolves the string literals referenced by
    /// the predicate. Only nodes captured by `self.capture` are tested.
    pub fn satsified<I: TsInput>(
        &self,
        input: &mut I,
        matched_nodes: &[MatchedNode],
        query: &Query,
    ) -> bool {
        let capture_nodes = matched_nodes
            .iter()
            .filter(|matched_node| matched_node.capture == self.capture);
        match self.kind {
            TextPredicateKind::EqString(str) => self.satisfied_helper(capture_nodes.map(|node| {
                let range = node.syntax_node.byte_range();
                input_matches_str(query.get_string(str), range, input)
            })),
            TextPredicateKind::EqCapture(other_capture) => {
                let other_nodes = matched_nodes
                    .iter()
                    .filter(|matched_node| matched_node.capture == other_capture);
                // In `match_all` mode both captures must have matched the same
                // number of nodes. This is counted up front: `zip` pulls (and
                // discards) one extra element from the first iterator when the
                // second one runs dry, so inspecting the iterators afterwards
                // would miss a mismatch of exactly one node.
                let same_count = !self.match_all
                    || capture_nodes.clone().count() == other_nodes.clone().count();
                let res = self.satisfied_helper(zip(capture_nodes, other_nodes).map(
                    |(node1, node2)| {
                        let range1 = node1.syntax_node.byte_range();
                        let range2 = node2.syntax_node.byte_range();
                        input.eq(range1, range2)
                    },
                ));
                res && same_count
            }
            TextPredicateKind::MatchString(ref regex) => {
                self.satisfied_helper(capture_nodes.map(|node| {
                    let range = node.syntax_node.byte_range();
                    let input = regex_cursor::Input::new(input.cursor_at(range.start)).range(range);
                    regex.is_match(input)
                }))
            }
            TextPredicateKind::AnyString(ref strings) => {
                let strings = strings.iter().map(|&str| query.get_string(str));
                self.satisfied_helper(capture_nodes.map(|node| {
                    let range = node.syntax_node.byte_range();
                    strings
                        .clone()
                        // cheap length check before touching the input text
                        .filter(|str| str.len() == range.len())
                        .any(|str| input_matches_str(str, range.clone(), input))
                }))
            }
        }
    }
}
impl Query {
pub(super) fn parse_pattern_predicates(
&mut self,
pattern: Pattern,
mut custom_predicate: impl FnMut(Pattern, UserPredicate) -> Result<(), InvalidPredicateError>,
) -> Result<PatternData, InvalidPredicateError> {
let text_predicate_start = self.text_predicates.len() as u32;
let predicate_steps = unsafe {
let mut len = 0u32;
let raw_predicates = ts_query_predicates_for_pattern(self.raw, pattern.0, &mut len);
(len != 0)
.then(|| slice::from_raw_parts(raw_predicates, len as usize))
.unwrap_or_default()
};
let predicates = predicate_steps
.split(|step| step.kind == PredicateStepKind::Done)
.filter(|predicate| !predicate.is_empty());
for predicate in predicates {
let predicate = unsafe { Predicate::new(self, predicate)? };
match predicate.name() {
"eq?" | "not-eq?" | "any-eq?" | "any-not-eq?" => {
predicate.check_arg_count(2)?;
let capture_idx = predicate.capture_arg(0)?;
let arg2 = predicate.arg(1);
let negated = matches!(predicate.name(), "not-eq?" | "not-any-eq?");
let match_all = matches!(predicate.name(), "eq?" | "not-eq?");
let kind = match arg2 {
PredicateArg::Capture(capture) => TextPredicateKind::EqCapture(capture),
PredicateArg::String(str) => TextPredicateKind::EqString(str),
};
self.text_predicates.push(TextPredicate {
capture: capture_idx,
kind,
negated,
match_all,
});
}
"match?" | "not-match?" | "any-match?" | "any-not-match?" => {
predicate.check_arg_count(2)?;
let capture_idx = predicate.capture_arg(0)?;
let regex = predicate.query_str_arg(1)?.get(self);
let negated = matches!(predicate.name(), "not-match?" | "any-not-match?");
let match_all = matches!(predicate.name(), "match?" | "not-match?");
let regex = match Regex::new(regex) {
Ok(regex) => regex,
Err(err) => bail!("invalid regex '{regex}', {err}"),
};
self.text_predicates.push(TextPredicate {
capture: capture_idx,
kind: TextPredicateKind::MatchString(regex),
negated,
match_all,
});
}
"set!" => {
let property = QueryProperty::parse(&predicate)?;
custom_predicate(
pattern,
UserPredicate::SetProperty {
key: property.key.get(&self),
val: property.val.map(|val| val.get(&self)),
},
)?
}
"is-not?" | "is?" => {
let property = QueryProperty::parse(&predicate)?;
custom_predicate(
pattern,
UserPredicate::IsPropertySet {
negate: predicate.name() == "is-not?",
key: property.key.get(&self),
val: property.val.map(|val| val.get(&self)),
},
)?
}
"any-of?" | "not-any-of?" => {
predicate.check_min_arg_count(1)?;
let capture = predicate.capture_arg(0)?;
let negated = predicate.name() == "not-any-of?";
let values: Result<_, InvalidPredicateError> = (1..predicate.num_args())
.map(|i| predicate.query_str_arg(i))
.collect();
self.text_predicates.push(TextPredicate {
capture,
kind: TextPredicateKind::AnyString(values?),
negated,
match_all: false,
});
}
// is and is-not are better handeled as custom predicates since interpreting is context dependent
// "is?" => property_predicates.push((QueryProperty::parse(&predicate), false)),
// "is-not?" => property_predicates.push((QueryProperty::parse(&predicate), true)),
_ => custom_predicate(pattern, UserPredicate::Other(predicate))?,
}
}
Ok(PatternData {
text_predicates: text_predicate_start..self.text_predicates.len() as u32,
})
}
}
/// A single argument of a query predicate.
pub enum PredicateArg {
/// A reference to one of the query's captures.
Capture(Capture),
/// A string literal of the query.
String(QueryStr),
}
/// A predicate of a query pattern: a name followed by its arguments.
pub struct Predicate<'a> {
/// The name of the predicate (the first step of the predicate).
pub name: QueryStr,
/// The remaining steps, i.e. the arguments passed to the predicate.
args: &'a [PredicateStep],
/// The query used to resolve string and capture ids.
query: &'a Query,
}
impl<'a> Predicate<'a> {
/// Builds a predicate from its raw steps: the first step is the name, the
/// remaining steps are the arguments.
///
/// Returns an error if the first step is not a string.
///
/// # Panics
/// Panics if `predicate` is empty.
///
/// # Safety
/// NOTE(review): presumably the ids stored in `predicate` must belong to
/// `query` and the slice must not contain `Done` steps (see [`Self::arg`]) -
/// confirm the intended contract.
unsafe fn new(
query: &'a Query,
predicate: &'a [PredicateStep],
) -> Result<Predicate<'a>, InvalidPredicateError> {
ensure!(
predicate[0].kind == PredicateStepKind::String,
"expected predicate to start with a function name. Got @{}.",
Capture(predicate[0].value_id).name(query)
);
let operator_name = QueryStr(predicate[0].value_id);
Ok(Predicate {
name: operator_name,
args: &predicate[1..],
query,
})
}
/// The name of the predicate as text.
pub fn name(&self) -> &str {
self.name.get(self.query)
}
/// Returns an error unless the predicate has exactly `n` arguments.
pub fn check_arg_count(&self, n: usize) -> Result<(), InvalidPredicateError> {
ensure!(
self.args.len() == n,
"expected {n} arguments for #{}, got {}",
self.name(),
self.args.len()
);
Ok(())
}
/// Returns an error unless the predicate has at least `n` arguments.
pub fn check_min_arg_count(&self, n: usize) -> Result<(), InvalidPredicateError> {
ensure!(
n <= self.args.len(),
"expected at least {n} arguments for #{}, got {}",
self.name(),
self.args.len()
);
Ok(())
}
/// Returns an error unless the predicate has at most `n` arguments.
pub fn check_max_arg_count(&self, n: usize) -> Result<(), InvalidPredicateError> {
ensure!(
self.args.len() <= n,
"expected at most {n} arguments for #{}, got {}",
self.name(),
self.args.len()
);
Ok(())
}
/// Returns the `i`-th argument if it is a string literal and an error if it
/// is a capture.
///
/// # Panics
/// Panics if `i` is out of bounds (see [`Self::arg`]).
pub fn query_str_arg(&self, i: usize) -> Result<QueryStr, InvalidPredicateError> {
match self.arg(i) {
PredicateArg::String(str) => Ok(str),
PredicateArg::Capture(capture) => bail!(
"{i}. argument to #{} must be a literal, got capture @{:?}",
self.name(),
capture.name(self.query)
),
}
}
/// Like [`Self::query_str_arg`] but resolves the literal to its text.
pub fn str_arg(&self, i: usize) -> Result<&str, InvalidPredicateError> {
Ok(self.query_str_arg(i)?.get(self.query))
}
/// The number of arguments passed to the predicate.
pub fn num_args(&self) -> usize {
self.args.len()
}
/// Returns the `i`-th argument if it is a capture and an error if it is a
/// string literal.
///
/// # Panics
/// Panics if `i` is out of bounds (see [`Self::arg`]).
pub fn capture_arg(&self, i: usize) -> Result<Capture, InvalidPredicateError> {
match self.arg(i) {
PredicateArg::Capture(capture) => Ok(capture),
PredicateArg::String(str) => bail!(
"{i}. argument to #{} expected a capture, got literal {:?}",
self.name(),
str.get(self.query)
),
}
}
/// Returns the `i`-th argument.
///
/// # Panics
/// Panics if `i >= self.num_args()` or if the step is a `Done` sentinel.
pub fn arg(&self, i: usize) -> PredicateArg {
self.args[i].try_into().unwrap()
}
/// Iterates over all arguments in order.
///
/// # Panics
/// The iterator panics when it reaches a `Done` sentinel step.
pub fn args(&self) -> impl Iterator<Item = PredicateArg> + '_ {
self.args.iter().map(|&arg| arg.try_into().unwrap())
}
}
/// Error describing why a query predicate was rejected.
#[derive(Debug)]
pub struct InvalidPredicateError {
/// Human readable description; this is what `Display` prints.
pub(super) msg: Box<str>,
}
impl From<String> for InvalidPredicateError {
fn from(value: String) -> Self {
InvalidPredicateError {
msg: value.into_boxed_str(),
}
}
}
impl<'a> From<&'a str> for InvalidPredicateError {
fn from(value: &'a str) -> Self {
InvalidPredicateError { msg: value.into() }
}
}
impl fmt::Display for InvalidPredicateError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.msg)
}
}
impl Error for InvalidPredicateError {}
/// The kind of a [`PredicateStep`] returned by
/// `ts_query_predicates_for_pattern`.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PredicateStepKind {
/// Sentinel that marks the end of an individual predicate.
Done = 0,
/// The step refers to a capture; `value_id` is the capture id.
Capture = 1,
/// The step is a string literal; `value_id` is the string id.
String = 2,
}
/// One element of the step array returned by
/// `ts_query_predicates_for_pattern`.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
struct PredicateStep {
/// What `value_id` refers to.
kind: PredicateStepKind,
/// Capture id or string id, depending on `kind`.
value_id: u32,
}
impl TryFrom<PredicateStep> for PredicateArg {
type Error = ();
fn try_from(step: PredicateStep) -> Result<Self, Self::Error> {
match step.kind {
PredicateStepKind::String => Ok(PredicateArg::String(QueryStr(step.value_id))),
PredicateStepKind::Capture => Ok(PredicateArg::Capture(Capture(step.value_id))),
PredicateStepKind::Done => Err(()),
}
}
}
extern "C" {
/// Get all of the predicates for the given pattern in the query. The
/// predicates are represented as a single array of steps; the number of
/// steps is written to `step_count`. There are three types of steps in this
/// array, which correspond to the three legal values for the `type` field:
///
/// - `TSQueryPredicateStepTypeCapture` - Steps with this type represent
///   names of captures. Their `value_id` can be used with the
///   `ts_query_capture_name_for_id` function to obtain the name of the
///   capture.
/// - `TSQueryPredicateStepTypeString` - Steps with this type represent
///   literal strings. Their `value_id` can be used with the
///   `ts_query_string_value_for_id` function to obtain their string value.
/// - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*
///   that represent the end of an individual predicate. If a pattern has two
///   predicates, then there will be two steps with this `type` in the array.
fn ts_query_predicates_for_pattern(
query: NonNull<QueryData>,
pattern_index: u32,
step_count: &mut u32,
) -> *const PredicateStep;
}

View File

@@ -0,0 +1,20 @@
use crate::tree_sitter::query::predicate::{InvalidPredicateError, Predicate};
use crate::tree_sitter::query::QueryStr;
/// A key with an optional value, parsed from the arguments of a predicate
/// (see [`QueryProperty::parse`]).
#[derive(Debug)]
pub struct QueryProperty {
/// The property name (first predicate argument).
pub key: QueryStr,
/// The optional property value.
pub val: Option<QueryStr>,
}
impl QueryProperty {
    /// Parses a property from the arguments of `predicate`.
    ///
    /// The predicate must have one or two arguments, all of them string
    /// literals: the key and, optionally, the value.
    ///
    /// # Errors
    /// Returns an [`InvalidPredicateError`] if the argument count is not 1 or
    /// 2 or if an argument is a capture instead of a literal.
    pub fn parse(predicate: &Predicate) -> Result<Self, InvalidPredicateError> {
        predicate.check_min_arg_count(1)?;
        predicate.check_max_arg_count(2)?;
        let key = predicate.query_str_arg(0)?;
        // The value is the second argument, so it only exists when two
        // arguments were passed (reading index 1 with a single argument
        // would be out of bounds).
        let val = match predicate.num_args() {
            2 => Some(predicate.query_str_arg(1)?),
            _ => None,
        };
        Ok(QueryProperty { key, val })
    }
}

View File

@@ -0,0 +1,343 @@
use core::slice;
use std::cell::UnsafeCell;
use std::marker::PhantomData;
use std::mem::replace;
use std::ops::Range;
use std::ptr::{self, NonNull};
use crate::tree_sitter::query::{Capture, Pattern, Query, QueryData};
use crate::tree_sitter::syntax_tree_node::SyntaxTreeNodeRaw;
use crate::tree_sitter::{SyntaxTree, SyntaxTreeNode, TsInput};
/// Opaque type standing in for the query cursor state owned by the C library;
/// it is only ever used behind pointers.
enum QueryCursorData {}
thread_local! {
// Per-thread pool of idle cursors: `InactiveQueryCursor::new` pops from it and
// dropping a `QueryCursor` pushes its cursor back for reuse.
static CURSOR_CACHE: UnsafeCell<Vec<InactiveQueryCursor>> = UnsafeCell::new(Vec::with_capacity(8));
}
/// Runs `f` with mutable access to this thread's cache of idle query cursors.
///
/// # Safety
/// Must not be called recursively: `f` must not (directly or indirectly) call
/// `with_cache` again, since that would create a second mutable reference to
/// the cache.
unsafe fn with_cache<T>(f: impl FnOnce(&mut Vec<InactiveQueryCursor>) -> T) -> T {
    CURSOR_CACHE.with(|cell| {
        let cursors = &mut *cell.get();
        f(cursors)
    })
}
/// A cursor that is executing `query`; created by
/// `InactiveQueryCursor::execute_query`.
pub struct QueryCursor<'a, 'tree, I: TsInput> {
/// The query being executed; also used to evaluate text predicates.
query: &'a Query,
/// Raw cursor; null only after the cursor has been moved out by `reuse`.
ptr: *mut QueryCursorData,
/// Ties the cursor to the lifetime of the tree it iterates.
tree: PhantomData<&'tree SyntaxTree>,
/// Source text used to evaluate text predicates.
input: I,
}
impl<'tree, I: TsInput> QueryCursor<'_, 'tree, I> {
pub fn next_match(&mut self) -> Option<QueryMatch<'_, 'tree>> {
let mut query_match = TSQueryMatch {
id: 0,
pattern_index: 0,
capture_count: 0,
captures: ptr::null(),
};
loop {
let success = unsafe { ts_query_cursor_next_match(self.ptr, &mut query_match) };
if !success {
return None;
}
let matched_nodes = unsafe {
slice::from_raw_parts(
query_match.captures.cast(),
query_match.capture_count as usize,
)
};
let satisfies_predicates = self
.query
.pattern_text_predicates(query_match.pattern_index)
.iter()
.all(|predicate| predicate.satsified(&mut self.input, matched_nodes, self.query));
if satisfies_predicates {
let res = QueryMatch {
id: query_match.id,
pattern: Pattern(query_match.pattern_index as u32),
matched_nodes,
query_cursor: unsafe { &mut *self.ptr },
_tree: PhantomData,
};
return Some(res);
}
}
}
pub fn next_matched_node(&mut self) -> Option<(QueryMatch<'_, 'tree>, MatchedNodeIdx)> {
let mut query_match = TSQueryMatch {
id: 0,
pattern_index: 0,
capture_count: 0,
captures: ptr::null(),
};
let mut capture_idx = 0;
loop {
let success = unsafe {
ts_query_cursor_next_capture(self.ptr, &mut query_match, &mut capture_idx)
};
if !success {
return None;
}
let matched_nodes = unsafe {
slice::from_raw_parts(
query_match.captures.cast(),
query_match.capture_count as usize,
)
};
let satisfies_predicates = self
.query
.pattern_text_predicates(query_match.pattern_index)
.iter()
.all(|predicate| predicate.satsified(&mut self.input, matched_nodes, self.query));
if satisfies_predicates {
let res = QueryMatch {
id: query_match.id,
pattern: Pattern(query_match.pattern_index as u32),
matched_nodes,
query_cursor: unsafe { &mut *self.ptr },
_tree: PhantomData,
};
return Some((res, capture_idx));
} else {
unsafe {
ts_query_cursor_remove_match(self.ptr, query_match.id);
}
}
}
}
pub fn set_byte_range(&mut self, range: Range<usize>) {
unsafe {
ts_query_cursor_set_byte_range(self.ptr, range.start as u32, range.end as u32);
}
}
pub fn reuse(mut self) -> InactiveQueryCursor {
let ptr = replace(&mut self.ptr, ptr::null_mut());
InactiveQueryCursor {
ptr: unsafe { NonNull::new_unchecked(ptr) },
}
}
}
impl<I: TsInput> Drop for QueryCursor<'_, '_, I> {
    /// Returns the raw cursor to the thread-local cache instead of freeing it.
    fn drop(&mut self) {
        // `reuse` moves the cursor data out and leaves a null pointer behind,
        // in that case there is nothing to hand back. (A subtype would be
        // cleaner but doesn't really matter at the end of the day.)
        let Some(ptr) = NonNull::new(self.ptr) else {
            return;
        };
        unsafe { with_cache(|cache| cache.push(InactiveQueryCursor { ptr })) }
    }
}
/// A query cursor that is not actively associated with a query
///
/// It owns the raw cursor: dropping it deletes the cursor.
pub struct InactiveQueryCursor {
ptr: NonNull<QueryCursorData>,
}
impl InactiveQueryCursor {
    /// Returns an idle cursor, reusing one from the thread-local cache when
    /// available and creating a new one otherwise.
    pub fn new() -> Self {
        unsafe {
            with_cache(|cache| {
                cache.pop().unwrap_or_else(|| InactiveQueryCursor {
                    ptr: NonNull::new_unchecked(ts_query_cursor_new()),
                })
            })
        }
    }

    /// Return the maximum number of in-progress matches for this cursor.
    #[doc(alias = "ts_query_cursor_match_limit")]
    #[must_use]
    pub fn match_limit(&self) -> u32 {
        unsafe { ts_query_cursor_match_limit(self.ptr.as_ptr()) }
    }

    /// Set the maximum number of in-progress matches for this cursor. The
    /// limit must be > 0 and <= 65536.
    #[doc(alias = "ts_query_cursor_set_match_limit")]
    pub fn set_match_limit(&mut self, limit: u32) {
        unsafe {
            ts_query_cursor_set_match_limit(self.ptr.as_ptr(), limit);
        }
    }

    /// Check if, on its last execution, this cursor exceeded its maximum number
    /// of in-progress matches.
    #[doc(alias = "ts_query_cursor_did_exceed_match_limit")]
    #[must_use]
    pub fn did_exceed_match_limit(&self) -> bool {
        unsafe { ts_query_cursor_did_exceed_match_limit(self.ptr.as_ptr()) }
    }

    /// Restricts the cursor to the given byte range.
    ///
    /// The offsets are passed to the C library as `u32`.
    pub fn set_byte_range(&mut self, range: Range<usize>) {
        unsafe {
            ts_query_cursor_set_byte_range(self.ptr.as_ptr(), range.start as u32, range.end as u32);
        }
    }

    /// Starts executing `query` on `node`, turning this cursor into an active
    /// [`QueryCursor`] that reads source text from `input`.
    pub fn execute_query<'a, 'tree, I: TsInput>(
        self,
        query: &'a Query,
        node: &SyntaxTreeNode<'tree>,
        input: I,
    ) -> QueryCursor<'a, 'tree, I> {
        let ptr = self.ptr.as_ptr();
        // Ownership of the raw cursor moves into the returned `QueryCursor`.
        // Without this, dropping `self` at the end of this function would
        // delete the cursor while the `QueryCursor` still points to it.
        std::mem::forget(self);
        unsafe { ts_query_cursor_exec(ptr, query.raw.as_ref(), node.as_raw()) };
        QueryCursor {
            query,
            ptr,
            tree: PhantomData,
            input,
        }
    }
}
impl Drop for InactiveQueryCursor {
/// Deletes the underlying raw cursor.
fn drop(&mut self) {
unsafe { ts_query_cursor_delete(self.ptr.as_ptr()) }
}
}
/// Index of a captured node within the captures of a [`QueryMatch`].
pub type MatchedNodeIdx = u32;
/// A node captured by a query match together with the capture it belongs to.
///
/// The raw capture array reported by the C library is cast to a slice of this
/// type, so the layout must stay identical to `TSQueryCapture`.
#[repr(C)]
#[derive(Clone)]
pub struct MatchedNode<'tree> {
/// The captured node.
pub syntax_node: SyntaxTreeNode<'tree>,
/// The capture that matched the node.
pub capture: Capture,
}
/// A single match produced by a [`QueryCursor`].
pub struct QueryMatch<'cursor, 'tree> {
/// Id of the match as reported by the C library; used to remove the match.
id: u32,
/// The pattern that matched.
pattern: Pattern,
/// All nodes captured by this match.
matched_nodes: &'cursor [MatchedNode<'tree>],
/// The cursor that produced the match; needed by `remove`.
query_cursor: &'cursor mut QueryCursorData,
_tree: PhantomData<&'tree super::SyntaxTree>,
}
impl<'tree> QueryMatch<'_, 'tree> {
pub fn matched_nodes(&self) -> impl Iterator<Item = &MatchedNode<'tree>> {
self.matched_nodes.iter()
}
pub fn nodes_for_capture(
&self,
capture: Capture,
) -> impl Iterator<Item = &SyntaxTreeNode<'tree>> {
self.matched_nodes
.iter()
.filter(move |mat| mat.capture == capture)
.map(|mat| &mat.syntax_node)
}
pub fn matched_node(&self, i: MatchedNodeIdx) -> &MatchedNode {
&self.matched_nodes[i as usize]
}
#[must_use]
pub const fn id(&self) -> u32 {
self.id
}
#[must_use]
pub const fn pattern(&self) -> Pattern {
self.pattern
}
#[doc(alias = "ts_query_cursor_remove_match")]
/// removes this match from the cursor so that further captures
/// from its cursor so that future captures that belong to this match
/// are no longer returned by capture iterators
pub fn remove(self) {
unsafe {
ts_query_cursor_remove_match(self.query_cursor, self.id);
}
}
}
/// Raw capture as laid out by the C library: a node plus a capture index.
#[repr(C)]
#[derive(Debug)]
struct TSQueryCapture {
node: SyntaxTreeNodeRaw,
index: u32,
}
/// Raw match written by `ts_query_cursor_next_match` and
/// `ts_query_cursor_next_capture`.
#[repr(C)]
#[derive(Debug)]
struct TSQueryMatch {
id: u32,
pattern_index: u16,
// number of elements in the `captures` array
capture_count: u16,
captures: *const TSQueryCapture,
}
extern "C" {
/// Advance to the next capture of the currently running query.
/// If there is a capture, write its match to `*match` and its index within
/// the match's capture list to `*capture_index`. Otherwise, return `false`.
fn ts_query_cursor_next_capture(
self_: *mut QueryCursorData,
match_: &mut TSQueryMatch,
capture_index: &mut u32,
) -> bool;
/// Advance to the next match of the currently running query.
///
/// If there is a match, write it to `*match` and return `true`.
/// Otherwise, return `false`.
pub fn ts_query_cursor_next_match(
self_: *mut QueryCursorData,
match_: &mut TSQueryMatch,
) -> bool;
/// Remove the match with the given id from the cursor.
fn ts_query_cursor_remove_match(self_: *mut QueryCursorData, match_id: u32);
/// Delete a query cursor, freeing all of the memory that it used
pub fn ts_query_cursor_delete(self_: *mut QueryCursorData);
/// Create a new cursor for executing a given query.
/// The cursor stores the state that is needed to iteratively search
/// for matches. To use the query cursor, first call [`ts_query_cursor_exec`]
/// to start running a given query on a given syntax node. Then, there are
/// two options for consuming the results of the query:
/// 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the
/// *matches* in the order that they were found. Each match contains the
/// index of the pattern that matched, and an array of captures. Because
/// multiple patterns can match the same set of nodes, one match may contain
/// captures that appear *before* some of the captures from a previous match.
/// 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the
/// individual *captures* in the order that they appear. This is useful if
/// you don't care about which pattern matched, and just want a single ordered
/// sequence of captures.
/// If you don't care about consuming all of the results, you can stop calling
/// [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point.
/// You can then start executing another query on another node by calling
/// [`ts_query_cursor_exec`] again.
pub fn ts_query_cursor_new() -> *mut QueryCursorData;
/// Start running a given query on a given node.
pub fn ts_query_cursor_exec(
self_: *mut QueryCursorData,
query: &QueryData,
node: SyntaxTreeNodeRaw,
);
/// Manage the maximum number of in-progress matches allowed by this query
/// cursor.
///
/// Query cursors have an optional maximum capacity for storing lists of
/// in-progress captures. If this capacity is exceeded, then the
/// earliest-starting match will silently be dropped to make room for further
/// matches. This maximum capacity is optional — by default, query cursors allow
/// any number of pending matches, dynamically allocating new space for them as
/// needed as the query is executed.
pub fn ts_query_cursor_did_exceed_match_limit(self_: *const QueryCursorData) -> bool;
/// Get the maximum number of in-progress matches (see
/// [`ts_query_cursor_did_exceed_match_limit`]).
pub fn ts_query_cursor_match_limit(self_: *const QueryCursorData) -> u32;
/// Set the maximum number of in-progress matches (see
/// [`ts_query_cursor_did_exceed_match_limit`]).
pub fn ts_query_cursor_set_match_limit(self_: *mut QueryCursorData, limit: u32);
/// Set the range of bytes in which the query will be executed.
pub fn ts_query_cursor_set_byte_range(
self_: *mut QueryCursorData,
start_byte: u32,
end_byte: u32,
);
}

View File

@@ -0,0 +1 @@
/// Placeholder type without fields.
///
/// NOTE(review): this looks like a stub; a full `QueryMatch` is defined next
/// to the query cursor - confirm whether this file is still needed.
pub struct QueryMatch {}

View File

@@ -0,0 +1,54 @@
use std::ops;
use regex_cursor::{Cursor, RopeyCursor};
use ropey::RopeSlice;
use crate::tree_sitter::{IntoTsInput, TsInput};
/// [`TsInput`] implementation that reads text from a rope slice.
pub struct RopeTsInput<'a> {
/// The text being read.
src: RopeSlice<'a>,
/// Chunk cursor into `src`, kept between calls to `cursor_at`.
cursor: regex_cursor::RopeyCursor<'a>,
}
impl<'a> RopeTsInput<'a> {
pub fn new(src: RopeSlice<'a>) -> Self {
RopeTsInput {
src,
cursor: regex_cursor::RopeyCursor::new(src),
}
}
}
impl<'a> IntoTsInput for RopeSlice<'a> {
type TsInput = RopeTsInput<'a>;
fn into_ts_input(self) -> Self::TsInput {
RopeTsInput {
src: self,
cursor: RopeyCursor::new(self),
}
}
}
impl<'a> TsInput for RopeTsInput<'a> {
    type Cursor = RopeyCursor<'a>;

    /// Positions the internal cursor on the chunk that contains `offset` and
    /// returns it.
    fn cursor_at(&mut self, offset: usize) -> &mut RopeyCursor<'a> {
        // this cursor is optimized for contiguous reads which are by far the most common during parsing
        // very far jumps (like injections at the other end of the document) are handled
        // by starting a new cursor (new chunks iterator)
        //
        // NOTE(review): 4906 looks like a transposition of 4096; it is only a
        // heuristic threshold, so it is kept as is.
        let cursor_start = self.cursor.offset();
        if offset < cursor_start || offset - cursor_start > 4906 {
            // backwards or far forward jump: restart the cursor at `offset`
            self.cursor = regex_cursor::RopeyCursor::at(self.src, offset);
        } else {
            // short forward jump: skip every chunk that ends at or before
            // `offset`
            while self.cursor.offset() + self.cursor.chunk().len() <= offset {
                if !self.cursor.advance() {
                    break;
                }
            }
        }
        &mut self.cursor
    }

    /// Compares the bytes of the two ranges of the underlying text.
    fn eq(&mut self, range1: ops::Range<usize>, range2: ops::Range<usize>) -> bool {
        self.src.byte_slice(range1) == self.src.byte_slice(range2)
    }
}

View File

@@ -0,0 +1,80 @@
use std::fmt;
use std::ptr::NonNull;
use crate::tree_sitter::syntax_tree_node::{SyntaxTreeNode, SyntaxTreeNodeRaw};
use crate::tree_sitter::Point;
// opaque pointers
/// Opaque type standing in for the tree owned by the C library; it is only
/// ever used behind pointers.
pub(super) enum SyntaxTreeData {}
/// An owned syntax tree; the underlying tree is deleted when this is dropped.
pub struct SyntaxTree {
ptr: NonNull<SyntaxTreeData>,
}
impl SyntaxTree {
/// Wraps a raw tree pointer.
///
/// # Safety
/// The returned value deletes the tree when dropped.
/// NOTE(review): presumably `raw` must be a valid tree that is not owned by
/// anything else - confirm.
pub(super) unsafe fn from_raw(raw: NonNull<SyntaxTreeData>) -> SyntaxTree {
SyntaxTree { ptr: raw }
}
/// Returns the raw tree pointer without giving up ownership.
pub(super) fn as_raw(&self) -> NonNull<SyntaxTreeData> {
self.ptr
}
/// Returns the root node of the tree.
///
/// # Panics
/// Panics if the C library reports a null node for the root.
pub fn root_node(&self) -> SyntaxTreeNode<'_> {
unsafe { SyntaxTreeNode::from_raw(ts_tree_root_node(self.ptr)).unwrap() }
}
/// Applies `edit` to the tree to keep it in sync with edited source code.
pub fn edit(&mut self, edit: &InputEdit) {
unsafe { ts_tree_edit(self.ptr, edit) }
}
}
impl fmt::Debug for SyntaxTree {
/// Prints the tree as `{Tree <root node>}`.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{{Tree {:?}}}", self.root_node())
}
}
impl Drop for SyntaxTree {
/// Deletes the underlying tree.
fn drop(&mut self) {
unsafe { ts_tree_delete(self.ptr) }
}
}
impl Clone for SyntaxTree {
    /// Creates a copy of the tree through `ts_tree_copy`.
    fn clone(&self) -> Self {
        let ptr = unsafe { ts_tree_copy(self.ptr) };
        SyntaxTree { ptr }
    }
}
/// Description of a source edit, passed to `SyntaxTree::edit`.
///
/// The edit is described both in terms of byte offsets and in terms of
/// points.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct InputEdit {
pub start_byte: u32,
pub old_end_byte: u32,
pub new_end_byte: u32,
pub start_point: Point,
pub old_end_point: Point,
pub new_end_point: Point,
}
extern "C" {
/// Create a shallow copy of the syntax tree. This is very fast. You need to
/// copy a syntax tree in order to use it on more than one thread at a time,
/// as syntax trees are not thread safe.
fn ts_tree_copy(self_: NonNull<SyntaxTreeData>) -> NonNull<SyntaxTreeData>;
/// Delete the syntax tree, freeing all of the memory that it used.
fn ts_tree_delete(self_: NonNull<SyntaxTreeData>);
/// Get the root node of the syntax tree.
// NOTE(review): the `'tree` lifetime parameter is not used by the signature.
fn ts_tree_root_node<'tree>(self_: NonNull<SyntaxTreeData>) -> SyntaxTreeNodeRaw;
/// Edit the syntax tree to keep it in sync with source code that has been
/// edited.
///
/// You must describe the edit both in terms of byte offsets and in terms of
/// row/column coordinates.
fn ts_tree_edit(self_: NonNull<SyntaxTreeData>, edit: &InputEdit);
}

View File

@@ -0,0 +1,292 @@
use std::ffi::c_void;
use std::marker::PhantomData;
use std::ops::Range;
use std::ptr::NonNull;
use crate::tree_sitter::syntax_tree::SyntaxTree;
use crate::tree_sitter::Grammar;
/// Node value as passed to and returned from the C library.
///
/// Unlike [`SyntaxTreeNode`] the pointers may be null; `SyntaxTreeNode::from_raw`
/// treats a null `id` as "no node".
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub(super) struct SyntaxTreeNodeRaw {
context: [u32; 4],
id: *const c_void,
tree: *const c_void,
}
impl From<SyntaxTreeNode<'_>> for SyntaxTreeNodeRaw {
fn from(node: SyntaxTreeNode) -> SyntaxTreeNodeRaw {
SyntaxTreeNodeRaw {
context: node.context,
id: node.id.as_ptr(),
tree: node.tree.as_ptr(),
}
}
}
/// A node of a [`SyntaxTree`]; it cannot outlive the tree it belongs to.
///
/// The first three fields mirror `SyntaxTreeNodeRaw`, except that both
/// pointers are known to be non-null.
#[derive(Debug, Clone)]
#[repr(C)]
pub struct SyntaxTreeNode<'tree> {
context: [u32; 4],
id: NonNull<c_void>,
tree: NonNull<c_void>,
_phantom: PhantomData<&'tree SyntaxTree>,
}
impl<'tree> SyntaxTreeNode<'tree> {
/// Converts a raw node into a node, returning `None` if `raw.id` is null.
///
/// # Safety
/// If `raw.id` is non-null then `raw.tree` must be non-null as well (it is
/// not checked).
/// NOTE(review): presumably `raw` must also belong to a tree that lives for
/// at least `'tree` - confirm.
#[inline]
pub(super) unsafe fn from_raw(raw: SyntaxTreeNodeRaw) -> Option<Self> {
Some(SyntaxTreeNode {
context: raw.context,
id: NonNull::new(raw.id as *mut _)?,
tree: unsafe { NonNull::new_unchecked(raw.tree as *mut _) },
_phantom: PhantomData,
})
}
/// Returns the raw representation expected by the C library.
#[inline]
pub(crate) fn as_raw(&self) -> SyntaxTreeNodeRaw {
SyntaxTreeNodeRaw {
context: self.context,
id: self.id.as_ptr(),
tree: self.tree.as_ptr(),
}
}
/// Get this node's type as a numerical id.
#[inline]
pub fn kind_id(&self) -> u16 {
unsafe { ts_node_symbol(self.as_raw()) }
}
/// Get the [`Grammar`] that was used to parse this node's syntax tree.
#[inline]
pub fn grammar(&self) -> Grammar {
unsafe { ts_node_language(self.as_raw()) }
}
/// Check if this node is *named*.
///
/// Named nodes correspond to named rules in the grammar, whereas
/// *anonymous* nodes correspond to string literals in the grammar.
#[inline]
pub fn is_named(&self) -> bool {
unsafe { ts_node_is_named(self.as_raw()) }
}
/// Check if this node is *missing*.
///
/// Missing nodes are inserted by the parser in order to recover from
/// certain kinds of syntax errors.
#[inline]
pub fn is_missing(&self) -> bool {
unsafe { ts_node_is_missing(self.as_raw()) }
}
/// Get the byte offset where this node starts.
#[inline]
pub fn start_byte(&self) -> usize {
unsafe { ts_node_start_byte(self.as_raw()) as usize }
}
/// Get the byte offset where this node ends.
#[inline]
pub fn end_byte(&self) -> usize {
unsafe { ts_node_end_byte(self.as_raw()) as usize }
}
/// Get the byte range of source code that this node represents.
// TODO: use helix_stdx::Range once available
#[inline]
pub fn byte_range(&self) -> Range<usize> {
self.start_byte()..self.end_byte()
}
/// Get the node's child at the given index, where zero represents the first
/// child.
///
/// This method is fairly fast, but its cost is technically log(i), so if
/// you might be iterating over a long list of children, you should use
/// `SyntaxTreeNode::children` instead (currently commented out below).
#[inline]
pub fn child(&self, i: usize) -> Option<SyntaxTreeNode<'tree>> {
unsafe { SyntaxTreeNode::from_raw(ts_node_child(self.as_raw(), i as u32)) }
}
/// Get this node's number of children.
#[inline]
pub fn child_count(&self) -> usize {
unsafe { ts_node_child_count(self.as_raw()) as usize }
}
/// Get this node's *named* child at the given index.
///
/// See also [`SyntaxTreeNode::is_named`].
/// This method is fairly fast, but its cost is technically log(i), so if
/// you might be iterating over a long list of children, you should use
/// `SyntaxTreeNode::named_children` instead (not available yet).
#[inline]
pub fn named_child(&self, i: usize) -> Option<SyntaxTreeNode<'tree>> {
unsafe { SyntaxTreeNode::from_raw(ts_node_named_child(self.as_raw(), i as u32)) }
}
/// Get this node's number of *named* children.
///
/// See also [`SyntaxTreeNode::is_named`].
#[inline]
pub fn named_child_count(&self) -> usize {
unsafe { ts_node_named_child_count(self.as_raw()) as usize }
}
/// Applies a node-to-node C function to this node and converts the result,
/// returning `None` if the function returns a null node.
///
/// # Safety
/// NOTE(review): presumably `f` must return either a null node or a node of
/// the same tree - confirm.
#[inline]
unsafe fn map(
&self,
f: unsafe extern "C" fn(SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw,
) -> Option<SyntaxTreeNode<'tree>> {
SyntaxTreeNode::from_raw(f(self.as_raw()))
}
/// Get this node's immediate parent.
#[inline]
pub fn parent(&self) -> Option<Self> {
unsafe { self.map(ts_node_parent) }
}
/// Get this node's next sibling.
#[inline]
pub fn next_sibling(&self) -> Option<Self> {
unsafe { self.map(ts_node_next_sibling) }
}
/// Get this node's previous sibling.
#[inline]
pub fn prev_sibling(&self) -> Option<Self> {
unsafe { self.map(ts_node_prev_sibling) }
}
/// Get this node's next named sibling.
#[inline]
pub fn next_named_sibling(&self) -> Option<Self> {
unsafe { self.map(ts_node_next_named_sibling) }
}
/// Get this node's previous named sibling.
#[inline]
pub fn prev_named_sibling(&self) -> Option<Self> {
unsafe { self.map(ts_node_prev_named_sibling) }
}
/// Get the smallest node within this node that spans the given range.
///
/// The offsets are passed to the C library as `u32`.
#[inline]
pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
unsafe {
Self::from_raw(ts_node_descendant_for_byte_range(
self.as_raw(),
start as u32,
end as u32,
))
}
}
/// Get the smallest named node within this node that spans the given range.
///
/// The offsets are passed to the C library as `u32`.
#[inline]
pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option<Self> {
unsafe {
Self::from_raw(ts_node_named_descendant_for_byte_range(
self.as_raw(),
start as u32,
end as u32,
))
}
}
// /// Iterate over this node's children.
// ///
// /// A [`TreeCursor`] is used to retrieve the children efficiently. Obtain
// /// a [`TreeCursor`] by calling [`Tree::walk`] or [`SyntaxTreeNode::walk`]. To avoid
// /// unnecessary allocations, you should reuse the same cursor for
// /// subsequent calls to this method.
// ///
// /// If you're walking the tree recursively, you may want to use the
// /// [`TreeCursor`] APIs directly instead.
// pub fn children<'cursor>(
// &self,
// cursor: &'cursor mut TreeCursor<'tree>,
// ) -> impl ExactSizeIterator<Item = SyntaxTreeNode<'tree>> + 'cursor {
// cursor.reset(self.to_raw());
// cursor.goto_first_child();
// (0..self.child_count()).map(move |_| {
// let result = cursor.node();
// cursor.goto_next_sibling();
// result
// })
// }
}
// SAFETY: NOTE(review): these impls assert that a `SyntaxTreeNode` may be moved to
// and shared between threads. Nothing in this part of the file demonstrates why
// that is sound (presumably nodes are read-only views into an immutable tree) —
// confirm against the definition of `SyntaxTreeNode` and the tree-sitter docs.
unsafe impl Send for SyntaxTreeNode<'_> {}
unsafe impl Sync for SyntaxTreeNode<'_> {}
// Raw declarations of the tree-sitter C `ts_node_*` functions used by the
// `SyntaxTreeNode` wrappers above. Every function takes the node by value as
// a `SyntaxTreeNodeRaw`.
extern "C" {
/// Get the node's type as a numerical id.
fn ts_node_symbol(node: SyntaxTreeNodeRaw) -> u16;
/// Get the node's language.
fn ts_node_language(node: SyntaxTreeNodeRaw) -> Grammar;
/// Check if the node is *named*. Named nodes correspond to named rules in
/// the grammar, whereas *anonymous* nodes correspond to string literals in
/// the grammar
fn ts_node_is_named(node: SyntaxTreeNodeRaw) -> bool;
/// Check if the node is *missing*. Missing nodes are inserted by the parser
/// in order to recover from certain kinds of syntax errors
fn ts_node_is_missing(node: SyntaxTreeNodeRaw) -> bool;
/// Get the node's immediate parent
fn ts_node_parent(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;
/// Get the node's child at the given index, where zero represents the first
/// child
fn ts_node_child(node: SyntaxTreeNodeRaw, child_index: u32) -> SyntaxTreeNodeRaw;
/// Get the node's number of children
fn ts_node_child_count(node: SyntaxTreeNodeRaw) -> u32;
/// Get the node's *named* child at the given index. See also
/// [`ts_node_is_named`]
fn ts_node_named_child(node: SyntaxTreeNodeRaw, child_index: u32) -> SyntaxTreeNodeRaw;
/// Get the node's number of *named* children. See also [`ts_node_is_named`]
fn ts_node_named_child_count(node: SyntaxTreeNodeRaw) -> u32;
/// Get the node's next sibling
fn ts_node_next_sibling(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;
/// Get the node's previous sibling
fn ts_node_prev_sibling(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;
/// Get the node's next *named* sibling
fn ts_node_next_named_sibling(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;
/// Get the node's previous *named* sibling
fn ts_node_prev_named_sibling(node: SyntaxTreeNodeRaw) -> SyntaxTreeNodeRaw;
/// Get the smallest node within this node that spans the given range of
/// bytes (`start` and `end` are byte offsets)
fn ts_node_descendant_for_byte_range(
node: SyntaxTreeNodeRaw,
start: u32,
end: u32,
) -> SyntaxTreeNodeRaw;
/// Get the smallest named node within this node that spans the given range
/// of bytes (`start` and `end` are byte offsets)
fn ts_node_named_descendant_for_byte_range(
node: SyntaxTreeNodeRaw,
start: u32,
end: u32,
) -> SyntaxTreeNodeRaw;
/// Get the node's start byte.
fn ts_node_start_byte(self_: SyntaxTreeNodeRaw) -> u32;
/// Get the node's end byte.
fn ts_node_end_byte(node: SyntaxTreeNodeRaw) -> u32;
}

View File

@@ -54,7 +54,14 @@ pub fn highlighted_code_block<'a>(
language.into(),
))
.and_then(|config| config.highlight_config(theme.scopes()))
.and_then(|config| Syntax::new(ropeslice, config, Arc::clone(&config_loader)));
.and_then(|config| {
Syntax::new(ropeslice, config, |injection| {
config_loader
.load()
.language_configuration_for_injection_string(injection)
.and_then(|config| config.get_highlight_config())
})
});
let syntax = match syntax {
Some(s) => s,

View File

@@ -82,7 +82,12 @@ impl<T: 'static + Send + Sync, D: 'static + Send + Sync> AsyncHook
let Some(syntax) = language_config
.highlight_config(&loader.load().scopes())
.and_then(|highlight_config| {
helix_core::Syntax::new(text.slice(..), highlight_config, loader)
helix_core::Syntax::new(text.slice(..), highlight_config, |injection| {
loader
.load()
.language_configuration_for_injection_string(injection)
.and_then(|config| config.get_highlight_config())
})
})
else {
log::info!("highlighting picker item failed");

View File

@@ -7,7 +7,7 @@ use helix_core::auto_pairs::AutoPairs;
use helix_core::chars::char_is_word;
use helix_core::doc_formatter::TextFormat;
use helix_core::encoding::Encoding;
use helix_core::syntax::{Highlight, LanguageServerFeature};
use helix_core::syntax::{generate_edits, Highlight, LanguageServerFeature};
use helix_core::text_annotations::{InlineAnnotation, Overlay};
use helix_lsp::util::lsp_pos_to_pos;
use helix_stdx::faccess::{copy_metadata, readonly};
@@ -156,6 +156,7 @@ pub struct Document {
pub syntax: Option<Syntax>,
/// Corresponding language scope name. Usually `source.<lang>`.
pub language: Option<Arc<LanguageConfiguration>>,
loader: Option<Arc<ArcSwap<helix_core::syntax::Loader>>>,
/// Pending changes since last history commit.
changes: ChangeSet,
@@ -678,6 +679,7 @@ impl Document {
focused_at: std::time::Instant::now(),
readonly: false,
jump_labels: HashMap::new(),
loader: None,
}
}
@@ -1131,9 +1133,15 @@ impl Document {
if let Some(highlight_config) =
language_config.highlight_config(&(*loader).load().scopes())
{
self.syntax = Syntax::new(self.text.slice(..), highlight_config, loader);
let loader_ = loader.load_full();
self.syntax = Syntax::new(self.text.slice(..), highlight_config, |injection| {
loader_
.language_configuration_for_injection_string(injection)
.and_then(|config| config.get_highlight_config())
});
}
self.loader = Some(loader);
self.language = Some(language_config);
} else {
self.syntax = None;
@@ -1275,11 +1283,16 @@ impl Document {
// update tree-sitter syntax tree
if let Some(syntax) = &mut self.syntax {
let loader = self.loader.as_ref().unwrap().load_full();
// TODO: no unwrap
let res = syntax.update(
old_doc.slice(..),
self.text.slice(..),
transaction.changes(),
generate_edits(old_doc.slice(..), transaction.changes()),
|injection| {
loader
.language_configuration_for_injection_string(injection)
.and_then(|config| config.get_highlight_config())
},
);
if res.is_err() {
log::error!("TS parser failed, disabling TS for the current buffer: {res:?}");

21
vendor/tree-sitter/LICENSE vendored Normal file
View File

@@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2018-2024 Max Brunsfeld
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

File diff suppressed because it is too large Load Diff

48
vendor/tree-sitter/src/alloc.c vendored Normal file
View File

@@ -0,0 +1,48 @@
#include "alloc.h"
#include "tree_sitter/api.h"
#include <stdlib.h>
// Default allocation routine: forwards to `malloc` and aborts the process
// (after printing a diagnostic to stderr) when a non-empty request fails.
static void *ts_malloc_default(size_t size) {
  void *memory = malloc(size);
  if (memory == NULL && size != 0) {
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
    abort();
  }
  return memory;
}
// Default zero-initializing allocation routine: forwards to `calloc` and
// aborts the process (after printing a diagnostic to stderr) on failure.
//
// A NULL result is only treated as a failure when the request is non-empty,
// i.e. both `count` and `size` are non-zero: `calloc` is allowed to return
// NULL for a zero-byte request, which must not abort (this matches the
// `size > 0` guard used by the malloc/realloc defaults).
static void *ts_calloc_default(size_t count, size_t size) {
  void *result = calloc(count, size);
  if (count > 0 && size > 0 && !result) {
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
    abort();
  }
  return result;
}
// Default reallocation routine: forwards to `realloc` and aborts the process
// (after printing a diagnostic to stderr) when a non-empty request fails.
static void *ts_realloc_default(void *buffer, size_t size) {
  void *resized = realloc(buffer, size);
  if (resized == NULL && size != 0) {
    fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
    abort();
  }
  return resized;
}
// Allow clients to override allocation functions dynamically
//
// These function pointers start out pointing at the aborting default wrappers
// defined above (and at the C library's `free`); `ts_set_allocator` below
// replaces them.
TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default;
TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default;
TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default;
TS_PUBLIC void (*ts_current_free)(void *) = free;
// Install custom allocation functions. Any argument passed as NULL selects
// the corresponding built-in default (`free` for `new_free`).
void ts_set_allocator(
  void *(*new_malloc)(size_t size),
  void *(*new_calloc)(size_t count, size_t size),
  void *(*new_realloc)(void *ptr, size_t size),
  void (*new_free)(void *ptr)
) {
  if (new_malloc == NULL) new_malloc = ts_malloc_default;
  if (new_calloc == NULL) new_calloc = ts_calloc_default;
  if (new_realloc == NULL) new_realloc = ts_realloc_default;
  if (new_free == NULL) new_free = free;

  ts_current_malloc = new_malloc;
  ts_current_calloc = new_calloc;
  ts_current_realloc = new_realloc;
  ts_current_free = new_free;
}

41
vendor/tree-sitter/src/alloc.h vendored Normal file
View File

@@ -0,0 +1,41 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
// TS_PUBLIC marks the allocator hooks below. It expands to nothing when
// TREE_SITTER_HIDDEN_SYMBOLS is defined or when building for Windows, and to
// a default-visibility attribute otherwise.
#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32)
#define TS_PUBLIC
#else
#define TS_PUBLIC __attribute__((visibility("default")))
#endif
// The currently installed allocation functions; the `ts_malloc`, `ts_calloc`,
// `ts_realloc` and `ts_free` macros below resolve to these unless they are
// already defined.
TS_PUBLIC extern void *(*ts_current_malloc)(size_t);
TS_PUBLIC extern void *(*ts_current_calloc)(size_t, size_t);
TS_PUBLIC extern void *(*ts_current_realloc)(void *, size_t);
TS_PUBLIC extern void (*ts_current_free)(void *);
// Allow clients to override allocation functions
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

290
vendor/tree-sitter/src/array.h vendored Normal file
View File

@@ -0,0 +1,290 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./alloc.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
#define Array(T) \
struct { \
T *contents; \
uint32_t size; \
uint32_t capacity; \
}
/// Initialize an array.
#define array_init(self) \
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
/// Create an empty array.
#define array_new() \
{ NULL, 0, 0 }
/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
(assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)
/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)
/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
/// Free the buffer allocated for this array's contents. Note that this does
/// not free any memory owned by the individual elements themselves.
#define array_delete(self) _array__delete((Array *)(self))
/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
(self)->contents[(self)->size++] = (element))
/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
do { \
if ((count) == 0) break; \
_array__grow((Array *)(self), count, array_elem_size(self)); \
memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
(self)->size += (count); \
} while (0)
/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
array_extend((self), (other)->size, (other)->contents)
/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), (self)->size, \
0, count, contents \
)
/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), _index, \
old_count, new_count, new_contents \
)
/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
_array__erase((Array *)(self), array_elem_size(self), _index)
/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])
/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
/// Swap one array with another
#define array_swap(self, other) \
_array__swap((Array *)(self), (Array *)(other))
/// Get the size in bytes of a single element of the array
#define array_elem_size(self) (sizeof *(self)->contents)
/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
// Private
typedef Array(void) Array;
/// Release the buffer held by `self` (if any) and reset the array to empty.
/// This is not what you're looking for, see `array_delete`.
static inline void _array__delete(Array *self) {
  if (!self->contents) return;
  ts_free(self->contents);
  self->contents = NULL;
  self->size = 0;
  self->capacity = 0;
}
/// Remove the element at `index`, shifting every later element down by one.
/// This is not what you're looking for, see `array_erase`.
static inline void _array__erase(Array *self, size_t element_size,
                                 uint32_t index) {
  assert(index < self->size);
  char *slot = (char *)self->contents + index * element_size;
  uint32_t trailing = self->size - index - 1;
  // Close the gap by moving the elements that follow the erased slot.
  memmove(slot, slot + element_size, trailing * element_size);
  self->size--;
}
/// Grow the buffer so that it can hold `new_capacity` elements; does nothing
/// when the current capacity is already at least that large.
/// This is not what you're looking for, see `array_reserve`.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
  if (new_capacity <= self->capacity) return;
  size_t byte_count = new_capacity * element_size;
  // Resize an existing buffer, or allocate a fresh one for an empty array.
  self->contents = self->contents
    ? ts_realloc(self->contents, byte_count)
    : ts_malloc(byte_count);
  self->capacity = new_capacity;
}
/// Make `self` a copy of `other`: reserve room for `other->size` elements,
/// adopt that size, and copy the element bytes across.
/// This is not what you're looking for, see `array_assign`.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
  _array__reserve(self, element_size, other->size);
  self->size = other->size;
  // When `other` is empty both `contents` pointers may be NULL; passing NULL
  // to memcpy is undefined behaviour even for a zero-byte copy, so skip it.
  if (self->size > 0) {
    memcpy(self->contents, other->contents, self->size * element_size);
  }
}
/// Exchange the complete state (buffer pointer, size, capacity) of two arrays.
/// This is not what you're looking for, see `array_swap`.
static inline void _array__swap(Array *self, Array *other) {
  Array previous_self = *self;
  *self = *other;
  *other = previous_self;
}
/// Ensure there is capacity for `count` more elements. When the buffer has to
/// grow, the new capacity is the largest of: twice the old capacity, 8, and
/// the required size.
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
  uint32_t required = self->size + count;
  if (required <= self->capacity) return;
  uint32_t doubled = self->capacity * 2;
  uint32_t target = doubled < 8 ? 8 : doubled;
  if (target < required) target = required;
  _array__reserve(self, element_size, target);
}
/// Replace `old_count` elements starting at `index` with `new_count` elements.
/// The new elements are copied from `elements`, or zero-filled when
/// `elements` is NULL.
/// This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size,
                                  uint32_t index, uint32_t old_count,
                                  uint32_t new_count, const void *elements) {
  uint32_t removed_end = index + old_count;
  assert(removed_end <= self->size);
  uint32_t inserted_end = index + new_count;
  uint32_t resulting_size = self->size + new_count - old_count;

  _array__reserve(self, element_size, resulting_size);
  char *base = (char *)self->contents;

  // Relocate the elements that follow the replaced region.
  if (self->size > removed_end) {
    uint32_t tail_count = self->size - removed_end;
    memmove(
      base + inserted_end * element_size,
      base + removed_end * element_size,
      tail_count * element_size
    );
  }

  // Fill the opened slot with the supplied values, or with zeroes.
  if (new_count > 0) {
    char *gap = base + index * element_size;
    size_t gap_bytes = new_count * element_size;
    if (elements) {
      memcpy(gap, elements, gap_bytes);
    } else {
      memset(gap, 0, gap_bytes);
    }
  }

  self->size = resulting_size;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
///
/// Parameters:
///  - `self`:    pointer to the array to search.
///  - `start`:   index at which the search begins (stored into `*_index` first).
///  - `compare`: function or macro called as `compare(&element suffix, needle)`;
///               a negative result means the element sorts before `needle`,
///               zero means they are equal.
///  - `suffix`:  tokens pasted after the element expression (a `.field`
///               accessor, or nothing).
///  - `_index`:  out-pointer; receives the index of the match, or the index at
///               which `needle` would have to be inserted.
///  - `_exists`: out-pointer; set to true only when an equal element is found.
///
/// The loop halves the remaining window on every step and moves `*_index`
/// forward to the midpoint whenever the midpoint element is <= `needle`; one
/// final comparison then decides between "found" and "insert after".
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
do { \
*(_index) = start; \
*(_exists) = false; \
uint32_t size = (self)->size - *(_index); \
if (size == 0) break; \
int comparison; \
while (size > 1) { \
uint32_t half_size = size / 2; \
uint32_t mid_index = *(_index) + half_size; \
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
if (comparison <= 0) *(_index) = mid_index; \
size -= half_size; \
} \
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
if (comparison == 0) *(_exists) = true; \
else if (comparison < 0) *(_index) += 1; \
} while (0)
/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
#define _compare_int(a, b) ((int)*(a) - (int)(b))
#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_

68
vendor/tree-sitter/src/atomic.h vendored Normal file
View File

@@ -0,0 +1,68 @@
#ifndef TREE_SITTER_ATOMIC_H_
#define TREE_SITTER_ATOMIC_H_
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
// Three implementations of the same helpers, selected at compile time:
//   atomic_load(p) - read the value at `p`
//   atomic_inc(p)  - add one to `*p` and return the new value
//   atomic_dec(p)  - subtract one from `*p` and return the new value
#ifdef __TINYC__
// TinyCC: plain reads and writes through the volatile pointer; no atomic
// instructions or compiler builtins are used in this branch.
static inline size_t atomic_load(const volatile size_t *p) {
return *p;
}
static inline uint32_t atomic_inc(volatile uint32_t *p) {
*p += 1;
return *p;
}
static inline uint32_t atomic_dec(volatile uint32_t *p) {
*p-= 1;
return *p;
}
#elif defined(_WIN32)
// Windows: the load is a plain volatile read; increment and decrement use the
// Interlocked* functions from <windows.h>.
#include <windows.h>
static inline size_t atomic_load(const volatile size_t *p) {
return *p;
}
static inline uint32_t atomic_inc(volatile uint32_t *p) {
return InterlockedIncrement((long volatile *)p);
}
static inline uint32_t atomic_dec(volatile uint32_t *p) {
return InterlockedDecrement((long volatile *)p);
}
#else
// Everything else: use the `__atomic_*` builtins when `__ATOMIC_RELAXED` is
// defined, otherwise fall back to the older `__sync_*` builtins.
static inline size_t atomic_load(const volatile size_t *p) {
#ifdef __ATOMIC_RELAXED
return __atomic_load_n(p, __ATOMIC_RELAXED);
#else
// Adding zero returns the current value without changing it.
return __sync_fetch_and_add((volatile size_t *)p, 0);
#endif
}
static inline uint32_t atomic_inc(volatile uint32_t *p) {
#ifdef __ATOMIC_RELAXED
return __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST);
#else
return __sync_add_and_fetch(p, 1U);
#endif
}
static inline uint32_t atomic_dec(volatile uint32_t *p) {
#ifdef __ATOMIC_RELAXED
return __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST);
#else
return __sync_sub_and_fetch(p, 1U);
#endif
}
#endif
#endif // TREE_SITTER_ATOMIC_H_

146
vendor/tree-sitter/src/clock.h vendored Normal file
View File

@@ -0,0 +1,146 @@
#ifndef TREE_SITTER_CLOCK_H_
#define TREE_SITTER_CLOCK_H_
#include <stdbool.h>
#include <stdint.h>
typedef uint64_t TSDuration;
#ifdef _WIN32
// Windows:
// * Represent a time as a performance counter value.
// * Represent a duration as a number of performance counter ticks.
#include <windows.h>
typedef uint64_t TSClock;
// Convert microseconds to performance-counter ticks (queries the counter
// frequency on every call).
static inline TSDuration duration_from_micros(uint64_t micros) {
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
return micros * (uint64_t)frequency.QuadPart / 1000000;
}
// Convert performance-counter ticks back to microseconds.
static inline uint64_t duration_to_micros(TSDuration self) {
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
return self * 1000000 / (uint64_t)frequency.QuadPart;
}
// The "no clock" sentinel is the value 0.
static inline TSClock clock_null(void) {
return 0;
}
// Current performance counter value.
static inline TSClock clock_now(void) {
LARGE_INTEGER result;
QueryPerformanceCounter(&result);
return (uint64_t)result.QuadPart;
}
// The point in time `duration` ticks after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
return base + duration;
}
// True for the sentinel produced by `clock_null` (a counter value of 0).
static inline bool clock_is_null(TSClock self) {
return !self;
}
// True when `self` is strictly later than `other`.
static inline bool clock_is_gt(TSClock self, TSClock other) {
return self > other;
}
#elif defined(CLOCK_MONOTONIC) && !defined(__APPLE__)
// POSIX with monotonic clock support (Linux)
// * Represent a time as a monotonic (seconds, nanoseconds) pair.
// * Represent a duration as a number of microseconds.
//
// On these platforms, parse timeouts will correspond accurately to
// real time, regardless of what other processes are running.
#include <time.h>
typedef struct timespec TSClock;
// Durations are already stored in microseconds here, so both conversions are
// the identity.
static inline TSDuration duration_from_micros(uint64_t micros) {
return micros;
}
static inline uint64_t duration_to_micros(TSDuration self) {
return self;
}
// Current reading of CLOCK_MONOTONIC (the return value of clock_gettime is
// not checked).
static inline TSClock clock_now(void) {
TSClock result;
clock_gettime(CLOCK_MONOTONIC, &result);
return result;
}
// The "no clock" sentinel: zero seconds and zero nanoseconds.
static inline TSClock clock_null(void) {
return (TSClock) {0, 0};
}
// Returns the point in time `duration` microseconds after `base`, carrying at
// most one second over from the nanosecond field.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock deadline = base;
  deadline.tv_sec += duration / 1000000;
  deadline.tv_nsec += (duration % 1000000) * 1000;
  if (deadline.tv_nsec >= 1000000000) {
    deadline.tv_sec += 1;
    deadline.tv_nsec -= 1000000000;
  }
  return deadline;
}
// True only for the sentinel produced by `clock_null`, i.e. when both the
// seconds and the nanoseconds fields are zero. Checking `tv_sec` alone would
// also classify every real timestamp within the first second of the clock
// (tv_sec == 0, tv_nsec != 0) as "no clock".
static inline bool clock_is_null(TSClock self) {
  return !self.tv_sec && !self.tv_nsec;
}
// True when `self` is strictly later than `other`: seconds are compared
// first, and nanoseconds only break a tie.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  if (self.tv_sec != other.tv_sec) return self.tv_sec > other.tv_sec;
  return self.tv_nsec > other.tv_nsec;
}
#else
// macOS or POSIX without monotonic clock support
// * Represent a time as a process clock value.
// * Represent a duration as a number of process clock ticks.
//
// On these platforms, parse timeouts may be affected by other processes,
// which is not ideal, but is better than using a non-monotonic time API
// like `gettimeofday`.
#include <time.h>
typedef uint64_t TSClock;
// Convert microseconds to `clock()` ticks using CLOCKS_PER_SEC.
static inline TSDuration duration_from_micros(uint64_t micros) {
return micros * (uint64_t)CLOCKS_PER_SEC / 1000000;
}
// Convert `clock()` ticks back to microseconds.
static inline uint64_t duration_to_micros(TSDuration self) {
return self * 1000000 / (uint64_t)CLOCKS_PER_SEC;
}
// The "no clock" sentinel is the value 0.
static inline TSClock clock_null(void) {
return 0;
}
// Current value of the C library's `clock()`.
static inline TSClock clock_now(void) {
return (uint64_t)clock();
}
// The point in time `duration` ticks after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
return base + duration;
}
// True for the sentinel produced by `clock_null` (a tick count of 0).
static inline bool clock_is_null(TSClock self) {
return !self;
}
// True when `self` is strictly later than `other`.
static inline bool clock_is_gt(TSClock self, TSClock other) {
return self > other;
}
#endif
#endif // TREE_SITTER_CLOCK_H_

11
vendor/tree-sitter/src/error_costs.h vendored Normal file
View File

@@ -0,0 +1,11 @@
#ifndef TREE_SITTER_ERROR_COSTS_H_
#define TREE_SITTER_ERROR_COSTS_H_
#define ERROR_STATE 0
#define ERROR_COST_PER_RECOVERY 500
#define ERROR_COST_PER_MISSING_TREE 110
#define ERROR_COST_PER_SKIPPED_TREE 100
#define ERROR_COST_PER_SKIPPED_LINE 30
#define ERROR_COST_PER_SKIPPED_CHAR 1
#endif

View File

@@ -0,0 +1,501 @@
#include "./get_changed_ranges.h"
#include "./subtree.h"
#include "./language.h"
#include "./error_costs.h"
#include "./tree_cursor.h"
#include <assert.h>
// #define DEBUG_GET_CHANGED_RANGES
// Record the span from `start` to `end` in `self`. A span that begins at or
// before the end of the most recently stored range extends that range;
// otherwise it is appended as a new range, unless it is empty.
static void ts_range_array_add(
  TSRangeArray *self,
  Length start,
  Length end
) {
  TSRange *previous = self->size > 0 ? array_back(self) : NULL;
  if (previous && start.bytes <= previous->end_byte) {
    previous->end_byte = end.bytes;
    previous->end_point = end.extent;
    return;
  }

  if (start.bytes >= end.bytes) return;

  TSRange range = {
    .start_point = start.extent,
    .end_point = end.extent,
    .start_byte = start.bytes,
    .end_byte = end.bytes,
  };
  array_push(self, range);
}
// Report whether any range at or after `start_index` overlaps the byte span
// from `start_byte` to `end_byte`. The scan stops at the first range that
// ends after `start_byte`: the answer is whether that range begins before
// `end_byte`.
bool ts_range_array_intersects(
  const TSRangeArray *self,
  unsigned start_index,
  uint32_t start_byte,
  uint32_t end_byte
) {
  for (unsigned i = start_index; i < self->size; i++) {
    const TSRange *range = &self->contents[i];
    if (range->end_byte <= start_byte) continue;
    return range->start_byte < end_byte;
  }
  return false;
}
// Compute the regions covered by exactly one of two range lists and append
// them to `differences`.
//
// Both lists are walked in a single sweep. At each step the next boundary
// (start or end of a range) of each list is determined, the sweep moves to
// whichever comes first, and the span just crossed is recorded when it lay
// inside one list but not the other.
// NOTE(review): this appears to assume both inputs are sorted by position and
// non-overlapping — confirm with the callers.
void ts_range_array_get_changed_ranges(
const TSRange *old_ranges, unsigned old_range_count,
const TSRange *new_ranges, unsigned new_range_count,
TSRangeArray *differences
) {
unsigned new_index = 0;
unsigned old_index = 0;
Length current_position = length_zero();
// Whether `current_position` currently lies inside a range of each list.
bool in_old_range = false;
bool in_new_range = false;
while (old_index < old_range_count || new_index < new_range_count) {
// These pointers may be one-past-the-end; they are only dereferenced when
// the corresponding index is still in bounds (see the branches below).
const TSRange *old_range = &old_ranges[old_index];
const TSRange *new_range = &new_ranges[new_index];
// Next boundary in the old list: the end of the current range, the start of
// the next one, or LENGTH_MAX once the list is exhausted.
Length next_old_position;
if (in_old_range) {
next_old_position = (Length) {old_range->end_byte, old_range->end_point};
} else if (old_index < old_range_count) {
next_old_position = (Length) {old_range->start_byte, old_range->start_point};
} else {
next_old_position = LENGTH_MAX;
}
// Same for the new list.
Length next_new_position;
if (in_new_range) {
next_new_position = (Length) {new_range->end_byte, new_range->end_point};
} else if (new_index < new_range_count) {
next_new_position = (Length) {new_range->start_byte, new_range->start_point};
} else {
next_new_position = LENGTH_MAX;
}
if (next_old_position.bytes < next_new_position.bytes) {
// The old list's boundary comes first.
if (in_old_range != in_new_range) {
ts_range_array_add(differences, current_position, next_old_position);
}
if (in_old_range) old_index++;
current_position = next_old_position;
in_old_range = !in_old_range;
} else if (next_new_position.bytes < next_old_position.bytes) {
// The new list's boundary comes first.
if (in_old_range != in_new_range) {
ts_range_array_add(differences, current_position, next_new_position);
}
if (in_new_range) new_index++;
current_position = next_new_position;
in_new_range = !in_new_range;
} else {
// Both lists have a boundary at the same byte: cross both at once.
if (in_old_range != in_new_range) {
ts_range_array_add(differences, current_position, next_new_position);
}
if (in_old_range) old_index++;
if (in_new_range) new_index++;
in_old_range = !in_old_range;
in_new_range = !in_new_range;
current_position = next_new_position;
}
}
}
// Traversal state used by the iterator_* helpers below.
typedef struct {
// Cursor whose `stack` holds the path from the root to the current subtree.
TreeCursor cursor;
// Language consulted for alias lookups (`ts_language_alias_at`).
const TSLanguage *language;
// Counter incremented when a visible node is entered and decremented when
// it is left.
unsigned visible_depth;
// True while the iterator is positioned in the padding that precedes the
// current subtree rather than in the subtree's own content.
bool in_padding;
} Iterator;
// Build an iterator positioned on `tree`. The stack of `cursor` is emptied
// and seeded with a single entry for `tree` at position zero; the iterator
// then takes a copy of the cursor struct.
static Iterator iterator_new(
  TreeCursor *cursor,
  const Subtree *tree,
  const TSLanguage *language
) {
  TreeCursorEntry root_entry = {
    .subtree = tree,
    .position = length_zero(),
    .child_index = 0,
    .structural_child_index = 0,
  };
  array_clear(&cursor->stack);
  array_push(&cursor->stack, root_entry);

  Iterator iterator = {
    .cursor = *cursor,
    .language = language,
    .visible_depth = 1,
    .in_padding = false,
  };
  return iterator;
}
// The traversal is finished once the cursor stack has been emptied.
static bool iterator_done(Iterator *self) {
  uint32_t remaining = self->cursor.stack.size;
  return remaining == 0;
}
// Start of the region the iterator currently covers: the start of the
// padding while `in_padding` is set, otherwise the start of the subtree's
// content (position plus padding).
static Length iterator_start_position(Iterator *self) {
  TreeCursorEntry entry = *array_back(&self->cursor.stack);
  if (self->in_padding) return entry.position;
  return length_add(entry.position, ts_subtree_padding(*entry.subtree));
}
// End of the region the iterator currently covers: the end of the padding
// while `in_padding` is set, otherwise the end of the subtree's content.
static Length iterator_end_position(Iterator *self) {
  TreeCursorEntry entry = *array_back(&self->cursor.stack);
  Length content_start = length_add(entry.position, ts_subtree_padding(*entry.subtree));
  return self->in_padding
    ? content_start
    : length_add(content_start, ts_subtree_size(*entry.subtree));
}
static bool iterator_tree_is_visible(const Iterator *self) {
TreeCursorEntry entry = *array_back(&self->cursor.stack);
if (ts_subtree_visible(*entry.subtree)) return true;
if (self->cursor.stack.size > 1) {
Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
return ts_language_alias_at(
self->language,
parent.ptr->production_id,
entry.structural_child_index
) != 0;
}
return false;
}
// Find the innermost visible (or aliased) subtree on the cursor stack and
// report it through the out-parameters.
//
// The search starts at the top of the stack, or one entry below it while the
// iterator is in padding, and walks toward the root. The out-parameters are
// written only when a matching entry is found; callers must therefore
// initialize them beforehand.
static void iterator_get_visible_state(
const Iterator *self,
Subtree *tree,
TSSymbol *alias_symbol,
uint32_t *start_byte
) {
uint32_t i = self->cursor.stack.size - 1;
if (self->in_padding) {
// In padding the top entry has not been entered yet; start from its parent,
// or report nothing when there is no parent.
if (i == 0) return;
i--;
}
// `i + 1 > 0` lets the unsigned index run down to and including 0; the loop
// ends when `i` wraps around after the root entry has been examined.
for (; i + 1 > 0; i--) {
TreeCursorEntry entry = self->cursor.stack.contents[i];
if (i > 0) {
// The alias (if any) comes from the parent's production and the entry's
// structural child index. The root entry has no parent, so for i == 0
// `*alias_symbol` keeps whatever value it already holds.
const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
*alias_symbol = ts_language_alias_at(
self->language,
parent->ptr->production_id,
entry.structural_child_index
);
}
if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
*tree = *entry.subtree;
*start_byte = entry.position.bytes;
break;
}
}
}
// Move up to the parent entry by dropping the top of the stack. Leaving a
// visible node that had been entered (not just its padding) lowers
// `visible_depth`; leaving an entry that is not its parent's first child
// clears `in_padding`.
static void iterator_ascend(Iterator *self) {
  if (iterator_done(self)) return;

  if (!self->in_padding && iterator_tree_is_visible(self)) {
    self->visible_depth--;
  }

  TreeCursorEntry *top = array_back(&self->cursor.stack);
  if (top->child_index > 0) {
    self->in_padding = false;
  }

  self->cursor.stack.size--;
}
// Descend from the current entry toward byte offset `goal_position`.
//
// At each level the first child whose end lies beyond `goal_position` is
// pushed onto the stack. The descent stops with `true` as soon as a visible
// child has been pushed; invisible children are descended into further.
// Returns `false` when the iterator is in padding or when no visible
// descendant could be reached.
static bool iterator_descend(Iterator *self, uint32_t goal_position) {
if (self->in_padding) return false;
bool did_descend = false;
do {
did_descend = false;
TreeCursorEntry entry = *array_back(&self->cursor.stack);
Length position = entry.position;
uint32_t structural_child_index = 0;
for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
// `child_left` is where the child's content starts (after its padding),
// `child_right` is where the child ends.
Length child_left = length_add(position, ts_subtree_padding(*child));
Length child_right = length_add(child_left, ts_subtree_size(*child));
if (child_right.bytes > goal_position) {
array_push(&self->cursor.stack, ((TreeCursorEntry) {
.subtree = child,
.position = position,
.child_index = i,
.structural_child_index = structural_child_index,
}));
if (iterator_tree_is_visible(self)) {
// If the goal lies before the child's content, the iterator is positioned
// in the child's padding; otherwise the visible child itself is entered.
if (child_left.bytes > goal_position) {
self->in_padding = true;
} else {
self->visible_depth++;
}
return true;
}
// Invisible child: keep descending from it in the next round.
did_descend = true;
break;
}
position = child_right;
// Extra subtrees do not advance the structural child index.
if (!ts_subtree_extra(*child)) structural_child_index++;
}
} while (did_descend);
return false;
}
// Step the iterator forward.
//
// If the iterator sits in the padding before a subtree, it moves into that
// subtree (or descends into it when the subtree is not visible). Otherwise
// it pops entries until one with a following sibling is found, moves to that
// sibling, and again either stops in its padding, enters it, or descends
// into it. The stack is left empty when there is nothing further to visit.
static void iterator_advance(Iterator *self) {
if (self->in_padding) {
self->in_padding = false;
if (iterator_tree_is_visible(self)) {
self->visible_depth++;
} else {
iterator_descend(self, 0);
}
return;
}
for (;;) {
// Leaving the current entry: undo its contribution to `visible_depth`.
if (iterator_tree_is_visible(self)) self->visible_depth--;
TreeCursorEntry entry = array_pop(&self->cursor.stack);
if (iterator_done(self)) return;
const Subtree *parent = array_back(&self->cursor.stack)->subtree;
uint32_t child_index = entry.child_index + 1;
if (ts_subtree_child_count(*parent) > child_index) {
// The next sibling starts where the popped entry (padding included) ends.
Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
uint32_t structural_child_index = entry.structural_child_index;
if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
const Subtree *next_child = &ts_subtree_children(*parent)[child_index];
array_push(&self->cursor.stack, ((TreeCursorEntry) {
.subtree = next_child,
.position = position,
.child_index = child_index,
.structural_child_index = structural_child_index,
}));
if (iterator_tree_is_visible(self)) {
// A visible sibling with padding is first visited "in padding".
if (ts_subtree_padding(*next_child).bytes > 0) {
self->in_padding = true;
} else {
self->visible_depth++;
}
} else {
iterator_descend(self, 0);
}
break;
}
// No following sibling at this level: the loop repeats and pops the parent.
}
}
// Result of comparing the nodes under two iterators (see iterator_compare).
typedef enum {
// The nodes are different; the spanned text is reported as changed.
IteratorDiffers,
// The nodes have the same symbol but may differ internally; the caller
// descends into both to find out.
IteratorMayDiffer,
// The nodes are considered identical and can be skipped as a whole.
IteratorMatches,
} IteratorComparison;
// Classify the relationship between the visible nodes that the old and new
// iterators currently point at.
//
// - Both iterators without a visible node: IteratorMatches.
// - Exactly one without a visible node, or differing symbol/alias:
//   IteratorDiffers.
// - Same symbol and alias: IteratorMatches when the old node is unchanged,
//   is not an error node, starts at the same byte, has the same byte size and
//   both nodes carry comparable parse states; IteratorMayDiffer otherwise.
static IteratorComparison iterator_compare(
  const Iterator *old_iter,
  const Iterator *new_iter
) {
  Subtree old_tree = NULL_SUBTREE;
  Subtree new_tree = NULL_SUBTREE;
  TSSymbol old_alias_symbol = 0;
  TSSymbol new_alias_symbol = 0;
  uint32_t old_start = 0;
  uint32_t new_start = 0;
  iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
  iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);

  if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches;
  if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers;

  // Nodes of a different kind can never be reused.
  if (old_alias_symbol != new_alias_symbol) return IteratorDiffers;
  if (ts_subtree_symbol(old_tree) != ts_subtree_symbol(new_tree)) return IteratorDiffers;

  // Same kind of node: decide whether it can be skipped without looking inside.
  bool is_identical =
    old_start == new_start &&
    !ts_subtree_has_changes(old_tree) &&
    ts_subtree_symbol(old_tree) != ts_builtin_sym_error &&
    ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes &&
    ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE &&
    ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE &&
    (ts_subtree_parse_state(old_tree) == ERROR_STATE) ==
    (ts_subtree_parse_state(new_tree) == ERROR_STATE);

  return is_identical ? IteratorMatches : IteratorMayDiffer;
}
#ifdef DEBUG_GET_CHANGED_RANGES
// Debug helper (only compiled with DEBUG_GET_CHANGED_RANGES): print the symbol
// name of the node on top of the stack, a "(p)" marker when the iterator is in
// padding, the visible depth, and the start/end points with 1-based rows.
static inline void iterator_print_state(Iterator *self) {
TreeCursorEntry entry = *array_back(&self->cursor.stack);
TSPoint start = iterator_start_position(self).extent;
TSPoint end = iterator_end_position(self).extent;
const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree));
printf(
"(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
name, self->in_padding ? "(p)" : " ",
self->visible_depth,
start.row + 1, start.column,
end.row + 1, end.column
);
}
#endif
// Compute the ranges of text whose syntactic structure differs between
// `old_tree` and `new_tree`.
//
// Both trees are walked in lock-step with two iterators built on `cursor1`
// and `cursor2`. At every step the current nodes are compared; regions where
// they differ are appended to the result, and regions where they might differ
// are descended into. `included_range_differences` lists byte ranges whose
// inclusion in the parse changed; nodes that otherwise match but intersect
// such a range are inspected more closely.
//
// On return the cursors are written back from the iterators, `*ranges`
// points to the result array's storage, and the number of ranges is returned.
// NOTE(review): ownership of `*ranges` presumably passes to the caller;
// confirm against the call sites.
unsigned ts_subtree_get_changed_ranges(
const Subtree *old_tree, const Subtree *new_tree,
TreeCursor *cursor1, TreeCursor *cursor2,
const TSLanguage *language,
const TSRangeArray *included_range_differences,
TSRange **ranges
) {
TSRangeArray results = array_new();
Iterator old_iter = iterator_new(cursor1, old_tree, language);
Iterator new_iter = iterator_new(cursor2, new_tree, language);
unsigned included_range_difference_index = 0;
// If the two trees start at different offsets, the gap between the two
// start positions is itself a changed range.
Length position = iterator_start_position(&old_iter);
Length next_position = iterator_start_position(&new_iter);
if (position.bytes < next_position.bytes) {
ts_range_array_add(&results, position, next_position);
position = next_position;
} else if (position.bytes > next_position.bytes) {
ts_range_array_add(&results, next_position, position);
next_position = position;
}
do {
#ifdef DEBUG_GET_CHANGED_RANGES
printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
iterator_print_state(&old_iter);
printf("\tvs\t");
iterator_print_state(&new_iter);
puts("");
#endif
// Compare the old and new subtrees.
IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
// Even if the two subtrees appear to be identical, they could differ
// internally if they contain a range of text that was previously
// excluded from the parse, and is now included, or vice-versa.
if (comparison == IteratorMatches && ts_range_array_intersects(
included_range_differences,
included_range_difference_index,
position.bytes,
iterator_end_position(&old_iter).bytes
)) {
comparison = IteratorMayDiffer;
}
bool is_changed = false;
switch (comparison) {
// If the subtrees are definitely identical, move to the end
// of both subtrees.
case IteratorMatches:
next_position = iterator_end_position(&old_iter);
break;
// If the subtrees might differ internally, descend into both
// subtrees, finding the first child that spans the current position.
case IteratorMayDiffer:
if (iterator_descend(&old_iter, position.bytes)) {
if (!iterator_descend(&new_iter, position.bytes)) {
is_changed = true;
next_position = iterator_end_position(&old_iter);
}
} else if (iterator_descend(&new_iter, position.bytes)) {
is_changed = true;
next_position = iterator_end_position(&new_iter);
} else {
next_position = length_min(
iterator_end_position(&old_iter),
iterator_end_position(&new_iter)
);
}
break;
// If the subtrees are different, record a change and then move
// to the end of both subtrees.
case IteratorDiffers:
is_changed = true;
next_position = length_min(
iterator_end_position(&old_iter),
iterator_end_position(&new_iter)
);
break;
}
// Ensure that both iterators are caught up to the current position.
while (
!iterator_done(&old_iter) &&
iterator_end_position(&old_iter).bytes <= next_position.bytes
) iterator_advance(&old_iter);
while (
!iterator_done(&new_iter) &&
iterator_end_position(&new_iter).bytes <= next_position.bytes
) iterator_advance(&new_iter);
// Ensure that both iterators are at the same depth in the tree.
while (old_iter.visible_depth > new_iter.visible_depth) {
iterator_ascend(&old_iter);
}
while (new_iter.visible_depth > old_iter.visible_depth) {
iterator_ascend(&new_iter);
}
if (is_changed) {
#ifdef DEBUG_GET_CHANGED_RANGES
printf(
" change: [[%u, %u] - [%u, %u]]\n",
position.extent.row + 1, position.extent.column,
next_position.extent.row + 1, next_position.extent.column
);
#endif
ts_range_array_add(&results, position, next_position);
}
position = next_position;
// Keep track of the current position in the included range differences
// array in order to avoid scanning the entire array on each iteration.
while (included_range_difference_index < included_range_differences->size) {
const TSRange *range = &included_range_differences->contents[
included_range_difference_index
];
if (range->end_byte <= position.bytes) {
included_range_difference_index++;
} else {
break;
}
}
} while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
// If the trees have different total sizes, the text between the shorter
// and the longer end is reported as changed as well.
Length old_size = ts_subtree_total_size(*old_tree);
Length new_size = ts_subtree_total_size(*new_tree);
if (old_size.bytes < new_size.bytes) {
ts_range_array_add(&results, old_size, new_size);
} else if (new_size.bytes < old_size.bytes) {
ts_range_array_add(&results, new_size, old_size);
}
// Hand the (possibly grown) cursor storage back to the caller.
*cursor1 = old_iter.cursor;
*cursor2 = new_iter.cursor;
*ranges = results.contents;
return results.size;
}

View File

@@ -0,0 +1,36 @@
#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_
#define TREE_SITTER_GET_CHANGED_RANGES_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./tree_cursor.h"
#include "./subtree.h"
// Growable array of TSRange values.
typedef Array(TSRange) TSRangeArray;
// Writes into `differences` the result of comparing two lists of ranges.
// NOTE(review): the definition is not part of this header; presumably this
// yields the regions covered by exactly one of the two lists - confirm
// against get_changed_ranges.c.
void ts_range_array_get_changed_ranges(
const TSRange *old_ranges, unsigned old_range_count,
const TSRange *new_ranges, unsigned new_range_count,
TSRangeArray *differences
);
// Tests the byte span [start_byte, end_byte) against the ranges of `self`,
// starting the scan at `start_index`.
// NOTE(review): exact boundary semantics are defined in the .c file.
bool ts_range_array_intersects(
const TSRangeArray *self, unsigned start_index,
uint32_t start_byte, uint32_t end_byte
);
// Computes the changed ranges between two syntax trees. The two cursors are
// used as working storage, `*ranges` receives the resulting array and the
// number of ranges is returned.
unsigned ts_subtree_get_changed_ranges(
const Subtree *old_tree, const Subtree *new_tree,
TreeCursor *cursor1, TreeCursor *cursor2,
const TSLanguage *language,
const TSRangeArray *included_range_differences,
TSRange **ranges
);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_GET_CHANGED_RANGES_H_

21
vendor/tree-sitter/src/host.h vendored Normal file
View File

@@ -0,0 +1,21 @@
// Determine endian and pointer size based on known defines.
// TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments
// to override this.

// UINTPTR_MAX comes from <stdint.h>. Without this include, an undefined
// UINTPTR_MAX evaluates to 0 inside `#if`, which would silently select a
// 64-bit pointer size on 32-bit targets whenever this header is included
// before <stdint.h>.
#include <stdint.h>

#if !defined(TS_BIG_ENDIAN)
#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \
  || (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__)))
#define TS_BIG_ENDIAN 1
#else
#define TS_BIG_ENDIAN 0
#endif
#endif

#if !defined(TS_PTR_SIZE)
#if UINTPTR_MAX == 0xFFFFFFFF
#define TS_PTR_SIZE 32
#else
#define TS_PTR_SIZE 64
#endif
#endif

221
vendor/tree-sitter/src/language.c vendored Normal file
View File

@@ -0,0 +1,221 @@
#include "./language.h"
#include "./wasm_store.h"
#include "tree_sitter/api.h"
#include <string.h>
// Obtain another reference to a language. Wasm-backed languages are retained
// through the wasm store; for every other input (including NULL) the same
// pointer is returned with no further action.
const TSLanguage *ts_language_copy(const TSLanguage *self) {
  if (!self) return self;
  if (ts_language_is_wasm(self)) ts_wasm_language_retain(self);
  return self;
}
// Drop a reference to a language. Only wasm-backed languages need releasing;
// NULL and non-wasm languages are ignored.
void ts_language_delete(const TSLanguage *self) {
  if (!self || !ts_language_is_wasm(self)) return;
  ts_wasm_language_release(self);
}
// Number of symbols in the language: regular symbols plus aliases.
uint32_t ts_language_symbol_count(const TSLanguage *self) {
return self->symbol_count + self->alias_count;
}
// Number of parse states in the language.
uint32_t ts_language_state_count(const TSLanguage *self) {
return self->state_count;
}
// Version number stored in the language struct.
uint32_t ts_language_version(const TSLanguage *self) {
return self->version;
}
// Number of field names defined by the language.
uint32_t ts_language_field_count(const TSLanguage *self) {
return self->field_count;
}
// Fill `result` with the parse actions for `symbol` in `state`.
//
// The built-in error symbols never have actions, so they yield an empty,
// non-reusable entry. For every other symbol (which must be a token) the
// parse table value is used as an index into the parse actions array: the
// entry found there is a header holding the count and the reusable flag,
// and the actions themselves follow immediately after it.
void ts_language_table_entry(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol,
  TableEntry *result
) {
  bool is_error_symbol =
    symbol == ts_builtin_sym_error ||
    symbol == ts_builtin_sym_error_repeat;

  if (is_error_symbol) {
    result->actions = NULL;
    result->action_count = 0;
    result->is_reusable = false;
    return;
  }

  assert(symbol < self->token_count);
  const TSParseActionEntry *header = &self->parse_actions[
    ts_language_lookup(self, state, symbol)
  ];
  result->actions = (const TSParseAction *)(header + 1);
  result->action_count = header->entry.count;
  result->is_reusable = header->entry.reusable;
}
// Metadata for a symbol. The two built-in error symbols are not stored in the
// language's table: the error symbol is reported as visible and named, the
// error-repeat symbol as hidden and anonymous.
TSSymbolMetadata ts_language_symbol_metadata(
  const TSLanguage *self,
  TSSymbol symbol
) {
  if (symbol == ts_builtin_sym_error) {
    return (TSSymbolMetadata) {.visible = true, .named = true};
  }
  if (symbol == ts_builtin_sym_error_repeat) {
    return (TSSymbolMetadata) {.visible = false, .named = false};
  }
  return self->symbol_metadata[symbol];
}
// Translate an internal symbol through the language's public symbol map.
// The built-in error symbol maps to itself.
TSSymbol ts_language_public_symbol(
  const TSLanguage *self,
  TSSymbol symbol
) {
  return symbol == ts_builtin_sym_error
    ? symbol
    : self->public_symbol_map[symbol];
}
// State reached from `state` after `symbol`.
//
// - Built-in error symbols: always 0.
// - Non-terminals: the parse table value is the successor state.
// - Tokens: taken from the last action listed for the symbol. A shift yields
//   its target state (or `state` itself when the shift is for an extra
//   token); anything else, or no action at all, yields 0.
TSStateId ts_language_next_state(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
    return 0;
  }
  if (symbol >= self->token_count) {
    return ts_language_lookup(self, state, symbol);
  }

  uint32_t action_count;
  const TSParseAction *actions = ts_language_actions(self, state, symbol, &action_count);
  if (action_count == 0) return 0;

  TSParseAction last_action = actions[action_count - 1];
  if (last_action.type != TSParseActionTypeShift) return 0;
  return last_action.shift.extra ? state : last_action.shift.state;
}
// Name of a symbol: "ERROR" and "_ERROR" for the built-in error symbols, the
// language's own name for symbols below the symbol count, NULL otherwise.
const char *ts_language_symbol_name(
  const TSLanguage *self,
  TSSymbol symbol
) {
  if (symbol == ts_builtin_sym_error) return "ERROR";
  if (symbol == ts_builtin_sym_error_repeat) return "_ERROR";
  if (symbol >= ts_language_symbol_count(self)) return NULL;
  return self->symbol_names[symbol];
}
// Look up the public symbol for a node name.
//
// `string` holds the name and `length` its length in bytes; `is_named`
// selects between named and anonymous symbols. Returns the built-in error
// symbol for the name "ERROR", the public symbol of the first visible (or
// supertype) symbol with a matching name and namedness, or 0 when nothing
// matches.
TSSymbol ts_language_symbol_for_name(
  const TSLanguage *self,
  const char *string,
  uint32_t length,
  bool is_named
) {
  // strncmp() alone reports equality for every proper prefix of "ERROR"
  // (including the empty string), so also require that the name is long
  // enough to cover the whole word.
  if (length >= sizeof("ERROR") - 1 && !strncmp(string, "ERROR", length)) {
    return ts_builtin_sym_error;
  }
  uint16_t count = (uint16_t)ts_language_symbol_count(self);
  for (TSSymbol i = 0; i < count; i++) {
    TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
    // Skip hidden non-supertype symbols and symbols of the wrong namedness.
    if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue;
    const char *symbol_name = self->symbol_names[i];
    // The prefix must match and the symbol name must end right after it.
    if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
      return self->public_symbol_map[i];
    }
  }
  return 0;
}
// Classify a symbol from its metadata: hidden symbols are auxiliary, visible
// named symbols are regular, visible unnamed symbols are anonymous.
TSSymbolType ts_language_symbol_type(
  const TSLanguage *self,
  TSSymbol symbol
) {
  TSSymbolMetadata info = ts_language_symbol_metadata(self, symbol);
  if (!info.visible) return TSSymbolTypeAuxiliary;
  return info.named ? TSSymbolTypeRegular : TSSymbolTypeAnonymous;
}
// Name of the field with the given id, or NULL when the language has no
// fields or the id is greater than the field count.
const char *ts_language_field_name_for_id(
  const TSLanguage *self,
  TSFieldId id
) {
  uint32_t field_count = ts_language_field_count(self);
  if (field_count == 0 || id > field_count) return NULL;
  return self->field_names[id];
}
// Find the id of the field called `name` (of `name_length` bytes). Ids start
// at 1; 0 is returned when no field has that name.
TSFieldId ts_language_field_id_for_name(
  const TSLanguage *self,
  const char *name,
  uint32_t name_length
) {
  uint16_t count = (uint16_t)ts_language_field_count(self);
  for (TSSymbol i = 1; i < count + 1; i++) {
    const char *field_name = self->field_names[i];
    int comparison = strncmp(name, field_name, name_length);
    if (comparison == 0) {
      // Prefix matches; it is a hit only if the field name ends here too.
      if (field_name[name_length] == 0) return i;
    } else if (comparison == -1) {
      // Early exit when `name` orders before this field name.
      // NOTE(review): this presumably relies on field names being sorted, and
      // it only triggers when strncmp() returns exactly -1; the C standard
      // only guarantees the sign of the result. Kept as-is to preserve
      // behavior.
      return 0;
    }
  }
  return 0;
}
// Allocate a lookahead iterator positioned before the first valid symbol of
// `state`. Returns NULL when `state` is not a valid state of the language.
// The result is released with ts_lookahead_iterator_delete().
TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) {
  if (state >= self->state_count) return NULL;
  LookaheadIterator *result = ts_malloc(sizeof(*result));
  *result = ts_language_lookaheads(self, state);
  return (TSLookaheadIterator *)result;
}
// Free a lookahead iterator.
void ts_lookahead_iterator_delete(TSLookaheadIterator *self) {
ts_free(self);
}
// Restart the iterator at another state of the language it already uses.
// Returns false, leaving the iterator untouched, when `state` is out of range.
bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) {
  LookaheadIterator *iter = (LookaheadIterator *)self;
  const TSLanguage *language = iter->language;
  if (state >= language->state_count) return false;
  *iter = ts_language_lookaheads(language, state);
  return true;
}
// Language the iterator was created or last reset with.
const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) {
const LookaheadIterator *iterator = (const LookaheadIterator *)self;
return iterator->language;
}
// Restart the iterator for the given language and state. Returns false,
// leaving the iterator untouched, when `state` is out of range for `language`.
bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) {
  if (state >= language->state_count) {
    return false;
  }
  LookaheadIterator *iter = (LookaheadIterator *)self;
  *iter = ts_language_lookaheads(language, state);
  return true;
}
// Advance to the next valid symbol; returns false when there are no more.
bool ts_lookahead_iterator_next(TSLookaheadIterator *self) {
LookaheadIterator *iterator = (LookaheadIterator *)self;
return ts_lookahead_iterator__next(iterator);
}
// Symbol the iterator currently points at.
TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) {
const LookaheadIterator *iterator = (const LookaheadIterator *)self;
return iterator->symbol;
}
// Name of the symbol the iterator currently points at, as reported by
// ts_language_symbol_name() for the iterator's language.
const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) {
const LookaheadIterator *iterator = (const LookaheadIterator *)self;
return ts_language_symbol_name(iterator->language, iterator->symbol);
}

299
vendor/tree-sitter/src/language.h vendored Normal file
View File

@@ -0,0 +1,299 @@
#ifndef TREE_SITTER_LANGUAGE_H_
#define TREE_SITTER_LANGUAGE_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./subtree.h"
#include "./parser.h"
// Second built-in error symbol, numbered one below ts_builtin_sym_error.
#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
// First language version that provides `primary_state_ids`
// (see ts_language_state_is_primary).
#define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14
// NOTE(review): presumably the minimum language version that can be loaded
// through the wasm store; it is not used in this header - confirm at the
// use site.
#define LANGUAGE_VERSION_USABLE_VIA_WASM 13
// Parse actions for one (state, symbol) pair, as filled in by
// ts_language_table_entry().
typedef struct {
// First action, or NULL when there are none.
const TSParseAction *actions;
// Number of actions starting at `actions`.
uint32_t action_count;
// Copy of the `reusable` flag of the parse action entry.
bool is_reusable;
} TableEntry;
// Cursor over the symbols that are valid in one parse state. Created by
// ts_language_lookaheads() and advanced by ts_lookahead_iterator__next().
typedef struct {
const TSLanguage *language;
// Current read position inside the parse table (large states) or the
// small parse table (small states).
const uint16_t *data;
// Small states only: end of the symbol list of the current group.
const uint16_t *group_end;
// NOTE(review): not set by ts_language_lookaheads() or by
// ts_lookahead_iterator__next() as shown in this header.
TSStateId state;
// Table value of the current symbol (or of its group, for small states).
uint16_t table_value;
// NOTE(review): not referenced by the inline functions in this header.
uint16_t section_index;
// Small states only: number of groups that have not been entered yet.
uint16_t group_count;
bool is_small_state;
// For terminal symbols: the actions and their count.
const TSParseAction *actions;
TSSymbol symbol;
// For non-terminal symbols: the successor state; 0 for terminals.
TSStateId next_state;
uint16_t action_count;
} LookaheadIterator;
// Functions implemented in language.c that the inline helpers below rely on.
void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol);
TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol);
// True when `symbol` lies in the range 1..external_token_count (inclusive).
static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
return 0 < symbol && symbol < self->external_token_count + 1;
}
// Convenience wrapper around ts_language_table_entry(): returns the first
// parse action for `symbol` in `state` and stores the number of actions in
// `*count`.
static inline const TSParseAction *ts_language_actions(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol,
  uint32_t *count
) {
  TableEntry table_entry;
  ts_language_table_entry(self, state, symbol, &table_entry);
  const TSParseAction *first_action = table_entry.actions;
  *count = table_entry.action_count;
  return first_action;
}
// True when `symbol` has at least one action in `state` and the first of
// them is a reduce action.
static inline bool ts_language_has_reduce_action(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  TableEntry table_entry;
  ts_language_table_entry(self, state, symbol, &table_entry);
  if (table_entry.action_count == 0) return false;
  return table_entry.actions[0].type == TSParseActionTypeReduce;
}
// Look up the table value for a given symbol and state.
//
// For non-terminal symbols, the table value represents a successor state.
// For terminal symbols, it represents an index in the actions table.
//
// 'Large' parse states are stored as dense rows, so the lookup is a direct
// index. 'Small' parse states are stored as a list of groups, each made of a
// value, a symbol count and that many symbols; the groups are searched for
// the given symbol and 0 is returned when it is absent.
static inline uint16_t ts_language_lookup(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  if (state < self->large_state_count) {
    return self->parse_table[state * self->symbol_count + symbol];
  }

  uint32_t index = self->small_parse_table_map[state - self->large_state_count];
  const uint16_t *cursor = &self->small_parse_table[index];
  uint16_t group_count = *cursor;
  cursor++;
  for (unsigned group = 0; group < group_count; group++) {
    uint16_t section_value = cursor[0];
    uint16_t symbol_count = cursor[1];
    cursor += 2;
    for (unsigned k = 0; k < symbol_count; k++) {
      if (cursor[k] == symbol) return section_value;
    }
    cursor += symbol_count;
  }
  return 0;
}
// True when the parse table holds a non-zero value for `symbol` in `state`.
static inline bool ts_language_has_actions(
const TSLanguage *self,
TSStateId state,
TSSymbol symbol
) {
return ts_language_lookup(self, state, symbol) != 0;
}
// Iterate over all of the symbols that are valid in the given state.
//
// For 'large' parse states, this just requires iterating through
// all possible symbols and checking the parse table for each one.
// For 'small' parse states, this exploits the structure of the
// table to only visit the valid symbols.
//
// The returned iterator is positioned *before* the first symbol;
// ts_lookahead_iterator__next() must be called to reach it.
static inline LookaheadIterator ts_language_lookaheads(
const TSLanguage *self,
TSStateId state
) {
bool is_small_state = state >= self->large_state_count;
const uint16_t *data;
const uint16_t *group_end = NULL;
uint16_t group_count = 0;
if (is_small_state) {
uint32_t index = self->small_parse_table_map[state - self->large_state_count];
data = &self->small_parse_table[index];
// `data` points at the group count; with `group_end` one past it, the
// first increment in __next() lands on `group_end` and loads group 0.
group_end = data + 1;
group_count = *data;
} else {
// Start one slot before the state's row so that the first pre-increment
// in __next() lands on the row's first entry.
data = &self->parse_table[state * self->symbol_count] - 1;
}
return (LookaheadIterator) {
.language = self,
.data = data,
.group_end = group_end,
.group_count = group_count,
.is_small_state = is_small_state,
// The first `symbol++` in __next() is meant to wrap this to symbol 0
// (assumes TSSymbol is a 16-bit unsigned type - TODO confirm).
.symbol = UINT16_MAX,
.next_state = 0,
};
}
// Advance the iterator to the next symbol that is valid in its state.
// Returns false when the symbols are exhausted. On success `symbol` is
// updated; `actions`/`action_count` (terminals) or `next_state`
// (non-terminals) are refreshed whenever a new table value is loaded.
static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) {
// For small parse states, valid symbols are listed explicitly,
// grouped by their value. There's no need to look up the actions
// again until moving to the next group.
if (self->is_small_state) {
self->data++;
if (self->data == self->group_end) {
if (self->group_count == 0) return false;
self->group_count--;
// A group is laid out as: value, symbol count, symbols...
self->table_value = *(self->data++);
unsigned symbol_count = *(self->data++);
self->group_end = self->data + symbol_count;
self->symbol = *self->data;
} else {
// Still inside the same group: only the symbol changes.
self->symbol = *self->data;
return true;
}
}
// For large parse states, iterate through every symbol until one
// is found that has valid actions.
else {
do {
self->data++;
self->symbol++;
if (self->symbol >= self->language->symbol_count) return false;
self->table_value = *self->data;
} while (!self->table_value);
}
// Depending on whether the symbol is terminal or non-terminal, the table
// value either represents a list of actions or a successor state.
if (self->symbol < self->language->token_count) {
const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value];
self->action_count = entry->entry.count;
self->actions = (const TSParseAction *)(entry + 1);
self->next_state = 0;
} else {
self->action_count = 0;
self->next_state = self->table_value;
}
return true;
}
// Whether the state is a "primary state". If this returns false, it indicates that there exists
// another state that behaves identically to this one with respect to query analysis.
//
// Languages older than LANGUAGE_VERSION_WITH_PRIMARY_STATES carry no primary
// state table, so every one of their states is treated as primary.
static inline bool ts_language_state_is_primary(
  const TSLanguage *self,
  TSStateId state
) {
  if (self->version < LANGUAGE_VERSION_WITH_PRIMARY_STATES) return true;
  return self->primary_state_ids[state] == state;
}
// Row of the external scanner state table for `external_scanner_state`, one
// bool per external token. State 0 has no row and yields NULL.
static inline const bool *ts_language_enabled_external_tokens(
  const TSLanguage *self,
  unsigned external_scanner_state
) {
  if (external_scanner_state == 0) return NULL;
  return self->external_scanner.states + self->external_token_count * external_scanner_state;
}
// Start of the alias sequence for a production, or NULL for production 0.
// Each production owns `max_alias_sequence_length` consecutive slots.
static inline const TSSymbol *ts_language_alias_sequence(
  const TSLanguage *self,
  uint32_t production_id
) {
  if (!production_id) return NULL;
  return &self->alias_sequences[production_id * self->max_alias_sequence_length];
}
// Alias stored for the child at `child_index` of a production. Production 0
// always yields 0.
static inline TSSymbol ts_language_alias_at(
  const TSLanguage *self,
  uint32_t production_id,
  uint32_t child_index
) {
  if (!production_id) return 0;
  return self->alias_sequences[production_id * self->max_alias_sequence_length + child_index];
}
// Retrieve the half-open range [*start, *end) of field map entries that
// belong to a production. Both pointers are NULL for languages without fields.
static inline void ts_language_field_map(
  const TSLanguage *self,
  uint32_t production_id,
  const TSFieldMapEntry **start,
  const TSFieldMapEntry **end
) {
  if (self->field_count == 0) {
    *start = NULL;
    *end = NULL;
    return;
  }
  TSFieldMapSlice slice = self->field_map_slices[production_id];
  const TSFieldMapEntry *first = &self->field_map_entries[slice.index];
  *start = first;
  *end = first + slice.length;
}
// Retrieve the half-open range [*start, *end) of public symbols under which
// `original_symbol` can appear.
//
// By default the range holds just the symbol's entry in the public symbol
// map. If the alias map has a record for the symbol, the range covers that
// record's list instead. The alias map is read as a sequence of records
// (symbol, count, `count` symbols); the scan stops at a 0 symbol or at the
// first record whose symbol is greater than `original_symbol`.
static inline void ts_language_aliases_for_symbol(
const TSLanguage *self,
TSSymbol original_symbol,
const TSSymbol **start,
const TSSymbol **end
) {
*start = &self->public_symbol_map[original_symbol];
*end = *start + 1;
unsigned idx = 0;
for (;;) {
TSSymbol symbol = self->alias_map[idx++];
if (symbol == 0 || symbol > original_symbol) break;
uint16_t count = self->alias_map[idx++];
if (symbol == original_symbol) {
*start = &self->alias_map[idx];
*end = &self->alias_map[idx + count];
break;
}
// Skip over this record's symbol list.
idx += count;
}
}
// Write the name of `symbol` to `f`, escaped for use inside a double-quoted
// DOT string: quotes and backslashes are prefixed with a backslash, newlines
// and tabs are written as the two-character sequences \n and \t.
//
// Nothing is written when the symbol has no name (ts_language_symbol_name()
// returns NULL for symbols outside the language's range).
static inline void ts_language_write_symbol_as_dot_string(
  const TSLanguage *self,
  FILE *f,
  TSSymbol symbol
) {
  const char *name = ts_language_symbol_name(self, symbol);
  // Guard against unknown symbols instead of dereferencing NULL below.
  if (!name) return;
  for (const char *chr = name; *chr; chr++) {
    switch (*chr) {
      case '"':
      case '\\':
        fputc('\\', f);
        fputc(*chr, f);
        break;
      case '\n':
        fputs("\\n", f);
        break;
      case '\t':
        fputs("\\t", f);
        break;
      default:
        fputc(*chr, f);
        break;
    }
  }
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_LANGUAGE_H_

52
vendor/tree-sitter/src/length.h vendored Normal file
View File

@@ -0,0 +1,52 @@
#ifndef TREE_SITTER_LENGTH_H_
#define TREE_SITTER_LENGTH_H_
#include <stdlib.h>
#include <stdbool.h>
#include "./point.h"
#include "tree_sitter/api.h"
// A position or size in the text, tracked both as a byte count and as a
// TSPoint.
typedef struct {
uint32_t bytes;
TSPoint extent;
} Length;
// Sentinel recognised by length_is_undefined(): zero bytes combined with a
// non-zero second extent component (presumably the column - confirm against
// the TSPoint definition).
static const Length LENGTH_UNDEFINED = {0, {0, 1}};
// Largest representable length in every component.
static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}};
// True for lengths that have zero bytes but a non-zero column, a combination
// used as the "undefined" marker (see LENGTH_UNDEFINED).
static inline bool length_is_undefined(Length length) {
return length.bytes == 0 && length.extent.column != 0;
}
// Return whichever length has fewer bytes; `len2` wins ties.
static inline Length length_min(Length len1, Length len2) {
  if (len1.bytes < len2.bytes) {
    return len1;
  }
  return len2;
}
// Sum of two lengths: bytes are added, extents are combined with point_add().
static inline Length length_add(Length len1, Length len2) {
  Length sum = {
    len1.bytes + len2.bytes,
    point_add(len1.extent, len2.extent),
  };
  return sum;
}
// Difference of two lengths: bytes are subtracted (unsigned arithmetic, no
// clamping), extents are combined with point_sub().
static inline Length length_sub(Length len1, Length len2) {
  Length difference = {
    len1.bytes - len2.bytes,
    point_sub(len1.extent, len2.extent),
  };
  return difference;
}
static inline Length length_zero(void) {
Length result = {0, {0, 0}};
return result;
}
// Subtract `len2` from `len1`, yielding the zero length instead of wrapping
// around when `len1` does not have more bytes than `len2`.
static inline Length length_saturating_sub(Length len1, Length len2) {
  if (len1.bytes <= len2.bytes) return length_zero();
  return length_sub(len1, len2);
}
#endif

419
vendor/tree-sitter/src/lexer.c vendored Normal file
View File

@@ -0,0 +1,419 @@
#include <stdio.h>
#include "./lexer.h"
#include "./subtree.h"
#include "./length.h"
#include "./unicode.h"
// Emit a lexing log message of the form `<message> character:<c>` through the
// lexer's logger, if one is installed. Printable ASCII characters (32..126)
// are shown as characters, everything else as a number. `message` must be a
// string literal because it is concatenated with the format suffix, and a
// `Lexer *self` must be in scope at the expansion site.
// NOTE: the macro expands to a bare `if` block, so `character` is evaluated
// more than once and call sites are written without a trailing semicolon.
#define LOG(message, character) \
if (self->logger.log) { \
snprintf( \
self->debug_buffer, \
TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \
32 <= character && character < 127 ? \
message " character:'%c'" : \
message " character:%d", \
character \
); \
self->logger.log( \
self->logger.payload, \
TSLogTypeLex, \
self->debug_buffer \
); \
}
// Code point U+FEFF; ts_lexer_start() skips it when it is the very first
// character of the input.
static const int32_t BYTE_ORDER_MARK = 0xFEFF;
// A range that starts at byte 0 / point (0, 0) and extends to the maximum
// representable byte offset and point.
static const TSRange DEFAULT_RANGE = {
.start_point = {
.row = 0,
.column = 0,
},
.end_point = {
.row = UINT32_MAX,
.column = UINT32_MAX,
},
.start_byte = 0,
.end_byte = UINT32_MAX
};
// Report whether the lexer is at EOF. EOF is not stored as a separate flag:
// the lexer is at EOF exactly when `current_included_range_index` has moved
// past the last included range.
static bool ts_lexer__eof(const TSLexer *_self) {
  const Lexer *lexer = (const Lexer *)_self;
  return lexer->included_range_count == lexer->current_included_range_index;
}
// Forget the cached chunk of source text. Called whenever the lexer's
// position changes in a way that invalidates the chunk.
static void ts_lexer__clear_chunk(Lexer *self) {
  self->chunk_start = 0;
  self->chunk_size = 0;
  self->chunk = NULL;
}
// Call the lexer's input callback to obtain a new chunk of source code
// for the current position. The chunk is recorded as starting at the current
// byte offset. If the callback reports a size of zero, the lexer is put into
// the EOF state and the chunk pointer is cleared.
static void ts_lexer__get_chunk(Lexer *self) {
self->chunk_start = self->current_position.bytes;
self->chunk = self->input.read(
self->input.payload,
self->current_position.bytes,
self->current_position.extent,
&self->chunk_size
);
if (!self->chunk_size) {
// No more input: mark EOF by consuming all included ranges.
self->current_included_range_index = self->included_range_count;
self->chunk = NULL;
}
}
// Decode the next unicode character in the current chunk of source code.
// This assumes that the lexer has already retrieved a chunk of source
// code that spans the current position.
//
// On return `data.lookahead` holds the decoded character and
// `lookahead_size` the number of bytes it occupies. An exhausted chunk yields
// a NUL lookahead of size 1; an undecodable sequence yields TS_DECODE_ERROR
// with size 1.
static void ts_lexer__get_lookahead(Lexer *self) {
uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
uint32_t size = self->chunk_size - position_in_chunk;
if (size == 0) {
self->lookahead_size = 1;
self->data.lookahead = '\0';
return;
}
const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8
? ts_decode_utf8
: ts_decode_utf16;
self->lookahead_size = decode(chunk, size, &self->data.lookahead);
// If this chunk ended in the middle of a multi-byte character,
// try again with a fresh chunk.
if (self->data.lookahead == TS_DECODE_ERROR && size < 4) {
ts_lexer__get_chunk(self);
chunk = (const uint8_t *)self->chunk;
size = self->chunk_size;
self->lookahead_size = decode(chunk, size, &self->data.lookahead);
}
// Treat an invalid sequence as a single byte.
if (self->data.lookahead == TS_DECODE_ERROR) {
self->lookahead_size = 1;
}
}
// Move the lexer to `position`, snapping forward to the start of the next
// included range if the position falls between ranges. The cached chunk is
// dropped when it does not contain the new position, and the lookahead is
// reset so that it is decoded again on demand. If the position lies beyond
// every included range, the lexer enters the EOF state at the end of the last
// range.
static void ts_lexer_goto(Lexer *self, Length position) {
self->current_position = position;
// Move to the first valid position at or after the given position.
bool found_included_range = false;
for (unsigned i = 0; i < self->included_range_count; i++) {
TSRange *included_range = &self->included_ranges[i];
// Skip ranges that end at or before the position, and empty ranges.
if (
included_range->end_byte > self->current_position.bytes &&
included_range->end_byte > included_range->start_byte
) {
if (included_range->start_byte >= self->current_position.bytes) {
self->current_position = (Length) {
.bytes = included_range->start_byte,
.extent = included_range->start_point,
};
}
self->current_included_range_index = i;
found_included_range = true;
break;
}
}
if (found_included_range) {
// If the current position is outside of the current chunk of text,
// then clear out the current chunk of text.
if (self->chunk && (
self->current_position.bytes < self->chunk_start ||
self->current_position.bytes >= self->chunk_start + self->chunk_size
)) {
ts_lexer__clear_chunk(self);
}
// A lookahead size of 0 marks the lookahead as not yet decoded.
self->lookahead_size = 0;
self->data.lookahead = '\0';
}
// If the given position is beyond all of the included ranges, move to the
// EOF state - past the end of the included ranges.
else {
self->current_included_range_index = self->included_range_count;
TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
self->current_position = (Length) {
.bytes = last_included_range->end_byte,
.extent = last_included_range->end_point,
};
ts_lexer__clear_chunk(self);
self->lookahead_size = 1;
self->data.lookahead = '\0';
}
}
// Intended to be called only from functions that control logging.
//
// Consume the current lookahead character: advance the byte offset and the
// row/column, hop over the gap to the next included range when the end of the
// current one is reached, and decode the next lookahead (fetching a new chunk
// if the position left the cached one). When `skip` is true the token start
// is moved to the new position. Past the last included range the lexer is
// left with a NUL lookahead and no chunk.
static void ts_lexer__do_advance(Lexer *self, bool skip) {
if (self->lookahead_size) {
self->current_position.bytes += self->lookahead_size;
if (self->data.lookahead == '\n') {
self->current_position.extent.row++;
self->current_position.extent.column = 0;
} else {
// Columns are counted in bytes.
self->current_position.extent.column += self->lookahead_size;
}
}
const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
// Step over exhausted and empty ranges.
while (
self->current_position.bytes >= current_range->end_byte ||
current_range->end_byte == current_range->start_byte
) {
if (self->current_included_range_index < self->included_range_count) {
self->current_included_range_index++;
}
if (self->current_included_range_index < self->included_range_count) {
current_range++;
self->current_position = (Length) {
current_range->start_byte,
current_range->start_point,
};
} else {
// No ranges left: EOF.
current_range = NULL;
break;
}
}
if (skip) self->token_start_position = self->current_position;
if (current_range) {
if (
self->current_position.bytes < self->chunk_start ||
self->current_position.bytes >= self->chunk_start + self->chunk_size
) {
ts_lexer__get_chunk(self);
}
ts_lexer__get_lookahead(self);
} else {
ts_lexer__clear_chunk(self);
self->data.lookahead = '\0';
self->lookahead_size = 1;
}
}
// Advance to the next character in the source code, retrieving a new
// chunk of source code if needed. Does nothing when the lexer has no chunk.
// The consumed character is logged as "skip" or "consume" depending on
// `skip`.
static void ts_lexer__advance(TSLexer *_self, bool skip) {
Lexer *self = (Lexer *)_self;
if (!self->chunk) return;
if (skip) {
LOG("skip", self->data.lookahead)
} else {
LOG("consume", self->data.lookahead)
}
ts_lexer__do_advance(self, skip);
}
// Mark that a token match has completed. This can be called multiple
// times if a longer match is found later. The token end is normally the
// current position.
static void ts_lexer__mark_end(TSLexer *_self) {
Lexer *self = (Lexer *)_self;
if (!ts_lexer__eof(&self->data)) {
// If the lexer is right at the beginning of included range,
// then the token should be considered to end at the *end* of the
// previous included range, rather than here.
TSRange *current_included_range = &self->included_ranges[
self->current_included_range_index
];
if (
self->current_included_range_index > 0 &&
self->current_position.bytes == current_included_range->start_byte
) {
TSRange *previous_included_range = current_included_range - 1;
self->token_end_position = (Length) {
previous_included_range->end_byte,
previous_included_range->end_point,
};
return;
}
}
self->token_end_position = self->current_position;
}
// Count the characters between the start of the current line and the lexer's
// current position.
//
// The lexer is rewound by its byte column to the start of the line and then
// advanced one character at a time until it is back at the original byte
// offset (or runs out of input); the number of steps is returned. Also sets
// `did_get_column`.
static uint32_t ts_lexer__get_column(TSLexer *_self) {
Lexer *self = (Lexer *)_self;
uint32_t goal_byte = self->current_position.bytes;
self->did_get_column = true;
// extent.column is a byte count, so this jumps to the start of the line.
self->current_position.bytes -= self->current_position.extent.column;
self->current_position.extent.column = 0;
if (self->current_position.bytes < self->chunk_start) {
ts_lexer__get_chunk(self);
}
uint32_t result = 0;
if (!ts_lexer__eof(_self)) {
ts_lexer__get_lookahead(self);
while (self->current_position.bytes < goal_byte && self->chunk) {
result++;
ts_lexer__do_advance(self, false);
if (ts_lexer__eof(_self)) break;
}
}
return result;
}
// Is the lexer at a boundary between two disjoint included ranges of
// source code? This is exposed as an API because some languages' external
// scanners need to perform custom actions at these boundaries.
//
// Always false once the lexer has moved past the last included range.
static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) {
  const Lexer *self = (const Lexer *)_self;
  if (self->current_included_range_index >= self->included_range_count) {
    return false;
  }
  TSRange *range = &self->included_ranges[self->current_included_range_index];
  return range->start_byte == self->current_position.bytes;
}
// Initialise a lexer: install the callback table used by generated parsers,
// clear the chunk, position and logger, and reset the included ranges by
// calling ts_lexer_set_included_ranges() with no ranges.
void ts_lexer_init(Lexer *self) {
*self = (Lexer) {
.data = {
// The lexer's methods are stored as struct fields so that generated
// parsers can call them without needing to be linked against this
// library.
.advance = ts_lexer__advance,
.mark_end = ts_lexer__mark_end,
.get_column = ts_lexer__get_column,
.is_at_included_range_start = ts_lexer__is_at_included_range_start,
.eof = ts_lexer__eof,
.lookahead = 0,
.result_symbol = 0,
},
.chunk = NULL,
.chunk_size = 0,
.chunk_start = 0,
.current_position = {0, {0, 0}},
.logger = {
.payload = NULL,
.log = NULL
},
.included_ranges = NULL,
.included_range_count = 0,
.current_included_range_index = 0,
};
ts_lexer_set_included_ranges(self, NULL, 0);
}
// Release the lexer's included ranges array. The Lexer struct itself is not
// freed.
void ts_lexer_delete(Lexer *self) {
ts_free(self->included_ranges);
}
// Installs a new input source. Chunk data from the previous input is cleared
// and the lexer re-seeks to its current position.
void ts_lexer_set_input(Lexer *self, TSInput input) {
  self->input = input;

  // Drop the cached chunk before seeking again.
  ts_lexer__clear_chunk(self);

  Length here = self->current_position;
  ts_lexer_goto(self, here);
}
// Seeks the lexer to `position`. When the byte offset already matches the
// lexer's current one, nothing is done at all.
void ts_lexer_reset(Lexer *self, Length position) {
  if (position.bytes == self->current_position.bytes) return;
  ts_lexer_goto(self, position);
}
// Begins lexing a new token at the current position: resets the per-token
// bookkeeping, makes sure a chunk and a lookahead value are loaded, and skips
// a byte order mark found at the very start of the input.
void ts_lexer_start(Lexer *self) {
  self->token_start_position = self->current_position;
  self->token_end_position = LENGTH_UNDEFINED;
  self->data.result_symbol = 0;
  self->did_get_column = false;

  // Nothing to load at the end of the input.
  if (ts_lexer__eof(&self->data)) return;

  if (self->chunk_size == 0) ts_lexer__get_chunk(self);
  if (self->lookahead_size == 0) ts_lexer__get_lookahead(self);

  bool at_leading_bom =
    self->current_position.bytes == 0 &&
    self->data.lookahead == BYTE_ORDER_MARK;
  if (at_leading_bom) ts_lexer__advance(&self->data, true);
}
// Completes the current token and raises `*lookahead_end_byte` when the lexer
// examined bytes beyond the value already stored there.
void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
  // A token whose end was never marked ends at the current position.
  if (length_is_undefined(self->token_end_position)) {
    ts_lexer__mark_end(&self->data);
  }

  // If the token ended at an included range boundary, its end position was
  // reset to the end of the preceding range; pull the start back to match.
  if (self->token_end_position.bytes < self->token_start_position.bytes) {
    self->token_start_position = self->token_end_position;
  }

  // Detecting invalid UTF8/UTF16 may require peeking at the byte *after* the
  // bad character, so a decode error widens the examined span by one more
  // byte than usual.
  uint32_t examined_bytes = (self->data.lookahead == TS_DECODE_ERROR) ? 2u : 1u;
  uint32_t end_byte = self->current_position.bytes + examined_bytes;

  if (*lookahead_end_byte < end_byte) {
    *lookahead_end_byte = end_byte;
  }
}
// Keeps advancing (without skipping) until the lexer no longer has a chunk.
void ts_lexer_advance_to_end(Lexer *self) {
  while (self->chunk != NULL) {
    ts_lexer__advance(&self->data, false);
  }
}
// Library-facing wrapper around the `mark_end` callback: marks the end of the
// current token.
void ts_lexer_mark_end(Lexer *self) {
  TSLexer *interface = &self->data;
  ts_lexer__mark_end(interface);
}
// Replaces the lexer's included ranges with a private copy of `ranges`.
//
// A NULL pointer or a zero count selects DEFAULT_RANGE. Otherwise each range
// must end at or after its own start and begin at or after the end of the
// range before it; if not, false is returned and the lexer is left untouched.
// On success the lexer re-seeks to its current position and true is returned.
bool ts_lexer_set_included_ranges(
  Lexer *self,
  const TSRange *ranges,
  uint32_t count
) {
  if (ranges && count > 0) {
    uint32_t min_start_byte = 0;
    for (unsigned i = 0; i < count; i++) {
      bool overlaps_previous = ranges[i].start_byte < min_start_byte;
      bool is_inverted = ranges[i].end_byte < ranges[i].start_byte;
      if (overlaps_previous || is_inverted) return false;
      min_start_byte = ranges[i].end_byte;
    }
  } else {
    ranges = &DEFAULT_RANGE;
    count = 1;
  }

  size_t byte_size = count * sizeof(TSRange);
  self->included_ranges = ts_realloc(self->included_ranges, byte_size);
  memcpy(self->included_ranges, ranges, byte_size);
  self->included_range_count = count;

  // Re-seek to the current position now that the ranges have changed.
  ts_lexer_goto(self, self->current_position);
  return true;
}
// Exposes the lexer's own included-ranges array (not a copy) and writes the
// number of entries to `*count`.
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {
  TSRange *ranges = self->included_ranges;
  *count = self->included_range_count;
  return ranges;
}
#undef LOG

49
vendor/tree-sitter/src/lexer.h vendored Normal file
View File

@@ -0,0 +1,49 @@
#ifndef TREE_SITTER_LEXER_H_
#define TREE_SITTER_LEXER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./length.h"
#include "./subtree.h"
#include "tree_sitter/api.h"
#include "./parser.h"
// Internal lexer state. The public TSLexer interface is the first member, so
// the callbacks can cast the TSLexer pointer they receive back to a Lexer.
typedef struct {
TSLexer data; // callback table + lookahead/result_symbol seen by generated parsers
Length current_position; // where the lexer currently is
Length token_start_position; // set from current_position by ts_lexer_start
Length token_end_position; // LENGTH_UNDEFINED until the token end is marked
TSRange *included_ranges; // array owned by the lexer (ts_realloc'd / ts_free'd)
const char *chunk; // currently loaded input chunk; NULL when none is loaded
TSInput input; // input source installed by ts_lexer_set_input
TSLogger logger; // logging callback + payload; both NULL after init
uint32_t included_range_count; // number of entries in included_ranges
uint32_t current_included_range_index; // index of the range being lexed
uint32_t chunk_start; // byte offset compared against current_position.bytes
uint32_t chunk_size; // zero means no chunk has been fetched yet
uint32_t lookahead_size; // zero means the lookahead has not been decoded yet
bool did_get_column; // set by the get_column callback, cleared by ts_lexer_start
char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; // NOTE(review): presumably scratch space for log messages - confirm
} Lexer;
// Reset all state, install the TSLexer callbacks, select the default range.
void ts_lexer_init(Lexer *);
// Free the included-ranges array owned by the lexer.
void ts_lexer_delete(Lexer *);
// Install a new input, drop the cached chunk, re-seek to the current position.
void ts_lexer_set_input(Lexer *, TSInput);
// Seek to the given position unless the byte offset already matches.
void ts_lexer_reset(Lexer *, Length);
// Begin a new token at the current position.
void ts_lexer_start(Lexer *);
// Finish the token; raises the pointed-to lookahead end byte if needed.
void ts_lexer_finish(Lexer *, uint32_t *);
// Advance until the lexer has no chunk left.
void ts_lexer_advance_to_end(Lexer *);
// Mark the end of the current token.
void ts_lexer_mark_end(Lexer *);
// Copy `ranges` into the lexer; returns false if they are unordered or inverted.
bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);
// Return the lexer's own range array and store its length in `*count`.
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_LEXER_H_

14
vendor/tree-sitter/src/lib.c vendored Normal file
View File

@@ -0,0 +1,14 @@
#define _POSIX_C_SOURCE 200112L
#include "./alloc.c"
#include "./get_changed_ranges.c"
#include "./language.c"
#include "./lexer.c"
#include "./node.c"
#include "./parser.c"
#include "./query.c"
#include "./stack.c"
#include "./subtree.c"
#include "./tree_cursor.c"
#include "./tree.c"
#include "./wasm_store.c"

774
vendor/tree-sitter/src/node.c vendored Normal file
View File

@@ -0,0 +1,774 @@
#include <stdbool.h>
#include "./subtree.h"
#include "./tree.h"
#include "./language.h"
// Cursor over the direct children of one subtree. It tracks the position and
// alias of each child as iteration proceeds.
typedef struct {
Subtree parent; // subtree being iterated; NULL_SUBTREE when the node has no children
const TSTree *tree; // tree that every produced TSNode is attached to
Length position; // parent's start at first, then the end of the child last returned
uint32_t child_index; // index of the next child to return
uint32_t structural_child_index; // number of non-extra children returned so far
const TSSymbol *alias_sequence; // aliases indexed by structural child, or NULL
} NodeChildIterator;
// TSNode - constructors
// Builds a TSNode handle for `subtree` inside `tree`. The start byte, start
// row, start column and alias symbol are packed, in that order, into the
// node's four context slots.
TSNode ts_node_new(
  const TSTree *tree,
  const Subtree *subtree,
  Length position,
  TSSymbol alias
) {
  TSNode node = {
    .context = {
      position.bytes,
      position.extent.row,
      position.extent.column,
      alias,
    },
    .id = subtree,
    .tree = tree,
  };
  return node;
}
static inline TSNode ts_node__null(void) {
return ts_node_new(NULL, NULL, length_zero(), 0);
}
// TSNode - accessors
// Start byte of the node, kept in context slot 0.
uint32_t ts_node_start_byte(TSNode self) {
  uint32_t start_byte = self.context[0];
  return start_byte;
}
// Start row/column of the node, kept in context slots 1 and 2.
TSPoint ts_node_start_point(TSNode self) {
  TSPoint start;
  start.row = self.context[1];
  start.column = self.context[2];
  return start;
}
// Alias symbol of the node, kept in context slot 3 (zero means "no alias").
static inline uint32_t ts_node__alias(const TSNode *self) {
  uint32_t alias = self->context[3];
  return alias;
}
// Loads the Subtree that the node's `id` pointer refers to.
static inline Subtree ts_node__subtree(TSNode self) {
  const Subtree *subtree = (const Subtree *)self.id;
  return *subtree;
}
// NodeChildIterator
static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
Subtree subtree = ts_node__subtree(*node);
if (ts_subtree_child_count(subtree) == 0) {
return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL};
}
const TSSymbol *alias_sequence = ts_language_alias_sequence(
node->tree->language,
subtree.ptr->production_id
);
return (NodeChildIterator) {
.tree = node->tree,
.parent = subtree,
.position = {ts_node_start_byte(*node), ts_node_start_point(*node)},
.child_index = 0,
.structural_child_index = 0,
.alias_sequence = alias_sequence,
};
}
// True once every child of the parent has been returned. The parent pointer
// is dereferenced, so the iterator must have a non-null parent.
static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
  uint32_t total = self->parent.ptr->child_count;
  return self->child_index == total;
}
// Writes the next child into `*result` and advances the iterator. Returns
// false (leaving `*result` untouched) when the parent is null or all children
// have been visited.
static inline bool ts_node_child_iterator_next(
  NodeChildIterator *self,
  TSNode *result
) {
  if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false;

  const Subtree *current = &ts_subtree_children(self->parent)[self->child_index];

  // Only structural (non-extra) children consume an alias-sequence slot.
  TSSymbol alias = 0;
  if (!ts_subtree_extra(*current)) {
    if (self->alias_sequence) {
      alias = self->alias_sequence[self->structural_child_index];
    }
    self->structural_child_index++;
  }

  // The first child's padding is not added: it starts where the parent does.
  Length start = self->position;
  if (self->child_index > 0) {
    start = length_add(start, ts_subtree_padding(*current));
  }

  *result = ts_node_new(self->tree, current, start, alias);

  self->position = length_add(start, ts_subtree_size(*current));
  self->child_index++;
  return true;
}
// TSNode - private
// Decides whether a node takes part in the public tree. With
// `include_anonymous`, any visible or aliased node counts. Without it, an
// aliased node counts when its alias is a named symbol, and an unaliased node
// counts when it is both visible and named.
static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
  Subtree subtree = ts_node__subtree(self);

  if (include_anonymous) {
    return ts_subtree_visible(subtree) || ts_node__alias(&self) != 0;
  }

  TSSymbol alias = ts_node__alias(&self);
  if (alias) {
    return ts_language_symbol_metadata(self.tree->language, alias).named;
  }
  return ts_subtree_visible(subtree) && ts_subtree_named(subtree);
}
// Number of relevant children recorded on the subtree: the visible count
// when anonymous nodes are included, the named count otherwise. Zero for a
// subtree without children.
static inline uint32_t ts_node__relevant_child_count(
  TSNode self,
  bool include_anonymous
) {
  Subtree subtree = ts_node__subtree(self);
  if (ts_subtree_child_count(subtree) == 0) return 0;
  if (include_anonymous) return subtree.ptr->visible_child_count;
  return subtree.ptr->named_child_count;
}
// Returns the relevant child of `self` at `child_index`, counting relevant
// descendants of non-relevant (hidden) children as if they were direct
// children. Returns the null node when the index is out of range.
static inline TSNode ts_node__child(
TSNode self,
uint32_t child_index,
bool include_anonymous
) {
TSNode result = self;
bool did_descend = true;
// Each pass scans one level; a pass that descends restarts the scan inside
// the hidden child, with the index rebased to that child.
while (did_descend) {
did_descend = false;
TSNode child;
// Number of relevant nodes already skipped at this level.
uint32_t index = 0;
NodeChildIterator iterator = ts_node_iterate_children(&result);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (ts_node__is_relevant(child, include_anonymous)) {
if (index == child_index) {
return child;
}
index++;
} else {
// A hidden child contributes all of its relevant children; descend when
// the requested index falls among them, otherwise skip them in bulk.
uint32_t grandchild_index = child_index - index;
uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous);
if (grandchild_index < grandchild_count) {
did_descend = true;
result = child;
child_index = grandchild_index;
break;
}
index += grandchild_count;
}
}
}
return ts_node__null();
}
// Checks whether `other` appears among the zero-byte subtrees at the tail of
// `self`, searching recursively. Children are scanned from last to first and
// the scan stops at the first child that occupies any bytes.
static bool ts_subtree_has_trailing_empty_descendant(
  Subtree self,
  Subtree other
) {
  for (unsigned remaining = ts_subtree_child_count(self); remaining > 0; remaining--) {
    Subtree child = ts_subtree_children(self)[remaining - 1];

    // A non-empty child ends the run of trailing empty subtrees.
    if (ts_subtree_total_bytes(child) > 0) return false;

    if (child.ptr == other.ptr) return true;
    if (ts_subtree_has_trailing_empty_descendant(child, other)) return true;
  }
  return false;
}
// Finds the closest relevant node that precedes `self`. The search starts at
// ts_node_parent(self) and walks through hidden nodes as needed. Returns the
// null node when there is no such sibling.
static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) {
Subtree self_subtree = ts_node__subtree(self);
bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0;
uint32_t target_end_byte = ts_node_end_byte(self);
TSNode node = ts_node_parent(self);
// Best candidate remembered from an outer level, used if the inner levels
// turn out to contain nothing before `self`.
TSNode earlier_node = ts_node__null();
bool earlier_node_is_relevant = false;
while (!ts_node_is_null(node)) {
// Last child seen at this level, before the target, that is relevant or
// has relevant children.
TSNode earlier_child = ts_node__null();
bool earlier_child_is_relevant = false;
bool found_child_containing_target = false;
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (child.id == self.id) break;
// iterator.position is the end of `child`: a child ending past the target
// is treated as containing it.
if (iterator.position.bytes > target_end_byte) {
found_child_containing_target = true;
break;
}
// A child ending exactly at the target's end contains it unless the target
// is empty, in which case the child's trailing empty subtrees are checked.
if (iterator.position.bytes == target_end_byte &&
(!self_is_empty ||
ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) {
found_child_containing_target = true;
break;
}
if (ts_node__is_relevant(child, include_anonymous)) {
earlier_child = child;
earlier_child_is_relevant = true;
} else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
earlier_child = child;
earlier_child_is_relevant = false;
}
}
if (found_child_containing_target) {
// Descend into the containing child, remembering this level's candidate.
if (!ts_node_is_null(earlier_child)) {
earlier_node = earlier_child;
earlier_node_is_relevant = earlier_child_is_relevant;
}
node = child;
} else if (earlier_child_is_relevant) {
return earlier_child;
} else if (!ts_node_is_null(earlier_child)) {
// The candidate is hidden but has relevant children: search inside it.
node = earlier_child;
} else if (earlier_node_is_relevant) {
return earlier_node;
} else {
// Fall back to the remembered outer candidate (possibly the null node).
node = earlier_node;
earlier_node = ts_node__null();
earlier_node_is_relevant = false;
}
}
return ts_node__null();
}
// Finds the closest relevant node that follows `self`. The search starts at
// ts_node_parent(self) and walks through hidden nodes as needed. Returns the
// null node when there is no such sibling.
static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) {
uint32_t target_end_byte = ts_node_end_byte(self);
TSNode node = ts_node_parent(self);
// Best candidate remembered from an outer level.
TSNode later_node = ts_node__null();
bool later_node_is_relevant = false;
while (!ts_node_is_null(node)) {
TSNode later_child = ts_node__null();
bool later_child_is_relevant = false;
TSNode child_containing_target = ts_node__null();
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
// Skip children that end before the target does.
if (iterator.position.bytes < target_end_byte) continue;
if (ts_node_start_byte(child) <= ts_node_start_byte(self)) {
// Starts at or before the target: it contains the target unless it is
// the target's own subtree.
if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) {
child_containing_target = child;
}
} else if (ts_node__is_relevant(child, include_anonymous)) {
later_child = child;
later_child_is_relevant = true;
break;
} else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
later_child = child;
later_child_is_relevant = false;
break;
}
}
if (!ts_node_is_null(child_containing_target)) {
// Descend into the containing child, remembering this level's candidate.
if (!ts_node_is_null(later_child)) {
later_node = later_child;
later_node_is_relevant = later_child_is_relevant;
}
node = child_containing_target;
} else if (later_child_is_relevant) {
return later_child;
} else if (!ts_node_is_null(later_child)) {
// The candidate is hidden but has relevant children: search inside it.
node = later_child;
} else if (later_node_is_relevant) {
return later_node;
} else {
node = later_node;
}
}
return ts_node__null();
}
// Returns the first relevant node under `self` whose end byte lies past
// `goal`. Children are scanned in order, and a non-relevant child with
// visible children is searched in place of itself. Returns the null node
// when nothing qualifies.
static inline TSNode ts_node__first_child_for_byte(
TSNode self,
uint32_t goal,
bool include_anonymous
) {
TSNode node = self;
bool did_descend = true;
while (did_descend) {
did_descend = false;
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (ts_node_end_byte(child) > goal) {
if (ts_node__is_relevant(child, include_anonymous)) {
return child;
} else if (ts_node_child_count(child) > 0) {
// Hidden child with visible children: continue the search inside it.
did_descend = true;
node = child;
break;
}
}
}
}
return ts_node__null();
}
// Descends from `self` towards the byte range [range_start, range_end] and
// returns the deepest relevant node found on the way that spans it. `self`
// is returned when no relevant descendant spans the range.
static inline TSNode ts_node__descendant_for_byte_range(
TSNode self,
uint32_t range_start,
uint32_t range_end,
bool include_anonymous
) {
TSNode node = self;
TSNode last_visible_node = self;
bool did_descend = true;
while (did_descend) {
did_descend = false;
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
// After a successful next(), the iterator position is this child's end.
uint32_t node_end = iterator.position.bytes;
// The end of this node must extend far enough forward to touch
// the end of the range and exceed the start of the range.
if (node_end < range_end) continue;
if (node_end <= range_start) continue;
// The start of this node must extend far enough backward to
// touch the start of the range.
if (range_start < ts_node_start_byte(child)) break;
node = child;
if (ts_node__is_relevant(node, include_anonymous)) {
last_visible_node = node;
}
did_descend = true;
break;
}
}
return last_visible_node;
}
// Row/column counterpart of ts_node__descendant_for_byte_range: descends
// from `self` towards the point range and returns the deepest relevant node
// found on the way that spans it, or `self` when there is none.
static inline TSNode ts_node__descendant_for_point_range(
TSNode self,
TSPoint range_start,
TSPoint range_end,
bool include_anonymous
) {
TSNode node = self;
TSNode last_visible_node = self;
bool did_descend = true;
while (did_descend) {
did_descend = false;
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
// After a successful next(), the iterator position is this child's end.
TSPoint node_end = iterator.position.extent;
// The end of this node must extend far enough forward to touch
// the end of the range and exceed the start of the range.
if (point_lt(node_end, range_end)) continue;
if (point_lte(node_end, range_start)) continue;
// The start of this node must extend far enough backward to
// touch the start of the range.
if (point_lt(range_start, ts_node_start_point(child))) break;
node = child;
if (ts_node__is_relevant(node, include_anonymous)) {
last_visible_node = node;
}
did_descend = true;
break;
}
}
return last_visible_node;
}
// TSNode - public
// End byte of the node: its start byte plus the byte size of its subtree.
uint32_t ts_node_end_byte(TSNode self) {
  Length size = ts_subtree_size(ts_node__subtree(self));
  return ts_node_start_byte(self) + size.bytes;
}
// End row/column of the node: its start point advanced by the subtree extent.
TSPoint ts_node_end_point(TSNode self) {
  TSPoint start = ts_node_start_point(self);
  TSPoint extent = ts_subtree_size(ts_node__subtree(self)).extent;
  return point_add(start, extent);
}
// Public symbol of the node. The alias is used when one is set, otherwise
// the subtree's own symbol; either is mapped through the language's public
// symbol table.
TSSymbol ts_node_symbol(TSNode self) {
  TSSymbol alias = ts_node__alias(&self);
  TSSymbol symbol = alias ? alias : ts_subtree_symbol(ts_node__subtree(self));
  return ts_language_public_symbol(self.tree->language, symbol);
}
// Name of the node's symbol. The alias is used when one is set, otherwise
// the subtree's own symbol.
const char *ts_node_type(TSNode self) {
  TSSymbol alias = ts_node__alias(&self);
  TSSymbol symbol = alias ? alias : ts_subtree_symbol(ts_node__subtree(self));
  return ts_language_symbol_name(self.tree->language, symbol);
}
// Language of the tree this node belongs to.
const TSLanguage *ts_node_language(TSNode self) {
  const TSTree *tree = self.tree;
  return tree->language;
}
// Symbol stored on the subtree itself, ignoring any alias.
TSSymbol ts_node_grammar_symbol(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_symbol(subtree);
}
// Name of the symbol stored on the subtree itself, ignoring any alias.
const char *ts_node_grammar_type(TSNode self) {
  const TSLanguage *language = self.tree->language;
  return ts_language_symbol_name(language, ts_subtree_symbol(ts_node__subtree(self)));
}
// Renders the node's subtree as a string via ts_subtree_string, passing
// along the node's alias and whether that alias symbol is visible.
char *ts_node_string(TSNode self) {
  const TSLanguage *language = self.tree->language;
  TSSymbol alias = ts_node__alias(&self);
  bool alias_is_visible = ts_language_symbol_metadata(language, alias).visible;
  return ts_subtree_string(
    ts_node__subtree(self),
    alias,
    alias_is_visible,
    language,
    false
  );
}
// Two nodes are equal when they refer to the same subtree of the same tree.
bool ts_node_eq(TSNode self, TSNode other) {
  if (self.tree != other.tree) return false;
  return self.id == other.id;
}
// A node is null when it carries no subtree pointer.
bool ts_node_is_null(TSNode self) {
  return !self.id;
}
// Whether the node's subtree is flagged as an extra.
bool ts_node_is_extra(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_extra(subtree);
}
// Whether the node is named. An aliased node takes the answer from its
// alias symbol's metadata; otherwise the subtree's own flag is used.
bool ts_node_is_named(TSNode self) {
  TSSymbol alias = ts_node__alias(&self);
  if (alias) {
    return ts_language_symbol_metadata(self.tree->language, alias).named;
  }
  return ts_subtree_named(ts_node__subtree(self));
}
// Whether the node's subtree is flagged as missing.
bool ts_node_is_missing(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_missing(subtree);
}
// Whether the node's subtree is flagged as having changes.
bool ts_node_has_changes(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_has_changes(subtree);
}
// A node has an error when its subtree carries a non-zero error cost.
bool ts_node_has_error(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_error_cost(subtree) > 0;
}
// Whether the node's public symbol is the builtin error symbol.
bool ts_node_is_error(TSNode self) {
  return ts_node_symbol(self) == ts_builtin_sym_error;
}
// Visible descendants of the node's subtree, plus one for the node itself.
uint32_t ts_node_descendant_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return 1 + ts_subtree_visible_descendant_count(subtree);
}
// Parse state recorded on the node's subtree.
TSStateId ts_node_parse_state(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_parse_state(subtree);
}
// State the language moves to from the node's parse state on the node's
// grammar symbol. TS_TREE_STATE_NONE is passed through unchanged.
TSStateId ts_node_next_parse_state(TSNode self) {
  uint16_t current_state = ts_node_parse_state(self);
  if (current_state == TS_TREE_STATE_NONE) return TS_TREE_STATE_NONE;

  uint16_t grammar_symbol = ts_node_grammar_symbol(self);
  return ts_language_next_state(self.tree->language, current_state, grammar_symbol);
}
// Finds the closest visible ancestor of `self` by walking down from the
// tree's root towards the node. Returns the null node when `self` is the
// root; otherwise returns the last visible node entered during the descent
// (the root itself if none was entered).
TSNode ts_node_parent(TSNode self) {
TSNode node = ts_tree_root_node(self.tree);
uint32_t end_byte = ts_node_end_byte(self);
if (node.id == self.id) return ts_node__null();
TSNode last_visible_node = node;
bool did_descend = true;
while (did_descend) {
did_descend = false;
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
// Stop at `self` itself or at a child that starts after it.
if (
ts_node_start_byte(child) > ts_node_start_byte(self) ||
child.id == self.id
) break;
// Descend into a child that ends at or after `self` and has visible children.
if (iterator.position.bytes >= end_byte && ts_node_child_count(child) > 0) {
node = child;
if (ts_node__is_relevant(child, true)) {
last_visible_node = node;
}
did_descend = true;
break;
}
}
}
return last_visible_node;
}
// Child at `child_index`, counting named and anonymous children alike.
TSNode ts_node_child(TSNode self, uint32_t child_index) {
  const bool include_anonymous = true;
  return ts_node__child(self, child_index, include_anonymous);
}
// Child at `child_index`, counting named children only.
TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
  const bool include_anonymous = false;
  return ts_node__child(self, child_index, include_anonymous);
}
// Returns the first child of `self` associated with `field_id`, following
// fields inherited from hidden children. Returns the null node when the id
// is zero, the node has no visible children, or no child carries the field.
TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) {
// Re-entry point for the manual tail call below.
recur:
if (!field_id || ts_node_child_count(self) == 0) return ts_node__null();
const TSFieldMapEntry *field_map, *field_map_end;
ts_language_field_map(
self.tree->language,
ts_node__subtree(self).ptr->production_id,
&field_map,
&field_map_end
);
if (field_map == field_map_end) return ts_node__null();
// The field mappings are sorted by their field id. Scan all
// the mappings to find the ones for the given field id.
while (field_map->field_id < field_id) {
field_map++;
if (field_map == field_map_end) return ts_node__null();
}
while (field_map_end[-1].field_id > field_id) {
field_map_end--;
if (field_map == field_map_end) return ts_node__null();
}
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&self);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (!ts_subtree_extra(ts_node__subtree(child))) {
// The iterator has already moved past this child, hence the -1.
uint32_t index = iterator.structural_child_index - 1;
if (index < field_map->child_index) continue;
// Hidden nodes' fields are "inherited" by their visible parent.
if (field_map->inherited) {
// If this is the *last* possible child node for this field,
// then perform a tail call to avoid recursion.
if (field_map + 1 == field_map_end) {
self = child;
goto recur;
}
// Otherwise, descend into this child, but if it doesn't contain
// the field, continue searching subsequent children.
else {
TSNode result = ts_node_child_by_field_id(child, field_id);
if (result.id) return result;
field_map++;
if (field_map == field_map_end) return ts_node__null();
}
}
else if (ts_node__is_relevant(child, true)) {
return child;
}
// If the field refers to a hidden node with visible children,
// return the first visible child.
else if (ts_node_child_count(child) > 0 ) {
return ts_node_child(child, 0);
}
// Otherwise, continue searching subsequent children.
else {
field_map++;
if (field_map == field_map_end) return ts_node__null();
}
}
}
return ts_node__null();
}
// Looks up, in the field map of the node's production, the name of the
// non-inherited field attached to the structural child at the given index.
// Returns NULL when that child has no such field.
static inline const char *ts_node__field_name_from_language(TSNode self, uint32_t structural_child_index) {
  const TSFieldMapEntry *entry, *entries_end;
  ts_language_field_map(
    self.tree->language,
    ts_node__subtree(self).ptr->production_id,
    &entry,
    &entries_end
  );

  while (entry != entries_end) {
    bool is_direct = !entry->inherited;
    if (is_direct && entry->child_index == structural_child_index) {
      return self.tree->language->field_names[entry->field_id];
    }
    entry++;
  }
  return NULL;
}
// Returns the field name attached to the visible child of `self` at
// `child_index`, or NULL if it has none or the index is out of range. When
// the child is reached through hidden nodes, a field name found on one of
// those hidden nodes is used as a fallback.
const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) {
TSNode result = self;
bool did_descend = true;
// Most recent field name seen on a hidden node that was descended into.
const char *inherited_field_name = NULL;
while (did_descend) {
did_descend = false;
TSNode child;
uint32_t index = 0;
NodeChildIterator iterator = ts_node_iterate_children(&result);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (ts_node__is_relevant(child, true)) {
if (index == child_index) {
// The child's own field name takes precedence over an inherited one.
const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1);
if (field_name) return field_name;
return inherited_field_name;
}
index++;
} else {
// Hidden child: descend if the requested index falls among its visible
// children, otherwise skip all of them at once.
uint32_t grandchild_index = child_index - index;
uint32_t grandchild_count = ts_node__relevant_child_count(child, true);
if (grandchild_index < grandchild_count) {
const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1);
if (field_name) inherited_field_name = field_name;
did_descend = true;
result = child;
child_index = grandchild_index;
break;
}
index += grandchild_count;
}
}
}
return NULL;
}
// Resolves `name` (of length `name_length`) to a field id in the node's
// language and delegates to ts_node_child_by_field_id.
TSNode ts_node_child_by_field_name(
  TSNode self,
  const char *name,
  uint32_t name_length
) {
  const TSLanguage *language = self.tree->language;
  TSFieldId field_id = ts_language_field_id_for_name(language, name, name_length);
  return ts_node_child_by_field_id(self, field_id);
}
// Number of visible children recorded on the node's subtree; zero for a
// subtree without children.
uint32_t ts_node_child_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  if (ts_subtree_child_count(subtree) == 0) return 0;
  return subtree.ptr->visible_child_count;
}
// Number of named children recorded on the node's subtree; zero for a
// subtree without children.
uint32_t ts_node_named_child_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  if (ts_subtree_child_count(subtree) == 0) return 0;
  return subtree.ptr->named_child_count;
}
// Next sibling, named or anonymous.
TSNode ts_node_next_sibling(TSNode self) {
  const bool include_anonymous = true;
  return ts_node__next_sibling(self, include_anonymous);
}
// Next named sibling.
TSNode ts_node_next_named_sibling(TSNode self) {
  const bool include_anonymous = false;
  return ts_node__next_sibling(self, include_anonymous);
}
// Previous sibling, named or anonymous.
TSNode ts_node_prev_sibling(TSNode self) {
  const bool include_anonymous = true;
  return ts_node__prev_sibling(self, include_anonymous);
}
// Previous named sibling.
TSNode ts_node_prev_named_sibling(TSNode self) {
  const bool include_anonymous = false;
  return ts_node__prev_sibling(self, include_anonymous);
}
// First child (named or anonymous) that ends after `byte`.
TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) {
  const bool include_anonymous = true;
  return ts_node__first_child_for_byte(self, byte, include_anonymous);
}
// First named child that ends after `byte`.
TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) {
  const bool include_anonymous = false;
  return ts_node__first_child_for_byte(self, byte, include_anonymous);
}
// Deepest descendant (named or anonymous) spanning the byte range.
TSNode ts_node_descendant_for_byte_range(
  TSNode self,
  uint32_t start,
  uint32_t end
) {
  const bool include_anonymous = true;
  return ts_node__descendant_for_byte_range(self, start, end, include_anonymous);
}
// Deepest named descendant spanning the byte range.
TSNode ts_node_named_descendant_for_byte_range(
  TSNode self,
  uint32_t start,
  uint32_t end
) {
  const bool include_anonymous = false;
  return ts_node__descendant_for_byte_range(self, start, end, include_anonymous);
}
// Deepest descendant (named or anonymous) spanning the point range.
TSNode ts_node_descendant_for_point_range(
  TSNode self,
  TSPoint start,
  TSPoint end
) {
  const bool include_anonymous = true;
  return ts_node__descendant_for_point_range(self, start, end, include_anonymous);
}
// Deepest named descendant spanning the point range.
TSNode ts_node_named_descendant_for_point_range(
  TSNode self,
  TSPoint start,
  TSPoint end
) {
  const bool include_anonymous = false;
  return ts_node__descendant_for_point_range(self, start, end, include_anonymous);
}
// Adjusts the start position stored in a node handle to account for an edit.
//
// - A node starting at or after the old end of the edit is shifted by the
//   edit's size change.
// - A node starting strictly inside the edited span is moved to the new end.
// - A node starting at or before the edit's start is left where it is.
void ts_node_edit(TSNode *self, const TSInputEdit *edit) {
  uint32_t byte = ts_node_start_byte(*self);
  TSPoint point = ts_node_start_point(*self);

  if (byte >= edit->old_end_byte) {
    uint32_t bytes_past_edit = byte - edit->old_end_byte;
    TSPoint points_past_edit = point_sub(point, edit->old_end_point);
    byte = edit->new_end_byte + bytes_past_edit;
    point = point_add(edit->new_end_point, points_past_edit);
  } else if (byte > edit->start_byte) {
    byte = edit->new_end_byte;
    point = edit->new_end_point;
  }

  self->context[0] = byte;
  self->context[1] = point.row;
  self->context[2] = point.column;
}

2151
vendor/tree-sitter/src/parser.c vendored Normal file

File diff suppressed because it is too large Load Diff

265
vendor/tree-sitter/src/parser.h vendored Normal file
View File

@@ -0,0 +1,265 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
// Reserved symbol ids: the error symbol is the largest TSSymbol value, and
// the `end` symbol is zero.
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// Buffer size in bytes. NOTE(review): presumably the capacity of the buffer
// handed to external scanner serialize/deserialize - confirm.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
// Fallback declarations for when this header is used without api.h.
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
// One entry of a production's field map: ties a field id to a child slot.
typedef struct {
TSFieldId field_id; // id of the field
uint8_t child_index; // structural index of the child the field applies to
bool inherited; // true when the field comes from inside a hidden child
} TSFieldMapEntry;
// An (index, length) pair. NOTE(review): presumably selects a production's
// run of entries inside TSLanguage.field_map_entries - confirm.
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
// Per-symbol flags, looked up through ts_language_symbol_metadata.
typedef struct {
bool visible; // symbol appears as a node in the public tree
bool named; // symbol is a named (as opposed to anonymous) node
bool supertype; // NOTE(review): presumably marks supertype symbols - confirm
} TSSymbolMetadata;
// Lexer interface seen by generated lexing code. The operations are function
// pointers stored in the struct, and each takes the lexer as first argument.
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead; // current lookahead value, read by the lexer macros
TSSymbol result_symbol; // symbol of the recognized token (set by ACCEPT_TOKEN)
void (*advance)(TSLexer *, bool); // move forward; the flag is the macros' `skip`
void (*mark_end)(TSLexer *); // mark the end of the current token
uint32_t (*get_column)(TSLexer *); // column of the current position
bool (*is_at_included_range_start)(const TSLexer *); // at the start of an included range?
bool (*eof)(const TSLexer *); // at the end of the input?
};
// Kinds of parse action; stored in the `type` byte of TSParseAction.
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
// A single parse action. Every variant begins with the same `type` byte, so
// the bare `type` member can be read to decide which variant is active.
typedef union {
// Payload of a shift action.
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
// Payload of a reduce action.
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
// Accept and recover actions carry only the type.
uint8_t type;
} TSParseAction;
// Pair of lex states. NOTE(review): presumably one entry per parse state,
// selecting the internal and external-scanner lex states - confirm.
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
// Element of the parse-actions table: either an action or an `entry` header.
// NOTE(review): presumably the header gives the count of the actions that
// follow it - confirm.
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
// Range of lookahead values; set_contains treats both bounds as inclusive.
typedef struct {
int32_t start;
int32_t end;
} TSCharacterRange;
// Static description of a grammar: table sizes, then pointers to the parse
// tables, the symbol/field metadata, and the lexing callbacks.
// NOTE(review): presumably this layout must match what the parser generator
// emits - confirm before reordering or resizing any field.
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
// Indexed by field id.
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
// Lexing entry points; each takes the lexer and a state id.
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
// Tables and hooks for the grammar's external scanner.
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
// Binary-searches an array of character ranges for `lookahead`.
//
// `ranges` must hold `len` entries sorted in ascending order; both bounds of
// each range are inclusive. Returns true when some range contains
// `lookahead`. An empty set (`len == 0`) contains nothing. The early return
// for that case keeps the final probe from reading `ranges[0]` past the end
// of an empty array.
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  if (len == 0) return false;

  uint32_t index = 0;
  uint32_t size = len - index;
  // Halve the window until a single candidate range remains at `index`.
  while (size > 1) {
    uint32_t half_size = size / 2;
    uint32_t mid_index = index + half_size;
    TSCharacterRange *range = &ranges[mid_index];
    if (lookahead >= range->start && lookahead <= range->end) {
      return true;
    } else if (lookahead > range->end) {
      index = mid_index;
    }
    size -= half_size;
  }
  TSCharacterRange *range = &ranges[index];
  return (lookahead >= range->start && lookahead <= range->end);
}
/*
* Lexer Macros
*/
// UNUSED suppresses the unused-variable diagnostic for the declaration that
// follows it (MSVC pragma, or the GCC-style attribute elsewhere).
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
// Prologue of a lex function: declares the working variables and the two
// labels. `next_state` advances the lexer and falls through to `start`,
// which reloads the lookahead. Expects `lexer` to be in scope.
#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
// Consume the lookahead and continue in `state_value`. Expects `state` to
// be in scope.
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
// Table form of ADVANCE: the arguments are (lookahead, state) pairs; the
// first pair whose lookahead matches is taken. Falls through if none match.
#define ADVANCE_MAP(...) \
{ \
static const uint16_t map[] = { __VA_ARGS__ }; \
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
if (map[i] == lookahead) { \
state = map[i + 1]; \
goto next_state; \
} \
} \
}
// Like ADVANCE, but passes skip = true to the lexer's advance callback.
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
// Record `symbol_value` as the recognized token and mark its end here.
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
// Leave the lex function, reporting whether a token was accepted.
#define END_STATE() return result;
/*
* Parse Table Macros
*/
// Index helpers. SMALL_STATE rebases a state id by LARGE_STATE_COUNT, which
// must be defined by the including file; STATE and ACTIONS are identity.
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) id
#define ACTIONS(id) id
// The macros below expand to brace-enclosed initializers for a
// TSParseActionEntry holding one action. Members not named are zero.
// Shift to `state_value`.
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value) \
} \
}}
// Shift to `state_value` with the `repetition` flag set.
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value), \
.repetition = true \
} \
}}
// Shift with the `extra` flag set and no target state.
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
// Reduce `children` children to `symbol_name`, with the given dynamic
// precedence and production id.
#define REDUCE(symbol_name, children, precedence, prod_id) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_name, \
.child_count = children, \
.dynamic_precedence = precedence, \
.production_id = prod_id \
}, \
}}
// Recover action: carries only its type.
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
// Accept action: carries only its type.
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

62
vendor/tree-sitter/src/point.h vendored Normal file
View File

@@ -0,0 +1,62 @@
#ifndef TREE_SITTER_POINT_H_
#define TREE_SITTER_POINT_H_
#include "tree_sitter/api.h"
// The origin (row 0, column 0) and the largest representable point.
#define POINT_ZERO ((TSPoint) {0, 0})
#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX})
static inline TSPoint point__new(unsigned row, unsigned column) {
TSPoint result = {row, column};
return result;
}
// Advances point `a` by extent `b`. When `b` spans at least one row, the
// rows add up and the column restarts at `b.column`; otherwise only the
// columns add up.
static inline TSPoint point_add(TSPoint a, TSPoint b) {
  if (b.row == 0) {
    return point__new(a.row, a.column + b.column);
  }
  return point__new(a.row + b.row, b.column);
}
// Extent from `b` to `a`. When `a` lies on a later row, the rows are
// subtracted and `a.column` is kept; otherwise the result has row 0 and the
// difference of the columns. The column difference is not clamped, so the
// caller is expected to pass `a.column >= b.column` in that case.
static inline TSPoint point_sub(TSPoint a, TSPoint b) {
  if (a.row <= b.row) {
    return point__new(0, a.column - b.column);
  }
  return point__new(a.row - b.row, a.column);
}
// a <= b, comparing rows first and columns on equal rows.
static inline bool point_lte(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row < b.row;
  return a.column <= b.column;
}
// a < b, comparing rows first and columns on equal rows.
static inline bool point_lt(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row < b.row;
  return a.column < b.column;
}
// a > b, comparing rows first and columns on equal rows.
static inline bool point_gt(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row > b.row;
  return a.column > b.column;
}
// a >= b, comparing rows first and columns on equal rows.
static inline bool point_gte(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row > b.row;
  return a.column >= b.column;
}
// Two points are equal when both row and column match.
static inline bool point_eq(TSPoint a, TSPoint b) {
  if (a.row != b.row) return false;
  return a.column == b.column;
}
// The earlier of two points; `b` is returned when they are equal.
static inline TSPoint point_min(TSPoint a, TSPoint b) {
  if (a.row != b.row) {
    return a.row < b.row ? a : b;
  }
  return a.column < b.column ? a : b;
}
// The later of two points; `b` is returned when they are equal.
static inline TSPoint point_max(TSPoint a, TSPoint b) {
  if (a.row != b.row) {
    return a.row > b.row ? a : b;
  }
  return a.column > b.column ? a : b;
}
#endif

4134
vendor/tree-sitter/src/query.c vendored Normal file

File diff suppressed because it is too large Load Diff

34
vendor/tree-sitter/src/reduce_action.h vendored Normal file
View File

@@ -0,0 +1,34 @@
#ifndef TREE_SITTER_REDUCE_ACTION_H_
#define TREE_SITTER_REDUCE_ACTION_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./array.h"
#include "tree_sitter/api.h"
// Description of one reduction: how many children are reduced, to which
// symbol, and with which dynamic precedence and production id.
typedef struct {
uint32_t count;
TSSymbol symbol;
int dynamic_precedence;
unsigned short production_id;
} ReduceAction;
// Growable list of reduce actions; ts_reduce_action_set_add keeps it free of
// entries that share both `symbol` and `count`.
typedef Array(ReduceAction) ReduceActionSet;
// Appends `new_action` unless the set already holds an action with the same
// symbol and child count. Only those two fields are compared: the dynamic
// precedence and production id of an existing entry are left as they are.
static inline void ts_reduce_action_set_add(ReduceActionSet *self,
                                            ReduceAction new_action) {
  for (uint32_t i = 0; i < self->size; i++) {
    const ReduceAction *existing = &self->contents[i];
    bool same_symbol = existing->symbol == new_action.symbol;
    if (same_symbol && existing->count == new_action.count) return;
  }
  array_push(self, new_action);
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_REDUCE_ACTION_H_

95
vendor/tree-sitter/src/reusable_node.h vendored Normal file
View File

@@ -0,0 +1,95 @@
#include "./subtree.h"
// One level of the path from the root of an old tree down to the node that is
// currently being considered for reuse.
typedef struct {
// The subtree at this level.
Subtree tree;
// Index of `tree` within its parent's children (0 for the root entry).
uint32_t child_index;
// Byte offset at which `tree` starts, as accumulated by the cursor functions.
uint32_t byte_offset;
} StackEntry;
// Cursor over an old syntax tree. The top of `stack` is the current node;
// `last_external_token` is updated by `reusable_node_advance` whenever the
// node being left has external tokens.
typedef struct {
Array(StackEntry) stack;
Subtree last_external_token;
} ReusableNode;
static inline ReusableNode reusable_node_new(void) {
return (ReusableNode) {array_new(), NULL_SUBTREE};
}
// Reset the cursor to the empty state, keeping the stack's allocation.
static inline void reusable_node_clear(ReusableNode *self) {
  self->last_external_token = NULL_SUBTREE;
  array_clear(&self->stack);
}
// Return the subtree the cursor currently points at, or NULL_SUBTREE when the
// cursor is empty.
static inline Subtree reusable_node_tree(ReusableNode *self) {
  if (self->stack.size == 0) return NULL_SUBTREE;
  return self->stack.contents[self->stack.size - 1].tree;
}
// Return the byte offset of the current subtree, or UINT32_MAX when the
// cursor is empty.
static inline uint32_t reusable_node_byte_offset(ReusableNode *self) {
  if (self->stack.size == 0) return UINT32_MAX;
  return self->stack.contents[self->stack.size - 1].byte_offset;
}
// Free the storage backing the cursor's stack. `last_external_token` is left
// untouched.
static inline void reusable_node_delete(ReusableNode *self) {
  array_delete(&self->stack);
}
// Move the cursor to the node that follows the current one: the next sibling
// if there is one, otherwise the next sibling of the nearest ancestor that
// has one. If no such node exists the stack ends up empty.
static inline void reusable_node_advance(ReusableNode *self) {
StackEntry last_entry = *array_back(&self->stack);
// The next node starts right after the whole of the current one.
uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree);
// Remember the last external token of the node being left behind.
if (ts_subtree_has_external_tokens(last_entry.tree)) {
self->last_external_token = ts_subtree_last_external_token(last_entry.tree);
}
Subtree tree;
uint32_t next_index;
// Pop entries until the new top of the stack has a child at `next_index`.
do {
StackEntry popped_entry = array_pop(&self->stack);
next_index = popped_entry.child_index + 1;
// Popped the last entry: nothing follows, leave the cursor empty.
if (self->stack.size == 0) return;
tree = array_back(&self->stack)->tree;
} while (ts_subtree_child_count(tree) <= next_index);
array_push(&self->stack, ((StackEntry) {
.tree = ts_subtree_children(tree)[next_index],
.child_index = next_index,
.byte_offset = byte_offset,
}));
}
// Step into the first child of the current node. Returns false (leaving the
// cursor unchanged) when the current node has no children.
static inline bool reusable_node_descend(ReusableNode *self) {
  StackEntry top = *array_back(&self->stack);
  if (ts_subtree_child_count(top.tree) == 0) return false;

  // The first child starts at the same byte offset as its parent.
  array_push(&self->stack, ((StackEntry) {
    .tree = ts_subtree_children(top.tree)[0],
    .child_index = 0,
    .byte_offset = top.byte_offset,
  }));
  return true;
}
// Descend to the first leaf under the current node, then advance past it.
static inline void reusable_node_advance_past_leaf(ReusableNode *self) {
  bool descended;
  do {
    descended = reusable_node_descend(self);
  } while (descended);
  reusable_node_advance(self);
}
// Point the cursor at the first child of `tree`, or leave it empty when
// `tree` has no children.
static inline void reusable_node_reset(ReusableNode *self, Subtree tree) {
  reusable_node_clear(self);

  StackEntry root_entry = {
    .tree = tree,
    .child_index = 0,
    .byte_offset = 0,
  };
  array_push(&self->stack, root_entry);

  // Never reuse the root node, because it has a non-standard internal structure
  // due to transformations that are applied when it is accepted: adding the EOF
  // child and any extra children.
  bool entered_root = reusable_node_descend(self);
  if (!entered_root) {
    reusable_node_clear(self);
  }
}

899
vendor/tree-sitter/src/stack.c vendored Normal file
View File

@@ -0,0 +1,899 @@
#include "./alloc.h"
#include "./language.h"
#include "./subtree.h"
#include "./array.h"
#include "./stack.h"
#include "./length.h"
#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
// Capacity of `StackNode.links`; `stack_node_add_link` silently drops a link
// once a node already holds this many.
#define MAX_LINK_COUNT 8
// Maximum number of released nodes kept for reuse in `Stack.node_pool`.
#define MAX_NODE_POOL_SIZE 50
// Upper bound on simultaneously live iterators in `stack__iter`; further
// branches are skipped once it is reached.
#define MAX_ITERATOR_COUNT 64
#if defined _WIN32 && !defined __GNUC__
#define forceinline __forceinline
#else
#define forceinline static inline __attribute__((always_inline))
#endif
typedef struct StackNode StackNode;
// An edge from a node to one of its predecessors, labelled with the subtree
// that was pushed to get from the predecessor to the node.
typedef struct {
StackNode *node;
Subtree subtree;
bool is_pending;
} StackLink;
// A reference-counted node of the graph-structured stack. `links[0 ..
// link_count)` point at predecessor nodes; the remaining fields are
// aggregates maintained by `stack_node_new` and `stack_node_add_link`.
struct StackNode {
TSStateId state;
Length position;
StackLink links[MAX_LINK_COUNT];
short unsigned int link_count;
uint32_t ref_count;
unsigned error_cost;
unsigned node_count;
int dynamic_precedence;
};
// Traversal state for one path explored by `stack__iter`.
typedef struct {
// Node the path has currently reached.
StackNode *node;
// Subtrees collected along the path (only filled when subtrees are requested).
SubtreeArray subtrees;
// Number of links counted so far: non-extra subtrees plus null-subtree links.
uint32_t subtree_count;
// Stays true until a counted link that is not pending is crossed.
bool is_pending;
} StackIterator;
typedef Array(StackNode *) StackNodeArray;
typedef enum {
StackStatusActive,
StackStatusPaused,
StackStatusHalted,
} StackStatus;
// One version of the stack: a pointer to its top node plus per-version state.
typedef struct {
StackNode *node;
// Set by `ts_stack_record_summary`; NULL until then.
StackSummary *summary;
// Baseline subtracted from the node count in `ts_stack_node_count_since_error`.
unsigned node_count_at_last_error;
Subtree last_external_token;
// Lookahead stored by `ts_stack_pause` and handed back by `ts_stack_resume`.
Subtree lookahead_when_paused;
StackStatus status;
} StackHead;
struct Stack {
Array(StackHead) heads;
// Scratch result buffer reused by every `stack__iter` call.
StackSliceArray slices;
// Scratch iterator buffer reused by `stack__iter` and the dot-graph printer.
Array(StackIterator) iterators;
// Released nodes available for reuse by `stack_node_new`.
StackNodeArray node_pool;
StackNode *base_node;
SubtreePool *subtree_pool;
};
// Bitmask returned by a `StackCallback`; `StackActionPop` and
// `StackActionStop` may be combined.
typedef unsigned StackAction;
enum {
StackActionNone,
StackActionStop = 1,
StackActionPop = 2,
};
// Callback invoked by `stack__iter` for each iterator on each step, with the
// caller's payload.
typedef StackAction (*StackCallback)(void *, const StackIterator *);
// Add one reference to `self`. A NULL node is ignored. The assertions check
// that the node is still alive and that the counter did not wrap around.
static void stack_node_retain(StackNode *self) {
  if (self) {
    assert(self->ref_count > 0);
    self->ref_count++;
    assert(self->ref_count != 0);
  }
}
// Drop one reference to `self`. When the count reaches zero, the node's links
// are released and the node is returned to `pool` (or freed when the pool
// already holds MAX_NODE_POOL_SIZE nodes).
//
// Links 1 .. link_count-1 are released through a recursive call, while the
// predecessor behind link 0 is handled by jumping back to `recur`, so a long
// chain of single-link nodes does not deepen the C call stack.
static void stack_node_release(
StackNode *self,
StackNodeArray *pool,
SubtreePool *subtree_pool
) {
recur:
assert(self->ref_count != 0);
self->ref_count--;
if (self->ref_count > 0) return;
StackNode *first_predecessor = NULL;
if (self->link_count > 0) {
// Release every link except the first, last to first.
for (unsigned i = self->link_count - 1; i > 0; i--) {
StackLink link = self->links[i];
if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree);
stack_node_release(link.node, pool, subtree_pool);
}
// Release the first link's subtree now; its node is released via `recur`.
StackLink link = self->links[0];
if (link.subtree.ptr) ts_subtree_release(subtree_pool, link.subtree);
first_predecessor = self->links[0].node;
}
if (pool->size < MAX_NODE_POOL_SIZE) {
array_push(pool, self);
} else {
ts_free(self);
}
if (first_predecessor) {
self = first_predecessor;
goto recur;
}
}
/// Count the nodes contributed by `subtree`: its visible descendants, the
/// subtree itself when visible, and one more for an error-repeat node. The
/// result is used to measure how much progress a stack version has made.
static uint32_t stack__subtree_node_count(Subtree subtree) {
  uint32_t total = ts_subtree_visible_descendant_count(subtree);
  if (ts_subtree_visible(subtree)) {
    total += 1;
  }

  // Intermediate error nodes are counted even though they are not visible,
  // because a stack version's node count is used to check whether it has
  // made any progress since the last time it encountered an error.
  if (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) {
    total += 1;
  }
  return total;
}
// Create a node in state `state` with a reference count of 1, taking storage
// from `pool` when available and allocating otherwise.
//
// With a `previous_node`, the new node gets a single link to it labelled with
// `subtree` / `is_pending`, and inherits its position, error cost, node count
// and dynamic precedence, each extended by `subtree`'s contribution when
// `subtree` is non-null. Neither `previous_node` nor `subtree` is retained
// here. Without a `previous_node` the node starts at position zero with all
// aggregates zero.
static StackNode *stack_node_new(
  StackNode *previous_node,
  Subtree subtree,
  bool is_pending,
  TSStateId state,
  StackNodeArray *pool
) {
  StackNode *node;
  if (pool->size > 0) {
    node = array_pop(pool);
  } else {
    node = ts_malloc(sizeof(StackNode));
  }
  *node = (StackNode) {
    .ref_count = 1,
    .link_count = 0,
    .state = state
  };

  if (!previous_node) {
    node->position = length_zero();
    node->error_cost = 0;
    return node;
  }

  node->link_count = 1;
  node->links[0] = (StackLink) {
    .node = previous_node,
    .subtree = subtree,
    .is_pending = is_pending,
  };

  // Start from the predecessor's aggregates...
  node->position = previous_node->position;
  node->error_cost = previous_node->error_cost;
  node->dynamic_precedence = previous_node->dynamic_precedence;
  node->node_count = previous_node->node_count;

  // ...and add what the pushed subtree contributes.
  if (subtree.ptr) {
    node->error_cost += ts_subtree_error_cost(subtree);
    node->position = length_add(node->position, ts_subtree_total_size(subtree));
    node->node_count += stack__subtree_node_count(subtree);
    node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree);
  }
  return node;
}
// Decide whether two link subtrees are interchangeable for the purpose of
// merging stack links.
static bool stack__subtree_is_equivalent(Subtree left, Subtree right) {
  // Same pointer (including both null) is trivially equivalent; exactly one
  // null is not.
  if (left.ptr == right.ptr) return true;
  if (!left.ptr || !right.ptr) return false;

  // Symbols must match
  if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false;

  // If both have errors, don't bother keeping both.
  if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) return true;

  // Otherwise compare layout, arity, extra-ness and scanner state.
  if (ts_subtree_padding(left).bytes != ts_subtree_padding(right).bytes) return false;
  if (ts_subtree_size(left).bytes != ts_subtree_size(right).bytes) return false;
  if (ts_subtree_child_count(left) != ts_subtree_child_count(right)) return false;
  if (ts_subtree_extra(left) != ts_subtree_extra(right)) return false;
  return ts_subtree_external_scanner_state_eq(left, right);
}
// Add `link` as another predecessor edge of `self`, merging it with an
// existing edge when possible.
//
// - A link from a node to itself is ignored.
// - If an existing link carries an equivalent subtree and points at the same
//   node, only the subtree with the higher dynamic precedence is kept.
// - If an existing link carries an equivalent subtree and its node matches
//   `link.node` in state, byte position and error cost, `link.node`'s own
//   links are merged into that node recursively.
// - Otherwise the link is appended (retaining its node and subtree), unless
//   `self` already has MAX_LINK_COUNT links, in which case it is dropped.
static void stack_node_add_link(
StackNode *self,
StackLink link,
SubtreePool *subtree_pool
) {
if (link.node == self) return;
for (int i = 0; i < self->link_count; i++) {
StackLink *existing_link = &self->links[i];
if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) {
// In general, we preserve ambiguities until they are removed from the stack
// during a pop operation where multiple paths lead to the same node. But in
// the special case where two links directly connect the same pair of nodes,
// we can safely remove the ambiguity ahead of time without changing behavior.
if (existing_link->node == link.node) {
if (
ts_subtree_dynamic_precedence(link.subtree) >
ts_subtree_dynamic_precedence(existing_link->subtree)
) {
// Retain the replacement before releasing the subtree it replaces.
ts_subtree_retain(link.subtree);
ts_subtree_release(subtree_pool, existing_link->subtree);
existing_link->subtree = link.subtree;
self->dynamic_precedence =
link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree);
}
return;
}
// If the previous nodes are mergeable, merge them recursively.
if (
existing_link->node->state == link.node->state &&
existing_link->node->position.bytes == link.node->position.bytes &&
existing_link->node->error_cost == link.node->error_cost
) {
for (int j = 0; j < link.node->link_count; j++) {
stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool);
}
// Keep `self`'s dynamic precedence at the maximum over all merged paths.
int32_t dynamic_precedence = link.node->dynamic_precedence;
if (link.subtree.ptr) {
dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
}
if (dynamic_precedence > self->dynamic_precedence) {
self->dynamic_precedence = dynamic_precedence;
}
return;
}
}
}
// No room for another link: the new path is discarded.
if (self->link_count == MAX_LINK_COUNT) return;
stack_node_retain(link.node);
unsigned node_count = link.node->node_count;
int dynamic_precedence = link.node->dynamic_precedence;
self->links[self->link_count++] = link;
if (link.subtree.ptr) {
ts_subtree_retain(link.subtree);
node_count += stack__subtree_node_count(link.subtree);
dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
}
// Aggregates on `self` track the maximum over all incoming paths.
if (node_count > self->node_count) self->node_count = node_count;
if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence;
}
// Release everything owned by a stack head: its external token, its stored
// lookahead, its summary and finally its node reference. A head without a
// node is left untouched.
static void stack_head_delete(
  StackHead *self,
  StackNodeArray *pool,
  SubtreePool *subtree_pool
) {
  if (!self->node) return;

  if (self->last_external_token.ptr) {
    ts_subtree_release(subtree_pool, self->last_external_token);
  }
  if (self->lookahead_when_paused.ptr) {
    ts_subtree_release(subtree_pool, self->lookahead_when_paused);
  }
  if (self->summary) {
    array_delete(self->summary);
    ts_free(self->summary);
  }
  stack_node_release(self->node, pool, subtree_pool);
}
// Append a new active version whose top is `node`, copying the error baseline
// and last external token from `original_version`. The node and the token
// (when present) are retained. Returns the index of the new version.
static StackVersion ts_stack__add_version(
  Stack *self,
  StackVersion original_version,
  StackNode *node
) {
  // Read the source head's fields before pushing, since the push may
  // reallocate the heads array.
  const StackHead *original = &self->heads.contents[original_version];
  StackHead head = {
    .node = node,
    .node_count_at_last_error = original->node_count_at_last_error,
    .last_external_token = original->last_external_token,
    .status = StackStatusActive,
    .lookahead_when_paused = NULL_SUBTREE,
  };
  array_push(&self->heads, head);

  stack_node_retain(node);
  if (head.last_external_token.ptr) {
    ts_subtree_retain(head.last_external_token);
  }
  return (StackVersion)(self->heads.size - 1);
}
// Record a popped slice ending at `node`. If an existing slice (searched from
// the back) already belongs to a version whose head is `node`, the new slice
// is inserted right after it and shares that version; otherwise a new version
// is created for `node` and the slice is appended.
static void ts_stack__add_slice(
  Stack *self,
  StackVersion original_version,
  StackNode *node,
  SubtreeArray *subtrees
) {
  for (uint32_t end = self->slices.size; end > 0; end--) {
    StackVersion existing_version = self->slices.contents[end - 1].version;
    if (self->heads.contents[existing_version].node == node) {
      StackSlice slice = {*subtrees, existing_version};
      array_insert(&self->slices, end, slice);
      return;
    }
  }

  StackVersion new_version = ts_stack__add_version(self, original_version, node);
  StackSlice slice = { *subtrees, new_version };
  array_push(&self->slices, slice);
}
// Walk backwards from the head of `version` along every chain of links,
// invoking `callback(payload, iterator)` once per iterator per step.
//
// The callback returns a bitmask: `StackActionPop` records the iterator's
// collected subtrees as a slice ending at the current node, and
// `StackActionStop` ends that iterator. An iterator also ends when it reaches
// a node without links.
//
// Subtrees are collected only when `goal_subtree_count` is non-negative; the
// value is otherwise used just to size the initial reservation.
//
// Returns `self->slices` by value: its storage is owned by the stack and is
// cleared at the start of the next call.
static StackSliceArray stack__iter(
Stack *self,
StackVersion version,
StackCallback callback,
void *payload,
int goal_subtree_count
) {
array_clear(&self->slices);
array_clear(&self->iterators);
StackHead *head = array_get(&self->heads, version);
StackIterator new_iterator = {
.node = head->node,
.subtrees = array_new(),
.subtree_count = 0,
.is_pending = true,
};
bool include_subtrees = false;
if (goal_subtree_count >= 0) {
include_subtrees = true;
array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree));
}
array_push(&self->iterators, new_iterator);
while (self->iterators.size > 0) {
// `size` is captured up front so iterators forked during this pass are
// first visited on the next pass.
for (uint32_t i = 0, size = self->iterators.size; i < size; i++) {
StackIterator *iterator = &self->iterators.contents[i];
StackNode *node = iterator->node;
StackAction action = callback(payload, iterator);
bool should_pop = action & StackActionPop;
bool should_stop = action & StackActionStop || node->link_count == 0;
if (should_pop) {
SubtreeArray subtrees = iterator->subtrees;
// If the iterator keeps going it still needs its own array, so the
// slice gets a copy; otherwise the slice takes over the array.
if (!should_stop) {
ts_subtree_array_copy(subtrees, &subtrees);
}
// Subtrees were collected top-down; slices store them in document order.
ts_subtree_array_reverse(&subtrees);
ts_stack__add_slice(
self,
version,
node,
&subtrees
);
}
if (should_stop) {
if (!should_pop) {
ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees);
}
array_erase(&self->iterators, i);
i--, size--;
continue;
}
// Links 1 .. link_count-1 each fork a copy of the current iterator; link 0
// is handled last (j == link_count) by advancing the iterator in place,
// after all copies have been taken.
for (uint32_t j = 1; j <= node->link_count; j++) {
StackIterator *next_iterator;
StackLink link;
if (j == node->link_count) {
link = node->links[0];
next_iterator = &self->iterators.contents[i];
} else {
// Too many live iterators: this branch is not explored.
if (self->iterators.size >= MAX_ITERATOR_COUNT) continue;
link = node->links[j];
StackIterator current_iterator = self->iterators.contents[i];
array_push(&self->iterators, current_iterator);
next_iterator = array_back(&self->iterators);
ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees);
}
next_iterator->node = link.node;
if (link.subtree.ptr) {
if (include_subtrees) {
array_push(&next_iterator->subtrees, link.subtree);
ts_subtree_retain(link.subtree);
}
// Extra subtrees are collected but do not count towards `subtree_count`.
if (!ts_subtree_extra(link.subtree)) {
next_iterator->subtree_count++;
if (!link.is_pending) {
next_iterator->is_pending = false;
}
}
} else {
// A link without a subtree still counts as one step and is never pending.
next_iterator->subtree_count++;
next_iterator->is_pending = false;
}
}
}
}
return self->slices;
}
// Allocate a stack that uses `subtree_pool` for subtree releases. The stack
// starts with a base node in state 1 and a single active version pointing at
// it (set up by `ts_stack_clear`).
Stack *ts_stack_new(SubtreePool *subtree_pool) {
  Stack *self = ts_calloc(1, sizeof(Stack));

  array_init(&self->heads);
  array_reserve(&self->heads, 4);

  array_init(&self->slices);
  array_reserve(&self->slices, 4);

  array_init(&self->iterators);
  array_reserve(&self->iterators, 4);

  array_init(&self->node_pool);
  array_reserve(&self->node_pool, MAX_NODE_POOL_SIZE);

  self->subtree_pool = subtree_pool;
  self->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &self->node_pool);
  ts_stack_clear(self);
  return self;
}
// Destroy the stack: free the scratch arrays, release the base node and every
// head, free all pooled nodes, and finally free the stack itself.
void ts_stack_delete(Stack *self) {
  if (self->slices.contents) {
    array_delete(&self->slices);
  }
  if (self->iterators.contents) {
    array_delete(&self->iterators);
  }

  // Nodes released here may land in the node pool, so the pool is emptied
  // only afterwards.
  stack_node_release(self->base_node, &self->node_pool, self->subtree_pool);
  uint32_t head_index = 0;
  while (head_index < self->heads.size) {
    stack_head_delete(&self->heads.contents[head_index], &self->node_pool, self->subtree_pool);
    head_index++;
  }
  array_clear(&self->heads);

  if (self->node_pool.contents) {
    uint32_t pool_index = 0;
    while (pool_index < self->node_pool.size) {
      ts_free(self->node_pool.contents[pool_index]);
      pool_index++;
    }
    array_delete(&self->node_pool);
  }

  array_delete(&self->heads);
  ts_free(self);
}
// Number of versions (heads) currently in the stack.
uint32_t ts_stack_version_count(const Stack *self) {
  uint32_t version_count = self->heads.size;
  return version_count;
}
// Parse state stored on the top node of `version`.
TSStateId ts_stack_state(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->node->state;
}
// Position stored on the top node of `version`.
Length ts_stack_position(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->node->position;
}
// Last external token recorded for `version` (not retained for the caller).
Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->last_external_token;
}
// Replace the last external token of `version`. The new token is retained
// before the old one is released, so passing the token that is already
// stored is safe.
void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) {
  StackHead *head = array_get(&self->heads, version);
  Subtree previous = head->last_external_token;
  if (token.ptr) {
    ts_subtree_retain(token);
  }
  if (previous.ptr) {
    ts_subtree_release(self->subtree_pool, previous);
  }
  head->last_external_token = token;
}
// Error cost of `version`: the cost stored on its top node, plus one
// ERROR_COST_PER_RECOVERY when the version is paused or when its top node is
// in the error state with a null subtree on its first link.
unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  unsigned base_cost = head->node->error_cost;

  if (head->status == StackStatusPaused) {
    return base_cost + ERROR_COST_PER_RECOVERY;
  }
  if (head->node->state == ERROR_STATE && !head->node->links[0].subtree.ptr) {
    return base_cost + ERROR_COST_PER_RECOVERY;
  }
  return base_cost;
}
// Number of nodes `version` has gained since its error baseline. If the
// current node count has dropped below the baseline, the baseline is lowered
// to match first (so the result is never negative).
unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  unsigned current_count = head->node->node_count;
  if (current_count < head->node_count_at_last_error) {
    head->node_count_at_last_error = current_count;
  }
  return current_count - head->node_count_at_last_error;
}
// Push `subtree` and `state` onto `version` by creating a new top node linked
// to the current one. The head's existing node reference and the caller's
// subtree reference are handed over to the new node's link rather than
// retained again. Pushing a null subtree also resets the head's error
// baseline to the new node count.
void ts_stack_push(
  Stack *self,
  StackVersion version,
  Subtree subtree,
  bool pending,
  TSStateId state
) {
  StackHead *head = array_get(&self->heads, version);
  StackNode *pushed = stack_node_new(head->node, subtree, pending, state, &self->node_pool);
  head->node = pushed;
  if (!subtree.ptr) {
    head->node_count_at_last_error = pushed->node_count;
  }
}
// `stack__iter` callback: pop and stop once the iterator has counted exactly
// the number of subtrees that `payload` (an `unsigned *`) points at.
forceinline StackAction pop_count_callback(void *payload, const StackIterator *iterator) {
  const unsigned *goal = payload;
  bool reached_goal = iterator->subtree_count == *goal;
  return reached_goal ? (StackActionPop | StackActionStop) : StackActionNone;
}
// Pop `count` entries from `version`, returning one slice per path found.
StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
  int goal_subtree_count = (int)count;
  return stack__iter(self, version, pop_count_callback, &count, goal_subtree_count);
}
// `stack__iter` callback: after the first counted subtree, stop; pop as well
// if everything crossed so far was pending.
forceinline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) {
  (void)payload;
  if (iterator->subtree_count < 1) return StackActionNone;
  return iterator->is_pending
    ? (StackActionPop | StackActionStop)
    : StackActionStop;
}
// Pop a pending subtree from the top of `version`. When something was popped,
// the first resulting version is moved back into `version`'s slot so the
// caller keeps using the same index.
StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) {
  StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0);
  if (pop.size == 0) return pop;

  ts_stack_renumber_version(self, pop.contents[0].version, version);
  pop.contents[0].version = version;
  return pop;
}
// `stack__iter` callback: once one subtree has been collected, stop; pop as
// well if that subtree is an error and no error was popped before.
// `payload` is a `bool *` flag shared across iterators.
forceinline StackAction pop_error_callback(void *payload, const StackIterator *iterator) {
  if (iterator->subtrees.size == 0) return StackActionNone;

  bool *found_error = payload;
  if (*found_error || !ts_subtree_is_error(iterator->subtrees.contents[0])) {
    return StackActionStop;
  }
  *found_error = true;
  return StackActionPop | StackActionStop;
}
// If the top node of `version` has an error subtree on any of its links, pop
// one error subtree and return it as a one-element array, keeping the result
// in `version`'s slot. Returns an empty array otherwise.
SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) {
  StackNode *node = array_get(&self->heads, version)->node;
  for (unsigned i = 0; i < node->link_count; i++) {
    Subtree candidate = node->links[i].subtree;
    if (!candidate.ptr || !ts_subtree_is_error(candidate)) continue;

    // Only the first error link triggers a pop attempt.
    bool found_error = false;
    StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1);
    if (pop.size == 0) break;

    assert(pop.size == 1);
    ts_stack_renumber_version(self, pop.contents[0].version, version);
    return pop.contents[0].subtrees;
  }
  return (SubtreeArray) {.size = 0};
}
// `stack__iter` callback: pop only when the iterator has reached a node
// without links, i.e. the bottom of the stack.
forceinline StackAction pop_all_callback(void *payload, const StackIterator *iterator) {
  (void)payload;
  if (iterator->node->link_count == 0) {
    return StackActionPop;
  }
  return StackActionNone;
}
// Pop everything from `version` down to the bottom of the stack, returning
// one slice per path.
StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) {
  StackSliceArray slices = stack__iter(self, version, pop_all_callback, NULL, 0);
  return slices;
}
// Payload handed to `summarize_stack_callback` by `ts_stack_record_summary`.
typedef struct {
// Summary being built; entries are appended by the callback.
StackSummary *summary;
// Iterators deeper than this are stopped without recording an entry.
unsigned max_depth;
} SummarizeStackSession;
// `stack__iter` callback that records a (position, depth, state) entry for
// each node visited, up to `max_depth`. An entry is skipped when the summary
// already holds one with the same depth and state; the backwards search ends
// at the first entry with a smaller depth.
forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) {
  SummarizeStackSession *session = payload;
  TSStateId state = iterator->node->state;
  unsigned depth = iterator->subtree_count;
  if (depth > session->max_depth) return StackActionStop;

  for (unsigned end = session->summary->size; end > 0; end--) {
    StackSummaryEntry entry = session->summary->contents[end - 1];
    if (entry.depth < depth) break;
    if (entry.depth == depth && entry.state == state) return StackActionNone;
  }

  StackSummaryEntry new_entry = {
    .position = iterator->node->position,
    .depth = depth,
    .state = state,
  };
  array_push(session->summary, new_entry);
  return StackActionNone;
}
// Build a fresh summary of the states reachable within `max_depth` of the
// top of `version` and store it on the head, freeing any previous summary.
void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) {
  StackSummary *summary = ts_malloc(sizeof(StackSummary));
  array_init(summary);
  SummarizeStackSession session = {
    .summary = summary,
    .max_depth = max_depth
  };

  // A negative goal tells the iteration not to collect subtrees.
  stack__iter(self, version, summarize_stack_callback, &session, -1);

  StackHead *head = &self->heads.contents[version];
  StackSummary *previous = head->summary;
  if (previous) {
    array_delete(previous);
    ts_free(previous);
  }
  head->summary = session.summary;
}
// Summary last recorded for `version`; NULL if the head has none. The head
// keeps ownership.
StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->summary;
}
// Dynamic precedence stored on the top node of `version`.
int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->node->dynamic_precedence;
}
// Report whether `version` has made progress since an error. This is true
// when the top node has no error cost. Otherwise the first links are followed
// downwards: a subtree with nonzero total bytes means progress; an empty,
// error-free subtree on a node whose count exceeds the head's error baseline
// is skipped; anything else means no progress.
bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  const StackNode *node = head->node;
  if (node->error_cost == 0) return true;

  for (;;) {
    if (!node || node->link_count == 0) return false;

    Subtree subtree = node->links[0].subtree;
    if (!subtree.ptr) return false;
    if (ts_subtree_total_bytes(subtree) > 0) return true;

    bool can_skip =
      node->node_count > head->node_count_at_last_error &&
      ts_subtree_error_cost(subtree) == 0;
    if (!can_skip) return false;

    node = node->links[0].node;
  }
}
// Release everything held by `version` and remove it from the list of heads;
// later versions shift down by one.
void ts_stack_remove_version(Stack *self, StackVersion version) {
  StackHead *doomed = array_get(&self->heads, version);
  stack_head_delete(doomed, &self->node_pool, self->subtree_pool);
  array_erase(&self->heads, version);
}
// Move version `v1` into slot `v2` (which must be lower), discarding the head
// previously stored at `v2`. If only the discarded head had a summary, the
// summary is carried over to the moved head.
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
  if (v1 == v2) return;
  assert(v2 < v1);
  assert((uint32_t)v1 < self->heads.size);

  StackHead *moved = &self->heads.contents[v1];
  StackHead *replaced = &self->heads.contents[v2];

  bool inherit_summary = replaced->summary && !moved->summary;
  if (inherit_summary) {
    moved->summary = replaced->summary;
    replaced->summary = NULL;
  }

  stack_head_delete(replaced, &self->node_pool, self->subtree_pool);
  *replaced = *moved;
  array_erase(&self->heads, v1);
}
// Exchange the heads stored at `v1` and `v2`.
void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) {
  StackHead *first = &self->heads.contents[v1];
  StackHead *second = &self->heads.contents[v2];
  StackHead saved = *first;
  *first = *second;
  *second = saved;
}
// Append a copy of `version` and return its index. The copy shares the top
// node and last external token (both retained) but starts without a summary.
StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
  assert(version < self->heads.size);

  StackHead duplicate = self->heads.contents[version];
  array_push(&self->heads, duplicate);

  StackHead *head = array_back(&self->heads);
  stack_node_retain(head->node);
  if (head->last_external_token.ptr) {
    ts_subtree_retain(head->last_external_token);
  }
  head->summary = NULL;
  return self->heads.size - 1;
}
// Merge `version2` into `version1` when `ts_stack_can_merge` allows it: every
// link of version2's top node is added to version1's top node, and version2
// is removed. Returns false, changing nothing, when they cannot be merged.
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
  if (!ts_stack_can_merge(self, version1, version2)) return false;

  StackHead *target = &self->heads.contents[version1];
  StackHead *source = &self->heads.contents[version2];
  for (uint32_t i = 0; i < source->node->link_count; i++) {
    stack_node_add_link(target->node, source->node->links[i], self->subtree_pool);
  }

  // In the error state the merged node count becomes the new error baseline.
  if (target->node->state == ERROR_STATE) {
    target->node_count_at_last_error = target->node->node_count;
  }

  ts_stack_remove_version(self, version2);
  return true;
}
// Two versions can merge when both are active, their top nodes agree on
// state, byte position and error cost, and their last external tokens have
// equal scanner state.
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
  StackHead *head1 = &self->heads.contents[version1];
  StackHead *head2 = &self->heads.contents[version2];

  if (head1->status != StackStatusActive) return false;
  if (head2->status != StackStatusActive) return false;
  if (head1->node->state != head2->node->state) return false;
  if (head1->node->position.bytes != head2->node->position.bytes) return false;
  if (head1->node->error_cost != head2->node->error_cost) return false;
  return ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token);
}
// Mark `version` as halted.
void ts_stack_halt(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  head->status = StackStatusHalted;
}
// Pause `version`, storing `lookahead` on the head (without retaining it) and
// resetting the error baseline to the current node count.
void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead) {
  StackHead *head = array_get(&self->heads, version);
  head->node_count_at_last_error = head->node->node_count;
  head->lookahead_when_paused = lookahead;
  head->status = StackStatusPaused;
}
// True when `version` has active status.
bool ts_stack_is_active(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusActive;
}
// True when `version` has halted status.
bool ts_stack_is_halted(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusHalted;
}
// True when `version` has paused status.
bool ts_stack_is_paused(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusPaused;
}
// Reactivate a paused version and hand back the lookahead that was stored
// when it was paused. The head no longer references that subtree afterwards.
Subtree ts_stack_resume(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  assert(head->status == StackStatusPaused);

  Subtree stored_lookahead = head->lookahead_when_paused;
  head->lookahead_when_paused = NULL_SUBTREE;
  head->status = StackStatusActive;
  return stored_lookahead;
}
// Drop every version and start over with a single active version that points
// at the base node.
void ts_stack_clear(Stack *self) {
  // Take the new head's reference first, so releasing the old heads cannot
  // drop the base node's count to zero.
  stack_node_retain(self->base_node);

  uint32_t index = 0;
  while (index < self->heads.size) {
    stack_head_delete(&self->heads.contents[index], &self->node_pool, self->subtree_pool);
    index++;
  }
  array_clear(&self->heads);

  StackHead base_head = {
    .node = self->base_node,
    .status = StackStatusActive,
    .last_external_token = NULL_SUBTREE,
    .lookahead_when_paused = NULL_SUBTREE,
  };
  array_push(&self->heads, base_head);
}
// Write a Graphviz "dot" description of the whole stack to `f`, or to stderr
// when `f` is NULL.
//
// Every non-halted version is drawn as an invisible head node pointing at its
// top stack node, with the version's node count, error cost, summary states
// and external scanner state in the tooltip. The graph is then walked from
// each head towards the base; every stack node is emitted once together with
// its links. `self->iterators` is used as scratch space and is overwritten.
//
// Always returns true.
bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
  array_reserve(&self->iterators, 32);
  if (!f) f = stderr;

  fprintf(f, "digraph stack {\n");
  fprintf(f, "rankdir=\"RL\";\n");
  fprintf(f, "edge [arrowhead=none]\n");

  Array(StackNode *) visited_nodes = array_new();

  array_clear(&self->iterators);
  for (uint32_t i = 0; i < self->heads.size; i++) {
    StackHead *head = &self->heads.contents[i];
    if (head->status == StackStatusHalted) continue;

    fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
    fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node);

    if (head->status == StackStatusPaused) {
      fprintf(f, "color=red ");
    }
    fprintf(f,
      "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u",
      i,
      ts_stack_node_count_since_error(self, i),
      ts_stack_error_cost(self, i)
    );

    if (head->summary) {
      fprintf(f, "\nsummary:");
      for (uint32_t j = 0; j < head->summary->size; j++) fprintf(f, " %u", head->summary->contents[j].state);
    }

    if (head->last_external_token.ptr) {
      const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state;
      const char *data = ts_external_scanner_state_data(state);
      fprintf(f, "\nexternal_scanner_state:");
      // Go through `unsigned char`: plain `char` may be signed, in which case
      // a byte >= 0x80 would be sign-extended by the default argument
      // promotion and printed as e.g. "FFFFFF80" instead of "80".
      for (uint32_t j = 0; j < state->length; j++) {
        fprintf(f, " %2X", (unsigned)(unsigned char)data[j]);
      }
    }

    fprintf(f, "\"]\n");
    array_push(&self->iterators, ((StackIterator) {
      .node = head->node
    }));
  }

  // Repeatedly sweep over the iterators until a full sweep finds only nodes
  // that were already printed.
  bool all_iterators_done = false;
  while (!all_iterators_done) {
    all_iterators_done = true;

    for (uint32_t i = 0; i < self->iterators.size; i++) {
      StackIterator iterator = self->iterators.contents[i];
      StackNode *node = iterator.node;

      // Skip nodes that have been emitted already.
      for (uint32_t j = 0; j < visited_nodes.size; j++) {
        if (visited_nodes.contents[j] == node) {
          node = NULL;
          break;
        }
      }

      if (!node) continue;
      all_iterators_done = false;

      fprintf(f, "node_%p [", (void *)node);
      if (node->state == ERROR_STATE) {
        fprintf(f, "label=\"?\"");
      } else if (
        node->link_count == 1 &&
        node->links[0].subtree.ptr &&
        ts_subtree_extra(node->links[0].subtree)
      ) {
        fprintf(f, "shape=point margin=0 label=\"\"");
      } else {
        fprintf(f, "label=\"%d\"", node->state);
      }

      fprintf(
        f,
        " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
        node->position.extent.row + 1,
        node->position.extent.column,
        node->node_count,
        node->error_cost,
        node->dynamic_precedence
      );

      for (int j = 0; j < node->link_count; j++) {
        StackLink link = node->links[j];
        fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node);
        if (link.is_pending) fprintf(f, "style=dashed ");
        if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray ");

        if (!link.subtree.ptr) {
          fprintf(f, "color=red");
        } else {
          fprintf(f, "label=\"");
          bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree);
          if (quoted) fprintf(f, "'");
          ts_language_write_symbol_as_dot_string(language, f, ts_subtree_symbol(link.subtree));
          if (quoted) fprintf(f, "'");
          fprintf(f, "\"");
          fprintf(
            f,
            "labeltooltip=\"error_cost: %u\ndynamic_precedence: %" PRId32 "\"",
            ts_subtree_error_cost(link.subtree),
            ts_subtree_dynamic_precedence(link.subtree)
          );
        }

        fprintf(f, "];\n");

        // The first link continues with the current iterator slot; every
        // other link gets a new iterator appended to the scratch array.
        StackIterator *next_iterator;
        if (j == 0) {
          next_iterator = &self->iterators.contents[i];
        } else {
          array_push(&self->iterators, iterator);
          next_iterator = array_back(&self->iterators);
        }
        next_iterator->node = link.node;
      }

      array_push(&visited_nodes, node);
    }
  }

  fprintf(f, "}\n");

  array_delete(&visited_nodes);
  return true;
}
#undef forceinline

133
vendor/tree-sitter/src/stack.h vendored Normal file
View File

@@ -0,0 +1,133 @@
#ifndef TREE_SITTER_PARSE_STACK_H_
#define TREE_SITTER_PARSE_STACK_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./array.h"
#include "./subtree.h"
#include "./error_costs.h"
#include <stdio.h>
typedef struct Stack Stack;
// Index of one version (head) of the stack.
typedef unsigned StackVersion;
#define STACK_VERSION_NONE ((StackVersion)-1)
// The result of popping along one path: the subtrees that were removed and
// the version that now ends where the pop stopped.
typedef struct {
SubtreeArray subtrees;
StackVersion version;
} StackSlice;
typedef Array(StackSlice) StackSliceArray;
// One entry of a stack summary: a parse state found at a given depth below
// the top of the stack, with the position of the node holding it.
typedef struct {
Length position;
unsigned depth;
TSStateId state;
} StackSummaryEntry;
typedef Array(StackSummaryEntry) StackSummary;
// Create a stack.
Stack *ts_stack_new(SubtreePool *);
// Release the memory reserved for a given stack.
void ts_stack_delete(Stack *);
// Get the stack's current number of versions.
uint32_t ts_stack_version_count(const Stack *);
// Get the state at the top of the given version of the stack. If the stack is
// empty, this returns the state of the base node, which `ts_stack_new`
// creates with state 1.
TSStateId ts_stack_state(const Stack *, StackVersion);
// Get the last external token associated with a given version of the stack.
Subtree ts_stack_last_external_token(const Stack *, StackVersion);
// Set the last external token associated with a given version of the stack.
void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree );
// Get the position of the given version of the stack within the document.
Length ts_stack_position(const Stack *, StackVersion);
// Push a tree and state onto the given version of the stack.
//
// This transfers ownership of the tree to the Stack. Callers that
// need to retain ownership of the tree for their own purposes should
// first retain the tree.
void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId);
// Pop the given number of entries from the given version of the stack. This
// operation can increase the number of stack versions by revealing multiple
// versions which had previously been merged. It returns an array that
// specifies the index of each revealed version and the trees that were
// removed from that version.
StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count);
// Remove an error at the top of the given version of the stack.
SubtreeArray ts_stack_pop_error(Stack *, StackVersion);
// Remove any pending trees from the top of the given version of the stack.
StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);
// Remove all trees from the given version of the stack.
StackSliceArray ts_stack_pop_all(Stack *, StackVersion);
// Get the maximum number of tree nodes reachable from this version of the stack
// since the last error was detected.
unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);
// Get the dynamic precedence stored on the top node of the given version.
int ts_stack_dynamic_precedence(Stack *, StackVersion);
// Determine whether the given version has made progress since an error: true
// when its error cost is zero, or when a subtree with a nonzero byte length
// is found near the top of the stack.
bool ts_stack_has_advanced_since_error(const Stack *, StackVersion);
// Compute a summary of all the parse states near the top of the given
// version of the stack and store the summary for later retrieval.
void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);
// Retrieve a summary of all the parse states near the top of the
// given version of the stack.
StackSummary *ts_stack_get_summary(Stack *, StackVersion);
// Get the total cost of all errors on the given version of the stack.
unsigned ts_stack_error_cost(const Stack *, StackVersion version);
// Merge the given two stack versions if possible, returning true
// if they were successfully merged and false otherwise.
bool ts_stack_merge(Stack *, StackVersion, StackVersion);
// Determine whether the given two stack versions can be merged.
bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);
// Make a paused version active again and return the lookahead tree that was
// stored when it was paused.
Subtree ts_stack_resume(Stack *, StackVersion);
// Mark the given version as paused and store the given lookahead tree on it.
void ts_stack_pause(Stack *, StackVersion, Subtree);
// Mark the given version as halted.
void ts_stack_halt(Stack *, StackVersion);
// Query the status of the given version.
bool ts_stack_is_active(const Stack *, StackVersion);
bool ts_stack_is_paused(const Stack *, StackVersion);
bool ts_stack_is_halted(const Stack *, StackVersion);
// Move the first version into the slot of the second (which must be the lower
// index), discarding the version previously stored there.
void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);
// Exchange the two given versions.
void ts_stack_swap_versions(Stack *, StackVersion, StackVersion);
// Duplicate the given version and return the index of the copy.
StackVersion ts_stack_copy_version(Stack *, StackVersion);
// Remove the given version from the stack.
void ts_stack_remove_version(Stack *, StackVersion);
// Remove all versions and reset the stack to a single active version at its
// base node.
void ts_stack_clear(Stack *);
// Write a Graphviz representation of the stack to the given file, or to
// stderr when the file is NULL.
bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *);
typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSE_STACK_H_

1060
vendor/tree-sitter/src/subtree.c vendored Normal file

File diff suppressed because it is too large Load Diff

382
vendor/tree-sitter/src/subtree.h vendored Normal file
View File

@@ -0,0 +1,382 @@
#ifndef TREE_SITTER_SUBTREE_H_
#define TREE_SITTER_SUBTREE_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include "./length.h"
#include "./array.h"
#include "./error_costs.h"
#include "./host.h"
#include "tree_sitter/api.h"
#include "./parser.h"
#define TS_TREE_STATE_NONE USHRT_MAX
#define NULL_SUBTREE ((Subtree) {.ptr = NULL})
// The serialized state of an external scanner.
//
// Every time an external token subtree is created after a call to an
// external scanner, the scanner's `serialize` function is called to
// retrieve a serialized copy of its state. The bytes are then copied
// onto the subtree itself so that the scanner's state can later be
// restored using its `deserialize` function.
//
// Small byte arrays are stored inline, and long ones are allocated
// separately on the heap.
typedef struct {
// The two storage forms share the same bytes; which one is active is
// decided by the code in subtree.c.
// NOTE(review): presumably chosen by comparing `length` against
// sizeof(short_data) - confirm against ts_external_scanner_state_init.
union {
// Separately allocated buffer holding a long serialized state.
char *long_data;
// Inline buffer holding a short serialized state.
char short_data[24];
};
// Number of serialized bytes.
uint32_t length;
} ExternalScannerState;
// A compact representation of a subtree.
//
// This representation is used for small leaf nodes that are not
// errors, and were not created by an external scanner.
//
// The idea behind the layout of this struct is that the `is_inline`
// bit will fall exactly into the same location as the least significant
// bit of the pointer in `Subtree` or `MutableSubtree`, respectively.
// Because of alignment, for any valid pointer this will be 0, giving
// us the opportunity to make use of this bit to signify whether to use
// the pointer or the inline struct.
typedef struct SubtreeInlineData SubtreeInlineData;
// Six one-bit flags that every layout variant below contains. The macro
// only exists so the three struct definitions cannot drift apart; it is
// undefined again once they have been declared.
#define SUBTREE_BITS \
bool visible : 1; \
bool named : 1; \
bool extra : 1; \
bool has_changes : 1; \
bool is_missing : 1; \
bool is_keyword : 1;
// Size fields that every layout variant below contains. The field widths
// limit what an inline subtree can describe: `padding_rows` and
// `lookahead_bytes` are 4 bits wide, the other fields are 8 bits wide.
#define SUBTREE_SIZE \
uint8_t padding_columns; \
uint8_t padding_rows : 4; \
uint8_t lookahead_bytes : 4; \
uint8_t padding_bytes; \
uint8_t size_bytes;
// The member order is selected per byte order and pointer width; per the
// comment above, the goal is for `is_inline` to overlap the pointer's
// least significant bit.
#if TS_BIG_ENDIAN
#if TS_PTR_SIZE == 32
// Big-endian, 32-bit pointers: the flag byte ends the first four bytes and
// `is_inline` is the last bit declared in it.
struct SubtreeInlineData {
uint16_t parse_state;
uint8_t symbol;
SUBTREE_BITS
bool unused : 1;
bool is_inline : 1;
SUBTREE_SIZE
};
#else
// Big-endian, other pointer sizes: the size fields come first so that the
// flag byte is the final byte of the struct.
struct SubtreeInlineData {
SUBTREE_SIZE
uint16_t parse_state;
uint8_t symbol;
SUBTREE_BITS
bool unused : 1;
bool is_inline : 1;
};
#endif
#else
// Little-endian: `is_inline` is the first bit of the first byte.
struct SubtreeInlineData {
bool is_inline : 1;
SUBTREE_BITS
uint8_t symbol;
uint16_t parse_state;
SUBTREE_SIZE
};
#endif
#undef SUBTREE_BITS
#undef SUBTREE_SIZE
// A heap-allocated representation of a subtree.
//
// This representation is used for parent nodes, external tokens,
// errors, and other leaf nodes whose data is too large to fit into
// the inline representation.
//
// The trailing anonymous union holds data that only applies to one kind of
// subtree; which member is meaningful is described on each member.
typedef struct {
// NOTE(review): presumably the reference count manipulated by
// ts_subtree_retain / ts_subtree_release - confirm in subtree.c.
volatile uint32_t ref_count;
// Padding preceding the node's content, and the size of the content itself
// (see ts_subtree_padding / ts_subtree_size).
Length padding;
Length size;
uint32_t lookahead_bytes;
uint32_t error_cost;
// Number of children; the accessors in this header treat 0 as "leaf".
uint32_t child_count;
TSSymbol symbol;
TSStateId parse_state;
// One-bit flags. The first six of the inline representation's flags are
// mirrored here; the remaining ones exist only on heap subtrees (the
// accessors in this header return false for them on inline subtrees).
bool visible : 1;
bool named : 1;
bool extra : 1;
bool fragile_left : 1;
bool fragile_right : 1;
bool has_changes : 1;
bool has_external_tokens : 1;
bool has_external_scanner_state_change : 1;
bool depends_on_column: 1;
bool is_missing : 1;
bool is_keyword : 1;
union {
// Non-terminal subtrees (`child_count > 0`)
struct {
uint32_t visible_child_count;
uint32_t named_child_count;
uint32_t visible_descendant_count;
int32_t dynamic_precedence;
uint16_t repeat_depth;
uint16_t production_id;
// Symbol and parse state returned by ts_subtree_leaf_symbol /
// ts_subtree_leaf_parse_state for non-terminal subtrees.
struct {
TSSymbol symbol;
TSStateId parse_state;
} first_leaf;
};
// External terminal subtrees (`child_count == 0 && has_external_tokens`)
ExternalScannerState external_scanner_state;
// Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
int32_t lookahead_char;
};
} SubtreeHeapData;
// The fundamental building block of a syntax tree.
//
// Either an inline value (`data`, valid when `data.is_inline` is set) or a
// pointer to immutable heap data (`ptr`). Always test `data.is_inline`
// before dereferencing `ptr`.
typedef union {
SubtreeInlineData data;
const SubtreeHeapData *ptr;
} Subtree;
// Like Subtree, but mutable.
//
// The only difference is that `ptr` points to non-const heap data, so the
// heap representation may be modified through it.
typedef union {
SubtreeInlineData data;
SubtreeHeapData *ptr;
} MutableSubtree;
// Growable arrays of subtrees, in immutable and mutable flavours.
typedef Array(Subtree) SubtreeArray;
typedef Array(MutableSubtree) MutableSubtreeArray;
// Scratch storage passed to the subtree functions that allocate or release
// subtrees.
// NOTE(review): `free_trees` presumably caches released heap nodes for reuse
// and `tree_stack` presumably serves as a work list - confirm in subtree.c.
typedef struct {
MutableSubtreeArray free_trees;
MutableSubtreeArray tree_stack;
} SubtreePool;
void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
const char *ts_external_scanner_state_data(const ExternalScannerState *);
bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *, unsigned);
void ts_external_scanner_state_delete(ExternalScannerState *self);
void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
void ts_subtree_array_clear(SubtreePool *, SubtreeArray *);
void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *);
void ts_subtree_array_reverse(SubtreeArray *);
SubtreePool ts_subtree_pool_new(uint32_t capacity);
void ts_subtree_pool_delete(SubtreePool *);
Subtree ts_subtree_new_leaf(
SubtreePool *, TSSymbol, Length, Length, uint32_t,
TSStateId, bool, bool, bool, const TSLanguage *
);
Subtree ts_subtree_new_error(
SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
);
MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *);
Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, const TSLanguage *);
MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
void ts_subtree_retain(Subtree);
void ts_subtree_release(SubtreePool *, Subtree);
int ts_subtree_compare(Subtree, Subtree, SubtreePool *);
void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *);
void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *);
void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
char *ts_subtree_string(Subtree, TSSymbol, bool, const TSLanguage *, bool include_all);
void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *);
Subtree ts_subtree_last_external_token(Subtree);
const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self);
bool ts_subtree_external_scanner_state_eq(Subtree, Subtree);
// Accessors for the fields that exist in both the inline and the heap
// representation. Each one reads from `data` when the subtree is inline and
// from `*ptr` otherwise.

static inline TSSymbol ts_subtree_symbol(Subtree self) {
  if (self.data.is_inline) return self.data.symbol;
  return self.ptr->symbol;
}

static inline bool ts_subtree_visible(Subtree self) {
  if (self.data.is_inline) return self.data.visible;
  return self.ptr->visible;
}

static inline bool ts_subtree_named(Subtree self) {
  if (self.data.is_inline) return self.data.named;
  return self.ptr->named;
}

static inline bool ts_subtree_extra(Subtree self) {
  if (self.data.is_inline) return self.data.extra;
  return self.ptr->extra;
}

static inline bool ts_subtree_has_changes(Subtree self) {
  if (self.data.is_inline) return self.data.has_changes;
  return self.ptr->has_changes;
}

static inline bool ts_subtree_missing(Subtree self) {
  if (self.data.is_inline) return self.data.is_missing;
  return self.ptr->is_missing;
}

static inline bool ts_subtree_is_keyword(Subtree self) {
  if (self.data.is_inline) return self.data.is_keyword;
  return self.ptr->is_keyword;
}

static inline TSStateId ts_subtree_parse_state(Subtree self) {
  if (self.data.is_inline) return self.data.parse_state;
  return self.ptr->parse_state;
}

static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) {
  if (self.data.is_inline) return self.data.lookahead_bytes;
  return self.ptr->lookahead_bytes;
}
// Compute how many bytes a heap-allocated subtree with `child_count`
// children occupies: the fixed-size header plus one `Subtree` slot per child.
static inline size_t ts_subtree_alloc_size(uint32_t child_count) {
  size_t children_size = child_count * sizeof(Subtree);
  return sizeof(SubtreeHeapData) + children_size;
}
// Get a subtree's children, which are allocated immediately before the
// tree's own heap data.
//
// Evaluates to NULL for inline subtrees. For heap subtrees the result is the
// heap pointer moved back by `child_count` elements of type `Subtree`.
// Note that `self` is evaluated more than once, so it must be free of side
// effects.
#define ts_subtree_children(self) \
((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count)
// Set the `extra` flag on a subtree, writing to whichever representation
// (inline or heap) the subtree currently uses.
static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) {
  if (!self->data.is_inline) {
    self->ptr->extra = is_extra;
    return;
  }
  self->data.extra = is_extra;
}
// Get the leaf symbol of a subtree: its own symbol when it has no children
// (inline or heap), otherwise the symbol stored in `first_leaf`.
static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
  if (self.data.is_inline) {
    return self.data.symbol;
  }
  return self.ptr->child_count == 0
    ? self.ptr->symbol
    : self.ptr->first_leaf.symbol;
}
// Get the leaf parse state of a subtree: its own parse state when it has no
// children (inline or heap), otherwise the state stored in `first_leaf`.
static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
  if (self.data.is_inline) {
    return self.data.parse_state;
  }
  return self.ptr->child_count == 0
    ? self.ptr->parse_state
    : self.ptr->first_leaf.parse_state;
}
// Get the padding that precedes a subtree's content. Heap subtrees store it
// directly; for inline subtrees it is rebuilt from the compact fields.
static inline Length ts_subtree_padding(Subtree self) {
  if (!self.data.is_inline) {
    return self.ptr->padding;
  }
  Length inline_padding = {
    self.data.padding_bytes,
    {self.data.padding_rows, self.data.padding_columns}
  };
  return inline_padding;
}
// Get the size of a subtree's content, excluding padding. Inline subtrees
// never span rows, so their extent is zero rows and `size_bytes` columns.
static inline Length ts_subtree_size(Subtree self) {
  if (!self.data.is_inline) {
    return self.ptr->size;
  }
  Length inline_size = {self.data.size_bytes, {0, self.data.size_bytes}};
  return inline_size;
}
// Get the combined length of a subtree's padding and content.
static inline Length ts_subtree_total_size(Subtree self) {
  Length padding = ts_subtree_padding(self);
  Length size = ts_subtree_size(self);
  return length_add(padding, size);
}
// Get the byte count of a subtree's padding and content combined.
static inline uint32_t ts_subtree_total_bytes(Subtree self) {
  Length total = ts_subtree_total_size(self);
  return total.bytes;
}
// Get the number of children of a subtree; inline subtrees have none.
static inline uint32_t ts_subtree_child_count(Subtree self) {
  if (self.data.is_inline) {
    return 0;
  }
  return self.ptr->child_count;
}
// Get the stored repeat depth of a heap subtree, or 0 for an inline one.
// The child count is not consulted here.
static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
  if (self.data.is_inline) {
    return 0;
  }
  return self.ptr->repeat_depth;
}
// Return 1 when the subtree is a heap node that is neither named nor
// visible and has at least one child, and 0 otherwise.
static inline uint32_t ts_subtree_is_repetition(Subtree self) {
  if (self.data.is_inline) return 0;
  if (self.ptr->named || self.ptr->visible) return 0;
  return self.ptr->child_count != 0;
}
// Get the stored visible-descendant count of a subtree. Leaves (inline
// subtrees and heap subtrees without children) report 0.
static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) {
  if (self.data.is_inline) return 0;
  if (self.ptr->child_count == 0) return 0;
  return self.ptr->visible_descendant_count;
}
// Get the stored visible-child count of a subtree, or 0 when it has no
// children at all.
static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
  return ts_subtree_child_count(self) > 0
    ? self.ptr->visible_child_count
    : 0;
}
// Get the error cost of a subtree. Missing subtrees have a fixed cost; other
// inline subtrees cost nothing and heap subtrees report their stored cost.
static inline uint32_t ts_subtree_error_cost(Subtree self) {
  if (ts_subtree_missing(self)) {
    return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
  }
  if (self.data.is_inline) {
    return 0;
  }
  return self.ptr->error_cost;
}
// Get the stored dynamic precedence of a subtree. Leaves (inline subtrees
// and heap subtrees without children) report 0.
static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
  bool is_leaf = self.data.is_inline || self.ptr->child_count == 0;
  if (is_leaf) {
    return 0;
  }
  return self.ptr->dynamic_precedence;
}
// Get the stored production id of a subtree, or 0 when it has no children.
static inline uint16_t ts_subtree_production_id(Subtree self) {
  return ts_subtree_child_count(self) > 0
    ? self.ptr->production_id
    : 0;
}
// Read the `fragile_left` flag; inline subtrees always report false.
static inline bool ts_subtree_fragile_left(Subtree self) {
  if (self.data.is_inline) {
    return false;
  }
  return self.ptr->fragile_left;
}
// Read the `fragile_right` flag; inline subtrees always report false.
static inline bool ts_subtree_fragile_right(Subtree self) {
  if (self.data.is_inline) {
    return false;
  }
  return self.ptr->fragile_right;
}
// Read the `has_external_tokens` flag; inline subtrees always report false.
static inline bool ts_subtree_has_external_tokens(Subtree self) {
  if (self.data.is_inline) {
    return false;
  }
  return self.ptr->has_external_tokens;
}
// Read the `has_external_scanner_state_change` flag; inline subtrees always
// report false.
static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) {
  if (self.data.is_inline) {
    return false;
  }
  return self.ptr->has_external_scanner_state_change;
}
// Read the `depends_on_column` flag; inline subtrees always report false.
static inline bool ts_subtree_depends_on_column(Subtree self) {
  if (self.data.is_inline) {
    return false;
  }
  return self.ptr->depends_on_column;
}
// Report whether a subtree is fragile on either side; inline subtrees are
// never fragile.
static inline bool ts_subtree_is_fragile(Subtree self) {
  if (self.data.is_inline) {
    return false;
  }
  return self.ptr->fragile_left || self.ptr->fragile_right;
}
// Report whether a subtree's symbol is the built-in error symbol.
static inline bool ts_subtree_is_error(Subtree self) {
  TSSymbol symbol = ts_subtree_symbol(self);
  return symbol == ts_builtin_sym_error;
}
// Report whether a subtree's symbol is the built-in end-of-input symbol.
static inline bool ts_subtree_is_eof(Subtree self) {
  TSSymbol symbol = ts_subtree_symbol(self);
  return symbol == ts_builtin_sym_end;
}
// Convert a MutableSubtree into a Subtree referring to the same node.
// The conversion copies the `data` member of the union; no heap data is
// copied and no reference count is touched here.
static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
Subtree result;
result.data = self.data;
return result;
}
// Convert a Subtree into a MutableSubtree referring to the same node,
// dropping the `const` qualifier of the heap pointer. Nothing is checked or
// copied here, so the caller is responsible for making sure that mutating
// the node is safe.
// NOTE(review): presumably "safe" means the node is not shared (compare
// ts_subtree_make_mut) - confirm against subtree.c.
static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
MutableSubtree result;
result.data = self.data;
return result;
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_SUBTREE_H_

165
vendor/tree-sitter/src/tree.c vendored Normal file
View File

@@ -0,0 +1,165 @@
#define _POSIX_C_SOURCE 200112L
#include "tree_sitter/api.h"
#include "./array.h"
#include "./get_changed_ranges.h"
#include "./length.h"
#include "./subtree.h"
#include "./tree_cursor.h"
#include "./tree.h"
// Allocate a new tree for `root`. The tree stores the result of
// `ts_language_copy(language)` and its own copy of the `included_ranges`
// array, so the caller keeps ownership of the array that was passed in.
TSTree *ts_tree_new(
  Subtree root, const TSLanguage *language,
  const TSRange *included_ranges, unsigned included_range_count
) {
  TSTree *tree = ts_malloc(sizeof(TSTree));
  tree->root = root;
  tree->language = ts_language_copy(language);

  // Duplicate the range list into storage owned by the tree.
  TSRange *ranges = ts_calloc(included_range_count, sizeof(TSRange));
  memcpy(ranges, included_ranges, included_range_count * sizeof(TSRange));
  tree->included_ranges = ranges;
  tree->included_range_count = included_range_count;
  return tree;
}
TSTree *ts_tree_copy(const TSTree *self) {
ts_subtree_retain(self->root);
return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count);
}
// Destroy a tree: release its root subtree, delete its language reference
// and free the range list and the tree object. A NULL tree is ignored.
void ts_tree_delete(TSTree *self) {
  if (self == NULL) {
    return;
  }

  // Releasing a subtree needs a pool; a temporary empty one is sufficient.
  SubtreePool scratch_pool = ts_subtree_pool_new(0);
  ts_subtree_release(&scratch_pool, self->root);
  ts_subtree_pool_delete(&scratch_pool);

  ts_language_delete(self->language);
  ts_free(self->included_ranges);
  ts_free(self);
}
TSNode ts_tree_root_node(const TSTree *self) {
return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0);
}
// Get the root node of a tree with its position shifted by the given byte
// and point offset. The offset is added in front of the root's padding.
TSNode ts_tree_root_node_with_offset(
  const TSTree *self,
  uint32_t offset_bytes,
  TSPoint offset_extent
) {
  Length offset = {offset_bytes, offset_extent};
  Length root_position = length_add(offset, ts_subtree_padding(self->root));
  return ts_node_new(self, &self->root, root_position, 0);
}
// Get the language stored in the tree. The pointer is returned as-is; no
// additional reference is taken here.
const TSLanguage *ts_tree_language(const TSTree *self) {
  const TSLanguage *language = self->language;
  return language;
}
// Adjust a tree in place so that it stays in sync with an edit made to the
// source text: first every included range is shifted or clamped, then the
// root subtree is replaced by the result of `ts_subtree_edit`.
void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
for (unsigned i = 0; i < self->included_range_count; i++) {
TSRange *range = &self->included_ranges[i];
// --- range end ---
if (range->end_byte >= edit->old_end_byte) {
// The end lies at or after the edited region: shift it by the difference
// between the new and the old end of the edit. An end of UINT32_MAX is
// left untouched.
if (range->end_byte != UINT32_MAX) {
range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte);
range->end_point = point_add(
edit->new_end_point,
point_sub(range->end_point, edit->old_end_point)
);
// A result smaller than `new_end_byte` means the unsigned addition
// wrapped around; saturate to the maximum position instead.
if (range->end_byte < edit->new_end_byte) {
range->end_byte = UINT32_MAX;
range->end_point = POINT_MAX;
}
}
} else if (range->end_byte > edit->start_byte) {
// The end lies inside the edited region: clamp it to the edit's start.
range->end_byte = edit->start_byte;
range->end_point = edit->start_point;
}
// --- range start (same logic, without the UINT32_MAX exemption) ---
if (range->start_byte >= edit->old_end_byte) {
range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte);
range->start_point = point_add(
edit->new_end_point,
point_sub(range->start_point, edit->old_end_point)
);
// Saturate on unsigned wrap-around, as above.
if (range->start_byte < edit->new_end_byte) {
range->start_byte = UINT32_MAX;
range->start_point = POINT_MAX;
}
} else if (range->start_byte > edit->start_byte) {
range->start_byte = edit->start_byte;
range->start_point = edit->start_point;
}
}
// Editing the subtree needs a pool; a temporary empty one is used.
SubtreePool pool = ts_subtree_pool_new(0);
self->root = ts_subtree_edit(self->root, edit, &pool);
ts_subtree_pool_delete(&pool);
}
// Return a newly allocated copy of the tree's included ranges and store the
// number of ranges in `*length`. The copy is allocated with `ts_calloc`;
// the tree keeps its own array.
TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) {
  unsigned range_count = self->included_range_count;
  *length = range_count;
  TSRange *copy = ts_calloc(range_count, sizeof(TSRange));
  memcpy(copy, self->included_ranges, range_count * sizeof(TSRange));
  return copy;
}
// Compute the ranges that differ between two trees. The result array is
// produced by `ts_subtree_get_changed_ranges`, which also yields the number
// of ranges stored in `*length`. The old tree's language is used for the
// comparison.
TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) {
  // One cursor per tree, each starting at that tree's root node.
  TreeCursor old_cursor = {NULL, array_new(), 0};
  TreeCursor new_cursor = {NULL, array_new(), 0};
  ts_tree_cursor_init(&old_cursor, ts_tree_root_node(old_tree));
  ts_tree_cursor_init(&new_cursor, ts_tree_root_node(new_tree));

  // Differences between the two trees' included ranges are collected first
  // and passed on to the subtree comparison.
  TSRangeArray range_differences = array_new();
  ts_range_array_get_changed_ranges(
    old_tree->included_ranges, old_tree->included_range_count,
    new_tree->included_ranges, new_tree->included_range_count,
    &range_differences
  );

  TSRange *changed_ranges;
  *length = ts_subtree_get_changed_ranges(
    &old_tree->root, &new_tree->root, &old_cursor, &new_cursor,
    old_tree->language, &range_differences, &changed_ranges
  );

  // Free the temporary storage.
  array_delete(&range_differences);
  array_delete(&old_cursor.stack);
  array_delete(&new_cursor.stack);
  return changed_ranges;
}
#ifdef _WIN32
#include <io.h>
#include <windows.h>
// Duplicate an OS handle and wrap the duplicate in a new C runtime file
// descriptor. Returns the descriptor, or -1 on failure.
int _ts_dup(HANDLE handle) {
  HANDLE dup_handle;
  if (!DuplicateHandle(
    GetCurrentProcess(), handle,
    GetCurrentProcess(), &dup_handle,
    0, FALSE, DUPLICATE_SAME_ACCESS
  )) return -1;

  int fd = _open_osfhandle((intptr_t)dup_handle, 0);
  if (fd < 0) {
    // The descriptor only takes ownership of the handle on success, so the
    // duplicate has to be closed here to avoid leaking it.
    CloseHandle(dup_handle);
  }
  return fd;
}
// Write a DOT graph of the tree to the given file descriptor.
//
// The descriptor is duplicated first, so closing the stream used for
// writing does not close the caller's descriptor. If the descriptor cannot
// be duplicated or opened as a stream, nothing is written.
void ts_tree_print_dot_graph(const TSTree *self, int fd) {
  int dup_fd = _ts_dup((HANDLE)_get_osfhandle(fd));
  if (dup_fd < 0) return;

  FILE *file = _fdopen(dup_fd, "a");
  if (!file) {
    // No stream owns the duplicate, so close it directly.
    _close(dup_fd);
    return;
  }

  ts_subtree_print_dot_graph(self->root, self->language, file);
  fclose(file);
}
#else
#include <unistd.h>
// Duplicate a file descriptor with `dup`. Returns whatever `dup` returns.
int _ts_dup(int file_descriptor) {
  int duplicate = dup(file_descriptor);
  return duplicate;
}
// Write a DOT graph of the tree to the given file descriptor.
//
// The descriptor is duplicated first, so closing the stream used for
// writing does not close the caller's descriptor. If the descriptor cannot
// be duplicated or opened as a stream, nothing is written.
void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) {
  int dup_fd = _ts_dup(file_descriptor);
  if (dup_fd < 0) return;

  FILE *file = fdopen(dup_fd, "a");
  if (!file) {
    // No stream owns the duplicate, so close it directly.
    close(dup_fd);
    return;
  }

  ts_subtree_print_dot_graph(self->root, self->language, file);
  fclose(file);
}
#endif

31
vendor/tree-sitter/src/tree.h vendored Normal file
View File

@@ -0,0 +1,31 @@
#ifndef TREE_SITTER_TREE_H_
#define TREE_SITTER_TREE_H_
#include "./subtree.h"
#ifdef __cplusplus
extern "C" {
#endif
// Associates a child subtree with its parent subtree, a position and an
// alias symbol.
// NOTE(review): no user of this type is visible in this part of the file;
// the name suggests a cache for parent lookups - confirm before relying on
// it.
typedef struct {
const Subtree *child;
const Subtree *parent;
Length position;
TSSymbol alias_symbol;
} ParentCacheEntry;
// A syntax tree, as created by `ts_tree_new`.
struct TSTree {
// Root of the tree; retained by ts_tree_copy and released by ts_tree_delete.
Subtree root;
// Result of `ts_language_copy`; deleted again in ts_tree_delete.
const TSLanguage *language;
// Array of `included_range_count` ranges owned by the tree (allocated in
// ts_tree_new, freed in ts_tree_delete).
TSRange *included_ranges;
unsigned included_range_count;
};
TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned);
TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_TREE_H_

714
vendor/tree-sitter/src/tree_cursor.c vendored Normal file
View File

@@ -0,0 +1,714 @@
#include "tree_sitter/api.h"
#include "./alloc.h"
#include "./tree_cursor.h"
#include "./language.h"
#include "./tree.h"
// Iteration state used to walk the children of one subtree, forwards with
// `ts_tree_cursor_child_iterator_next` or backwards with
// `ts_tree_cursor_child_iterator_previous`.
typedef struct {
// Subtree whose children are visited; NULL_SUBTREE when there are none.
Subtree parent;
const TSTree *tree;
// Position of the child that will be yielded next.
Length position;
// Index of the child that will be yielded next, counting all children.
uint32_t child_index;
// Index of that child counting only children that are not `extra`; used to
// index `alias_sequence`.
uint32_t structural_child_index;
// Descendant index assigned to the child that will be yielded next.
uint32_t descendant_index;
// Alias sequence of the parent's production, or NULL.
const TSSymbol *alias_sequence;
} CursorChildIterator;
// CursorChildIterator
// Decide whether the stack entry at `index` counts as a visible node.
// The bottom entry is always visible, and so is any entry whose subtree is
// visible. Otherwise a non-extra entry is visible exactly when its parent's
// production gives it an alias.
static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) {
  TreeCursorEntry *entry = &self->stack.contents[index];
  if (index == 0 || ts_subtree_visible(*entry->subtree)) {
    return true;
  }
  if (ts_subtree_extra(*entry->subtree)) {
    return false;
  }

  TreeCursorEntry *parent_entry = &self->stack.contents[index - 1];
  return ts_language_alias_at(
    self->tree->language,
    parent_entry->subtree->ptr->production_id,
    entry->structural_child_index
  );
}
static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) {
TreeCursorEntry *last_entry = array_back(&self->stack);
if (ts_subtree_child_count(*last_entry->subtree) == 0) {
return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL};
}
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
last_entry->subtree->ptr->production_id
);
uint32_t descendant_index = last_entry->descendant_index;
if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) {
descendant_index += 1;
}
return (CursorChildIterator) {
.tree = self->tree,
.parent = *last_entry->subtree,
.position = last_entry->position,
.child_index = 0,
.structural_child_index = 0,
.descendant_index = descendant_index,
.alias_sequence = alias_sequence,
};
}
// Yield the child the iterator currently points at and move the iterator
// forward by one child.
//
// On success `*result` describes the yielded child, `*visible` tells whether
// it is visible (by itself or through an alias), and true is returned.
// Returns false, leaving the outputs untouched, when the iterator has no
// parent or all children have been visited.
static inline bool ts_tree_cursor_child_iterator_next(
CursorChildIterator *self,
TreeCursorEntry *result,
bool *visible
) {
if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false;
const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
// Snapshot the iterator state for this child before it is advanced.
*result = (TreeCursorEntry) {
.subtree = child,
.position = self->position,
.child_index = self->child_index,
.structural_child_index = self->structural_child_index,
.descendant_index = self->descendant_index,
};
*visible = ts_subtree_visible(*child);
bool extra = ts_subtree_extra(*child);
// Only non-extra children take part in the alias sequence and advance the
// structural index.
if (!extra) {
if (self->alias_sequence) {
*visible |= self->alias_sequence[self->structural_child_index];
}
self->structural_child_index++;
}
// Skip the descendant indices used by this child's visible descendants,
// plus one for the child itself if it is visible.
self->descendant_index += ts_subtree_visible_descendant_count(*child);
if (*visible) {
self->descendant_index += 1;
}
// Move the position past this child's content and, if another child
// follows, past that child's padding as well.
self->position = length_add(self->position, ts_subtree_size(*child));
self->child_index++;
if (self->child_index < self->parent.ptr->child_count) {
Subtree next_child = ts_subtree_children(self->parent)[self->child_index];
self->position = length_add(self->position, ts_subtree_padding(next_child));
}
return true;
}
// Compute the position `p` for which `p + b == a`.
//
// This is only possible when `a` is defined and `b` does not span any rows;
// in every other case `LENGTH_UNDEFINED` is returned and the caller has to
// determine the position by other means.
static inline Length length_backtrack(Length a, Length b) {
  bool can_subtract = !length_is_undefined(a) && b.extent.row == 0;
  if (!can_subtract) {
    return LENGTH_UNDEFINED;
  }

  Length difference;
  difference.extent.row = a.extent.row;
  difference.extent.column = a.extent.column - b.extent.column;
  difference.bytes = a.bytes - b.bytes;
  return difference;
}
// Yield the child the iterator currently points at and move the iterator
// backward by one child. This mirrors `ts_tree_cursor_child_iterator_next`.
//
// On success `*result` describes the yielded child, `*visible` tells whether
// it is visible (by itself or through an alias), and true is returned.
// Returns false, leaving the outputs untouched, when the iterator has no
// parent or has already moved in front of the first child.
//
// `result->descendant_index` is not computed here and is therefore zero.
// When stepping back across a child whose padding spans rows, the iterator's
// position becomes LENGTH_UNDEFINED and must be recomputed by the caller.
static inline bool ts_tree_cursor_child_iterator_previous(
  CursorChildIterator *self,
  TreeCursorEntry *result,
  bool *visible
) {
  // Stepping back from child 0 wraps the unsigned index around to
  // UINT32_MAX, which can never be a valid child index, so "not below
  // child_count" means the iteration is over. The check must look at the
  // full 32-bit value: testing only the low byte would also stop at
  // children 255, 511, ... of nodes with many children.
  if (!self->parent.ptr || self->child_index >= self->parent.ptr->child_count) return false;
  const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
  *result = (TreeCursorEntry) {
    .subtree = child,
    .position = self->position,
    .child_index = self->child_index,
    .structural_child_index = self->structural_child_index,
  };
  *visible = ts_subtree_visible(*child);
  bool extra = ts_subtree_extra(*child);
  // NOTE(review): unlike the forward iterator, the structural index is only
  // stepped when an alias sequence exists - confirm this is intended.
  if (!extra && self->alias_sequence) {
    *visible |= self->alias_sequence[self->structural_child_index];
    self->structural_child_index--;
  }

  // Move the position back over this child's padding ...
  self->position = length_backtrack(self->position, ts_subtree_padding(*child));
  self->child_index--;

  // ... and, if a previous child exists (the index did not wrap around),
  // back over that child's content.
  if (self->child_index < self->parent.ptr->child_count) {
    Subtree previous_child = ts_subtree_children(self->parent)[self->child_index];
    Length size = ts_subtree_size(previous_child);
    self->position = length_backtrack(self->position, size);
  }
  return true;
}
// TSTreeCursor - lifecycle
// Create a cursor positioned on `node`. The cursor starts out zeroed and is
// then set up by `ts_tree_cursor_init`.
TSTreeCursor ts_tree_cursor_new(TSNode node) {
  TSTreeCursor cursor = {NULL, NULL, {0, 0, 0}};
  ts_tree_cursor_init((TreeCursor *)&cursor, node);
  return cursor;
}
// Re-position an existing cursor on `node`, discarding its previous state.
void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) {
  TreeCursor *self = (TreeCursor *)_self;
  ts_tree_cursor_init(self, node);
}
// Initialize a cursor so that `node` is its only stack entry. The node's
// tree and alias (`node.context[3]`) are remembered as the cursor's tree and
// root alias symbol.
void ts_tree_cursor_init(TreeCursor *self, TSNode node) {
  self->tree = node.tree;
  self->root_alias_symbol = node.context[3];

  TreeCursorEntry root_entry = {
    .subtree = (const Subtree *)node.id,
    .position = {
      ts_node_start_byte(node),
      ts_node_start_point(node)
    },
    .child_index = 0,
    .structural_child_index = 0,
    .descendant_index = 0,
  };

  array_clear(&self->stack);
  array_push(&self->stack, root_entry);
}
// Free the storage used by the cursor's stack.
void ts_tree_cursor_delete(TSTreeCursor *_self) {
  TreeCursor *cursor = (TreeCursor *)_self;
  array_delete(&cursor->stack);
}
// TSTreeCursor - walking the tree
// Move the cursor to the first child that is either visible or has visible
// children of its own.
//
// Returns TreeCursorStepVisible when the cursor now rests on a visible
// child, TreeCursorStepHidden when it rests on a hidden child that contains
// visible children, and TreeCursorStepNone (cursor unchanged) when there is
// no such child.
TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;
  CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
  TreeCursorEntry entry;
  bool visible;

  while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
    TreeCursorStep step = TreeCursorStepNone;
    if (visible) {
      step = TreeCursorStepVisible;
    } else if (ts_subtree_visible_child_count(*entry.subtree) > 0) {
      step = TreeCursorStepHidden;
    }

    if (step != TreeCursorStepNone) {
      array_push(&self->stack, entry);
      return step;
    }
  }
  return TreeCursorStepNone;
}
// Move the cursor to the first visible child of the current node, descending
// through hidden nodes as needed. Returns true if the cursor moved onto a
// visible node and false otherwise.
bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) {
  for (;;) {
    TreeCursorStep step = ts_tree_cursor_goto_first_child_internal(self);
    if (step == TreeCursorStepHidden) {
      // Landed on a hidden node: keep descending into it.
      continue;
    }
    return step == TreeCursorStepVisible;
  }
}
// Move the cursor to the last child that is either visible or has visible
// children of its own.
//
// All children are scanned from the front and the last suitable one is
// remembered. Returns TreeCursorStepVisible or TreeCursorStepHidden
// depending on the kind of child the cursor now rests on, or
// TreeCursorStepNone (cursor unchanged) when there is no such child.
TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;
  CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
  if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) {
    return TreeCursorStepNone;
  }

  TreeCursorEntry candidate = {0};
  TreeCursorStep candidate_step = TreeCursorStepNone;

  TreeCursorEntry entry;
  bool visible;
  while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
    bool has_visible_children =
      !visible && ts_subtree_visible_child_count(*entry.subtree) > 0;
    if (visible || has_visible_children) {
      candidate = entry;
      candidate_step = visible ? TreeCursorStepVisible : TreeCursorStepHidden;
    }
  }

  if (!candidate.subtree) {
    return TreeCursorStepNone;
  }
  array_push(&self->stack, candidate);
  return candidate_step;
}
// Move the cursor to the last visible child of the current node, descending
// through hidden nodes as needed. Returns true if the cursor moved onto a
// visible node and false otherwise.
bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) {
  for (;;) {
    TreeCursorStep step = ts_tree_cursor_goto_last_child_internal(self);
    if (step == TreeCursorStepHidden) {
      // Landed on a hidden node: keep descending into it.
      continue;
    }
    return step == TreeCursorStepVisible;
  }
}
// Move the cursor to the first visible child whose end is at or after both
// `goal_byte` and `goal_point`, descending through hidden children.
//
// Returns the index of that child among the visible children that were
// passed over on the way, or -1 if no child qualifies, in which case the
// cursor's stack is restored to its size on entry.
static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point(
TSTreeCursor *_self,
uint32_t goal_byte,
TSPoint goal_point
) {
TreeCursor *self = (TreeCursor *)_self;
// Remembered so the stack can be restored if nothing is found.
uint32_t initial_size = self->stack.size;
uint32_t visible_child_index = 0;
bool did_descend;
do {
did_descend = false;
bool visible;
TreeCursorEntry entry;
CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
Length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree));
bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point);
uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree);
if (at_goal) {
if (visible) {
array_push(&self->stack, entry);
return visible_child_index;
}
// A hidden child that reaches the goal: continue the search among its
// children. A hidden child without visible children is skipped.
if (visible_child_count > 0) {
array_push(&self->stack, entry);
did_descend = true;
break;
}
} else if (visible) {
// Children that end before the goal only contribute to the index.
visible_child_index++;
} else {
visible_child_index += visible_child_count;
}
}
} while (did_descend);
self->stack.size = initial_size;
return -1;
}
// Move the cursor to the first child that ends at or after `goal_byte`.
// The point goal is POINT_ZERO, so only the byte offset is decisive.
// Returns the child's index, or -1 if there is no such child.
int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte) {
  int64_t child_index = ts_tree_cursor_goto_first_child_for_byte_and_point(
    self, goal_byte, POINT_ZERO
  );
  return child_index;
}
// Move the cursor to the first child that ends at or after `goal_point`.
// The byte goal is 0, so only the point is decisive.
// Returns the child's index, or -1 if there is no such child.
int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point) {
  int64_t child_index = ts_tree_cursor_goto_first_child_for_byte_and_point(
    self, 0, goal_point
  );
  return child_index;
}
// Move the cursor to an adjacent sibling in the direction determined by
// `advance` (the forward or backward child iterator step function).
//
// Returns TreeCursorStepVisible or TreeCursorStepHidden depending on the
// kind of entry the cursor now rests on, or TreeCursorStepNone when no
// sibling exists, in which case the stack is restored to its size on entry.
TreeCursorStep ts_tree_cursor_goto_sibling_internal(
TSTreeCursor *_self,
bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) {
TreeCursor *self = (TreeCursor *)_self;
uint32_t initial_size = self->stack.size;
// Pop entries one level at a time: siblings are searched next to the
// current node first, then next to each of its ancestors.
while (self->stack.size > 1) {
TreeCursorEntry entry = array_pop(&self->stack);
// Resume iterating the (new) top entry's children at the popped entry.
CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
iterator.child_index = entry.child_index;
iterator.structural_child_index = entry.structural_child_index;
iterator.position = entry.position;
iterator.descendant_index = entry.descendant_index;
bool visible = false;
// The first step yields the popped entry itself; it is consumed here so
// that the loop below starts with the following sibling.
advance(&iterator, &entry, &visible);
// A visible entry that is an ancestor of the starting node (not the
// starting node itself) ends the search.
if (visible && self->stack.size + 1 < initial_size) break;
while (advance(&iterator, &entry, &visible)) {
if (visible) {
array_push(&self->stack, entry);
return TreeCursorStepVisible;
}
if (ts_subtree_visible_child_count(*entry.subtree)) {
array_push(&self->stack, entry);
return TreeCursorStepHidden;
}
}
}
self->stack.size = initial_size;
return TreeCursorStepNone;
}
// Move the cursor to the next sibling by running the generic sibling search
// with the forward child iterator.
TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) {
  TreeCursorStep step = ts_tree_cursor_goto_sibling_internal(
    _self,
    ts_tree_cursor_child_iterator_next
  );
  return step;
}
// Move the cursor to the next visible sibling of the current node. If the
// search stops on a hidden node, the cursor continues to that node's first
// visible child. Returns false, leaving the cursor where it was, when there
// is no next sibling.
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) {
  TreeCursorStep step = ts_tree_cursor_goto_next_sibling_internal(self);
  if (step == TreeCursorStepHidden) {
    ts_tree_cursor_goto_first_child(self);
    return true;
  }
  return step == TreeCursorStepVisible;
}
// Move the cursor to the previous sibling by running the generic sibling
// search with the backward child iterator, then repair the new entry's
// position if the backward walk could not compute it.
//
// Walking backwards subtracts lengths, which loses the column when a
// subtracted length spans rows; in that case the position comes back as
// undefined and is recomputed from the parent by adding lengths up again.
TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) {
TreeCursor *self = (TreeCursor *)_self;
TreeCursorStep step = ts_tree_cursor_goto_sibling_internal(
_self, ts_tree_cursor_child_iterator_previous);
if (step == TreeCursorStepNone)
return step;
// if length is already valid, there's no need to recompute it
if (!length_is_undefined(array_back(&self->stack)->position))
return step;
// restore position from the parent node
const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2];
Length position = parent->position;
uint32_t child_index = array_back(&self->stack)->child_index;
const Subtree *children = ts_subtree_children((*(parent->subtree)));
// For child 0 the parent's position is used unchanged.
if (child_index > 0) {
// skip first child padding since its position should match the position of the parent
position = length_add(position, ts_subtree_size(children[0]));
// add padding and size of every child between the first and the target
for (uint32_t i = 1; i < child_index; ++i) {
position = length_add(position, ts_subtree_total_size(children[i]));
}
// the target's position starts after its own padding
position = length_add(position, ts_subtree_padding(children[child_index]));
}
array_back(&self->stack)->position = position;
return step;
}
// Move the cursor to the previous visible sibling of the current node. If
// the search stops on a hidden node, the cursor continues to that node's
// last visible child. Returns false, leaving the cursor where it was, when
// there is no previous sibling.
bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) {
  TreeCursorStep step = ts_tree_cursor_goto_previous_sibling_internal(self);
  if (step == TreeCursorStepHidden) {
    ts_tree_cursor_goto_last_child(self);
    return true;
  }
  return step == TreeCursorStepVisible;
}
// Move the cursor to the closest visible ancestor of the current node by
// truncating the stack. Returns false, leaving the cursor unchanged, when
// the current node is the cursor's root.
bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;

  // `depth` is the stack size the cursor would have after stopping at the
  // entry with index `depth - 1`; it starts at the current node's parent.
  unsigned depth = self->stack.size - 1;
  while (depth > 0) {
    if (ts_tree_cursor_is_entry_visible(self, depth - 1)) {
      self->stack.size = depth;
      return true;
    }
    depth--;
  }
  return false;
}
// Move the cursor to the node with the given descendant index.
//
// The cursor first ascends until the current entry's range of descendant
// indices contains the goal, then descends towards it. If the goal lies
// outside the cursor's root entry, the function returns with the cursor on
// the root.
void ts_tree_cursor_goto_descendant(
TSTreeCursor *_self,
uint32_t goal_descendant_index
) {
TreeCursor *self = (TreeCursor *)_self;
// Ascend to the lowest ancestor that contains the goal node.
for (;;) {
uint32_t i = self->stack.size - 1;
TreeCursorEntry *entry = &self->stack.contents[i];
// First descendant index that is no longer covered by this entry: its own
// index, plus one if the entry itself is visible, plus its visible
// descendants.
uint32_t next_descendant_index =
entry->descendant_index +
(ts_tree_cursor_is_entry_visible(self, i) ? 1 : 0) +
ts_subtree_visible_descendant_count(*entry->subtree);
if (
(entry->descendant_index <= goal_descendant_index) &&
(next_descendant_index > goal_descendant_index)
) {
break;
} else if (self->stack.size <= 1) {
return;
} else {
self->stack.size--;
}
}
// Descend to the goal node.
bool did_descend = true;
do {
did_descend = false;
bool visible;
TreeCursorEntry entry;
CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
// The first child already lies past the goal: the current entry is the
// closest match.
if (iterator.descendant_index > goal_descendant_index) {
return;
}
while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
// After the step, `iterator.descendant_index` is the index that follows
// `entry` and its descendants, so the goal lies within `entry`.
if (iterator.descendant_index > goal_descendant_index) {
array_push(&self->stack, entry);
if (visible && entry.descendant_index == goal_descendant_index) {
return;
} else {
did_descend = true;
break;
}
}
}
} while (did_descend);
}
// Get the descendant index recorded for the cursor's current entry.
uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) {
  const TreeCursor *self = (const TreeCursor *)_self;
  return array_back(&self->stack)->descendant_index;
}
// Build the TSNode for the cursor's current entry.
//
// The node's alias is looked up in the parent's production when the entry
// has a parent on the stack and is not an `extra`; otherwise the cursor's
// root alias symbol is used.
TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
  const TreeCursor *self = (const TreeCursor *)_self;
  TreeCursorEntry *current = array_back(&self->stack);

  TSSymbol alias_symbol;
  if (self->stack.size > 1 && !ts_subtree_extra(*current->subtree)) {
    TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2];
    alias_symbol = ts_language_alias_at(
      self->tree->language,
      parent_entry->subtree->ptr->production_id,
      current->structural_child_index
    );
  } else {
    alias_symbol = self->root_alias_symbol;
  }

  return ts_node_new(self->tree, current->subtree, current->position, alias_symbol);
}
// Private - Get various facts about the current node that are needed
// when executing tree queries.
//
// All results are written through the pointer arguments:
// - field_id: reset to 0, then set to the field id of the first non-inherited
//   field-map entry whose child index equals the structural child index of
//   the current entry (or of an invisible ancestor visited by the walk).
// - has_later_siblings: set when a later child of a visited parent is visible,
//   or is invisible but reports a visible child count above zero.
// - has_later_named_siblings: set when such a later sibling is named, or (for
//   an invisible sibling) reports a named child count above zero.
// - can_have_later_siblings_with_this_field: set when the field map contains
//   an entry with the same field id at a greater child index.
// - supertypes / supertype_count: on entry *supertype_count is the capacity of
//   `supertypes`; on return it is the number of supertype symbols stored.
void ts_tree_cursor_current_status(
const TSTreeCursor *_self,
TSFieldId *field_id,
bool *has_later_siblings,
bool *has_later_named_siblings,
bool *can_have_later_siblings_with_this_field,
TSSymbol *supertypes,
unsigned *supertype_count
) {
const TreeCursor *self = (const TreeCursor *)_self;
// Remember the caller-provided capacity before the count is reset below.
unsigned max_supertypes = *supertype_count;
*field_id = 0;
*supertype_count = 0;
*has_later_siblings = false;
*has_later_named_siblings = false;
*can_have_later_siblings_with_this_field = false;
// Walk up the tree, visiting the current node and its invisible ancestors,
// because fields can refer to nodes through invisible *wrapper* nodes.
// The loop stops before index 0: the root entry has no parent entry to consult.
for (unsigned i = self->stack.size - 1; i > 0; i--) {
TreeCursorEntry *entry = &self->stack.contents[i];
TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
parent_entry->subtree->ptr->production_id
);
// Resolve the symbol a child is seen as: the alias from the parent's alias
// sequence when the child is not an extra and an alias is set at that
// structural index, otherwise the subtree's own symbol.
#define subtree_symbol(subtree, structural_child_index) \
(( \
!ts_subtree_extra(subtree) && \
alias_sequence && \
alias_sequence[structural_child_index] \
) ? \
alias_sequence[structural_child_index] : \
ts_subtree_symbol(subtree))
// Stop walking up when a visible ancestor is found.
TSSymbol entry_symbol = subtree_symbol(
*entry->subtree,
entry->structural_child_index
);
TSSymbolMetadata entry_metadata = ts_language_symbol_metadata(
self->tree->language,
entry_symbol
);
// The top-of-stack entry (the current node) is always processed, visible or not.
if (i != self->stack.size - 1 && entry_metadata.visible) break;
// Record any supertypes
if (entry_metadata.supertype && *supertype_count < max_supertypes) {
supertypes[*supertype_count] = entry_symbol;
(*supertype_count)++;
}
// Determine if the current node has later siblings.
if (!*has_later_siblings) {
unsigned sibling_count = parent_entry->subtree->ptr->child_count;
// Structural index of the sibling being inspected; it only advances past
// non-extra subtrees (here for the entry itself, below for each sibling).
unsigned structural_child_index = entry->structural_child_index;
if (!ts_subtree_extra(*entry->subtree)) structural_child_index++;
for (unsigned j = entry->child_index + 1; j < sibling_count; j++) {
Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j];
TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata(
self->tree->language,
subtree_symbol(sibling, structural_child_index)
);
if (sibling_metadata.visible) {
*has_later_siblings = true;
// Both flags are set at this point, so no further sibling needs checking.
if (*has_later_named_siblings) break;
if (sibling_metadata.named) {
*has_later_named_siblings = true;
break;
}
} else if (ts_subtree_visible_child_count(sibling) > 0) {
// An invisible sibling still counts when it has visible children.
*has_later_siblings = true;
if (*has_later_named_siblings) break;
if (sibling.ptr->named_child_count > 0) {
*has_later_named_siblings = true;
break;
}
}
if (!ts_subtree_extra(sibling)) structural_child_index++;
}
}
#undef subtree_symbol
// The field map is only consulted for non-extra entries.
if (!ts_subtree_extra(*entry->subtree)) {
const TSFieldMapEntry *field_map, *field_map_end;
ts_language_field_map(
self->tree->language,
parent_entry->subtree->ptr->production_id,
&field_map, &field_map_end
);
// Look for a field name associated with the current node.
// Once a field id has been found it is kept; later (higher) levels do not replace it.
if (!*field_id) {
for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
if (!map->inherited && map->child_index == entry->structural_child_index) {
*field_id = map->field_id;
break;
}
}
}
// Determine if the current node can have later siblings with the same field name.
if (*field_id) {
for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
if (
map->field_id == *field_id &&
map->child_index > entry->structural_child_index
) {
*can_have_later_siblings_with_this_field = true;
break;
}
}
}
}
}
}
// Counts the stack entries, from index 1 up to and including the current
// entry, that ts_tree_cursor_is_entry_visible reports as visible. The entry
// at index 0 is never counted.
uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) {
  const TreeCursor *cursor = (const TreeCursor *)_self;
  uint32_t visible_levels = 0;
  unsigned level = 1;
  while (level < cursor->stack.size) {
    visible_levels += ts_tree_cursor_is_entry_visible(cursor, level) ? 1 : 0;
    level++;
  }
  return visible_levels;
}
// Returns a TSNode for the nearest visible entry below the top of the stack.
//
// Entries are examined from the one just under the current entry down to
// index 0. The entry at index 0 is always accepted (with alias 0); any other
// entry is accepted when its parent's production gives it a non-zero alias or
// its subtree is visible. If the stack has no entry under the current one, a
// node built from NULL tree/subtree pointers is returned.
TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) {
  const TreeCursor *cursor = (const TreeCursor *)_self;
  int level = (int)cursor->stack.size - 2;

  while (level >= 0) {
    TreeCursorEntry *candidate = &cursor->stack.contents[level];
    TSSymbol alias = 0;
    bool visible;

    if (level == 0) {
      visible = true;
    } else {
      TreeCursorEntry *above = &cursor->stack.contents[level - 1];
      alias = ts_language_alias_at(
        cursor->tree->language,
        above->subtree->ptr->production_id,
        candidate->structural_child_index
      );
      visible = (alias != 0) || ts_subtree_visible(*candidate->subtree);
    }

    if (visible) {
      return ts_node_new(cursor->tree, candidate->subtree, candidate->position, alias);
    }
    level--;
  }

  return ts_node_new(NULL, NULL, length_zero(), 0);
}
// Looks up the field id attached to the current node, or 0 when none is found.
//
// Starting at the current entry and moving toward the root, each entry is
// matched against its parent's field map: the first non-inherited map entry
// whose child index equals the entry's structural child index wins. The walk
// ends without a result at the first visible entry other than the current
// one, at the first extra subtree, or once the entry at index 1 was checked.
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
  const TreeCursor *cursor = (const TreeCursor *)_self;
  const unsigned top = cursor->stack.size - 1;

  for (unsigned level = top; level > 0; level--) {
    TreeCursorEntry *node = &cursor->stack.contents[level];
    TreeCursorEntry *owner = &cursor->stack.contents[level - 1];

    // Only the current node may be visible; any other visible entry ends the walk.
    bool is_current = (level == top);
    if (!is_current && ts_tree_cursor_is_entry_visible(cursor, level)) break;

    if (ts_subtree_extra(*node->subtree)) break;

    const TSFieldMapEntry *first, *limit;
    ts_language_field_map(
      cursor->tree->language,
      owner->subtree->ptr->production_id,
      &first, &limit
    );

    const TSFieldMapEntry *candidate = first;
    while (candidate < limit) {
      if (!candidate->inherited && candidate->child_index == node->structural_child_index) {
        return candidate->field_id;
      }
      candidate++;
    }
  }

  return 0;
}
// Returns the name of the current node's field, read from the language's
// field_names table, or NULL when ts_tree_cursor_current_field_id yields 0.
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
  TSFieldId field = ts_tree_cursor_current_field_id(_self);
  if (!field) return NULL;

  const TreeCursor *cursor = (const TreeCursor *)_self;
  return cursor->tree->language->field_names[field];
}
// Produces a new cursor with the same tree, root alias symbol and stack
// entries as `_cursor`. The new cursor's stack is initialised from scratch and
// then filled with the source stack's entries.
TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) {
  const TreeCursor *source = (const TreeCursor *)_cursor;
  TSTreeCursor result = {NULL, NULL, {0, 0}};

  // The public TSTreeCursor storage is written through the internal TreeCursor view.
  TreeCursor *duplicate = (TreeCursor *)&result;
  array_init(&duplicate->stack);
  array_push_all(&duplicate->stack, &source->stack);
  duplicate->root_alias_symbol = source->root_alias_symbol;
  duplicate->tree = source->tree;

  return result;
}
// Makes the existing cursor `_dst` mirror `_src`: its stack is cleared and
// refilled with the source's entries, and the tree and root alias symbol are
// taken over from the source.
void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) {
  const TreeCursor *source = (const TreeCursor *)_src;
  TreeCursor *target = (TreeCursor *)_dst;

  array_clear(&target->stack);
  array_push_all(&target->stack, &source->stack);
  target->root_alias_symbol = source->root_alias_symbol;
  target->tree = source->tree;
}

48
vendor/tree-sitter/src/tree_cursor.h vendored Normal file
View File

@@ -0,0 +1,48 @@
#ifndef TREE_SITTER_TREE_CURSOR_H_
#define TREE_SITTER_TREE_CURSOR_H_
#include "./subtree.h"
// One element of a tree cursor's stack, describing a single subtree on the
// path from the cursor's root entry to its current node.
typedef struct {
// The subtree this entry refers to.
const Subtree *subtree;
// Position passed to ts_node_new when a TSNode is built for this entry.
Length position;
// Index of this subtree within its parent's children array.
uint32_t child_index;
// Index used for alias and field-map lookups in the parent's production;
// tree_cursor.c advances it only past non-extra siblings.
uint32_t structural_child_index;
// Index compared against the goal when navigating to a descendant.
// NOTE(review): presumably counts visible descendants from the cursor root - confirm.
uint32_t descendant_index;
} TreeCursorEntry;
// Internal representation of a tree cursor; tree_cursor.c obtains it by
// casting a TSTreeCursor pointer.
typedef struct {
// Tree being traversed; its language is used for alias and field lookups.
const TSTree *tree;
// Entries from the cursor's root (index 0) to the current node (last element).
Array(TreeCursorEntry) stack;
// Alias reported for the current node when no parent production supplies one
// (the stack holds a single entry, or the current subtree is an extra).
TSSymbol root_alias_symbol;
} TreeCursor;
// Result type of the *_internal navigation functions declared in this header.
// NOTE(review): presumably None = cursor did not move, Hidden = moved to an
// invisible node, Visible = moved to a visible node - confirm in tree_cursor.c.
typedef enum {
TreeCursorStepNone,
TreeCursorStepHidden,
TreeCursorStepVisible,
} TreeCursorStep;
void ts_tree_cursor_init(TreeCursor *, TSNode);
// Collects facts about the cursor's current node for query execution.
// Parameter names below follow the definition in tree_cursor.c.
void ts_tree_cursor_current_status(
const TSTreeCursor *, // _self: cursor to inspect
TSFieldId *, // field_id (out): field id of the current node, 0 if none
bool *, // has_later_siblings (out)
bool *, // has_later_named_siblings (out)
bool *, // can_have_later_siblings_with_this_field (out)
TSSymbol *, // supertypes (out): buffer receiving supertype symbols
unsigned * // supertype_count (in/out): buffer capacity in, symbols written out
);
TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *);
TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *);
// Returns, by value, the subtree referenced by the entry at the top of the
// cursor's stack.
static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) {
  const TreeCursor *cursor = (const TreeCursor *)_self;
  const TreeCursorEntry *top = array_back(&cursor->stack);
  return *top->subtree;
}
TSNode ts_tree_cursor_parent_node(const TSTreeCursor *);
#endif // TREE_SITTER_TREE_CURSOR_H_

50
vendor/tree-sitter/src/unicode.h vendored Normal file
View File

@@ -0,0 +1,50 @@
#ifndef TREE_SITTER_UNICODE_H_
#define TREE_SITTER_UNICODE_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <limits.h>
#include <stdint.h>
#define U_EXPORT
#define U_EXPORT2
#include "unicode/utf8.h"
#include "unicode/utf16.h"
static const int32_t TS_DECODE_ERROR = U_SENTINEL;
// These functions read one unicode code point from the given string,
// returning the number of bytes consumed.
typedef uint32_t (*UnicodeDecodeFunction)(
const uint8_t *string,
uint32_t length,
int32_t *code_point
);
// Decodes one code point from the start of a UTF-8 buffer using ICU's U8_NEXT.
//
// string:     start of the UTF-8 data.
// length:     limit handed to U8_NEXT as the end of the buffer.
// code_point: receives the decoded value as written by U8_NEXT.
//
// Returns the index U8_NEXT advanced to, i.e. the number of bytes consumed.
static inline uint32_t ts_decode_utf8(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t consumed = 0;
  U8_NEXT(string, consumed, length, *code_point);
  return consumed;
}
// Decodes one code point from the start of a UTF-16 buffer using ICU's
// U16_NEXT.
//
// string:     start of the UTF-16 data, viewed as 16-bit code units.
// length:     limit handed to U16_NEXT unchanged.
// code_point: receives the decoded value as written by U16_NEXT.
//
// Returns the number of bytes consumed: the count of 16-bit units U16_NEXT
// advanced over, times two.
//
// NOTE(review): U16_NEXT indexes in 16-bit units while this function reports
// bytes - confirm whether callers pass `length` in units or in bytes.
static inline uint32_t ts_decode_utf16(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t i = 0;
  // The buffer is only read, so keep the const qualifier through the cast.
  U16_NEXT(((const uint16_t *)string), i, length, *code_point);
  return i * 2;
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_UNICODE_H_

View File

@@ -0,0 +1 @@
552b01f61127d30d6589aa4bf99468224979b661

414
vendor/tree-sitter/src/unicode/LICENSE vendored Normal file
View File

@@ -0,0 +1,414 @@
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
Copyright © 1991-2019 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
---------------------
Third-Party Software Licenses
This section contains third-party software notices and/or additional
terms for licensed third-party software components included within ICU
libraries.
1. ICU License - ICU 1.8.1 to ICU 57.1
COPYRIGHT AND PERMISSION NOTICE
Copyright (c) 1995-2016 International Business Machines Corporation and others
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, and/or sell copies of the Software, and to permit persons
to whom the Software is furnished to do so, provided that the above
copyright notice(s) and this permission notice appear in all copies of
the Software and that both the above copyright notice(s) and this
permission notice appear in supporting documentation.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale, use
or other dealings in this Software without prior written authorization
of the copyright holder.
All trademarks and registered trademarks mentioned herein are the
property of their respective owners.
2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
# The Google Chrome software developed by Google is licensed under
# the BSD license. Other software included in this distribution is
# provided under other licenses, as set forth below.
#
# The BSD License
# http://opensource.org/licenses/bsd-license.php
# Copyright (C) 2006-2008, Google Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided with
# the distribution.
# Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# The word list in cjdict.txt are generated by combining three word lists
# listed below with further processing for compound word breaking. The
# frequency is generated with an iterative training against Google web
# corpora.
#
# * Libtabe (Chinese)
# - https://sourceforge.net/project/?group_id=1519
# - Its license terms and conditions are shown below.
#
# * IPADIC (Japanese)
# - http://chasen.aist-nara.ac.jp/chasen/distribution.html
# - Its license terms and conditions are shown below.
#
# ---------COPYING.libtabe ---- BEGIN--------------------
#
# /*
# * Copyright (c) 1999 TaBE Project.
# * Copyright (c) 1999 Pai-Hsiang Hsiao.
# * All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# *
# * . Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * . Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in
# * the documentation and/or other materials provided with the
# * distribution.
# * . Neither the name of the TaBE Project nor the names of its
# * contributors may be used to endorse or promote products derived
# * from this software without specific prior written permission.
# *
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# * OF THE POSSIBILITY OF SUCH DAMAGE.
# */
#
# /*
# * Copyright (c) 1999 Computer Systems and Communication Lab,
# * Institute of Information Science, Academia
# * Sinica. All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# *
# * . Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * . Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in
# * the documentation and/or other materials provided with the
# * distribution.
# * . Neither the name of the Computer Systems and Communication Lab
# * nor the names of its contributors may be used to endorse or
# * promote products derived from this software without specific
# * prior written permission.
# *
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# * OF THE POSSIBILITY OF SUCH DAMAGE.
# */
#
# Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
# University of Illinois
# c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
#
# ---------------COPYING.libtabe-----END--------------------------------
#
#
# ---------------COPYING.ipadic-----BEGIN-------------------------------
#
# Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
# and Technology. All Rights Reserved.
#
# Use, reproduction, and distribution of this software is permitted.
# Any copy of this software, whether in its original form or modified,
# must include both the above copyright notice and the following
# paragraphs.
#
# Nara Institute of Science and Technology (NAIST),
# the copyright holders, disclaims all warranties with regard to this
# software, including all implied warranties of merchantability and
# fitness, in no event shall NAIST be liable for
# any special, indirect or consequential damages or any damages
# whatsoever resulting from loss of use, data or profits, whether in an
# action of contract, negligence or other tortuous action, arising out
# of or in connection with the use or performance of this software.
#
# A large portion of the dictionary entries
# originate from ICOT Free Software. The following conditions for ICOT
# Free Software applies to the current dictionary as well.
#
# Each User may also freely distribute the Program, whether in its
# original form or modified, to any third party or parties, PROVIDED
# that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
# on, or be attached to, the Program, which is distributed substantially
# in the same form as set out herein and that such intended
# distribution, if actually made, will neither violate or otherwise
# contravene any of the laws and regulations of the countries having
# jurisdiction over the User or the intended distribution itself.
#
# NO WARRANTY
#
# The program was produced on an experimental basis in the course of the
# research and development conducted during the project and is provided
# to users as so produced on an experimental basis. Accordingly, the
# program is provided without any warranty whatsoever, whether express,
# implied, statutory or otherwise. The term "warranty" used herein
# includes, but is not limited to, any warranty of the quality,
# performance, merchantability and fitness for a particular purpose of
# the program and the nonexistence of any infringement or violation of
# any right of any third party.
#
# Each user of the program will agree and understand, and be deemed to
# have agreed and understood, that there is no warranty whatsoever for
# the program and, accordingly, the entire risk arising from or
# otherwise connected with the program is assumed by the user.
#
# Therefore, neither ICOT, the copyright holder, or any other
# organization that participated in or was otherwise related to the
# development of the program and their respective officials, directors,
# officers and other employees shall be held liable for any and all
# damages, including, without limitation, general, special, incidental
# and consequential damages, arising out of or otherwise in connection
# with the use or inability to use the program or any product, material
# or result produced or otherwise obtained by using the program,
# regardless of whether they have been advised of, or otherwise had
# knowledge of, the possibility of such damages at any time during the
# project or thereafter. Each user will be deemed to have agreed to the
# foregoing by his or her commencement of use of the program. The term
# "use" as used herein includes, but is not limited to, the use,
# modification, copying and distribution of the program and the
# production of secondary products from the program.
#
# In the case where the program, whether in its original form or
# modified, was distributed or delivered to or received by a user from
# any person, organization or entity other than ICOT, unless it makes or
# grants independently of ICOT any specific warranty to the user in
# writing, such person, organization or entity, will also be exempted
# from and not be held liable to the user for any such damages as noted
# above as far as the program is concerned.
#
# ---------------COPYING.ipadic-----END----------------------------------
3. Lao Word Break Dictionary Data (laodict.txt)
# Copyright (c) 2013 International Business Machines Corporation
# and others. All Rights Reserved.
#
# Project: http://code.google.com/p/lao-dictionary/
# Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt
# License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt
# (copied below)
#
# This file is derived from the above dictionary, with slight
# modifications.
# ----------------------------------------------------------------------
# Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification,
# are permitted provided that the following conditions are met:
#
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer. Redistributions in
# binary form must reproduce the above copyright notice, this list of
# conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
#
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
# --------------------------------------------------------------------------
4. Burmese Word Break Dictionary Data (burmesedict.txt)
# Copyright (c) 2014 International Business Machines Corporation
# and others. All Rights Reserved.
#
# This list is part of a project hosted at:
# github.com/kanyawtech/myanmar-karen-word-lists
#
# --------------------------------------------------------------------------
# Copyright (c) 2013, LeRoy Benjamin Sharon
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met: Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer. Redistributions in binary form must reproduce the
# above copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# Neither the name Myanmar Karen Word Lists, nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
# --------------------------------------------------------------------------
5. Time Zone Database
ICU uses the public domain data and code derived from Time Zone
Database for its time zone support. The ownership of the TZ database
is explained in BCP 175: Procedure for Maintaining the Time Zone
Database section 7.
# 7. Database Ownership
#
# The TZ database itself is not an IETF Contribution or an IETF
# document. Rather it is a pre-existing and regularly updated work
# that is in the public domain, and is intended to remain in the
# public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
# not apply to the TZ Database or contributions that individuals make
# to it. Should any claims be made and substantiated against the TZ
# Database, the organization that is providing the IANA
# Considerations defined in this RFC, under the memorandum of
# understanding with the IETF, currently ICANN, may act in accordance
# with all competent court orders. No ownership claims will be made
# by ICANN or the IETF Trust on the database or the code. Any person
# making a contribution to the database or code waives all rights to
# future claims in that contribution or in the TZ Database.
6. Google double-conversion
Copyright 2006-2011, the V8 project authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,29 @@
# ICU Parts
This directory contains a small subset of files from the Unicode organization's [ICU repository](https://github.com/unicode-org/icu).
### License
The license for these files is contained in the `LICENSE` file within this directory.
### Contents
* Source files taken from the [`icu4c/source/common/unicode`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c/source/common/unicode) directory:
  * `utf8.h`
  * `utf16.h`
  * `umachine.h`
* Empty source files that are referenced by the above source files, but whose original contents in `libicu` are not needed:
  * `ptypes.h`
  * `urename.h`
  * `utf.h`
* `ICU_SHA` - File containing the Git SHA of the commit in the `icu` repository from which the files were obtained.
* `LICENSE` - The license file from the [`icu4c`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c) directory of the `icu` repository.
* `README.md` - This text file.
### Updating ICU
To incorporate changes from the upstream `icu` repository:
* Update `ICU_SHA` with the new Git SHA.
* Update `LICENSE` with the license text from the directory mentioned above.
* Update `utf8.h`, `utf16.h`, and `umachine.h` with their new contents in the `icu` repository.

View File

@@ -0,0 +1 @@
// This file must exist in order for `utf8.h` and `utf16.h` to be used.

View File

@@ -0,0 +1,448 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1999-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: umachine.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep13
* created by: Markus W. Scherer
*
* This file defines basic types and constants for ICU to be
* platform-independent. umachine.h and utf.h are included into
* utypes.h to provide all the general definitions for ICU.
* All of these definitions used to be in utypes.h before
* the UTF-handling macros made this unmaintainable.
*/
#ifndef __UMACHINE_H__
#define __UMACHINE_H__
/**
* \file
* \brief Basic types and constants for UTF
*
* <h2> Basic types and constants for UTF </h2>
* This file defines basic types and constants for utf.h to be
* platform-independent. umachine.h and utf.h are included into
* utypes.h to provide all the general definitions for ICU.
* All of these definitions used to be in utypes.h before
* the UTF-handling macros made this unmaintainable.
*
*/
/*==========================================================================*/
/* Include platform-dependent definitions */
/* which are contained in the platform-specific file platform.h */
/*==========================================================================*/
#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
/*
* ANSI C headers:
* stddef.h defines wchar_t
*/
#include <stddef.h>
/*==========================================================================*/
/* For C wrappers, we use the symbol U_STABLE. */
/* This works properly if the includer is C or C++. */
/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */
/*==========================================================================*/
/**
* \def U_CFUNC
* This is used in a declaration of a library private ICU C function.
* @stable ICU 2.4
*/
/**
* \def U_CDECL_BEGIN
* This is used to begin a declaration of a library private ICU C API.
* @stable ICU 2.4
*/
/**
* \def U_CDECL_END
* This is used to end a declaration of a library private ICU C API
* @stable ICU 2.4
*/
#ifdef __cplusplus
# define U_CFUNC extern "C"
# define U_CDECL_BEGIN extern "C" {
# define U_CDECL_END }
#else
# define U_CFUNC extern
# define U_CDECL_BEGIN
# define U_CDECL_END
#endif
#ifndef U_ATTRIBUTE_DEPRECATED
/**
* \def U_ATTRIBUTE_DEPRECATED
* This is used for GCC specific attributes
* @internal
*/
#if U_GCC_MAJOR_MINOR >= 302
# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
/**
* \def U_ATTRIBUTE_DEPRECATED
* This is used for Visual C++ specific attributes
* @internal
*/
#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
#else
# define U_ATTRIBUTE_DEPRECATED
#endif
#endif
/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
#define U_CAPI U_CFUNC U_EXPORT
/** This is used to declare a function as a stable public ICU C API*/
#define U_STABLE U_CAPI
/** This is used to declare a function as a draft public ICU C API */
#define U_DRAFT U_CAPI
/** This is used to declare a function as a deprecated public ICU C API */
#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
/** This is used to declare a function as an obsolete public ICU C API */
#define U_OBSOLETE U_CAPI
/** This is used to declare a function as an internal ICU C API */
#define U_INTERNAL U_CAPI
/**
* \def U_OVERRIDE
* Defined to the C++11 "override" keyword if available.
* Denotes a class or member which is an override of the base class.
* May result in an error if it is applied to something not an override.
* @internal
*/
#ifndef U_OVERRIDE
#define U_OVERRIDE override
#endif
/**
* \def U_FINAL
* Defined to the C++11 "final" keyword if available.
* Denotes a class or member which may not be overridden in subclasses.
* May result in an error if subclasses attempt to override.
* @internal
*/
#if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
#define U_FINAL final
#endif
// Before ICU 65, function-like, multi-statement ICU macros were just defined as
// series of statements wrapped in { } blocks and the caller could choose to
// either treat them as if they were actual functions and end the invocation
// with a trailing ; creating an empty statement after the block or else omit
// this trailing ; using the knowledge that the macro would expand to { }.
//
// But doing so doesn't work well with macros that look like functions and
// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
// switches to the standard solution of wrapping such macros in do { } while.
//
// This will however break existing code that depends on being able to invoke
// these macros without a trailing ; so to be able to remain compatible with
// such code the wrapper is itself defined as macros so that it's possible to
// build ICU 65 and later with the old macro behaviour, like this:
//
// CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
// runConfigureICU ...
/**
* \def UPRV_BLOCK_MACRO_BEGIN
* Defined as the "do" keyword by default.
* @internal
*/
#ifndef UPRV_BLOCK_MACRO_BEGIN
#define UPRV_BLOCK_MACRO_BEGIN do
#endif
/**
* \def UPRV_BLOCK_MACRO_END
* Defined as "while (FALSE)" by default.
* Together with the default UPRV_BLOCK_MACRO_BEGIN ("do") this makes a wrapped
* macro body expand to one do { } while (FALSE) statement, so the invocation
* takes a trailing semicolon.
* FALSE is only #defined further down in this header; that is fine because the
* name is not expanded until a macro that uses UPRV_BLOCK_MACRO_END is invoked.
* The #ifndef guard lets a build override the definition from the command line.
* @internal
*/
#ifndef UPRV_BLOCK_MACRO_END
#define UPRV_BLOCK_MACRO_END while (FALSE)
#endif
/*==========================================================================*/
/* limits for int32_t etc., like in POSIX inttypes.h */
/*==========================================================================*/
#ifndef INT8_MIN
/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
# define INT8_MIN ((int8_t)(-128))
#endif
#ifndef INT16_MIN
/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
# define INT16_MIN ((int16_t)(-32767-1))
#endif
#ifndef INT32_MIN
/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
# define INT32_MIN ((int32_t)(-2147483647-1))
#endif
#ifndef INT8_MAX
/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
# define INT8_MAX ((int8_t)(127))
#endif
#ifndef INT16_MAX
/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
# define INT16_MAX ((int16_t)(32767))
#endif
#ifndef INT32_MAX
/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
# define INT32_MAX ((int32_t)(2147483647))
#endif
#ifndef UINT8_MAX
/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
# define UINT8_MAX ((uint8_t)(255U))
#endif
#ifndef UINT16_MAX
/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
# define UINT16_MAX ((uint16_t)(65535U))
#endif
#ifndef UINT32_MAX
/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
# define UINT32_MAX ((uint32_t)(4294967295U))
#endif
#if defined(U_INT64_T_UNAVAILABLE)
# error int64_t is required for decimal format and rule-based number format.
#else
# ifndef INT64_C
/**
* Provides a platform independent way to specify a signed 64-bit integer constant.
* note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
* @stable ICU 2.8
*/
# define INT64_C(c) c ## LL
# endif
# ifndef UINT64_C
/**
* Provides a platform independent way to specify an unsigned 64-bit integer constant.
* note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
* @stable ICU 2.8
*/
# define UINT64_C(c) c ## ULL
# endif
# ifndef U_INT64_MIN
/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
# endif
# ifndef U_INT64_MAX
/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
# endif
# ifndef U_UINT64_MAX
/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
# endif
#endif
/*==========================================================================*/
/* Boolean data type */
/*==========================================================================*/
/** The ICU boolean type @stable ICU 2.0 */
typedef int8_t UBool;
#ifndef TRUE
/** The TRUE value of a UBool @stable ICU 2.0 */
# define TRUE 1
#endif
#ifndef FALSE
/** The FALSE value of a UBool @stable ICU 2.0 */
# define FALSE 0
#endif
/*==========================================================================*/
/* Unicode data types */
/*==========================================================================*/
/* wchar_t-related definitions -------------------------------------------- */
/*
* \def U_WCHAR_IS_UTF16
* Defined if wchar_t uses UTF-16.
*
* @stable ICU 2.0
*/
/*
* \def U_WCHAR_IS_UTF32
* Defined if wchar_t uses UTF-32.
*
* @stable ICU 2.0
*/
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
# ifdef __STDC_ISO_10646__
# if (U_SIZEOF_WCHAR_T==2)
# define U_WCHAR_IS_UTF16
# elif (U_SIZEOF_WCHAR_T==4)
# define U_WCHAR_IS_UTF32
# endif
# elif defined __UCS2__
# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
# define U_WCHAR_IS_UTF16
# endif
# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
# if (U_SIZEOF_WCHAR_T==4)
# define U_WCHAR_IS_UTF32
# endif
# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
# define U_WCHAR_IS_UTF32
# elif U_PLATFORM_HAS_WIN32_API
# define U_WCHAR_IS_UTF16
# endif
#endif
/* UChar and UChar32 definitions -------------------------------------------- */
/** Number of bytes in a UChar. @stable ICU 2.0 */
#define U_SIZEOF_UCHAR 2
/**
* \def U_CHAR16_IS_TYPEDEF
* If 1, then char16_t is a typedef and not a real type (yet)
* @internal
*/
#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
// for AIX, uchar.h needs to be included
# include <uchar.h>
# define U_CHAR16_IS_TYPEDEF 1
#elif defined(_MSC_VER) && (_MSC_VER < 1900)
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
# define U_CHAR16_IS_TYPEDEF 1
#else
# define U_CHAR16_IS_TYPEDEF 0
#endif
/**
* \var UChar
*
* The base type for UTF-16 code units and pointers.
* Unsigned 16-bit integer.
* Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
*
* UChar is configurable by defining the macro UCHAR_TYPE
* on the preprocessor or compiler command line:
* -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
* (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
* This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
*
* The default is UChar=char16_t.
*
* C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
*
* In C, char16_t is a simple typedef of uint_least16_t.
* ICU requires uint_least16_t=uint16_t for data memory mapping.
* On macOS, char16_t is not available because the uchar.h standard header is missing.
*
* @stable ICU 4.4
*/
#if 1
// #if 1 is normal. UChar defaults to char16_t in C++.
// For configuration testing of UChar=uint16_t temporarily change this to #if 0.
// The intltest Makefile #defines UCHAR_TYPE=char16_t,
// so we only #define it to uint16_t if it is undefined so far.
#elif !defined(UCHAR_TYPE)
# define UCHAR_TYPE uint16_t
#endif
#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
// Inside the ICU library code, never configurable.
typedef char16_t UChar;
#elif defined(UCHAR_TYPE)
typedef UCHAR_TYPE UChar;
#elif defined(__cplusplus)
typedef char16_t UChar;
#else
typedef uint16_t UChar;
#endif
/**
* \var OldUChar
* Default ICU 58 definition of UChar.
* A base type for UTF-16 code units and pointers.
* Unsigned 16-bit integer.
*
* Define OldUChar to be wchar_t if that is 16 bits wide.
* If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__
* if the compiler provides it, or else uint16_t.
*
* This makes the definition of OldUChar platform-dependent
* but allows direct string type compatibility with platforms with
* 16-bit wchar_t types.
*
* This is how UChar was defined in ICU 58, for transition convenience.
* Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
* The current UChar responds to UCHAR_TYPE but OldUChar does not.
*
* @stable ICU 59
*/
#if U_SIZEOF_WCHAR_T==2
typedef wchar_t OldUChar;
#elif defined(__CHAR16_TYPE__)
typedef __CHAR16_TYPE__ OldUChar;
#else
typedef uint16_t OldUChar;
#endif
/**
* Define UChar32 as a type for single Unicode code points.
* UChar32 is a signed 32-bit integer (same as int32_t).
*
* The Unicode code point range is 0..0x10ffff.
* All other values (negative or >=0x110000) are illegal as Unicode code points.
* They may be used as sentinel values to indicate "done", "error"
* or similar non-code point conditions.
*
* Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
* to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
* or else to be uint32_t.
* That is, the definition of UChar32 was platform-dependent.
*
* @see U_SENTINEL
* @stable ICU 2.4
*/
typedef int32_t UChar32;
/**
* This value is intended for sentinel values for APIs that
* (take or) return single code points (UChar32).
* It is outside of the Unicode code point range 0..0x10ffff.
*
* For example, a "done" or "error" value in a new API
* could be indicated with U_SENTINEL.
*
* ICU APIs designed before ICU 2.4 usually define service-specific "done"
* values, mostly 0xffff.
* Those may need to be distinguished from
* actual U+ffff text contents by calling functions like
* CharacterIterator::hasNext() or UnicodeString::length().
*
* @return -1
* @see UChar32
* @stable ICU 2.4
*/
#define U_SENTINEL (-1)
#include "unicode/urename.h"
#endif

View File

@@ -0,0 +1 @@
// This file must exist in order for `utf8.h` and `utf16.h` to be used.

1
vendor/tree-sitter/src/unicode/utf.h vendored Normal file
View File

@@ -0,0 +1 @@
// This file must exist in order for `utf8.h` and `utf16.h` to be used.

733
vendor/tree-sitter/src/unicode/utf16.h vendored Normal file
View File

@@ -0,0 +1,733 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 1999-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utf16.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep09
* created by: Markus W. Scherer
*/
/**
* \file
* \brief C API: 16-bit Unicode handling macros
*
* This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
*
* For more information see utf.h and the ICU User Guide Strings chapter
* (http://userguide.icu-project.org/strings).
*
* <em>Usage:</em>
* ICU coding guidelines for if() statements should be followed when using these macros.
* Compound statements (curly braces {}) must be used for if-else-while...
* bodies and all macro statements should be terminated with semicolon.
*/
#ifndef __UTF16_H__
#define __UTF16_H__
#include "unicode/umachine.h"
#ifndef __UTF_H__
# include "unicode/utf.h"
#endif
/* single-code point definitions -------------------------------------------- */
/**
* Does this code unit alone encode a code point (BMP, not a surrogate)?
*
* Written as a direct bit test (a surrogate is any value whose bits 11 and up
* equal 0xd800, i.e. U+d800..U+dfff) instead of delegating to U_IS_SURROGATE,
* because the vendored unicode/utf.h is a stub that does not define that macro.
* The expansion is fully parenthesized so it composes safely inside larger
* expressions.
* @param c 16-bit code unit; evaluated exactly once
* @return TRUE or FALSE
* @stable ICU 2.4
*/
#define U16_IS_SINGLE(c) (((c)&0xfffff800)!=0xd800)
/**
* Tests whether a code unit is a lead (first-of-pair) surrogate, U+d800..U+dbff.
* The low 10 bits are masked off and the remaining bits compared against 0xd800.
* @param c 16-bit code unit; evaluated exactly once
* @return TRUE or FALSE
* @stable ICU 2.4
*/
#define U16_IS_LEAD(c) (0xd800==((c)&0xfffffc00))
/**
* Tests whether a code unit is a trail (second-of-pair) surrogate, U+dc00..U+dfff.
* The low 10 bits are masked off and the remaining bits compared against 0xdc00.
* @param c 16-bit code unit; evaluated exactly once
* @return TRUE or FALSE
* @stable ICU 2.4
*/
#define U16_IS_TRAIL(c) (0xdc00==((c)&0xfffffc00))
/**
* Is this code unit a surrogate (U+d800..U+dfff)?
*
* Defined here as a direct bit test (bits 11 and up must equal 0xd800) rather
* than as an alias for U_IS_SURROGATE: the vendored unicode/utf.h is a stub
* that does not define that macro, so the alias could not be expanded.
* The test is the same one ICU's utf.h uses for U_IS_SURROGATE.
* @param c 16-bit code unit; evaluated exactly once
* @return TRUE or FALSE
* @stable ICU 2.4
*/
#define U16_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
/**
* Given a code unit already known to be a surrogate (U16_IS_SURROGATE(c)),
* reports whether it is the lead half of a pair.
* Only bit 10 (0x400) is inspected: it is clear for lead surrogates.
* The answer is meaningless for non-surrogate input.
* @param c 16-bit code unit; evaluated exactly once
* @return TRUE or FALSE
* @stable ICU 2.4
*/
#define U16_IS_SURROGATE_LEAD(c) (!((c)&0x400))
/**
* Given a code unit already known to be a surrogate (U16_IS_SURROGATE(c)),
* reports whether it is the trail half of a pair.
* Only bit 10 (0x400) is inspected: it is set for trail surrogates.
* The answer is meaningless for non-surrogate input.
* @param c 16-bit code unit; evaluated exactly once
* @return TRUE or FALSE
* @stable ICU 4.2
*/
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))
/**
* Helper constant for U16_GET_SUPPLEMENTARY (value 0x35fdc00).
* It is the amount by which (lead<<10)+trail exceeds the code point: the
* smallest pair d800/dc00 must map to 0x10000, hence
* (0xd800<<10)+0xdc00-0x10000.
* @internal
*/
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)-0x10000+0xdc00)
/**
* Combine a lead and a trail surrogate into the supplementary code point
* (U+10000..U+10ffff) that the pair encodes.
* Both arguments are cast to UChar32 before the arithmetic and each is
* evaluated exactly once.
* The result is undefined if the input values are not
* lead and trail surrogates.
*
* @param lead lead surrogate (U+d800..U+dbff)
* @param trail trail surrogate (U+dc00..U+dfff)
* @return supplementary code point (U+10000..U+10ffff)
* @stable ICU 2.4
*/
#define U16_GET_SUPPLEMENTARY(lead, trail) \
((UChar32)(trail)+((UChar32)(lead)<<10UL)-U16_SURROGATE_OFFSET)
/**
* Compute the lead surrogate (0xd800..0xdbff) that encodes the upper bits of a
* supplementary code point (0x10000..0x10ffff).
* 0xd7c0 is 0xd800-(0x10000>>10), so the 0x10000 bias is removed by the addition.
* @param supplementary 32-bit code point (U+10000..U+10ffff); evaluated exactly once
* @return lead surrogate (U+d800..U+dbff) for supplementary
* @stable ICU 2.4
*/
#define U16_LEAD(supplementary) (UChar)(0xd7c0+((supplementary)>>10))
/**
* Compute the trail surrogate (0xdc00..0xdfff) that encodes the low 10 bits of a
* supplementary code point (0x10000..0x10ffff).
* @param supplementary 32-bit code point (U+10000..U+10ffff); evaluated exactly once
* @return trail surrogate (U+dc00..U+dfff) for supplementary
* @stable ICU 2.4
*/
#define U16_TRAIL(supplementary) (UChar)(0xdc00|((supplementary)&0x3ff))
/**
* Number of 16-bit code units needed to encode a Unicode code point: 1 for
* the BMP (up to U+ffff), 2 for everything above.
* The argument is compared as uint32_t, so a negative value also yields 2;
* the result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
* @param c 32-bit code point; evaluated exactly once
* @return 1 or 2
* @stable ICU 2.4
*/
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
/**
* The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
* This matches the larger of the two values U16_LENGTH can produce, i.e. one
* surrogate pair.
* @return 2
* @stable ICU 2.4
*/
#define U16_MAX_LENGTH 2
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
* The result is undefined if the offset points to a single, unpaired surrogate.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
*
* No bounds are checked: for a lead surrogate the unit at i+1 is read, and for
* a trail surrogate the unit at i-1 is read, whether or not those offsets lie
* inside the string.
* s, i and c each appear several times in the expansion, so they must be free
* of side effects, and c must be an assignable lvalue.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_GET
* @stable ICU 2.4
*/
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[i]; \
if(U16_IS_SURROGATE(c)) { \
if(U16_IS_SURROGATE_LEAD(c)) { \
(c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
} else { \
(c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
*
* The length can be negative for a NUL-terminated string.
* (The end check is (i)+1!=(length), which a negative length never satisfies
* for a non-negative offset, so the following unit is always inspected.)
*
* If the offset points to a single, unpaired surrogate, then
* c is set to that unpaired surrogate.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
*
* The expansion declares a block-local uint16_t named __c2 for the neighbouring
* unit. s, i and c appear several times in the expansion, so they must be free
* of side effects, and c must be an assignable lvalue.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<=i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_GET_UNSAFE
* @stable ICU 2.4
*/
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[i]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c)) { \
if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} \
} else { \
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to a single, unpaired surrogate, then
* c is set to U+FFFD.
* This covers both a lead surrogate with no trail after it (or at the end of
* the string) and a trail surrogate with no lead before it (or at start).
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
*
* The expansion declares a block-local uint16_t named __c2 for the neighbouring
* unit. s, i and c appear several times in the expansion, so they must be free
* of side effects, and c must be an assignable lvalue.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<=i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_GET_UNSAFE
* @stable ICU 60
*/
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[i]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c)) { \
if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} else { \
(c)=0xfffd; \
} \
} else { \
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} else { \
(c)=0xfffd; \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/* definitions with forward iteration --------------------------------------- */
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate, then that itself
* will be returned as the code point.
* The result is undefined if the offset points to a single, unpaired lead surrogate.
*
* i is incremented once for the first unit and a second time when that unit is
* a lead surrogate; the second unit is consumed without checking that it is a
* trail surrogate or that it lies inside the string.
* i must be a modifiable lvalue; s, i and c appear more than once in the
* expansion and must be free of side effects.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_NEXT
* @stable ICU 2.4
*/
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[(i)++]; \
if(U16_IS_LEAD(c)) { \
(c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
* (The end check is (i)!=(length), which a negative length never satisfies
* for a non-negative offset.)
*
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate or
* to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
*
* i always advances by one unit, and by one more only when a lead surrogate
* is immediately followed by a trail surrogate inside the string.
* The expansion declares a block-local uint16_t named __c2. i must be a
* modifiable lvalue; s, i, length and c must be free of side effects.
*
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_NEXT_UNSAFE
* @stable ICU 2.4
*/
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[(i)++]; \
if(U16_IS_LEAD(c)) { \
uint16_t __c2; \
if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
++(i); \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate or
* to a single, unpaired lead surrogate, then c is set to U+FFFD.
*
* An unpaired surrogate is replaced by U+FFFD but still consumes exactly one
* code unit, so iteration always makes progress.
* The expansion declares a block-local uint16_t named __c2. i must be a
* modifiable lvalue; s, i, length and c must be free of side effects.
*
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_NEXT_UNSAFE
* @stable ICU 60
*/
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[(i)++]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
++(i); \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} else { \
(c)=0xfffd; \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 or 2 code units.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
* Otherwise, the result is undefined.
*
* Values up to 0xffff are stored as one unit; anything larger is split into a
* lead/trail surrogate pair using the same arithmetic as U16_LEAD and U16_TRAIL.
* i must be a modifiable lvalue; s, i and c appear more than once in the
* expansion and must be free of side effects.
*
* @param s UChar * string buffer (written to, so it must not be const)
* @param i string offset
* @param c code point to append
* @see U16_APPEND
* @stable ICU 2.4
*/
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
if((uint32_t)(c)<=0xffff) { \
(s)[(i)++]=(uint16_t)(c); \
} else { \
(s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
(s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 or 2 code units.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Safe" macro, checks for a valid code point.
* If a surrogate pair is written, checks for sufficient space in the string.
* If the code point is not valid or a trail surrogate does not fit,
* then isError is set to TRUE.
*
* Details worth knowing when calling this macro:
* - "Valid" here only means (uint32_t)c<=0x10ffff; values in the surrogate
*   range U+d800..U+dfff take the single-unit path and are stored unchanged.
* - The single-unit path does not compare i against capacity; the caller must
*   guarantee i<capacity. Only the pair path checks (i)+1<(capacity).
* - On error nothing is written and i is not advanced.
* - isError is never reset to FALSE, so initialize it before the first call.
* i must be a modifiable lvalue; s, i and c appear more than once in the
* expansion and must be free of side effects.
*
* @param s UChar * string buffer (written to, so it must not be const)
* @param i string offset, must be i<capacity
* @param capacity size of the string buffer
* @param c code point to append
* @param isError output UBool set to TRUE if an error occurs, otherwise not modified
* @see U16_APPEND_UNSAFE
* @stable ICU 2.4
*/
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
if((uint32_t)(c)<=0xffff) { \
(s)[(i)++]=(uint16_t)(c); \
} else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
(s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
(s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
} else /* c>0x10ffff or not enough space */ { \
(isError)=TRUE; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* i moves forward by two units when the unit at the starting offset is a lead
* surrogate and by one unit otherwise; the unit after a lead surrogate is
* skipped without being read or bounds-checked.
* i must be a modifiable lvalue and, like s, free of side effects.
*
* @param s const UChar * string
* @param i string offset
* @see U16_FWD_1
* @stable ICU 2.4
*/
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_LEAD((s)[(i)++])) { \
++(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* i moves forward by two units only when a lead surrogate is directly followed
* by a trail surrogate before the end of the string; in every other case,
* including unpaired surrogates, it moves forward by exactly one unit.
* i must be a modifiable lvalue; s, i and length must be free of side effects.
*
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
* @see U16_FWD_1_UNSAFE
* @stable ICU 2.4
*/
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
++(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* n is copied once into a block-local int32_t named __N; nothing happens when
* n<=0. There is no end-of-string check, so the caller must know that at least
* n code points follow the offset.
* i must be a modifiable lvalue and, like s, free of side effects.
*
* @param s const UChar * string
* @param i string offset
* @param n number of code points to skip
* @see U16_FWD_N
* @stable ICU 2.4
*/
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0) { \
U16_FWD_1_UNSAFE(s, i); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The loop stops early when i reaches length or, for a negative length, when
* the unit at i is NUL; in that case fewer than n code points are skipped.
* n is copied once into a block-local int32_t named __N; nothing happens when
* n<=0.
* i must be a modifiable lvalue; s, i and length must be free of side effects.
*
* @param s const UChar * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param n number of code points to skip
* @see U16_FWD_N_UNSAFE
* @stable ICU 2.4
*/
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
int32_t __N=(n); \
while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
U16_FWD_1(s, i, length); \
--__N; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to the trail surrogate of a surrogate pair,
* then the offset is decremented.
* Otherwise, it is not modified.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The unit before a trail surrogate is assumed to be its lead surrogate and is
* not inspected, and i is decremented without comparing it to a start offset.
* i must be a modifiable lvalue and, like s, free of side effects.
*
* @param s const UChar * string
* @param i string offset
* @see U16_SET_CP_START
* @stable ICU 2.4
*/
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[i])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to the trail surrogate of a surrogate pair,
* then the offset is decremented.
* Otherwise, it is not modified.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The offset is moved back only when all three conditions hold: the unit at i
* is a trail surrogate, i is greater than start, and the unit at i-1 is a lead
* surrogate. The unit at i is read unconditionally, so i must address a
* readable unit.
* i must be a modifiable lvalue; s, start and i must be free of side effects.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<=i
* @see U16_SET_CP_START_UNSAFE
* @stable ICU 2.4
*/
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/* definitions with backward iteration -------------------------------------- */
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The input offset may be the same as the string length.
* If the offset is behind a trail surrogate unit
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate, then that itself
* will be returned as the code point.
* The result is undefined if the offset is behind a single, unpaired trail surrogate.
*
* i is decremented once for the last unit and a second time when that unit is
* a trail surrogate; the preceding unit is consumed without checking that it
* is a lead surrogate or that it lies inside the string.
* i must be a modifiable lvalue; s, i and c appear more than once in the
* expansion and must be free of side effects.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_PREV
* @stable ICU 2.4
*/
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[--(i)]; \
if(U16_IS_TRAIL(c)) { \
(c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a trail surrogate unit
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate or behind a single, unpaired
* trail surrogate, then c is set to that unpaired surrogate.
*
* i always moves back by one unit, and by one more only when a trail surrogate
* is directly preceded by a lead surrogate at an offset not below start.
* The expansion declares a block-local uint16_t named __c2. i must be a
* modifiable lvalue; s, start, i and c must be free of side effects.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<i
* @param c output UChar32 variable
* @see U16_PREV_UNSAFE
* @stable ICU 2.4
*/
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[--(i)]; \
if(U16_IS_TRAIL(c)) { \
uint16_t __c2; \
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
--(i); \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a trail surrogate unit
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate or behind a single, unpaired
* trail surrogate, then c is set to U+FFFD.
*
* An unpaired surrogate is replaced by U+FFFD but still consumes exactly one
* code unit, so backward iteration always makes progress.
* The expansion declares a block-local uint16_t named __c2. i must be a
* modifiable lvalue; s, start, i and c must be free of side effects.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<i
* @param c output UChar32 variable
* @see U16_PREV_UNSAFE
* @stable ICU 60
*/
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[--(i)]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
--(i); \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} else { \
(c)=0xfffd; \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @see U16_BACK_1
* @stable ICU 2.4
*/
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* Always step over one unit; a trail surrogate implies a lead unit before it. */ \
    --(i); \
    if(U16_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<i
* @see U16_BACK_1_UNSAFE
* @stable ICU 2.4
*/
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    if(U16_IS_TRAIL((s)[i])) { \
        /* Skip the preceding unit only if it exists and really is a lead surrogate. */ \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @param n number of code points to skip
* @see U16_BACK_N
* @stable ICU 2.4
*/
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    /* n is evaluated once; a count of zero or less moves nothing. */ \
    int32_t __left=(n); \
    for(; __left>0; --__left) { \
        U16_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* @param s const UChar * string
* @param start start of string
* @param i string offset, must be start<i
* @param n number of code points to skip
* @see U16_BACK_N_UNSAFE
* @stable ICU 2.4
*/
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    /* Stop early once the offset reaches the start of the string. */ \
    int32_t __left=(n); \
    for(; __left>0 && (i)>(start); --__left) { \
        U16_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind the lead surrogate of a surrogate pair,
* then the offset is incremented.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @see U16_SET_CP_LIMIT
* @stable ICU 2.4
*/
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* A lead surrogate just before the offset means the offset splits a pair. */ \
    uint16_t __before=(s)[(i)-1]; \
    if(U16_IS_LEAD(__before)) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind the lead surrogate of a surrogate pair,
* then the offset is incremented.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const UChar * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, start<=i<=length
* @param length int32_t string length
* @see U16_SET_CP_LIMIT_UNSAFE
* @stable ICU 2.4
*/
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* Both neighbours must exist: one unit before i and one unit at i. */ \
    if((start)<(i) && ((i)<(length) || (length)<0)) { \
        /* Advance only when the offset sits between a lead and its trail surrogate. */ \
        if(U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
#endif

881
vendor/tree-sitter/src/unicode/utf8.h vendored Normal file
View File

@@ -0,0 +1,881 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 1999-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utf8.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep13
* created by: Markus W. Scherer
*/
/**
* \file
* \brief C API: 8-bit Unicode handling macros
*
* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
*
* For more information see utf.h and the ICU User Guide Strings chapter
* (http://userguide.icu-project.org/strings).
*
* <em>Usage:</em>
* ICU coding guidelines for if() statements should be followed when using these macros.
* Compound statements (curly braces {}) must be used for if-else-while...
* bodies and all macro statements should be terminated with semicolon.
*/
#ifndef __UTF8_H__
#define __UTF8_H__
#include "unicode/umachine.h"
#ifndef __UTF_H__
# include "unicode/utf.h"
#endif
/* internal definitions ----------------------------------------------------- */
/**
* Counts the trail bytes for a UTF-8 lead byte.
* Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
* leadByte might be evaluated multiple times.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
*
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
* @internal
*/
#define U8_COUNT_TRAIL_BYTES(leadByte) \
    (!U8_IS_LEAD(leadByte) ? 0 : \
        /* one trail byte for any lead, plus one each for >=0xe0 and >=0xf0 */ \
        1+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
/**
* Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
* Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
* leadByte might be evaluated multiple times.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
*
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
* @internal
*/
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
    /* each threshold the byte reaches adds one trail byte: 4-, 3-, then 2-byte leads */ \
    (((uint8_t)(leadByte)>=0xf0)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xc2))
/**
* Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
* @internal
*/
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) \
    /* modifies leadByte in place: keeps its low (6-countTrailBytes) payload bits */ \
    ((leadByte)&=(0x3f>>(countTrailBytes)))
/**
* Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
* Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
* Lead byte E0..EF bits 3..0 are used as byte index,
* first trail byte bits 7..5 are used as bit index into that byte.
* @see U8_IS_VALID_LEAD3_AND_T1
* @internal
*/
#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
/**
* Internal 3-byte UTF-8 validity check.
* Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
* @internal
*/
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) \
    /* select the bit for t1's top three bits, then test it in the table byte for this lead */ \
    ((1<<((uint8_t)(t1)>>5))&U8_LEAD3_T1_BITS[(lead)&0xf])
/**
* Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
* Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
* First trail byte bits 7..4 are used as byte index,
* lead byte F0..F4 bits 2..0 are used as bit index into that byte.
* @see U8_IS_VALID_LEAD4_AND_T1
* @internal
*/
#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
/**
* Internal 4-byte UTF-8 validity check.
* Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
* @internal
*/
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) \
    /* select the bit for the lead's low three bits, then test it in the table byte for t1's high nibble */ \
    ((1<<((lead)&7))&U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4])
/**
* Function for handling "next code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
* @internal
*/
U_STABLE UChar32 U_EXPORT2
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
/**
* Function for handling "append code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
* @internal
*/
U_STABLE int32_t U_EXPORT2
utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
/**
* Function for handling "previous code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
* @internal
*/
U_STABLE UChar32 U_EXPORT2
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
/**
* Function for handling "skip backward one code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
* @internal
*/
U_STABLE int32_t U_EXPORT2
utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
/* single-code point definitions -------------------------------------------- */
/**
* Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
* @param c 8-bit code unit (byte)
* @return TRUE or FALSE
* @stable ICU 2.4
*/
#define U8_IS_SINGLE(c) (!((c)&0x80)) /* high bit clear: the byte stands alone */
/**
* Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
* @param c 8-bit code unit (byte)
* @return TRUE or FALSE
* @stable ICU 2.4
*/
#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<0x33)
// 0x33=0xf4-0xc2+1: the unsigned wrap-around turns the two-sided range test into one comparison
/**
* Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
* @param c 8-bit code unit (byte)
* @return TRUE or FALSE
* @stable ICU 2.4
*/
#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80) /* top two bits are 10 */
/**
* How many code units (bytes) are used for the UTF-8 encoding
* of this Unicode code point?
* @param c 32-bit code point
* @return 1..4, or 0 if c is a surrogate or not a Unicode code point
* @stable ICU 2.4
*/
#define U8_LENGTH(c) \
    /* ascending range ladder; surrogates and values above U+10FFFF yield 0 */ \
    ((uint32_t)(c)<0x80 ? 1 : \
     (uint32_t)(c)<0x800 ? 2 : \
     (uint32_t)(c)<0xd800 ? 3 : \
     (uint32_t)(c)<0xe000 ? 0 : \
     (uint32_t)(c)<0x10000 ? 3 : \
     (uint32_t)(c)<0x110000 ? 4 : 0)
/**
* The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
* @return 4
* @stable ICU 2.4
*/
#define U8_MAX_LENGTH 4
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
* The result is undefined if the offset points to an illegal UTF-8
* byte sequence.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
*
* @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_GET
* @stable ICU 2.4
*/
#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Work on a private copy so the caller's offset is left untouched. */ \
    int32_t _u8_get_unsafe_pos=(int32_t)(i); \
    /* Rewind to the lead byte, then decode forward from there. */ \
    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_pos); \
    U8_NEXT_UNSAFE(s, _u8_get_unsafe_pos, c); \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to an illegal UTF-8 byte sequence, then
* c is set to a negative value.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset
* @param i int32_t string offset, must be start<=i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_GET_UNSAFE
* @stable ICU 2.4
*/
#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Work on a private copy so the caller's offset is left untouched. */ \
    int32_t _u8_get_pos=(i); \
    /* Rewind to the start of the code point, then decode it with full checking. */ \
    U8_SET_CP_START(s, start, _u8_get_pos); \
    U8_NEXT(s, _u8_get_pos, length, c); \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to an illegal UTF-8 byte sequence, then
* c is set to U+FFFD.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
*
* This macro does not distinguish between a real U+FFFD in the text
* and U+FFFD returned for an ill-formed sequence.
* Use U8_GET() if that distinction is important.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset
* @param i int32_t string offset, must be start<=i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to U+FFFD in case of an error
* @see U8_GET
* @stable ICU 51
*/
#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Work on a private copy so the caller's offset is left untouched. */ \
    int32_t _u8_get_pos=(i); \
    /* Rewind to the start of the code point, then decode it, substituting U+FFFD on error. */ \
    U8_SET_CP_START(s, start, _u8_get_pos); \
    U8_NEXT_OR_FFFD(s, _u8_get_pos, length, c); \
} UPRV_BLOCK_MACRO_END
/* definitions with forward iteration --------------------------------------- */
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* The result is undefined if the offset points to a trail byte
* or an illegal UTF-8 sequence.
*
* @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_NEXT
* @stable ICU 2.4
*/
#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[(i)++]; \
    if(!U8_IS_SINGLE(c)) { \
        /* Dispatch on the lead byte, longest form first; i already points at the first trail byte. */ \
        if((c)>=0xf0) { \
            /* 4-byte sequence: 3 payload bits from the lead, 6 from each trail byte */ \
            (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
            (i)+=3; \
        } else if((c)>=0xe0) { \
            /* 3-byte sequence: the lead is not masked because the (UChar) cast drops the bits shifted past bit 15 */ \
            (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
            (i)+=2; \
        } else { \
            /* 2-byte sequence */ \
            (c)=(((c)&0x1f)<<6)|((s)[i]&0x3f); \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* If the offset points to a trail byte or an illegal UTF-8 sequence, then
* c is set to a negative value.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_NEXT_UNSAFE
* @stable ICU 2.4
*/
#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL) /* c becomes U_SENTINEL on ill-formed input */
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* If the offset points to a trail byte or an illegal UTF-8 sequence, then
* c is set to U+FFFD.
*
* This macro does not distinguish between a real U+FFFD in the text
* and U+FFFD returned for an ill-formed sequence.
* Use U8_NEXT() if that distinction is important.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to U+FFFD in case of an error
* @see U8_NEXT
* @stable ICU 51
*/
#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd) /* c becomes U+FFFD on ill-formed input */
/** @internal */
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
uint8_t __t = 0; \
if((i)!=(length) && \
/* fetch/validate/assemble all but last trail byte */ \
((c)>=0xe0 ? \
((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \
U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
(__t&=0x3f, 1) \
: /* U+10000..U+10FFFF */ \
((c)-=0xf0)<=4 && \
U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
(__t=(s)[i]-0x80)<=0x3f) && \
/* valid second-to-last trail byte */ \
((c)=((c)<<6)|__t, ++(i)!=(length)) \
: /* U+0080..U+07FF */ \
(c)>=0xc2 && ((c)&=0x1f, 1)) && \
/* last trail byte */ \
(__t=(s)[i]-0x80)<=0x3f && \
((c)=((c)<<6)|__t, ++(i), 1)) { \
} else { \
(c)=(sub); /* ill-formed*/ \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 to 4 bytes.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
* Otherwise, the result is undefined.
*
* @param s uint8_t * string buffer (written to, so it must not be const)
* @param i string offset
* @param c code point to append
* @see U8_APPEND
* @stable ICU 2.4
*/
#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Evaluate c once; each branch writes one complete sequence. */ \
    uint32_t __cp=(c); \
    if(__cp<=0x7f) { \
        (s)[(i)++]=(uint8_t)__cp; \
    } else if(__cp<=0x7ff) { \
        (s)[(i)++]=(uint8_t)((__cp>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)((__cp&0x3f)|0x80); \
    } else if(__cp<=0xffff) { \
        (s)[(i)++]=(uint8_t)((__cp>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)(((__cp>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__cp&0x3f)|0x80); \
    } else { \
        /* No range check here: the caller guarantees a valid code point. */ \
        (s)[(i)++]=(uint8_t)((__cp>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)(((__cp>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((__cp>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((__cp&0x3f)|0x80); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 to 4 bytes.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Safe" macro, checks for a valid code point.
* If a non-ASCII code point is written, checks for sufficient space in the string.
* If the code point is not valid or trail bytes do not fit,
* then isError is set to TRUE.
*
* @param s uint8_t * string buffer (written to, so it must not be const)
* @param i int32_t string offset, must be i<capacity
* @param capacity int32_t size of the string buffer
* @param c UChar32 code point to append
* @param isError output UBool set to TRUE if an error occurs, otherwise not modified
* @see U8_APPEND_UNSAFE
* @stable ICU 2.4
*/
#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
uint32_t __uc=(c); \
if(__uc<=0x7f) { \
(s)[(i)++]=(uint8_t)__uc; \
} else if(__uc<=0x7ff && (i)+1<(capacity)) { \
(s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
(s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
(s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
(s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
} else { \
(isError)=TRUE; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_FWD_1
* @stable ICU 2.4
*/
#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* Skip the trail bytes announced by the byte at i, plus that byte itself. */ \
    (i)+=U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i])+1; \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @see U8_FWD_1_UNSAFE
* @stable ICU 2.4
*/
#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
uint8_t __b=(s)[(i)++]; \
if(U8_IS_LEAD(__b) && (i)!=(length)) { \
uint8_t __t1=(s)[i]; \
if((0xe0<=__b && __b<0xf0)) { \
if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
++(i); \
} \
} else if(__b<0xe0) { \
if(U8_IS_TRAIL(__t1)) { \
++(i); \
} \
} else /* c>=0xf0 */ { \
if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
++(i); \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @param n number of code points to skip
* @see U8_FWD_N
* @stable ICU 2.4
*/
#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    /* n is evaluated once; a count of zero or less moves nothing. */ \
    int32_t __left=(n); \
    for(; __left>0; --__left) { \
        U8_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param n number of code points to skip
* @see U8_FWD_N_UNSAFE
* @stable ICU 2.4
*/
#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    /* Stop at the string end: either i reaches length, or (length<0) a NUL byte is found. */ \
    int32_t __left=(n); \
    for(; __left>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__left) { \
        U8_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to a UTF-8 trail byte,
* then the offset is moved backward to the corresponding lead byte.
* Otherwise, it is not modified.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_SET_CP_START
* @stable ICU 2.4
*/
#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* Walk left until the byte at i is no longer a trail byte. */ \
    for(; U8_IS_TRAIL((s)[i]); --(i)) {} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to a UTF-8 trail byte,
* then the offset is moved backward to the corresponding lead byte.
* Otherwise, it is not modified.
*
* "Safe" macro, checks for illegal sequences and for string boundaries.
* Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<=i
* @see U8_SET_CP_START_UNSAFE
* @see U8_TRUNCATE_IF_INCOMPLETE
* @stable ICU 2.4
*/
#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U8_IS_TRAIL((s)[(i)])) { \
(i)=utf8_back1SafeBody(s, start, (i)); \
} \
} UPRV_BLOCK_MACRO_END
/**
* If the string ends with a UTF-8 byte sequence that is valid so far
* but incomplete, then reduce the length of the string to end before
* the lead byte of that incomplete sequence.
* For example, if the string ends with E1 80, the length is reduced by 2.
*
* In all other cases (the string ends with a complete sequence, or it is not
* possible for any further trail byte to extend the trailing sequence)
* the length remains unchanged.
*
* Useful for processing text split across multiple buffers
* (save the incomplete sequence for later)
* and for optimizing iteration
* (check for string length only once per character).
*
* "Safe" macro, checks for illegal sequences and for string boundaries.
* Unlike U8_SET_CP_START(), this macro never reads s[length].
*
* (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param length int32_t string length (usually start<=length)
* @see U8_SET_CP_START
* @stable ICU 61
*/
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((length)>(start)) { \
        /* (s) is parenthesized at every use so that pointer expressions such as buf+off index correctly. */ \
        uint8_t __b1=(s)[(length)-1]; \
        if(U8_IS_SINGLE(__b1)) { \
            /* common ASCII character */ \
        } else if(U8_IS_LEAD(__b1)) { \
            /* a lead byte alone at the end: drop it */ \
            --(length); \
        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
            uint8_t __b2=(s)[(length)-2]; \
            if(0xe0<=__b2 && __b2<=0xf4) { \
                /* lead of a 3- or 4-byte sequence plus one valid trail byte: still incomplete */ \
                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
                    (length)-=2; \
                } \
            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
                /* two trail bytes: only a 4-byte lead before them leaves the sequence incomplete */ \
                uint8_t __b3=(s)[(length)-3]; \
                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
                    (length)-=3; \
                } \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/* definitions with backward iteration -------------------------------------- */
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* The input offset may be the same as the string length.
* If the offset is behind a multi-byte sequence, then the macro will read
* the whole sequence.
* If the offset is behind a lead byte, then that itself
* will be returned as the code point.
* The result is undefined if the offset is behind an illegal UTF-8 sequence.
*
* @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_PREV
* @stable ICU 2.4
*/
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(uint8_t)(s)[--(i)]; \
    if(U8_IS_TRAIL(c)) { \
        uint8_t __lead, __trails=1, __bits=6; \
        \
        /* c is the last trail byte: keep its 6 payload bits */ \
        (c)&=0x3f; \
        /* Collect further trail bytes right-to-left until a byte >=0xc0 (the lead) shows up. */ \
        while((__lead=(s)[--(i)])<0xc0) { \
            (c)|=(UChar32)(__lead&0x3f)<<__bits; \
            ++__trails; \
            __bits+=6; \
        } \
        /* Strip the length marker bits from the lead byte and place its payload on top. */ \
        U8_MASK_LEAD_BYTE(__lead, __trails); \
        (c)|=(UChar32)__lead<<__bits; \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a multi-byte sequence, then the macro will read
* the whole sequence.
* If the offset is behind a lead byte, then that itself
* will be returned as the code point.
* If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<i
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_PREV_UNSAFE
* @stable ICU 2.4
*/
#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Step back one byte; ASCII is complete as is. */ \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* Everything else goes to the out-of-line helper, which receives &i and returns the new c. */ \
        /* The cast is applied to (s) as a whole, so expression arguments are converted correctly. */ \
        /* -1 is the strictness value this macro passes (see utf8_prevCharSafeBody). */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a multi-byte sequence, then the macro will read
* the whole sequence.
* If the offset is behind a lead byte, then that itself
* will be returned as the code point.
* If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
*
* This macro does not distinguish between a real U+FFFD in the text
* and U+FFFD returned for an ill-formed sequence.
* Use U8_PREV() if that distinction is important.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<i
* @param c output UChar32 variable, set to U+FFFD in case of an error
* @see U8_PREV
* @stable ICU 51
*/
#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* Step back one byte; ASCII is complete as is. */ \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* Everything else goes to the out-of-line helper, which receives &i and returns the new c. */ \
        /* The cast is applied to (s) as a whole, so expression arguments are converted correctly. */ \
        /* -3 is the strictness value this macro passes (see utf8_prevCharSafeBody). */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -3); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_BACK_1
* @stable ICU 2.4
*/
#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* Step left at least once, and keep going while landing on trail bytes. */ \
    do { \
        --(i); \
    } while(U8_IS_TRAIL((s)[i])); \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<i
* @see U8_BACK_1_UNSAFE
* @stable ICU 2.4
*/
#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U8_IS_TRAIL((s)[--(i)])) { \
(i)=utf8_back1SafeBody(s, start, (i)); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @param n number of code points to skip
* @see U8_BACK_N
* @stable ICU 2.4
*/
#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    /* n is evaluated once; a count of zero or less moves nothing. */ \
    int32_t __left=(n); \
    for(; __left>0; --__left) { \
        U8_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* @param s const uint8_t * string
* @param start int32_t index of the start of the string
* @param i int32_t string offset, must be start<i
* @param n number of code points to skip
* @see U8_BACK_N_UNSAFE
* @stable ICU 2.4
*/
#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    /* Stop early once the offset reaches the start of the string. */ \
    int32_t __left=(n); \
    for(; __left>0 && (i)>(start); --__left) { \
        U8_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind a partial multi-byte sequence,
 * then the offset is incremented to behind the whole sequence.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-8.
 *
 * Implemented as one U8_BACK_1_UNSAFE step followed by one U8_FWD_1_UNSAFE
 * step on the same offset.
 * NOTE(review): unlike U8_SET_CP_LIMIT there is no start/length guard here, so
 * the backward step is always taken - presumably callers must pass an offset
 * greater than the start of the string; confirm against U8_BACK_1_UNSAFE.
 *
 * @param s const uint8_t * string
 * @param i string offset
 * @see U8_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
U8_BACK_1_UNSAFE(s, i); \
U8_FWD_1_UNSAFE(s, i); \
} UPRV_BLOCK_MACRO_END
/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind a partial multi-byte sequence,
 * then the offset is incremented to behind the whole sequence.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset is only touched when start<i and either i<length or length<0;
 * in that case it is moved back with U8_BACK_1 and then forward again with
 * U8_FWD_1. For i==start, or i>=length with a non-negative length, the macro
 * does nothing.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, must be start<=i<=length
 * @param length int32_t string length
 * @see U8_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
if((start)<(i) && ((i)<(length) || (length)<0)) { \
U8_BACK_1(s, start, i); \
U8_FWD_1(s, i, length); \
} \
} UPRV_BLOCK_MACRO_END
#endif

View File

@@ -0,0 +1,24 @@
"calloc",
"free",
"iswalnum",
"iswalpha",
"iswblank",
"iswdigit",
"iswlower",
"iswspace",
"iswupper",
"iswxdigit",
"malloc",
"memchr",
"memcmp",
"memcpy",
"memmove",
"memset",
"realloc",
"strcmp",
"strlen",
"strncat",
"strncmp",
"strncpy",
"towlower",
"towupper",

109
vendor/tree-sitter/src/wasm/stdlib.c vendored Normal file
View File

@@ -0,0 +1,109 @@
// This file implements a very simple allocator for external scanners running
// in WASM. Allocation is just bumping a static pointer and growing the heap
// as needed, and freeing is mostly a noop. But in the special case of freeing
// the last-allocated pointer, we'll reuse that pointer again.
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
extern void tree_sitter_debug_message(const char *, size_t);
#define PAGESIZE 0x10000
#define MAX_HEAP_SIZE (4 * 1024 * 1024)
// Header placed directly in front of every allocation's payload.
// `size` holds the byte count that was passed to malloc for this region.
// `data` marks the first payload byte; malloc hands out its address, and
// region_for_ptr steps back from that address to find this header again.
typedef struct {
size_t size;
char data[0];
} Region;
// End of the currently usable memory; refreshed from get_heap_end() by
// reset_heap and after every successful grow in malloc.
static Region *heap_end = NULL;
// Base address of the heap, set by reset_heap. Only used by malloc to measure
// the heap extent against MAX_HEAP_SIZE.
static Region *heap_start = NULL;
// Bump pointer: where the header of the next allocated region will be written.
static Region *next = NULL;
// Map a payload pointer (as returned by malloc) back to the Region header
// that sits immediately in front of it.
static inline Region *region_for_ptr(void *ptr) {
  Region *payload = (Region *)ptr;
  return &payload[-1];
}
// Compute where the region following `self` would start, assuming `self`
// carries a payload of `len` bytes. The end of the payload is rounded up to
// the next multiple of 4.
static inline Region *region_after(Region *self, size_t len) {
  uintptr_t rounded_up = (uintptr_t)(self->data + len + 3);
  return (Region *)(char *)(rounded_up & ~0x3);
}
// Address one past the end of linear memory 0: its current size in pages
// multiplied by PAGESIZE.
static void *get_heap_end() {
  size_t page_count = __builtin_wasm_memory_size(0);
  return (void *)(page_count * PAGESIZE);
}
// Grow linear memory 0 by enough whole pages to hold `size` more bytes.
// Returns non-zero on success and 0 when the grow builtin reports SIZE_MAX.
// Note that `size - 1` wraps for size == 0, which requests an enormous page
// count; that behavior is kept as is.
static int grow_heap(size_t size) {
  size_t pages_needed = (size - 1) / PAGESIZE + 1;
  size_t grow_result = __builtin_wasm_memory_grow(0, pages_needed);
  return grow_result != SIZE_MAX;
}
// Clear out the heap, and move it to the given address.
void reset_heap(void *new_heap_start) {
heap_start = new_heap_start;
next = new_heap_start;
heap_end = get_heap_end();
}
// Allocate `size` bytes by bumping the `next` pointer.
//
// Returns a pointer to the payload of the new region, or NULL when
//   - `size` is so large that computing the end of the region would wrap
//     around the address space,
//   - memory has to grow and the heap would then span more than
//     MAX_HEAP_SIZE bytes, or
//   - growing the memory fails.
// On failure no allocator state is modified.
void *malloc(size_t size) {
  // region_after adds `size + 3` to the payload address; refuse sizes for
  // which that sum cannot be represented, otherwise the wrapped result would
  // look like it fits and `next` would end up pointing outside the heap.
  uintptr_t data_address = (uintptr_t)next->data;
  if (size > SIZE_MAX - 3 - data_address) {
    return NULL;
  }

  Region *region_end = region_after(next, size);

  if (region_end > heap_end) {
    if ((char *)region_end - (char *)heap_start > MAX_HEAP_SIZE) {
      return NULL;
    }
    // Grow by the real shortfall, not by `size`: the shortfall also covers the
    // region header and alignment padding, which `size` alone does not (e.g.
    // when `next` sits exactly at the end of memory).
    size_t shortfall = (size_t)((char *)region_end - (char *)heap_end);
    if (!grow_heap(shortfall)) return NULL;
    heap_end = get_heap_end();
  }

  void *result = &next->data;
  next->size = size;
  next = region_end;
  return result;
}
// Release `ptr`. NULL is accepted and ignored. Memory is only reclaimed when
// `ptr` is the most recent allocation: in that case the bump pointer is moved
// back to its region so the space is handed out again by the next malloc.
// Any other pointer is left untouched.
void free(void *ptr) {
  if (ptr != NULL) {
    Region *freed = region_for_ptr(ptr);
    if (region_after(freed, freed->size) == next) {
      next = freed;
    }
  }
}
// Allocate zero-initialized storage for `count` elements of `size` bytes.
//
// Returns NULL, without touching any memory, when `count * size` does not fit
// in a size_t or when the underlying malloc fails.
void *calloc(size_t count, size_t size) {
  // Detect multiplication overflow before it can turn a huge request into a
  // small allocation.
  if (size != 0 && count > (size_t)-1 / size) {
    return NULL;
  }
  size_t total = count * size;
  void *result = malloc(total);
  // malloc can fail; never hand NULL to memset.
  if (result != NULL) {
    memset(result, 0, total);
  }
  return result;
}
// Resize the allocation at `ptr` to `new_size` bytes.
//
// - A NULL `ptr` behaves like malloc(new_size).
// - If `ptr` is the most recent allocation it is resized in place and the
//   same pointer is returned, without copying.
// - Otherwise a new region is allocated and the old contents are copied into
//   it, truncated to `new_size` when shrinking. The old region is not
//   reclaimed (this allocator only reuses the last region).
//
// Returns NULL when the allocation fails; in that case the original block is
// left valid and unchanged.
void *realloc(void *ptr, size_t new_size) {
  if (ptr == NULL) {
    return malloc(new_size);
  }

  Region *region = region_for_ptr(ptr);
  Region *region_end = region_after(region, region->size);

  // When reallocating the last allocated region, return
  // the same pointer, and skip copying the data.
  if (region_end == next) {
    next = region;
    void *resized = malloc(new_size);
    if (resized == NULL) {
      // Undo the rewind: the caller still owns the old block, so it must not
      // be handed out again by a later malloc.
      next = region_end;
    }
    return resized;
  }

  void *result = malloc(new_size);
  if (result == NULL) {
    return NULL;
  }
  // Copy no more than the new block can hold; the old size may be larger
  // than `new_size` when shrinking.
  size_t copy_size = region->size < new_size ? region->size : new_size;
  memcpy(result, &region->data, copy_size);
  return result;
}

1302
vendor/tree-sitter/src/wasm/wasm-stdlib.h vendored Normal file

File diff suppressed because it is too large Load Diff

1823
vendor/tree-sitter/src/wasm_store.c vendored Normal file

File diff suppressed because it is too large Load Diff

31
vendor/tree-sitter/src/wasm_store.h vendored Normal file
View File

@@ -0,0 +1,31 @@
// Internal declarations for running a language's lexing functions and
// external scanner through a TSWasmStore.
// NOTE(review): only prototypes are visible in this header; the notes below
// are derived from names and signatures alone - confirm the exact semantics
// against the implementation (presumably wasm_store.c).
#ifndef TREE_SITTER_WASM_H_
#define TREE_SITTER_WASM_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "tree_sitter/api.h"
#include "./parser.h"
// Store session control: start takes the store together with a lexer and a
// language and reports a bool result; reset and has_error take only the store.
bool ts_wasm_store_start(TSWasmStore *, TSLexer *, const TSLanguage *);
void ts_wasm_store_reset(TSWasmStore *);
bool ts_wasm_store_has_error(const TSWasmStore *);
// Lexing entry points: each takes the store and a TSStateId and returns bool.
bool ts_wasm_store_call_lex_main(TSWasmStore *, TSStateId);
bool ts_wasm_store_call_lex_keyword(TSWasmStore *, TSStateId);
// External scanner entry points. The uint32_t returned by scanner_create is
// presumably the scanner handle that the other scanner calls take as their
// first uint32_t argument - TODO confirm, also for the meaning of the second
// uint32_t of scanner_scan and of the unsigned length of scanner_deserialize.
uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *);
void ts_wasm_store_call_scanner_destroy(TSWasmStore *, uint32_t);
bool ts_wasm_store_call_scanner_scan(TSWasmStore *, uint32_t, uint32_t);
uint32_t ts_wasm_store_call_scanner_serialize(TSWasmStore *, uint32_t, char *);
void ts_wasm_store_call_scanner_deserialize(TSWasmStore *, uint32_t, const char *, unsigned);
// Retain/release pair operating on a language; presumably reference counting
// for WASM-backed languages - confirm in the implementation.
void ts_wasm_language_retain(const TSLanguage *);
void ts_wasm_language_release(const TSLanguage *);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_WASM_H_

13
vendor/vendor_tree_sitter.sh vendored Executable file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Re-vendor the tree-sitter C library at a pinned tag.
#
# Clones $REMOTE at $BRANCH into a scratch directory and replaces the
# `tree-sitter` directory next to this script with upstream's lib/src,
# lib/include and LICENSE.

# Stop at the first failing command (and on unset variables / failed pipeline
# stages) so a failed step can never leave a half-populated vendor directory.
set -euo pipefail

REMOTE=https://github.com/tree-sitter/tree-sitter.git
BRANCH=v0.22.5

# Every path below is relative: always work in the directory containing this
# script, no matter where it was invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

rm -rf tmp
# Clone first; the existing vendored copy is only removed once the download
# has succeeded.
git clone --depth 1 --branch "$BRANCH" "$REMOTE" tmp

rm -rf tree-sitter
mkdir tree-sitter
mv tmp/lib/src tree-sitter
mv tmp/lib/include tree-sitter
mv tmp/LICENSE tree-sitter
rm -rf tmp