mirror of
https://github.com/helix-editor/helix.git
synced 2025-10-06 00:13:28 +02:00
Compare commits
3 Commits
filesentry
...
ropey2
Author | SHA1 | Date | |
---|---|---|---|
|
42619d7646 | ||
|
5d641b1722 | ||
|
5f4a1ca31f |
30
Cargo.lock
generated
30
Cargo.lock
generated
@@ -1418,12 +1418,13 @@ dependencies = [
|
||||
"quickcheck",
|
||||
"regex",
|
||||
"regex-cursor",
|
||||
"ropey",
|
||||
"ropey 2.0.0-alpha.3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"slotmap",
|
||||
"smallvec",
|
||||
"smartstring",
|
||||
"str_indices",
|
||||
"textwrap",
|
||||
"toml",
|
||||
"tree-house",
|
||||
@@ -1526,7 +1527,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"regex-automata",
|
||||
"regex-cursor",
|
||||
"ropey",
|
||||
"ropey 2.0.0-alpha.3",
|
||||
"rustix 1.0.7",
|
||||
"tempfile",
|
||||
"unicode-segmentation",
|
||||
@@ -2324,14 +2325,14 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "regex-cursor"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0497c781d2f982ae8284d2932aee6a877e58a4541daa5e8fadc18cc75c23a61d"
|
||||
source = "git+https://github.com/cessen/regex-cursor.git?branch=ropey2#7b8facc3b184da133f86f7c46de3d18d16708bcc"
|
||||
dependencies = [
|
||||
"log",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
"ropey",
|
||||
"ropey 1.6.1",
|
||||
"ropey 2.0.0-alpha.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2350,6 +2351,15 @@ dependencies = [
|
||||
"str_indices",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ropey"
|
||||
version = "2.0.0-alpha.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a6bae11a8c92fe67ffe2817f38315372f113617978665635147fd5eb6b789c4"
|
||||
dependencies = [
|
||||
"str_indices",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.24"
|
||||
@@ -2801,8 +2811,7 @@ checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076"
|
||||
[[package]]
|
||||
name = "tree-house"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "679e3296e503901cd9f6e116be5a43a9270222215bf6c78b4b1f4af5c3dcc62d"
|
||||
source = "git+https://github.com/helix-editor/tree-house?branch=ropey2#d24f15c081faf4b78eb5c9161818f2aaeeaf64f1"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"hashbrown 0.15.3",
|
||||
@@ -2810,7 +2819,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"regex",
|
||||
"regex-cursor",
|
||||
"ropey",
|
||||
"ropey 2.0.0-alpha.3",
|
||||
"slab",
|
||||
"tree-house-bindings",
|
||||
]
|
||||
@@ -2818,13 +2827,12 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tree-house-bindings"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "690809022f44e3d2329882649724b6e0027ade3fada65e4631d303e744dc32b4"
|
||||
source = "git+https://github.com/helix-editor/tree-house?branch=ropey2#d24f15c081faf4b78eb5c9161818f2aaeeaf64f1"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libloading",
|
||||
"regex-cursor",
|
||||
"ropey",
|
||||
"ropey 2.0.0-alpha.3",
|
||||
"thiserror 2.0.12",
|
||||
]
|
||||
|
||||
|
@@ -37,16 +37,18 @@ package.helix-tui.opt-level = 2
|
||||
package.helix-term.opt-level = 2
|
||||
|
||||
[workspace.dependencies]
|
||||
tree-house = { version = "0.2.0", default-features = false }
|
||||
tree-house = { git = "https://github.com/helix-editor/tree-house", branch = "ropey2", default-features = false }
|
||||
nucleo = "0.5.0"
|
||||
slotmap = "1.0.7"
|
||||
thiserror = "2.0"
|
||||
tempfile = "3.20.0"
|
||||
bitflags = "2.9"
|
||||
unicode-segmentation = "1.2"
|
||||
ropey = { version = "1.6.1", default-features = false, features = ["simd"] }
|
||||
ropey = { version = "2.0.0-alpha.3", default-features = false, features = ["metric_chars", "metric_utf16", "metric_lines_lf_cr"] }
|
||||
str_indices = "0.4"
|
||||
foldhash = "0.1"
|
||||
parking_lot = "0.12"
|
||||
regex-cursor = { git = "https://github.com/cessen/regex-cursor.git", branch = "ropey2", features = ["ropey2"] }
|
||||
|
||||
[workspace.package]
|
||||
version = "25.1.1"
|
||||
|
@@ -12,7 +12,7 @@ repository.workspace = true
|
||||
homepage.workspace = true
|
||||
|
||||
[features]
|
||||
unicode-lines = ["ropey/unicode_lines"]
|
||||
unicode-lines = ["ropey/metric_lines_unicode"]
|
||||
integration = []
|
||||
|
||||
[dependencies]
|
||||
@@ -21,6 +21,7 @@ helix-loader = { path = "../helix-loader" }
|
||||
helix-parsec = { path = "../helix-parsec" }
|
||||
|
||||
ropey.workspace = true
|
||||
str_indices.workspace = true
|
||||
smallvec = "1.15"
|
||||
smartstring = "1.0.1"
|
||||
unicode-segmentation.workspace = true
|
||||
@@ -57,7 +58,7 @@ textwrap = "0.16.2"
|
||||
nucleo.workspace = true
|
||||
parking_lot.workspace = true
|
||||
globset = "0.4.16"
|
||||
regex-cursor = "0.1.5"
|
||||
regex-cursor.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
quickcheck = { version = "1", default-features = false }
|
||||
|
@@ -1,8 +1,6 @@
|
||||
//! Utility functions to traverse the unicode graphemes of a `Rope`'s text contents.
|
||||
//!
|
||||
//! Based on <https://github.com/cessen/led/blob/c4fa72405f510b7fd16052f90a598c429b3104a6/src/graphemes.rs>
|
||||
use ropey::{str_utils::byte_to_char_idx, RopeSlice};
|
||||
use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
|
||||
use unicode_width::UnicodeWidthStr;
|
||||
|
||||
use std::borrow::Cow;
|
||||
@@ -119,129 +117,6 @@ pub fn grapheme_width(g: &str) -> usize {
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: for byte indexing versions of these functions see `RopeSliceExt`'s
|
||||
// `floor_grapheme_boundary` and `ceil_grapheme_boundary` and the rope grapheme iterators.
|
||||
|
||||
#[must_use]
|
||||
pub fn nth_prev_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize {
|
||||
// Bounds check
|
||||
debug_assert!(char_idx <= slice.len_chars());
|
||||
|
||||
// We work with bytes for this, so convert.
|
||||
let mut byte_idx = slice.char_to_byte(char_idx);
|
||||
|
||||
// Get the chunk with our byte index in it.
|
||||
let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx);
|
||||
|
||||
// Set up the grapheme cursor.
|
||||
let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true);
|
||||
|
||||
// Find the previous grapheme cluster boundary.
|
||||
for _ in 0..n {
|
||||
loop {
|
||||
match gc.prev_boundary(chunk, chunk_byte_idx) {
|
||||
Ok(None) => return 0,
|
||||
Ok(Some(n)) => {
|
||||
byte_idx = n;
|
||||
break;
|
||||
}
|
||||
Err(GraphemeIncomplete::PrevChunk) => {
|
||||
let (a, b, c, _) = slice.chunk_at_byte(chunk_byte_idx - 1);
|
||||
chunk = a;
|
||||
chunk_byte_idx = b;
|
||||
chunk_char_idx = c;
|
||||
}
|
||||
Err(GraphemeIncomplete::PreContext(n)) => {
|
||||
let ctx_chunk = slice.chunk_at_byte(n - 1).0;
|
||||
gc.provide_context(ctx_chunk, n - ctx_chunk.len());
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
let tmp = byte_to_char_idx(chunk, byte_idx - chunk_byte_idx);
|
||||
chunk_char_idx + tmp
|
||||
}
|
||||
|
||||
/// Finds the previous grapheme boundary before the given char position.
|
||||
#[must_use]
|
||||
#[inline(always)]
|
||||
pub fn prev_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
|
||||
nth_prev_grapheme_boundary(slice, char_idx, 1)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn nth_next_grapheme_boundary(slice: RopeSlice, char_idx: usize, n: usize) -> usize {
|
||||
// Bounds check
|
||||
debug_assert!(char_idx <= slice.len_chars());
|
||||
|
||||
// We work with bytes for this, so convert.
|
||||
let mut byte_idx = slice.char_to_byte(char_idx);
|
||||
|
||||
// Get the chunk with our byte index in it.
|
||||
let (mut chunk, mut chunk_byte_idx, mut chunk_char_idx, _) = slice.chunk_at_byte(byte_idx);
|
||||
|
||||
// Set up the grapheme cursor.
|
||||
let mut gc = GraphemeCursor::new(byte_idx, slice.len_bytes(), true);
|
||||
|
||||
// Find the nth next grapheme cluster boundary.
|
||||
for _ in 0..n {
|
||||
loop {
|
||||
match gc.next_boundary(chunk, chunk_byte_idx) {
|
||||
Ok(None) => return slice.len_chars(),
|
||||
Ok(Some(n)) => {
|
||||
byte_idx = n;
|
||||
break;
|
||||
}
|
||||
Err(GraphemeIncomplete::NextChunk) => {
|
||||
chunk_byte_idx += chunk.len();
|
||||
let (a, _, c, _) = slice.chunk_at_byte(chunk_byte_idx);
|
||||
chunk = a;
|
||||
chunk_char_idx = c;
|
||||
}
|
||||
Err(GraphemeIncomplete::PreContext(n)) => {
|
||||
let ctx_chunk = slice.chunk_at_byte(n - 1).0;
|
||||
gc.provide_context(ctx_chunk, n - ctx_chunk.len());
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
let tmp = byte_to_char_idx(chunk, byte_idx - chunk_byte_idx);
|
||||
chunk_char_idx + tmp
|
||||
}
|
||||
|
||||
/// Finds the next grapheme boundary after the given char position.
|
||||
#[must_use]
|
||||
#[inline(always)]
|
||||
pub fn next_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> usize {
|
||||
nth_next_grapheme_boundary(slice, char_idx, 1)
|
||||
}
|
||||
|
||||
/// Returns the passed char index if it's already a grapheme boundary,
|
||||
/// or the next grapheme boundary char index if not.
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub fn ensure_grapheme_boundary_next(slice: RopeSlice, char_idx: usize) -> usize {
|
||||
if char_idx == 0 {
|
||||
char_idx
|
||||
} else {
|
||||
next_grapheme_boundary(slice, char_idx - 1)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the passed char index if it's already a grapheme boundary,
|
||||
/// or the prev grapheme boundary char index if not.
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub fn ensure_grapheme_boundary_prev(slice: RopeSlice, char_idx: usize) -> usize {
|
||||
if char_idx == slice.len_chars() {
|
||||
char_idx
|
||||
} else {
|
||||
prev_grapheme_boundary(slice, char_idx + 1)
|
||||
}
|
||||
}
|
||||
|
||||
/// A highly compressed Cow<'a, str> that holds
|
||||
/// at most u31::MAX bytes and is read-only
|
||||
pub struct GraphemeStr<'a> {
|
||||
|
@@ -45,7 +45,7 @@ pub use helix_loader::find_workspace;
|
||||
mod rope_reader;
|
||||
|
||||
pub use rope_reader::RopeReader;
|
||||
pub use ropey::{self, str_utils, Rope, RopeBuilder, RopeSlice};
|
||||
pub use ropey::{self, Rope, RopeBuilder, RopeSlice};
|
||||
|
||||
// pub use tendril::StrTendril as Tendril;
|
||||
pub use smartstring::SmartString;
|
||||
|
@@ -1,12 +1,11 @@
|
||||
use crate::{movement::Direction, syntax::TreeCursor, Range, RopeSlice, Selection, Syntax};
|
||||
use crate::{movement::Direction, syntax::TreeCursor, Range, Selection, Syntax};
|
||||
|
||||
pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection {
|
||||
pub fn expand_selection(syntax: &Syntax, selection: Selection) -> Selection {
|
||||
let cursor = &mut syntax.walk();
|
||||
|
||||
selection.transform(|range| {
|
||||
let from = text.char_to_byte(range.from()) as u32;
|
||||
let to = text.char_to_byte(range.to()) as u32;
|
||||
|
||||
let from = range.from() as u32;
|
||||
let to = range.to() as u32;
|
||||
let byte_range = from..to;
|
||||
cursor.reset_to_byte_range(from, to);
|
||||
|
||||
@@ -17,17 +16,14 @@ pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: Selection)
|
||||
}
|
||||
|
||||
let node = cursor.node();
|
||||
let from = text.byte_to_char(node.start_byte() as usize);
|
||||
let to = text.byte_to_char(node.end_byte() as usize);
|
||||
|
||||
Range::new(to, from).with_direction(range.direction())
|
||||
Range::new(node.start_byte() as usize, node.end_byte() as usize)
|
||||
.with_direction(range.direction())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn shrink_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection {
|
||||
pub fn shrink_selection(syntax: &Syntax, selection: Selection) -> Selection {
|
||||
select_node_impl(
|
||||
syntax,
|
||||
text,
|
||||
selection,
|
||||
|cursor| {
|
||||
cursor.goto_first_child();
|
||||
@@ -36,10 +32,9 @@ pub fn shrink_selection(syntax: &Syntax, text: RopeSlice, selection: Selection)
|
||||
)
|
||||
}
|
||||
|
||||
pub fn select_next_sibling(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection {
|
||||
pub fn select_next_sibling(syntax: &Syntax, selection: Selection) -> Selection {
|
||||
select_node_impl(
|
||||
syntax,
|
||||
text,
|
||||
selection,
|
||||
|cursor| {
|
||||
while !cursor.goto_next_sibling() {
|
||||
@@ -52,34 +47,36 @@ pub fn select_next_sibling(syntax: &Syntax, text: RopeSlice, selection: Selectio
|
||||
)
|
||||
}
|
||||
|
||||
pub fn select_all_siblings(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection {
|
||||
pub fn select_all_siblings(syntax: &Syntax, selection: Selection) -> Selection {
|
||||
let mut cursor = syntax.walk();
|
||||
selection.transform_iter(move |range| {
|
||||
let (from, to) = range.into_byte_range(text);
|
||||
let from = range.from();
|
||||
let to = range.to();
|
||||
cursor.reset_to_byte_range(from as u32, to as u32);
|
||||
|
||||
if !cursor.goto_parent_with(|parent| parent.child_count() > 1) {
|
||||
return vec![range].into_iter();
|
||||
}
|
||||
|
||||
select_children(&mut cursor, text, range).into_iter()
|
||||
select_children(&mut cursor, range).into_iter()
|
||||
})
|
||||
}
|
||||
|
||||
pub fn select_all_children(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection {
|
||||
pub fn select_all_children(syntax: &Syntax, selection: Selection) -> Selection {
|
||||
let mut cursor = syntax.walk();
|
||||
selection.transform_iter(move |range| {
|
||||
let (from, to) = range.into_byte_range(text);
|
||||
let from = range.from();
|
||||
let to = range.to();
|
||||
cursor.reset_to_byte_range(from as u32, to as u32);
|
||||
select_children(&mut cursor, text, range).into_iter()
|
||||
select_children(&mut cursor, range).into_iter()
|
||||
})
|
||||
}
|
||||
|
||||
fn select_children(cursor: &mut TreeCursor, text: RopeSlice, range: Range) -> Vec<Range> {
|
||||
fn select_children(cursor: &mut TreeCursor, range: Range) -> Vec<Range> {
|
||||
let children = cursor
|
||||
.children()
|
||||
.filter(|child| child.is_named())
|
||||
.map(|child| Range::from_node(child, text, range.direction()))
|
||||
.map(|child| Range::from_node(child, range.direction()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if !children.is_empty() {
|
||||
@@ -89,10 +86,9 @@ fn select_children(cursor: &mut TreeCursor, text: RopeSlice, range: Range) -> Ve
|
||||
}
|
||||
}
|
||||
|
||||
pub fn select_prev_sibling(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection {
|
||||
pub fn select_prev_sibling(syntax: &Syntax, selection: Selection) -> Selection {
|
||||
select_node_impl(
|
||||
syntax,
|
||||
text,
|
||||
selection,
|
||||
|cursor| {
|
||||
while !cursor.goto_previous_sibling() {
|
||||
@@ -107,7 +103,6 @@ pub fn select_prev_sibling(syntax: &Syntax, text: RopeSlice, selection: Selectio
|
||||
|
||||
fn select_node_impl<F>(
|
||||
syntax: &Syntax,
|
||||
text: RopeSlice,
|
||||
selection: Selection,
|
||||
motion: F,
|
||||
direction: Option<Direction>,
|
||||
@@ -118,17 +113,12 @@ where
|
||||
let cursor = &mut syntax.walk();
|
||||
|
||||
selection.transform(|range| {
|
||||
let from = text.char_to_byte(range.from()) as u32;
|
||||
let to = text.char_to_byte(range.to()) as u32;
|
||||
|
||||
cursor.reset_to_byte_range(from, to);
|
||||
cursor.reset_to_byte_range(range.from() as u32, range.to() as u32);
|
||||
|
||||
motion(cursor);
|
||||
|
||||
let node = cursor.node();
|
||||
let from = text.byte_to_char(node.start_byte() as usize);
|
||||
let to = text.byte_to_char(node.end_byte() as usize);
|
||||
|
||||
Range::new(from, to).with_direction(direction.unwrap_or_else(|| range.direction()))
|
||||
Range::new(node.start_byte() as usize, node.end_byte() as usize)
|
||||
.with_direction(direction.unwrap_or_else(|| range.direction()))
|
||||
})
|
||||
}
|
||||
|
@@ -3,17 +3,11 @@
|
||||
//!
|
||||
//! All positioning is done via `char` offsets into the buffer.
|
||||
use crate::{
|
||||
graphemes::{
|
||||
ensure_grapheme_boundary_next, ensure_grapheme_boundary_prev, next_grapheme_boundary,
|
||||
prev_grapheme_boundary,
|
||||
},
|
||||
line_ending::get_line_ending,
|
||||
movement::Direction,
|
||||
tree_sitter::Node,
|
||||
Assoc, ChangeSet, RopeSlice,
|
||||
line_ending::get_line_ending, movement::Direction, tree_sitter::Node, Assoc, ChangeSet,
|
||||
RopeSlice,
|
||||
};
|
||||
use helix_stdx::range::is_subset;
|
||||
use helix_stdx::rope::{self, RopeSliceExt};
|
||||
use helix_stdx::{range::is_subset, rope::LINE_TYPE};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::{borrow::Cow, iter, slice};
|
||||
|
||||
@@ -25,9 +19,9 @@ use std::{borrow::Cow, iter, slice};
|
||||
/// can be in any order, or even share the same position.
|
||||
///
|
||||
/// The anchor and head positions use gap indexing, meaning
|
||||
/// that their indices represent the gaps *between* `char`s
|
||||
/// rather than the `char`s themselves. For example, 1
|
||||
/// represents the position between the first and second `char`.
|
||||
/// that their indices represent the gaps *between* bytes
|
||||
/// rather than the bytes themselves. For example, 1
|
||||
/// represents the position between the first and second byte.
|
||||
///
|
||||
/// Below are some examples of `Range` configurations.
|
||||
/// The anchor and head indices are shown as "(anchor, head)"
|
||||
@@ -75,10 +69,9 @@ impl Range {
|
||||
Self::new(head, head)
|
||||
}
|
||||
|
||||
pub fn from_node(node: Node, text: RopeSlice, direction: Direction) -> Self {
|
||||
let from = text.byte_to_char(node.start_byte() as usize);
|
||||
let to = text.byte_to_char(node.end_byte() as usize);
|
||||
Range::new(from, to).with_direction(direction)
|
||||
pub fn from_node(node: Node, direction: Direction) -> Self {
|
||||
let range = node.byte_range();
|
||||
Range::new(range.start as usize, range.end as usize).with_direction(direction)
|
||||
}
|
||||
|
||||
/// Start of the range.
|
||||
@@ -110,10 +103,13 @@ impl Range {
|
||||
let to = if self.is_empty() {
|
||||
self.to()
|
||||
} else {
|
||||
prev_grapheme_boundary(text, self.to()).max(from)
|
||||
text.prev_grapheme_boundary(self.to()).max(from)
|
||||
};
|
||||
|
||||
(text.char_to_line(from), text.char_to_line(to))
|
||||
(
|
||||
text.byte_to_line_idx(from, LINE_TYPE),
|
||||
text.byte_to_line_idx(to, LINE_TYPE),
|
||||
)
|
||||
}
|
||||
|
||||
/// `true` when head and anchor are at the same position.
|
||||
@@ -277,16 +273,16 @@ impl Range {
|
||||
use std::cmp::Ordering;
|
||||
let (new_anchor, new_head) = match self.anchor.cmp(&self.head) {
|
||||
Ordering::Equal => {
|
||||
let pos = ensure_grapheme_boundary_prev(slice, self.anchor);
|
||||
let pos = slice.floor_grapheme_boundary(self.anchor);
|
||||
(pos, pos)
|
||||
}
|
||||
Ordering::Less => (
|
||||
ensure_grapheme_boundary_prev(slice, self.anchor),
|
||||
ensure_grapheme_boundary_next(slice, self.head),
|
||||
slice.floor_char_boundary(self.anchor),
|
||||
slice.ceil_char_boundary(self.head),
|
||||
),
|
||||
Ordering::Greater => (
|
||||
ensure_grapheme_boundary_next(slice, self.anchor),
|
||||
ensure_grapheme_boundary_prev(slice, self.head),
|
||||
slice.ceil_char_boundary(self.anchor),
|
||||
slice.floor_char_boundary(self.head),
|
||||
),
|
||||
};
|
||||
Range {
|
||||
@@ -318,7 +314,7 @@ impl Range {
|
||||
if self.anchor == self.head {
|
||||
Range {
|
||||
anchor: self.anchor,
|
||||
head: next_grapheme_boundary(slice, self.head),
|
||||
head: slice.next_grapheme_boundary(self.head),
|
||||
old_visual_position: self.old_visual_position,
|
||||
}
|
||||
} else {
|
||||
@@ -334,39 +330,39 @@ impl Range {
|
||||
#[inline]
|
||||
pub fn cursor(self, text: RopeSlice) -> usize {
|
||||
if self.head > self.anchor {
|
||||
prev_grapheme_boundary(text, self.head)
|
||||
text.prev_grapheme_boundary(self.head)
|
||||
} else {
|
||||
self.head
|
||||
}
|
||||
}
|
||||
|
||||
/// Puts the left side of the block cursor at `char_idx`, optionally extending.
|
||||
/// Puts the left side of the block cursor at `byte_idx`, optionally extending.
|
||||
///
|
||||
/// This follows "1-width" semantics, and therefore does a combination of anchor
|
||||
/// and head moves to behave as if both the front and back of the range are 1-width
|
||||
/// blocks
|
||||
///
|
||||
/// This method assumes that the range and `char_idx` are already properly
|
||||
/// This method assumes that the range and `byte_idx` are already properly
|
||||
/// grapheme-aligned.
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub fn put_cursor(self, text: RopeSlice, char_idx: usize, extend: bool) -> Range {
|
||||
pub fn put_cursor(self, text: RopeSlice, byte_idx: usize, extend: bool) -> Range {
|
||||
if extend {
|
||||
let anchor = if self.head >= self.anchor && char_idx < self.anchor {
|
||||
next_grapheme_boundary(text, self.anchor)
|
||||
} else if self.head < self.anchor && char_idx >= self.anchor {
|
||||
prev_grapheme_boundary(text, self.anchor)
|
||||
let anchor = if self.head >= self.anchor && byte_idx < self.anchor {
|
||||
text.next_grapheme_boundary(self.anchor)
|
||||
} else if self.head < self.anchor && byte_idx >= self.anchor {
|
||||
text.prev_grapheme_boundary(self.anchor)
|
||||
} else {
|
||||
self.anchor
|
||||
};
|
||||
|
||||
if anchor <= char_idx {
|
||||
Range::new(anchor, next_grapheme_boundary(text, char_idx))
|
||||
if anchor <= byte_idx {
|
||||
Range::new(anchor, text.next_grapheme_boundary(byte_idx))
|
||||
} else {
|
||||
Range::new(anchor, char_idx)
|
||||
Range::new(anchor, byte_idx)
|
||||
}
|
||||
} else {
|
||||
Range::point(char_idx)
|
||||
Range::point(byte_idx)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -374,7 +370,7 @@ impl Range {
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn cursor_line(&self, text: RopeSlice) -> usize {
|
||||
text.char_to_line(self.cursor(text))
|
||||
text.byte_to_line_idx(self.cursor(text), LINE_TYPE)
|
||||
}
|
||||
|
||||
/// Returns true if this Range covers a single grapheme in the given text
|
||||
@@ -384,12 +380,6 @@ impl Range {
|
||||
let second = graphemes.next();
|
||||
first.is_some() && second.is_none()
|
||||
}
|
||||
|
||||
/// Converts this char range into an in order byte range, discarding
|
||||
/// direction.
|
||||
pub fn into_byte_range(&self, text: RopeSlice) -> (usize, usize) {
|
||||
(text.char_to_byte(self.from()), text.char_to_byte(self.to()))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<(usize, usize)> for Range {
|
||||
@@ -772,7 +762,9 @@ pub fn keep_or_remove_matches(
|
||||
) -> Option<Selection> {
|
||||
let result: SmallVec<_> = selection
|
||||
.iter()
|
||||
.filter(|range| regex.is_match(text.regex_input_at(range.from()..range.to())) ^ remove)
|
||||
.filter(|range| {
|
||||
regex.is_match(text.regex_input_at_bytes(range.from()..range.to())) ^ remove
|
||||
})
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
@@ -792,13 +784,10 @@ pub fn select_on_matches(
|
||||
let mut result = SmallVec::with_capacity(selection.len());
|
||||
|
||||
for sel in selection {
|
||||
for mat in regex.find_iter(text.regex_input_at(sel.from()..sel.to())) {
|
||||
for mat in regex.find_iter(text.regex_input_at_bytes(sel.from()..sel.to())) {
|
||||
// TODO: retain range direction
|
||||
|
||||
let start = text.byte_to_char(mat.start());
|
||||
let end = text.byte_to_char(mat.end());
|
||||
|
||||
let range = Range::new(start, end);
|
||||
let range = Range::new(mat.start(), mat.end());
|
||||
// Make sure the match is not right outside of the selection.
|
||||
// These invalid matches can come from using RegEx anchors like `^`, `$`
|
||||
if range != Range::point(sel.to()) {
|
||||
@@ -830,7 +819,7 @@ pub fn split_on_newline(text: RopeSlice, selection: &Selection) -> Selection {
|
||||
|
||||
let mut start = sel_start;
|
||||
|
||||
for line in sel.slice(text).lines() {
|
||||
for line in sel.slice(text).lines(LINE_TYPE) {
|
||||
let Some(line_ending) = get_line_ending(&line) else {
|
||||
break;
|
||||
};
|
||||
@@ -863,11 +852,11 @@ pub fn split_on_matches(text: RopeSlice, selection: &Selection, regex: &rope::Re
|
||||
let sel_end = sel.to();
|
||||
let mut start = sel_start;
|
||||
|
||||
for mat in regex.find_iter(text.regex_input_at(sel_start..sel_end)) {
|
||||
for mat in regex.find_iter(text.regex_input_at_bytes(sel_start..sel_end)) {
|
||||
// TODO: retain range direction
|
||||
let end = text.byte_to_char(mat.start());
|
||||
let end = mat.start();
|
||||
result.push(Range::new(start, end));
|
||||
start = text.byte_to_char(mat.end());
|
||||
start = mat.end();
|
||||
}
|
||||
|
||||
if start < sel_end {
|
||||
|
@@ -325,14 +325,14 @@ impl Transform {
|
||||
let mut buf = Tendril::new();
|
||||
let it = self
|
||||
.regex
|
||||
.captures_iter(doc.regex_input_at(range))
|
||||
.captures_iter(doc.regex_input_at_bytes(range))
|
||||
.enumerate();
|
||||
doc = doc.slice(range);
|
||||
let mut last_match = 0;
|
||||
for (_, cap) in it {
|
||||
// unwrap on 0 is OK because captures only reports matches
|
||||
let m = cap.get_group(0).unwrap();
|
||||
buf.extend(doc.byte_slice(last_match..m.start).chunks());
|
||||
buf.extend(doc.slice(last_match..m.start).chunks());
|
||||
last_match = m.end;
|
||||
for fmt in &*self.replacement {
|
||||
match *fmt {
|
||||
@@ -341,12 +341,12 @@ impl Transform {
|
||||
}
|
||||
FormatItem::Capture(i) => {
|
||||
if let Some(cap) = cap.get_group(i) {
|
||||
buf.extend(doc.byte_slice(cap.range()).chunks());
|
||||
buf.extend(doc.slice(cap.range()).chunks());
|
||||
}
|
||||
}
|
||||
FormatItem::CaseChange(i, change) => {
|
||||
if let Some(cap) = cap.get_group(i).filter(|i| !i.is_empty()) {
|
||||
let mut chars = doc.byte_slice(cap.range()).chars();
|
||||
let mut chars = doc.slice(cap.range()).chars();
|
||||
match change {
|
||||
CaseChange::Upcase => to_upper_case_with(chars, &mut buf),
|
||||
CaseChange::Downcase => to_lower_case_with(chars, &mut buf),
|
||||
@@ -373,7 +373,7 @@ impl Transform {
|
||||
break;
|
||||
}
|
||||
}
|
||||
buf.extend(doc.byte_slice(last_match..).chunks());
|
||||
buf.extend(doc.slice(last_match..).chunks());
|
||||
buf
|
||||
}
|
||||
}
|
||||
|
@@ -16,7 +16,7 @@ dunce = "1.0"
|
||||
etcetera = "0.10"
|
||||
ropey.workspace = true
|
||||
which = "7.0"
|
||||
regex-cursor = "0.1.5"
|
||||
regex-cursor.workspace = true
|
||||
bitflags.workspace = true
|
||||
once_cell = "1.21"
|
||||
regex-automata = "0.4.9"
|
||||
|
@@ -269,7 +269,7 @@ pub fn get_path_suffix(src: RopeSlice<'_>, match_single_file: bool) -> Option<Ro
|
||||
|
||||
regex
|
||||
.find(Input::new(src))
|
||||
.map(|mat| src.byte_slice(mat.range()))
|
||||
.map(|mat| src.slice(mat.range()))
|
||||
}
|
||||
|
||||
/// Returns an iterator of the **byte** ranges in src that contain a path.
|
||||
|
@@ -1,75 +1,25 @@
|
||||
use std::fmt;
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
|
||||
pub use regex_cursor::engines::meta::{Builder as RegexBuilder, Regex};
|
||||
pub use regex_cursor::regex_automata::util::syntax::Config;
|
||||
use regex_cursor::{Input as RegexInput, RopeyCursor};
|
||||
use ropey::iter::Chunks;
|
||||
use ropey::RopeSlice;
|
||||
use regex_cursor::Input as RegexInput;
|
||||
use ropey::{ChunkCursor, RopeSlice};
|
||||
use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
|
||||
|
||||
pub const LINE_TYPE: ropey::LineType = ropey::LineType::LF_CR;
|
||||
|
||||
pub trait RopeSliceExt<'a>: Sized {
|
||||
fn ends_with(self, text: &str) -> bool;
|
||||
fn starts_with(self, text: &str) -> bool;
|
||||
fn regex_input(self) -> RegexInput<RopeyCursor<'a>>;
|
||||
fn regex_input(self) -> RegexInput<ChunkCursor<'a>>;
|
||||
fn regex_input_at_bytes<R: RangeBounds<usize>>(
|
||||
self,
|
||||
byte_range: R,
|
||||
) -> RegexInput<RopeyCursor<'a>>;
|
||||
fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>>;
|
||||
) -> RegexInput<ChunkCursor<'a>>;
|
||||
#[deprecated = "use regex_input_at_bytes instead"]
|
||||
fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<ChunkCursor<'a>>;
|
||||
fn first_non_whitespace_char(self) -> Option<usize>;
|
||||
fn last_non_whitespace_char(self) -> Option<usize>;
|
||||
/// Finds the closest byte index not exceeding `byte_idx` which lies on a character boundary.
|
||||
///
|
||||
/// If `byte_idx` already lies on a character boundary then it is returned as-is. When
|
||||
/// `byte_idx` lies between two character boundaries, this function returns the byte index of
|
||||
/// the lesser / earlier / left-hand-side boundary.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// # use ropey::RopeSlice;
|
||||
/// # use helix_stdx::rope::RopeSliceExt;
|
||||
/// let text = RopeSlice::from("⌚"); // three bytes: e2 8c 9a
|
||||
/// assert_eq!(text.floor_char_boundary(0), 0);
|
||||
/// assert_eq!(text.floor_char_boundary(1), 0);
|
||||
/// assert_eq!(text.floor_char_boundary(2), 0);
|
||||
/// assert_eq!(text.floor_char_boundary(3), 3);
|
||||
/// ```
|
||||
fn floor_char_boundary(self, byte_idx: usize) -> usize;
|
||||
/// Finds the closest byte index not below `byte_idx` which lies on a character boundary.
|
||||
///
|
||||
/// If `byte_idx` already lies on a character boundary then it is returned as-is. When
|
||||
/// `byte_idx` lies between two character boundaries, this function returns the byte index of
|
||||
/// the greater / later / right-hand-side boundary.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// # use ropey::RopeSlice;
|
||||
/// # use helix_stdx::rope::RopeSliceExt;
|
||||
/// let text = RopeSlice::from("⌚"); // three bytes: e2 8c 9a
|
||||
/// assert_eq!(text.ceil_char_boundary(0), 0);
|
||||
/// assert_eq!(text.ceil_char_boundary(1), 3);
|
||||
/// assert_eq!(text.ceil_char_boundary(2), 3);
|
||||
/// assert_eq!(text.ceil_char_boundary(3), 3);
|
||||
/// ```
|
||||
fn ceil_char_boundary(self, byte_idx: usize) -> usize;
|
||||
/// Checks whether the given `byte_idx` lies on a character boundary.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// # use ropey::RopeSlice;
|
||||
/// # use helix_stdx::rope::RopeSliceExt;
|
||||
/// let text = RopeSlice::from("⌚"); // three bytes: e2 8c 9a
|
||||
/// assert!(text.is_char_boundary(0));
|
||||
/// assert!(!text.is_char_boundary(1));
|
||||
/// assert!(!text.is_char_boundary(2));
|
||||
/// assert!(text.is_char_boundary(3));
|
||||
/// ```
|
||||
#[allow(clippy::wrong_self_convention)]
|
||||
fn is_char_boundary(self, byte_idx: usize) -> bool;
|
||||
/// Finds the closest byte index not exceeding `byte_idx` which lies on a grapheme cluster
|
||||
/// boundary.
|
||||
///
|
||||
@@ -82,14 +32,19 @@ pub trait RopeSliceExt<'a>: Sized {
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// # use ropey::RopeSlice;
|
||||
/// # use ropey::{RopeSlice, Rope};
|
||||
/// # use helix_stdx::rope::RopeSliceExt;
|
||||
/// let text = RopeSlice::from("\r\n"); // U+000D U+000A, hex: 0d 0a
|
||||
/// let text = Rope::from_str("\r\n"); // U+000D U+000A, hex: 0d 0a
|
||||
/// let text = text.slice(..);
|
||||
/// assert_eq!(text.floor_grapheme_boundary(0), 0);
|
||||
/// assert_eq!(text.floor_grapheme_boundary(1), 0);
|
||||
/// assert_eq!(text.floor_grapheme_boundary(2), 2);
|
||||
/// ```
|
||||
fn floor_grapheme_boundary(self, byte_idx: usize) -> usize;
|
||||
fn prev_grapheme_boundary(self, byte_idx: usize) -> usize {
|
||||
self.nth_prev_grapheme_boundary(byte_idx, 1)
|
||||
}
|
||||
fn nth_prev_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize;
|
||||
/// Finds the closest byte index not below `byte_idx` which lies on a grapheme cluster
|
||||
/// boundary.
|
||||
///
|
||||
@@ -102,22 +57,28 @@ pub trait RopeSliceExt<'a>: Sized {
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// # use ropey::RopeSlice;
|
||||
/// # use ropey::{RopeSlice, Rope};
|
||||
/// # use helix_stdx::rope::RopeSliceExt;
|
||||
/// let text = RopeSlice::from("\r\n"); // U+000D U+000A, hex: 0d 0a
|
||||
/// let text = Rope::from_str("\r\n"); // U+000D U+000A, hex: 0d 0a
|
||||
/// let text = text.slice(..);
|
||||
/// assert_eq!(text.ceil_grapheme_boundary(0), 0);
|
||||
/// assert_eq!(text.ceil_grapheme_boundary(1), 2);
|
||||
/// assert_eq!(text.ceil_grapheme_boundary(2), 2);
|
||||
/// ```
|
||||
fn ceil_grapheme_boundary(self, byte_idx: usize) -> usize;
|
||||
/// Returns the grapheme cluster boundary one step after `byte_idx`.
///
/// Shorthand for calling `nth_next_grapheme_boundary` with `n = 1`.
fn next_grapheme_boundary(self, byte_idx: usize) -> usize {
|
||||
self.nth_next_grapheme_boundary(byte_idx, 1)
|
||||
}
|
||||
/// Presumably the forward counterpart of `nth_prev_grapheme_boundary`: steps over `n`
/// grapheme cluster boundaries starting from `byte_idx` and returns the byte index reached.
///
/// NOTE(review): verify the exact end-of-text behavior against the implementation.
fn nth_next_grapheme_boundary(self, byte_idx: usize, n: usize) -> usize;
|
||||
/// Checks whether the `byte_idx` lies on a grapheme cluster boundary.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// # use ropey::RopeSlice;
|
||||
/// # use ropey::{RopeSlice, Rope};
|
||||
/// # use helix_stdx::rope::RopeSliceExt;
|
||||
/// let text = RopeSlice::from("\r\n"); // U+000D U+000A, hex: 0d 0a
|
||||
/// let text = Rope::from_str("\r\n"); // U+000D U+000A, hex: 0d 0a
|
||||
/// let text = text.slice(..);
|
||||
/// assert!(text.is_grapheme_boundary(0));
|
||||
/// assert!(!text.is_grapheme_boundary(1));
|
||||
/// assert!(text.is_grapheme_boundary(2));
|
||||
@@ -129,10 +90,10 @@ pub trait RopeSliceExt<'a>: Sized {
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// # use ropey::RopeSlice;
|
||||
/// # use ropey::{RopeSlice, Rope};
|
||||
/// # use helix_stdx::rope::RopeSliceExt;
|
||||
/// let text = RopeSlice::from("😶🌫️🏴☠️🖼️");
|
||||
/// let graphemes: Vec<_> = text.graphemes().collect();
|
||||
/// let text = Rope::from_str("😶🌫️🏴☠️🖼️");
|
||||
/// let graphemes: Vec<_> = text.slice(..).graphemes().collect();
|
||||
/// assert_eq!(graphemes.as_slice(), &["😶🌫️", "🏴☠️", "🖼️"]);
|
||||
/// ```
|
||||
fn graphemes(self) -> RopeGraphemes<'a>;
|
||||
@@ -144,10 +105,10 @@ pub trait RopeSliceExt<'a>: Sized {
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// # use ropey::RopeSlice;
|
||||
/// # use ropey::{RopeSlice, Rope};
|
||||
/// # use helix_stdx::rope::RopeSliceExt;
|
||||
/// let text = RopeSlice::from("😶🌫️🏴☠️🖼️");
|
||||
/// let graphemes: Vec<_> = text.graphemes_rev().collect();
|
||||
/// let text = Rope::from_str("😶🌫️🏴☠️🖼️");
|
||||
/// let graphemes: Vec<_> = text.slice(..).graphemes_rev().collect();
|
||||
/// assert_eq!(graphemes.as_slice(), &["🖼️", "🏴☠️", "😶🌫️"]);
|
||||
/// ```
|
||||
fn graphemes_rev(self) -> RevRopeGraphemes<'a>;
|
||||
@@ -155,36 +116,36 @@ pub trait RopeSliceExt<'a>: Sized {
|
||||
|
||||
impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
|
||||
fn ends_with(self, text: &str) -> bool {
|
||||
let len = self.len_bytes();
|
||||
let len = self.len();
|
||||
if len < text.len() {
|
||||
return false;
|
||||
}
|
||||
self.get_byte_slice(len - text.len()..)
|
||||
.is_some_and(|end| end == text)
|
||||
self.try_slice(len - text.len()..)
|
||||
.is_ok_and(|end| end == text)
|
||||
}
|
||||
|
||||
fn starts_with(self, text: &str) -> bool {
|
||||
let len = self.len_bytes();
|
||||
let len = self.len();
|
||||
if len < text.len() {
|
||||
return false;
|
||||
}
|
||||
self.get_byte_slice(..text.len())
|
||||
.is_some_and(|start| start == text)
|
||||
self.try_slice(..text.len())
|
||||
.is_ok_and(|start| start == text)
|
||||
}
|
||||
|
||||
fn regex_input(self) -> RegexInput<RopeyCursor<'a>> {
|
||||
fn regex_input(self) -> RegexInput<ChunkCursor<'a>> {
|
||||
RegexInput::new(self)
|
||||
}
|
||||
|
||||
fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>> {
|
||||
fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<ChunkCursor<'a>> {
|
||||
let start_bound = match char_range.start_bound() {
|
||||
Bound::Included(&val) => Bound::Included(self.char_to_byte(val)),
|
||||
Bound::Excluded(&val) => Bound::Excluded(self.char_to_byte(val)),
|
||||
Bound::Included(&val) => Bound::Included(self.char_to_byte_idx(val)),
|
||||
Bound::Excluded(&val) => Bound::Excluded(self.char_to_byte_idx(val)),
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
};
|
||||
let end_bound = match char_range.end_bound() {
|
||||
Bound::Included(&val) => Bound::Included(self.char_to_byte(val)),
|
||||
Bound::Excluded(&val) => Bound::Excluded(self.char_to_byte(val)),
|
||||
Bound::Included(&val) => Bound::Included(self.char_to_byte_idx(val)),
|
||||
Bound::Excluded(&val) => Bound::Excluded(self.char_to_byte_idx(val)),
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
};
|
||||
self.regex_input_at_bytes((start_bound, end_bound))
|
||||
@@ -192,10 +153,10 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
|
||||
fn regex_input_at_bytes<R: RangeBounds<usize>>(
|
||||
self,
|
||||
byte_range: R,
|
||||
) -> RegexInput<RopeyCursor<'a>> {
|
||||
) -> RegexInput<ChunkCursor<'a>> {
|
||||
let input = match byte_range.start_bound() {
|
||||
Bound::Included(&pos) | Bound::Excluded(&pos) => {
|
||||
RegexInput::new(RopeyCursor::at(self, pos))
|
||||
RegexInput::new(self.chunk_cursor_at(pos))
|
||||
}
|
||||
Bound::Unbounded => RegexInput::new(self),
|
||||
};
|
||||
@@ -211,69 +172,22 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
|
||||
.map(|pos| self.len_chars() - pos - 1)
|
||||
}
|
||||
|
||||
// These three are adapted from std:
|
||||
|
||||
fn floor_char_boundary(self, byte_idx: usize) -> usize {
|
||||
if byte_idx >= self.len_bytes() {
|
||||
self.len_bytes()
|
||||
} else {
|
||||
let offset = self
|
||||
.bytes_at(byte_idx + 1)
|
||||
.reversed()
|
||||
.take(4)
|
||||
.position(is_utf8_char_boundary)
|
||||
// A char can only be four bytes long so we are guaranteed to find a boundary.
|
||||
.unwrap();
|
||||
|
||||
byte_idx - offset
|
||||
}
|
||||
}
|
||||
|
||||
fn ceil_char_boundary(self, byte_idx: usize) -> usize {
|
||||
if byte_idx > self.len_bytes() {
|
||||
self.len_bytes()
|
||||
} else {
|
||||
let upper_bound = self.len_bytes().min(byte_idx + 4);
|
||||
self.bytes_at(byte_idx)
|
||||
.position(is_utf8_char_boundary)
|
||||
.map_or(upper_bound, |pos| pos + byte_idx)
|
||||
}
|
||||
}
|
||||
|
||||
fn is_char_boundary(self, byte_idx: usize) -> bool {
|
||||
if byte_idx == 0 {
|
||||
return true;
|
||||
}
|
||||
|
||||
if byte_idx >= self.len_bytes() {
|
||||
byte_idx == self.len_bytes()
|
||||
} else {
|
||||
is_utf8_char_boundary(self.bytes_at(byte_idx).next().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
fn floor_grapheme_boundary(self, mut byte_idx: usize) -> usize {
|
||||
if byte_idx >= self.len_bytes() {
|
||||
return self.len_bytes();
|
||||
if byte_idx >= self.len() {
|
||||
return self.len();
|
||||
}
|
||||
|
||||
byte_idx = self.ceil_char_boundary(byte_idx + 1);
|
||||
|
||||
let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx);
|
||||
|
||||
let mut cursor = GraphemeCursor::new(byte_idx, self.len_bytes(), true);
|
||||
|
||||
let mut chunk_cursor = self.chunk_cursor_at(byte_idx);
|
||||
let mut cursor = GraphemeCursor::new(byte_idx, self.len(), true);
|
||||
loop {
|
||||
match cursor.prev_boundary(chunk, chunk_byte_idx) {
|
||||
match cursor.prev_boundary(chunk_cursor.chunk(), chunk_cursor.byte_offset()) {
|
||||
Ok(None) => return 0,
|
||||
Ok(Some(boundary)) => return boundary,
|
||||
Err(GraphemeIncomplete::PrevChunk) => {
|
||||
let (ch, ch_byte_idx, _, _) = self.chunk_at_byte(chunk_byte_idx - 1);
|
||||
chunk = ch;
|
||||
chunk_byte_idx = ch_byte_idx;
|
||||
}
|
||||
Err(GraphemeIncomplete::PrevChunk) => assert!(chunk_cursor.prev()),
|
||||
Err(GraphemeIncomplete::PreContext(n)) => {
|
||||
let ctx_chunk = self.chunk_at_byte(n - 1).0;
|
||||
let ctx_chunk = self.chunk(n - 1).0;
|
||||
cursor.provide_context(ctx_chunk, n - ctx_chunk.len());
|
||||
}
|
||||
_ => unreachable!(),
|
||||
@@ -281,9 +195,34 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn nth_prev_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize {
|
||||
byte_idx = self.floor_char_boundary(byte_idx);
|
||||
|
||||
let mut chunk_cursor = self.chunk_cursor_at(byte_idx);
|
||||
let mut cursor = GraphemeCursor::new(byte_idx, self.len(), true);
|
||||
for _ in 0..n {
|
||||
loop {
|
||||
match cursor.prev_boundary(chunk_cursor.chunk(), chunk_cursor.byte_offset()) {
|
||||
Ok(None) => return 0,
|
||||
Ok(Some(boundary)) => {
|
||||
byte_idx = boundary;
|
||||
break;
|
||||
}
|
||||
Err(GraphemeIncomplete::PrevChunk) => assert!(chunk_cursor.prev()),
|
||||
Err(GraphemeIncomplete::PreContext(n)) => {
|
||||
let ctx_chunk = self.chunk(n - 1).0;
|
||||
cursor.provide_context(ctx_chunk, n - ctx_chunk.len());
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
byte_idx
|
||||
}
|
||||
|
||||
fn ceil_grapheme_boundary(self, mut byte_idx: usize) -> usize {
|
||||
if byte_idx >= self.len_bytes() {
|
||||
return self.len_bytes();
|
||||
if byte_idx >= self.len() {
|
||||
return self.len();
|
||||
}
|
||||
|
||||
if byte_idx == 0 {
|
||||
@@ -292,20 +231,15 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
|
||||
|
||||
byte_idx = self.floor_char_boundary(byte_idx - 1);
|
||||
|
||||
let (mut chunk, mut chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx);
|
||||
|
||||
let mut cursor = GraphemeCursor::new(byte_idx, self.len_bytes(), true);
|
||||
|
||||
let mut chunk_cursor = self.chunk_cursor_at(byte_idx);
|
||||
let mut cursor = GraphemeCursor::new(byte_idx, self.len(), true);
|
||||
loop {
|
||||
match cursor.next_boundary(chunk, chunk_byte_idx) {
|
||||
Ok(None) => return self.len_bytes(),
|
||||
match cursor.next_boundary(chunk_cursor.chunk(), chunk_cursor.byte_offset()) {
|
||||
Ok(None) => return self.len(),
|
||||
Ok(Some(boundary)) => return boundary,
|
||||
Err(GraphemeIncomplete::NextChunk) => {
|
||||
chunk_byte_idx += chunk.len();
|
||||
chunk = self.chunk_at_byte(chunk_byte_idx).0;
|
||||
}
|
||||
Err(GraphemeIncomplete::NextChunk) => assert!(chunk_cursor.next()),
|
||||
Err(GraphemeIncomplete::PreContext(n)) => {
|
||||
let ctx_chunk = self.chunk_at_byte(n - 1).0;
|
||||
let ctx_chunk = self.chunk(n - 1).0;
|
||||
cursor.provide_context(ctx_chunk, n - ctx_chunk.len());
|
||||
}
|
||||
_ => unreachable!(),
|
||||
@@ -313,21 +247,44 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn nth_next_grapheme_boundary(self, mut byte_idx: usize, n: usize) -> usize {
|
||||
byte_idx = self.ceil_char_boundary(byte_idx);
|
||||
|
||||
let mut chunk_cursor = self.chunk_cursor_at(byte_idx);
|
||||
let mut cursor = GraphemeCursor::new(byte_idx, self.len(), true);
|
||||
for _ in 0..n {
|
||||
loop {
|
||||
match cursor.prev_boundary(chunk_cursor.chunk(), chunk_cursor.byte_offset()) {
|
||||
Ok(None) => return 0,
|
||||
Ok(Some(boundary)) => {
|
||||
byte_idx = boundary;
|
||||
break;
|
||||
}
|
||||
Err(GraphemeIncomplete::NextChunk) => assert!(chunk_cursor.next()),
|
||||
Err(GraphemeIncomplete::PreContext(n)) => {
|
||||
let ctx_chunk = self.chunk(n - 1).0;
|
||||
cursor.provide_context(ctx_chunk, n - ctx_chunk.len());
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
byte_idx
|
||||
}
|
||||
|
||||
fn is_grapheme_boundary(self, byte_idx: usize) -> bool {
|
||||
// The byte must lie on a character boundary to lie on a grapheme cluster boundary.
|
||||
if !self.is_char_boundary(byte_idx) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let (chunk, chunk_byte_idx, _, _) = self.chunk_at_byte(byte_idx);
|
||||
|
||||
let mut cursor = GraphemeCursor::new(byte_idx, self.len_bytes(), true);
|
||||
|
||||
let (chunk, chunk_byte_idx) = self.chunk(byte_idx);
|
||||
let mut cursor = GraphemeCursor::new(byte_idx, self.len(), true);
|
||||
loop {
|
||||
match cursor.is_boundary(chunk, chunk_byte_idx) {
|
||||
Ok(n) => return n,
|
||||
Err(GraphemeIncomplete::PreContext(n)) => {
|
||||
let (ctx_chunk, ctx_byte_start, _, _) = self.chunk_at_byte(n - 1);
|
||||
let (ctx_chunk, ctx_byte_start) = self.chunk(n - 1);
|
||||
cursor.provide_context(ctx_chunk, ctx_byte_start);
|
||||
}
|
||||
Err(_) => unreachable!(),
|
||||
@@ -336,61 +293,30 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
|
||||
}
|
||||
|
||||
fn graphemes(self) -> RopeGraphemes<'a> {
|
||||
let mut chunks = self.chunks();
|
||||
let first_chunk = chunks.next().unwrap_or("");
|
||||
RopeGraphemes {
|
||||
chunk_cursor: self.chunk_cursor(),
|
||||
text: self,
|
||||
chunks,
|
||||
cur_chunk: first_chunk,
|
||||
cur_chunk_start: 0,
|
||||
cursor: GraphemeCursor::new(0, self.len_bytes(), true),
|
||||
cursor: GraphemeCursor::new(0, self.len(), true),
|
||||
}
|
||||
}
|
||||
|
||||
fn graphemes_rev(self) -> RevRopeGraphemes<'a> {
|
||||
let (mut chunks, mut cur_chunk_start, _, _) = self.chunks_at_byte(self.len_bytes());
|
||||
chunks.reverse();
|
||||
let first_chunk = chunks.next().unwrap_or("");
|
||||
cur_chunk_start -= first_chunk.len();
|
||||
RevRopeGraphemes {
|
||||
chunk_cursor: self.chunk_cursor_at(self.len()),
|
||||
text: self,
|
||||
chunks,
|
||||
cur_chunk: first_chunk,
|
||||
cur_chunk_start,
|
||||
cursor: GraphemeCursor::new(self.len_bytes(), self.len_bytes(), true),
|
||||
cursor: GraphemeCursor::new(self.len(), self.len(), true),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// copied from std
|
||||
#[inline]
|
||||
const fn is_utf8_char_boundary(b: u8) -> bool {
|
||||
// This is bit magic equivalent to: b < 128 || b >= 192
|
||||
(b as i8) >= -0x40
|
||||
}
|
||||
|
||||
/// An iterator over the graphemes of a `RopeSlice`.
|
||||
#[derive(Clone)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RopeGraphemes<'a> {
|
||||
text: RopeSlice<'a>,
|
||||
chunks: Chunks<'a>,
|
||||
cur_chunk: &'a str,
|
||||
cur_chunk_start: usize,
|
||||
chunk_cursor: ChunkCursor<'a>,
|
||||
cursor: GraphemeCursor,
|
||||
}
|
||||
|
||||
impl fmt::Debug for RopeGraphemes<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("RopeGraphemes")
|
||||
.field("text", &self.text)
|
||||
.field("chunks", &self.chunks)
|
||||
.field("cur_chunk", &self.cur_chunk)
|
||||
.field("cur_chunk_start", &self.cur_chunk_start)
|
||||
// .field("cursor", &self.cursor)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for RopeGraphemes<'a> {
|
||||
type Item = RopeSlice<'a>;
|
||||
|
||||
@@ -400,7 +326,7 @@ impl<'a> Iterator for RopeGraphemes<'a> {
|
||||
loop {
|
||||
match self
|
||||
.cursor
|
||||
.next_boundary(self.cur_chunk, self.cur_chunk_start)
|
||||
.next_boundary(self.chunk_cursor.chunk(), self.chunk_cursor.byte_offset())
|
||||
{
|
||||
Ok(None) => {
|
||||
return None;
|
||||
@@ -409,50 +335,33 @@ impl<'a> Iterator for RopeGraphemes<'a> {
|
||||
b = n;
|
||||
break;
|
||||
}
|
||||
Err(GraphemeIncomplete::NextChunk) => {
|
||||
self.cur_chunk_start += self.cur_chunk.len();
|
||||
self.cur_chunk = self.chunks.next().unwrap_or("");
|
||||
}
|
||||
Err(GraphemeIncomplete::NextChunk) => assert!(self.chunk_cursor.next()),
|
||||
Err(GraphemeIncomplete::PreContext(idx)) => {
|
||||
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
|
||||
let (chunk, byte_idx) = self.text.chunk(idx.saturating_sub(1));
|
||||
self.cursor.provide_context(chunk, byte_idx);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
if a < self.cur_chunk_start {
|
||||
Some(self.text.byte_slice(a..b))
|
||||
if a < self.chunk_cursor.byte_offset() {
|
||||
Some(self.text.slice(a..b))
|
||||
} else {
|
||||
let a2 = a - self.cur_chunk_start;
|
||||
let b2 = b - self.cur_chunk_start;
|
||||
Some((&self.cur_chunk[a2..b2]).into())
|
||||
let a2 = a - self.chunk_cursor.byte_offset();
|
||||
let b2 = b - self.chunk_cursor.byte_offset();
|
||||
Some((&self.chunk_cursor.chunk()[a2..b2]).into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over the graphemes of a `RopeSlice` in reverse.
|
||||
#[derive(Clone)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RevRopeGraphemes<'a> {
|
||||
text: RopeSlice<'a>,
|
||||
chunks: Chunks<'a>,
|
||||
cur_chunk: &'a str,
|
||||
cur_chunk_start: usize,
|
||||
chunk_cursor: ChunkCursor<'a>,
|
||||
cursor: GraphemeCursor,
|
||||
}
|
||||
|
||||
impl fmt::Debug for RevRopeGraphemes<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("RevRopeGraphemes")
|
||||
.field("text", &self.text)
|
||||
.field("chunks", &self.chunks)
|
||||
.field("cur_chunk", &self.cur_chunk)
|
||||
.field("cur_chunk_start", &self.cur_chunk_start)
|
||||
// .field("cursor", &self.cursor)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for RevRopeGraphemes<'a> {
|
||||
type Item = RopeSlice<'a>;
|
||||
|
||||
@@ -462,7 +371,7 @@ impl<'a> Iterator for RevRopeGraphemes<'a> {
|
||||
loop {
|
||||
match self
|
||||
.cursor
|
||||
.prev_boundary(self.cur_chunk, self.cur_chunk_start)
|
||||
.prev_boundary(self.chunk_cursor.chunk(), self.chunk_cursor.byte_offset())
|
||||
{
|
||||
Ok(None) => {
|
||||
return None;
|
||||
@@ -471,24 +380,21 @@ impl<'a> Iterator for RevRopeGraphemes<'a> {
|
||||
b = n;
|
||||
break;
|
||||
}
|
||||
Err(GraphemeIncomplete::PrevChunk) => {
|
||||
self.cur_chunk = self.chunks.next().unwrap_or("");
|
||||
self.cur_chunk_start -= self.cur_chunk.len();
|
||||
}
|
||||
Err(GraphemeIncomplete::PrevChunk) => assert!(self.chunk_cursor.prev()),
|
||||
Err(GraphemeIncomplete::PreContext(idx)) => {
|
||||
let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
|
||||
let (chunk, byte_idx) = self.text.chunk(idx.saturating_sub(1));
|
||||
self.cursor.provide_context(chunk, byte_idx);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
if a >= self.cur_chunk_start + self.cur_chunk.len() {
|
||||
Some(self.text.byte_slice(b..a))
|
||||
if a >= self.chunk_cursor.byte_offset() + self.chunk_cursor.chunk().len() {
|
||||
Some(self.text.slice(b..a))
|
||||
} else {
|
||||
let a2 = a - self.cur_chunk_start;
|
||||
let b2 = b - self.cur_chunk_start;
|
||||
Some((&self.cur_chunk[b2..a2]).into())
|
||||
let a2 = a - self.chunk_cursor.byte_offset();
|
||||
let b2 = b - self.chunk_cursor.byte_offset();
|
||||
Some((&self.chunk_cursor.chunk()[b2..a2]).into())
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -509,46 +415,11 @@ mod tests {
|
||||
assert!(RopeSlice::from("asdf").ends_with("f"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn char_boundaries() {
|
||||
let ascii = RopeSlice::from("ascii");
|
||||
// When the given index lies on a character boundary, the index should not change.
|
||||
for byte_idx in 0..=ascii.len_bytes() {
|
||||
assert_eq!(ascii.floor_char_boundary(byte_idx), byte_idx);
|
||||
assert_eq!(ascii.ceil_char_boundary(byte_idx), byte_idx);
|
||||
assert!(ascii.is_char_boundary(byte_idx));
|
||||
}
|
||||
|
||||
// This is a polyfill of a method of this trait which was replaced by ceil_char_boundary.
|
||||
// It returns the _character index_ of the given byte index, rounding up if it does not
|
||||
// already lie on a character boundary.
|
||||
fn byte_to_next_char(slice: RopeSlice, byte_idx: usize) -> usize {
|
||||
slice.byte_to_char(slice.ceil_char_boundary(byte_idx))
|
||||
}
|
||||
|
||||
for i in 0..=6 {
|
||||
assert_eq!(byte_to_next_char(RopeSlice::from("foobar"), i), i);
|
||||
}
|
||||
for char_idx in 0..10 {
|
||||
let len = "😆".len();
|
||||
assert_eq!(
|
||||
byte_to_next_char(RopeSlice::from("😆😆😆😆😆😆😆😆😆😆"), char_idx * len),
|
||||
char_idx
|
||||
);
|
||||
for i in 1..=len {
|
||||
assert_eq!(
|
||||
byte_to_next_char(RopeSlice::from("😆😆😆😆😆😆😆😆😆😆"), char_idx * len + i),
|
||||
char_idx + 1
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn grapheme_boundaries() {
|
||||
let ascii = RopeSlice::from("ascii");
|
||||
// When the given index lies on a grapheme boundary, the index should not change.
|
||||
for byte_idx in 0..=ascii.len_bytes() {
|
||||
for byte_idx in 0..=ascii.len() {
|
||||
assert_eq!(ascii.floor_char_boundary(byte_idx), byte_idx);
|
||||
assert_eq!(ascii.ceil_char_boundary(byte_idx), byte_idx);
|
||||
assert!(ascii.is_grapheme_boundary(byte_idx));
|
||||
@@ -558,7 +429,7 @@ mod tests {
|
||||
// 13 bytes, hex: f0 9f 8f b4 + e2 80 8d + e2 98 a0 + ef b8 8f
|
||||
let g = RopeSlice::from("🏴☠️\r\n");
|
||||
let emoji_len = "🏴☠️".len();
|
||||
let end = g.len_bytes();
|
||||
let end = g.len();
|
||||
|
||||
for byte_idx in 0..emoji_len {
|
||||
assert_eq!(g.floor_grapheme_boundary(byte_idx), 0);
|
||||
|
Reference in New Issue
Block a user