├── .github └── workflows │ └── ci.yml ├── .gitignore ├── BENCHMARKS.md ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── common │ └── mod.rs ├── creation.rs ├── editing.rs ├── graphemes.rs ├── iterators.rs ├── metric_conversion.rs ├── serde.rs └── slicing.rs ├── examples └── example_usage.rs ├── fuzz ├── .gitignore ├── Cargo.toml └── fuzz_targets │ └── editing.rs ├── rustfmt.toml ├── src ├── lib.rs ├── rope │ ├── gap_buffer.rs │ ├── gap_slice.rs │ ├── iterators.rs │ ├── metrics.rs │ ├── mod.rs │ ├── rope.rs │ ├── rope_builder.rs │ ├── rope_slice.rs │ └── utils.rs └── tree │ ├── leaves.rs │ ├── mod.rs │ ├── node.rs │ ├── node_internal.rs │ ├── node_leaf.rs │ ├── tiny_arc.rs │ ├── traits.rs │ ├── tree.rs │ ├── tree_builder.rs │ ├── tree_slice.rs │ └── units.rs └── tests ├── common ├── large.txt ├── lf.txt ├── medium.txt ├── mod.rs ├── non-ascii.txt ├── small.txt └── tiny.txt ├── graphemes.rs ├── iterators.rs ├── rope_builder.rs ├── rope_indexing.rs ├── rope_replace.rs ├── serde.rs ├── slice_indexing.rs ├── slicing.rs └── utf16_conversion.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | schedule: 11 | - cron: '0 0 1 * *' 12 | 13 | jobs: 14 | test-prod: 15 | name: test-prod 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v2 19 | - uses: dtolnay/rust-toolchain@nightly 20 | - run: cargo test --features graphemes,serde,utf16-metric --no-fail-fast 21 | 22 | test-small-chunks-arity-prod: 23 | name: test-small-chunks-arity-prod 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v2 27 | - uses: dtolnay/rust-toolchain@nightly 28 | - run: cargo test --features graphemes,serde,utf16-metric,small_chunks --no-fail-fast 29 | 30 | test-small-chunks-arity-4: 31 | name: test-small-chunks-arity-4 32 | runs-on: ubuntu-latest 33 | steps: 34 | - uses: 
actions/checkout@v2 35 | - uses: dtolnay/rust-toolchain@nightly 36 | - run: cargo test --features graphemes,serde,utf16-metric,arity_4,small_chunks --no-fail-fast 37 | 38 | bench: 39 | name: bench 40 | runs-on: ubuntu-latest 41 | steps: 42 | - uses: actions/checkout@v2 43 | - uses: dtolnay/rust-toolchain@nightly 44 | - run: cargo bench --no-run 45 | 46 | clippy: 47 | name: clippy 48 | runs-on: ubuntu-latest 49 | steps: 50 | - uses: actions/checkout@v2 51 | - uses: dtolnay/rust-toolchain@nightly 52 | with: 53 | components: clippy 54 | - run: cargo clippy --features graphemes,serde,utf16-metric -- -D warnings 55 | 56 | docs: 57 | name: docs 58 | runs-on: ubuntu-latest 59 | steps: 60 | - uses: actions/checkout@v2 61 | - uses: dtolnay/rust-toolchain@nightly 62 | - run: RUSTDOCFLAGS="--cfg docsrs" cargo doc --all-features 63 | 64 | format: 65 | name: format 66 | runs-on: ubuntu-latest 67 | steps: 68 | - uses: actions/checkout@v2 69 | - uses: dtolnay/rust-toolchain@nightly 70 | with: 71 | components: rustfmt 72 | - run: cargo fmt --check 73 | 74 | miri: 75 | name: miri 76 | runs-on: ubuntu-latest 77 | steps: 78 | - uses: actions/checkout@v2 79 | - uses: dtolnay/rust-toolchain@nightly 80 | with: 81 | components: miri 82 | - run: cargo miri test 83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | -------------------------------------------------------------------------------- /BENCHMARKS.md: -------------------------------------------------------------------------------- 1 | # Benchmarks 2 | 3 | The following benchmarks measure crop's performance against 4 | [Jumprope][jumprope] and [Ropey][ropey] across 4 different use cases: 5 | 6 | - [creating](#creation) a rope from a string or using a builder; 7 | - [slicing](#slices) the rope and converting the slice back to an owned `Rope`; 8 | - [iterating](#iterators) over 
the rope's chunks, bytes, `char`s, and lines; 9 | - [editing](#edits) the rope by continuously inserting/deleting/replacing text 10 | at random offsets; 11 | 12 | using 4 different input texts to track the change in performance as the size of 13 | the underlying buffer increases: 14 | 15 | - [tiny.txt][tiny]: 669 bytes, 9 lines; 16 | - [small.txt][small]: 1.5 KB, 20 lines; 17 | - [medium.txt][medium]: 219 KB, 3086 lines; 18 | - [large.txt][large]: 1.5 MB, 21596 lines. 19 | 20 | It wasn't possible to add Jumprope to the `builder` and `*_slice` benchmarks 21 | because it doesn't provide similar APIs, and it wasn't included in the `iter_*` 22 | benchmarks because even though 23 | [some](https://docs.rs/jumprope/latest/jumprope/struct.JumpRope.html#method.slice_chars) 24 | [iterators](https://docs.rs/jumprope/latest/jumprope/struct.JumpRope.html#method.substrings) 25 | are implemented, they are not exported publicly. 26 | 27 | The `xi_rope` project was not included in the benchmarks as it's been 28 | discontinued together with the rest of the Xi editor (and it was about an order 29 | of magnitude slower than all the other ropes on most use cases). 30 | 31 | The code used to run the benchmarks can be found 32 | [here](https://github.com/noib3/rope_benches). 33 | 34 | All the benchmarks were run on a 2018 MacBook Pro with a (not so) mighty 2.2 35 | GHz 6-Core Intel Core i7. 
36 | 37 | ## Creation 38 | 39 | | `cargo bench from_str` | `cargo bench builder` | 40 | | :--: | :--: | 41 | | ![from_str][from_str] | ![builder][builder] | 42 | 43 | ## Slices 44 | 45 | | `cargo bench byte_slice` | `cargo bench line_slice` | 46 | | :--: | :--: | 47 | | ![byte_slice][byte_slice] | ![line_slice][line_slice] | 48 | 49 | | `cargo bench from_slice` | | 50 | | :--: | :--: | 51 | | ![from_slice][from_slice] |                                 | 52 | 53 | ## Iterators 54 | 55 | | `cargo bench iter_chunks` | `cargo bench iter_bytes` | 56 | | :--: | :--: | 57 | | ![iter_chunks][iter_chunks] | ![iter_bytes][iter_bytes] | 58 | 59 | | `cargo bench iter_chars` | `cargo bench iter_lines` | 60 | | :--: | :--: | 61 | | ![iter_chars][iter_chars] | ![iter_lines][iter_lines] | 62 | 63 | 64 | ## Edits 65 | 66 | | `cargo bench insert_char` | `cargo bench insert_sentence` | 67 | | :--: | :--: | 68 | | ![insert_char][insert_char] | ![insert_sentence][insert_sentence] | 69 | 70 | | `cargo bench insert_large` | `cargo bench delete_char` | 71 | | :--: | :--: | 72 | | ![insert_large][insert_large] | ![delete_char][delete_char] | 73 | 74 | | `cargo bench delete_sentence` | `cargo bench delete_large` | 75 | | :--: | :--: | 76 | | ![delete_sentence][delete_sentence] | ![delete_large][delete_large] | 77 | 78 | | `cargo bench replace_char` | `cargo bench replace_sentence` | 79 | | :--: | :--: | 80 | | ![replace_char][replace_char] | ![replace_sentence][replace_sentence] | 81 | 82 | | `cargo bench replace_large` | | 83 | | :--: | :--: | 84 | | ![replace_large][replace_large] |                                 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | [jumprope]: https://github.com/josephg/jumprope-rs 93 | [ropey]: https://github.com/cessen/ropey 94 | 95 | [tiny]: https://github.com/noib3/rope_benches/blob/master/benches/common/tiny.txt 96 | [small]: https://github.com/noib3/rope_benches/blob/master/benches/common/small.txt 97 | [medium]: 
https://github.com/noib3/rope_benches/blob/master/benches/common/medium.txt 98 | [large]: https://github.com/noib3/rope_benches/blob/master/benches/common/large.txt 99 | 100 | [builder]: https://user-images.githubusercontent.com/59321248/227067900-48478bca-8fe9-403d-a92e-e95bd94e2ebc.png 101 | [byte_slice]: https://user-images.githubusercontent.com/59321248/232328565-ac74e6b6-07cb-40b3-8379-7e9e3f47acad.png 102 | [delete_char]: https://user-images.githubusercontent.com/59321248/227067911-3509f006-3830-4d36-b8f5-8c78bb684b11.png 103 | [delete_large]: https://user-images.githubusercontent.com/59321248/227067916-e3ed1bbe-d706-4e53-bbc1-04218c8e46a7.png 104 | [delete_sentence]: https://user-images.githubusercontent.com/59321248/227067918-91af2770-8dea-49ad-a894-50a0cede60cf.png 105 | [from_slice]: https://user-images.githubusercontent.com/59321248/227067921-c97f882f-5f3e-4d6c-8141-f9692c8935ef.png 106 | [from_str]: https://user-images.githubusercontent.com/59321248/227067923-364d6d7a-86f8-46e1-a371-84fda094fb22.png 107 | [insert_char]: https://user-images.githubusercontent.com/59321248/227067924-7ca04879-7c67-423c-ba96-cd3e43d974a5.png 108 | [insert_large]: https://user-images.githubusercontent.com/59321248/227067926-718b5b34-ff89-458b-9906-4ca19ea1f020.png 109 | [insert_sentence]: https://user-images.githubusercontent.com/59321248/227067928-6a69ff30-73ca-4272-9c2d-381064fc9170.png 110 | [iter_bytes]: https://user-images.githubusercontent.com/59321248/227067929-5f067398-4fae-4f7d-9b49-fcb21efba86f.png 111 | [iter_chars]: https://user-images.githubusercontent.com/59321248/227067932-3f8a5207-1c24-4285-8bfe-3f56f2c26a8b.png 112 | [iter_chunks]: https://user-images.githubusercontent.com/59321248/227067937-7fe9e437-5050-4524-b5b0-8ed56d1dd560.png 113 | [iter_lines]: https://user-images.githubusercontent.com/59321248/227067939-4198ed29-e886-4f90-b9bf-33c817a867a8.png 114 | [line_slice]: 
https://user-images.githubusercontent.com/59321248/227067941-f41c6970-6ee1-4bd9-aa1a-74ffb816af7c.png 115 | [replace_char]: https://user-images.githubusercontent.com/59321248/227067944-bd1931ba-c287-4b0e-a2d6-c3e46dd9d665.png 116 | [replace_large]: https://user-images.githubusercontent.com/59321248/227067948-78cf6e37-e4e6-4689-834b-b087da538054.png 117 | [replace_sentence]: https://user-images.githubusercontent.com/59321248/227067952-a51b5e77-71d9-4e84-acba-a146013e44da.png 118 | 119 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [Unreleased] 4 | 5 | ## [0.4.3] - Apr 25 2025 6 | 7 | ### Additions 8 | 9 | - added a new `serde` feature that enables the `Serialize` and `Deserialize` 10 | implementations for `Rope` ([#27](https://github.com/nomad/crop/pull/27)); 11 | 12 | ## [0.4.2] - Jan 22 2024 13 | 14 | ### Bug fixes 15 | 16 | - fixed a bug that caused `RopeSlice::line_slice()` to panic or halt forever 17 | when both the start and end of the range are equal to `RopeSlice::line_len()` 18 | and the `RopeSlice` didn't end with a line break 19 | ([#16](https://github.com/nomad/crop/issues/16)); 20 | 21 | ## [0.4.1] - Dec 1 2023 22 | 23 | ### Bug fixes 24 | 25 | - fixed a typo that caused `"r"` to be stripped instead of the carriage return 26 | character (`"\r"`) when truncating trailing line breaks; 27 | 28 | ## [0.4.0] - Oct 11 2023 29 | 30 | ### Additions 31 | 32 | - added a few new methods to `Rope` and `RopeSlice` that allow converting 33 | between UTF-16 and byte offsets by tracking the number of UTF-16 code units 34 | stored in those objects. It is important to note that these APIs come with a 35 | performance cost. 
As a result, these methods are only accessible by enabling 36 | a new feature flag called `utf16-metric`, which is disabled by default; 37 | 38 | ### Performance 39 | 40 | - the performance of `Rope::replace` was improved by another 10-15%; 41 | 42 | ## [0.3.0] - Apr 16 2023 43 | 44 | ### Changes 45 | 46 | - both the `line_of_byte()` and `byte_of_line()` methods on `Rope`s and 47 | `RopeSlice`s now interpret their argument as byte and line offsets, 48 | respectively. This allows those methods to accept the full byte length or 49 | line length of the `Rope`/`RopeSlice` as a valid argument without panicking; 50 | 51 | ### Bug fixes 52 | 53 | - fixed a very rare bug where the `Lines` iterator would include the trailing 54 | `'\r'` if a line was terminated by a CRLF which was split across consecutive 55 | chunks; 56 | 57 | ### Performance 58 | 59 | - the `byte_slice()` method on `Rope`s and `RopeSlice`s is around 10% faster; 60 | 61 | 62 | ## [0.2.0] - Mar 26 2023 63 | 64 | ### Performance 65 | 66 | - the leaves of the B-tree are now gap buffers instead of simple `String`s, 67 | which improves the performance of consecutive edits applied to the same 68 | cursor position. 
This alone resulted in a 8-15% improvement in the 69 | [crdt-benchmarks](https://github.com/josephg/crdt-benchmarks), and together 70 | with other tweaks it makes `v0.2` 80-90% faster than `v0.1` on those editing 71 | traces; 72 | 73 | - `RopeBuilder::append()` is around 20% faster; 74 | 75 | ### Breaking changes 76 | 77 | - the `Chunks` iterator no longer implements `ExactSizeIterator`; 78 | 79 | [Unreleased]: https://github.com/nomad/crop/compare/v0.4.3...HEAD 80 | [0.4.3]: https://github.com/nomad/crop/compare/v0.4.2...v0.4.3 81 | [0.4.2]: https://github.com/nomad/crop/compare/v0.4.1...v0.4.2 82 | [0.4.1]: https://github.com/nomad/crop/compare/v0.4.0...v0.4.1 83 | [0.4.0]: https://github.com/nomad/crop/compare/v0.3.0...v0.4.0 84 | [0.3.0]: https://github.com/nomad/crop/compare/v0.2.0...v0.3.0 85 | [0.2.0]: https://github.com/nomad/crop/compare/v0.1.0...v0.2.0 86 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crop" 3 | version = "0.4.3" 4 | edition = "2024" 5 | rust-version = "1.85" 6 | authors = ["Riccardo Mazzarini "] 7 | description = "A pretty fast text rope" 8 | documentation = "https://docs.rs/crop" 9 | repository = "https://github.com/nomad/crop" 10 | readme = "README.md" 11 | license = "MIT" 12 | keywords = ["rope", "tree", "edit", "buffer"] 13 | categories = ["data-structures", "text-editors", "text-processing"] 14 | exclude = [ 15 | "/.github/*", 16 | "/examples/**", 17 | "/fuzz/**", 18 | "/tests/**", 19 | "/BENCHMARKS.md", 20 | ] 21 | 22 | [package.metadata.docs.rs] 23 | features = ["graphemes", "serde", "simd", "utf16-metric"] 24 | rustdoc-args = ["--cfg", "docsrs"] 25 | 26 | [features] 27 | default = ["simd", "std"] 28 | graphemes = ["unicode-segmentation"] 29 | serde = ["dep:serde"] 30 | simd = ["str_indices/simd"] 31 | utf16-metric = [] 32 | std = [] 33 | 34 | # Private features 35 | 
small_chunks = [] 36 | arity_4 = [] 37 | deep_trees = ["small_chunks", "arity_4"] 38 | dp = ["deep_trees"] 39 | 40 | [dependencies] 41 | str_indices = { version = "0.4.0", default-features = false } 42 | serde = { version = "1", optional = true } 43 | unicode-segmentation = { version = "1.10.0", optional = true } 44 | 45 | [dev-dependencies] 46 | criterion = "0.5" 47 | rand = "0.9" 48 | ropey = "1.6" 49 | serde_json = "1" 50 | serde_test = "1.0.177" 51 | 52 | [lints.rust] 53 | unexpected_cfgs = { level = "warn", check-cfg = ['cfg(fuzzing)'] } 54 | 55 | [[bench]] 56 | name = "creation" 57 | harness = false 58 | 59 | [[bench]] 60 | name = "editing" 61 | harness = false 62 | 63 | [[bench]] 64 | name = "graphemes" 65 | harness = false 66 | required-features = ["graphemes"] 67 | 68 | [[bench]] 69 | name = "iterators" 70 | harness = false 71 | 72 | [[bench]] 73 | name = "metric_conversion" 74 | harness = false 75 | 76 | [[bench]] 77 | name = "serde" 78 | harness = false 79 | required-features = ["serde"] 80 | 81 | [[bench]] 82 | name = "slicing" 83 | harness = false 84 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Riccardo Mazzarini 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🌾 crop 2 | 3 | [![Latest version]](https://crates.io/crates/crop) 4 | [![Docs badge]](https://docs.rs/crop) 5 | [![CI]](https://github.com/nomad/crop/actions) 6 | 7 | [Latest version]: https://img.shields.io/crates/v/crop.svg 8 | [Docs badge]: https://docs.rs/crop/badge.svg 9 | [CI]: https://github.com/nomad/crop/actions/workflows/ci.yml/badge.svg 10 | 11 | crop is an implementation of a text rope, a data structure designed to be used 12 | in applications that need to handle frequent edits to arbitrarily large 13 | buffers, such as text editors. 14 | 15 | crop's `Rope` is backed by a [B-tree](https://en.wikipedia.org/wiki/B-tree), 16 | ensuring that the time complexity of inserting, deleting or replacing a piece 17 | of text is always logarithmic in the size of the `Rope`. 18 | 19 | crop places an extreme focus on performance: check out [the 20 | benchmarks][synthetic-benches] to see how it stacks up against similar 21 | projects. 22 | 23 | ## Built with parallelism in mind 24 | 25 | `Rope`s use thread-safe reference counting to share data between threads. 26 | Cloning a `Rope` takes up only 16 extra bytes of memory, and its copy-on-write 27 | semantics allow the actual text contents to be cloned incrementally as 28 | different clones diverge due to user edits. 
29 | 30 | This makes it possible to cheaply snapshot a `Rope` and send it to a background thread to 31 | perform any IO or CPU-intensive computations, while the main thread is kept 32 | responsive and always ready for the next batch of edits. 33 | 34 | ## Example usage 35 | 36 | ```rust 37 | // A `Rope` can be created either directly from a string or incrementally 38 | // using the `RopeBuilder`. 39 | 40 | let mut builder = RopeBuilder::new(); 41 | 42 | builder 43 | .append("I am a 🦀\n") 44 | .append("Who walks the shore\n") 45 | .append("And pinches toes all day.\n") 46 | .append("\n") 47 | .append("If I were you\n") 48 | .append("I'd wear some 👟\n") 49 | .append("And not get in my way.\n"); 50 | 51 | let mut rope: Rope = builder.build(); 52 | 53 | // `Rope`s can be sliced to obtain `RopeSlice`s. 54 | // 55 | // A `RopeSlice` is to a `Rope` as a `&str` is to a `String`: the former in 56 | // each pair is a borrowed reference of the latter. 57 | 58 | // A `Rope` can be sliced using either byte offsets: 59 | 60 | let byte_slice: RopeSlice = rope.byte_slice(..32); 61 | 62 | assert_eq!(byte_slice, "I am a 🦀\nWho walks the shore\n"); 63 | 64 | // or line offsets: 65 | 66 | let line_slice: RopeSlice = rope.line_slice(..2); 67 | 68 | assert_eq!(line_slice, byte_slice); 69 | 70 | // We can also get a `RopeSlice` by asking the `Rope` for a specific line 71 | // index: 72 | 73 | assert_eq!(rope.line(5), "I'd wear some 👟"); 74 | 75 | // We can modify that line by getting its start/end byte offsets: 76 | 77 | let start: usize = rope.byte_of_line(5); 78 | 79 | let end: usize = rope.byte_of_line(6); 80 | 81 | // and replacing that byte range with some other text: 82 | 83 | rope.replace(start..end, "I'd rock some 👠\n"); 84 | 85 | assert_eq!(rope.line(5), "I'd rock some 👠"); 86 | 87 | // `Rope`s use `Arc`s to share data between threads, so cloning them is 88 | // extremely cheap. 
89 | 90 | let snapshot: Rope = rope.clone(); 91 | 92 | // This allows to save a `Rope` to disk in a background thread while 93 | // keeping the main thread responsive. 94 | 95 | thread::spawn(move || { 96 | let mut file = 97 | BufWriter::new(File::create("my_little_poem.txt").unwrap()); 98 | 99 | // The text content is stored as separate chunks in the leaves of the 100 | // B-tree. 101 | // 102 | // We can iterate over them using the `Chunks` iterator which yields the 103 | // chunks of the `Rope` as string slices. 104 | 105 | for chunk in snapshot.chunks() { 106 | file.write_all(chunk.as_bytes()).unwrap(); 107 | } 108 | }) 109 | .join() 110 | .unwrap(); 111 | ``` 112 | 113 | Check out [the docs](https://docs.rs/crop) for a more in-depth overview of the 114 | crate. 115 | 116 | ## Comparison with other ropes 117 | 118 | As of April 2023 there are (to my knowledge) 3 rope crates that are still 119 | actively maintained: crop, [Jumprope][jumprope] and [Ropey][ropey]. The 120 | following is a quick (and incomplete) overview of their features and tradeoffs 121 | to help you decide which one is best suited for your specific use case. 122 | 123 | ### Speed 124 | 125 | The following results were obtained by running the real world, 126 | character-by-character editing traces provided by [crdt-benchmarks] on a 2018 127 | MacBook Pro with an Intel Core i7. 
128 | 129 | | Dataset | crop (ms) | Jumprope (ms) | Ropey (ms) | `std::string::String` (ms) | 130 | |-----------------|-----------|---------------|------------|----------------------------| 131 | | automerge-paper | 12.39 | 12.52 | 44.14 | 108.57 | 132 | | rustcode | 2.67 | 2.86 | 7.96 | 13.40 | 133 | | sveltecomponent | 0.95 | 1.08 | 3.65 | 1.22 | 134 | | seph-blog1 | 6.47 | 6.94 | 23.46 | 22.26 | 135 | 136 | ### Cheap clones 137 | 138 | Both crop and Ropey allow their `Rope`s to be cloned in `O(1)` in time and 139 | space by sharing data between clones, whereas cloning a `JumpRope` involves 140 | re-allocating the actual text contents, just like it would with a regular 141 | `String`. 142 | 143 | ### Indexing metric 144 | 145 | Jumprope and Ropey both use Unicode codepoint offsets (`char`s in Rust) as 146 | their primary indexing metric. crop uses UTF-8 code unit (aka byte) offsets, 147 | just like Rust's `String`s. 148 | 149 | ### Line breaks 150 | 151 | Both crop and Ropey track line breaks, allowing you to convert between line and 152 | byte offsets and to iterate over the lines of their `Rope`s and `RopeSlice`s. 153 | Ropey can be configured to recognize all Unicode line breaks, while crop only 154 | recognizes LF and CRLF as line terminators. 155 | 156 | Jumprope doesn't currently have any line-based APIs. 157 | 158 | 159 | ## Acknowledgements 160 | 161 | - A significant portion of crop's public API was inspired by the excellent 162 | Ropey crate (from which I also borrowed some test vectors). Unlike crop, 163 | Ropey uses code points (`char`s in Rust-speak) as its primary indexing 164 | metric. If you'd prefer to work with `char` offsets rather than byte offsets 165 | Ropey might be a great alternative; 166 | 167 | - Even though the implementations are quite different, crop's 168 | [`Metric`][crop-metric] trait was inspired by the [homonymous trait in 169 | xi_rope][xi-rope-metric]. 
/// An iterator over progressively narrower ranges of `0..max`.
///
/// The first item is `0..max`. After every item the end moves down and the
/// start moves up by `half_percent` (the start is clamped so it never passes
/// the end). Iteration stops once the two endpoints meet.
#[derive(Debug, Clone)]
pub struct PercentRanges {
    /// Lower bound of the next range to yield.
    start: usize,
    /// Upper bound (exclusive) of the next range to yield.
    end: usize,
    /// Step applied to both endpoints: `max / 200`, but never less than 1.
    half_percent: usize,
}

impl PercentRanges {
    /// Creates an iterator whose first item is `0..max`.
    ///
    /// An iterator created with `max == 0` yields nothing.
    #[allow(dead_code)]
    #[inline(always)]
    pub fn new(max: usize) -> Self {
        Self { start: 0, end: max, half_percent: (max / 200).max(1) }
    }
}

impl Iterator for PercentRanges {
    // The path is spelled out in full so the impl does not depend on a
    // `use std::ops::Range;` being in scope.
    type Item = std::ops::Range<usize>;

    #[inline(always)]
    fn next(&mut self) -> Option<Self::Item> {
        if self.start == self.end {
            return None;
        }

        let range = self.start..self.end;

        // Shrink from the right first, then clamp the new start against the
        // updated end so that `start <= end` always holds.
        self.end -= self.half_percent;

        self.start = std::cmp::min(self.start + self.half_percent, self.end);

        Some(range)
    }
}
/benches/creation.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::{LARGE, MEDIUM, SMALL, TINY}; 4 | use criterion::measurement::WallTime; 5 | use criterion::{BenchmarkGroup, Criterion, criterion_group, criterion_main}; 6 | use crop::{Rope, RopeBuilder}; 7 | 8 | fn bench(group: &mut BenchmarkGroup, to_bench: F) { 9 | group.bench_function("tiny", |bench| bench.iter(|| to_bench(TINY))); 10 | group.bench_function("small", |bench| bench.iter(|| to_bench(SMALL))); 11 | group.bench_function("medium", |bench| bench.iter(|| to_bench(MEDIUM))); 12 | group.bench_function("large", |bench| bench.iter(|| to_bench(LARGE))); 13 | } 14 | 15 | fn from_str(c: &mut Criterion) { 16 | let mut group = c.benchmark_group("from_str"); 17 | 18 | bench(&mut group, |s| { 19 | let _ = Rope::from(s); 20 | }); 21 | } 22 | 23 | fn rope_builder(c: &mut Criterion) { 24 | let mut group = c.benchmark_group("rope_builder"); 25 | 26 | bench(&mut group, |s| { 27 | let mut builder = RopeBuilder::new(); 28 | for line in s.lines() { 29 | builder.append(line); 30 | } 31 | let _ = builder.build(); 32 | }); 33 | } 34 | 35 | criterion_group!(benches, from_str, rope_builder); 36 | criterion_main!(benches); 37 | -------------------------------------------------------------------------------- /benches/editing.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::{LARGE, MEDIUM, PercentRanges, SMALL, TINY}; 4 | use criterion::measurement::WallTime; 5 | use criterion::{ 6 | Bencher, 7 | BenchmarkGroup, 8 | Criterion, 9 | criterion_group, 10 | criterion_main, 11 | }; 12 | use crop::Rope; 13 | 14 | fn bench_insert(group: &mut BenchmarkGroup, insert: &str) { 15 | #[inline(always)] 16 | fn bench(bench: &mut Bencher, s: &str, insert: &str) { 17 | let mut r = Rope::from(s); 18 | let mut ranges = PercentRanges::new(r.byte_len()).cycle(); 19 | let mut i = 0; 20 | let orig_len = 
r.byte_len(); 21 | 22 | bench.iter(|| { 23 | let range = ranges.next().unwrap(); 24 | let at = if i % 2 == 0 { range.start } else { range.end }; 25 | r.insert(at, insert); 26 | i += 1; 27 | 28 | // Take the Rope back to its original length when it grows by more 29 | // than 50%. 30 | if r.byte_len() >= (1.5 * (orig_len as f32)) as usize { 31 | r.delete(..r.byte_len() - orig_len); 32 | ranges = PercentRanges::new(r.byte_len()).cycle(); 33 | } 34 | }); 35 | } 36 | 37 | group.bench_function("tiny", |b| bench(b, TINY, insert)); 38 | group.bench_function("small", |b| bench(b, SMALL, insert)); 39 | group.bench_function("medium", |b| bench(b, MEDIUM, insert)); 40 | group.bench_function("large", |b| bench(b, LARGE, insert)); 41 | } 42 | 43 | fn insert_char(c: &mut Criterion) { 44 | let mut group = c.benchmark_group("insert_char"); 45 | bench_insert(&mut group, "a"); 46 | } 47 | 48 | fn insert_sentence(c: &mut Criterion) { 49 | let mut group = c.benchmark_group("insert_sentence"); 50 | bench_insert( 51 | &mut group, 52 | "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", 53 | ); 54 | } 55 | 56 | fn insert_large(c: &mut Criterion) { 57 | let mut group = c.benchmark_group("insert_large"); 58 | bench_insert(&mut group, SMALL); 59 | } 60 | 61 | fn insert_char_with_clone_around(c: &mut Criterion) { 62 | let mut group = c.benchmark_group("insert_char_with_clone_around"); 63 | 64 | group.bench_function("large", |bench| { 65 | let mut r = Rope::from(LARGE); 66 | let mut ranges = PercentRanges::new(r.byte_len()).cycle(); 67 | let orig = r.clone(); 68 | let mut insertions = 0; 69 | bench.iter(|| { 70 | let range = ranges.next().unwrap(); 71 | let at = if insertions % 2 == 0 { range.start } else { range.end }; 72 | r.insert(at, "a"); 73 | insertions += 1; 74 | if insertions == 64 { 75 | insertions = 0; 76 | r = orig.clone(); 77 | } 78 | }) 79 | }); 80 | } 81 | 82 | fn bench_delete(group: &mut BenchmarkGroup, delete_bytes: usize) { 83 | #[inline(always)] 84 | fn bench(bench: 
&mut Bencher, s: &str, delete_bytes: usize) { 85 | let mut r = Rope::from(s); 86 | let mut ranges = PercentRanges::new(r.byte_len()).cycle(); 87 | let mut i = 0; 88 | let orig_len = r.byte_len(); 89 | bench.iter(|| { 90 | let range = ranges.next().unwrap(); 91 | let start = (if i % 2 == 0 { range.start } else { range.end }) 92 | .min(r.byte_len()); 93 | let end = (start + delete_bytes).min(r.byte_len()); 94 | r.delete(start..end); 95 | i += 1; 96 | 97 | if r.byte_len() < orig_len / 4 { 98 | r = Rope::from(s); 99 | ranges = PercentRanges::new(r.byte_len()).cycle(); 100 | } 101 | }); 102 | } 103 | 104 | group.bench_function("tiny", |b| bench(b, TINY, delete_bytes)); 105 | group.bench_function("small", |b| bench(b, SMALL, delete_bytes)); 106 | group.bench_function("medium", |b| bench(b, MEDIUM, delete_bytes)); 107 | group.bench_function("large", |b| bench(b, LARGE, delete_bytes)); 108 | } 109 | 110 | fn delete_char(c: &mut Criterion) { 111 | let mut group = c.benchmark_group("delete_char"); 112 | bench_delete(&mut group, "a".len()); 113 | } 114 | 115 | fn delete_sentence(c: &mut Criterion) { 116 | let mut group = c.benchmark_group("delete_sentence"); 117 | bench_delete( 118 | &mut group, 119 | "Lorem ipsum dolor sit amet, consectetur adipiscing elit.".len(), 120 | ); 121 | } 122 | 123 | fn delete_large(c: &mut Criterion) { 124 | let mut group = c.benchmark_group("delete_large"); 125 | bench_delete(&mut group, SMALL.len()); 126 | } 127 | 128 | fn delete_char_with_clone_around(c: &mut Criterion) { 129 | let mut group = c.benchmark_group("delete_char_with_clone_around"); 130 | 131 | group.bench_function("large", |bench| { 132 | let mut r = Rope::from(LARGE); 133 | let mut ranges = PercentRanges::new(r.byte_len()).cycle(); 134 | let orig = r.clone(); 135 | let mut deletions = 0; 136 | bench.iter(|| { 137 | let range = ranges.next().unwrap(); 138 | let start = 139 | (if deletions % 2 == 0 { range.start } else { range.end }) 140 | .min(r.byte_len()); 141 | let end = (start 
+ 1).min(r.byte_len()); 142 | r.delete(start..end); 143 | deletions += 1; 144 | if deletions == 64 { 145 | deletions = 0; 146 | r = orig.clone(); 147 | } 148 | }) 149 | }); 150 | } 151 | 152 | fn bench_replace(group: &mut BenchmarkGroup, replace: &str) { 153 | #[inline(always)] 154 | fn bench(bench: &mut Bencher, s: &str, replace: &str) { 155 | let mut r = Rope::from(s); 156 | let mut ranges = PercentRanges::new(r.byte_len()).cycle(); 157 | let mut i = 0; 158 | bench.iter(|| { 159 | let range = ranges.next().unwrap(); 160 | let start = if i % 2 == 0 { range.start } else { range.end }; 161 | let end = (start + replace.len()).min(r.byte_len()); 162 | r.replace(start..end, replace); 163 | i += 1; 164 | }); 165 | } 166 | 167 | group.bench_function("tiny", |b| bench(b, TINY, replace)); 168 | group.bench_function("small", |b| bench(b, SMALL, replace)); 169 | group.bench_function("medium", |b| bench(b, MEDIUM, replace)); 170 | group.bench_function("large", |b| bench(b, LARGE, replace)); 171 | } 172 | 173 | fn replace_char(c: &mut Criterion) { 174 | let mut group = c.benchmark_group("replace_char"); 175 | bench_replace(&mut group, "a"); 176 | } 177 | 178 | fn replace_sentence(c: &mut Criterion) { 179 | let mut group = c.benchmark_group("replace_sentence"); 180 | bench_replace( 181 | &mut group, 182 | "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", 183 | ); 184 | } 185 | 186 | fn replace_large(c: &mut Criterion) { 187 | let mut group = c.benchmark_group("replace_large"); 188 | bench_replace(&mut group, SMALL); 189 | } 190 | 191 | criterion_group!( 192 | benches, 193 | insert_char, 194 | insert_sentence, 195 | insert_large, 196 | insert_char_with_clone_around, 197 | delete_char, 198 | delete_sentence, 199 | delete_large, 200 | delete_char_with_clone_around, 201 | replace_char, 202 | replace_sentence, 203 | replace_large, 204 | ); 205 | 206 | criterion_main!(benches); 207 | -------------------------------------------------------------------------------- 
/benches/graphemes.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::{LARGE, MEDIUM, SMALL, TINY}; 4 | use criterion::{Criterion, criterion_group, criterion_main}; 5 | use crop::Rope; 6 | 7 | fn iter_graphemes(c: &mut Criterion) { 8 | let mut group = c.benchmark_group("iter_graphemes"); 9 | 10 | group.bench_function("create", |bench| { 11 | let r = Rope::from(LARGE); 12 | bench.iter(|| { 13 | let _ = r.graphemes(); 14 | }) 15 | }); 16 | 17 | group.bench_function("forward_tiny", |bench| { 18 | let r = Rope::from(TINY); 19 | let mut iter = r.graphemes().cycle(); 20 | bench.iter(|| { 21 | let _ = iter.next(); 22 | }); 23 | }); 24 | 25 | group.bench_function("forward_small", |bench| { 26 | let r = Rope::from(SMALL); 27 | let mut iter = r.graphemes().cycle(); 28 | bench.iter(|| { 29 | let _ = iter.next(); 30 | }); 31 | }); 32 | 33 | group.bench_function("forward_medium", |bench| { 34 | let r = Rope::from(MEDIUM); 35 | let mut iter = r.graphemes().cycle(); 36 | bench.iter(|| { 37 | let _ = iter.next(); 38 | }); 39 | }); 40 | 41 | group.bench_function("forward_large", |bench| { 42 | let r = Rope::from(LARGE); 43 | let mut iter = r.graphemes().cycle(); 44 | bench.iter(|| { 45 | let _ = iter.next(); 46 | }); 47 | }); 48 | 49 | group.bench_function("backward_tiny", |bench| { 50 | let r = Rope::from(TINY); 51 | let mut iter = r.graphemes().rev().cycle(); 52 | bench.iter(|| { 53 | let _ = iter.next(); 54 | }); 55 | }); 56 | 57 | group.bench_function("backward_small", |bench| { 58 | let r = Rope::from(SMALL); 59 | let mut iter = r.graphemes().rev().cycle(); 60 | bench.iter(|| { 61 | let _ = iter.next(); 62 | }); 63 | }); 64 | 65 | group.bench_function("backward_medium", |bench| { 66 | let r = Rope::from(MEDIUM); 67 | let mut iter = r.graphemes().rev().cycle(); 68 | bench.iter(|| { 69 | let _ = iter.next(); 70 | }); 71 | }); 72 | 73 | group.bench_function("backward_large", |bench| { 74 | let r = 
Rope::from(LARGE); 75 | let mut iter = r.graphemes().rev().cycle(); 76 | bench.iter(|| { 77 | let _ = iter.next(); 78 | }); 79 | }); 80 | } 81 | 82 | criterion_group!(benches, iter_graphemes); 83 | criterion_main!(benches); 84 | -------------------------------------------------------------------------------- /benches/iterators.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::{LARGE, MEDIUM, SMALL, TINY}; 4 | use criterion::{Criterion, criterion_group, criterion_main}; 5 | use crop::{Rope, iter::*}; 6 | 7 | #[macro_export] 8 | macro_rules! iter_bench { 9 | ($fun:ident, $iterator:ty, $group_name:literal) => { 10 | fn $fun(c: &mut Criterion) { 11 | let mut group = c.benchmark_group($group_name); 12 | 13 | group.bench_function("create", |bench| { 14 | let r = Rope::from(LARGE); 15 | bench.iter(|| { 16 | let _ = <$iterator>::from(&r); 17 | }) 18 | }); 19 | 20 | group.bench_function("forward_tiny", |bench| { 21 | let r = Rope::from(TINY); 22 | let mut iter = <$iterator>::from(&r).cycle(); 23 | bench.iter(|| { 24 | let _ = iter.next(); 25 | }); 26 | }); 27 | 28 | group.bench_function("forward_small", |bench| { 29 | let r = Rope::from(SMALL); 30 | let mut iter = <$iterator>::from(&r).cycle(); 31 | bench.iter(|| { 32 | let _ = iter.next(); 33 | }); 34 | }); 35 | 36 | group.bench_function("forward_medium", |bench| { 37 | let r = Rope::from(MEDIUM); 38 | let mut iter = <$iterator>::from(&r).cycle(); 39 | bench.iter(|| { 40 | let _ = iter.next(); 41 | }); 42 | }); 43 | 44 | group.bench_function("forward_large", |bench| { 45 | let r = Rope::from(LARGE); 46 | let mut iter = <$iterator>::from(&r).cycle(); 47 | bench.iter(|| { 48 | let _ = iter.next(); 49 | }); 50 | }); 51 | 52 | group.bench_function("backward_tiny", |bench| { 53 | let r = Rope::from(TINY); 54 | let mut iter = <$iterator>::from(&r).rev().cycle(); 55 | bench.iter(|| { 56 | let _ = iter.next(); 57 | }); 58 | }); 59 | 60 | 
group.bench_function("backward_small", |bench| { 61 | let r = Rope::from(SMALL); 62 | let mut iter = <$iterator>::from(&r).rev().cycle(); 63 | bench.iter(|| { 64 | let _ = iter.next(); 65 | }); 66 | }); 67 | 68 | group.bench_function("backward_medium", |bench| { 69 | let r = Rope::from(MEDIUM); 70 | let mut iter = <$iterator>::from(&r).rev().cycle(); 71 | bench.iter(|| { 72 | let _ = iter.next(); 73 | }); 74 | }); 75 | 76 | group.bench_function("backward_large", |bench| { 77 | let r = Rope::from(LARGE); 78 | let mut iter = <$iterator>::from(&r).rev().cycle(); 79 | bench.iter(|| { 80 | let _ = iter.next(); 81 | }); 82 | }); 83 | } 84 | }; 85 | } 86 | 87 | iter_bench!(chunks, Chunks, "iter_chunks"); 88 | iter_bench!(bytes, Bytes, "iter_bytes"); 89 | iter_bench!(chars, Chars, "iter_chars"); 90 | iter_bench!(lines, Lines, "iter_lines"); 91 | iter_bench!(raw_lines, RawLines, "iter_raw_lines"); 92 | 93 | criterion_group!(benches, chunks, bytes, chars, lines, raw_lines); 94 | criterion_main!(benches); 95 | -------------------------------------------------------------------------------- /benches/metric_conversion.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::{LARGE, MEDIUM, SMALL, TINY}; 4 | use criterion::{Bencher, Criterion, criterion_group, criterion_main}; 5 | use crop::Rope; 6 | 7 | fn line_of_byte(c: &mut Criterion) { 8 | #[inline(always)] 9 | fn bench(bench: &mut Bencher, s: &str) { 10 | let r = Rope::from(s); 11 | let mut byte_offsets = (0..=r.byte_len()).cycle(); 12 | bench.iter(|| { 13 | let _ = r.line_of_byte(byte_offsets.next().unwrap()); 14 | }); 15 | } 16 | 17 | let mut group = c.benchmark_group("line_of_byte"); 18 | 19 | group.bench_function("tiny", |b| bench(b, TINY)); 20 | group.bench_function("small", |b| bench(b, SMALL)); 21 | group.bench_function("medium", |b| bench(b, MEDIUM)); 22 | group.bench_function("large", |b| bench(b, LARGE)); 23 | } 24 | 25 | fn byte_of_line(c: &mut 
Criterion) { 26 | #[inline(always)] 27 | fn bench(bench: &mut Bencher, s: &str) { 28 | let r = Rope::from(s); 29 | let mut line_offsets = (0..=r.line_len()).cycle(); 30 | bench.iter(|| { 31 | let _ = r.byte_of_line(line_offsets.next().unwrap()); 32 | }); 33 | } 34 | 35 | let mut group = c.benchmark_group("byte_of_line"); 36 | 37 | group.bench_function("tiny", |b| bench(b, TINY)); 38 | group.bench_function("small", |b| bench(b, SMALL)); 39 | group.bench_function("medium", |b| bench(b, MEDIUM)); 40 | group.bench_function("large", |b| bench(b, LARGE)); 41 | } 42 | 43 | criterion_group!(benches, byte_of_line, line_of_byte); 44 | criterion_main!(benches); 45 | -------------------------------------------------------------------------------- /benches/serde.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::{LARGE, MEDIUM, SMALL, TINY}; 4 | use criterion::measurement::WallTime; 5 | use criterion::{ 6 | BatchSize, 7 | BenchmarkGroup, 8 | BenchmarkId, 9 | Criterion, 10 | criterion_group, 11 | criterion_main, 12 | }; 13 | use crop::Rope; 14 | 15 | trait DataFormat { 16 | const GROUP_NAME: &str; 17 | 18 | type Serialized: Clone; 19 | 20 | fn serialize_rope(&self, rope: &Rope) -> Self::Serialized; 21 | 22 | fn deserialize_rope(&self, serialized: Self::Serialized) -> Rope; 23 | 24 | fn bench(&self, c: &mut Criterion) { 25 | let mut group = c.benchmark_group(Self::GROUP_NAME); 26 | self.bench_serialization(&mut group); 27 | self.bench_deserialization(&mut group); 28 | group.finish(); 29 | } 30 | 31 | fn bench_serialization(&self, group: &mut BenchmarkGroup<'_, WallTime>) { 32 | for (rope, rope_name) in test_vectors() { 33 | let bench_id = BenchmarkId::new("ser", rope_name); 34 | group.bench_function(bench_id, |b| { 35 | b.iter(|| self.serialize_rope(&rope)); 36 | }); 37 | } 38 | } 39 | 40 | fn bench_deserialization(&self, group: &mut BenchmarkGroup<'_, WallTime>) { 41 | for (rope, rope_name) in test_vectors() { 
42 | let serialized = self.serialize_rope(&rope); 43 | let bench_id = BenchmarkId::new("de", rope_name); 44 | group.bench_function(bench_id, |b| { 45 | let setup = || serialized.clone(); 46 | let routine = |serialized| self.deserialize_rope(serialized); 47 | b.iter_batched(setup, routine, BatchSize::SmallInput); 48 | }); 49 | } 50 | } 51 | } 52 | 53 | fn test_vectors() -> impl Iterator<Item = (Rope, &'static str)> { 54 | [ 55 | (Rope::from(TINY), "tiny"), 56 | (Rope::from(SMALL), "small"), 57 | (Rope::from(MEDIUM), "medium"), 58 | (Rope::from(LARGE), "large"), 59 | ] 60 | .into_iter() 61 | } 62 | 63 | struct Json; 64 | 65 | impl DataFormat for Json { 66 | const GROUP_NAME: &str = "serde_json"; 67 | 68 | type Serialized = String; 69 | 70 | fn serialize_rope(&self, rope: &Rope) -> Self::Serialized { 71 | serde_json::to_string(rope).unwrap() 72 | } 73 | 74 | fn deserialize_rope(&self, serialized: Self::Serialized) -> Rope { 75 | serde_json::from_str(&serialized).unwrap() 76 | } 77 | } 78 | 79 | fn json(c: &mut Criterion) { 80 | Json.bench(c); 81 | } 82 | 83 | criterion_group!(benches, json); 84 | criterion_main!(benches); 85 | -------------------------------------------------------------------------------- /benches/slicing.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::{LARGE, MEDIUM, PercentRanges, SMALL, TINY}; 4 | use criterion::{ 5 | BatchSize, 6 | Bencher, 7 | Criterion, 8 | criterion_group, 9 | criterion_main, 10 | }; 11 | use crop::Rope; 12 | 13 | fn byte_slice(c: &mut Criterion) { 14 | #[inline(always)] 15 | fn bench(bench: &mut Bencher, s: &str) { 16 | let r = Rope::from(s); 17 | let mut ranges = PercentRanges::new(r.byte_len()).cycle(); 18 | let setup = || ranges.next().unwrap(); 19 | let routine = |range| r.byte_slice(range); 20 | bench.iter_batched(setup, routine, BatchSize::SmallInput); 21 | } 22 | 23 | let mut group = c.benchmark_group("byte_slice"); 24 | 25 | group.bench_function("tiny", |b| bench(b, TINY)); 
26 | group.bench_function("small", |b| bench(b, SMALL)); 27 | group.bench_function("medium", |b| bench(b, MEDIUM)); 28 | group.bench_function("large", |b| bench(b, LARGE)); 29 | } 30 | 31 | fn line_slice(c: &mut Criterion) { 32 | #[inline(always)] 33 | fn bench(bench: &mut Bencher, s: &str) { 34 | let r = Rope::from(s); 35 | let mut ranges = PercentRanges::new(r.line_len()).cycle(); 36 | let setup = || ranges.next().unwrap(); 37 | let routine = |range| r.line_slice(range); 38 | bench.iter_batched(setup, routine, BatchSize::SmallInput); 39 | } 40 | 41 | let mut group = c.benchmark_group("line_slice"); 42 | 43 | group.bench_function("tiny", |b| bench(b, TINY)); 44 | group.bench_function("small", |b| bench(b, SMALL)); 45 | group.bench_function("medium", |b| bench(b, MEDIUM)); 46 | group.bench_function("large", |b| bench(b, LARGE)); 47 | } 48 | 49 | fn rope_from_slice(c: &mut Criterion) { 50 | #[inline(always)] 51 | fn bench(bench: &mut Bencher, s: &str) { 52 | let r = Rope::from(s); 53 | let mut ranges = PercentRanges::new(r.byte_len()).cycle(); 54 | let setup = || { 55 | let range = ranges.next().unwrap(); 56 | r.byte_slice(range) 57 | }; 58 | let routine = Rope::from; 59 | bench.iter_batched(setup, routine, BatchSize::SmallInput); 60 | } 61 | 62 | let mut group = c.benchmark_group("rope_from_slice"); 63 | 64 | group.bench_function("tiny", |b| bench(b, TINY)); 65 | group.bench_function("small", |b| bench(b, SMALL)); 66 | group.bench_function("medium", |b| bench(b, MEDIUM)); 67 | group.bench_function("large", |b| bench(b, LARGE)); 68 | } 69 | 70 | criterion_group!(benches, byte_slice, line_slice, rope_from_slice); 71 | criterion_main!(benches); 72 | -------------------------------------------------------------------------------- /examples/example_usage.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io::{BufWriter, Write}; 3 | use std::thread; 4 | 5 | use crop::{Rope, RopeBuilder, RopeSlice}; 6 | 
7 | fn main() { 8 | let mut builder = RopeBuilder::new(); 9 | 10 | builder 11 | .append("I am a 🦀\n") 12 | .append("Who walks the shore\n") 13 | .append("And pinches toes all day.\n") 14 | .append("\n") 15 | .append("If I were you\n") 16 | .append("I'd wear some 👟\n") 17 | .append("And not get in my way.\n"); 18 | 19 | let mut rope: Rope = builder.build(); 20 | 21 | let byte_slice: RopeSlice = rope.byte_slice(..32); 22 | 23 | assert_eq!(byte_slice, "I am a 🦀\nWho walks the shore\n"); 24 | 25 | let line_slice: RopeSlice = rope.line_slice(..2); 26 | 27 | assert_eq!(line_slice, byte_slice); 28 | 29 | assert_eq!(rope.line(5), "I'd wear some 👟"); 30 | 31 | let start: usize = rope.byte_of_line(5); 32 | 33 | let end: usize = rope.byte_of_line(6); 34 | 35 | rope.replace(start..end, "I'd rock some 👠\n"); 36 | 37 | assert_eq!(rope.line(5), "I'd rock some 👠"); 38 | 39 | let snapshot: Rope = rope.clone(); 40 | 41 | thread::spawn(move || { 42 | let mut file = 43 | BufWriter::new(File::create("my_little_poem.txt").unwrap()); 44 | 45 | for chunk in snapshot.chunks() { 46 | file.write_all(chunk.as_bytes()).unwrap(); 47 | } 48 | }) 49 | .join() 50 | .unwrap(); 51 | } 52 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | Cargo.lock 2 | artifacts 3 | corpus 4 | target 5 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crop-fuzz" 3 | version = "0.0.0" 4 | authors = ["Automatically generated"] 5 | publish = false 6 | edition = "2021" 7 | 8 | [package.metadata] 9 | cargo-fuzz = true 10 | 11 | [features] 12 | small_chunks = ["crop/small_chunks"] 13 | 14 | [dependencies] 15 | crop = { path = ".." 
} 16 | libfuzzer-sys = { version = "0.4", features = ["arbitrary-derive"] } 17 | 18 | # Prevent this from interfering with workspaces 19 | [workspace] 20 | members = ["."] 21 | 22 | [[bin]] 23 | name = "editing" 24 | path = "fuzz_targets/editing.rs" 25 | test = false 26 | doc = false 27 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/editing.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use std::ops::Range; 4 | 5 | use crop::Rope; 6 | use libfuzzer_sys::arbitrary::{self, Arbitrary}; 7 | use libfuzzer_sys::fuzz_target; 8 | 9 | const NON_ASCII: &str = include_str!("../../tests/common/non-ascii.txt"); 10 | 11 | #[derive(Arbitrary, Clone, Debug)] 12 | enum EditOp<'a> { 13 | Insert { byte_offset: usize, text: &'a str }, 14 | Delete { byte_range: Range<usize> }, 15 | Replace { byte_range: Range<usize>, text: &'a str }, 16 | } 17 | 18 | #[derive(Arbitrary, Copy, Clone, Debug)] 19 | enum StartingText<'a> { 20 | Custom(&'a str), 21 | NonAscii, 22 | } 23 | 24 | fuzz_target!(|data: (StartingText, Vec<EditOp>)| { 25 | let (starting, ops) = data; 26 | 27 | let start = match starting { 28 | StartingText::Custom(s) => s, 29 | StartingText::NonAscii => NON_ASCII, 30 | }; 31 | let mut rope = Rope::from(start); 32 | let mut string = String::from(start); 33 | 34 | for op in ops { 35 | match op { 36 | EditOp::Insert { mut byte_offset, text } 37 | if byte_offset <= rope.byte_len() => 38 | { 39 | while !rope.is_char_boundary(byte_offset) { 40 | byte_offset += 1; 41 | } 42 | rope.insert(byte_offset, text); 43 | string.insert_str(byte_offset, text); 44 | }, 45 | 46 | EditOp::Delete { mut byte_range } 47 | if byte_range.start <= byte_range.end 48 | && byte_range.end <= rope.byte_len() => 49 | { 50 | while !rope.is_char_boundary(byte_range.start) { 51 | byte_range.start += 1; 52 | } 53 | while !rope.is_char_boundary(byte_range.end) { 54 | byte_range.end += 1; 55 | } 56 | 
rope.delete(byte_range.clone()); 57 | string.replace_range(byte_range, ""); 58 | }, 59 | 60 | EditOp::Replace { mut byte_range, text } 61 | if byte_range.start <= byte_range.end 62 | && byte_range.end <= rope.byte_len() => 63 | { 64 | while !rope.is_char_boundary(byte_range.start) { 65 | byte_range.start += 1; 66 | } 67 | while !rope.is_char_boundary(byte_range.end) { 68 | byte_range.end += 1; 69 | } 70 | rope.replace(byte_range.clone(), text); 71 | string.replace_range(byte_range, text); 72 | }, 73 | 74 | _ => continue, 75 | } 76 | } 77 | 78 | rope.assert_invariants(); 79 | assert_eq!(rope, string); 80 | assert_eq!( 81 | rope.lines().collect::<Vec<_>>(), 82 | string.lines().collect::<Vec<_>>(), 83 | ); 84 | }); 85 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | format_code_in_doc_comments = true 2 | format_strings = true 3 | group_imports = "StdExternalCrate" 4 | imports_layout = "HorizontalVertical" 5 | match_block_trailing_comma = true 6 | max_width = 79 7 | unstable_features = true 8 | use_field_init_shorthand = true 9 | use_small_heuristics = "Max" 10 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! crop is an implementation of a UTF-8 text rope, a data structure designed 2 | //! to be used in applications that need to handle frequent edits to 3 | //! arbitrarily large buffers, such as text editors. 4 | //! 5 | //! crop's `Rope` is backed by a 6 | //! [B-tree](https://en.wikipedia.org/wiki/B-tree), ensuring that the time 7 | //! complexity of inserting, deleting or replacing a piece of text is always 8 | //! logarithmic in the size of the `Rope`. 9 | //! 10 | //! The crate has a relatively straightforward API. There are 3 structs to be 11 | //! aware of: 12 | //! 13 | //! 
- [`Rope`]: the star of the crate; 14 | //! - [`RopeSlice`]: an immutable slice of a `Rope`; 15 | //! - [`RopeBuilder`]: an incremental `Rope` builder. 16 | //! 17 | //! plus the [`iter`] module which contains iterators over `Rope`s and 18 | //! `RopeSlice`s. That's it. 19 | //! 20 | //! # Example usage 21 | //! 22 | //! ```no_run 23 | //! # use std::fs::{File}; 24 | //! # use std::io::{BufWriter, Write}; 25 | //! # use std::thread; 26 | //! # use crop::{RopeBuilder, RopeSlice}; 27 | //! // A `Rope` can be created either directly from a string or incrementally 28 | //! // using the `RopeBuilder`. 29 | //! 30 | //! let mut builder = RopeBuilder::new(); 31 | //! 32 | //! builder 33 | //! .append("I am a 🦀\n") 34 | //! .append("Who walks the shore\n") 35 | //! .append("And pinches toes all day.\n") 36 | //! .append("\n") 37 | //! .append("If I were you\n") 38 | //! .append("I'd wear some 👟\n") 39 | //! .append("And not get in my way.\n"); 40 | //! 41 | //! let mut rope = builder.build(); 42 | //! 43 | //! // `Rope`s can be sliced to obtain `RopeSlice`s. 44 | //! // 45 | //! // A `RopeSlice` is to a `Rope` as a `&str` is to a `String`: the former in 46 | //! // each pair are borrowed references of the latter. 47 | //! 48 | //! // A `Rope` can be sliced using either byte offsets: 49 | //! 50 | //! let byte_slice: RopeSlice = rope.byte_slice(..32); 51 | //! 52 | //! assert_eq!(byte_slice, "I am a 🦀\nWho walks the shore\n"); 53 | //! 54 | //! // or line offsets: 55 | //! 56 | //! let line_slice: RopeSlice = rope.line_slice(..2); 57 | //! 58 | //! assert_eq!(line_slice, byte_slice); 59 | //! 60 | //! // We can also get a `RopeSlice` by asking the `Rope` for a specific line 61 | //! // index: 62 | //! 63 | //! assert_eq!(rope.line(5), "I'd wear some 👟"); 64 | //! 65 | //! // We can modify that line by getting its start/end byte offsets: 66 | //! 67 | //! let start: usize = rope.byte_of_line(5); 68 | //! 69 | //! let end: usize = rope.byte_of_line(6); 70 | //! 71 | //! 
// and replacing that byte range with some other text: 72 | //! 73 | //! rope.replace(start..end, "I'd rock some 👠\n"); 74 | //! 75 | //! assert_eq!(rope.line(5), "I'd rock some 👠"); 76 | //! 77 | //! // `Rope`s use `Arc`s to share data between threads, so cloning them is 78 | //! // extremely cheap. 79 | //! 80 | //! let snapshot = rope.clone(); 81 | //! 82 | //! // This lets us save a `Rope` to disk in a background thread while 83 | //! // keeping the main thread responsive. 84 | //! 85 | //! thread::spawn(move || { 86 | //! let mut file = 87 | //! BufWriter::new(File::create("my_little_poem.txt").unwrap()); 88 | //! 89 | //! // The text content is stored in the leaves of the B-tree, where each 90 | //! // chunk can store up to 1KB of data. 91 | //! // 92 | //! // We can iterate over the leaves using the `Chunks` iterator which 93 | //! // yields the chunks of the `Rope` as string slices. 94 | //! 95 | //! for chunk in snapshot.chunks() { 96 | //! file.write_all(chunk.as_bytes()).unwrap(); 97 | //! } 98 | //! }) 99 | //! .join() 100 | //! .unwrap(); 101 | //! ``` 102 | //! 103 | //! # On offsets and indexes 104 | //! 105 | //! Some functions like [`Rope::byte()`] or [`Rope::line()`] take byte or line 106 | //! **indexes** as parameters, while others like [`Rope::insert()`], 107 | //! [`Rope::replace()`] or [`Rope::is_char_boundary()`] expect byte or line 108 | //! **offsets**. 109 | //! 110 | //! These two terms may sound very similar to each other, but in this context 111 | //! they mean slightly different things. 112 | //! 113 | //! An index is a 0-based number used to target **one specific** byte or line. 114 | //! For example, in the word `"bar"` the byte representing the letter `'b'` has 115 | //! an index of 0, `'a'`'s index is 1 and `'r'`'s index is 2. The maximum value 116 | //! for an index is **one less** than the length of the string. 117 | //! 118 | //! Hopefully nothing surprising so far. 119 | //! 120 | //! 
On the other hand, an offset doesn't refer to an item, it refers to the 121 | //! **boundary** between two adjacent items. For example, if we want to insert 122 | //! another `'a'` between the `'a'` and the `'r'` of the word `"bar"` we need 123 | //! to use a byte offset of 2. The maximum value for an offset is **equal to** 124 | //! the length of the string. 125 | //! 126 | //! # Feature flags 127 | //! 128 | //! The following feature flags can be used to tweak crop's behavior and 129 | //! enable additional APIs: 130 | //! 131 | //! - `simd` (enabled by default): enables SIMD on supported platforms; 132 | //! 133 | //! - `graphemes` (disabled by default): enables a few grapheme-oriented APIs 134 | //! on `Rope`s and `RopeSlice`s such as the 135 | //! [`Graphemes`](crate::iter::Graphemes) iterator and others; 136 | //! 137 | //! - `utf16-metric` (disabled by default): makes the `Rope` and `RopeSlice` 138 | //! track the UTF-16 code units they'd have if their content was stored as 139 | //! UTF-16 instead of UTF-8, allowing them to efficiently convert UTF-16 140 | //! code unit offsets to and from byte offsets in logarithmic time. 141 | 142 | #![cfg_attr(not(any(test, feature = "std")), no_std)] 143 | #![allow(clippy::explicit_auto_deref)] 144 | #![allow(clippy::module_inception)] 145 | #![cfg_attr(docsrs, feature(doc_cfg))] 146 | #![deny(missing_docs)] 147 | #![deny(rustdoc::broken_intra_doc_links)] 148 | #![deny(rustdoc::private_intra_doc_links)] 149 | #![warn(clippy::std_instead_of_core)] 150 | #![warn(clippy::std_instead_of_alloc)] 151 | #![warn(clippy::alloc_instead_of_core)] 152 | 153 | extern crate alloc; 154 | 155 | pub mod iter { 156 | //! Iterators over [`Rope`](crate::Rope)s and 157 | //! [`RopeSlice`](crate::RopeSlice)s. 158 | 159 | pub use crate::rope::iterators::*; 160 | } 161 | 162 | mod rope; 163 | 164 | #[doc(hidden)] 165 | pub mod tree; 166 | 167 | // These are not part of the public API, we only export them to be able to run 168 | // doctests. 
169 | pub use rope::{Rope, RopeBuilder, RopeSlice}; 170 | #[doc(hidden)] 171 | pub use rope::{ 172 | gap_buffer::GapBuffer, 173 | gap_slice::GapSlice, 174 | metrics::ChunkSummary, 175 | }; 176 | 177 | #[inline] 178 | pub(crate) fn range_bounds_to_start_end<T, B>( 179 | range: B, 180 | lo: usize, 181 | hi: usize, 182 | ) -> (usize, usize) 183 | where 184 | B: core::ops::RangeBounds<T>, 185 | T: core::ops::Add<usize, Output = usize> + Into<usize> + Copy, 186 | { 187 | use core::ops::Bound; 188 | 189 | let start = match range.start_bound() { 190 | Bound::Included(&n) => n.into(), 191 | Bound::Excluded(&n) => n + 1, 192 | Bound::Unbounded => lo, 193 | }; 194 | 195 | let end = match range.end_bound() { 196 | Bound::Included(&n) => n + 1, 197 | Bound::Excluded(&n) => n.into(), 198 | Bound::Unbounded => hi, 199 | }; 200 | 201 | (start, end) 202 | } 203 | -------------------------------------------------------------------------------- /src/rope/gap_slice.rs: -------------------------------------------------------------------------------- 1 | use super::metrics::{ChunkSummary, SummaryUpTo, ToByteOffset}; 2 | use super::utils::{debug_no_quotes, panic_messages as panic}; 3 | use crate::tree::{Metric, Summarize}; 4 | 5 | /// A slice of a [`GapBuffer`](super::gap_buffer::GapBuffer). 6 | #[derive(Copy, Clone, Default)] 7 | pub struct GapSlice<'a> { 8 | pub(super) bytes: &'a [u8], 9 | pub(super) left_summary: ChunkSummary, 10 | pub(super) len_right: u16, 11 | } 12 | 13 | impl core::fmt::Debug for GapSlice<'_> { 14 | #[inline] 15 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 16 | f.write_str("\"")?; 17 | debug_no_quotes(self.left_chunk(), f)?; 18 | write!(f, "{:~^1$}", "", self.len_gap())?; 19 | debug_no_quotes(self.right_chunk(), f)?; 20 | f.write_str("\"") 21 | } 22 | } 23 | 24 | // We only need this to compare `GapSlice`s with `&str`s in (doc)tests. 
25 | impl PartialEq<GapSlice<'_>> for &str { 26 | fn eq(&self, rhs: &GapSlice<'_>) -> bool { 27 | self.len() == rhs.len() 28 | && rhs.left_chunk() == &self[..rhs.len_left()] 29 | && rhs.right_chunk() == &self[rhs.len_left()..] 30 | } 31 | } 32 | 33 | impl<'a> GapSlice<'a> { 34 | /// Panics with a nicely formatted error message if the given byte offset 35 | /// is not a character boundary. 36 | #[track_caller] 37 | #[inline] 38 | pub(super) fn assert_char_boundary(&self, byte_offset: usize) { 39 | debug_assert!(byte_offset <= self.len()); 40 | 41 | if !self.is_char_boundary(byte_offset) { 42 | if byte_offset < self.len_left() { 43 | panic::byte_offset_not_char_boundary( 44 | self.left_chunk(), 45 | byte_offset, 46 | ) 47 | } else { 48 | panic::byte_offset_not_char_boundary( 49 | self.right_chunk(), 50 | byte_offset - self.len_left(), 51 | ) 52 | } 53 | } 54 | } 55 | 56 | pub(super) fn assert_invariants(&self) { 57 | assert_eq!(self.left_summary, ChunkSummary::from(self.left_chunk())); 58 | 59 | if self.len_right() == 0 { 60 | assert_eq!(self.len_left(), self.bytes.len()); 61 | } else if self.len_left() == 0 { 62 | assert_eq!(self.len_right(), self.bytes.len()); 63 | } 64 | } 65 | 66 | /// Returns the byte at the given index. 67 | /// 68 | /// # Panics 69 | /// 70 | /// Panics if the index is out of bounds, i.e. greater than or equal to 71 | /// [`len()`](Self::len()). 
72 | #[inline] 73 | pub(super) fn byte(&self, byte_index: usize) -> u8 { 74 | debug_assert!(byte_index < self.len()); 75 | 76 | if byte_index < self.len_left() { 77 | self.left_chunk().as_bytes()[byte_index] 78 | } else { 79 | self.right_chunk().as_bytes()[byte_index - self.len_left()] 80 | } 81 | } 82 | 83 | #[inline] 84 | fn left_measure<M>(&self) -> M 85 | where 86 | M: Metric<ChunkSummary>, 87 | { 88 | M::measure(&self.left_summary) 89 | } 90 | 91 | #[inline] 92 | pub(super) fn truncate_last_char( 93 | &mut self, 94 | summary: ChunkSummary, 95 | ) -> ChunkSummary { 96 | debug_assert!(self.len() > 0); 97 | debug_assert_eq!(summary, self.summarize()); 98 | 99 | use core::cmp::Ordering; 100 | 101 | let last_char = self 102 | .last_chunk() 103 | .chars() 104 | .next_back() 105 | .expect("this slice isn't empty"); 106 | 107 | let removed_summary = ChunkSummary::from(last_char); 108 | 109 | let len_utf8 = removed_summary.bytes(); 110 | 111 | match self.len_right.cmp(&(len_utf8 as u16)) { 112 | // The slice doesn't have a right chunk, so we shorten the left 113 | // chunk. 114 | Ordering::Less => { 115 | self.left_summary -= removed_summary; 116 | self.bytes = &self.bytes[..self.len_left()]; 117 | self.left_summary 118 | }, 119 | 120 | // The right chunk has 2 or more characters, so we shorten the right 121 | // chunk. 122 | Ordering::Greater => { 123 | self.len_right -= len_utf8 as u16; 124 | self.bytes = &self.bytes[..self.bytes.len() - len_utf8]; 125 | summary - removed_summary 126 | }, 127 | 128 | // The right chunk has exactly 1 character, so we can keep just the 129 | // left chunk. 130 | Ordering::Equal => { 131 | self.len_right = 0; 132 | self.bytes = &self.bytes[..self.len_left()]; 133 | self.left_summary 134 | }, 135 | } 136 | } 137 | 138 | /// Removes the trailing line break (if it has one), returning the new 139 | /// summary. 
140 | #[inline] 141 | pub(super) fn truncate_trailing_line_break( 142 | &mut self, 143 | summary: ChunkSummary, 144 | ) -> ChunkSummary { 145 | debug_assert_eq!(summary, self.summarize()); 146 | 147 | if !self.has_trailing_newline() { 148 | return summary; 149 | } 150 | 151 | let mut new_summary = self.truncate_last_char(summary); 152 | 153 | if self.last_chunk().ends_with('\r') { 154 | new_summary = self.truncate_last_char(new_summary) 155 | } 156 | 157 | new_summary 158 | } 159 | 160 | #[inline] 161 | pub(super) fn empty() -> Self { 162 | Self::default() 163 | } 164 | 165 | /// Returns `true` if it ends with a newline. 166 | #[inline] 167 | pub(super) fn has_trailing_newline(&self) -> bool { 168 | self.last_chunk().ends_with('\n') 169 | } 170 | 171 | #[inline] 172 | pub(super) fn is_char_boundary(&self, byte_offset: usize) -> bool { 173 | debug_assert!(byte_offset <= self.len()); 174 | 175 | if byte_offset <= self.len_left() { 176 | self.left_chunk().is_char_boundary(byte_offset) 177 | } else { 178 | self.right_chunk().is_char_boundary(byte_offset - self.len_left()) 179 | } 180 | } 181 | 182 | /// The second segment if it's not empty, or the first one otherwise. 183 | #[inline] 184 | pub(super) fn last_chunk(&self) -> &'a str { 185 | if self.len_right() == 0 { 186 | self.left_chunk() 187 | } else { 188 | self.right_chunk() 189 | } 190 | } 191 | 192 | #[inline] 193 | pub(super) fn left_chunk(&self) -> &'a str { 194 | // SAFETY: the first `len_left` bytes are valid UTF-8. 
195 | unsafe { 196 | core::str::from_utf8_unchecked(&self.bytes[..self.len_left()]) 197 | } 198 | } 199 | 200 | #[inline] 201 | pub(super) fn len(&self) -> usize { 202 | self.len_left() + self.len_right() 203 | } 204 | 205 | #[inline] 206 | pub(super) fn len_gap(&self) -> usize { 207 | self.bytes.len() - self.len() 208 | } 209 | 210 | #[inline] 211 | pub(super) fn len_left(&self) -> usize { 212 | self.left_summary.bytes() 213 | } 214 | 215 | #[inline] 216 | pub(super) fn len_right(&self) -> usize { 217 | self.len_right as _ 218 | } 219 | 220 | #[inline] 221 | pub(super) fn right_chunk(&self) -> &'a str { 222 | // SAFETY: the last `len_right` bytes are valid UTF-8. 223 | unsafe { 224 | core::str::from_utf8_unchecked( 225 | &self.bytes[self.bytes.len() - self.len_right()..], 226 | ) 227 | } 228 | } 229 | 230 | #[inline] 231 | fn right_summary(&self, summary: ChunkSummary) -> ChunkSummary { 232 | debug_assert_eq!(summary, self.summarize()); 233 | summary - self.left_summary 234 | } 235 | 236 | /// Splits the slice at the given offset, returning the left and right 237 | /// slices and their summary. 238 | /// 239 | /// # Panics 240 | /// 241 | /// Panics if the offset is greater than the M-measure of the slice. 
242 | /// 243 | /// # Examples 244 | /// 245 | /// ```ignore 246 | /// let gap_buffer = GapBuffer::<20>::from("foo\nbar\r\nbaz"); 247 | /// 248 | /// let summary = gap_buffer.summarize(); 249 | /// 250 | /// let ((left, _), (right, _)) = 251 | /// gap_buffer.as_slice().split_at_offset(RawLineMetric(1), summary); 252 | /// 253 | /// assert_eq!("foo\n", left); 254 | /// 255 | /// assert_eq!("bar\r\nbaz", right); 256 | /// ``` 257 | #[track_caller] 258 | #[inline] 259 | pub fn split_at_offset<M>( 260 | &self, 261 | mut offset: M, 262 | summary: ChunkSummary, 263 | ) -> ((Self, ChunkSummary), (Self, ChunkSummary)) 264 | where 265 | M: Metric<ChunkSummary> + ToByteOffset + SummaryUpTo, 266 | { 267 | debug_assert_eq!(summary, self.summarize()); 268 | 269 | debug_assert!(offset <= M::measure(&summary)); 270 | 271 | if offset <= self.left_measure::<M>() { 272 | let byte_offset: usize = offset.to_byte_offset(self.left_chunk()); 273 | 274 | let (bytes_left, bytes_right) = self.split_bytes(byte_offset); 275 | 276 | let left_left_summary = M::up_to( 277 | self.left_chunk(), 278 | self.left_summary, 279 | offset, 280 | byte_offset, 281 | ); 282 | 283 | let left = Self { 284 | bytes: bytes_left, 285 | left_summary: left_left_summary, 286 | len_right: 0, 287 | }; 288 | 289 | let right = Self { 290 | bytes: bytes_right, 291 | left_summary: self.left_summary - left_left_summary, 292 | len_right: self.len_right, 293 | }; 294 | 295 | ((left, left.left_summary), (right, summary - left.left_summary)) 296 | } else { 297 | offset -= self.left_measure::<M>(); 298 | 299 | let byte_offset = offset.to_byte_offset(self.right_chunk()); 300 | 301 | let (bytes_left, bytes_right) = 302 | self.split_bytes(self.len_left() + byte_offset); 303 | 304 | let right_left_summary = M::up_to( 305 | self.right_chunk(), 306 | self.right_summary(summary), 307 | offset, 308 | byte_offset, 309 | ); 310 | 311 | let left = Self { 312 | bytes: bytes_left, 313 | left_summary: self.left_summary, 314 | len_right: right_left_summary.bytes() as u16, 
315 | }; 316 | 317 | let right = Self { 318 | bytes: bytes_right, 319 | left_summary: self.right_summary(summary) - right_left_summary, 320 | len_right: 0, 321 | }; 322 | 323 | ((left, summary - right.left_summary), (right, right.left_summary)) 324 | } 325 | } 326 | 327 | #[inline] 328 | fn split_bytes(&self, byte_offset: usize) -> (&'a [u8], &'a [u8]) { 329 | debug_assert!(byte_offset <= self.len()); 330 | 331 | use core::cmp::Ordering; 332 | 333 | let offset = match byte_offset.cmp(&self.len_left()) { 334 | Ordering::Less => byte_offset, 335 | 336 | Ordering::Greater => byte_offset + self.len_gap(), 337 | 338 | Ordering::Equal => { 339 | return ( 340 | self.left_chunk().as_bytes(), 341 | self.right_chunk().as_bytes(), 342 | ); 343 | }, 344 | }; 345 | 346 | self.bytes.split_at(offset) 347 | } 348 | 349 | #[inline] 350 | fn summarize_right_chunk(&self) -> ChunkSummary { 351 | ChunkSummary::from(self.right_chunk()) 352 | } 353 | } 354 | 355 | impl Summarize for GapSlice<'_> { 356 | type Summary = ChunkSummary; 357 | 358 | #[inline] 359 | fn summarize(&self) -> Self::Summary { 360 | self.left_summary + self.summarize_right_chunk() 361 | } 362 | } 363 | 364 | #[cfg(test)] 365 | mod tests { 366 | use crate::rope::gap_buffer::GapBuffer; 367 | use crate::tree::{AsSlice, Summarize}; 368 | 369 | #[test] 370 | fn debug_slice() { 371 | let buffer = GapBuffer::<10>::from("Hello"); 372 | assert_eq!("\"He~~~~~llo\"", format!("{:?}", buffer.as_slice())); 373 | } 374 | 375 | #[test] 376 | fn truncate_trailing_crlf() { 377 | let buffer = GapBuffer::<5>::from("bar\r\n"); 378 | let mut slice = buffer.as_slice(); 379 | let summary = slice.summarize(); 380 | slice.truncate_trailing_line_break(summary); 381 | assert_eq!("bar", slice); 382 | } 383 | 384 | #[test] 385 | fn truncate_trailing_lf() { 386 | let buffer = GapBuffer::<5>::from("bar\n"); 387 | let mut slice = buffer.as_slice(); 388 | let summary = slice.summarize(); 389 | slice.truncate_trailing_line_break(summary); 390 | 
assert_eq!("bar", slice); 391 | } 392 | } 393 | -------------------------------------------------------------------------------- /src/rope/metrics.rs: -------------------------------------------------------------------------------- 1 | use core::ops::{Add, AddAssign, Sub, SubAssign}; 2 | 3 | use super::gap_buffer::GapBuffer; 4 | use super::gap_slice::GapSlice; 5 | use crate::tree::{DoubleEndedUnitMetric, Metric, SlicingMetric, UnitMetric}; 6 | 7 | #[derive(Copy, Clone, Default, Debug, PartialEq)] 8 | #[doc(hidden)] 9 | pub struct ChunkSummary { 10 | bytes: usize, 11 | line_breaks: usize, 12 | #[cfg(feature = "utf16-metric")] 13 | utf16_code_units: usize, 14 | } 15 | 16 | impl From<&str> for ChunkSummary { 17 | #[inline] 18 | fn from(s: &str) -> Self { 19 | Self { 20 | bytes: s.len(), 21 | line_breaks: count::line_breaks(s), 22 | #[cfg(feature = "utf16-metric")] 23 | utf16_code_units: count::utf16_code_units(s), 24 | } 25 | } 26 | } 27 | 28 | impl From for ChunkSummary { 29 | #[inline] 30 | fn from(ch: char) -> Self { 31 | Self { 32 | bytes: ch.len_utf8(), 33 | line_breaks: (ch == '\n') as usize, 34 | #[cfg(feature = "utf16-metric")] 35 | utf16_code_units: ch.len_utf16(), 36 | } 37 | } 38 | } 39 | 40 | impl ChunkSummary { 41 | #[inline] 42 | pub fn bytes(&self) -> usize { 43 | self.bytes 44 | } 45 | 46 | #[inline] 47 | pub fn line_breaks(&self) -> usize { 48 | self.line_breaks 49 | } 50 | 51 | #[doc(hidden)] 52 | #[inline] 53 | pub fn new() -> Self { 54 | Self::default() 55 | } 56 | 57 | #[cfg(feature = "utf16-metric")] 58 | #[inline] 59 | pub fn utf16_code_units(&self) -> usize { 60 | self.utf16_code_units 61 | } 62 | } 63 | 64 | impl Add for ChunkSummary { 65 | type Output = Self; 66 | 67 | #[inline] 68 | fn add(mut self, rhs: Self) -> Self { 69 | self += rhs; 70 | self 71 | } 72 | } 73 | 74 | impl Sub for ChunkSummary { 75 | type Output = Self; 76 | 77 | #[inline] 78 | fn sub(mut self, rhs: Self) -> Self { 79 | self -= rhs; 80 | self 81 | } 82 | } 83 | 84 | impl 
Add<&Self> for ChunkSummary { 85 | type Output = Self; 86 | 87 | #[inline] 88 | fn add(mut self, rhs: &Self) -> Self { 89 | self += rhs; 90 | self 91 | } 92 | } 93 | 94 | impl Sub<&Self> for ChunkSummary { 95 | type Output = Self; 96 | 97 | #[inline] 98 | fn sub(mut self, rhs: &Self) -> Self { 99 | self -= rhs; 100 | self 101 | } 102 | } 103 | 104 | impl AddAssign for ChunkSummary { 105 | #[inline] 106 | fn add_assign(&mut self, rhs: Self) { 107 | self.bytes += rhs.bytes; 108 | self.line_breaks += rhs.line_breaks; 109 | #[cfg(feature = "utf16-metric")] 110 | { 111 | self.utf16_code_units += rhs.utf16_code_units; 112 | } 113 | } 114 | } 115 | 116 | impl SubAssign for ChunkSummary { 117 | #[inline] 118 | fn sub_assign(&mut self, rhs: Self) { 119 | self.bytes -= rhs.bytes; 120 | self.line_breaks -= rhs.line_breaks; 121 | #[cfg(feature = "utf16-metric")] 122 | { 123 | self.utf16_code_units -= rhs.utf16_code_units; 124 | } 125 | } 126 | } 127 | 128 | impl AddAssign<&Self> for ChunkSummary { 129 | #[inline] 130 | fn add_assign(&mut self, rhs: &Self) { 131 | *self += *rhs; 132 | } 133 | } 134 | 135 | impl SubAssign<&Self> for ChunkSummary { 136 | #[inline] 137 | fn sub_assign(&mut self, rhs: &Self) { 138 | *self -= *rhs; 139 | } 140 | } 141 | 142 | /// Conversion trait from the metric implementing this trait to the corresponding 143 | /// byte offset. 144 | pub trait ToByteOffset: Metric { 145 | /// Should return the byte offset of `self` in the given string. 146 | fn to_byte_offset(&self, in_str: &str) -> usize; 147 | } 148 | 149 | /// Trait to get the summary of a string up to a given offset. 150 | pub trait SummaryUpTo: Metric { 151 | /// Return the summary of the given string up to `offset`, where 152 | /// 153 | /// * `str_summary` is the string's summary, 154 | /// * `byte_offset` is the byte offset of `offset`.
155 | fn up_to( 156 | in_str: &str, 157 | str_summary: ChunkSummary, 158 | offset: Self, 159 | byte_offset: usize, 160 | ) -> ChunkSummary; 161 | } 162 | 163 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 164 | pub struct ByteMetric(pub(super) usize); 165 | 166 | impl Add for ByteMetric { 167 | type Output = Self; 168 | 169 | #[inline] 170 | fn add(self, other: Self) -> Self { 171 | Self(self.0 + other.0) 172 | } 173 | } 174 | 175 | impl Sub for ByteMetric { 176 | type Output = Self; 177 | 178 | #[inline] 179 | fn sub(self, other: Self) -> Self { 180 | Self(self.0 - other.0) 181 | } 182 | } 183 | 184 | impl AddAssign for ByteMetric { 185 | #[inline] 186 | fn add_assign(&mut self, other: Self) { 187 | self.0 += other.0 188 | } 189 | } 190 | 191 | impl SubAssign for ByteMetric { 192 | #[inline] 193 | fn sub_assign(&mut self, other: Self) { 194 | self.0 -= other.0 195 | } 196 | } 197 | 198 | impl Add for ByteMetric { 199 | type Output = usize; 200 | 201 | #[inline] 202 | fn add(self, other: usize) -> usize { 203 | self.0 + other 204 | } 205 | } 206 | 207 | impl From for usize { 208 | #[inline] 209 | fn from(ByteMetric(value): ByteMetric) -> usize { 210 | value 211 | } 212 | } 213 | 214 | impl ToByteOffset for ByteMetric { 215 | #[inline] 216 | fn to_byte_offset(&self, _: &str) -> usize { 217 | self.0 218 | } 219 | } 220 | 221 | impl SummaryUpTo for ByteMetric { 222 | #[inline] 223 | fn up_to( 224 | in_str: &str, 225 | str_summary: ChunkSummary, 226 | offset: Self, 227 | byte_offset: usize, 228 | ) -> ChunkSummary { 229 | debug_assert_eq!(offset.0, byte_offset); 230 | 231 | ChunkSummary { 232 | bytes: byte_offset, 233 | 234 | line_breaks: count::line_breaks_up_to( 235 | in_str, 236 | byte_offset, 237 | str_summary.line_breaks, 238 | ), 239 | 240 | #[cfg(feature = "utf16-metric")] 241 | utf16_code_units: count::utf16_code_units_up_to( 242 | in_str, 243 | byte_offset, 244 | str_summary.utf16_code_units, 245 | ), 246 | } 247 | } 248 | } 249 | 250 | impl 
Metric for ByteMetric { 251 | #[inline] 252 | fn zero() -> Self { 253 | Self(0) 254 | } 255 | 256 | #[inline] 257 | fn one() -> Self { 258 | Self(1) 259 | } 260 | 261 | #[inline] 262 | fn measure(summary: &ChunkSummary) -> Self { 263 | Self(summary.bytes) 264 | } 265 | } 266 | 267 | impl SlicingMetric> 268 | for ByteMetric 269 | { 270 | #[track_caller] 271 | #[inline] 272 | fn slice_up_to<'a>( 273 | chunk: GapSlice<'a>, 274 | byte_offset: Self, 275 | &summary: &ChunkSummary, 276 | ) -> (GapSlice<'a>, ChunkSummary) 277 | where 278 | 'a: 'a, 279 | { 280 | chunk.assert_char_boundary(byte_offset.0); 281 | 282 | let (left, _) = chunk.split_at_offset(byte_offset, summary); 283 | left 284 | } 285 | 286 | #[track_caller] 287 | #[inline] 288 | fn slice_from<'a>( 289 | chunk: GapSlice<'a>, 290 | byte_offset: Self, 291 | &summary: &ChunkSummary, 292 | ) -> (GapSlice<'a>, ChunkSummary) 293 | where 294 | 'a: 'a, 295 | { 296 | chunk.assert_char_boundary(byte_offset.0); 297 | 298 | let (_, right) = chunk.split_at_offset(byte_offset, summary); 299 | right 300 | } 301 | } 302 | 303 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 304 | pub struct RawLineMetric(pub usize); 305 | 306 | impl Add for RawLineMetric { 307 | type Output = Self; 308 | 309 | #[inline] 310 | fn add(self, other: Self) -> Self { 311 | Self(self.0 + other.0) 312 | } 313 | } 314 | 315 | impl Sub for RawLineMetric { 316 | type Output = Self; 317 | 318 | #[inline] 319 | fn sub(self, other: Self) -> Self { 320 | Self(self.0 - other.0) 321 | } 322 | } 323 | 324 | impl AddAssign for RawLineMetric { 325 | #[inline] 326 | fn add_assign(&mut self, other: Self) { 327 | self.0 += other.0 328 | } 329 | } 330 | 331 | impl SubAssign for RawLineMetric { 332 | #[inline] 333 | fn sub_assign(&mut self, other: Self) { 334 | self.0 -= other.0 335 | } 336 | } 337 | 338 | impl ToByteOffset for RawLineMetric { 339 | #[inline] 340 | fn to_byte_offset(&self, s: &str) -> usize { 341 | convert::byte_of_line(s, self.0) 342 
| } 343 | } 344 | 345 | impl SummaryUpTo for RawLineMetric { 346 | #[cfg_attr(not(feature = "utf16-metric"), allow(unused_variables))] 347 | #[inline] 348 | fn up_to( 349 | in_str: &str, 350 | str_summary: ChunkSummary, 351 | Self(line_offset): Self, 352 | byte_offset: usize, 353 | ) -> ChunkSummary { 354 | ChunkSummary { 355 | bytes: byte_offset, 356 | 357 | line_breaks: line_offset, 358 | 359 | #[cfg(feature = "utf16-metric")] 360 | utf16_code_units: count::utf16_code_units_up_to( 361 | in_str, 362 | byte_offset, 363 | str_summary.utf16_code_units, 364 | ), 365 | } 366 | } 367 | } 368 | 369 | impl Metric for RawLineMetric { 370 | #[inline] 371 | fn zero() -> Self { 372 | Self(0) 373 | } 374 | 375 | #[inline] 376 | fn one() -> Self { 377 | Self(1) 378 | } 379 | 380 | #[inline] 381 | fn measure(summary: &ChunkSummary) -> Self { 382 | Self(summary.line_breaks) 383 | } 384 | } 385 | 386 | impl SlicingMetric> 387 | for RawLineMetric 388 | { 389 | #[inline] 390 | fn slice_up_to<'a>( 391 | chunk: GapSlice<'a>, 392 | line_offset: Self, 393 | &summary: &ChunkSummary, 394 | ) -> (GapSlice<'a>, ChunkSummary) 395 | where 396 | 'a: 'a, 397 | { 398 | let (left, _) = chunk.split_at_offset(line_offset, summary); 399 | left 400 | } 401 | 402 | #[inline] 403 | fn slice_from<'a>( 404 | chunk: GapSlice<'a>, 405 | line_offset: Self, 406 | &summary: &ChunkSummary, 407 | ) -> (GapSlice<'a>, ChunkSummary) 408 | where 409 | 'a: 'a, 410 | { 411 | let (_, right) = chunk.split_at_offset(line_offset, summary); 412 | right 413 | } 414 | } 415 | 416 | impl UnitMetric> 417 | for RawLineMetric 418 | { 419 | #[inline] 420 | fn first_unit<'a>( 421 | chunk: GapSlice<'a>, 422 | &summary: &ChunkSummary, 423 | ) -> (GapSlice<'a>, ChunkSummary, ChunkSummary, GapSlice<'a>, ChunkSummary) 424 | where 425 | 'a: 'a, 426 | { 427 | let ((first, first_summary), (rest, rest_summary)) = 428 | chunk.split_at_offset(RawLineMetric(1), summary); 429 | 430 | (first, first_summary, first_summary, rest, rest_summary) 
431 | } 432 | } 433 | 434 | impl DoubleEndedUnitMetric> 435 | for RawLineMetric 436 | { 437 | #[inline] 438 | fn last_unit<'a>( 439 | slice: GapSlice<'a>, 440 | &summary: &ChunkSummary, 441 | ) -> (GapSlice<'a>, ChunkSummary, GapSlice<'a>, ChunkSummary, ChunkSummary) 442 | where 443 | 'a: 'a, 444 | { 445 | let split_offset = 446 | summary.line_breaks - (slice.has_trailing_newline() as usize); 447 | 448 | let ((rest, rest_summary), (last, last_summary)) = 449 | slice.split_at_offset(RawLineMetric(split_offset), summary); 450 | 451 | (rest, rest_summary, last, last_summary, last_summary) 452 | } 453 | 454 | #[inline] 455 | fn remainder<'a>( 456 | chunk: GapSlice<'a>, 457 | summary: &ChunkSummary, 458 | ) -> (GapSlice<'a>, ChunkSummary, GapSlice<'a>, ChunkSummary) 459 | where 460 | 'a: 'a, 461 | { 462 | if chunk.has_trailing_newline() { 463 | (chunk, *summary, GapSlice::empty(), ChunkSummary::new()) 464 | } else { 465 | let (rest, rest_summary, last, last_summary, _) = 466 | >>::last_unit(chunk, summary); 467 | 468 | (rest, rest_summary, last, last_summary) 469 | } 470 | } 471 | } 472 | 473 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 474 | pub(super) struct LineMetric(pub(super) usize); 475 | 476 | impl Add for LineMetric { 477 | type Output = Self; 478 | 479 | #[inline] 480 | fn add(self, other: Self) -> Self { 481 | Self(self.0 + other.0) 482 | } 483 | } 484 | 485 | impl Sub for LineMetric { 486 | type Output = Self; 487 | 488 | #[inline] 489 | fn sub(self, other: Self) -> Self { 490 | Self(self.0 - other.0) 491 | } 492 | } 493 | 494 | impl AddAssign for LineMetric { 495 | #[inline] 496 | fn add_assign(&mut self, other: Self) { 497 | self.0 += other.0 498 | } 499 | } 500 | 501 | impl SubAssign for LineMetric { 502 | #[inline] 503 | fn sub_assign(&mut self, other: Self) { 504 | self.0 -= other.0 505 | } 506 | } 507 | 508 | impl Metric for LineMetric { 509 | #[inline] 510 | fn zero() -> Self { 511 | Self(0) 512 | } 513 | 514 | #[inline] 515 | fn 
one() -> Self { 516 | Self(1) 517 | } 518 | 519 | #[inline] 520 | fn measure(summary: &ChunkSummary) -> Self { 521 | Self(summary.line_breaks) 522 | } 523 | } 524 | 525 | impl UnitMetric> for LineMetric { 526 | #[inline] 527 | fn first_unit<'a>( 528 | chunk: GapSlice<'a>, 529 | summary: &ChunkSummary, 530 | ) -> (GapSlice<'a>, ChunkSummary, ChunkSummary, GapSlice<'a>, ChunkSummary) 531 | where 532 | 'a: 'a, 533 | { 534 | let (mut first, mut first_summary, advance, rest, rest_summary) = 535 | >>::first_unit( 536 | chunk, summary, 537 | ); 538 | 539 | first_summary = first.truncate_trailing_line_break(first_summary); 540 | 541 | (first, first_summary, advance, rest, rest_summary) 542 | } 543 | } 544 | 545 | impl DoubleEndedUnitMetric> 546 | for LineMetric 547 | { 548 | #[inline] 549 | fn last_unit<'a>( 550 | chunk: GapSlice<'a>, 551 | summary: &ChunkSummary, 552 | ) -> (GapSlice<'a>, ChunkSummary, GapSlice<'a>, ChunkSummary, ChunkSummary) 553 | where 554 | 'a: 'a, 555 | { 556 | let (rest, rest_summary, mut last, mut last_summary, advance) = 557 | >>::last_unit(chunk, summary); 558 | 559 | last_summary = last.truncate_trailing_line_break(last_summary); 560 | 561 | (rest, rest_summary, last, last_summary, advance) 562 | } 563 | 564 | #[inline] 565 | fn remainder<'a>( 566 | chunk: GapSlice<'a>, 567 | summary: &ChunkSummary, 568 | ) -> (GapSlice<'a>, ChunkSummary, GapSlice<'a>, ChunkSummary) 569 | where 570 | 'a: 'a, 571 | { 572 | >>::remainder(chunk, summary) 573 | } 574 | } 575 | 576 | #[cfg(feature = "utf16-metric")] 577 | pub use utf16_metric::Utf16Metric; 578 | 579 | #[cfg(feature = "utf16-metric")] 580 | mod utf16_metric { 581 | use super::*; 582 | 583 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 584 | pub struct Utf16Metric(pub usize); 585 | 586 | impl Add for Utf16Metric { 587 | type Output = Self; 588 | 589 | #[inline] 590 | fn add(self, other: Self) -> Self { 591 | Self(self.0 + other.0) 592 | } 593 | } 594 | 595 | impl Sub for Utf16Metric { 
596 | type Output = Self; 597 | 598 | #[inline] 599 | fn sub(self, other: Self) -> Self { 600 | Self(self.0 - other.0) 601 | } 602 | } 603 | 604 | impl AddAssign for Utf16Metric { 605 | #[inline] 606 | fn add_assign(&mut self, other: Self) { 607 | self.0 += other.0 608 | } 609 | } 610 | 611 | impl SubAssign for Utf16Metric { 612 | #[inline] 613 | fn sub_assign(&mut self, other: Self) { 614 | self.0 -= other.0 615 | } 616 | } 617 | 618 | impl ToByteOffset for Utf16Metric { 619 | #[track_caller] 620 | #[inline] 621 | fn to_byte_offset(&self, in_str: &str) -> usize { 622 | // TODO: we should panic if the given UTF-16 offset doesn't lie on a 623 | // char boundary. Right now we just return the byte offset up to 624 | // the previous char boundary. 625 | convert::byte_of_utf16_code_unit(in_str, self.0) 626 | } 627 | } 628 | 629 | impl SummaryUpTo for Utf16Metric { 630 | #[inline] 631 | fn up_to( 632 | in_str: &str, 633 | str_summary: ChunkSummary, 634 | Self(utf16_code_unit_offset): Self, 635 | byte_offset: usize, 636 | ) -> ChunkSummary { 637 | ChunkSummary { 638 | bytes: byte_offset, 639 | 640 | line_breaks: count::line_breaks_up_to( 641 | in_str, 642 | byte_offset, 643 | str_summary.line_breaks, 644 | ), 645 | 646 | utf16_code_units: utf16_code_unit_offset, 647 | } 648 | } 649 | } 650 | 651 | impl Metric for Utf16Metric { 652 | #[inline] 653 | fn zero() -> Self { 654 | Self(0) 655 | } 656 | 657 | #[inline] 658 | fn one() -> Self { 659 | Self(1) 660 | } 661 | 662 | #[inline] 663 | fn measure(summary: &ChunkSummary) -> Self { 664 | Self(summary.utf16_code_units) 665 | } 666 | } 667 | 668 | impl SlicingMetric> 669 | for Utf16Metric 670 | { 671 | #[track_caller] 672 | #[inline] 673 | fn slice_up_to<'a>( 674 | chunk: GapSlice<'a>, 675 | utf16_code_unit_offset: Self, 676 | &summary: &ChunkSummary, 677 | ) -> (GapSlice<'a>, ChunkSummary) 678 | where 679 | 'a: 'a, 680 | { 681 | let (left, _) = 682 | chunk.split_at_offset(utf16_code_unit_offset, summary); 683 | left 684 | } 685
| 686 | #[track_caller] 687 | #[inline] 688 | fn slice_from<'a>( 689 | chunk: GapSlice<'a>, 690 | utf16_code_unit_offset: Self, 691 | &summary: &ChunkSummary, 692 | ) -> (GapSlice<'a>, ChunkSummary) 693 | where 694 | 'a: 'a, 695 | { 696 | let (_, right) = 697 | chunk.split_at_offset(utf16_code_unit_offset, summary); 698 | right 699 | } 700 | } 701 | } 702 | 703 | use str_utils::*; 704 | 705 | mod str_utils { 706 | #[cfg(not(miri))] 707 | use str_indices::lines_lf as lines; 708 | #[cfg(all(not(miri), feature = "utf16-metric"))] 709 | use str_indices::utf16; 710 | 711 | pub mod count { 712 | #[cfg(not(miri))] 713 | use super::*; 714 | 715 | #[inline] 716 | pub fn line_breaks(s: &str) -> usize { 717 | #[cfg(not(miri))] 718 | { 719 | lines::count_breaks(s) 720 | } 721 | #[cfg(miri)] 722 | { 723 | s.bytes().filter(|&b| b == b'\n').count() 724 | } 725 | } 726 | 727 | #[cfg(feature = "utf16-metric")] 728 | #[inline] 729 | pub fn utf16_code_units(s: &str) -> usize { 730 | #[cfg(not(miri))] 731 | { 732 | utf16::count(s) 733 | } 734 | #[cfg(miri)] 735 | { 736 | s.encode_utf16().count() 737 | } 738 | } 739 | 740 | #[inline(always)] 741 | pub fn line_breaks_up_to( 742 | s: &str, 743 | byte_offset: usize, 744 | tot_line_breaks: usize, 745 | ) -> usize { 746 | metric_up_to(s, byte_offset, tot_line_breaks, line_breaks) 747 | } 748 | 749 | #[cfg(feature = "utf16-metric")] 750 | #[inline(always)] 751 | pub fn utf16_code_units_up_to( 752 | s: &str, 753 | byte_offset: usize, 754 | tot_utf16_code_units: usize, 755 | ) -> usize { 756 | metric_up_to( 757 | s, 758 | byte_offset, 759 | tot_utf16_code_units, 760 | utf16_code_units, 761 | ) 762 | } 763 | 764 | #[inline(always)] 765 | fn metric_up_to( 766 | s: &str, 767 | byte_offset: usize, 768 | tot: usize, 769 | count: fn(&str) -> usize, 770 | ) -> usize { 771 | debug_assert!(s.is_char_boundary(byte_offset)); 772 | 773 | debug_assert_eq!(tot, count(s)); 774 | 775 | // Count the shorter side and get the other by subtracting it from 776 | 
// the total if necessary. 777 | if byte_offset <= s.len() / 2 { 778 | count(&s[..byte_offset]) 779 | } else { 780 | tot - count(&s[byte_offset..]) 781 | } 782 | } 783 | } 784 | 785 | pub mod convert { 786 | #[cfg(not(miri))] 787 | use super::*; 788 | 789 | #[inline] 790 | pub fn byte_of_line(s: &str, line_offset: usize) -> usize { 791 | #[cfg(not(miri))] 792 | { 793 | lines::to_byte_idx(s, line_offset) 794 | } 795 | 796 | #[cfg(miri)] 797 | { 798 | if line_offset == 0 { 799 | return 0; 800 | } 801 | 802 | let mut seen = 0; 803 | let mut stop = false; 804 | 805 | s.bytes() 806 | .take_while(|&b| { 807 | !stop && { 808 | if b == b'\n' { 809 | seen += 1; 810 | if seen == line_offset { 811 | stop = true; 812 | } 813 | } 814 | true 815 | } 816 | }) 817 | .count() 818 | } 819 | } 820 | 821 | #[cfg(feature = "utf16-metric")] 822 | #[inline] 823 | pub fn byte_of_utf16_code_unit( 824 | s: &str, 825 | utf16_code_unit_offset: usize, 826 | ) -> usize { 827 | #[cfg(not(miri))] 828 | { 829 | utf16::to_byte_idx(s, utf16_code_unit_offset) 830 | } 831 | 832 | #[cfg(miri)] 833 | { 834 | let encoded_utf16 = s.encode_utf16().collect::>(); 835 | 836 | let decoded_utf16 = String::from_utf16( 837 | &encoded_utf16[..utf16_code_unit_offset], 838 | ) 839 | .unwrap(); 840 | 841 | decoded_utf16.len() 842 | } 843 | } 844 | } 845 | } 846 | -------------------------------------------------------------------------------- /src/rope/mod.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod gap_buffer; 2 | pub(crate) mod gap_slice; 3 | pub(crate) mod iterators; 4 | pub mod metrics; 5 | mod rope; 6 | mod rope_builder; 7 | mod rope_slice; 8 | mod utils; 9 | 10 | pub use rope::Rope; 11 | pub use rope_builder::RopeBuilder; 12 | pub use rope_slice::RopeSlice; 13 | -------------------------------------------------------------------------------- /src/rope/rope_builder.rs: -------------------------------------------------------------------------------- 1 | 
use super::Rope; 2 | use super::gap_buffer::GapBuffer; 3 | use super::metrics::ChunkSummary; 4 | use super::rope::RopeChunk; 5 | use super::utils::split_adjusted; 6 | use crate::tree::TreeBuilder; 7 | 8 | /// An incremental [`Rope`](crate::Rope) builder. 9 | #[derive(Clone, Default)] 10 | pub struct RopeBuilder { 11 | tree_builder: TreeBuilder<{ Rope::arity() }, RopeChunk>, 12 | buffer: RopeChunk, 13 | buffer_len_left: usize, 14 | rope_has_trailing_newline: bool, 15 | } 16 | 17 | /// Pushes as much of the slice as possible onto the left chunk of the gap 18 | /// buffer, returning the rest (if any). 19 | /// 20 | /// Note that this doesn't update the summary of the left chunk of the gap 21 | /// buffer because it's faster to do it only once before passing the buffer to 22 | /// the `TreeBuilder`. 23 | #[inline] 24 | fn gap_buffer_push_with_remainder<'a, const MAX_BYTES: usize>( 25 | buffer: &mut GapBuffer, 26 | buffer_len_left: &mut usize, 27 | s: &'a str, 28 | ) -> Option<&'a str> { 29 | debug_assert_eq!(buffer.len_right(), 0); 30 | 31 | let len_left = *buffer_len_left; 32 | 33 | let space_left = MAX_BYTES - len_left; 34 | 35 | let (push, rest) = split_adjusted::(s, space_left); 36 | 37 | debug_assert!(push.len() <= space_left); 38 | 39 | buffer.bytes[len_left..len_left + push.len()] 40 | .copy_from_slice(push.as_bytes()); 41 | 42 | *buffer_len_left += push.len(); 43 | 44 | if rest.is_empty() { None } else { Some(rest) } 45 | } 46 | 47 | impl RopeBuilder { 48 | /// Appends `text` to the end of the `Rope` being built.
49 | #[inline] 50 | pub fn append(&mut self, text: T) -> &mut Self 51 | where 52 | T: AsRef, 53 | { 54 | let mut text = text.as_ref(); 55 | 56 | while let Some(rest) = gap_buffer_push_with_remainder( 57 | &mut self.buffer, 58 | &mut self.buffer_len_left, 59 | text, 60 | ) { 61 | self.buffer.left_summary = 62 | ChunkSummary::from(self.buffer_left_chunk()); 63 | 64 | self.tree_builder.append(core::mem::take(&mut self.buffer)); 65 | 66 | self.buffer_len_left = 0; 67 | 68 | text = rest; 69 | } 70 | 71 | self.rope_has_trailing_newline = self.buffer.has_trailing_newline(); 72 | 73 | self 74 | } 75 | 76 | #[inline] 77 | fn buffer_left_chunk(&self) -> &str { 78 | // SAFETY: we only append string slices to the left chunk of the gap 79 | // buffer so it's guaranteed to be valid UTF-8. 80 | unsafe { 81 | core::str::from_utf8_unchecked( 82 | &self.buffer.bytes[..self.buffer_len_left], 83 | ) 84 | } 85 | } 86 | 87 | /// Completes the build, consuming the `RopeBuilder` and returning the 88 | /// `Rope`. 89 | /// 90 | /// # Examples 91 | /// 92 | /// ``` 93 | /// # use crop::{Rope, RopeBuilder}; 94 | /// # 95 | /// let mut builder = RopeBuilder::new(); 96 | /// 97 | /// builder.append("ƒoo\n").append("bär\r\n").append("baz"); 98 | /// 99 | /// let rope: Rope = builder.build(); 100 | /// 101 | /// assert_eq!(rope, "ƒoo\nbär\r\nbaz"); 102 | /// ``` 103 | #[inline] 104 | pub fn build(mut self) -> Rope { 105 | if self.buffer_len_left > 0 { 106 | self.buffer.left_summary = 107 | ChunkSummary::from(self.buffer_left_chunk()); 108 | 109 | self.rope_has_trailing_newline = 110 | self.buffer.has_trailing_newline(); 111 | 112 | self.tree_builder.append(self.buffer); 113 | } 114 | 115 | Rope { 116 | tree: self.tree_builder.build(), 117 | has_trailing_newline: self.rope_has_trailing_newline, 118 | } 119 | } 120 | 121 | /// Creates a new `RopeBuilder`. 
122 | #[inline] 123 | pub fn new() -> Self { 124 | Self::default() 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/rope/utils.rs: -------------------------------------------------------------------------------- 1 | //! This module contains utility functions on strings and code to be shared 2 | //! between `Rope`s and `RopeSlice`s, `RopeChunk`s and `ChunkSlice`s. 3 | 4 | use super::iterators::Chunks; 5 | 6 | /// Adjusts the candidate byte offset to make sure it's a char boundary for 7 | /// `s`. Offsets past the end of the string will be clipped to the length of 8 | /// the string. 9 | /// 10 | /// If the initial candidate is not on a char boundary we can either go left or 11 | /// right until it is. The direction is chosen based on the value of 12 | /// `WITH_RIGHT_BIAS`: true => go right, false => go left. 13 | /// 14 | /// In every case the adjusted split point will be within ± 3 bytes from the 15 | /// initial candidate. 16 | #[inline] 17 | pub(super) fn adjust_split_point( 18 | s: &str, 19 | candidate: usize, 20 | ) -> usize { 21 | if candidate >= s.len() { 22 | return s.len(); 23 | } 24 | 25 | let mut offset = candidate; 26 | 27 | if WITH_RIGHT_BIAS { 28 | while !s.is_char_boundary(offset) { 29 | offset += 1; 30 | } 31 | } else { 32 | while !s.is_char_boundary(offset) { 33 | offset -= 1; 34 | } 35 | } 36 | 37 | offset 38 | } 39 | 40 | /// Checks equality between the chunks yielded by iterating over two 41 | /// [`Chunks`]. 42 | /// 43 | /// This is used in the `PartialEq` implementation between `Rope`s and 44 | /// `RopeSlice`s. It's assumed that if we get this far both chunks yield the 45 | /// same number of bytes. 
46 | #[inline] 47 | pub(super) fn chunks_eq_chunks( 48 | mut lhs: Chunks<'_>, 49 | mut rhs: Chunks<'_>, 50 | ) -> bool { 51 | let mut left_chunk = lhs.next().unwrap_or("").as_bytes(); 52 | let mut right_chunk = rhs.next().unwrap_or("").as_bytes(); 53 | 54 | loop { 55 | if left_chunk.len() < right_chunk.len() { 56 | if left_chunk != &right_chunk[..left_chunk.len()] { 57 | return false; 58 | } else { 59 | right_chunk = &right_chunk[left_chunk.len()..]; 60 | left_chunk = &[]; 61 | } 62 | } else if &left_chunk[..right_chunk.len()] != right_chunk { 63 | return false; 64 | } else { 65 | left_chunk = &left_chunk[right_chunk.len()..]; 66 | right_chunk = &[]; 67 | } 68 | 69 | if left_chunk.is_empty() { 70 | match lhs.next() { 71 | Some(chunk) => left_chunk = chunk.as_bytes(), 72 | 73 | // This works because both chunks are assumed to yield the same 74 | // number of bytes, so if one iterator is done then so is the 75 | // other. 76 | _ => return true, 77 | } 78 | } 79 | 80 | if right_chunk.is_empty() { 81 | match rhs.next() { 82 | Some(chunk) => right_chunk = chunk.as_bytes(), 83 | 84 | // Same as above. 85 | _ => return true, 86 | } 87 | } 88 | } 89 | } 90 | 91 | /// Checks equality between the chunks yielded by iterating over a [`Chunks`] 92 | /// and a string slice. 93 | /// 94 | /// This is used in the `PartialEq` implementation between `Rope`/`RopeSlice`s 95 | /// and strings. It's assumed that if we get this far `chunks` and `s` have the 96 | /// same number of bytes. 97 | #[inline] 98 | pub(super) fn chunks_eq_str(chunks: Chunks<'_>, s: &str) -> bool { 99 | let s = s.as_bytes(); 100 | let mut checked = 0; 101 | for chunk in chunks { 102 | if chunk.as_bytes() != &s[checked..(checked + chunk.len())] { 103 | return false; 104 | } 105 | checked += chunk.len(); 106 | } 107 | true 108 | } 109 | 110 | /// Iterates over the string slices yielded by [`Chunks`], writing the debug 111 | /// output of each chunk to a formatter. 
112 | #[inline] 113 | pub(super) fn debug_chunks( 114 | chunks: Chunks<'_>, 115 | f: &mut core::fmt::Formatter<'_>, 116 | ) -> core::fmt::Result { 117 | for chunk in chunks { 118 | debug_no_quotes(chunk, f)?; 119 | } 120 | 121 | Ok(()) 122 | } 123 | 124 | /// Writes the `Debug` output of the given string to the formatter without 125 | /// enclosing it in double quotes. 126 | pub(super) fn debug_no_quotes( 127 | s: &str, 128 | f: &mut core::fmt::Formatter<'_>, 129 | ) -> core::fmt::Result { 130 | use core::fmt::Write; 131 | 132 | let mut written = 0; 133 | 134 | for (idx, char) in s.char_indices() { 135 | let escape = char.escape_debug(); 136 | if escape.len() != 1 { 137 | f.write_str(&s[written..idx])?; 138 | for c in escape { 139 | f.write_char(c)?; 140 | } 141 | written = idx + char.len_utf8(); 142 | } 143 | } 144 | 145 | f.write_str(&s[written..]) 146 | } 147 | 148 | /// Returns whether `byte_offset` is a grapheme boundary in the string 149 | /// constructed by concatenating the chunks yielded by `chunks`. 150 | #[cfg(feature = "graphemes")] 151 | #[inline] 152 | pub(super) fn is_grapheme_boundary( 153 | mut chunks: Chunks<'_>, 154 | byte_len: usize, 155 | byte_offset: usize, 156 | ) -> bool { 157 | use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete}; 158 | 159 | debug_assert!(byte_offset <= byte_len); 160 | 161 | if byte_len == 0 { 162 | return true; 163 | } 164 | 165 | let mut cursor = GraphemeCursor::new(0, byte_len, true); 166 | cursor.set_cursor(byte_offset); 167 | 168 | let mut bytes_left = byte_len; 169 | 170 | // Iterate from the back until we reach the chunk containing the given byte 171 | // index. 172 | // 173 | // TODO: we need something like `Rope{Slice}::chunks_{from,up_to}_byte()` 174 | // if we want to make this fast. 
175 | let chunk = loop { 176 | let chunk = chunks.next_back().unwrap(); 177 | bytes_left -= chunk.len(); 178 | if bytes_left <= byte_offset { 179 | break chunk; 180 | } 181 | }; 182 | 183 | if !chunk.is_char_boundary(byte_offset - bytes_left) { 184 | return false; 185 | } 186 | 187 | let chunk_start = bytes_left; 188 | 189 | loop { 190 | match cursor.is_boundary(chunk, chunk_start) { 191 | Ok(is_boundary) => return is_boundary, 192 | 193 | Err(GraphemeIncomplete::PreContext(offset)) => { 194 | debug_assert_eq!(offset, bytes_left); 195 | let prev = chunks.next_back().unwrap(); 196 | bytes_left -= prev.len(); 197 | cursor.provide_context(prev, bytes_left); 198 | }, 199 | 200 | _ => unreachable!(), 201 | } 202 | } 203 | } 204 | 205 | #[inline] 206 | pub(super) fn split_adjusted( 207 | s: &str, 208 | candidate: usize, 209 | ) -> (&str, &str) { 210 | let split_point = adjust_split_point::(s, candidate); 211 | (&s[..split_point], &s[split_point..]) 212 | } 213 | 214 | pub mod panic_messages { 215 | #[track_caller] 216 | #[cold] 217 | #[inline(never)] 218 | pub(crate) fn byte_index_out_of_bounds( 219 | byte_index: usize, 220 | byte_len: usize, 221 | ) -> ! { 222 | debug_assert!(byte_index >= byte_len); 223 | 224 | panic!( 225 | "byte index out of bounds: the index is {byte_index} but the \ 226 | length is {byte_len}" 227 | ); 228 | } 229 | 230 | #[track_caller] 231 | #[cold] 232 | #[inline(never)] 233 | pub(crate) fn byte_offset_not_char_boundary( 234 | s: &str, 235 | byte_offset: usize, 236 | ) -> ! { 237 | debug_assert!(byte_offset <= s.len()); 238 | debug_assert!(!s.is_char_boundary(byte_offset)); 239 | 240 | // TODO: use `floor_char_boundary()` and `ceil_char_boundary()` 241 | // once they get stabilized. 
242 | 243 | let mut start = byte_offset; 244 | while !s.is_char_boundary(start) { 245 | start -= 1; 246 | } 247 | 248 | let mut end = byte_offset; 249 | while !s.is_char_boundary(end) { 250 | end += 1; 251 | } 252 | 253 | let splitting_char = s[start..end].chars().next().unwrap(); 254 | 255 | panic!( 256 | "byte offset {byte_offset} is not a char boundary: it is inside \ 257 | {splitting_char:?} (bytes {start}..{end}) of {s:?}" 258 | ); 259 | } 260 | 261 | #[track_caller] 262 | #[cold] 263 | #[inline(never)] 264 | pub(crate) fn byte_offset_out_of_bounds( 265 | byte_offset: usize, 266 | byte_len: usize, 267 | ) -> ! { 268 | debug_assert!(byte_offset > byte_len); 269 | 270 | panic!( 271 | "byte offset out of bounds: the offset is {byte_offset} but the \ 272 | length is {byte_len}" 273 | ); 274 | } 275 | 276 | #[track_caller] 277 | #[cold] 278 | #[inline(never)] 279 | pub(crate) fn byte_start_after_end( 280 | byte_start: usize, 281 | byte_end: usize, 282 | ) -> ! { 283 | debug_assert!(byte_start > byte_end); 284 | 285 | panic!( 286 | "byte start after end: the start is {byte_start} but the end is \ 287 | {byte_end}" 288 | ); 289 | } 290 | 291 | #[track_caller] 292 | #[cold] 293 | #[inline(never)] 294 | pub(crate) fn line_index_out_of_bounds( 295 | line_index: usize, 296 | line_len: usize, 297 | ) -> ! { 298 | debug_assert!(line_index >= line_len); 299 | 300 | panic!( 301 | "line index out of bounds: the index is {line_index} but the \ 302 | length is {line_len}" 303 | ); 304 | } 305 | 306 | #[track_caller] 307 | #[cold] 308 | #[inline(never)] 309 | pub(crate) fn line_offset_out_of_bounds( 310 | line_offset: usize, 311 | line_len: usize, 312 | ) -> ! 
{ 313 | debug_assert!(line_offset > line_len); 314 | 315 | panic!( 316 | "line offset out of bounds: the offset is {line_offset} but the \ 317 | length is {line_len}" 318 | ); 319 | } 320 | 321 | #[track_caller] 322 | #[cold] 323 | #[inline(never)] 324 | pub(crate) fn line_start_after_end( 325 | line_start: usize, 326 | line_end: usize, 327 | ) -> ! { 328 | debug_assert!(line_start > line_end); 329 | 330 | panic!( 331 | "line start after end: the start is {line_start} but the end is \ 332 | {line_end}" 333 | ); 334 | } 335 | 336 | #[cfg(feature = "utf16-metric")] 337 | #[track_caller] 338 | #[cold] 339 | #[inline(never)] 340 | pub(crate) fn utf16_offset_out_of_bounds( 341 | utf16_offset: usize, 342 | utf16_len: usize, 343 | ) -> ! { 344 | debug_assert!(utf16_offset > utf16_len); 345 | 346 | panic!( 347 | "UTF-16 offset out of bounds: the offset is {utf16_offset} but \ 348 | the length is {utf16_len}" 349 | ); 350 | } 351 | 352 | #[cfg(feature = "utf16-metric")] 353 | #[track_caller] 354 | #[cold] 355 | #[inline(never)] 356 | pub(crate) fn utf16_start_after_end( 357 | utf16_start: usize, 358 | utf16_end: usize, 359 | ) -> ! 
{ 360 | debug_assert!(utf16_start > utf16_end); 361 | 362 | panic!( 363 | "UTF-16 offset start after end: the start is {utf16_start} but \ 364 | the end is {utf16_end}" 365 | ); 366 | } 367 | } 368 | -------------------------------------------------------------------------------- /src/tree/mod.rs: -------------------------------------------------------------------------------- 1 | mod leaves; 2 | mod node; 3 | mod node_internal; 4 | mod node_leaf; 5 | mod tiny_arc; 6 | mod traits; 7 | mod tree; 8 | mod tree_builder; 9 | mod tree_slice; 10 | mod units; 11 | 12 | use iter_chain::ExactChain; 13 | pub use leaves::Leaves; 14 | use node::Node; 15 | use node_internal::Inode; 16 | use node_leaf::Lnode; 17 | use tiny_arc::Arc; 18 | pub use traits::*; 19 | pub use tree::Tree; 20 | pub use tree_builder::TreeBuilder; 21 | pub use tree_slice::TreeSlice; 22 | pub use units::Units; 23 | 24 | mod iter_chain { 25 | //! This module contains a `Chain` iterator similar to 26 | //! [`core::iter::Chain`] except it implements `ExactSizeIterator` when 27 | //! the iterators being chained are both `ExactSizeIterator`. 28 | //! 29 | //! See [1] or [2] for why this is needed. 30 | //! 31 | //! [1]: https://github.com/rust-lang/rust/issues/34433 32 | //! 
[2]: https://github.com/rust-lang/rust/pull/66531 33 | 34 | pub(crate) struct Chain { 35 | chain: core::iter::Chain, 36 | yielded: usize, 37 | total: usize, 38 | } 39 | 40 | pub(crate) trait ExactChain: 41 | ExactSizeIterator 42 | { 43 | fn exact_chain(self, other: U) -> Chain 44 | where 45 | Self: Sized, 46 | U: IntoIterator, 47 | U::IntoIter: ExactSizeIterator; 48 | } 49 | 50 | impl ExactChain for T 51 | where 52 | T: ExactSizeIterator, 53 | { 54 | #[inline] 55 | fn exact_chain(self, other: U) -> Chain 56 | where 57 | Self: Sized, 58 | U: IntoIterator, 59 | U::IntoIter: ExactSizeIterator, 60 | { 61 | let other = other.into_iter(); 62 | Chain { 63 | yielded: 0, 64 | total: self.len() + other.len(), 65 | chain: self.chain(other), 66 | } 67 | } 68 | } 69 | 70 | impl Iterator for Chain 71 | where 72 | I1: ExactSizeIterator, 73 | I2: ExactSizeIterator, 74 | { 75 | type Item = T; 76 | 77 | #[inline] 78 | fn next(&mut self) -> Option { 79 | let item = self.chain.next()?; 80 | self.yielded += 1; 81 | Some(item) 82 | } 83 | 84 | #[inline] 85 | fn size_hint(&self) -> (usize, Option) { 86 | let exact = self.len(); 87 | (exact, Some(exact)) 88 | } 89 | } 90 | 91 | impl ExactSizeIterator for Chain 92 | where 93 | I1: ExactSizeIterator, 94 | I2: ExactSizeIterator, 95 | { 96 | #[inline] 97 | fn len(&self) -> usize { 98 | self.total - self.yielded 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/tree/node.rs: -------------------------------------------------------------------------------- 1 | use super::traits::{BalancedLeaf, Leaf, Metric, SlicingMetric}; 2 | use super::{Arc, Inode, Lnode}; 3 | 4 | #[derive(Clone)] 5 | pub(super) enum Node { 6 | Internal(Inode), 7 | Leaf(Lnode), 8 | } 9 | 10 | impl Default for Node { 11 | #[inline] 12 | fn default() -> Self { 13 | Node::Leaf(Lnode::default()) 14 | } 15 | } 16 | 17 | impl core::fmt::Debug for Node { 18 | #[inline] 19 | fn fmt(&self, f: &mut core::fmt::Formatter) -> 
core::fmt::Result { 20 | if !f.alternate() { 21 | match self { 22 | Self::Internal(inode) => { 23 | f.debug_tuple("Internal").field(&inode).finish() 24 | }, 25 | Self::Leaf(leaf) => { 26 | f.debug_tuple("Leaf").field(&leaf).finish() 27 | }, 28 | } 29 | } else { 30 | match self { 31 | Self::Internal(inode) => write!(f, "{inode:#?}"), 32 | Self::Leaf(leaf) => write!(f, "{leaf:#?}"), 33 | } 34 | } 35 | } 36 | } 37 | 38 | impl Node { 39 | /// Asserts the invariants of this node, then if it's an inode it calls 40 | /// itself recursively on all of its children. 41 | pub(super) fn assert_invariants(&self) { 42 | match self { 43 | Node::Internal(inode) => { 44 | inode.assert_invariants(); 45 | 46 | for child in inode.children() { 47 | child.assert_invariants() 48 | } 49 | }, 50 | 51 | Node::Leaf(leaf) => { 52 | leaf.assert_invariants(); 53 | }, 54 | } 55 | } 56 | 57 | /// # Panics 58 | /// 59 | /// Panics if `other` is at a different depth. 60 | #[inline] 61 | pub(super) fn balance(&mut self, other: &mut Self) 62 | where 63 | L: BalancedLeaf, 64 | { 65 | debug_assert_eq!(self.depth(), other.depth()); 66 | 67 | match (self, other) { 68 | (Node::Internal(left), Node::Internal(right)) => { 69 | left.balance(right) 70 | }, 71 | 72 | (Node::Leaf(left), Node::Leaf(right)) => left.balance(right), 73 | 74 | _ => unreachable!(), 75 | } 76 | } 77 | 78 | #[inline] 79 | pub(super) fn base_measure(&self) -> L::BaseMetric { 80 | self.measure::() 81 | } 82 | 83 | #[track_caller] 84 | #[inline] 85 | pub(super) fn convert_measure(&self, up_to: M1) -> M2 86 | where 87 | M1: SlicingMetric, 88 | M2: Metric, 89 | { 90 | debug_assert!(up_to <= self.measure::()); 91 | 92 | let mut m1 = M1::zero(); 93 | let mut m2 = M2::zero(); 94 | 95 | let mut node = self; 96 | 97 | 'outer: loop { 98 | match node { 99 | Node::Internal(inode) => { 100 | for child in inode.children() { 101 | let child_m1 = child.measure::(); 102 | 103 | if m1 + child_m1 >= up_to { 104 | node = &**child; 105 | continue 'outer; 
106 | } else { 107 | m1 += child_m1; 108 | m2 += child.measure::(); 109 | } 110 | } 111 | 112 | unreachable!(); 113 | }, 114 | 115 | Node::Leaf(leaf) => { 116 | let (_, left_summary) = M1::slice_up_to( 117 | leaf.as_slice(), 118 | up_to - m1, 119 | leaf.summary(), 120 | ); 121 | 122 | return m2 + M2::measure(&left_summary); 123 | }, 124 | } 125 | } 126 | } 127 | 128 | #[inline] 129 | pub(super) fn depth(&self) -> usize { 130 | match self { 131 | Node::Internal(inode) => inode.depth(), 132 | Node::Leaf(_) => 0, 133 | } 134 | } 135 | 136 | #[inline] 137 | pub(super) fn get_leaf(&self) -> &Lnode { 138 | match self { 139 | Node::Internal(_) => panic!(""), 140 | Node::Leaf(leaf) => leaf, 141 | } 142 | } 143 | 144 | #[inline] 145 | pub(super) fn get_leaf_mut(&mut self) -> &mut Lnode { 146 | match self { 147 | Node::Internal(_) => panic!(""), 148 | Node::Leaf(leaf) => leaf, 149 | } 150 | } 151 | 152 | #[inline] 153 | pub(super) fn get_internal(&self) -> &Inode { 154 | match self { 155 | Node::Internal(inode) => inode, 156 | Node::Leaf(_) => panic!(""), 157 | } 158 | } 159 | 160 | #[inline] 161 | pub(super) fn get_internal_mut(&mut self) -> &mut Inode { 162 | match self { 163 | Node::Internal(inode) => inode, 164 | Node::Leaf(_) => panic!(""), 165 | } 166 | } 167 | 168 | #[inline] 169 | pub(super) fn is_empty(&self) -> bool { 170 | match self { 171 | Node::Internal(inode) => inode.is_empty(), 172 | Node::Leaf(leaf) => leaf.is_empty(), 173 | } 174 | } 175 | 176 | #[allow(dead_code)] 177 | #[inline] 178 | pub(super) fn is_internal(&self) -> bool { 179 | matches!(self, Node::Internal(_)) 180 | } 181 | 182 | #[inline] 183 | pub(super) fn is_leaf(&self) -> bool { 184 | matches!(self, Node::Leaf(_)) 185 | } 186 | 187 | #[inline] 188 | pub(super) fn is_underfilled(&self) -> bool 189 | where 190 | L: BalancedLeaf, 191 | { 192 | match self { 193 | Node::Internal(inode) => inode.is_underfilled(), 194 | Node::Leaf(leaf) => leaf.is_underfilled(), 195 | } 196 | } 197 | 198 | #[inline] 
199 | pub(super) fn leaf_at_measure(&self, measure: M) -> (L::Slice<'_>, M) 200 | where 201 | M: Metric, 202 | { 203 | debug_assert!(measure <= self.measure::()); 204 | 205 | let mut measured = M::zero(); 206 | 207 | let mut node = self; 208 | 209 | loop { 210 | match node { 211 | Node::Internal(inode) => { 212 | let (child_idx, offset) = 213 | inode.child_at_measure(measure - measured); 214 | 215 | measured += offset; 216 | 217 | node = inode.child(child_idx); 218 | }, 219 | 220 | Node::Leaf(leaf) => { 221 | return (leaf.as_slice(), measured); 222 | }, 223 | } 224 | } 225 | } 226 | 227 | #[inline] 228 | pub(super) fn leaf_count(&self) -> usize { 229 | match self { 230 | Node::Internal(inode) => inode.leaf_count(), 231 | Node::Leaf(_) => 1, 232 | } 233 | } 234 | 235 | #[inline] 236 | pub(super) fn measure(&self) -> M 237 | where 238 | M: Metric, 239 | { 240 | match self { 241 | Node::Internal(inode) => inode.measure(), 242 | Node::Leaf(leaf) => leaf.measure(), 243 | } 244 | } 245 | 246 | /// Continuously replaces the node with its child as long as it's an internal 247 | /// node with a single child. Note that an inode might become a leaf node 248 | /// after calling this. 249 | /// 250 | /// # Panics 251 | /// 252 | /// Panics if the `Arc` enclosing the root has a strong counter > 1. 
253 | #[inline] 254 | pub(super) fn replace_with_single_child(node: &mut Arc) { 255 | while let Self::Internal(inode) = Arc::get_mut(node).unwrap() { 256 | if inode.len() == 1 { 257 | *node = Arc::clone(inode.first()); 258 | } else { 259 | break; 260 | } 261 | } 262 | } 263 | 264 | #[inline] 265 | pub(super) fn summary(&self) -> &L::Summary { 266 | match self { 267 | Node::Internal(inode) => inode.summary(), 268 | Node::Leaf(leaf) => leaf.summary(), 269 | } 270 | } 271 | } 272 | -------------------------------------------------------------------------------- /src/tree/node_leaf.rs: -------------------------------------------------------------------------------- 1 | use core::ops::RangeBounds; 2 | 3 | use super::traits::{BalancedLeaf, Leaf, Metric, ReplaceableLeaf}; 4 | 5 | #[derive(Clone, Default)] 6 | pub(super) struct Lnode { 7 | value: L, 8 | summary: L::Summary, 9 | } 10 | 11 | impl core::fmt::Debug for Lnode { 12 | #[inline] 13 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 14 | if !f.alternate() { 15 | f.debug_struct("Lnode") 16 | .field("value", &self.value) 17 | .field("summary", &self.summary) 18 | .finish() 19 | } else { 20 | write!(f, "{:?} — {:?}", self.value, self.summary) 21 | } 22 | } 23 | } 24 | 25 | impl From for Lnode { 26 | #[inline] 27 | fn from(value: L) -> Self { 28 | Self { summary: value.summarize(), value } 29 | } 30 | } 31 | 32 | impl From<(L, L::Summary)> for Lnode { 33 | #[inline] 34 | fn from((value, summary): (L, L::Summary)) -> Self { 35 | Self { value, summary } 36 | } 37 | } 38 | 39 | impl Lnode { 40 | pub(super) fn assert_invariants(&self) { 41 | assert_eq!(self.summary, self.value.summarize()); 42 | } 43 | 44 | #[inline] 45 | pub(super) fn as_slice(&self) -> L::Slice<'_> { 46 | self.value.as_slice() 47 | } 48 | 49 | #[inline] 50 | pub(super) fn balance(&mut self, other: &mut Self) 51 | where 52 | L: BalancedLeaf, 53 | { 54 | L::balance_leaves( 55 | (&mut self.value, &mut self.summary), 56 | (&mut other.value, 
&mut other.summary), 57 | ) 58 | } 59 | 60 | #[inline] 61 | pub(super) fn base_measure(&self) -> L::BaseMetric { 62 | self.measure::() 63 | } 64 | 65 | #[inline] 66 | pub(super) fn is_underfilled(&self) -> bool 67 | where 68 | L: BalancedLeaf, 69 | { 70 | self.value.is_underfilled(self.summary()) 71 | } 72 | 73 | #[inline] 74 | pub(super) fn is_empty(&self) -> bool { 75 | self.base_measure() == L::BaseMetric::zero() 76 | } 77 | 78 | #[inline] 79 | pub(super) fn measure>(&self) -> M { 80 | M::measure(self.summary()) 81 | } 82 | 83 | #[inline] 84 | pub(super) fn new(value: L, summary: L::Summary) -> Self { 85 | Self { value, summary } 86 | } 87 | 88 | #[inline] 89 | pub(super) fn remove_up_to(&mut self, up_to: M) 90 | where 91 | M: Metric, 92 | L: ReplaceableLeaf, 93 | { 94 | self.value.remove_up_to(&mut self.summary, up_to); 95 | } 96 | 97 | #[track_caller] 98 | #[inline] 99 | pub(super) fn replace( 100 | &mut self, 101 | range: R, 102 | replace_with: L::Replacement<'_>, 103 | ) -> Option + use> 104 | where 105 | M: Metric, 106 | R: RangeBounds, 107 | L: ReplaceableLeaf, 108 | { 109 | self.value 110 | .replace(&mut self.summary, range, replace_with) 111 | .map(|extra_leaves| extra_leaves.map(Self::from)) 112 | } 113 | 114 | #[inline] 115 | pub(super) fn summary(&self) -> &L::Summary { 116 | &self.summary 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/tree/tiny_arc.rs: -------------------------------------------------------------------------------- 1 | //! This module contains an implementation of a tiny `Arc` without weak 2 | //! references, inspired by the `Arc` implementation in [rclite] (of course, 3 | //! all bugs are mine). 4 | //! 5 | //! [rclite]: https://github.com/fereidani/rclite 6 | 7 | use alloc::boxed::Box; 8 | use core::mem::MaybeUninit; 9 | use core::ptr::{NonNull, addr_of_mut}; 10 | use core::sync::atomic; 11 | 12 | /// A tiny `Arc` without weak references. 
13 | pub(super) struct Arc { 14 | ptr: NonNull>, 15 | } 16 | 17 | unsafe impl Send for Arc {} 18 | unsafe impl Sync for Arc {} 19 | 20 | struct ArcInner { 21 | counter: atomic::AtomicUsize, 22 | data: T, 23 | } 24 | 25 | unsafe impl Send for ArcInner {} 26 | unsafe impl Sync for ArcInner {} 27 | 28 | impl Arc { 29 | #[inline] 30 | pub(super) fn get_mut(this: &mut Self) -> Option<&mut T> { 31 | if this.is_unique() { 32 | // SAFETY: this is the only `Arc` pointing to the inner value, so 33 | // we can safely mutate it. 34 | unsafe { Some(Self::get_mut_unchecked(this)) } 35 | } else { 36 | None 37 | } 38 | } 39 | 40 | #[inline] 41 | pub(super) unsafe fn get_mut_unchecked(this: &mut Self) -> &mut T { 42 | unsafe { &mut this.ptr.as_mut().data } 43 | } 44 | 45 | #[inline] 46 | fn inner(&self) -> &ArcInner { 47 | // SAFETY: the inner pointer is valid as long as there's at least one 48 | // `Arc` pointing to it. 49 | unsafe { self.ptr.as_ref() } 50 | } 51 | 52 | #[inline] 53 | fn is_unique(&self) -> bool { 54 | self.inner().counter.load(atomic::Ordering::Relaxed) == 1 55 | } 56 | 57 | #[inline] 58 | pub(super) fn new(data: T) -> Self { 59 | let inner = ArcInner { counter: atomic::AtomicUsize::new(1), data }; 60 | 61 | // SAFETY: the pointer returned by `Box::into_raw()` is guaranteed to 62 | // be non-null. 63 | let ptr = 64 | unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(inner))) }; 65 | 66 | Self { ptr } 67 | } 68 | 69 | #[inline] 70 | pub(super) fn ptr_eq(this: &Self, other: &Self) -> bool { 71 | this.ptr == other.ptr 72 | } 73 | } 74 | 75 | impl Arc { 76 | #[inline] 77 | pub(super) fn make_mut(this: &mut Self) -> &mut T { 78 | if !this.is_unique() { 79 | *this = this.optimized_clone(); 80 | } 81 | 82 | // SAFETY: either the reference was unique or we just cloned the T. 83 | unsafe { Self::get_mut_unchecked(this) } 84 | } 85 | 86 | #[inline] 87 | fn optimized_clone(&self) -> Self { 88 | // See the homonymous function in `rclite` for more details. 
89 | 90 | let mut buffer: Box>> = 91 | Box::new(MaybeUninit::uninit()); 92 | 93 | let ptr = unsafe { 94 | let ptr = buffer.as_mut_ptr(); 95 | // Here we use `write()` instead of assignment via `=` to avoid 96 | // dropping the old, uninitialized value. 97 | addr_of_mut!((*ptr).data).write(T::clone(self)); 98 | (*ptr).counter = atomic::AtomicUsize::new(1); 99 | NonNull::new_unchecked(Box::into_raw(buffer) as *mut ArcInner) 100 | }; 101 | 102 | Arc { ptr } 103 | } 104 | } 105 | 106 | impl core::fmt::Debug for Arc { 107 | #[inline] 108 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 109 | core::fmt::Debug::fmt(&**self, f) 110 | } 111 | } 112 | 113 | impl Default for Arc { 114 | #[inline] 115 | fn default() -> Self { 116 | Self::new(T::default()) 117 | } 118 | } 119 | 120 | impl Clone for Arc { 121 | #[inline] 122 | fn clone(&self) -> Self { 123 | let old = self.inner().counter.fetch_add(1, atomic::Ordering::Relaxed); 124 | 125 | // Check for overflow on the counter. See the `Arc` implementation in 126 | // `alloc` for more details. 127 | if unlikely(old > isize::MAX as usize) { 128 | drop(Self { ptr: self.ptr }); 129 | panic!("Arc counter overflow"); 130 | } 131 | 132 | Self { ptr: self.ptr } 133 | } 134 | } 135 | 136 | impl core::ops::Deref for Arc { 137 | type Target = T; 138 | 139 | #[inline] 140 | fn deref(&self) -> &Self::Target { 141 | &self.inner().data 142 | } 143 | } 144 | 145 | impl Drop for Arc { 146 | #[inline] 147 | fn drop(&mut self) { 148 | let old = self.inner().counter.fetch_sub(1, atomic::Ordering::Release); 149 | 150 | if old == 1 { 151 | atomic::fence(atomic::Ordering::Acquire); 152 | 153 | // SAFETY: this is the last owner of the `Arc` so the memory has 154 | // not yet been reclaimed by a previous call to `Box::from_raw()`. 155 | let _ = unsafe { Box::from_raw(self.ptr.as_ptr()) }; 156 | } 157 | } 158 | } 159 | 160 | use predictions::*; 161 | 162 | mod predictions { 163 | //! Hints for branch prediction. 
164 | 165 | #[inline(always)] 166 | pub(super) fn unlikely(b: bool) -> bool { 167 | if b { 168 | cold(); 169 | } 170 | b 171 | } 172 | 173 | #[inline(always)] 174 | #[cold] 175 | fn cold() {} 176 | } 177 | -------------------------------------------------------------------------------- /src/tree/traits.rs: -------------------------------------------------------------------------------- 1 | use core::fmt::Debug; 2 | use core::ops::{Add, AddAssign, RangeBounds, Sub, SubAssign}; 3 | 4 | pub trait Summarize: Debug { 5 | type Summary: Debug 6 | + Default 7 | + Clone 8 | + for<'a> Add<&'a Self::Summary, Output = Self::Summary> 9 | + for<'a> Sub<&'a Self::Summary, Output = Self::Summary> 10 | + for<'a> AddAssign<&'a Self::Summary> 11 | + for<'a> SubAssign<&'a Self::Summary> 12 | + PartialEq; 13 | 14 | fn summarize(&self) -> Self::Summary; 15 | } 16 | 17 | pub trait BaseMeasured: Summarize { 18 | type BaseMetric: Metric; 19 | } 20 | 21 | pub trait AsSlice: Summarize { 22 | type Slice<'a>: Copy + Summarize 23 | where 24 | Self: 'a; 25 | 26 | fn as_slice(&self) -> Self::Slice<'_>; 27 | } 28 | 29 | pub trait Leaf: Summarize + BaseMeasured + AsSlice {} 30 | 31 | impl Leaf for T {} 32 | 33 | pub trait BalancedLeaf: Leaf + for<'a> From> { 34 | /// Returns whether the leaf node is too small to be on its own and should 35 | /// be rebalanced with another leaf. 36 | fn is_underfilled(&self, summary: &Self::Summary) -> bool; 37 | 38 | /// Balance two leaves. 39 | /// 40 | /// The `right` leaf can be left empty if the two leaves can be combined 41 | /// into a single one. 42 | fn balance_leaves( 43 | left: (&mut Self, &mut Self::Summary), 44 | right: (&mut Self, &mut Self::Summary), 45 | ); 46 | } 47 | 48 | pub trait ReplaceableLeaf>: BalancedLeaf { 49 | type Replacement<'a>; 50 | 51 | type ExtraLeaves: ExactSizeIterator; 52 | 53 | /// Replace the contents of the leaf in the range with the given 54 | /// replacement. 
55 | /// 56 | /// If that would cause the leaf to be too big the function can return an 57 | /// iterator over the leaves to insert right after this leaf. Note that in 58 | /// this case both this leaf and all the leaves yielded by the iterator are 59 | /// assumed to not be underfilled. 60 | fn replace( 61 | &mut self, 62 | summary: &mut Self::Summary, 63 | range: R, 64 | replace_with: Self::Replacement<'_>, 65 | ) -> Option 66 | where 67 | R: RangeBounds; 68 | 69 | fn remove_up_to(&mut self, summary: &mut Self::Summary, up_to: M); 70 | } 71 | 72 | pub trait Metric: 73 | Debug 74 | + Copy 75 | + Ord 76 | + Add 77 | + Sub 78 | + AddAssign 79 | + SubAssign 80 | { 81 | /// The identity element of this metric with respect to addition. 82 | /// 83 | /// Given an implementor `M` of this trait, for all instances `m` of `M` 84 | /// it should hold `m == m + M::zero()`. 85 | fn zero() -> Self; 86 | 87 | /// The smallest value larger than [`zero`](Self::zero()) this metric can 88 | /// measure. 89 | fn one() -> Self; 90 | 91 | /// Returns the measure of the summary according to this metric. 92 | fn measure(summary: &Summary) -> Self; 93 | } 94 | 95 | /// Metrics that can be used to slice `Tree`s and `TreeSlice`s. 96 | pub trait SlicingMetric: Metric { 97 | fn slice_up_to<'a>( 98 | slice: L::Slice<'a>, 99 | up_to: Self, 100 | summary: &L::Summary, 101 | ) -> (L::Slice<'a>, L::Summary); 102 | 103 | fn slice_from<'a>( 104 | slice: L::Slice<'a>, 105 | from: Self, 106 | summary: &L::Summary, 107 | ) -> (L::Slice<'a>, L::Summary); 108 | } 109 | 110 | /// Allows iterating forward over the units of this metric. 111 | pub trait UnitMetric: Metric { 112 | /// Returns a 113 | /// `(first_slice, first_summary, advance, rest_slice, rest_summary)` 114 | /// tuple, where `advance` is equal to `first_summary` **plus** the summary 115 | /// of any content between the end of `first_slice` and the start of 116 | /// `rest_slice` that's not included in either of them. 
117 | /// 118 | /// It follows that if `slice == first_slice ++ rest_slice` (where `++` 119 | /// denotes concatenation) the `first_summary` and the `advance` should be 120 | /// equal. 121 | /// 122 | /// In any case it must always hold `summary == advance + rest_summary`. 123 | #[allow(clippy::type_complexity)] 124 | fn first_unit<'a>( 125 | slice: L::Slice<'a>, 126 | summary: &L::Summary, 127 | ) -> (L::Slice<'a>, L::Summary, L::Summary, L::Slice<'a>, L::Summary); 128 | } 129 | 130 | /// Allows iterating backward over the units of this metric. 131 | pub trait DoubleEndedUnitMetric: UnitMetric { 132 | /// Returns a 133 | /// `(rest_slice, rest_summary, last_slice, last_summary, advance)` 134 | /// tuple, where `advance` is equal to `last_summary` **plus** the summary 135 | /// of any content between the end of `last_slice` and the end of the 136 | /// original `slice`. 137 | /// 138 | /// It follows that if `slice == rest_slice ++ last_slice` (where `++` 139 | /// denotes concatenation) the `last_summary` and the `advance` should be 140 | /// equal. 141 | /// 142 | /// In any case it must always hold `summary == rest_summary + advance`. 143 | #[allow(clippy::type_complexity)] 144 | fn last_unit<'a>( 145 | slice: L::Slice<'a>, 146 | summary: &L::Summary, 147 | ) -> (L::Slice<'a>, L::Summary, L::Slice<'a>, L::Summary, L::Summary); 148 | 149 | /// It's possible for a leaf slice to contain some content that extends 150 | /// past the end of its last `M`-unit. This is referred to as "the 151 | /// remainder of the leaf divided by `M`". 152 | /// 153 | /// Returns a `(rest_slice, rest_summary, remainder, remainder_summary)` 154 | /// tuple. Note that unlike [`last_unit`](Self::last_unit()), this function 155 | /// does not allow an `advance` to be returned. Instead `rest_slice` and 156 | /// `remainder` should always concatenate up to the original `slice` and their 157 | /// summaries should sum up to the original `summary`. 
158 | /// 159 | /// The remainder can be empty if the last `M`-unit coincides with the end 160 | /// of the leaf slice. 161 | #[allow(clippy::type_complexity)] 162 | fn remainder<'a>( 163 | slice: L::Slice<'a>, 164 | summary: &L::Summary, 165 | ) -> (L::Slice<'a>, L::Summary, L::Slice<'a>, L::Summary); 166 | } 167 | -------------------------------------------------------------------------------- /src/tree/tree_builder.rs: -------------------------------------------------------------------------------- 1 | use alloc::vec::Vec; 2 | 3 | use super::traits::{BalancedLeaf, Leaf}; 4 | use super::{Arc, Inode, Lnode, Node, Tree}; 5 | 6 | /// An incremental [`Tree`] builder. 7 | #[derive(Clone)] 8 | pub struct TreeBuilder { 9 | /// A stack of internal nodes. 10 | /// 11 | /// # Invariants 12 | /// 13 | /// - all the nodes at every stack level are internal nodes; 14 | /// 15 | /// - all the inodes within a stack level have the same depth; 16 | /// 17 | /// - all the vectors at every stack level have a length strictly less than 18 | /// `ARITY` (but it could also be zero, i.e. all levels except the first 19 | /// one can be empty); 20 | /// 21 | /// - the inodes are grouped in order of descending depth, with each stack 22 | /// level containing inodes of depth one less than the previous level; 23 | /// 24 | /// - every inode at every stack level is completely full, i.e. for every 25 | /// inode it holds `inode.leaf_count() == max_children ^ inode.depth()`; 26 | /// 27 | /// - all the inodes in the last stack level (assuming there are any) have 28 | /// a depth of 1. 29 | stack: Vec>>>, 30 | 31 | /// A bunch of leaves waiting to be grouped into an internal node. 
32 | leaves: Vec>>, 33 | } 34 | 35 | impl Default for TreeBuilder { 36 | #[inline] 37 | fn default() -> Self { 38 | Self { stack: Vec::new(), leaves: Vec::with_capacity(ARITY) } 39 | } 40 | } 41 | 42 | impl TreeBuilder { 43 | #[inline] 44 | pub fn append(&mut self, leaf: L) { 45 | debug_assert!(self.leaves.len() < ARITY); 46 | 47 | self.leaves.push(Arc::new(Node::Leaf(Lnode::from(leaf)))); 48 | 49 | if self.leaves.len() < ARITY { 50 | return; 51 | } 52 | 53 | let mut inode = Arc::new(Node::Internal(Inode::from_children( 54 | self.leaves.drain(..), 55 | ))); 56 | 57 | let mut stack_idx = match self.stack.len() { 58 | 0 => { 59 | let mut first_level = Vec::with_capacity(ARITY); 60 | first_level.push(inode); 61 | self.stack.push(first_level); 62 | return; 63 | }, 64 | 65 | n => n - 1, 66 | }; 67 | 68 | loop { 69 | let stack_level = &mut self.stack[stack_idx]; 70 | 71 | debug_assert!( 72 | stack_level.is_empty() 73 | || stack_level[0].depth() == inode.depth() 74 | ); 75 | 76 | debug_assert!(stack_level.len() < ARITY); 77 | 78 | stack_level.push(inode); 79 | 80 | if stack_level.len() < ARITY { 81 | return; 82 | } 83 | 84 | inode = Arc::new(Node::Internal(Inode::from_children( 85 | stack_level.drain(..), 86 | ))); 87 | 88 | if stack_idx == 0 { 89 | stack_level.push(inode); 90 | self.stack.push(Vec::with_capacity(ARITY)); 91 | 92 | #[cfg(debug_assertions)] 93 | for level in &self.stack[1..] { 94 | debug_assert!(level.is_empty()); 95 | } 96 | 97 | return; 98 | } else { 99 | stack_idx -= 1; 100 | } 101 | } 102 | } 103 | 104 | /// Completes the build and outputs the final `Tree`, consuming `self`. 105 | #[inline] 106 | pub fn build(mut self) -> Tree 107 | where 108 | L: Default + BalancedLeaf + Clone, 109 | { 110 | if self.stack.is_empty() { 111 | if self.leaves.is_empty() { 112 | // No internal nodes on the stack and no leaves, this means 113 | // that `append` has never been called and we're building an 114 | // empty Tree. This is why we need the `Default` bound on `L`. 
115 | return Tree::default(); 116 | } else if self.leaves.len() == 1 { 117 | return Tree { root: self.leaves.into_iter().next().unwrap() }; 118 | } 119 | } 120 | 121 | let mut root = if !self.leaves.is_empty() { 122 | debug_assert!(self.leaves.len() < ARITY); 123 | Arc::new(Node::Internal(Inode::from_children(self.leaves))) 124 | } else { 125 | loop { 126 | let stack_level = self.stack.pop().unwrap(); 127 | 128 | match stack_level.len() { 129 | 0 => continue, 130 | 131 | 1 if self.stack.is_empty() => { 132 | // The stack is now empty and there was a single node 133 | // in its first level. That node is the root. 134 | break stack_level.into_iter().next().unwrap(); 135 | }, 136 | 137 | _ => { 138 | break Arc::new(Node::Internal(Inode::from_children( 139 | stack_level, 140 | ))); 141 | }, 142 | } 143 | } 144 | }; 145 | 146 | while let Some(mut stack_level) = self.stack.pop() { 147 | debug_assert!( 148 | stack_level.is_empty() 149 | || stack_level[0].depth() == root.depth() 150 | ); 151 | 152 | debug_assert!(stack_level.len() < ARITY); 153 | 154 | stack_level.push(root); 155 | 156 | root = Arc::new(Node::Internal(Inode::from_children(stack_level))); 157 | } 158 | 159 | { 160 | // The only way the root can be a leaf node is if the stack is 161 | // empty and `self.leaves` contains a single leaf, and that case 162 | // was handled at the start of this function. 
163 | let root = Arc::get_mut(&mut root).unwrap().get_internal_mut(); 164 | 165 | root.balance_right_side(); 166 | } 167 | 168 | Node::replace_with_single_child(&mut root); 169 | 170 | Tree { root } 171 | } 172 | 173 | #[allow(dead_code)] 174 | #[inline] 175 | pub fn new() -> Self { 176 | Self::default() 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /tests/common/mod.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | pub const TINY: &str = include_str!("tiny.txt"); 4 | pub const SMALL: &str = include_str!("small.txt"); 5 | pub const MEDIUM: &str = include_str!("medium.txt"); 6 | pub const LARGE: &str = include_str!("large.txt"); 7 | 8 | /// A cursed version of a lorem ipsum paragraph taken from [this online 9 | /// tool][1] with mixed line breaks (LF and CRLF). 10 | /// 11 | /// [1]: https://jeff.cis.cabrillo.edu/tools/homoglyphs 12 | pub const CURSED_LIPSUM: &str = "Ḽơᶉëᶆ ȋṕšᶙṁ\nḍỡḽǭᵳ ʂǐť ӓṁệẗ,\r\n \ 13 | ĉṓɲṩḙċťᶒțûɾ \nấɖḯƥĭ\r\nṩčįɳġ ḝłįʈ, șế\r\nᶑ \ 14 | ᶁⱺ ẽḭŭŝḿꝋď\n ṫĕᶆᶈṓɍ ỉñḉīḑȋᵭṵńť \nṷŧ ḹẩḇőꝛế \ 15 | éȶ đꝍꞎ\r\nôꝛȇ ᵯáꞡ\r\nᶇā ąⱡ\nîɋṹẵ."; 16 | 17 | // The following test vectors were taken from Ropey. 18 | 19 | /// 127 bytes, 103 chars, 1 line 20 | pub const TEXT: &str = "Hello there! How're you doing? It's a fine day, \ 21 | isn't it? Aren't you glad we're alive? \ 22 | こんにちは、みんなさん!"; 23 | 24 | /// 124 bytes, 100 chars, 4 lines 25 | pub const TEXT_LINES: &str = "Hello there! 
How're you doing?\nIt's a fine \ 26 | day, isn't it?\nAren't you glad we're \ 27 | alive?\nこんにちは、みんなさん!"; 28 | 29 | /// 127 bytes, 107 chars, 111 utf16 code units, 1 line 30 | pub const TEXT_EMOJI: &str = "Hello there!🐸 How're you doing?🐸 It's a \ 31 | fine day, isn't it?🐸 Aren't you glad we're \ 32 | alive?🐸 こんにちは、みんなさん!"; 33 | -------------------------------------------------------------------------------- /tests/common/small.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas sit amet tellus 2 | nec turpis feugiat semper. Nam at nulla laoreet, finibus eros sit amet, fringilla 3 | mauris. Fusce vestibulum nec ligula efficitur laoreet. Nunc orci leo, varius eget 4 | ligula vulputate, consequat eleifend nisi. Cras justo purus, imperdiet a augue 5 | malesuada, convallis cursus libero. Fusce pretium arcu in elementum laoreet. Duis 6 | mauris nulla, suscipit at est nec, malesuada pellentesque eros. Quisque semper porta 7 | malesuada. Nunc hendrerit est ac faucibus mollis. Nam fermentum id libero sed 8 | egestas. Duis a accumsan sapien. Nam neque diam, congue non erat et, porta sagittis 9 | turpis. Vivamus vitae mauris sit amet massa mollis molestie. Morbi scelerisque, 10 | augue id congue imperdiet, felis lacus euismod dui, vitae facilisis massa dui quis 11 | sapien. Vivamus hendrerit a urna a lobortis. 12 | 13 | Donec ut suscipit risus. Vivamus dictum auctor vehicula. Sed lacinia ligula sit amet 14 | urna tristique commodo. Sed sapien risus, egestas ac tempus vel, pellentesque sed 15 | velit. Duis pulvinar blandit suscipit. Curabitur viverra dignissim est quis ornare. 16 | Nam et lectus purus. Integer sed augue vehicula, volutpat est vel, convallis justo. 17 | Suspendisse a convallis nibh, pulvinar rutrum nisi. Fusce ultrices accumsan mauris 18 | vitae ornare. Cras elementum et ante at tincidunt. Sed luctus scelerisque lobortis. 19 | Sed vel dictum enim. 
Fusce quis arcu euismod, iaculis mi id, placerat nulla. 20 | Pellentesque porttitor felis elementum justo porttitor auctor. 21 | -------------------------------------------------------------------------------- /tests/common/tiny.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas sit amet tellus 2 | nec turpis feugiat semper. Nam at nulla laoreet, finibus eros sit amet, fringilla 3 | mauris. Fusce vestibulum nec ligula efficitur laoreet. Nunc orci leo, varius eget 4 | ligula vulputate, consequat eleifend nisi. Cras justo purus, imperdiet a augue 5 | malesuada, convallis cursus libero. Fusce pretium arcu in elementum laoreet. Duis 6 | mauris nulla, suscipit at est nec, malesuada pellentesque eros. Quisque semper porta 7 | malesuada. Nunc hendrerit est ac faucibus mollis. Nam fermentum id libero sed 8 | egestas. Duis a accumsan sapien. Nam neque diam, congue non erat et, porta sagittis 9 | turpis. -------------------------------------------------------------------------------- /tests/graphemes.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "graphemes")] 2 | use std::borrow::Cow; 3 | 4 | #[cfg(feature = "graphemes")] 5 | use crop::Rope; 6 | 7 | // TODO: remove the `#[ignore]`s once 8 | // https://github.com/unicode-rs/unicode-segmentation/issues/115 gets 9 | // resolved. 
10 | 11 | #[cfg(feature = "graphemes")] 12 | #[test] 13 | fn iter_graphemes_ascii() { 14 | let r = Rope::from("abcd"); 15 | 16 | let mut graphemes = r.graphemes(); 17 | 18 | assert_eq!(Cow::Borrowed("a"), graphemes.next().unwrap()); 19 | assert_eq!(Cow::Borrowed("b"), graphemes.next().unwrap()); 20 | assert_eq!(Cow::Borrowed("c"), graphemes.next().unwrap()); 21 | assert_eq!(Cow::Borrowed("d"), graphemes.next().unwrap()); 22 | assert_eq!(None, graphemes.next()); 23 | } 24 | 25 | /// ``` 26 | /// Root 27 | /// ├── "🇷" 28 | /// ├── "🇸" 29 | /// ├── "🇮" 30 | /// └── "🇴" 31 | #[ignore] 32 | #[cfg(all(feature = "graphemes", feature = "small_chunks"))] 33 | #[test] 34 | fn iter_graphemes_two_flags() { 35 | let r = Rope::from("🇷🇸🇮🇴"); 36 | 37 | let mut graphemes = r.graphemes(); 38 | 39 | assert_eq!( 40 | Cow::::Owned(String::from("🇷🇸")), 41 | graphemes.next().unwrap() 42 | ); 43 | 44 | assert_eq!( 45 | Cow::::Owned(String::from("🇮🇴")), 46 | graphemes.next().unwrap() 47 | ); 48 | 49 | assert_eq!(None, graphemes.next()); 50 | } 51 | 52 | #[ignore] 53 | #[cfg(feature = "graphemes")] 54 | #[test] 55 | fn graphemes_iter_flags() { 56 | let r = Rope::from("🇬🇧🇯🇵🇺🇸🇫🇷🇷🇺🇨🇳🇩🇪🇪🇸🇬🇧🇯🇵🇺🇸🇫🇷🇷🇺🇨🇳🇩🇪🇪🇸🇬🇧🇯🇵🇺🇸🇫🇷🇷🇺🇨🇳🇩🇪🇪🇸"); 57 | 58 | let mut graphemes = r.graphemes(); 59 | 60 | assert_eq!("🇬🇧", graphemes.next().unwrap()); 61 | assert_eq!("🇯🇵", graphemes.next().unwrap()); 62 | assert_eq!("🇺🇸", graphemes.next().unwrap()); 63 | assert_eq!("🇫🇷", graphemes.next().unwrap()); 64 | assert_eq!("🇷🇺", graphemes.next().unwrap()); 65 | assert_eq!("🇨🇳", graphemes.next().unwrap()); 66 | assert_eq!("🇩🇪", graphemes.next().unwrap()); 67 | assert_eq!("🇪🇸", graphemes.next().unwrap()); 68 | assert_eq!("🇬🇧", graphemes.next().unwrap()); 69 | assert_eq!("🇯🇵", graphemes.next().unwrap()); 70 | assert_eq!("🇺🇸", graphemes.next().unwrap()); 71 | assert_eq!("🇫🇷", graphemes.next().unwrap()); 72 | assert_eq!("🇷🇺", graphemes.next().unwrap()); 73 | assert_eq!("🇨🇳", graphemes.next().unwrap()); 74 | assert_eq!("🇩🇪", 
graphemes.next().unwrap()); 75 | assert_eq!("🇪🇸", graphemes.next().unwrap()); 76 | assert_eq!("🇬🇧", graphemes.next().unwrap()); 77 | assert_eq!("🇯🇵", graphemes.next().unwrap()); 78 | assert_eq!("🇺🇸", graphemes.next().unwrap()); 79 | assert_eq!("🇫🇷", graphemes.next().unwrap()); 80 | assert_eq!("🇷🇺", graphemes.next().unwrap()); 81 | assert_eq!("🇨🇳", graphemes.next().unwrap()); 82 | assert_eq!("🇩🇪", graphemes.next().unwrap()); 83 | assert_eq!("🇪🇸", graphemes.next().unwrap()); 84 | assert_eq!(None, graphemes.next()); 85 | 86 | let mut graphemes = r.graphemes().rev(); 87 | 88 | assert_eq!("🇪🇸", graphemes.next().unwrap()); 89 | assert_eq!("🇩🇪", graphemes.next().unwrap()); 90 | assert_eq!("🇨🇳", graphemes.next().unwrap()); 91 | assert_eq!("🇷🇺", graphemes.next().unwrap()); 92 | assert_eq!("🇫🇷", graphemes.next().unwrap()); 93 | assert_eq!("🇺🇸", graphemes.next().unwrap()); 94 | assert_eq!("🇯🇵", graphemes.next().unwrap()); 95 | assert_eq!("🇬🇧", graphemes.next().unwrap()); 96 | assert_eq!("🇪🇸", graphemes.next().unwrap()); 97 | assert_eq!("🇩🇪", graphemes.next().unwrap()); 98 | assert_eq!("🇨🇳", graphemes.next().unwrap()); 99 | assert_eq!("🇷🇺", graphemes.next().unwrap()); 100 | assert_eq!("🇫🇷", graphemes.next().unwrap()); 101 | assert_eq!("🇺🇸", graphemes.next().unwrap()); 102 | assert_eq!("🇯🇵", graphemes.next().unwrap()); 103 | assert_eq!("🇬🇧", graphemes.next().unwrap()); 104 | assert_eq!("🇪🇸", graphemes.next().unwrap()); 105 | assert_eq!("🇩🇪", graphemes.next().unwrap()); 106 | assert_eq!("🇨🇳", graphemes.next().unwrap()); 107 | assert_eq!("🇷🇺", graphemes.next().unwrap()); 108 | assert_eq!("🇫🇷", graphemes.next().unwrap()); 109 | assert_eq!("🇺🇸", graphemes.next().unwrap()); 110 | assert_eq!("🇯🇵", graphemes.next().unwrap()); 111 | assert_eq!("🇬🇧", graphemes.next().unwrap()); 112 | assert_eq!(None, graphemes.next()); 113 | } 114 | 115 | #[cfg(feature = "graphemes")] 116 | #[test] 117 | fn graphemes_is_boundary_two_flags() { 118 | let r = Rope::from("🇷🇸🇮🇴"); 119 | 
assert!(r.is_grapheme_boundary(0)); 120 | 121 | for i in 1..8 { 122 | assert!(!r.is_grapheme_boundary(i)); 123 | } 124 | 125 | assert!(r.is_grapheme_boundary(8)); 126 | 127 | for i in 9..16 { 128 | assert!(!r.is_grapheme_boundary(i)); 129 | } 130 | 131 | assert!(r.is_grapheme_boundary(16)); 132 | } 133 | 134 | #[cfg(feature = "graphemes")] 135 | #[should_panic] 136 | #[test] 137 | fn graphemes_is_boundary_out_of_bounds() { 138 | let r = Rope::from("🇷🇸🇮🇴"); 139 | assert!(r.is_grapheme_boundary(17)); 140 | } 141 | 142 | #[cfg(feature = "graphemes")] 143 | #[test] 144 | fn graphemes_is_boundary_empty_rope() { 145 | assert!(Rope::new().is_grapheme_boundary(0)); 146 | } 147 | -------------------------------------------------------------------------------- /tests/iterators.rs: -------------------------------------------------------------------------------- 1 | use crop::Rope; 2 | use rand::{Rng, rng}; 3 | 4 | mod common; 5 | 6 | use common::{CURSED_LIPSUM, LARGE, MEDIUM, SMALL, TINY}; 7 | 8 | #[test] 9 | fn iter_bytes_empty() { 10 | let r = Rope::new(); 11 | assert_eq!(0, r.bytes().count()); 12 | assert_eq!(0, r.byte_slice(..).bytes().count()); 13 | } 14 | 15 | #[test] 16 | fn iter_bytes_forward() { 17 | let s = if cfg!(miri) { "Hello, world!" } else { LARGE }; 18 | let r = Rope::from(s); 19 | let mut i = 0; 20 | for (b_rope, b_str) in r.bytes().zip(s.bytes()) { 21 | assert_eq!(b_rope, b_str); 22 | i += 1; 23 | } 24 | assert_eq!(i, r.byte_len()); 25 | } 26 | 27 | #[test] 28 | fn iter_bytes_backward() { 29 | let s = if cfg!(miri) { "Hello, world!" } else { LARGE }; 30 | let r = Rope::from(s); 31 | let mut i = 0; 32 | for (b_rope, b_str) in r.bytes().rev().zip(s.bytes().rev()) { 33 | assert_eq!(b_rope, b_str); 34 | i += 1; 35 | } 36 | assert_eq!(i, r.byte_len()); 37 | } 38 | 39 | #[test] 40 | fn iter_bytes_both_ways() { 41 | let s = if cfg!(miri) { "Hello, world!" 
} else { LARGE }; 42 | let rope = Rope::from(s); 43 | 44 | let i = rng().random_range(0..=s.len()); 45 | 46 | println!("i: {i}"); 47 | 48 | // Go forward for the first `i` bytes, then backward. 49 | 50 | let mut slice_bytes = s.bytes(); 51 | let mut rope_bytes = rope.bytes(); 52 | 53 | for _ in 0..i { 54 | let rope_b = rope_bytes.next().unwrap(); 55 | let slice_b = slice_bytes.next().unwrap(); 56 | assert_eq!(rope_b, slice_b); 57 | } 58 | 59 | for _ in i..s.len() { 60 | let rope_b = rope_bytes.next_back().unwrap(); 61 | let slice_b = slice_bytes.next_back().unwrap(); 62 | assert_eq!(rope_b, slice_b); 63 | } 64 | 65 | assert_eq!(None, rope_bytes.next()); 66 | assert_eq!(None, rope_bytes.next_back()); 67 | 68 | // Now the opposite, go backward for the first `i` bytes, then forward. 69 | 70 | let mut slice_bytes = s.bytes(); 71 | let mut rope_bytes = rope.bytes(); 72 | 73 | for _ in 0..i { 74 | let rope_b = rope_bytes.next_back().unwrap(); 75 | let slice_b = slice_bytes.next_back().unwrap(); 76 | assert_eq!(rope_b, slice_b); 77 | } 78 | 79 | for _ in i..s.len() { 80 | let rope_b = rope_bytes.next().unwrap(); 81 | let slice_b = slice_bytes.next().unwrap(); 82 | assert_eq!(rope_b, slice_b); 83 | } 84 | 85 | assert_eq!(None, rope_bytes.next()); 86 | assert_eq!(None, rope_bytes.next_back()); 87 | } 88 | 89 | #[test] 90 | fn iter_bytes_cursed() { 91 | let s = CURSED_LIPSUM; 92 | let r = Rope::from(s); 93 | 94 | assert_eq!(r.bytes().count(), s.len()); 95 | assert_eq!(r.byte_slice(..).bytes().count(), s.len()); 96 | 97 | for (b1, b2) in r.bytes().zip(s.bytes()) { 98 | assert_eq!(b1, b2); 99 | } 100 | 101 | for (b1, b2) in r.bytes().rev().zip(s.bytes().rev()) { 102 | assert_eq!(b1, b2); 103 | } 104 | } 105 | 106 | #[test] 107 | fn iter_bytes_over_slice_forward() { 108 | let mut rng = rand::rng(); 109 | 110 | let slices = if cfg!(miri) { 111 | ["foo", "bar", "baz", "Hello, world!"] 112 | } else { 113 | [TINY, SMALL, MEDIUM, LARGE] 114 | }; 115 | 116 | for s in slices { 117 | 
let r = Rope::from(s); 118 | 119 | for _ in 0..1 { 120 | let start = rng.random_range(0..=r.byte_len()); 121 | let end = rng.random_range(start..=r.byte_len()); 122 | 123 | let rope_slice = r.byte_slice(start..end); 124 | let str_slice = &s[start..end]; 125 | 126 | for (idx, (rope_byte, str_byte)) in 127 | rope_slice.bytes().zip(str_slice.bytes()).enumerate() 128 | { 129 | if rope_byte != str_byte { 130 | println!("idx: {idx}"); 131 | println!("Byte range: {start}..{end}"); 132 | panic!("{rope_byte:?} vs {str_byte:?}"); 133 | } 134 | } 135 | } 136 | } 137 | } 138 | 139 | #[test] 140 | fn iter_bytes_over_slice_backward() { 141 | let mut rng = rand::rng(); 142 | 143 | let slices = if cfg!(miri) { 144 | ["foo", "bar", "baz", "Hello, world!"] 145 | } else { 146 | [TINY, SMALL, MEDIUM, LARGE] 147 | }; 148 | 149 | for s in slices { 150 | let r = Rope::from(s); 151 | 152 | for _ in 0..1 { 153 | let start = rng.random_range(0..=r.byte_len()); 154 | let end = rng.random_range(start..=r.byte_len()); 155 | 156 | let rope_slice = r.byte_slice(start..end); 157 | let str_slice = &s[start..end]; 158 | 159 | for (idx, (rope_byte, str_byte)) in rope_slice 160 | .bytes() 161 | .rev() 162 | .zip(str_slice.bytes().rev()) 163 | .enumerate() 164 | { 165 | if rope_byte != str_byte { 166 | println!("idx: {idx}"); 167 | println!("Byte range: {start}..{end}"); 168 | panic!("{rope_byte:?} vs {str_byte:?}"); 169 | } 170 | } 171 | } 172 | } 173 | } 174 | 175 | #[test] 176 | fn iter_chars_empty() { 177 | let r = Rope::new(); 178 | assert_eq!(0, r.chars().count()); 179 | assert_eq!(0, r.byte_slice(..).chars().count()); 180 | } 181 | 182 | #[cfg_attr(miri, ignore)] 183 | #[test] 184 | fn iter_chars_forward() { 185 | let r = Rope::from(LARGE); 186 | let mut i = 0; 187 | for (c_rope, c_str) in r.chars().zip(LARGE.chars()) { 188 | assert_eq!(c_rope, c_str); 189 | i += 1; 190 | } 191 | assert_eq!(i, LARGE.chars().count()); 192 | } 193 | 194 | #[cfg_attr(miri, ignore)] 195 | #[test] 196 | fn 
iter_chars_backward() { 197 | let r = Rope::from(LARGE); 198 | let mut i = 0; 199 | for (c_rope, c_str) in r.chars().rev().zip(LARGE.chars().rev()) { 200 | assert_eq!(c_rope, c_str); 201 | i += 1; 202 | } 203 | assert_eq!(i, LARGE.chars().count()); 204 | } 205 | 206 | #[cfg_attr(miri, ignore)] 207 | #[test] 208 | fn iter_chars_both_ways() { 209 | let rope = Rope::from(LARGE); 210 | 211 | let total_chars = LARGE.chars().count(); 212 | let i = rng().random_range(0..=total_chars); 213 | 214 | println!("i: {i}"); 215 | 216 | // Go forward for the first `i` chars, then backward. 217 | 218 | let mut slice_chars = LARGE.chars(); 219 | let mut rope_chars = rope.chars(); 220 | 221 | for _ in 0..i { 222 | let rope_c = rope_chars.next().unwrap(); 223 | let slice_c = slice_chars.next().unwrap(); 224 | assert_eq!(rope_c, slice_c); 225 | } 226 | 227 | for _ in i..total_chars { 228 | let rope_c = rope_chars.next_back().unwrap(); 229 | let slice_c = slice_chars.next_back().unwrap(); 230 | assert_eq!(rope_c, slice_c); 231 | } 232 | 233 | assert_eq!(None, rope_chars.next()); 234 | assert_eq!(None, rope_chars.next_back()); 235 | 236 | // Now the opposite, go backward for the first `i` chars, then forward. 
237 | 238 | let mut slice_chars = LARGE.chars(); 239 | let mut rope_chars = rope.chars(); 240 | 241 | for _ in 0..i { 242 | let rope_c = rope_chars.next_back().unwrap(); 243 | let slice_c = slice_chars.next_back().unwrap(); 244 | assert_eq!(rope_c, slice_c); 245 | } 246 | 247 | for _ in i..total_chars { 248 | let rope_c = rope_chars.next().unwrap(); 249 | let slice_c = slice_chars.next().unwrap(); 250 | assert_eq!(rope_c, slice_c); 251 | } 252 | 253 | assert_eq!(None, rope_chars.next()); 254 | assert_eq!(None, rope_chars.next_back()); 255 | } 256 | 257 | #[test] 258 | fn iter_chars_cursed() { 259 | let s = CURSED_LIPSUM; 260 | let r = Rope::from(s); 261 | 262 | assert_eq!(r.chars().count(), s.chars().count()); 263 | assert_eq!(r.byte_slice(..).chars().count(), s.chars().count()); 264 | 265 | for (c1, c2) in r.chars().zip(s.chars()) { 266 | assert_eq!(c1, c2); 267 | } 268 | 269 | for (c1, c2) in r.chars().rev().zip(s.chars().rev()) { 270 | assert_eq!(c1, c2); 271 | } 272 | } 273 | 274 | #[test] 275 | fn iter_lines_empty() { 276 | let r = Rope::new(); 277 | assert_eq!(0, r.lines().count()); 278 | assert_eq!(0, r.line_slice(..).lines().count()); 279 | } 280 | 281 | #[test] 282 | fn iter_lines_0() { 283 | let r = Rope::from("abc"); 284 | assert_eq!(1, r.lines().count()); 285 | assert_eq!(1, r.byte_slice(..).lines().count()); 286 | 287 | let r = Rope::from("a\nb"); 288 | assert_eq!(2, r.lines().count()); 289 | assert_eq!(2, r.byte_slice(..).lines().count()); 290 | 291 | let r = Rope::from("a\nb\n"); 292 | assert_eq!(2, r.lines().count()); 293 | assert_eq!(2, r.byte_slice(..).lines().count()); 294 | 295 | let r = Rope::from("\na\nb"); 296 | assert_eq!(3, r.lines().count()); 297 | assert_eq!(3, r.byte_slice(..).lines().count()); 298 | 299 | let r = Rope::from("\n\n\n"); 300 | assert_eq!(3, r.lines().count()); 301 | assert_eq!(3, r.byte_slice(..).lines().count()); 302 | 303 | let r = Rope::from("\n\n\n\n"); 304 | assert_eq!(4, r.lines().count()); 305 | assert_eq!(4, 
r.byte_slice(..).lines().count()); 306 | 307 | let r = Rope::from("\n\n\na"); 308 | assert_eq!(4, r.lines().count()); 309 | assert_eq!(4, r.byte_slice(..).lines().count()); 310 | } 311 | 312 | #[test] 313 | fn iter_lines_1() { 314 | let s = "\n\n\r\n\r\n\n\r\n\n"; 315 | 316 | let rope = Rope::from(s); 317 | let slice = rope.byte_slice(..); 318 | 319 | assert_eq!(rope.lines().count(), s.lines().count()); 320 | assert_eq!(slice.lines().count(), s.lines().count()); 321 | 322 | for ((rope_line, slice_line), s_line) in 323 | rope.lines().zip(slice.lines()).zip(s.lines()) 324 | { 325 | assert_eq!(rope_line, s_line); 326 | assert_eq!(slice_line, s_line); 327 | } 328 | } 329 | 330 | #[test] 331 | fn iter_lines_2() { 332 | let s = "this is\na line\r\nwith mixed\nline breaks\n"; 333 | 334 | let rope = Rope::from(s); 335 | let slice = rope.byte_slice(..); 336 | 337 | assert_eq!(rope.lines().count(), s.lines().count()); 338 | assert_eq!(slice.lines().count(), s.lines().count()); 339 | 340 | for ((rope_line, slice_line), s_line) in 341 | rope.lines().zip(slice.lines()).zip(s.lines()) 342 | { 343 | assert_eq!(rope_line, s_line); 344 | assert_eq!(slice_line, s_line); 345 | } 346 | } 347 | 348 | #[test] 349 | fn iter_lines_3() { 350 | let s = "This is a piece\nof text that doesn't\nfit in one\n chunk\nand \ 351 | it also\r\nhas mixed\r\n line breaks\n and no trailing\nline \ 352 | break."; 353 | 354 | let rope = Rope::from(s); 355 | let slice = rope.byte_slice(..); 356 | 357 | assert_eq!(rope.lines().count(), s.lines().count()); 358 | assert_eq!(slice.lines().count(), s.lines().count()); 359 | 360 | for ((rope_line, slice_line), s_line) in 361 | rope.lines().zip(slice.lines()).zip(s.lines()) 362 | { 363 | assert_eq!(rope_line, s_line); 364 | assert_eq!(slice_line, s_line); 365 | } 366 | } 367 | 368 | #[test] 369 | fn iter_lines_4() { 370 | let r = Rope::from( 371 | "Hey \r\nthis contains\nmixed line breaks, emojis -> \r\n🐕‍🦺 and \ 372 | other -> こんにちは chars.\r\nCan we 
iterate\nover this?\n\r\n\n??", 373 | ); 374 | 375 | let mut lines = r.lines(); 376 | 377 | assert_eq!("Hey ", lines.next().unwrap()); 378 | assert_eq!("this contains", lines.next().unwrap()); 379 | assert_eq!("mixed line breaks, emojis -> ", lines.next().unwrap()); 380 | assert_eq!("🐕‍🦺 and other -> こんにちは chars.", lines.next().unwrap()); 381 | assert_eq!("Can we iterate", lines.next().unwrap()); 382 | assert_eq!("over this?", lines.next().unwrap()); 383 | assert_eq!("", lines.next().unwrap()); 384 | assert_eq!("", lines.next().unwrap()); 385 | assert_eq!("??", lines.next().unwrap()); 386 | assert_eq!(None, lines.next()); 387 | } 388 | 389 | #[cfg_attr(miri, ignore)] 390 | #[test] 391 | fn iter_lines_over_test_vectors() { 392 | for s in [TINY, SMALL, MEDIUM, LARGE, CURSED_LIPSUM] { 393 | let rope = Rope::from(s); 394 | let slice = rope.byte_slice(..); 395 | 396 | assert_eq!(rope.lines().count(), s.lines().count()); 397 | assert_eq!(slice.lines().count(), s.lines().count()); 398 | 399 | for ((rope_line, slice_line), s_line) in 400 | rope.lines().zip(slice.lines()).zip(s.lines()) 401 | { 402 | rope_line.assert_invariants(); 403 | slice_line.assert_invariants(); 404 | assert_eq!(rope_line, s_line); 405 | assert_eq!(slice_line, s_line); 406 | } 407 | } 408 | } 409 | 410 | #[test] 411 | fn iter_lines_forward_backward() { 412 | let r = Rope::from("\na\nb\nc\n"); 413 | 414 | let mut forward = r.lines(); 415 | assert_eq!("", forward.next().unwrap()); 416 | assert_eq!("a", forward.next().unwrap()); 417 | assert_eq!("b", forward.next().unwrap()); 418 | assert_eq!("c", forward.next().unwrap()); 419 | assert_eq!(None, forward.next()); 420 | 421 | let mut backward = r.lines().rev(); 422 | assert_eq!("c", backward.next().unwrap()); 423 | assert_eq!("b", backward.next().unwrap()); 424 | assert_eq!("a", backward.next().unwrap()); 425 | assert_eq!("", backward.next().unwrap()); 426 | assert_eq!(None, backward.next()); 427 | } 428 | 429 | #[cfg_attr(miri, ignore)] 430 | #[test] 431 
| fn iter_lines_over_random_slices() { 432 | let mut rng = rand::rng(); 433 | 434 | for s in [TINY, SMALL, MEDIUM, LARGE] { 435 | let rope = Rope::from(s); 436 | 437 | for _ in 0..100 { 438 | let start = rng.random_range(0..=rope.byte_len()); 439 | let end = rng.random_range(start..=rope.byte_len()); 440 | 441 | let range = start..end; 442 | 443 | let rope_slice = rope.byte_slice(range.clone()); 444 | let str_slice = &s[range.clone()]; 445 | 446 | for (idx, (rope_line, str_line)) in 447 | rope_slice.lines().zip(str_slice.lines()).enumerate() 448 | { 449 | rope_line.assert_invariants(); 450 | if rope_line != str_line { 451 | println!( 452 | "Failed on line #{} in byte range: {range:?}", 453 | idx + 1 454 | ); 455 | assert_eq!(rope_line, str_line); 456 | } 457 | } 458 | 459 | for ((idx, rope_line), str_line) in rope_slice 460 | .lines() 461 | .enumerate() 462 | .rev() 463 | .zip(str_slice.lines().rev()) 464 | { 465 | rope_line.assert_invariants(); 466 | if rope_line != str_line { 467 | println!( 468 | "Failed on line #{} in byte range: {range:?}", 469 | idx + 1 470 | ); 471 | assert_eq!(rope_line, str_line); 472 | } 473 | } 474 | } 475 | } 476 | } 477 | 478 | #[test] 479 | fn iter_raw_lines_0() { 480 | let r = Rope::from( 481 | "Hey \r\nthis contains\nmixed line breaks, emojis -> \r\n🐕‍🦺 and \ 482 | other -> こんにちは chars.\r\nCan we iterate\nover this?\n\r\n\n??", 483 | ); 484 | 485 | let mut lines = r.raw_lines(); 486 | 487 | assert_eq!("Hey \r\n", lines.next().unwrap()); 488 | assert_eq!("this contains\n", lines.next().unwrap()); 489 | assert_eq!("mixed line breaks, emojis -> \r\n", lines.next().unwrap()); 490 | assert_eq!("🐕‍🦺 and other -> こんにちは chars.\r\n", lines.next().unwrap()); 491 | assert_eq!("Can we iterate\n", lines.next().unwrap()); 492 | assert_eq!("over this?\n", lines.next().unwrap()); 493 | assert_eq!("\r\n", lines.next().unwrap()); 494 | assert_eq!("\n", lines.next().unwrap()); 495 | assert_eq!("??", lines.next().unwrap()); 496 | assert_eq!(None, 
lines.next()); 497 | } 498 | 499 | #[test] 500 | fn iter_raw_lines_backward_0() { 501 | let r = Rope::from( 502 | "Hey \r\nthis contains\nmixed line breaks, emojis -> \r\n🐕‍🦺 and \ 503 | other -> こんにちは chars.\r\nCan we iterate\nover this?\n\r\n\n??", 504 | ); 505 | 506 | let mut lines = r.raw_lines().rev(); 507 | 508 | assert_eq!("??", lines.next().unwrap()); 509 | assert_eq!("\n", lines.next().unwrap()); 510 | assert_eq!("\r\n", lines.next().unwrap()); 511 | assert_eq!("over this?\n", lines.next().unwrap()); 512 | assert_eq!("Can we iterate\n", lines.next().unwrap()); 513 | assert_eq!("🐕‍🦺 and other -> こんにちは chars.\r\n", lines.next().unwrap()); 514 | assert_eq!("mixed line breaks, emojis -> \r\n", lines.next().unwrap()); 515 | assert_eq!("this contains\n", lines.next().unwrap()); 516 | assert_eq!("Hey \r\n", lines.next().unwrap()); 517 | assert_eq!(None, lines.next()); 518 | } 519 | 520 | #[cfg_attr(miri, ignore)] 521 | #[test] 522 | fn iter_raw_lines_over_test_vectors() { 523 | for s in [TINY, SMALL, MEDIUM, LARGE] { 524 | let rope = Rope::from(s); 525 | 526 | for (i, (rope_line, s_line)) in 527 | rope.raw_lines().zip(s.lines()).enumerate() 528 | { 529 | rope_line.assert_invariants(); 530 | if i != rope.line_len() - 1 || s.ends_with('\n') { 531 | let mut line = s_line.to_owned(); 532 | line.push('\n'); 533 | assert_eq!(line, rope_line); 534 | } else { 535 | assert_eq!(s_line, rope_line); 536 | } 537 | } 538 | } 539 | } 540 | 541 | #[cfg_attr(miri, ignore)] 542 | #[test] 543 | fn iter_raw_lines_over_random_slices() { 544 | let mut rng = rand::rng(); 545 | 546 | for s in [TINY, SMALL, MEDIUM, LARGE] { 547 | let rope = Rope::from(s); 548 | 549 | for _ in 0..100 { 550 | let start = rng.random_range(0..=rope.byte_len()); 551 | let end = rng.random_range(start..=rope.byte_len()); 552 | 553 | let range = start..end; 554 | 555 | let rope_slice = rope.byte_slice(range.clone()); 556 | let str_slice = &s[range.clone()]; 557 | 558 | for (idx, (rope_line, str_line)) in 559 | 
rope_slice.raw_lines().zip(str_slice.lines()).enumerate() 560 | { 561 | rope_line.assert_invariants(); 562 | 563 | let is_last = idx == rope_slice.line_len() - 1; 564 | 565 | // TODO: use `RopeSlice::ends_with()` if/when that's 566 | // implemented. 567 | let str_line = if !is_last || str_slice.ends_with('\n') { 568 | let mut l = str_line.to_owned(); 569 | l.push('\n'); 570 | std::borrow::Cow::Owned(l) 571 | } else { 572 | std::borrow::Cow::Borrowed(str_line) 573 | }; 574 | 575 | if rope_line != str_line { 576 | println!( 577 | "Failed on line #{} in byte range: {range:?}", 578 | idx + 1 579 | ); 580 | assert_eq!(rope_line, str_line); 581 | } 582 | } 583 | 584 | for ((idx, rope_line), str_line) in rope_slice 585 | .raw_lines() 586 | .enumerate() 587 | .rev() 588 | .zip(str_slice.lines().rev()) 589 | { 590 | rope_line.assert_invariants(); 591 | 592 | let is_last = idx == rope_slice.line_len() - 1; 593 | 594 | // TODO: use `RopeSlice::ends_with()` if/when that's 595 | // implemented. 596 | let str_line = if !is_last || str_slice.ends_with('\n') { 597 | let mut l = str_line.to_owned(); 598 | l.push('\n'); 599 | std::borrow::Cow::Owned(l) 600 | } else { 601 | std::borrow::Cow::Borrowed(str_line) 602 | }; 603 | 604 | if rope_line != str_line { 605 | println!( 606 | "Failed on line #{} in byte range: {range:?}", 607 | idx + 1 608 | ); 609 | assert_eq!(rope_line, str_line); 610 | } 611 | } 612 | } 613 | } 614 | } 615 | -------------------------------------------------------------------------------- /tests/rope_builder.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | use common::LARGE; 4 | use crop::{Rope, RopeBuilder}; 5 | 6 | #[test] 7 | fn builder_line_len() { 8 | let mut builder = RopeBuilder::new(); 9 | builder.append("\n"); 10 | let rope = builder.build(); 11 | assert_eq!(rope.line_len(), Rope::from("\n").line_len()); 12 | } 13 | 14 | #[test] 15 | fn builder_empty() { 16 | let r = RopeBuilder::new().build(); 
17 | 18 | assert!(r.is_empty()); 19 | assert_eq!(Rope::new(), r); 20 | 21 | let mut b = RopeBuilder::new(); 22 | 23 | b.append("").append("").append("").append("").append(""); 24 | 25 | let r = b.build(); 26 | 27 | assert!(r.is_empty()); 28 | assert_eq!(Rope::new(), r); 29 | } 30 | 31 | #[cfg_attr(miri, ignore)] 32 | #[test] 33 | fn builder_0() { 34 | let mut b = RopeBuilder::new(); 35 | let mut s = String::new(); 36 | 37 | for line in LARGE.lines() { 38 | b.append(line); 39 | s.push_str(line); 40 | } 41 | 42 | assert_eq!(s, b.build()); 43 | } 44 | 45 | #[test] 46 | fn builder_crlf_0() { 47 | let mut b = RopeBuilder::new(); 48 | b.append("aaa\r").append("\nbbb"); 49 | let r = b.build(); 50 | r.assert_invariants(); 51 | assert_eq!(r, "aaa\r\nbbb"); 52 | } 53 | 54 | #[test] 55 | fn builder_crlf_1() { 56 | let mut b = RopeBuilder::new(); 57 | b.append("aaa\r\nbbb"); 58 | let r = b.build(); 59 | r.assert_invariants(); 60 | assert_eq!(r, "aaa\r\nbbb"); 61 | } 62 | -------------------------------------------------------------------------------- /tests/rope_indexing.rs: -------------------------------------------------------------------------------- 1 | use crop::Rope; 2 | 3 | mod common; 4 | 5 | use common::{CURSED_LIPSUM, LARGE, MEDIUM, SMALL, TINY}; 6 | 7 | #[cfg_attr(miri, ignore)] 8 | #[test] 9 | fn rope_byte_0() { 10 | for s in 11 | ["", "Hi", "Hello", "🐕‍🦺", TINY, SMALL, MEDIUM, LARGE, CURSED_LIPSUM] 12 | { 13 | let r = Rope::from(s); 14 | for byte_idx in 0..s.len() { 15 | let r_byte = r.byte(byte_idx); 16 | let s_byte = s.as_bytes()[byte_idx]; 17 | assert_eq!(r_byte, s_byte); 18 | } 19 | } 20 | } 21 | 22 | #[cfg_attr(miri, ignore)] 23 | #[test] 24 | fn rope_is_char_boundary() { 25 | for s in 26 | ["", "Hi", "Hello", "🐕‍🦺", TINY, SMALL, MEDIUM, LARGE, CURSED_LIPSUM] 27 | { 28 | let rope = Rope::from(s); 29 | let slice = rope.byte_slice(..); 30 | 31 | for idx in 0..s.len() { 32 | assert_eq!(s.is_char_boundary(idx), rope.is_char_boundary(idx)); 33 | 
assert_eq!(s.is_char_boundary(idx), slice.is_char_boundary(idx)); 34 | } 35 | } 36 | } 37 | 38 | /// ``` 39 | /// Root 40 | /// ├───┐ 41 | /// │ ├── "aaa\n" 42 | /// │ ├── "bbb\n" 43 | /// │ ├── "ccc\n" 44 | /// │ └── "ddd\n" 45 | /// └───┐ 46 | /// ├── "ee\ne" 47 | /// └── "fff\n" 48 | /// ``` 49 | #[test] 50 | fn rope_line_0() { 51 | let r = Rope::from("aaa\nbbb\nccc\nddd\nee\nefff\n"); 52 | let s = r.line(4); 53 | assert_eq!("ee", s); 54 | } 55 | 56 | /// ``` 57 | /// Root 58 | /// ├───┐ 59 | /// │ ├── "aaaa" 60 | /// │ ├── "aaaa" 61 | /// │ ├── "aaaa" 62 | /// │ └── "aaa\n" 63 | /// └───┐ 64 | /// ├── "bbbb" 65 | /// └── "\nccc" 66 | /// ``` 67 | #[test] 68 | fn rope_line_1() { 69 | let r = Rope::from("aaaaaaaaaaaaaaa\nbbbb\nccc"); 70 | let l = r.line(1); 71 | assert_eq!("bbbb", l); 72 | } 73 | 74 | /// ``` 75 | /// Root 76 | /// ├───┐ 77 | /// │ ├── "aaaa" 78 | /// │ ├── "aaaa" 79 | /// │ ├── "aaaa" 80 | /// │ └── "aaa\n" 81 | /// └───┐ 82 | /// ├── "\nbbb" 83 | /// └── "bbbb" 84 | /// ``` 85 | #[test] 86 | fn rope_line_2() { 87 | let r = Rope::from("aaaaaaaaaaaaaaa\n\nbbbbbbb"); 88 | let l = r.line(1); 89 | assert_eq!("", l); 90 | } 91 | 92 | /// ``` 93 | /// Root 94 | /// └── "\n\n\n\n" 95 | /// ``` 96 | #[test] 97 | fn rope_line_3() { 98 | let r = Rope::from("\n\n\n\n"); 99 | let l = r.line(2); 100 | assert_eq!("", l); 101 | } 102 | -------------------------------------------------------------------------------- /tests/rope_replace.rs: -------------------------------------------------------------------------------- 1 | use crop::Rope; 2 | use rand::Rng; 3 | 4 | mod common; 5 | 6 | use common::{LARGE, MEDIUM, SMALL, TEXT, TINY}; 7 | 8 | #[test] 9 | fn insert_1() { 10 | let mut r = Rope::from(TEXT); 11 | 12 | r.insert(3, "AA"); 13 | 14 | r.assert_invariants(); 15 | 16 | assert_eq!( 17 | r, 18 | "HelAAlo there! How're you doing? It's a fine day, isn't it? \ 19 | Aren't you glad we're alive? こんにちは、みんなさん!" 
20 | ); 21 | } 22 | 23 | #[test] 24 | fn insert_2() { 25 | let mut r = Rope::from(TEXT); 26 | 27 | r.insert(0, "AA"); 28 | 29 | r.assert_invariants(); 30 | 31 | assert_eq!( 32 | r, 33 | "AAHello there! How're you doing? It's a fine day, isn't it? \ 34 | Aren't you glad we're alive? こんにちは、みんなさん!" 35 | ); 36 | } 37 | 38 | #[test] 39 | fn insert_3() { 40 | let mut r = Rope::from(TEXT); 41 | 42 | r.insert(127, "AA"); 43 | 44 | r.assert_invariants(); 45 | 46 | assert_eq!( 47 | r, 48 | "Hello there! How're you doing? It's a fine day, isn't it? Aren't \ 49 | you glad we're alive? こんにちは、みんなさん!AA" 50 | ); 51 | } 52 | 53 | #[test] 54 | fn insert_4() { 55 | let mut r = Rope::new(); 56 | 57 | r.insert(0, "He"); 58 | r.insert(2, "l"); 59 | r.insert(3, "l"); 60 | r.insert(4, "o w"); 61 | r.insert(7, "o"); 62 | r.insert(8, "rl"); 63 | r.insert(10, "d!"); 64 | r.insert(3, "zopter"); 65 | 66 | r.assert_invariants(); 67 | 68 | assert_eq!("Helzopterlo world!", r); 69 | } 70 | 71 | #[test] 72 | fn insert_5() { 73 | let mut r = Rope::new(); 74 | 75 | r.insert(0, "こんいちは、みんなさん!"); 76 | r.insert(21, "zopter"); 77 | 78 | r.assert_invariants(); 79 | 80 | assert_eq!("こんいちは、みzopterんなさん!", r); 81 | } 82 | 83 | #[test] 84 | fn insert_6() { 85 | let mut r = Rope::new(); 86 | 87 | r.insert(0, "こ"); 88 | r.insert(3, "ん"); 89 | r.insert(6, "い"); 90 | r.insert(9, "ち"); 91 | r.insert(12, "は"); 92 | r.insert(15, "、"); 93 | r.insert(18, "み"); 94 | r.insert(21, "ん"); 95 | r.insert(24, "な"); 96 | r.insert(27, "さ"); 97 | r.insert(30, "ん"); 98 | r.insert(33, "!"); 99 | r.insert(21, "zopter"); 100 | 101 | r.assert_invariants(); 102 | 103 | assert_eq!("こんいちは、みzopterんなさん!", r); 104 | } 105 | 106 | #[should_panic] 107 | #[test] 108 | fn insert_7() { 109 | let mut r = Rope::new(); 110 | 111 | r.insert(0, "こ"); 112 | r.insert(2, "zopter"); 113 | } 114 | 115 | #[test] 116 | fn insert_8() { 117 | let mut r = Rope::from("Hello Earth!"); 118 | r.insert(11, " 🌎"); 119 | assert_eq!(r, "Hello Earth 🌎!"); 120 | } 121 | 
/// Inserts a fixed set of short strings at random char boundaries, mirroring
/// every insertion on a `String`, and checks that both end up equal.
#[cfg_attr(miri, ignore)]
#[test]
fn insert_small_random() {
    let mut rng = rand::rng();

    let mut rope = Rope::new();
    let mut string = String::new();

    for _ in 0..(1 << 8) {
        for s in [
            "Hello ",
            "How are ",
            "you ",
            "doing?\r\n",
            "Let's ",
            "keep ",
            "inserting ",
            "more ",
            "items.\r\n",
            "こんいちは、",
            "みんなさん!",
        ] {
            let mut at = rng.random_range(0..=rope.byte_len());

            // Re-draw until the offset doesn't fall inside a multi-byte
            // char.
            while !string.is_char_boundary(at) {
                at = rng.random_range(0..=rope.byte_len());
            }

            rope.insert(at, s);
            string.insert_str(at, s);
        }
    }

    rope.assert_invariants();
    assert_eq!(string, rope);
}

/// Inserts random substrings of each test vector at random offsets,
/// mirroring every insertion on a `String`.
#[cfg_attr(miri, ignore)]
#[test]
fn insert_random() {
    let mut rng = rand::rng();

    for s in [TINY, SMALL, MEDIUM, LARGE] {
        let mut r = Rope::from(s);
        let mut s = s.to_owned();

        for _ in 0..10 {
            // NOTE(review): the offsets are not checked against char
            // boundaries, so this presumably relies on the fixtures being
            // ASCII-only — confirm.
            let insert_at = rng.random_range(0..=r.byte_len());

            let insert = {
                let start = rng.random_range(0..=r.byte_len());
                let end = rng.random_range(start..=r.byte_len());
                s[start..end].to_owned()
            };

            r.insert(insert_at, &insert);
            s.insert_str(insert_at, &insert);

            r.assert_invariants();
            assert_eq!(s, r);
        }
    }
}

/// Several consecutive deletions from `TEXT`.
///
/// `TEXT` separates its sentences with two spaces: the byte ranges below
/// (and the char-aligned `81..93` in particular) depend on that spacing, and
/// so does the expected string.
#[test]
fn delete_1() {
    let mut r = Rope::from(TEXT);

    r.delete(5..11);
    r.delete(24..31);
    r.delete(19..25);
    r.delete(81..93);

    r.assert_invariants();

    assert_eq!(
        r,
        "Hello!  How're you a fine day, isn't it?  Aren't you glad we're \
         alive?  こんにんなさん!"
    );
}

/// Deleting an empty range leaves the rope untouched.
#[test]
fn delete_2() {
    let mut r = Rope::from(TEXT);

    // Make sure removing nothing actually does nothing.
    r.delete(45..45);
    assert_eq!(r, TEXT);

    r.assert_invariants();
}

/// Deleting the whole 127-byte text leaves an empty rope.
#[test]
fn delete_3() {
    let mut r = Rope::from(TEXT);

    // Make sure removing everything works.
    r.delete(0..127);

    r.assert_invariants();
    assert_eq!(r, "");
}

/// Deleting most of the text in one go.
#[test]
fn delete_4() {
    let mut r = Rope::from(TEXT);

    // Make sure removing a large range works.
    r.delete(3..118);

    r.assert_invariants();
    assert_eq!(r, "Helさん!");
}

/// A range whose start is greater than its end panics.
#[test]
#[should_panic]
fn delete_5() {
    let mut r = Rope::from(TEXT);
    #[allow(clippy::reversed_empty_ranges)]
    r.delete(56..55); // Wrong ordering of start/end on purpose.
}

/// A range that starts inside the text but ends past it panics.
#[test]
#[should_panic]
fn delete_6() {
    let mut r = Rope::from(TEXT);
    r.delete(126..128); // Removing past the end
}

/// A range that starts at the end of the text and ends past it panics.
#[test]
#[should_panic]
fn delete_7() {
    let mut r = Rope::from(TEXT);
    r.delete(127..128); // Removing past the end
}

/// An empty range located past the end of the text panics.
#[test]
#[should_panic]
fn delete_8() {
    let mut r = Rope::from(TEXT);
    r.delete(128..128); // Removing past the end
}

/// A non-empty range located entirely past the end of the text panics.
#[test]
#[should_panic]
fn delete_9() {
    let mut r = Rope::from(TEXT);
    r.delete(128..129); // Removing past the end
}

/// Deletes random byte ranges from each test vector, mirroring every
/// deletion on a `String`.
#[cfg_attr(miri, ignore)]
#[test]
fn delete_random() {
    let mut rng = rand::rng();

    for s in [TINY, SMALL, MEDIUM, LARGE] {
        let mut r = Rope::from(s);
        let mut s = s.to_owned();

        for _ in 0..20 {
            // NOTE(review): the range is not aligned to char boundaries, so
            // this presumably relies on the fixtures being ASCII-only —
            // confirm.
            let delete_range = {
                let start = rng.random_range(0..=r.byte_len());
                let end = rng.random_range(start..=r.byte_len());
                start..end
            };

            r.delete(delete_range.clone());
            s.replace_range(delete_range, "");

            r.assert_invariants();
            assert_eq!(s, r);
        }
    }
}

297 | #[test] 298 | fn replace_0() { 299 | let mut r = Rope::from("aaaa"); 300 | r.replace(2..3, "b"); 301 | r.assert_invariants(); 302 | assert_eq!("aaba", r); 303 | } 304 | 305 | /// ``` 306 | /// Root 307 | /// ├───┐ 308 | /// │ ├── "aaaa" 309 | /// │ ├── "bbbb" 310 | /// │ ├── "cccc" 311 | /// │ └── "dddd" 312 | /// └───┐ 313 | /// ├── "eeee" 314 | /// └── "ffff" 315 | /// ``` 316 | #[test] 317 | fn replace_1() { 318 | let mut r = Rope::from("aaaabbbbccccddddeeeeffff"); 319 | r.replace(2..10, "gggggggggggg"); 320 | r.assert_invariants(); 321 | assert_eq!("aaggggggggggggccddddeeeeffff", r); 322 | } 323 | 324 | #[cfg_attr(miri, ignore)] 325 | #[test] 326 | fn replace_random() { 327 | let mut rng = rand::rng(); 328 | 329 | for s in [TINY, SMALL, MEDIUM, LARGE] { 330 | let mut r = Rope::from(s); 331 | let mut s = s.to_owned(); 332 | 333 | for _ in 0..20 { 334 | let replace_range = { 335 | let start = rng.random_range(0..=r.byte_len()); 336 | let end = rng.random_range(start..=r.byte_len()); 337 | start..end 338 | }; 339 | 340 | let replace_with = { 341 | let start = rng.random_range(0..=r.byte_len()); 342 | let end = rng.random_range(start..=r.byte_len()); 343 | s[start..end].to_owned() 344 | }; 345 | 346 | r.replace(replace_range.clone(), &replace_with); 347 | s.replace_range(replace_range, &replace_with); 348 | 349 | r.assert_invariants(); 350 | assert_eq!(s, r); 351 | } 352 | } 353 | } 354 | 355 | /// ``` 356 | /// Root 357 | /// ├── "aaa\r" 358 | /// ├── "bbbb" 359 | /// └── "\nccc" 360 | /// ``` 361 | #[test] 362 | fn fix_crlf_0() { 363 | let mut r = Rope::from("aaa\rbbbb\nccc"); 364 | r.delete(4..8); 365 | r.assert_invariants(); 366 | assert_eq!(r, "aaa\r\nccc"); 367 | } 368 | 369 | /// ``` 370 | /// Root 371 | /// └── "aaa\r" 372 | /// ``` 373 | #[test] 374 | fn fix_crlf_1() { 375 | let mut r = Rope::from("aaa\r"); 376 | r.insert(4, "\nbbb"); 377 | r.assert_invariants(); 378 | assert_eq!(r, "aaa\r\nbbb"); 379 | } 380 | 381 | /// ``` 382 | /// Root 383 | /// 
├── "aaaa" 384 | /// ├── "bbbb" 385 | /// └── "\nccc" 386 | /// ``` 387 | #[test] 388 | fn fix_crlf_2() { 389 | let mut r = Rope::from("aaaabbbb\nccc"); 390 | r.replace(4..8, "ddd\r"); 391 | r.assert_invariants(); 392 | assert_eq!(r, "aaaaddd\r\nccc"); 393 | } 394 | 395 | #[test] 396 | fn fix_crlf_4() { 397 | let mut r = 398 | Rope::from("\r\n\r\n\r\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n"); 399 | 400 | r.delete(3..6); 401 | 402 | r.assert_invariants(); 403 | 404 | assert_eq!(r, "\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n"); 405 | } 406 | -------------------------------------------------------------------------------- /tests/serde.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "serde")] 2 | mod tests { 3 | use crop::Rope; 4 | 5 | #[test] 6 | fn ser_de_empty() { 7 | let rope = Rope::new(); 8 | 9 | serde_test::assert_tokens( 10 | &rope, 11 | &[serde_test::Token::Seq { len: None }, serde_test::Token::SeqEnd], 12 | ); 13 | } 14 | 15 | #[test] 16 | #[cfg_attr(feature = "small_chunks", ignore)] 17 | fn ser_de_single_chunk() { 18 | let mut rope = Rope::new(); 19 | rope.insert(0, "lorem "); 20 | rope.insert(6, "ipsum"); 21 | 22 | serde_test::assert_tokens( 23 | &rope, 24 | &[ 25 | serde_test::Token::Seq { len: None }, 26 | serde_test::Token::Str("lorem ipsum"), 27 | serde_test::Token::SeqEnd, 28 | ], 29 | ); 30 | } 31 | 32 | #[test] 33 | #[cfg_attr(feature = "small_chunks", ignore)] 34 | fn ser_de_multiple_chunks() { 35 | let mut rope = Rope::new(); 36 | rope.insert(0, "lorem dolor"); 37 | rope.insert(6, "ipsuma "); 38 | rope.delete(11..12); 39 | 40 | serde_test::assert_tokens( 41 | &rope, 42 | &[ 43 | serde_test::Token::Seq { len: None }, 44 | serde_test::Token::Str("lorem ipsum"), 45 | serde_test::Token::Str(" dolor"), 46 | serde_test::Token::SeqEnd, 47 | ], 48 | ); 49 | } 50 | 51 | #[test] 52 | #[cfg_attr(feature = "small_chunks", ignore)] 53 | fn ser_de_lf() { 54 | let mut rope = Rope::new(); 55 | 
rope.insert(0, "lorem\n"); 56 | rope.insert(6, "ipsum"); 57 | 58 | serde_test::assert_tokens( 59 | &rope, 60 | &[ 61 | serde_test::Token::Seq { len: None }, 62 | serde_test::Token::Str("lorem\nipsum"), 63 | serde_test::Token::SeqEnd, 64 | ], 65 | ); 66 | } 67 | 68 | #[test] 69 | #[cfg_attr(feature = "small_chunks", ignore)] 70 | fn ser_de_crlf() { 71 | let mut rope = Rope::new(); 72 | rope.insert(0, "lorem\r\n"); 73 | rope.insert(7, "ipsum"); 74 | 75 | serde_test::assert_tokens( 76 | &rope, 77 | &[ 78 | serde_test::Token::Seq { len: None }, 79 | serde_test::Token::Str("lorem\r\nipsum"), 80 | serde_test::Token::SeqEnd, 81 | ], 82 | ); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /tests/slice_indexing.rs: -------------------------------------------------------------------------------- 1 | use crop::Rope; 2 | use rand::Rng; 3 | 4 | mod common; 5 | 6 | use common::{CURSED_LIPSUM, LARGE, MEDIUM, SMALL, TINY}; 7 | 8 | /// Tests `RopeSlice::byte()` on a bunch of random RopeSlices over different 9 | /// texts. 10 | #[cfg_attr(miri, ignore)] 11 | #[test] 12 | fn byte_random() { 13 | let mut rng = rand::rng(); 14 | 15 | for s in [TINY, SMALL, MEDIUM, LARGE] { 16 | let r = Rope::from(s); 17 | 18 | for _ in 0..10 { 19 | let start = rng.random_range(0..=r.byte_len()); 20 | let end = rng.random_range(start..=r.byte_len()); 21 | 22 | let str_slice = &s[start..end]; 23 | let rope_slice = r.byte_slice(start..end); 24 | 25 | for (idx, byte) in str_slice.bytes().enumerate() { 26 | if byte != rope_slice.byte(idx) { 27 | println!( 28 | "byte index: {idx}, byte range: {:?}", 29 | start..end 30 | ); 31 | assert_eq!(byte, rope_slice.byte(idx)); 32 | } 33 | } 34 | } 35 | } 36 | } 37 | 38 | /// Tests `RopeSlice::is_char_boundary()` on a bunch of random RopeSlices over 39 | /// different texts. 
40 | #[cfg_attr(miri, ignore)] 41 | #[test] 42 | fn is_char_boundary_random() { 43 | let mut rng = rand::rng(); 44 | 45 | for s in [CURSED_LIPSUM, TINY, SMALL, MEDIUM, LARGE] { 46 | let r = Rope::from(s); 47 | 48 | for _ in 0..10 { 49 | let start = rng.random_range(0..=r.byte_len()); 50 | let end = rng.random_range(start..=r.byte_len()); 51 | 52 | if !(s.is_char_boundary(start) && s.is_char_boundary(end)) { 53 | continue; 54 | } 55 | 56 | let str_slice = &s[start..end]; 57 | let rope_slice = r.byte_slice(start..end); 58 | 59 | for byte_idx in 0..rope_slice.byte_len() { 60 | if str_slice.is_char_boundary(byte_idx) 61 | != rope_slice.is_char_boundary(byte_idx) 62 | { 63 | println!("byte range: {:?}", start..end); 64 | assert_eq!( 65 | str_slice.is_char_boundary(byte_idx), 66 | rope_slice.is_char_boundary(byte_idx) 67 | ); 68 | } 69 | } 70 | } 71 | } 72 | } 73 | 74 | /// Tests `crop::RopeSlice::line_of_byte()` against Ropey's 75 | /// `ropey::RopeSlice::byte_to_line()`. 76 | #[cfg_attr(miri, ignore)] 77 | #[test] 78 | fn line_of_byte_random() { 79 | let mut rng = rand::rng(); 80 | 81 | for s in [TINY, SMALL, MEDIUM, LARGE] { 82 | let crop = Rope::from(s); 83 | let ropey = ropey::Rope::from(s); 84 | 85 | for _ in 0..100 { 86 | let start = rng.random_range(0..crop.byte_len()); 87 | let end = rng.random_range(start + 1..=crop.byte_len()); 88 | let range = start..end; 89 | 90 | let crop_slice = crop.byte_slice(range.clone()); 91 | let ropey_slice = ropey.byte_slice(range.clone()); 92 | 93 | for _ in 0..10 { 94 | let byte_offset = rng.random_range(0..=crop_slice.byte_len()); 95 | let crop_line_offset = crop_slice.line_of_byte(byte_offset); 96 | let ropey_line_offset = ropey_slice.byte_to_line(byte_offset); 97 | 98 | if crop_line_offset != ropey_line_offset { 99 | println!("byte offset: {byte_offset}"); 100 | println!("byte range: {:?}", start..end); 101 | assert_eq!(crop_line_offset, ropey_line_offset) 102 | } 103 | } 104 | } 105 | } 106 | } 107 | 108 | /// Tests 
`crop::RopeSlice::byte_of_line()` against Ropey's 109 | /// `ropey::RopeSlice::line_to_byte()`. 110 | #[cfg_attr(miri, ignore)] 111 | #[test] 112 | fn byte_of_line_random() { 113 | let mut rng = rand::rng(); 114 | 115 | for s in [TINY, SMALL, MEDIUM, LARGE] { 116 | let crop = Rope::from(s); 117 | let ropey = ropey::Rope::from(s); 118 | 119 | for _ in 0..100 { 120 | let start = rng.random_range(0..crop.byte_len()); 121 | let end = rng.random_range(start + 1..=crop.byte_len()); 122 | let range = start..end; 123 | 124 | let crop_slice = crop.byte_slice(range.clone()); 125 | let ropey_slice = ropey.byte_slice(range.clone()); 126 | 127 | for _ in 0..10 { 128 | let line_offset = rng.random_range(0..=crop_slice.line_len()); 129 | let crop_byte_offset = crop_slice.byte_of_line(line_offset); 130 | let ropey_byte_offset = ropey_slice.line_to_byte(line_offset); 131 | 132 | if crop_byte_offset != ropey_byte_offset { 133 | println!("line offset: {line_offset}"); 134 | println!("byte range: {range:?}"); 135 | assert_eq!(crop_byte_offset, ropey_byte_offset) 136 | } 137 | } 138 | } 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /tests/slicing.rs: -------------------------------------------------------------------------------- 1 | use crop::Rope; 2 | use rand::Rng; 3 | 4 | mod common; 5 | 6 | use common::{CURSED_LIPSUM, LARGE, MEDIUM, SMALL, TINY}; 7 | 8 | #[test] 9 | fn byte_slice_empty() { 10 | let r = Rope::from(""); 11 | let s = r.byte_slice(..); 12 | assert!(s.is_empty()); 13 | } 14 | 15 | /// Tests that slicing at the start, at a node boundary and at the end of a 16 | /// Rope works correctly. 
17 | /// ``` 18 | /// Root 19 | /// ├───┐ 20 | /// │ ├── "aaaa" 21 | /// │ ├── "bbbb" 22 | /// │ ├── "cccc" 23 | /// │ └── "dddd" 24 | /// └───┐ 25 | /// ├── "eeee" 26 | /// ├── "ffff" 27 | /// ├── "gggg" 28 | /// └── "hhhh" 29 | /// ``` 30 | #[cfg(all(feature = "small_chunks", feature = "arity_4"))] 31 | #[test] 32 | fn byte_slice_0() { 33 | let r = Rope::from("aaaabbbbccccddddeeeeffffgggghhhh"); 34 | 35 | let s = r.byte_slice(..0); 36 | s.assert_invariants(); 37 | assert_eq!(s, ""); 38 | 39 | let s = r.byte_slice(16..16); 40 | s.assert_invariants(); 41 | assert_eq!(s, ""); 42 | 43 | let s = r.byte_slice(16..20); 44 | s.assert_invariants(); 45 | assert_eq!(s, "eeee"); 46 | 47 | let s = r.byte_slice(r.byte_len()..); 48 | s.assert_invariants(); 49 | assert_eq!(s, ""); 50 | } 51 | 52 | /// Tests that repeatedly byte-slicing a RopeSlice always matches the 53 | /// equivalent str slice. 54 | #[cfg_attr(miri, ignore)] 55 | #[test] 56 | fn byte_slice_random() { 57 | let mut rng = rand::rng(); 58 | 59 | for s in [TINY, SMALL, MEDIUM, LARGE] { 60 | let r = Rope::from(s); 61 | 62 | let mut start = 0; 63 | let mut end = r.byte_len(); 64 | 65 | let mut str_slice = &s[start..end]; 66 | let mut rope_slice = r.byte_slice(start..end); 67 | 68 | let mut ranges = Vec::new(); 69 | 70 | while start != end { 71 | rope_slice.assert_invariants(); 72 | ranges.push(start..end); 73 | if str_slice != rope_slice { 74 | println!("byte ranges: {ranges:?}"); 75 | assert_eq!(str_slice, rope_slice); 76 | } 77 | start = rng.random_range(0..=rope_slice.byte_len()); 78 | end = rng.random_range(start..=rope_slice.byte_len()); 79 | str_slice = &str_slice[start..end]; 80 | rope_slice = rope_slice.byte_slice(start..end); 81 | } 82 | } 83 | } 84 | 85 | #[test] 86 | fn line_slice_empty() { 87 | let r = Rope::from(""); 88 | let s = r.line_slice(..); 89 | assert!(s.is_empty()); 90 | } 91 | 92 | #[test] 93 | fn line_slice_0() { 94 | let r = Rope::from("aaa\nbbb\nccc\nddd\neee\nfff\n"); 95 | 96 | let s = 
r.line_slice(1..2); 97 | assert_eq!("bbb\n", s); 98 | 99 | let s = r.line_slice(4..); 100 | assert_eq!("eee\nfff\n", s); 101 | 102 | let s = r.line_slice(..); 103 | assert_eq!(r, s); 104 | } 105 | 106 | #[test] 107 | fn line_slice_1() { 108 | let r = Rope::from("Hello world"); 109 | assert_eq!(1, r.line_len()); 110 | assert_eq!("Hello world", r.line_slice(..)); 111 | 112 | let r = Rope::from("Hello world\n"); 113 | assert_eq!(1, r.line_len()); 114 | assert_eq!("Hello world\n", r.line_slice(..)); 115 | 116 | let r = Rope::from("Hello world\nthis is\na test"); 117 | assert_eq!("Hello world\n", r.line_slice(..1)); 118 | assert_eq!("Hello world\nthis is\n", r.line_slice(..2)); 119 | assert_eq!("Hello world\nthis is\na test", r.line_slice(..3)); 120 | assert_eq!("Hello world\nthis is\na test", r.line_slice(..)); 121 | 122 | let r = Rope::from("Hello world\nthis is\na test\n"); 123 | assert_eq!("Hello world\nthis is\na test\n", r.line_slice(..3)); 124 | assert_eq!("Hello world\nthis is\na test\n", r.line_slice(..)); 125 | } 126 | 127 | #[test] 128 | fn byte_slice_then_line() { 129 | let r = Rope::from("foo\nbar\r\nbaz\nfoobar\n"); 130 | let s = r.byte_slice(2..17); 131 | 132 | println!("{s:?}"); 133 | 134 | assert_eq!(s.line_slice(..1), "o\n"); 135 | assert_eq!(s.line_slice(1..3), "bar\r\nbaz\n"); 136 | assert_eq!(s.line_slice(3..), "foob"); 137 | } 138 | 139 | #[cfg_attr(miri, ignore)] 140 | #[test] 141 | fn line_slices_random() { 142 | let mut rng = rand::rng(); 143 | 144 | for s in [TINY, SMALL, MEDIUM, LARGE] { 145 | let r = Rope::from(s); 146 | 147 | let mut start = 0; 148 | let mut end = r.byte_len(); 149 | 150 | let mut str_slice = &s[start..end]; 151 | let mut rope_slice = r.byte_slice(start..end); 152 | 153 | let line_offsets = { 154 | let mut offset = 0; 155 | 156 | rope_slice 157 | .raw_lines() 158 | .map(|line| { 159 | let o = offset; 160 | offset += line.byte_len(); 161 | o 162 | }) 163 | .collect::<Vec<_>>() 164 | }; 165 | 166 | assert_eq!(line_offsets.len(), 
rope_slice.line_len()); 167 | 168 | let mut offset = 0; 169 | 170 | while start != end { 171 | rope_slice.assert_invariants(); 172 | 173 | assert_eq!(str_slice, rope_slice); 174 | 175 | start = rng.random_range(0..rope_slice.line_len()); 176 | end = rng.random_range(start..rope_slice.line_len()); 177 | 178 | str_slice = 179 | &s[line_offsets[offset + start]..line_offsets[offset + end]]; 180 | 181 | rope_slice = rope_slice.line_slice(start..end); 182 | 183 | offset += start; 184 | } 185 | } 186 | } 187 | 188 | /// Tests that a Rope created from a RopeSlice always matches the original 189 | /// content while also satisfying its invariants. 190 | #[test] 191 | fn rope_from_slice() { 192 | let mut rng = rand::rng(); 193 | 194 | let slices = if cfg!(miri) { 195 | ["Hello world", "ƒoo", "bär", "baz", "🗻∈🌏"] 196 | } else { 197 | [TINY, SMALL, MEDIUM, LARGE, CURSED_LIPSUM] 198 | }; 199 | 200 | for s in slices { 201 | let r = Rope::from(s); 202 | 203 | for _ in 0..100 { 204 | let mut start = rng.random_range(0..=r.byte_len()); 205 | 206 | while !r.is_char_boundary(start) { 207 | start += 1; 208 | } 209 | 210 | let mut end = rng.random_range(start..=r.byte_len()); 211 | 212 | while !r.is_char_boundary(end) { 213 | end += 1; 214 | } 215 | 216 | let s = r.byte_slice(start..end); 217 | let r = Rope::from(s); 218 | r.assert_invariants(); 219 | assert_eq!(r, s); 220 | } 221 | } 222 | } 223 | -------------------------------------------------------------------------------- /tests/utf16_conversion.rs: -------------------------------------------------------------------------------- 1 | mod common; 2 | 3 | #[cfg(feature = "utf16-metric")] 4 | mod tests { 5 | use crop::Rope; 6 | 7 | use crate::common::{TEXT, TEXT_EMOJI}; 8 | 9 | #[test] 10 | fn utf16_len_0() { 11 | let r = Rope::from(TEXT); 12 | assert_eq!(r.utf16_len(), 103); 13 | 14 | let s = r.byte_slice(..); 15 | assert_eq!(s.utf16_len(), 103); 16 | } 17 | 18 | #[test] 19 | fn utf16_len_1() { 20 | let r = Rope::from(TEXT_EMOJI); 21 
| assert_eq!(r.utf16_len(), 111); 22 | 23 | let s = r.byte_slice(..); 24 | assert_eq!(s.utf16_len(), 111); 25 | } 26 | 27 | #[test] 28 | fn utf16_len_2() { 29 | let r = Rope::new(); 30 | assert_eq!(r.utf16_len(), 0); 31 | } 32 | 33 | #[test] 34 | fn utf16_len_3() { 35 | let r = Rope::from("🐸"); 36 | assert_eq!(r.utf16_len(), 2); 37 | 38 | let r = Rope::from(TEXT_EMOJI); 39 | let s = r.byte_slice(16..39); 40 | assert_eq!(s.utf16_len(), 21); 41 | } 42 | 43 | #[test] 44 | fn byte_to_utf16_0() { 45 | let r = Rope::new(); 46 | assert_eq!(r.utf16_code_unit_of_byte(0), 0); 47 | 48 | let s = r.byte_slice(..); 49 | assert_eq!(s.utf16_code_unit_of_byte(0), 0); 50 | } 51 | 52 | #[should_panic] 53 | #[test] 54 | fn byte_to_utf16_1() { 55 | let r = Rope::new(); 56 | let _ = r.utf16_code_unit_of_byte(1); 57 | } 58 | 59 | #[test] 60 | fn byte_to_utf16_2() { 61 | let r = Rope::from("🐸"); 62 | assert_eq!(r.utf16_code_unit_of_byte(4), 2); 63 | 64 | let s = r.byte_slice(..); 65 | assert_eq!(s.utf16_code_unit_of_byte(4), 2); 66 | } 67 | 68 | #[test] 69 | fn byte_to_utf16_3() { 70 | let r = Rope::from(TEXT_EMOJI); 71 | 72 | assert_eq!(0, r.utf16_code_unit_of_byte(0)); 73 | 74 | assert_eq!(12, r.utf16_code_unit_of_byte(12)); 75 | assert_eq!(14, r.utf16_code_unit_of_byte(16)); 76 | 77 | assert_eq!(33, r.utf16_code_unit_of_byte(35)); 78 | assert_eq!(35, r.utf16_code_unit_of_byte(39)); 79 | 80 | assert_eq!(63, r.utf16_code_unit_of_byte(67)); 81 | assert_eq!(65, r.utf16_code_unit_of_byte(71)); 82 | 83 | assert_eq!(95, r.utf16_code_unit_of_byte(101)); 84 | assert_eq!(97, r.utf16_code_unit_of_byte(105)); 85 | 86 | assert_eq!(111, r.utf16_code_unit_of_byte(143)); 87 | } 88 | 89 | #[test] 90 | fn byte_to_utf16_4() { 91 | let r = Rope::from(TEXT_EMOJI); 92 | let s = r.byte_slice(..); 93 | 94 | assert_eq!(0, s.utf16_code_unit_of_byte(0)); 95 | 96 | assert_eq!(12, s.utf16_code_unit_of_byte(12)); 97 | assert_eq!(14, s.utf16_code_unit_of_byte(16)); 98 | 99 | assert_eq!(33, 
s.utf16_code_unit_of_byte(35)); 100 | assert_eq!(35, s.utf16_code_unit_of_byte(39)); 101 | 102 | assert_eq!(63, s.utf16_code_unit_of_byte(67)); 103 | assert_eq!(65, s.utf16_code_unit_of_byte(71)); 104 | 105 | assert_eq!(95, s.utf16_code_unit_of_byte(101)); 106 | assert_eq!(97, s.utf16_code_unit_of_byte(105)); 107 | 108 | assert_eq!(111, s.utf16_code_unit_of_byte(143)); 109 | } 110 | 111 | #[should_panic] 112 | #[test] 113 | fn byte_to_utf16_5() { 114 | let r = Rope::from(TEXT_EMOJI); 115 | let _ = r.utf16_code_unit_of_byte(13); 116 | } 117 | 118 | #[should_panic] 119 | #[test] 120 | fn byte_to_utf16_6() { 121 | let r = Rope::from(TEXT_EMOJI); 122 | let s = r.byte_slice(..); 123 | let _ = s.utf16_code_unit_of_byte(13); 124 | } 125 | 126 | #[test] 127 | fn utf16_to_byte_0() { 128 | let r = Rope::new(); 129 | assert_eq!(r.byte_of_utf16_code_unit(0), 0); 130 | 131 | let s = r.byte_slice(..); 132 | assert_eq!(s.byte_of_utf16_code_unit(0), 0); 133 | } 134 | 135 | #[should_panic] 136 | #[test] 137 | fn utf16_to_byte_1() { 138 | let r = Rope::new(); 139 | let _ = r.byte_of_utf16_code_unit(1); 140 | } 141 | 142 | #[test] 143 | fn utf16_to_byte_2() { 144 | let r = Rope::from("🐸"); 145 | assert_eq!(r.byte_of_utf16_code_unit(2), 4); 146 | 147 | let s = r.byte_slice(..); 148 | assert_eq!(s.byte_of_utf16_code_unit(2), 4); 149 | } 150 | 151 | #[test] 152 | fn utf16_to_byte_3() { 153 | let r = Rope::from(TEXT_EMOJI); 154 | 155 | assert_eq!(0, r.byte_of_utf16_code_unit(0)); 156 | 157 | assert_eq!(12, r.byte_of_utf16_code_unit(12)); 158 | assert_eq!(16, r.byte_of_utf16_code_unit(14)); 159 | 160 | assert_eq!(35, r.byte_of_utf16_code_unit(33)); 161 | assert_eq!(39, r.byte_of_utf16_code_unit(35)); 162 | 163 | assert_eq!(67, r.byte_of_utf16_code_unit(63)); 164 | assert_eq!(71, r.byte_of_utf16_code_unit(65)); 165 | 166 | assert_eq!(101, r.byte_of_utf16_code_unit(95)); 167 | assert_eq!(105, r.byte_of_utf16_code_unit(97)); 168 | 169 | assert_eq!(143, r.byte_of_utf16_code_unit(111)); 170 | 
} 171 | 172 | #[test] 173 | fn utf16_to_byte_4() { 174 | let r = Rope::from(TEXT_EMOJI); 175 | let s = r.byte_slice(..); 176 | 177 | assert_eq!(0, s.byte_of_utf16_code_unit(0)); 178 | 179 | assert_eq!(12, s.byte_of_utf16_code_unit(12)); 180 | assert_eq!(16, s.byte_of_utf16_code_unit(14)); 181 | 182 | assert_eq!(35, s.byte_of_utf16_code_unit(33)); 183 | assert_eq!(39, s.byte_of_utf16_code_unit(35)); 184 | 185 | assert_eq!(67, s.byte_of_utf16_code_unit(63)); 186 | assert_eq!(71, s.byte_of_utf16_code_unit(65)); 187 | 188 | assert_eq!(101, s.byte_of_utf16_code_unit(95)); 189 | assert_eq!(105, s.byte_of_utf16_code_unit(97)); 190 | 191 | assert_eq!(143, s.byte_of_utf16_code_unit(111)); 192 | } 193 | 194 | // TODO: we should panic if the given UTF-16 offset doesn't lie on a char 195 | // boundary. Right now we just return the byte offset up to the previous 196 | // char boundary. 197 | #[ignore] 198 | #[should_panic] 199 | #[test] 200 | fn utf16_to_byte_5() { 201 | let r = Rope::from(TEXT_EMOJI); 202 | let _ = r.byte_of_utf16_code_unit(13); 203 | } 204 | 205 | // TODO: see above. 206 | #[ignore] 207 | #[should_panic] 208 | #[test] 209 | fn utf16_to_byte_6() { 210 | let r = Rope::from(TEXT_EMOJI); 211 | let s = r.byte_slice(..); 212 | let _ = s.byte_of_utf16_code_unit(13); 213 | } 214 | } 215 | --------------------------------------------------------------------------------