├── .github
    └── workflows
    │   └── test.yml
├── .gitignore
├── CHANGELOG.md
├── Cargo.toml
├── LICENSE.txt
├── README.md
├── benches
    └── benches.rs
├── benchmark_data
    ├── automerge-paper.json.gz
    ├── rustcode.json.gz
    ├── seph-blog1.json.gz
    └── sveltecomponent.json.gz
├── crdt-testdata
    ├── Cargo.toml
    └── src
    │   └── lib.rs
├── jumprope-wasm
    ├── Cargo.toml
    ├── build_wasm.sh
    └── src
    │   └── lib.rs
├── rope_benches
    ├── Cargo.toml
    ├── README.md
    ├── build.rs
    ├── explore_parameters.js
    ├── rope.c
    ├── rope.h
    ├── src
    │   ├── edittablestr.rs
    │   ├── main.rs
    │   └── rope.rs
    └── table.js
├── src
    ├── buffered.rs
    ├── fast_str_tools.rs
    ├── gapbuffer.rs
    ├── iter.rs
    ├── jumprope.rs
    ├── lib.rs
    └── utils.rs
└── tests
    └── test.rs


/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | env:
10 |   CARGO_TERM_COLOR: always
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 | 
17 |     steps:
18 |     - uses: actions/checkout@v4
19 |     - name: Build
20 |       run: cargo build
21 |     - name: Run tests (base)
22 |       run: cargo test
23 |     - name: Run tests (wchar)
24 |       run: cargo test --features "wchar_conversion"
25 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | target
 2 | Cargo.lock
 3 | .idea
 4 | yarn.lock
 5 | node_modules
 6 | src/params.rs
 7 | stats.md
 8 | bench/*.json
 9 | .*.swp
10 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # CURRENT
 2 | 
 3 | - Added `is_ascii_only` - mainly useful for optimizing lookups for some strings.
 4 | - Microoptimizations of some wchar functions when the rope is ascii only
 5 | 
 6 | # 1.1.2
 7 | 
 8 | - Added explicit `rope.to_string()` method, because doing so is smaller and faster than going through the `Display` trait. Wasm bundle -1.5kb
 9 | - Removed `wee_alloc` from wasm bundle. This makes the wasm bundle +6kb in size, but apparently wee_alloc [has memory leaks and is unmaintained](https://github.com/josephg/jumprope-rs/security/dependabot/1).
10 | - Fixed a terrible bug where `chars_to_wchars` returned the number of surrogate pairs rather than the wchar position. The return value would have been wrong in all cases. Testing fail - ouch!
11 | 
12 | # 1.1.1
13 | 
14 | - Fixed bug where reflexive eq (a == a) would fail for `&JumpRopeBuf`.
15 | 
16 | # 1.1.0
17 | 
18 | - The JumpRopeBuf feature has a lot more methods and is now stable, and included by default. The `buffered` feature flag is no longer needed. It now has no effect, and it will be removed in JumpRope 2.0 (whenever that happens). Please file issues if other useful methods are missing.
19 | - Added Send and Sync markers to `JumpRope`. Thanks to P. Vijay for the suggestion!
20 | 
21 | # 1.0.0
22 | 
23 | - Woohoo!
24 | - **Breaking API change**: Renamed the iterator methods. `rope.chunks()` -> `rope.substrings_with_len()`. Added `rope.substrings()` and `rope.slice_substrings()`.
25 | - Added buffered API, though for now it's experimental and behind a feature flag.
26 | - Made miri pass against jumprope. This involved some changes:
27 |   - The dynamically allocated heights in node.nexts lists have been removed. This results in less unsafe code, but increases the memory overhead of the library.
28 |   - Wasm bundle size has grown
29 |   - Performance is mostly unaffected.
30 | - Bumped to str_indices 0.3.2
31 | - Added Eq trait support to all the combinations of `rope` / `&rope` vs `&str` / `String` / `&String`.
32 | 
33 | 
34 | # 0.5.3
35 | 
36 | - Made Jumprope::new() use a hardcoded seed when ddos_protection is disabled. This makes the module 5kb smaller in wasm and avoids getrandom.
37 | 
38 | # 0.5.2
39 | 
40 | - Swapped from inlined string methods to [`str_indices`](https://crates.io/crates/str_indices). Thanks @cessen!
41 | 
42 | # 0.5.1
43 | 
44 | - Only cosmetic (documentation) changes.
45 | 
46 | # 0.5.0
47 | 
48 | - Added support for wchar based indexing, behind a feature flag. (See documentation for details)
49 | - General performance improvements
50 | - Removed ropey as an explicit dependency, inlining the borrowed methods (for now).
51 | 
52 | # 0.4.0
53 | 
54 | - Breaking API change: Renamed `rope.len()` to `rope.len_bytes()`
55 | - Added `rope.mem_size() -> usize` method for debugging
56 | 
57 | # 0.3.1
58 | 
59 | - Fixed a few critical bugs in iterator code which caused slice_chars() to return incorrect results or crash
60 | 
61 | # 0.3.0
62 | 
63 | - Added iterator support (to iterate by character range)
64 | - Added proper rustdocs for core methods
65 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "jumprope"
 3 | version = "1.1.2"
 4 | authors = ["Joseph Gentle <me@josephg.com>"]
 5 | edition = "2021"
 6 | description = "Simple, fast rope (fancy string) library built on top of Skiplists"
 7 | repository = "https://github.com/josephg/jumprope-rs"
 8 | license = "ISC OR Apache-2.0"
 9 | exclude = [
10 |     "benchmark_data/*",
11 |     ".github/**",
12 | ]
13 | 
14 | [workspace]
15 | members = ["jumprope-wasm", "rope_benches", "crdt-testdata"]
16 | 
17 | [features]
18 | default = ["ddos_protection"]
19 | # ddos_protection makes jumprope use a better RNG algorithm to avoid DOS
20 | # attacks
21 | ddos_protection = []
22 | # wchar_conversion adds support for converting wchar positions (eg from JS or
23 | # ObjC) into local unicode offsets.
24 | wchar_conversion = []
25 | 
26 | # Line conversion adds support for editing using line/column offsets instead of character offsets.
27 | line_conversion = []
28 | 
29 | # TODO: Remove me for 2.0 - the buffered feature is no longer needed.
30 | buffered = []
31 | 
32 | [dependencies]
33 | rand = { version = "0.8", features = ["small_rng"] }
34 | str_indices = "0.4.0"
35 | 
36 | [dev-dependencies]
37 | criterion = "0.4.0"
38 | crdt-testdata = { path = "./crdt-testdata" }
39 | 
40 | [[bench]]
41 | name = "benches"
42 | harness = false
43 | 
44 | [profile.release]
45 | lto = true
46 | codegen-units = 1
47 | 
48 | [profile.release.package.jumprope-wasm]
49 | #opt-level = "s"
50 | opt-level = 2
51 | 
52 | 
53 | [package.metadata.docs.rs]
54 | features = ["wchar_conversion"]


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | ISC License
 2 | 
 3 | Copyright 2022 Joseph Gentle
 4 | 
 5 | Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted,
 6 | provided that the above copyright notice and this permission notice appear in all copies.
 7 | 
 8 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
 9 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
10 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
11 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
12 | THIS SOFTWARE.
13 | 
14 | ---
15 | 
16 | Some code in fast_str_tools licensed separately (as follows):
17 | 
18 | Copyright (c) 2017 Nathan Vegdahl
19 | 
20 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
21 | associated documentation files (the "Software"), to deal in the Software without restriction,
22 | including without limitation the rights to use, copy, modify, merge, publish, distribute,
23 | sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
24 | furnished to do so, subject to the following conditions:
25 | 
26 | The above copyright notice and this permission notice shall be included in all copies or substantial
27 | portions of the Software.
28 | 
29 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
30 | NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
32 | OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # JumpRope
  2 | 
  3 | Because inserting into a string should be fast.
  4 | 
  5 | A [rope](https://en.wikipedia.org/wiki/Rope_(data_structure)) is a data structure for efficiently editing large strings, or for processing editing traces.
  6 | 
  7 | As far as I know, JumpRope is the world's fastest rope implementation.
  8 | 
  9 | Unlike traditional strings, JumpRope allows you to:
 10 | 
 11 | - Efficiently insert or delete arbitrary keystrokes from anywhere in the document. Using real world editing traces, jumprope can process about 35-40 million edits per second.
 12 | - Index using unicode character offsets or wchar offsets (like you find in JS and other languages). Jumprope can efficiently convert between these formats.
 13 | 
 14 | JumpRope is optimized for large strings like source code files and text documents. If your strings are very small (less than 100 bytes), you should probably just use Rust's built in [std String](https://doc.rust-lang.org/std/string/struct.String.html) or a small-string-optimized string library like [SmartString](https://crates.io/crates/smartstring).
 15 | 
 16 | JumpRope is similar to [ropey](https://crates.io/crates/ropey). Ropey supports a few more features (like converting line/column positions). However, jumprope is about 3x faster than ropey when processing real editing operations (see below) and jumprope compiles to a smaller wasm bundle. (Ropey is 30kb brotli compressed, vs 18kb for jumprope).
 17 | 
 18 | [API documentation](https://docs.rs/jumprope/)
 19 | 
 20 | [Jumprope on crates.io](https://crates.io/crates/jumprope)
 21 | 
 22 | Add this to Cargo.toml to use:
 23 | 
 24 | ```toml
 25 | jumprope = "1.0.0"
 26 | ```
 27 | 
 28 | 
 29 | # Usage
 30 | 
 31 | JumpRope isn't a drop-in replacement for string, but it supports many similar methods. The most important additions are the [`insert`](https://docs.rs/jumprope/latest/jumprope/struct.JumpRope.html#method.insert), [`remove`](https://docs.rs/jumprope/latest/jumprope/struct.JumpRope.html#method.remove) and [`replace`](https://docs.rs/jumprope/latest/jumprope/struct.JumpRope.html#method.replace) methods - which let you edit strings in-place in (typically) `log(n)` time relative to the size of the existing document.
 32 | 
 33 | ```rust
 34 | use jumprope::JumpRope;
 35 | 
 36 | fn main() {
 37 |     let mut rope = JumpRope::from("Some large text document");
 38 |     rope.insert(5, "really "); // "Some really large text document"
 39 |     rope.replace(0..4, "My rad");  // "My rad really large text document"
 40 |     assert_eq!(rope, "My rad really large text document");
 41 | 
 42 |     // Extract to a string
 43 |     let s: String = rope.to_string();
 44 |     assert_eq!(s, "My rad really large text document");
 45 | }
 46 | ```
 47 | 
 48 | You can read content back out of a rope by:
 49 | 
 50 | - Converting the rope to a string using `rope.to_string()` (requires allocations)
 51 | - Iterating over characters using [`rope.chars()`](https://docs.rs/jumprope/latest/jumprope/struct.JumpRope.html#method.chars)
 52 | - (Fastest) iterating over &str chunks with [`rope.substrings()`](https://docs.rs/jumprope/latest/jumprope/struct.JumpRope.html#method.substrings). This returns an iterator over contained `&str` items in the document.
 53 | 
 54 | If you want to read a subsection of the rope, you can use [`rope.slice_substrings(10..20)`](https://docs.rs/jumprope/latest/jumprope/struct.JumpRope.html#method.slice_substrings) to read all the content within a given range in the rope. Eg:
 55 | 
 56 | ```rust
 57 | fn main() {
 58 |     let rope = JumpRope::from("xxxGreetings!xxx");
 59 | 
 60 |     let string = rope.slice_substrings(3..13).collect::<String>();
 61 |     assert_eq!(string, "Greetings!");
 62 | }
 63 | ```
 64 | 
 65 | For more details, see [JumpRope API documentation](https://docs.rs/jumprope/latest/jumprope/struct.JumpRope.html)
 66 | 
 67 | 
 68 | ## Wchar conversion
 69 | 
 70 | In some languages (notably Javascript, Java and C#) strings are measured by the number of 2-byte "characters" needed when encoding the string using UTF16.
 71 | 
 72 | This is awkward because it's difficult to efficiently convert between unicode character offsets (used by jumprope, diamond types and other editors) and these editing locations. The naive approach is an O(n) operation.
 73 | 
 74 | Jumprope supports doing this conversion in `O(log n)` time, by adding extra indexing information to the skip list. This feature is disabled by default, because the extra bookkeeping slows down jumprope by about 15%.
 75 | 
 76 | To use this feature, enable the `wchar_conversion` feature flag:
 77 | 
 78 | ```toml
 79 | jumprope = { version = "1.0.0", features = ["wchar_conversion"] }
 80 | ```
 81 | 
 82 | This feature flag enables a bunch of extra wchar-related methods for interacting with a document:
 83 | 
 84 | - `rope.len_wchars() -> usize`: Return the length of the string in wchars.
 85 | - `rope.chars_to_wchars(chars: usize) -> usize`: Convert a char offset to a wchar offset
 86 | - `rope.wchars_to_chars(wchars: usize) -> usize`: Convert a wchar index back to a unicode character count
 87 | - `rope.insert_at_wchar(pos_wchar: usize, content: &str)`: Insert `content` at the specified wchar offset
 88 | - `rope.remove_at_wchar(range: Range<usize>)`: Remove the specified range, specified using wchar offsets
 89 | - `rope.replace_at_wchar(range: Range<usize>, content: &str)`: Replace the specified range with `content`
 90 | 
 91 | See [documentation on docs.rs](https://docs.rs/jumprope/latest/jumprope/struct.JumpRope.html) for more information about these methods.
 92 | 
 93 | 
 94 | ## Buffered strings
 95 | 
 96 | JumpRope also has an API for buffered edits. Usually when humans edit a string, they insert or delete runs of characters. If you merge these editing runs together before applying them, jumprope is about 10x faster again.
 97 | 
 98 | Jumprope provides a wrapper API to do this transparently in the form of [JumpRopeBuf](https://docs.rs/jumprope/latest/jumprope/struct.JumpRopeBuf.html). JumpRopeBuf does a best-effort attempt to merge incoming writes together before flushing (writing) them to the contained jumprope object.
 99 | 
100 | This API may be missing some methods found on `JumpRope`. You can usually work around any missing methods by calling `rope.borrow()` or `rope.as_mut()` to flush pending changes and access a pointer to the underlying rope. But please file issues if you find any missing functions, because adding direct implementations will usually result in better performance.
101 | 
102 | See [JumpRopeBuf module documentation](https://docs.rs/jumprope/latest/jumprope/struct.JumpRopeBuf.html) for usage.
103 | 
104 | 
105 | ## History / motivation
106 | 
107 | This code is based on an older [skiplist based C rope library](https://github.com/josephg/librope) I wrote several years ago as an excuse to play with skip lists. It has a few notable differences:
108 | 
109 | - Instead of simply being implemented as a skiplist, jumprope is a skiplist where each leaf node contains a [Gap Buffer](https://en.wikipedia.org/wiki/Gap_buffer).
110 | - Jumprope is faster. (See table below)
111 | 
112 | 
113 | ## Benchmarks
114 | 
115 | Running the [editing traces from crdt-benchmarks](https://github.com/josephg/crdt-benchmarks), jumprope is faster than any other library in cargo that I know of:
116 | 
117 | Running on a single core of a Ryzen 5800X:
118 | 
119 | | Dataset         | Raw string | XiRope    | Ropey    | librope (C) | Jumprope |
120 | |-----------------|------------|-----------|----------|-------------|----------|
121 | | automerge-paper | 3908.13 ms | 518.75 ms | 25.16 ms | 16.28 ms    | 6.66 ms  |
122 | | rustcode        | 569.44 ms  | DNF       | 4.71 ms  | 3.93 ms     | 1.66 ms  |
123 | | sveltecomponent | 41.05 ms   | 24.83 ms  | 2.31 ms  | 1.59 ms     | 0.59 ms  |
124 | | seph-blog1      | 1238.44 ms | DNF       | 13.04 ms | 10.01 ms    | 3.81 ms  |
125 | 
126 | Full criterion report is [here](https://home.seph.codes/public/rope_bench/report/).
127 | 
128 | I tried AnRope as well, but it crashed while processing these datasets.
129 | 
130 | 
131 | # LICENSE
132 | 
133 | Licensed under the ISC license:
134 | 
135 | Copyright 2018 Joseph Gentle
136 | 
137 | Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
138 | 
139 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.


--------------------------------------------------------------------------------
/benches/benches.rs:
--------------------------------------------------------------------------------
  1 | use criterion::*;
  2 | use crdt_testdata::*;
  3 | 
  4 | use jumprope::{JumpRope, JumpRopeBuf};
  5 | /// Number of `char`s (Unicode scalar values) in `s`, which can be smaller than its byte length.
  6 | fn count_chars(s: &str) -> usize {
  7 |     s.chars().count()
  8 | }
  9 | /// A single edit to replay against a rope, as produced by `collapse`.
 10 | #[derive(Debug, Clone)]
 11 | enum Op {
 12 |     Ins(usize, String), // (position, text to insert at that position)
 13 |     Del(usize, usize), // (position, length of the span to delete starting there)
 14 | }
 15 | use Op::*;
 16 | 
 17 | fn collapse(test_data: &TestData) -> Vec<Op> {
 18 |     // Consecutive ops that extend the previously emitted op are folded into it.
 19 |     let mut ops: Vec<Op> = Vec::new();
 20 | 
 21 |     // Either extends the last op in `ops` with `next`, or appends `next` as a new op.
 22 |     let mut push_merged = |next: Op| {
 23 |         let absorbed = match (&next, ops.last_mut()) {
 24 |             // An insert that starts right where the previous insert ended.
 25 |             (Ins(at, text), Some(Ins(prev_at, prev_text)))
 26 |                 if *at == *prev_at + count_chars(&prev_text) =>
 27 |             {
 28 |                 prev_text.push_str(text.as_str());
 29 |                 true
 30 |             }
 31 |             // The new delete follows the old (same position).
 32 |             (Del(at, len), Some(Del(prev_at, prev_len))) if *at == *prev_at => {
 33 |                 *prev_len += *len;
 34 |                 true
 35 |             }
 36 |             // The new delete is a backspace (it ends where the old one starts).
 37 |             (Del(at, len), Some(Del(prev_at, prev_len))) if *at + *len == *prev_at => {
 38 |                 *prev_at = *at;
 39 |                 *prev_len += *len;
 40 |                 true
 41 |             }
 42 |             _ => false,
 43 |         };
 44 | 
 45 |         if !absorbed { ops.push(next); }
 46 |     };
 47 | 
 48 |     let patches = test_data.txns.iter().flat_map(|txn| txn.patches.iter());
 49 |     for TestPatch(pos, del_span, ins_content) in patches {
 50 |         // Each patch is replayed as an optional delete followed by an optional insert.
 51 |         if *del_span > 0 {
 52 |             push_merged(Op::Del(*pos, *del_span));
 53 |         }
 54 |         if !ins_content.is_empty() {
 55 |             push_merged(Op::Ins(*pos, ins_content.clone()));
 56 |         }
 57 |     }
 58 |     ops
 59 | }
 60 | 
 61 | fn testing_data(name: &str) -> TestData {
 62 |     // `name` is the dataset's file stem inside the `benchmark_data` directory.
 63 |     load_testing_data(&format!("benchmark_data/{}.json.gz", name))
 64 | }
 65 | 
 66 | const DATASETS: &[&str] = &["automerge-paper", "rustcode", "sveltecomponent", "seph-blog1"];
 67 | /// Criterion benchmark: replays each dataset in `DATASETS` against `JumpRope` and `JumpRopeBuf`.
 68 | fn realworld_benchmarks(c: &mut Criterion) {
 69 |     for name in DATASETS {
 70 |         let mut group = c.benchmark_group("testdata");
 71 |         // let mut group = c.benchmark_group("local");
 72 |         let test_data = testing_data(name);
 73 |         let merged = collapse(&test_data);
 74 |         assert_eq!(test_data.start_content.len(), 0); // Both benchmarks start from an empty rope.
 75 |         // Throughput = sum over all patches of (delete length + inserted text length in bytes).
 76 |         let len = test_data.txns.iter()
 77 |             .flat_map(|txn| txn.patches.iter() )
 78 |             .map(|patch| patch.1 + patch.2.len())
 79 |             .sum::<usize>();
 80 |         group.throughput(Throughput::Elements(len as u64));
 81 |         // "direct": apply every raw patch to a `JumpRope` with one `replace` call each.
 82 |         group.bench_function(BenchmarkId::new("direct", name), |b| {
 83 |             b.iter(|| {
 84 |                 let mut rope = JumpRope::new();
 85 |                 for txn in test_data.txns.iter() {
 86 |                     for TestPatch(pos, del_span, ins_content) in &txn.patches {
 87 |                         rope.replace(*pos .. *pos + *del_span, ins_content);
 88 |                         // if *del_span > 0 {
 89 |                         //     rope.remove(*pos .. *pos + *del_span);
 90 |                         // }
 91 |                         // if !ins_content.is_empty() {
 92 |                         //     rope.insert(*pos, ins_content);
 93 |                         // }
 94 |                     }
 95 |                 }
 96 | 
 97 |                 assert_eq!(rope.len_bytes(), test_data.end_content.len());
 98 |                 black_box(rope.len_chars());
 99 |             })
100 |         });
101 |         // Disabled "merged" variant: the collapsed ops applied to a plain `JumpRope`.
102 |         // group.bench_function(BenchmarkId::new("merged", name), |b| {
103 |         //     b.iter(|| {
104 |         //         let mut rope = JumpRope::new();
105 |         //         for op in merged.iter() {
106 |         //             match op {
107 |         //                 Ins(pos, content) => {
108 |         //                     rope.insert(*pos, content);
109 |         //                 }
110 |         //                 Del(pos, del_span) => {
111 |         //                     rope.remove(*pos..*pos + *del_span);
112 |         //                 }
113 |         //             }
114 |         //         }
115 |         //
116 |         //         // assert_eq!(test_data.end_content, rope.to_string());
117 |         //
118 |         //         assert_eq!(rope.len_bytes(), test_data.end_content.len());
119 |         //         black_box(rope.len_chars());
120 |         //     })
121 |         // });
122 |         // "buffered": apply the collapsed ops through `JumpRopeBuf`, then unwrap the inner rope.
123 |         group.bench_function(BenchmarkId::new("buffered", name), |b| {
124 |             b.iter(|| {
125 |                 let mut rope = JumpRopeBuf::new();
126 |                 for op in merged.iter() {
127 |                     match op {
128 |                         Ins(pos, content) => {
129 |                             rope.insert(*pos, content);
130 |                         }
131 |                         Del(pos, del_span) => {
132 |                             rope.remove(*pos..*pos + *del_span);
133 |                         }
134 |                     }
135 |                 }
136 | 
137 |                 // assert_eq!(test_data.end_content, rope.to_string());
138 | 
139 |                 let rope = rope.into_inner();
140 |                 assert_eq!(rope.len_bytes(), test_data.end_content.len());
141 |                 black_box(rope.len_chars());
142 |             })
143 |         });
144 | 
145 |         group.finish();
146 |     }
147 | }
148 | 
149 | criterion_group!(benches, realworld_benchmarks);
150 | criterion_main!(benches);
151 | 


--------------------------------------------------------------------------------
/benchmark_data/automerge-paper.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/josephg/jumprope-rs/3981256e4e741d8b19efe26ffcfcfe178332eda0/benchmark_data/automerge-paper.json.gz


--------------------------------------------------------------------------------
/benchmark_data/rustcode.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/josephg/jumprope-rs/3981256e4e741d8b19efe26ffcfcfe178332eda0/benchmark_data/rustcode.json.gz


--------------------------------------------------------------------------------
/benchmark_data/seph-blog1.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/josephg/jumprope-rs/3981256e4e741d8b19efe26ffcfcfe178332eda0/benchmark_data/seph-blog1.json.gz


--------------------------------------------------------------------------------
/benchmark_data/sveltecomponent.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/josephg/jumprope-rs/3981256e4e741d8b19efe26ffcfcfe178332eda0/benchmark_data/sveltecomponent.json.gz


--------------------------------------------------------------------------------
/crdt-testdata/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "crdt-testdata"
 3 | version = "0.0.0"
 4 | authors = ["Seph Gentle <me@josephg.com>"]
 5 | edition = "2021"
 6 | 
 7 | [dependencies]
 8 | flate2 = { version = "1.0.22", features = ["zlib-ng-compat"], default-features = false }
 9 | serde = { version = "1.0.136", features = ["derive"] }
10 | serde_json = "1.0.79"
11 | ropey = "1.6.0"


--------------------------------------------------------------------------------
/crdt-testdata/src/lib.rs:
--------------------------------------------------------------------------------
  1 | // use std::time::SystemTime;
  2 | use std::fs::File;
  3 | use std::io::{BufReader, Read};
  4 | use flate2::bufread::GzDecoder;
  5 | use serde::Deserialize;
  6 | 
  7 | // This file contains some simple helpers for loading test data. It's used by benchmarking and
  8 | // testing code.
  9 | 
 10 | /// A single edit: (position, delete length, insert content).
 11 | #[derive(Debug, Clone, Deserialize, Eq, PartialEq)]
 12 | pub struct TestPatch(pub usize, pub usize, pub String);
 13 | /// A transaction: the list of patches deserialized from one entry of the trace's `txns` array.
 14 | #[derive(Debug, Clone, Deserialize, Eq, PartialEq)]
 15 | pub struct TestTxn {
 16 |     // time: String, // ISO String. Unused.
 17 |     pub patches: Vec<TestPatch>
 18 | }
 19 | /// A recorded editing trace: start and end document content plus the transactions in between.
 20 | #[derive(Debug, Clone, Deserialize, Eq, PartialEq)]
 21 | pub struct TestData {
 22 |     #[serde(default)]
 23 |     pub using_byte_positions: bool, // Set to true by `chars_to_bytes`; false when absent from the JSON.
 24 | 
 25 |     #[serde(rename = "startContent")]
 26 |     pub start_content: String,
 27 |     #[serde(rename = "endContent")]
 28 |     pub end_content: String,
 29 | 
 30 |     pub txns: Vec<TestTxn>,
 31 | }
 32 | 
 33 | impl TestData {
 34 |     /// Total number of patches across all transactions.
 35 |     pub fn len(&self) -> usize {
 36 |         self.txns.iter().map(|txn| txn.patches.len()).sum()
 37 |     }
 38 | 
 39 |     /// True when no transaction contains any patch.
 40 |     pub fn is_empty(&self) -> bool {
 41 |         self.txns.iter().all(|txn| txn.patches.is_empty())
 42 |     }
 43 | 
 44 |     /// This method returns a clone of the testing data using byte offsets instead of codepoint
 45 |     /// indexes. Panics if the data already uses byte positions.
 46 |     pub fn chars_to_bytes(&self) -> Self {
 47 |         assert!(!self.using_byte_positions);
 48 | 
 49 |         // Char offsets are resolved by replaying the trace on a scratch rope. It must be seeded
 50 |         // with `start_content`; an empty rope breaks traces that begin with existing text.
 51 |         let mut r = ropey::Rope::from_str(&self.start_content);
 52 | 
 53 |         Self {
 54 |             using_byte_positions: true,
 55 |             start_content: self.start_content.clone(),
 56 |             end_content: self.end_content.clone(),
 57 |             txns: self.txns.iter().map(|txn| {
 58 |                 TestTxn {
 59 |                     patches: txn.patches.iter().map(|TestPatch(pos_chars, del_chars, ins)| {
 60 |                         let pos_bytes = r.char_to_byte(*pos_chars);
 61 |                         let del_bytes = if *del_chars > 0 {
 62 |                             let del_end_bytes = r.char_to_byte(*pos_chars + *del_chars);
 63 |                             r.remove(*pos_chars..*pos_chars + *del_chars);
 64 |                             del_end_bytes - pos_bytes
 65 |                         } else { 0 };
 66 |                         if !ins.is_empty() { r.insert(*pos_chars, ins); }
 67 | 
 68 |                         TestPatch(pos_bytes, del_bytes, ins.clone())
 69 |                     }).collect(),
 70 |                 }
 71 |             }).collect()
 72 |         }
 73 |     }
 74 | 
 75 |     /// Iterates over every patch, transaction by transaction.
 76 |     pub fn patches(&self) -> impl Iterator<Item=&TestPatch> {
 77 |         self.txns.iter().flat_map(|txn| txn.patches.iter())
 78 |     }
 79 | }
 80 | 
 81 | // TODO: Make a try_ version of this method, which returns an appropriate Error object.
 82 | /// Loads a gzipped JSON editing trace from `filename`.
 83 | ///
 84 | /// # Panics
 85 | /// Panics if the file cannot be opened, cannot be decompressed, or does not deserialize into
 86 | /// [`TestData`].
 87 | pub fn load_testing_data(filename: &str) -> TestData {
 88 |     let file = File::open(filename)
 89 |         .unwrap_or_else(|err| panic!("Could not open test data {}: {}", filename, err));
 90 | 
 91 |     // We could pass the GzDecoder straight to serde, but it makes it way slower to parse for
 92 |     // some reason. So the whole file is decompressed into memory first.
 93 |     let mut raw_json = Vec::new();
 94 |     GzDecoder::new(BufReader::new(file))
 95 |         .read_to_end(&mut raw_json)
 96 |         .unwrap_or_else(|err| panic!("Could not decompress {}: {}", filename, err));
 97 | 
 98 |     // The JSON is already in memory, so parse it as a slice rather than through a reader.
 99 |     serde_json::from_slice(&raw_json)
100 |         .unwrap_or_else(|err| panic!("Could not parse {}: {}", filename, err))
101 | }
102 | 
103 | #[cfg(test)]
104 | mod tests {
105 |     use crate::{load_testing_data, TestData, TestPatch, TestTxn};
106 | 
107 |     /// Smoke test: a bundled dataset loads and contains at least one transaction.
108 |     #[test]
109 |     fn it_works() {
110 |         let data = load_testing_data("../benchmark_data/sveltecomponent.json.gz");
111 |         assert!(!data.txns.is_empty());
112 |     }
113 | 
114 |     /// Wraps `patches` in a single-transaction trace with empty start and end content.
115 |     fn single_txn(using_byte_positions: bool, patches: Vec<TestPatch>) -> TestData {
116 |         TestData {
117 |             using_byte_positions,
118 |             start_content: String::new(),
119 |             end_content: String::new(),
120 |             txns: vec![TestTxn { patches }],
121 |         }
122 |     }
123 | 
124 |     /// "ツ" is one char but three UTF-8 bytes, so offsets and spans covering it must change.
125 |     #[test]
126 |     fn convert_chars_to_bytes() {
127 |         let data = single_txn(false, vec![
128 |             TestPatch(0, 0, "ツ".into()),
129 |             TestPatch(1, 0, "x".into()),
130 |             TestPatch(1, 1, "".into()),
131 |             TestPatch(0, 1, "".into()),
132 |         ]);
133 | 
134 |         let data2 = data.chars_to_bytes();
135 | 
136 |         assert_eq!(data2, single_txn(true, vec![
137 |             // Positions have changed!
138 |             TestPatch(0, 0, "ツ".into()),
139 |             TestPatch(3, 0, "x".into()),
140 |             TestPatch(3, 1, "".into()),
141 |             TestPatch(0, 3, "".into()),
142 |         ]));
143 | 
144 |         // The conversion must never touch the inserted text itself.
145 |         for (p1, p2) in data.patches().zip(data2.patches()) {
146 |             assert_eq!(p1.2, p2.2);
147 |         }
148 |     }
149 | }
150 | 


--------------------------------------------------------------------------------
/jumprope-wasm/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "jumprope-wasm"
 3 | version = "0.1.0"
 4 | edition = "2018"
 5 | 
 6 | [lib]
 7 | crate-type = ["cdylib", "rlib"]
 8 | 
 9 | [features]
10 | default = ["ddos_protection", "jumprope/wchar_conversion"]
11 | #default = ["ddos_protection"]
12 | #default = ["jumprope/wchar_conversion"]
13 | ddos_protection = ["jumprope/ddos_protection"]
14 | 
15 | [dependencies]
16 | wasm-bindgen = "0.2"
17 | 
18 | # Disable default-features to remove obsessive ddos protection by default to
19 | # reduce code size. (42kb -> 33kb). SmallRng is still plenty secure for most
20 | # applications.
21 | jumprope = { path = "..", default-features = false }
22 | 
23 | # Needed for rand in wasm, regardless of whether entropy is used.
24 | getrandom = { version = "0.2.3", features = ["js"] }
25 | 
26 | # The `console_error_panic_hook` crate provides better debugging of panics by
27 | # logging them with `console.error`. This is great for development, but requires
28 | # all the `std::fmt` and `std::panicking` infrastructure, so isn't great for
29 | # code size when deploying.
30 | #console_error_panic_hook = { version = "0.1.6", optional = true }
31 | 


--------------------------------------------------------------------------------
/jumprope-wasm/build_wasm.sh:
--------------------------------------------------------------------------------
 1 | set -e
 2 | 
 3 | RUSTFLAGS=""
 4 | #cd crates/diamond-wasm
 5 | 
 6 | echo "=== Before ==="
 7 | ls -l pkg
 8 | echo "=== After ==="
 9 | wasm-pack build --target web
10 | 
11 | brotli -f pkg/*.wasm
12 | ls -l pkg
13 | 


--------------------------------------------------------------------------------
/jumprope-wasm/src/lib.rs:
--------------------------------------------------------------------------------
 1 | use wasm_bindgen::prelude::*;
 2 | use jumprope::JumpRope;
 3 | 
 4 | #[wasm_bindgen]
 5 | pub struct Rope(JumpRope);
 6 | 
 7 | #[wasm_bindgen]
 8 | impl Rope {
 9 |     /// Create a new rope, optionally with initial content.
10 |     #[wasm_bindgen(constructor)]
11 |     pub fn new(s: Option<String>) -> Self {
12 |         // Can't use Option<&str> in wasm-bindgen for some reason. It doesn't matter much -
13 |         // the passed string will be heap allocated anyway.
14 | 
15 |         let mut r = if cfg!(feature = "ddos_protection") {
16 |             // Generating a rope from entropy adds 5kb to the binary size.
17 |             JumpRope::new()
18 |         } else {
19 |             JumpRope::new_from_seed(321)
20 |         };
21 |         if let Some(str) = s {
22 |             r.insert(0, &str);
23 |         }
24 |         Self(r)
25 |     }
26 | 
27 |     #[wasm_bindgen]
28 |     pub fn from(s: String) -> Self {
29 |         Self::new(Some(s))
30 |     }
31 | 
32 |     /// Insert new content at the specified position.
33 |     #[wasm_bindgen]
34 |     pub fn insert(&mut self, pos: usize, content: &str) {
35 |         self.0.insert(pos, content);
36 |     }
37 | 
38 |     /// Remove (splice out) rope content of length del_len at the specified position.
39 |     #[wasm_bindgen]
40 |     pub fn remove(&mut self, pos: usize, del_len: usize) {
41 |         self.0.remove(pos..pos+del_len);
42 |     }
43 | 
44 |     #[wasm_bindgen(js_name=toString)]
45 |     pub fn as_string(&self) -> String {
46 |         self.0.to_string()
47 |     }
48 | 
49 |     #[wasm_bindgen(getter)]
50 |     pub fn length(&self) -> usize {
51 |         self.0.len_chars()
52 |     }
53 | }
54 | 
55 | #[cfg(test)]
56 | mod tests {
57 |     use crate::Rope;
58 | 
59 |     #[test]
60 |     fn smoke_test() {
61 |         let mut r: Rope = Rope::new(None);
62 |         assert_eq!(r.as_string(), "");
63 |         r.insert(0, "hi there");
64 |         assert_eq!(r.as_string(), "hi there");
65 |         r.remove(2, 4);
66 |         assert_eq!(r.as_string(), "hire");
67 |     }
68 | }
69 | 


--------------------------------------------------------------------------------
/rope_benches/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | authors = ["Joseph Gentle <me@josephg.com>"]
 3 | edition = "2018"
 4 | name = "rope_benches"
 5 | version = "0.1.0"
 6 | 
 7 | [build-dependencies]
 8 | cc = "1.0"
 9 | 
10 | [dependencies]
11 | criterion = "0.3"
12 | jumprope = { path = ".." }
13 | rand = { version = "0.8", features = ["small_rng"] }
14 | ropey = "1.6.0"
15 | xi-rope = "0.3.0"
16 | an-rope = { version = "0.3.1", features = ["tendril"] }
17 | crdt-testdata = { path = "../crdt-testdata" }
18 | #crop = { path = "../../../3rdparty/crop" }
19 | crop = { git = "https://github.com/noib3/crop" }
20 | 


--------------------------------------------------------------------------------
/rope_benches/README.md:
--------------------------------------------------------------------------------
 1 | # Rust rope benchmarks
 2 | 
 3 | This is a small collection of benchmarks of various rope implementations in rust.
 4 | 
 5 | I'm comparing:
 6 | 
 7 | - Jumprope (this library)
 8 | - The [C version of this rope library](https://github.com/josephg/librope)
 9 | - [ropey](https://crates.io/crates/ropey/)
10 | - [xi-rope](https://crates.io/crates/xi-rope)
11 | - [an-rope](https://crates.io/crates/an-rope)
12 | 
13 | To run the benchmarks, navigate into this directory and run:
14 | 
15 | ```
16 | cargo run --release -- --bench
17 | ```
18 | 
19 | This will produce a report in *target/criterion/report/index.html*.
20 | 
 21 | Current benchmark results are published [here](https://home.seph.codes/public/c4/report/).


--------------------------------------------------------------------------------
/rope_benches/build.rs:
--------------------------------------------------------------------------------
1 | extern crate cc;
2 | 
3 | fn main() {
4 |     cc::Build::new()
5 |         .file("rope.c")
6 |         .compile("librope");
7 | }


--------------------------------------------------------------------------------
/rope_benches/explore_parameters.js:
--------------------------------------------------------------------------------
  1 | const asciichart = require('asciichart')
  2 | const fs = require('fs')
  3 | const {spawnSync} = require('child_process')
  4 | 
  5 | const gmean = list => (
  6 |   Math.pow(list.reduce((a, b) => a*b, 1), 1/list.length)
  7 | )
  8 | 
  9 | const names = ["automerge-paper", "rustcode", "sveltecomponent", "seph-blog1"]
 10 | const getScore = () => {
 11 | 
 12 |   const data = names.map(name => {
 13 |     const est_file = `../target/criterion/realworld/JumpRope/${name}/new/estimates.json`
 14 |     const estimates = JSON.parse(fs.readFileSync(est_file, 'utf8'))
 15 | 
 16 |     const bench_file = `../target/criterion/realworld/JumpRope/${name}/new/benchmark.json`
 17 |     const elements = JSON.parse(fs.readFileSync(bench_file, 'utf8')).throughput.Elements
 18 | 
 19 |     return elements / (estimates.mean.point_estimate / 1e9)
 20 |   })
 21 |   // console.log(data)
 22 |   return data
 23 | }
 24 | 
 25 | const setSize = size => {
 26 | //   fs.writeFileSync('../src/params.rs', `
 27 | // pub const XX_SIZE: usize = 380;
 28 | // pub const XX_BIAS: u8 = ${size};
 29 | // `)
 30 |   fs.writeFileSync('../src/params.rs', `
 31 | pub const XX_SIZE: usize = ${size};
 32 | pub const XX_BIAS: u8 = 65;
 33 | `)
 34 | }
 35 | 
 36 | // const cmd = 'cargo build --release && sleep 3 && taskset 0x1 nice -10 cargo run --release -- --bench --measurement-time=3 -n realworld/JumpRope/automerge-paper'
 37 | // const cmd = 'cargo build --release && taskset 0x1 nice -10 cargo run --release -- --bench --measurement-time=10 -n realworld/JumpRope/automerge-paper'
 38 | const cmd = 'cargo build --release && taskset 0x1 nice -10 cargo run --release -- --bench --measurement-time=20 -n realworld/JumpRope'
 39 | const bench = () => {
 40 |   spawnSync(cmd, {
 41 |     shell: true,
 42 |     stdio: 'inherit',
 43 |   })
 44 | }
 45 | 
 46 | // setSize(100)
 47 | 
 48 | const scores = {}
 49 | // The first row holds the sizes, the second row holds the geometric mean, and the remaining rows hold the per-benchmark results.
 50 | const scores_arr = new Array(names.length + 2).fill().map(() => [])
 51 | 
 52 | const run = size => {
 53 |   setSize(size)
 54 |   bench()
 55 |   const vals = getScore()
 56 |   const gm = gmean(vals)
 57 |   scores[size] = gm
 58 |   scores_arr[0].push(size)
 59 |   scores_arr[1].push(gm)
 60 |   for (let i = 0; i < vals.length; i++) {
 61 |     scores_arr[i+2].push(vals[i])
 62 |   }
 63 | 
 64 |   console.log(`Registered ${size} => ${gm} (${gm / 1e6})`)
 65 | }
 66 | 
 67 | // for (let s = 50; s <= 80; s += 5) {
 68 | //   run(s)
 69 | // }
 70 | for (let s = 380; s <= 400; s += 4) {
 71 |   // console.log(s)
 72 |   run(s)
 73 | }
 74 | // for (let s = 300; s <= 400; s += 20) {
 75 | //   run(s)
 76 | // }
 77 | console.table(scores)
 78 | 
 79 | // run(200)
 80 | // console.log(getScore())
 81 | 
 82 | 
 83 | const pad = arr => {
 84 |   let num = Math.round(80 / (arr.length-1))
 85 |   const result = [arr[0]]
 86 |   for (let i = 1; i < arr.length; i++) {
 87 |     let prev = arr[i-1]
 88 |     let next = arr[i]
 89 | 
 90 |     for (let j = 1; j <= num; j++) {
 91 |       let weight = j/num
 92 |       result.push(next * weight + prev * (1-weight))
 93 |     }
 94 |   }
 95 |   return result
 96 | }
 97 | 
 98 | const drawChart = scores_arr => {
 99 |   console.log(asciichart.plot(scores_arr.slice(1).map(pad), {
100 |     colors: [
101 |       asciichart.white,
102 |       asciichart.blue, asciichart.green, asciichart.red, asciichart.yellow
103 |     ],
104 |     height: 50,
105 |   }))
106 | }
107 | 
108 | // drawChart(JSON.parse(fs.readFileSync('data.json', 'utf8')))
109 | 
110 | drawChart(scores_arr)
111 | 
112 | // console.log(asciichart.plot(pad([0, 2, 3]), {
113 | //   colors: [asciichart.blue, asciichart.green, asciichart.red, asciichart.yellow],
114 | //   height: 20,
115 | // }))
116 | 
117 | fs.writeFileSync('data.json', JSON.stringify(scores_arr))
118 | console.log('data written to data.json')


--------------------------------------------------------------------------------
/rope_benches/rope.c:
--------------------------------------------------------------------------------
  1 | // Implementation for rope library.
  2 | 
  3 | #include <stdlib.h>
  4 | #include <string.h>
  5 | #include <sys/types.h>
  6 | 
  7 | // Needed for VC++, which always compiles in C++ mode and doesn't have stdbool.
  8 | #ifndef __cplusplus
  9 | #include <stdbool.h>
 10 | #endif
 11 | 
 12 | #include <assert.h>
 13 | #include "rope.h"
 14 | 
 15 | // The number of bytes the rope head structure takes up
 16 | static const size_t ROPE_SIZE = sizeof(rope) + sizeof(rope_node) * ROPE_MAX_HEIGHT;
 17 | 
 18 | // Create a new rope with no contents
 19 | rope *rope_new2(void *(*alloc)(size_t bytes),
 20 |                 void *(*realloc)(void *ptr, size_t newsize),
 21 |                 void (*free)(void *ptr)) {
 22 |   rope *r = (rope *)alloc(ROPE_SIZE);
 23 |   r->num_chars = r->num_bytes = 0;
 24 | 
 25 |   r->alloc = alloc;
 26 |   r->realloc = realloc;
 27 |   r->free = free;
 28 | 
 29 |   r->head.height = 1;
 30 |   r->head.num_bytes = 0;
 31 |   r->head.nexts[0].node = NULL;
 32 |   r->head.nexts[0].skip_chars = 0;
 33 | #if ROPE_WCHAR
 34 |   r->head.nexts[0].wchar_size = 0;
 35 | #endif
 36 |   return r;
 37 | }
 38 | 
 39 | rope *rope_new() {
 40 |   return rope_new2(malloc, realloc, free);
 41 | }
 42 | 
 43 | // Create a new rope containing the specified string
 44 | rope *rope_new_with_utf8(const uint8_t *str) {
 45 |   rope *r = rope_new();
 46 |   ROPE_RESULT result = rope_insert(r, 0, str);
 47 | 
 48 |   if (result != ROPE_OK) {
 49 |     rope_free(r);
 50 |     return NULL;
 51 |   } else {
 52 |     return r;
 53 |   }
 54 | }
 55 | 
 56 | rope *rope_copy(const rope *other) {
 57 |   rope *r = (rope *)other->alloc(ROPE_SIZE);
 58 | 
 59 |   // Just copy most of the head's data. Note this won't copy the nexts list in head.
 60 |   *r = *other;
 61 | 
 62 |   rope_node *nodes[ROPE_MAX_HEIGHT];
 63 | 
 64 |   for (int i = 0; i < other->head.height; i++) {
 65 |     nodes[i] = &r->head;
 66 |     // non-NULL next pointers will be rewritten below.
 67 |     r->head.nexts[i] = other->head.nexts[i];
 68 |   }
 69 | 
 70 |   for (rope_node *n = other->head.nexts[0].node; n != NULL; n = n->nexts[0].node) {
 71 |     // I wonder if it would be faster if we took this opportunity to rebalance the node list..?
 72 |     size_t h = n->height;
 73 |     rope_node *n2 = (rope_node *)r->alloc(sizeof(rope_node) + h * sizeof(rope_skip_node));
 74 | 
 75 |     // Would it be faster to just *n2 = *n; ?
 76 |     n2->num_bytes = n->num_bytes;
 77 |     n2->height = h;
 78 |     memcpy(n2->str, n->str, n->num_bytes);
 79 |     memcpy(n2->nexts, n->nexts, h * sizeof(rope_skip_node));
 80 | 
 81 |     for (size_t i = 0; i < h; i++) {
 82 |       nodes[i]->nexts[i].node = n2;
 83 |       nodes[i] = n2;
 84 |     }
 85 |   }
 86 | 
 87 |   return r;
 88 | }
 89 | 
 90 | // Free the specified rope
 91 | void rope_free(rope *r) {
 92 |   assert(r);
 93 |   rope_node *next;
 94 | 
 95 |   for (rope_node *n = r->head.nexts[0].node; n != NULL; n = next) {
 96 |     next = n->nexts[0].node;
 97 |     r->free(n);
 98 |   }
 99 | 
100 |   r->free(r);
101 | }
102 | 
103 | // Get the number of characters in a rope
104 | size_t rope_char_count(const rope *r) {
105 |   assert(r);
106 |   return r->num_chars;
107 | }
108 | 
109 | // Get the number of bytes which the rope would take up if stored as a utf8
110 | // string
111 | size_t rope_byte_count(const rope *r) {
112 |   assert(r);
113 |   return r->num_bytes;
114 | }
115 | 
116 | // Copies the rope's contents into a utf8 encoded C string. Also copies a trailing '\0' character.
117 | // Returns the number of bytes written, which is rope_byte_count(r) + 1.
118 | size_t rope_write_cstr(rope *r, uint8_t *dest) {
119 |   size_t num_bytes = rope_byte_count(r);
120 |   dest[num_bytes] = '\0';
121 | 
122 |   if (num_bytes) {
123 |     uint8_t *p = dest;
124 |     for (rope_node* restrict n = &r->head; n != NULL; n = n->nexts[0].node) {
125 |       memcpy(p, n->str, n->num_bytes);
126 |       p += n->num_bytes;
127 |     }
128 | 
129 |     assert(p == &dest[num_bytes]);
130 |   }
131 |   return num_bytes + 1;
132 | }
133 | 
134 | // Create a new C string which contains the rope. The string will contain
135 | // the rope encoded as utf8.
136 | uint8_t *rope_create_cstr(rope *r) {
137 |   uint8_t *bytes = (uint8_t *)r->alloc(rope_byte_count(r) + 1); // Room for a zero.
138 |   rope_write_cstr(r, bytes);
139 |   return bytes;
140 | }
141 | 
142 | #if ROPE_WCHAR
143 | size_t rope_wchar_count(rope *r) {
144 |   assert(r);
145 |   return r->head.nexts[r->head.height - 1].wchar_size;
146 | }
147 | #endif
148 | 
149 | #define MIN(x,y) ((x) > (y) ? (y) : (x))
150 | #define MAX(x,y) ((x) > (y) ? (x) : (y))
151 | 
152 | #ifdef _WIN32
153 | inline static long random() {
154 |   return rand();
155 | }
156 | #endif
157 | 
158 | static uint8_t random_height() {
159 |   // This function is horribly inefficient. I'm throwing away heaps of entropy, and
160 |   // the mod could be replaced by some clever shifting.
161 |   //
162 |   // However, random_height barely appears in the profiler output - so it's probably
163 |   // not worth investing the time to optimise.
164 | 
165 |   uint8_t height = 1;
166 | 
167 |   // The root node's height is the height of the largest node + 1, so the largest
168 |   // node can only have a height of ROPE_MAX_HEIGHT - 1.
169 |   while(height < (ROPE_MAX_HEIGHT - 1) && (random() % 100) < ROPE_BIAS) {
170 |     height++;
171 |   }
172 | 
173 |   return height;
174 | }
175 | 
176 | // Figure out how many bytes to allocate for a node with the specified height.
177 | static size_t node_size(uint8_t height) {
178 |   return sizeof(rope_node) + height * sizeof(rope_skip_node);
179 | }
180 | 
181 | // Allocate and return a new node. The new node will be full of junk, except
182 | // for its height.
183 | // This function should be replaced at some point with an object pool based version.
184 | static rope_node *alloc_node(rope *r, uint8_t height) {
185 |   rope_node *node = (rope_node *)r->alloc(node_size(height));
186 |   node->height = height;
187 |   return node;
188 | }
189 | 
190 | // Find out how many bytes the unicode character which starts with the specified byte
191 | // will occupy in memory.
192 | // Returns the number of bytes, or SIZE_MAX if the byte is invalid.
193 | static inline size_t codepoint_size(uint8_t byte) {
194 |   if (byte == 0) { return SIZE_MAX; } // NULL byte.
195 |   else if (byte <= 0x7f) { return 1; } // 0x7f = 0111 1111
196 |   else if (byte <= 0xbf) { return SIZE_MAX; } // 1011 1111. Invalid for a starting byte.
197 |   else if (byte <= 0xdf) { return 2; } // 1101 1111
198 |   else if (byte <= 0xef) { return 3; } // 1110 1111
199 |   else if (byte <= 0xf7) { return 4; } // 1111 0111
200 |   else if (byte <= 0xfb) { return 5; } // 1111 1011
201 |   else if (byte <= 0xfd) { return 6; } // 1111 1101
202 |   else { return SIZE_MAX; }
203 | }
204 | 
205 | // This little function counts how many bytes a certain number of characters take up.
206 | static size_t count_bytes_in_utf8(const uint8_t *str, size_t num_chars) {
207 |   const uint8_t *p = str;
208 |   for (unsigned int i = 0; i < num_chars; i++) {
209 |     p += codepoint_size(*p);
210 |   }
211 |   return p - str;
212 | }
213 | 
214 | #if ROPE_WCHAR
215 | 
216 | #define NEEDS_TWO_WCHARS(x) (((x) & 0xf0) == 0xf0)
217 | 
218 | static size_t count_wchars_in_utf8(const uint8_t *str, size_t num_chars) {
219 |   size_t wchars = 0;
220 |   for (unsigned int i = 0; i < num_chars; i++) {
221 |     wchars += 1 + NEEDS_TWO_WCHARS(*str);
222 |     str += codepoint_size(*str);
223 |   }
224 |   return wchars;
225 | }
226 | 
227 | static size_t count_utf8_in_wchars(const uint8_t *str, size_t num_wchars) {
228 |   size_t chars = num_wchars;
229 |   for (unsigned int i = 0; i < num_wchars; i++) {
230 |     if (NEEDS_TWO_WCHARS(*str)) {
231 |       chars--;
232 |       i++;
233 |     }
234 |     str += codepoint_size(*str);
235 |   }
236 |   return chars;
237 | }
238 | #endif
239 | 
240 | // Count the number of characters in a string.
241 | static size_t strlen_utf8(const uint8_t *str) {
242 |   const uint8_t *p = str;
243 |   size_t i = 0;
244 |   while (*p) {
245 |     p += codepoint_size(*p);
246 |     i++;
247 |   }
248 |   return i;
249 | }
250 | 
251 | // Checks if a UTF8 string is ok. Returns the number of bytes in the string if
252 | // it is ok, otherwise returns -1.
253 | static ssize_t bytelen_and_check_utf8(const uint8_t *str) {
254 |   const uint8_t *p = str;
255 |   while (*p != '\0') {
256 |     size_t size = codepoint_size(*p);
257 |     if (size == SIZE_MAX) return -1;
258 |     p++; size--;
259 |     while (size > 0) {
260 |       // Check that any middle bytes are of the form 0b10xx xxxx
261 |       if ((*p & 0xc0) != 0x80)
262 |         return -1;
263 |       p++; size--;
264 |     }
265 |   }
266 | 
267 | #ifdef DEBUG
268 |   size_t num = p - str;
269 |   assert(num == strlen((char *)str));
270 | #endif
271 | 
272 |   return p - str;
273 | }
274 | 
275 | typedef struct {
276 |   // This stores the previous node at each height, and the number of characters from the start of
277 |   // the previous node to the current iterator position.
278 |   rope_skip_node s[ROPE_MAX_HEIGHT];
279 | } rope_iter;
280 | 
281 | // Internal function for navigating to a particular character offset in the rope.
282 | // The function returns the list of nodes which point past the position, as well as
283 | // offsets of how far into their character lists the specified characters are.
284 | static rope_node *iter_at_char_pos(rope *r, size_t char_pos, rope_iter *iter) {
285 |   assert(char_pos <= r->num_chars);
286 | 
287 |   rope_node *e = &r->head;
288 |   int height = r->head.height - 1;
289 | 
290 |   // Offset stores how many characters we still need to skip in the current node.
291 |   size_t offset = char_pos;
292 |   size_t skip;
293 | #if ROPE_WCHAR
294 |   size_t wchar_pos = 0; // Current wchar pos from the start of the rope.
295 | #endif
296 | 
297 |   while (true) {
298 |     skip = e->nexts[height].skip_chars;
299 |     if (offset > skip) {
300 |       // Go right.
301 |       assert(e == &r->head || e->num_bytes);
302 | 
303 |       offset -= skip;
304 | #if ROPE_WCHAR
305 |       wchar_pos += e->nexts[height].wchar_size;
306 | #endif
307 |       e = e->nexts[height].node;
308 |     } else {
309 |       // Go down.
310 |       iter->s[height].skip_chars = offset;
311 |       iter->s[height].node = e;
312 | #if ROPE_WCHAR
313 |       iter->s[height].wchar_size = wchar_pos;
314 | #endif
315 | 
316 |       if (height == 0) {
317 |         break;
318 |       } else {
319 |         height--;
320 |       }
321 |     }
322 |   }
323 | 
324 | #if ROPE_WCHAR
325 |   // For some reason, this is _REALLY SLOW_. Like, 5.5Mops/s -> 4Mops/s from this block of code.
326 |   wchar_pos += count_wchars_in_utf8(e->str, offset);
327 | 
328 |   // The iterator has the wchar pos from the start of the whole string.
329 |   for (int i = 0; i < r->head.height; i++) {
330 |     iter->s[i].wchar_size = wchar_pos - iter->s[i].wchar_size;
331 |   }
332 | #endif
333 | 
334 |   assert(offset <= ROPE_NODE_STR_SIZE);
335 |   assert(iter->s[0].node == e);
336 |   return e;
337 | }
338 | 
339 | #if ROPE_WCHAR
340 | // Equivalent of iter_at_char_pos, but for wchar positions instead.
341 | static rope_node *iter_at_wchar_pos(rope *r, size_t wchar_pos, rope_iter *iter) {
342 |   int height = r->head.height - 1;
343 |   assert(wchar_pos <= r->head.nexts[height].wchar_size);
344 | 
345 |   rope_node *e = &r->head;
346 | 
347 |   // Offset stores how many wchar characters we still need to skip in the current node.
348 |   size_t offset = wchar_pos;
349 |   size_t skip;
350 |   size_t char_pos = 0; // Current char pos from the start of the rope.
351 | 
352 |   while (true) {
353 |     skip = e->nexts[height].wchar_size;
354 |     if (offset > skip) {
355 |       // Go right.
356 |       offset -= skip;
357 |       char_pos += e->nexts[height].skip_chars;
358 |       e = e->nexts[height].node;
359 |     } else {
360 |       // Go down.
361 |       iter->s[height].skip_chars = char_pos;
362 |       iter->s[height].node = e;
363 |       iter->s[height].wchar_size = offset;
364 | 
365 |       if (height == 0) {
366 |         break;
367 |       } else {
368 |         height--;
369 |       }
370 |     }
371 |   }
372 | 
373 |   char_pos += count_utf8_in_wchars(e->str, offset);
374 | 
375 |   // The iterator has character positions from the start of the rope to the start of the node.
376 |   for (int i = 0; i < r->head.height; i++) {
377 |     iter->s[i].skip_chars = char_pos - iter->s[i].skip_chars;
378 |   }
379 |   assert(e == iter->s[0].node);
380 |   return e;
381 | }
382 | #endif
383 | 
384 | #if ROPE_WCHAR
385 | static void update_offset_list(rope *r, rope_iter *iter, size_t num_chars, size_t num_wchars) {
386 |   for (int i = 0; i < r->head.height; i++) {
387 |     iter->s[i].node->nexts[i].skip_chars += num_chars;
388 |     iter->s[i].node->nexts[i].wchar_size += num_wchars;
389 |   }
390 | }
391 | #else
392 | static void update_offset_list(rope *r, rope_iter *iter, size_t num_chars) {
393 |   for (int i = 0; i < r->head.height; i++) {
394 |     iter->s[i].node->nexts[i].skip_chars += num_chars;
395 |   }
396 | }
397 | #endif
398 | 
399 | 
400 | // Internal method of rope_insert.
401 | // This function creates a new node in the rope at the specified position and fills it with the
402 | // passed string.
403 | static void insert_at(rope *r, rope_iter *iter,
404 |     const uint8_t *str, size_t num_bytes, size_t num_chars) {
405 | #if ROPE_WCHAR
406 |   size_t num_wchars = count_wchars_in_utf8(str, num_chars);
407 | #endif
408 | 
409 |   // This describes how many levels of the iter are filled in.
410 |   uint8_t max_height = r->head.height;
411 |   uint8_t new_height = random_height();
412 |   rope_node *new_node = alloc_node(r, new_height);
413 |   new_node->num_bytes = num_bytes;
414 |   memcpy(new_node->str, str, num_bytes);
415 | 
416 |   assert(new_height < ROPE_MAX_HEIGHT);
417 | 
418 |   // Max height (the rope's head's height) must be 1+ the height of the largest node.
419 |   while (max_height <= new_height) {
420 |     r->head.height++;
421 |     r->head.nexts[max_height] = r->head.nexts[max_height - 1];
422 | 
423 |     // This is the position (offset from the start) of the rope.
424 |     iter->s[max_height] = iter->s[max_height - 1];
425 |     max_height++;
426 |   }
427 | 
428 |   // Fill in the new node's nexts array.
429 |   int i;
430 |   for (i = 0; i < new_height; i++) {
431 |     rope_skip_node *prev_skip = &iter->s[i].node->nexts[i];
432 |     new_node->nexts[i].node = prev_skip->node;
433 |     new_node->nexts[i].skip_chars = num_chars + prev_skip->skip_chars - iter->s[i].skip_chars;
434 | 
435 | 
436 |     prev_skip->node = new_node;
437 |     prev_skip->skip_chars = iter->s[i].skip_chars;
438 | 
439 |     // & move the iterator to the end of the newly inserted node.
440 |     iter->s[i].node = new_node;
441 |     iter->s[i].skip_chars = num_chars;
442 | #if ROPE_WCHAR
443 |     new_node->nexts[i].wchar_size = num_wchars + prev_skip->wchar_size - iter->s[i].wchar_size;
444 |     prev_skip->wchar_size = iter->s[i].wchar_size;
445 |     iter->s[i].wchar_size = num_wchars;
446 | #endif
447 |   }
448 | 
449 |   for (; i < max_height; i++) {
450 |     iter->s[i].node->nexts[i].skip_chars += num_chars;
451 |     iter->s[i].skip_chars += num_chars;
452 | #if ROPE_WCHAR
453 |     iter->s[i].node->nexts[i].wchar_size += num_wchars;
454 |     iter->s[i].wchar_size += num_wchars;
455 | #endif
456 |   }
457 | 
458 |   r->num_chars += num_chars;
459 |   r->num_bytes += num_bytes;
460 | }
461 | 
462 | // Insert the given utf8 string into the rope at the specified position.
463 | static ROPE_RESULT rope_insert_at_iter(rope *r, rope_node *e, rope_iter *iter, const uint8_t *str) {
464 |   // iter->s[0].skip_chars contains how far (in characters) into the current element to skip.
465 |   // Figure out how much that is in bytes.
466 |   size_t offset_bytes = 0;
467 |   // The insertion offset into the destination node.
468 |   size_t offset = iter->s[0].skip_chars;
469 |   if (offset) {
470 |     assert(offset <= e->nexts[0].skip_chars);
471 |     offset_bytes = count_bytes_in_utf8(e->str, offset);
472 |   }
473 | 
474 |   // We might be able to insert the new data into the current node, depending on
475 |   // how big it is. We'll count the bytes, and also check that it's valid utf8.
476 |   ssize_t num_inserted_bytes = bytelen_and_check_utf8(str);
477 |   if (num_inserted_bytes == -1) return ROPE_INVALID_UTF8;
478 | 
479 |   // Can we insert into the current node?
480 |   bool insert_here = e->num_bytes + num_inserted_bytes <= ROPE_NODE_STR_SIZE;
481 | 
482 |   // Can we insert into the subsequent node?
483 |   rope_node *next = NULL;
484 |   if (!insert_here && offset_bytes == e->num_bytes) {
485 |     next = e->nexts[0].node;
486 |     // We can insert into the subsequent node if:
487 |     // - We can't insert into the current node
488 |     // - There _is_ a next node to insert into
489 |     // - The insert would be at the start of the next node
490 |     // - There's room in the next node
491 |     if (next && next->num_bytes + num_inserted_bytes <= ROPE_NODE_STR_SIZE) {
492 |       offset = offset_bytes = 0;
493 |       for (int i = 0; i < next->height; i++) {
494 |         iter->s[i].node = next;
495 |         // tree offset nodes will not be used.
496 |       }
497 |       e = next;
498 | 
499 |       insert_here = true;
500 |     }
501 |   }
502 | 
503 |   if (insert_here) {
504 |     // First move the current bytes later on in the string.
505 |     if (offset_bytes < e->num_bytes) {
506 |       memmove(&e->str[offset_bytes + num_inserted_bytes],
507 |               &e->str[offset_bytes],
508 |               e->num_bytes - offset_bytes);
509 |     }
510 | 
511 |     // Then copy in the string bytes
512 |     memcpy(&e->str[offset_bytes], str, num_inserted_bytes);
513 |     e->num_bytes += num_inserted_bytes;
514 | 
515 |     r->num_bytes += num_inserted_bytes;
516 |     size_t num_inserted_chars = strlen_utf8(str);
517 |     r->num_chars += num_inserted_chars;
518 | 
519 |     // .... aaaand update all the offset amounts.
520 | #if ROPE_WCHAR
521 |     size_t num_inserted_wchars = count_wchars_in_utf8(str, num_inserted_chars);
522 |     update_offset_list(r, iter, num_inserted_chars, num_inserted_wchars);
523 | #else
524 |     update_offset_list(r, iter, num_inserted_chars);
525 | #endif
526 | 
527 |   } else {
528 |     // There isn't room. We'll need to add at least one new node to the rope.
529 | 
530 |     // If we're not at the end of the current node, we'll need to remove
531 |     // the end of the current node's data and reinsert it later.
532 |     size_t num_end_chars = 0, num_end_bytes = e->num_bytes - offset_bytes;
533 |     if (num_end_bytes) {
534 |       // We'll pretend like the characters have been deleted from the node, while leaving
535 |       // the bytes themselves there (for later).
536 |       e->num_bytes = offset_bytes;
537 |       num_end_chars = e->nexts[0].skip_chars - offset;
538 | #if ROPE_WCHAR
539 |       size_t num_end_wchars = count_wchars_in_utf8(&e->str[offset_bytes], num_end_chars);
540 |       update_offset_list(r, iter, -num_end_chars, -num_end_wchars);
541 | #else
542 |       update_offset_list(r, iter, -num_end_chars);
543 | #endif
544 | 
545 |       r->num_chars -= num_end_chars;
546 |       r->num_bytes -= num_end_bytes;
547 |     }
548 | 
549 |     // Now we insert new nodes containing the new character data. The data must be broken into
550 |     // pieces with a maximum size of ROPE_NODE_STR_SIZE. Node boundaries must not occur in the
551 |     // middle of a utf8 codepoint.
552 |     ssize_t str_offset = 0;
553 |     while (str_offset < num_inserted_bytes) {
554 |       size_t new_node_bytes = 0;
555 |       size_t new_node_chars = 0;
556 | 
557 |       while (str_offset + new_node_bytes < (size_t)num_inserted_bytes) {
558 |         size_t cs = codepoint_size(str[str_offset + new_node_bytes]);
559 |         if (cs + new_node_bytes > ROPE_NODE_STR_SIZE) {
560 |           break;
561 |         } else {
562 |           new_node_bytes += cs;
563 |           new_node_chars++;
564 |         }
565 |       }
566 | 
567 |       insert_at(r, iter, &str[str_offset], new_node_bytes, new_node_chars);
568 |       str_offset += new_node_bytes;
569 |     }
570 | 
571 |     if (num_end_bytes) {
572 |       insert_at(r, iter, &e->str[offset_bytes], num_end_bytes, num_end_chars);
573 |     }
574 |   }
575 | 
576 |   return ROPE_OK;
577 | }
578 | 
579 | ROPE_RESULT rope_insert(rope *r, size_t pos, const uint8_t *str) {
580 |   assert(r);
581 |   assert(str);
582 | #ifdef DEBUG
583 |   _rope_check(r);
584 | #endif
585 |   pos = MIN(pos, r->num_chars);
586 | 
587 |   rope_iter iter;
588 |   // First we need to search for the node where we'll insert the string.
589 |   rope_node *e = iter_at_char_pos(r, pos, &iter);
590 | 
591 |   ROPE_RESULT result = rope_insert_at_iter(r, e, &iter, str);
592 | 
593 | #ifdef DEBUG
594 |   _rope_check(r);
595 | #endif
596 | 
597 |   return result;
598 | }
599 | 
600 | #if ROPE_WCHAR
601 | // Insert the given utf8 string into the rope at the specified position.
602 | size_t rope_insert_at_wchar(rope *r, size_t wchar_pos, const uint8_t *str) {
603 |   assert(r);
604 |   assert(str);
605 | #ifdef DEBUG
606 |   _rope_check(r);
607 | #endif
608 |   wchar_pos = MIN(wchar_pos, rope_wchar_count(r));
609 | 
610 |   rope_iter iter;
611 |   // First we need to search for the node where we'll insert the string.
612 |   rope_node *e = iter_at_wchar_pos(r, wchar_pos, &iter);
613 |   size_t pos = iter.s[r->head.height - 1].skip_chars;
614 |   rope_insert_at_iter(r, e, &iter, str);
615 | 
616 | #ifdef DEBUG
617 |   _rope_check(r);
618 | #endif
619 |   return pos;
620 | }
621 | 
622 | #endif
623 | 
// Delete `length` characters starting at the position described by `iter`.
//
// `e` is the node containing the start position and `iter->s[0].skip_chars` is
// used as the character offset of that position inside `e`. For each level i,
// `iter->s[i].node` is the node whose level-i link is adjusted for the
// deletion.
//
// `length` is subtracted from r->num_chars unconditionally, so the caller must
// already have clamped it to the characters available after the start
// position (rope_del does this).
static void rope_del_at_iter(rope *r, rope_node *e, rope_iter *iter, size_t length) {
  r->num_chars -= length;
  size_t offset = iter->s[0].skip_chars;
  while (length) {
    if (offset == e->nexts[0].skip_chars) {
      // End of the current node. Skip to the start of the next one.
      e = iter->s[0].node->nexts[0].node;
      offset = 0;
    }

    // nexts[0].skip_chars doubles as the number of characters stored in e.
    size_t num_chars = e->nexts[0].skip_chars;
    // Never remove more than what is left in this node after `offset`.
    size_t removed = MIN(length, num_chars - offset);
#if ROPE_WCHAR
    size_t removed_wchars;
#endif

    // `i` is shared by both branches below: each leaves it at e's height so
    // the loop after the branches can continue with the taller levels.
    int i;
    if (removed < num_chars || e == &r->head) {
      // Just trim this node down to size. The head node always takes this
      // path, even when it ends up empty.
      size_t leading_bytes = count_bytes_in_utf8(e->str, offset);
      size_t removed_bytes = count_bytes_in_utf8(&e->str[leading_bytes], removed);
      size_t trailing_bytes = e->num_bytes - leading_bytes - removed_bytes;
#if ROPE_WCHAR
      removed_wchars = count_wchars_in_utf8(&e->str[leading_bytes], removed);
#endif
      if (trailing_bytes) {
        // Close the gap by sliding the surviving tail down over the removed bytes.
        memmove(&e->str[leading_bytes], &e->str[leading_bytes + removed_bytes], trailing_bytes);
      }
      e->num_bytes -= removed_bytes;
      r->num_bytes -= removed_bytes;

      // Every link leaving e now spans `removed` fewer characters.
      for (i = 0; i < e->height; i++) {
        e->nexts[i].skip_chars -= removed;
#if ROPE_WCHAR
        e->nexts[i].wchar_size -= removed_wchars;
#endif
      }
    } else {
      // Remove the node from the list
#if ROPE_WCHAR
      removed_wchars = e->nexts[0].wchar_size;
#endif
      // Splice e out at each of its levels: the predecessor takes over e's
      // link and whatever distance e's link covered beyond the removed text.
      for (i = 0; i < e->height; i++) {
        iter->s[i].node->nexts[i].node = e->nexts[i].node;
        iter->s[i].node->nexts[i].skip_chars += e->nexts[i].skip_chars - removed;
#if ROPE_WCHAR
        iter->s[i].node->nexts[i].wchar_size += e->nexts[i].wchar_size - removed_wchars;
#endif
      }

      r->num_bytes -= e->num_bytes;
      // TODO: Recycle e.
      // Grab the successor before freeing; the next iteration continues there.
      rope_node *next = e->nexts[0].node;
      r->free(e);
      e = next;
    }

    // Levels above e's height: shrink the iterator nodes' links by the
    // number of characters just removed.
    for (; i < r->head.height; i++) {
      iter->s[i].node->nexts[i].skip_chars -= removed;
#if ROPE_WCHAR
      iter->s[i].node->nexts[i].wchar_size -= removed_wchars;
#endif
    }

    length -= removed;
  }
}
693 | 
694 | void rope_del(rope *r, size_t pos, size_t length) {
695 | #ifdef DEBUG
696 |   _rope_check(r);
697 | #endif
698 | 
699 |   assert(r);
700 |   pos = MIN(pos, r->num_chars);
701 |   length = MIN(length, r->num_chars - pos);
702 | 
703 |   rope_iter iter;
704 | 
705 |   // Search for the node where we'll insert the string.
706 |   rope_node *e = iter_at_char_pos(r, pos, &iter);
707 | 
708 |   rope_del_at_iter(r, e, &iter, length);
709 | 
710 | #ifdef DEBUG
711 |   _rope_check(r);
712 | #endif
713 | }
714 | 
715 | #if ROPE_WCHAR
716 | size_t rope_del_at_wchar(rope *r, size_t wchar_pos, size_t wchar_num, size_t *char_len_out) {
717 | #ifdef DEBUG
718 |   _rope_check(r);
719 | #endif
720 | 
721 |   assert(r);
722 |   size_t wchar_total = rope_wchar_count(r);
723 |   wchar_pos = MIN(wchar_pos, wchar_total);
724 |   wchar_num = MIN(wchar_num, wchar_total - wchar_pos);
725 | 
726 |   rope_iter iter;
727 | 
728 |   // Search for the node where we'll insert the string.
729 |   rope_node *start = iter_at_wchar_pos(r, wchar_pos, &iter);
730 |   size_t char_pos = iter.s[r->head.height - 1].skip_chars;
731 | 
732 |   rope_iter end_iter;
733 |   int h = r->head.height - 1;
734 |   iter_at_wchar_pos(r, iter.s[h].wchar_size + wchar_num, &end_iter);
735 | 
736 |   size_t char_length = end_iter.s[h].skip_chars - iter.s[h].skip_chars;
737 |   rope_del_at_iter(r, start, &iter, char_length);
738 | 
739 | #ifdef DEBUG
740 |   _rope_check(r);
741 | #endif
742 |   if (char_len_out) {
743 |     *char_len_out = char_length;
744 |   }
745 |   return char_pos;
746 | }
747 | #endif
748 | 
// Debug consistency check. Walks every node of the rope and asserts that the
// per-node byte/character counts, the skip links at every level and the
// rope-wide totals all agree with each other. Any violation trips an assert.
void _rope_check(rope *r) {
  assert(r->head.height); // Even empty ropes have a height of 1.
  assert(r->num_bytes >= r->num_chars);

  // The head's top-level link must span the whole rope and point at nothing.
  rope_skip_node skip_over = r->head.nexts[r->head.height - 1];
  assert(skip_over.skip_chars == r->num_chars);
  assert(skip_over.node == NULL);

  // Running totals over the nodes visited so far.
  size_t num_bytes = 0;
  size_t num_chars = 0;
#if ROPE_WCHAR
  size_t num_wchar = 0;
#endif

  // The offsets here are used to store the total distance travelled from the start
  // of the rope.
  rope_iter iter = {};
  for (int i = 0; i < r->head.height; i++) {
    iter.s[i].node = &r->head;
  }

  // iter.s[i].node is the node we expect to meet next at level i.
  for (rope_node *n = &r->head; n != NULL; n = n->nexts[0].node) {
    // Only the head may be empty.
    assert(n == &r->head || n->num_bytes);
    assert(n->height <= ROPE_MAX_HEIGHT);
    // The stored byte count must match the utf8 length of the node's characters.
    assert(count_bytes_in_utf8(n->str, n->nexts[0].skip_chars) == n->num_bytes);
#if ROPE_WCHAR
    assert(count_wchars_in_utf8(n->str, n->nexts[0].skip_chars) == n->nexts[0].wchar_size);
#endif
    for (int i = 0; i < n->height; i++) {
      // At every level n takes part in, the previous link must land exactly
      // on n, having skipped exactly the characters that precede it.
      assert(iter.s[i].node == n);
      assert(iter.s[i].skip_chars == num_chars);
      iter.s[i].node = n->nexts[i].node;
      iter.s[i].skip_chars += n->nexts[i].skip_chars;
#if ROPE_WCHAR
      assert(iter.s[i].wchar_size == num_wchar);
      iter.s[i].wchar_size += n->nexts[i].wchar_size;
#endif
    }

    num_bytes += n->num_bytes;
    num_chars += n->nexts[0].skip_chars;
#if ROPE_WCHAR
    num_wchar += n->nexts[0].wchar_size;
#endif
  }

  // Every level must have run off the end of the list having covered the
  // whole rope.
  for (int i = 0; i < r->head.height; i++) {
    assert(iter.s[i].node == NULL);
    assert(iter.s[i].skip_chars == num_chars);
#if ROPE_WCHAR
    assert(iter.s[i].wchar_size == num_wchar);
#endif
  }

  // The cached totals on the rope must match what the walk counted.
  assert(r->num_bytes == num_bytes);
  assert(r->num_chars == num_chars);
#if ROPE_WCHAR
  assert(skip_over.wchar_size == num_wchar);
#endif
}
809 | 
810 | // For debugging.
811 | #include <stdio.h>
812 | void _rope_print(rope *r) {
813 |   printf("chars: %zd\tbytes: %zd\theight: %d\n", r->num_chars, r->num_bytes, r->head.height);
814 | 
815 |   printf("HEAD");
816 |   for (int i = 0; i < r->head.height; i++) {
817 |     printf(" |%3zd ", r->head.nexts[i].skip_chars);
818 |   }
819 |   printf("\n");
820 | 
821 |   int num = 0;
822 |   for (rope_node *n = &r->head; n != NULL; n = n->nexts[0].node) {
823 |     printf("%3d:", num++);
824 |     for (int i = 0; i < n->height; i++) {
825 |       printf(" |%3zd ", n->nexts[i].skip_chars);
826 |     }
827 |     printf("        : \"");
828 |     fwrite(n->str, n->num_bytes, 1, stdout);
829 |     printf("\"\n");
830 |   }
831 | }
832 | 


--------------------------------------------------------------------------------
/rope_benches/rope.h:
--------------------------------------------------------------------------------
  1 | /* UTF-8 Rope implementation by Joseph Gentle
  2 |  *
  3 |  * This library implements a heavyweight utf8 string type with fast
  4 |  * insert-at-position and delete-at-position operations.
  5 |  * 
  6 |  * It uses skip lists instead of trees. Trees might be faster - who knows?
  7 |  *
 * Ropes are not synchronized. Do not access the same rope from multiple threads
  9 |  * simultaneously.
 10 |  */
 11 | 
 12 | #ifndef librope_rope_h
 13 | #define librope_rope_h
 14 | 
 15 | #include <stdint.h>
 16 | #include <stddef.h>
 17 | 
 18 | // Whether or not the rope should support converting UTF-8 character offsets to
 19 | // wchar array positions. This is useful when interoperating with strings in
 20 | // JS, Objective-C and many other languages. See
 21 | // http://josephg.com/post/31707645955/string-length-lies
 22 | //
 23 | // Adding wchar conversion support decreases performance by about 30%.
 24 | #ifndef ROPE_WCHAR
 25 | #define ROPE_WCHAR 0
 26 | #endif
 27 | 
 28 | // These two magic values seem to be approximately optimal given the benchmark
 29 | // in tests.c which does lots of small inserts.
 30 | 
 31 | // Must be <= UINT16_MAX. Benchmarking says this is pretty close to optimal
 32 | // (tested on a mac using clang 4.0 and x86_64).
 33 | #ifndef ROPE_NODE_STR_SIZE
 34 | #if ROPE_WCHAR
 35 | #define ROPE_NODE_STR_SIZE 64
 36 | #else
 37 | #define ROPE_NODE_STR_SIZE 136
 38 | #endif
 39 | #endif
 40 | 
// The likelihood (%) a node will have height (n+1) instead of n
 42 | #ifndef ROPE_BIAS
 43 | #define ROPE_BIAS 25
 44 | #endif
 45 | 
 46 | // The rope will stop being efficient after the string is 2 ^ ROPE_MAX_HEIGHT
 47 | // nodes.
 48 | #ifndef ROPE_MAX_HEIGHT
 49 | #define ROPE_MAX_HEIGHT 20
 50 | #endif
 51 | 
 52 | struct rope_node_t;
 53 | 
 54 | // The number of characters in str can be read out of nexts[0].skip_chars.
// One link of the skip list: a pointer to the next node at some level,
// together with the distance covered by following that pointer.
typedef struct {
  // The number of _characters_ between the start of the current node
  // and the start of next.
  size_t skip_chars;

  // For some reason, librope runs about 1% faster when this next pointer is
  // exactly _here_ in the struct.
  struct rope_node_t *node;

#if ROPE_WCHAR
  // The number of wide characters covered by this link; the wchar counterpart
  // of skip_chars.
  size_t wchar_size;
#endif
} rope_skip_node;
 69 | 
// A skip list node: a fixed-size chunk of utf8 text followed by one link per
// level the node takes part in.
typedef struct rope_node_t {
  // The node's text. Only the first num_bytes bytes are in use.
  uint8_t str[ROPE_NODE_STR_SIZE];

  // The number of bytes in str in use
  uint16_t num_bytes;
  
  // This is the number of elements allocated in nexts.
  // Taller nodes are less likely than shorter ones. The minimum height is 1.
  // NOTE(review): this comment used to say each height is 1/2 as likely as the
  // one before, which does not match the default ROPE_BIAS of 25% - confirm
  // against the code that picks node heights.
  uint8_t height;
  
  // Flexible array of `height` links. nexts[0] leads to the immediately
  // following node, and nexts[0].skip_chars is the number of characters in str.
  rope_skip_node nexts[];
} rope_node;
 82 | 
// The rope itself: cached totals, the allocator hooks and the head node of
// the skip list.
typedef struct {
  // The total number of characters in the rope.
  size_t num_chars;
  
  // The total number of bytes which the characters in the rope take up.
  size_t num_bytes;
  
  // Allocator hooks used by the rope; rope_new2 lets the caller supply
  // them.
  void *(*alloc)(size_t bytes);
  void *(*realloc)(void *ptr, size_t newsize);
  void (*free)(void *ptr);

  // The first node exists inline in the rope structure itself.
  // It must stay the last member because rope_node ends in a flexible array.
  rope_node head;
} rope;
 97 | 
 98 | #ifdef __cplusplus
 99 | extern "C" {
100 | #endif
101 |   
102 | // Create a new rope with no contents
103 | rope *rope_new();
104 | 
105 | // Create a new rope using custom allocators.
106 | rope *rope_new2(void *(*alloc)(size_t bytes),
107 |     void *(*realloc)(void *ptr, size_t newsize),
108 |     void (*free)(void *ptr));
109 | 
110 | // Create a new rope containing a copy of the given string. Shorthand for
111 | // r = rope_new(); rope_insert(r, 0, str);
112 | rope *rope_new_with_utf8(const uint8_t *str);
113 | 
114 | // Make a copy of an existing rope
115 | rope *rope_copy(const rope *r);
116 | 
117 | // Free the specified rope
118 | void rope_free(rope *r);
119 | 
120 | // Get the number of characters in a rope
121 | size_t rope_char_count(const rope *r);
122 | 
123 | // Get the number of bytes which the rope would take up if stored as a utf8
124 | // string
125 | size_t rope_byte_count(const rope *r);
126 | 
127 | // Copies the rope's contents into a utf8 encoded C string. Also copies a
128 | // trailing '\0' character.
129 | // Returns the number of bytes written, which is rope_byte_count(r) + 1.
130 | size_t rope_write_cstr(rope *r, uint8_t *dest);
131 | 
132 | // Create a new C string which contains the rope. The string will contain
133 | // the rope encoded as utf8, followed by a trailing '\0'.
134 | // Use rope_byte_count(r) to get the length of the returned string.
135 | uint8_t *rope_create_cstr(rope *r);
136 | 
137 | // If you try to insert data into the rope with an invalid UTF8 encoding,
138 | // nothing will happen and we'll return ROPE_INVALID_UTF8.
139 | typedef enum { ROPE_OK, ROPE_INVALID_UTF8 } ROPE_RESULT;
140 |   
141 | // Insert the given utf8 string into the rope at the specified position.
142 | ROPE_RESULT rope_insert(rope *r, size_t pos, const uint8_t *str);
143 | 
144 | // Delete num characters at position pos. Deleting past the end of the string
145 | // has no effect.
146 | void rope_del(rope *r, size_t pos, size_t num);
147 |   
148 | // This macro expands to a for() loop header which loops over the segments in a
149 | // rope.
150 | //
151 | // Eg:
152 | //  rope *r = rope_new_with_utf8(str);
153 | //  ROPE_FOREACH(r, iter) {
154 | //    printf("%s", rope_node_data(iter));
155 | //  }
156 | #define ROPE_FOREACH(rope, iter) \
157 |   for (rope_node *iter = &(rope)->head; iter != NULL; iter = iter->nexts[0].node)
158 | 
// Get the actual data inside a rope node: a pointer to the node's utf8 bytes.
// NOTE(review): nothing visible here guarantees a trailing '\0'; pair this
// with rope_node_num_bytes to know how many bytes are valid.
static inline uint8_t *rope_node_data(rope_node *n) {
  return n->str;
}
163 | 
// Get the number of bytes inside a rope node (the in-use length of its data).
// This is useful when you're looping through a rope.
static inline size_t rope_node_num_bytes(rope_node *n) {
  return n->num_bytes;
}
169 | 
// Get the number of characters inside a rope node. This is read from the
// node's level-0 link, whose skip distance is the node's own character count.
static inline size_t rope_node_chars(rope_node *n) {
  return n->nexts[0].skip_chars;
}
174 |   
175 | #if ROPE_WCHAR
176 | // Get the number of wchar characters in the rope
177 | size_t rope_wchar_count(rope *r);
178 | 
179 | // Insert the given utf8 string into the rope at the specified wchar position.
180 | // This is compatible with NSString, Javascript, etc. The string still needs to
181 | // be passed in using UTF-8.
182 | //
183 | // Returns the insertion position in characters.
184 | size_t rope_insert_at_wchar(rope *r, size_t wchar_pos, const uint8_t *utf8_str);
185 |   
186 | // Delete wchar_num wide characters at the specified wchar position offset.
187 | // If the range is inside character boundaries, behaviour is undefined.
188 | //
189 | // Returns the deletion position in characters. *char_len_out is set to the
190 | // deletion length, in chars if its not null.
191 | size_t rope_del_at_wchar(rope *r, size_t wchar_pos, size_t wchar_num, size_t *char_len_out);
192 |   
// Get the number of wchars inside a rope node, read from the node's level-0
// link. This is useful when you're looping through a rope.
static inline size_t rope_node_wchars(rope_node *n) {
  return n->nexts[0].wchar_size;
}
198 | #endif
199 | 
200 | 
201 |   
202 | // For debugging.
203 | void _rope_check(rope *r);
204 | void _rope_print(rope *r);
205 | 
206 | #ifdef __cplusplus
207 | }
208 | #endif
209 | 
210 | #endif
211 | 


--------------------------------------------------------------------------------
/rope_benches/src/edittablestr.rs:
--------------------------------------------------------------------------------
  1 | use super::Rope;
  2 | use std::ptr;
  3 | 
  4 | // pub trait EditableText {
  5 | //     // pos is in utf8 codepoints
  6 | //     fn insert_at(&mut self, pos: usize, contents: &str);
  7 | //     fn remove_at(&mut self, pos: usize, length: usize);
  8 | // }
  9 | 
 10 | impl Rope for String {
 11 |     const NAME: &'static str = "String";
 12 | 
 13 |     fn new() -> Self { String::new() }
 14 |     
 15 |     fn insert_at(&mut self, char_pos: usize, contents: &str) {
 16 |         // If you try to write past the end of the string for now I'll just write at the end.
 17 |         // Panicing might be a better policy.
 18 |         let byte_pos = self.char_indices().skip(char_pos).next()
 19 |             .map(|(p, _)| p).unwrap_or(self.len());
 20 |         //println!("pos {}", byte_pos);
 21 |         //self.insert_str(byte_pos, contents);
 22 |         
 23 |         let old_len = self.len();
 24 |         let new_bytes = contents.len();
 25 | 
 26 |         // This didn't work because it didn't change the string's length
 27 |         //self.reserve(new_bytes);
 28 | 
 29 |         // This is sort of ugly but its fine.
 30 |         for _ in 0..new_bytes { self.push('\0'); }
 31 | 
 32 |         //println!("new bytes {} {} {}", new_bytes, byte_pos, self.len() - byte_pos);
 33 |         unsafe {
 34 |             let bytes = self.as_mut_vec().as_mut_ptr();
 35 |             //println!("{:?}", self.as_mut_vec());
 36 |             ptr::copy(
 37 |                 bytes.offset(byte_pos as isize),
 38 |                 bytes.offset((byte_pos + new_bytes) as isize),
 39 |                 old_len - byte_pos
 40 |             );
 41 |             ptr::copy_nonoverlapping(
 42 |                 contents.as_ptr(),
 43 |                 bytes.offset(byte_pos as isize),
 44 |                 new_bytes
 45 |             );
 46 |             //println!("{:?}", self.as_mut_vec());
 47 |         }
 48 |     }
 49 |     fn del_at(&mut self, pos: usize, length: usize) {
 50 |         let byte_range = {
 51 |             let mut iter = self.char_indices().map(|(p, _)| p).skip(pos).peekable();
 52 | 
 53 |             let start = iter.peek().map_or_else(|| self.len(), |&p| p);
 54 |             let mut iter = iter.skip(length).peekable();
 55 |             let end = iter.peek().map_or_else(|| self.len(), |&p| p);
 56 | 
 57 |             start..end
 58 |         };
 59 | 
 60 |         self.drain(byte_range);
 61 |     }
 62 | 
 63 |     // fn len(&self) -> usize { self.len() }
 64 |     fn char_len(&self) -> usize { self.chars().count() }
 65 |     fn to_string(&self) -> String { self.clone() }
 66 | }
 67 | 
 68 | 
 69 | 
 70 | #[cfg(test)]
 71 | mod tests {
 72 |     use super::Rope;
 73 | 
 74 |     #[test]
 75 |     fn insert_simple() {
 76 |         let mut s = "".to_string();
 77 |         s.insert_at(0, "hi");
 78 |         assert_eq!(s, "hi");
 79 | 
 80 |         let mut s = "a".to_string();
 81 |         s.insert_at(0, "hi");
 82 |         assert_eq!(s, "hia");
 83 | 
 84 |         let mut s = "a".to_string();
 85 |         s.insert_at(1, "hi");
 86 |         assert_eq!(s, "ahi");
 87 | 
 88 |         let mut s = "ac".to_string();
 89 |         s.insert_at(1, "b");
 90 |         assert_eq!(s, "abc");
 91 |     }
 92 | 
 93 |     #[test]
 94 |     fn insert_unicode() {
 95 |         // I mean, its all unicode but ....
 96 |         let mut s = "𝄞𝄞".to_string();
 97 |         s.insert_at(0, "à");
 98 |         assert_eq!(s, "à𝄞𝄞");
 99 |         s.insert_at(2, "ë");
100 |         assert_eq!(s, "à𝄞ë𝄞");
101 |         s.insert_at(4, "ç");
102 |         assert_eq!(s, "à𝄞ë𝄞ç");
103 |         s.insert_at(6, "𝒲");
104 |         assert_eq!(s, "à𝄞ë𝄞ç𝒲");
105 |     }
106 | 
107 |     #[test]
108 |     fn remove_simple() {
109 |         let mut s = "à".to_string();
110 |         s.del_at(0, 1);
111 |         assert_eq!(s, "");
112 |         s.del_at(0, 0);
113 |         assert_eq!(s, "");
114 | 
115 |         let mut s = "à𝄞ç".to_string();
116 |         s.del_at(0, 1);
117 |         assert_eq!(s, "𝄞ç");
118 |         s.del_at(1, 1);
119 |         assert_eq!(s, "𝄞");
120 |         s.del_at(0, 1);
121 |         assert_eq!(s, "");
122 |     }
123 | }
124 | 


--------------------------------------------------------------------------------
/rope_benches/src/main.rs:
--------------------------------------------------------------------------------
  1 | 
  2 | // #[macro_use]
  3 | extern crate criterion;
  4 | use criterion::*;
  5 | 
  6 | use crdt_testdata::*;
  7 | 
  8 | // extern crate rand;
  9 | // use rand::seq::IteratorRandom;
 10 | use rand::prelude::*;
 11 | 
 12 | mod rope;
 13 | use self::rope::*;
 14 | use jumprope::*;
 15 | 
 16 | mod edittablestr;
 17 | 
 18 | use std::cmp::min;
 19 | 
 20 | use ropey::Rope as RopeyRope;
 21 | use an_rope::Rope as AnRope;
 22 | use xi_rope::Rope as XiRope;
 23 | use crop::Rope as CropRope;
 24 | 
 25 | const CHARS: &[u8; 83] = b" ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()[]{}<>?,./";
 26 | 
 27 | // Gross. Find a way to reuse the code from random_unicode_string.
 28 | fn random_ascii_string(rng: &mut SmallRng, len: usize) -> String {
 29 |     let mut s = String::new();
 30 |     for _ in 0..len {
 31 |         // s.push(*rng.choose(CHARS).unwrap() as char);
 32 |         s.push(CHARS[rng.gen_range(0 .. CHARS.len())] as char);
 33 |     }
 34 |     s
 35 | }
 36 | 
// Adapter exposing `JumpRope` through the benchmark's `Rope` trait. Every
// method forwards straight to the corresponding `JumpRope` call.
impl Rope for JumpRope {
    const NAME: &'static str = "JumpRope";

    #[inline(always)]
    fn new() -> Self { JumpRope::new() }

    #[inline(always)]
    fn insert_at(&mut self, pos: usize, contents: &str) { self.insert(pos, contents); }
    // Deletes the `len` items starting at `pos`, expressed as a half-open range.
    #[inline(always)]
    fn del_at(&mut self, pos: usize, len: usize) { self.remove(pos..pos+len); }

    // Combined delete + insert, done with a single `replace` call.
    #[inline(always)]
    fn edit_at(&mut self, pos: usize, del_len: usize, ins_content: &str) {
        self.replace(pos..pos+del_len, ins_content);
    }

    // Called through `ToString` explicitly so this does not resolve to the
    // trait method being defined here.
    #[inline(always)]
    fn to_string(&self) -> String { ToString::to_string(self) }

    #[inline(always)]
    fn char_len(&self) -> usize { self.len_chars() } // in unicode values
}
 59 | 
// Adapter exposing `JumpRopeBuf` through the benchmark's `Rope` trait.
impl Rope for JumpRopeBuf {
    const NAME: &'static str = "JumpRopeBuf";

    #[inline(always)]
    fn new() -> Self { JumpRopeBuf::new() }

    #[inline(always)]
    fn insert_at(&mut self, pos: usize, contents: &str) { self.insert(pos, contents); }
    // Deletes the `len` items starting at `pos`, expressed as a half-open range.
    #[inline(always)]
    fn del_at(&mut self, pos: usize, len: usize) { self.remove(pos..pos+len); }

    // Performed as a separate remove followed by an insert; either step is
    // skipped when it would be a no-op.
    #[inline(always)]
    fn edit_at(&mut self, pos: usize, del_len: usize, ins_content: &str) {
        if del_len > 0 {
            self.remove(pos..pos + del_len);
        }
        if !ins_content.is_empty() {
            self.insert(pos, ins_content);
        }
    }

    // Called through `ToString` explicitly so this does not resolve to the
    // trait method being defined here.
    #[inline(always)]
    fn to_string(&self) -> String { ToString::to_string(self) }

    #[inline(always)]
    fn char_len(&self) -> usize { self.len_chars() } // in unicode values
}
 87 | 
// Adapter exposing `an_rope::Rope` through the benchmark's `Rope` trait.
impl Rope for AnRope {
    const NAME: &'static str = "AnRope";

    #[inline(always)]
    fn new() -> Self { AnRope::new() }

    // `insert_str` and `delete` hand back a rope rather than editing in
    // place, so the result is assigned over `self`.
    #[inline(always)]
    fn insert_at(&mut self, pos: usize, contents: &str) { *self = self.insert_str(pos, contents); }
    #[inline(always)]
    fn del_at(&mut self, pos: usize, len: usize) { *self = self.delete(pos..pos+len); }

    // Called through `ToString` explicitly so this does not resolve to the
    // trait method being defined here.
    #[inline(always)]
    fn to_string(&self) -> String { ToString::to_string(self) }

    // NOTE(review): assumes `len()` counts chars rather than bytes - confirm
    // against the an_rope documentation.
    #[inline(always)]
    fn char_len(&self) -> usize { self.len() } // in unicode values
}
105 | 
106 | impl Rope for XiRope {
107 |     const NAME: &'static str = "XiRope";
108 | 
109 |     #[inline(always)]
110 |     fn new() -> Self { XiRope::from("") }
111 | 
112 |     #[inline(always)]
113 |     fn insert_at(&mut self, pos: usize, contents: &str) {
114 |         self.edit(pos..pos, contents);
115 |     }
116 |     #[inline(always)]
117 |     fn del_at(&mut self, pos: usize, len: usize) {
118 |         self.edit(pos..pos+len, "");
119 |     }
120 |     #[inline(always)]
121 |     fn edit_at(&mut self, pos: usize, del_len: usize, ins_content: &str) {
122 |         self.edit(pos..pos+del_len, ins_content);
123 |     }
124 | 
125 |     #[inline(always)]
126 |     fn to_string(&self) -> String {
127 |         String::from(self)
128 |     }
129 | 
130 |     #[inline(always)]
131 |     fn char_len(&self) -> usize {
132 |         let mut len = 0;
133 |         for s in self.iter_chunks(..) {
134 |             len += s.chars().count();
135 |         }
136 |         len
137 |     } // in unicode values
138 | }
139 | 
140 | impl Rope for RopeyRope {
141 |     const NAME: &'static str = "Ropey";
142 | 
143 |     #[inline(always)]
144 |     fn new() -> Self { RopeyRope::new() }
145 | 
146 |     #[inline(always)]
147 |     fn insert_at(&mut self, pos: usize, contents: &str) {
148 |         self.insert(pos, contents);
149 |     }
150 |     #[inline(always)]
151 |     fn del_at(&mut self, pos: usize, len: usize) {
152 |         self.remove(pos..pos+len);
153 |     }
154 |     // fn del_at<R: RangeBounds<usize>>(&mut self, range: R);
155 | 
156 |     // fn slice(&self, pos: usize, len: usize) -> Result<String, RopeError>;
157 | 
158 |     #[inline(always)]
159 |     fn to_string(&self) -> String { unimplemented!() }
160 | 
161 |     #[inline(always)]
162 |     fn char_len(&self) -> usize { self.len_chars() } // in unicode values
163 | }
164 | 
// Adapter exposing `crop::Rope` through the benchmark's `Rope` trait.
impl Rope for CropRope {
    const NAME: &'static str = "Crop";
    // Tells the benchmark harness that positions passed to this adapter are
    // byte offsets rather than char offsets.
    const EDITS_USE_BYTE_OFFSETS: bool = true;

    fn new() -> Self {
        Self::new()
    }

    fn insert_at(&mut self, pos: usize, contents: &str) {
        self.insert(pos, contents);
    }

    fn del_at(&mut self, pos: usize, len: usize) {
        self.delete(pos..pos+len)
    }

    // Called through `ToString` explicitly so this does not resolve to the
    // trait method being defined here.
    fn to_string(&self) -> String {
        ToString::to_string(self)
    }

    // Despite the name this reports the length in bytes, in line with
    // EDITS_USE_BYTE_OFFSETS above.
    fn char_len(&self) -> usize {
        self.byte_len()
    }
}
189 | 
190 | use std::os::raw::c_char;
191 | use std::ffi::CString;
192 | use crdt_testdata::{load_testing_data, TestData};
193 | use criterion::measurement::WallTime;
194 | 
// Opaque stand-in for the C `rope` struct. It is zero-sized and only ever
// used behind a raw pointer; Rust never looks inside it.
#[repr(C)]
struct CRopeRaw { _unused : [ u8 ; 0 ] }

// Bindings to the C rope library (see rope.h). The C side takes
// `const uint8_t *` strings; `*const c_char` is used here so `CString`
// pointers can be passed directly.
extern "C" {
    fn rope_new() -> *mut CRopeRaw;
    fn rope_new_with_utf8(s: *const c_char) -> *mut CRopeRaw;
    fn rope_free(r: *mut CRopeRaw);
    fn rope_char_count(r: *const CRopeRaw) -> usize;
    // fn rope_byte_count(r: *const CRopeRaw) -> usize;

    // Returns a ROPE_RESULT (ROPE_OK or ROPE_INVALID_UTF8).
    fn rope_insert(r: *mut CRopeRaw, pos: usize, s: *const c_char) -> u32;
    // rope.h declares `rope_del` as returning `void`, so no return type is
    // declared here: there is no value to read back.
    fn rope_del(r: *mut CRopeRaw, pos: usize, len: usize);
}
208 | 
209 | struct CRope(*mut CRopeRaw);
210 | impl Rope for CRope {
211 |     const NAME: &'static str = "C-JumpRope";
212 | 
213 |     #[inline(always)]
214 |     fn new() -> Self { unsafe { CRope(rope_new()) } }
215 | 
216 |     #[inline(always)]
217 |     fn insert_at(&mut self, pos: usize, contents: &str) {
218 |         unsafe {
219 |             let cstr = CString::new(contents).unwrap();
220 |             rope_insert(self.0, pos, cstr.as_ptr());
221 |         }
222 |     }
223 |     #[inline(always)]
224 |     fn del_at(&mut self, pos: usize, len: usize) {
225 |         unsafe { rope_del(self.0, pos, len); }
226 |     }
227 |     fn to_string(&self) -> String { unimplemented!() }
228 | 
229 |     #[inline(always)]
230 |     fn char_len(&self) -> usize { unsafe { rope_char_count(self.0) } } // in unicode values
231 | }
232 | impl Drop for CRope {
233 |     fn drop(&mut self) {
234 |         unsafe { rope_free(self.0); }
235 |     }
236 | }
237 | impl From<String> for CRope {
238 |     fn from(s: String) -> Self {
239 |         let cstr = CString::new(s).unwrap();
240 |         CRope(unsafe { rope_new_with_utf8(cstr.as_ptr()) })
241 |     }
242 | }
243 | 
// Smoke test for the C bindings: a freshly created rope can be queried and
// released again.
#[test]
fn foo() {
    unsafe {
        let r = rope_new();
        assert!(!r.is_null());

        let size = rope_char_count(r);
        println!("size {}", size);
        // rope_new creates a rope with no contents.
        assert_eq!(size, 0);

        // Release the rope; previously it was leaked.
        rope_free(r);
    }
}
251 | 
252 | fn gen_strings(rng: &mut SmallRng) -> Vec<String> {
253 |     // I wish there was a better syntax for just making an array here.
254 |     let mut strings = Vec::<String>::new();
255 |     for _ in 0..100 {
256 |         let len = rng.gen_range(1 .. 3);
257 |         strings.push(random_ascii_string(rng, len));
258 |     }
259 | 
260 |     strings
261 | }
262 | 
263 | fn ins_append<R: Rope>(b: &mut Bencher) {
264 |     let mut rng = SmallRng::seed_from_u64(123);
265 |     let strings = gen_strings(&mut rng);
266 | 
267 |     let mut r = R::new();
268 |     let mut len = 0;
269 |     b.iter(|| {
270 |         // let pos = rng.gen_range(0, len+1);
271 |         let text = &strings[rng.gen_range(0 .. strings.len())];
272 |         r.insert_at(len, text.as_str());
273 |         len += text.chars().count();
274 |     });
275 | 
276 |     black_box(r.char_len());
277 | }
278 | 
279 | fn ins_random<R: Rope>(b: &mut Bencher) {
280 |     let mut rng = SmallRng::seed_from_u64(123);
281 |     let strings = gen_strings(&mut rng);
282 | 
283 |     let mut r = R::new();
284 |     // Len isn't needed, but its here to allow direct comparison with ins_append.
285 |     let mut len = 0;
286 |     b.iter(|| {
287 |         let pos = rng.gen_range(0 .. len+1);
288 |         let text = &strings[rng.gen_range(0 .. strings.len())];
289 |         r.insert_at(pos, text.as_str());
290 |         len += text.chars().count();
291 |     });
292 | 
293 |     black_box(r.char_len());
294 |     black_box(len); 
295 | }
296 | 
297 | fn stable_ins_del<R: Rope + From<String>>(b: &mut Bencher, target_length: &u64) {
298 |     let target_length = *target_length as usize;
299 |     let mut rng = SmallRng::seed_from_u64(123);
300 | 
301 |     // I wish there was a better syntax for just making an array here.
302 |     let strings = gen_strings(&mut rng);
303 |     
304 |     // let target_length = 100000;
305 |     // let mut r = R::new();
306 |     // while r.char_len() < target_length {
307 |     //     // The rope should be a hot mess.
308 |     //     let pos = rng.gen_range(0, r.char_len()+1);
309 |     //     r.insert_at(pos, strings[rng.gen_range(0, strings.len())].as_str()).unwrap();
310 |     // }
311 |     let mut r = R::from(random_ascii_string(&mut rng, target_length));
312 |     let mut len = target_length;
313 | 
314 |     b.iter(|| {
315 |         // let len = r.char_len();
316 |         // if len == 0 || rng.gen::<bool>() {
317 |         if len <= target_length {
318 |             // Insert
319 |             let pos = rng.gen_range(0 .. len+1);
320 |             let text = &strings[rng.gen_range(0 .. strings.len())];
321 |             r.insert_at(pos, text.as_str());
322 |             len += text.chars().count();
323 |         } else {
324 |             // Delete
325 |             let pos = rng.gen_range(0 .. len);
326 |             let dlen = min(rng.gen_range(0 .. 10), len - pos);
327 |             len -= dlen;
328 | 
329 |             r.del_at(pos, dlen);
330 |         }
331 |     });
332 | 
333 |     // Return something based on the computation to avoid it being optimized
334 |     // out. Although right now the compiler isn't smart enough for that
335 |     // anyway.
336 |     // r.len()
337 |     black_box(r.char_len());
338 | }
339 | 
340 | #[allow(unused)]
341 | fn bench_ins_append(c: &mut Criterion) {
342 |     let mut group = c.benchmark_group("ins_append");
343 | 
344 |     group.bench_function("jumprope", ins_append::<JumpRope>);
345 |     group.bench_function("ropey", ins_append::<RopeyRope>);
346 |     // group.bench_function("anrope", ins_append::<AnRope>);
347 |     group.bench_function("xirope", ins_append::<XiRope>);
348 |     group.bench_function("jumprope_c", ins_append::<CRope>);
349 |     group.bench_function("raw_string", ins_append::<String>);
350 |     group.finish();
351 | }
352 | 
353 | #[allow(unused)]
354 | fn bench_ins_random(c: &mut Criterion) {
355 |     let mut group = c.benchmark_group("ins_random");
356 | 
357 |     group.bench_function("jumprope", ins_random::<JumpRope>);
358 |     group.bench_function("ropey", ins_random::<RopeyRope>);
359 |     // group.bench_function("anrope", ins_random::<AnRope>);
360 |     group.bench_function("xirope", ins_random::<XiRope>);
361 |     group.bench_function("jumprope_c", ins_random::<CRope>);
362 |     group.bench_function("raw_string", ins_random::<String>);
363 |     group.finish();
364 | }
365 | 
366 | #[allow(unused)]
367 | fn bench_stable_ins_del(c: &mut Criterion) {
368 |     let mut group = c.benchmark_group("stable_ins_del");
369 | 
370 |     for size in [1000, 10000, 100000, 1000000, 10000000].iter() {
371 |         group.throughput(Throughput::Elements(*size));
372 |         group.bench_with_input(BenchmarkId::new("jumprope", size), size, stable_ins_del::<JumpRope>);
373 |         group.bench_with_input(BenchmarkId::new("ropey", size), size, stable_ins_del::<RopeyRope>);
374 |         // group.bench_with_input(BenchmarkId::new("anrope", size), size, stable_ins_del::<AnRope>);
375 |         group.bench_with_input(BenchmarkId::new("xirope", size), size, stable_ins_del::<XiRope>);
376 |         group.bench_with_input(BenchmarkId::new("jumprope_c", size), size, stable_ins_del::<CRope>);
377 |     }
378 |     group.finish();
379 | }
380 | 
381 | fn load_named_data(name: &str) -> TestData {
382 |     let filename = format!("/home/seph/src/diamond-types/benchmark_data/{}.json.gz", name);
383 |     load_testing_data(&filename)
384 | }
385 | 
/// Names of the editing traces replayed by the `realworld` benchmark. Each
/// name is turned into a `<name>.json.gz` file name by `load_named_data`.
// const DATASETS: &[&str] = &["automerge-paper"];
const DATASETS: &[&str] = &["automerge-paper", "rustcode", "sveltecomponent", "seph-blog1"];
388 | 
389 | fn realworld(c: &mut Criterion) {
390 |     for name in DATASETS {
391 |         let mut group = c.benchmark_group("realworld");
392 |         let test_data_chars = load_named_data(name);
393 |         group.throughput(Throughput::Elements(test_data_chars.len() as u64));
394 |         let test_data_bytes = test_data_chars.chars_to_bytes();
395 | 
396 |         let mut all_ascii = true;
397 |         for txn in &test_data_chars.txns {
398 |             for TestPatch(_pos, _del, ins) in &txn.patches {
399 |                 if ins.chars().count() != ins.len() { all_ascii = false; }
400 |             }
401 |         }
402 | 
403 |         fn x<R: Rope>(group: &mut BenchmarkGroup<WallTime>, name: &str, test_data: &TestData) {
404 |             assert_eq!(R::EDITS_USE_BYTE_OFFSETS, test_data.using_byte_positions);
405 | 
406 |             group.bench_function(BenchmarkId::new(R::NAME, name), |b| {
407 |                 b.iter(|| {
408 |                     let mut r = R::new();
409 |                     for txn in &test_data.txns {
410 |                         for TestPatch(pos, del, ins) in &txn.patches {
411 |                             r.edit_at(*pos, *del, ins);
412 |                         }
413 |                     }
414 |                     assert_eq!(r.char_len(), test_data.end_content.len());
415 |                     black_box(r.char_len());
416 |                 })
417 |             });
418 |         }
419 | 
420 |         x::<RopeyRope>(&mut group, name, &test_data_chars);
421 |         x::<JumpRope>(&mut group, name, &test_data_chars);
422 |         x::<JumpRopeBuf>(&mut group, name, &test_data_chars);
423 |         x::<CRope>(&mut group, name, &test_data_chars);
424 |         x::<CropRope>(&mut group, name, &test_data_bytes);
425 | 
426 |         // These two crash on non-ascii characters for some reason.
427 |         if all_ascii {
428 |             // Extremely slow.
429 |             x::<XiRope>(&mut group, name, &test_data_chars);
430 | 
431 |             // Crashes.
432 |             // x::<AnRope>(&mut group, name, &test_data);
433 |         }
434 | 
435 |         // This takes a long time to run.
436 |         // x::<String>(&mut group, name, &test_data);
437 | 
438 |         group.finish();
439 |     }
440 | }
441 | 
// Collect the benchmark functions into the `benches` group and generate the
// `main` entry point that runs that group.
criterion_group!(benches,
    bench_ins_append,
    bench_ins_random,
    bench_stable_ins_del,
    realworld
);
// criterion_group!(benches, bench_all);
criterion_main!(benches);


--------------------------------------------------------------------------------
/rope_benches/src/rope.rs:
--------------------------------------------------------------------------------
 1 | // use std::ops::RangeBounds;
 2 | 
 3 | // #[derive(Debug)]
 4 | // pub enum RopeError {
 5 | //     PositionOutOfBounds,
 6 | // }
 7 | 
 8 | pub trait Rope: From<String> {
 9 |     const NAME: &'static str;
10 |     const EDITS_USE_BYTE_OFFSETS: bool = false;
11 | 
12 |     fn new() -> Self;
13 | 
14 |     fn insert_at(&mut self, pos: usize, contents: &str);// -> Result<(), RopeError>;
15 |     fn del_at(&mut self, pos: usize, len: usize);// -> Result<(), RopeError>;
16 |     fn edit_at(&mut self, pos: usize, del_len: usize, ins_content: &str) {
17 |         if del_len > 0 {
18 |             self.del_at(pos, del_len);
19 |         }
20 |         if !ins_content.is_empty() {
21 |             self.insert_at(pos, ins_content);
22 |         }
23 |     }
24 | 
25 |     // fn del_at<R: RangeBounds<usize>>(&mut self, range: R) -> Result<(), RopeError>;
26 | 
27 |     // fn slice(&self, pos: usize, len: usize) -> Result<String, RopeError>;
28 | 
29 |     fn to_string(&self) -> String;
30 |     
31 |     // fn len(&self) -> usize; // in bytes
32 |     fn char_len(&self) -> usize; // in unicode values
33 | }


--------------------------------------------------------------------------------
/rope_benches/table.js:
--------------------------------------------------------------------------------
 1 | const fs = require('fs')
 2 | 
 3 | const datasets = ["automerge-paper", "rustcode", "sveltecomponent", "seph-blog1"]
 4 | const algorithms = ['String', 'XiRope', 'Ropey', 'C-JumpRope', 'JumpRope']
 5 | 
 6 | console.log('| Dataset | Raw string | XiRope | Ropey | librope (C) | Jumprope |')
 7 | console.log('|---------|------------|--------|-------|-------------|----------|')
 8 | 
 9 | const roundN = n => Math.round(n * 100) / 100
10 | 
11 | for (const ds of datasets) {
12 |   const row = `${ds} | ` + algorithms.map(alg => {
13 |     const filename = `../target/criterion/realworld/${alg}/${ds}/new/estimates.json`
14 | 
15 |     if (fs.existsSync(filename)) {
16 |       const data = JSON.parse(fs.readFileSync(filename, 'utf8')).mean.point_estimate / 1e6
17 |       return `${roundN(data)} ms`
18 |     } else {
19 |       return 'DNF'
20 |     }
21 |   }).join(' | ')
22 | 
23 |   console.log(row)
24 | }


--------------------------------------------------------------------------------
/src/buffered.rs:
--------------------------------------------------------------------------------
  1 | //! This module provides an optimized wrapper around a [`JumpRope`] struct which buffers incoming
  2 | //! edits and applies them "all at once" when the rope is read. This makes access patterns involving
  3 | //! replaying many small operations much faster (8x faster on some real world testing data).
  4 | //!
  5 | //! Using [`JumpRopeBuf`] instead of [`JumpRope`] directly is equivalent to using a
  6 | //! [`BufWriter`](std::io::BufWriter) to write to a file / stream.
  7 | //!
  8 | //! This API should be almost identical with JumpRope, but I've probably forgotten a few methods.
  9 | //! If you find some useful methods which are missing, please file issues and I can add them
 10 | //! explicitly to the wrapper. You can also use `rope.borrow().read_method()` or
 11 | //! `rope.as_mut().write_method()` as workarounds.
 12 | //!
 13 | //! Internally, JumpRopeBuf stores incoming writes in a write buffer before applying them. Adjacent
 14 | //! edits can be merged before the skip list is edited, which reduces the need for (relatively)
 15 | //! more expensive skip list lookups.
 16 | //!
 17 | //! ## Caveats:
 18 | //!
 19 | //! - [`JumpRopeBuf`] uses a RefCell internally. As a result, it does not expose a &JumpRope
 20 | //!   directly.
 21 | //! - Use of the RefCell means JumpRopeBuf is [`Send`](std::marker::Send) but not [`Sync`](std::marker::Sync).
 22 | 
 23 | 
 24 | #[derive(Debug, Clone, Copy)]
 25 | enum Kind { Ins, Del }
 26 | 
 27 | use std::cell::{Ref, RefCell};
 28 | use std::fmt::{Debug, Display, Formatter};
 29 | use std::ops::{Deref, DerefMut, Range};
 30 | use Op::*;
 31 | use crate::fast_str_tools::{char_to_byte_idx, count_chars};
 32 | use crate::JumpRope;
 33 | 
/// This struct provides an optimized wrapper around JumpRope which buffers adjacent incoming writes
/// before forwarding them to the underlying JumpRope.
///
/// Most of the overhead of writing to a rope comes from finding the edit location in the rope and
/// bookkeeping. Because text editing operations are usually sequential, by aggregating adjacent
/// editing operations together we can amortize the cost of updating the underlying data structure
/// itself. This improves performance by about 10x compared to inserting and deleting individual
/// characters.
///
/// There is nothing jumprope-specific in this library. It could easily be adapted to wrap other
/// rope libraries (like Ropey) too.
///
/// This API is still experimental. This library is only enabled by enabling the "buffered" feature.
///
/// The inner `RefCell` holds the rope together with the single pending (buffered) operation.
pub struct JumpRopeBuf(RefCell<(JumpRope, BufferedOp)>);
 48 | 
/// The single pending edit held by a [`JumpRopeBuf`] until it is flushed into the rope.
///
/// The buffer counts as empty whenever `range` is empty.
#[derive(Debug, Clone)]
struct BufferedOp {
    /// Whether the pending edit is an insert or a delete. The value is left stale when the
    /// buffer is cleared, so it is only meaningful while the buffer is non-empty.
    kind: Kind,
    // Always empty for deletes.
    ins_content: String,
    /// Character range covered by the edit. For inserts this is the span the inserted text will
    /// occupy (start position .. start + number of inserted chars); for deletes it is the span
    /// being removed.
    range: Range<usize>,
}
 56 | 
/// A single incoming edit, before it has been merged into the write buffer.
#[derive(Debug, Clone, Copy)]
enum Op<'a> {
    /// Insert the given text at the given position.
    Ins(usize, &'a str),
    /// Delete the range between the two positions.
    Del(usize, usize), // start, end.
}
 62 | 
 63 | impl BufferedOp {
 64 |     fn new() -> Self {
 65 |         Self {
 66 |             kind: Kind::Ins,
 67 |             ins_content: "".to_string(),
 68 |             range: Range::default(),
 69 |         }
 70 |     }
 71 | 
 72 |     fn is_empty(&self) -> bool {
 73 |         // self.len == 0
 74 |         self.range.is_empty()
 75 |     }
 76 | 
 77 |     /// Length of the inserted / deleted section
 78 |     fn len(&self) -> usize {
 79 |         self.range.len()
 80 |     }
 81 | 
 82 |     fn clear(&mut self) {
 83 |         // We don't care about the tag.
 84 |         self.ins_content.clear();
 85 |         self.range = Range::default();
 86 |     }
 87 | 
 88 |     fn try_append(&mut self, op: Op) -> Result<(), ()> {
 89 |         if self.is_empty() {
 90 |             // Just set to op.
 91 |             match op {
 92 |                 // I'm setting fields individually here rather than implementing From<Op> or
 93 |                 // BufferedOp so we can reuse the allocation in self.ins_content.
 94 |                 Ins(pos, content) => {
 95 |                     self.kind = Kind::Ins;
 96 |                     self.ins_content.push_str(content);
 97 |                     self.range.start = pos;
 98 |                     self.range.end = pos + count_chars(content);
 99 |                 }
100 |                 Del(start, end) => {
101 |                     self.kind = Kind::Del;
102 |                     debug_assert!(self.ins_content.is_empty());
103 |                     self.range = start..end;
104 |                 }
105 |             }
106 |             Ok(())
107 |         } else {
108 |             match (self.kind, op) {
109 |                 (Kind::Ins, Op::Ins(pos, content)) if pos == self.range.end => {
110 |                     // The new insert is at the end of the buffered op.
111 |                     self.ins_content.push_str(content);
112 |                     self.range.end += count_chars(content);
113 |                     Ok(())
114 |                 }
115 |                 (Kind::Ins, Op::Del(start, end)) if end == self.range.end && start >= self.range.start => {
116 |                     // We can merge if the delete trims the end of the insert. There's more complex
117 |                     // trimming we could do here, but anything too complex and we may as well just
118 |                     // let the rope handle it.
119 |                     if start == self.range.start {
120 |                         // Discard our local insert.
121 |                         self.ins_content.clear();
122 |                         self.range.end = self.range.start;
123 |                         Ok(())
124 |                     } else {
125 |                         // Trim from the end.
126 |                         let char_offset = start - self.range.start;
127 | 
128 |                         let byte_offset = if self.range.len() == self.ins_content.len() {
129 |                             // If its all ascii, char offset == byte offset.
130 |                             char_offset
131 |                         } else {
132 |                             // TODO: Come up with a better way to calculate this.
133 |                             char_to_byte_idx(self.ins_content.as_str(), char_offset)
134 |                         };
135 | 
136 |                         self.range.end = start;
137 |                         self.ins_content.truncate(byte_offset);
138 |                         Ok(())
139 |                     }
140 |                 }
141 |                 (Kind::Del, Op::Del(start, end)) if start <= self.range.start && end >= self.range.start => {
142 |                     // We can merge if our delete is inside the operation.
143 |                     // let self_len = self.range.len();
144 |                     // dbg!(&self.range, (start, end));
145 |                     self.range.end += end - self.range.start;
146 |                     self.range.start = start;
147 |                     Ok(())
148 |                 }
149 |                 (_, _) => Err(()),
150 |             }
151 |         }
152 |     }
153 | }
154 | 
155 | impl From<JumpRope> for JumpRopeBuf {
156 |     fn from(rope: JumpRope) -> Self {
157 |         Self::with_rope(rope)
158 |     }
159 | }
160 | 
161 | impl JumpRopeBuf {
162 |     pub fn with_rope(rope: JumpRope) -> Self {
163 |         Self(RefCell::new((rope, BufferedOp::new())))
164 |     }
165 | 
166 |     pub fn new() -> Self {
167 |         Self::with_rope(JumpRope::new())
168 |     }
169 | 
170 |     pub fn new_from_str(s: &str) -> Self {
171 |         Self::with_rope(JumpRope::from(s))
172 |     }
173 | 
174 |     fn flush_mut(inner: &mut (JumpRope, BufferedOp)) {
175 |         if !inner.1.is_empty() {
176 |             match inner.1.kind {
177 |                 Kind::Ins => {
178 |                     inner.0.insert(inner.1.range.start, &inner.1.ins_content);
179 |                 },
180 |                 Kind::Del => {
181 |                     inner.0.remove(inner.1.range.clone());
182 |                 }
183 |             }
184 |             inner.1.clear();
185 |         }
186 |     }
187 | 
188 |     // fn flush(&self) {
189 |     //     let mut inner = self.0.borrow_mut();
190 |     //     Self::flush_mut(inner.deref_mut());
191 |     // }
192 | 
193 |     fn internal_push_op(&mut self, op: Op) {
194 |         // let mut inner = self.0.borrow_mut();
195 |         let inner = self.0.get_mut();
196 |         match inner.1.try_append(op) {
197 |             Ok(_) => {}
198 |             Err(_) => {
199 |                 // Self::flush_mut(inner.deref_mut());
200 |                 Self::flush_mut(inner);
201 |                 // inner.0.insert(pos, content);
202 |                 inner.1.try_append(op).unwrap();
203 |             }
204 |         }
205 |     }
206 | 
207 |     /// Insert new content into the rope at the specified position. This method is semantically
208 |     /// equivalent to [`JumpRope::insert`](JumpRope::insert). The only difference is that here we
209 |     /// buffer the incoming edit.
210 |     pub fn insert(&mut self, pos: usize, content: &str) {
211 |         self.internal_push_op(Op::Ins(pos, content))
212 |     }
213 | 
214 |     /// Remove content from the rope at the specified position. This method is semantically
215 |     /// equivalent to [`JumpRope::remove`](JumpRope::insert). The only difference is that here we
216 |     /// buffer the incoming remove operation.
217 |     pub fn remove(&mut self, range: Range<usize>) {
218 |         self.internal_push_op(Op::Del(range.start, range.end))
219 |     }
220 | 
221 |     // TODO: Replace!
222 | 
223 |     /// Return the length of the rope in unicode characters. Note this is not the same as either
224 |     /// the number of bytes the characters take, or the number of grapheme clusters in the string.
225 |     ///
226 |     /// This method returns the length in constant-time (*O(1)*).
227 |     pub fn len_chars(&self) -> usize {
228 |         let borrow = self.0.borrow();
229 |         match borrow.1.kind {
230 |             Kind::Ins => borrow.0.len_chars() + borrow.1.range.len(),
231 |             Kind::Del => borrow.0.len_chars() - borrow.1.range.len()
232 |         }
233 |     }
234 | 
235 |     /// Get the number of bytes used for the UTF8 representation of the rope. This will always match
236 |     /// the .len() property of the equivalent String.
237 |     pub fn len_bytes(&self) -> usize {
238 |         let mut borrow = self.0.borrow_mut();
239 |         match borrow.1.kind {
240 |             Kind::Ins => borrow.0.len_bytes() + borrow.1.ins_content.len(),
241 |             Kind::Del => {
242 |                 // Unfortunately we have to flush to calculate byte length.
243 |                 Self::flush_mut(borrow.deref_mut());
244 |                 borrow.0.len_bytes()
245 |             }
246 |         }
247 |     }
248 | 
249 |     pub fn is_empty(&self) -> bool {
250 |         let borrow = self.0.borrow();
251 |         let len_chars = borrow.0.len_chars();
252 |         match borrow.1.kind {
253 |             Kind::Ins => len_chars == 0 && borrow.1.is_empty(),
254 |             Kind::Del => len_chars - borrow.1.len() == 0,
255 |         }
256 |     }
257 | 
258 |     /// Consume the JumpRopeBuf, flush any buffered operations and return the contained JumpRope.
259 |     pub fn into_inner(self) -> JumpRope {
260 |         let mut contents = self.0.into_inner();
261 |         Self::flush_mut(&mut contents);
262 |         contents.0
263 |     }
264 | 
265 |     /// Flush changes into the rope and return a borrowed reference to the rope itself. This makes
266 |     /// it easy to call any methods on the underlying rope which aren't already exposed through the
267 |     /// buffered API.
268 |     ///
269 |     /// # Panics
270 |     ///
271 |     /// borrow panics if the value is currently borrowed already.
272 |     pub fn borrow(&self) -> Ref<'_, JumpRope> {
273 |         let mut borrow = self.0.borrow_mut();
274 |         Self::flush_mut(borrow.deref_mut());
275 |         drop(borrow);
276 |         // This method could provide &mut access to the rope via the cell, but I think thats a bad
277 |         // idea.
278 |         Ref::map(self.0.borrow(), |(rope, _)| rope)
279 |     }
280 | 
281 |     fn eq_str(&self, s: &str) -> bool {
282 |         self.borrow().deref().eq(s)
283 |     }
284 | }
285 | 
286 | impl AsMut<JumpRope> for JumpRopeBuf {
287 |     /// Flush changes into the rope and mutably borrow the rope.
288 |     fn as_mut(&mut self) -> &mut JumpRope {
289 |         let inner = self.0.get_mut();
290 |         Self::flush_mut(inner);
291 |         &mut inner.0
292 |     }
293 | }
294 | 
295 | impl Default for JumpRopeBuf {
296 |     fn default() -> Self {
297 |         JumpRopeBuf::new()
298 |     }
299 | }
300 | 
301 | impl Debug for JumpRopeBuf {
302 |     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
303 |         let inner = self.0.borrow();
304 |         f.debug_struct("BufferedRope")
305 |             .field("op", &inner.1)
306 |             .field("rope", &inner.0)
307 |             .finish()
308 |     }
309 | }
310 | 
311 | impl Display for JumpRopeBuf {
312 |     fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
313 |         for s in self.borrow().substrings() {
314 |             f.write_str(s)?;
315 |         }
316 |         Ok(())
317 |     }
318 | }
319 | 
320 | impl Clone for JumpRopeBuf {
321 |     fn clone(&self) -> Self {
322 |         let inner = self.0.borrow();
323 |         Self(RefCell::new((inner.0.clone(), inner.1.clone())))
324 |     }
325 | }
326 | 
327 | impl<S: AsRef<str>> From<S> for JumpRopeBuf {
328 |     fn from(str: S) -> Self {
329 |         JumpRopeBuf::new_from_str(str.as_ref())
330 |     }
331 | }
332 | 
333 | impl<T: AsRef<str>> PartialEq<T> for JumpRopeBuf {
334 |     fn eq(&self, other: &T) -> bool {
335 |         self.eq_str(other.as_ref())
336 |     }
337 | }
338 | 
339 | // Needed for assert_eq!(&rope, "Hi there");
340 | impl PartialEq<str> for JumpRopeBuf {
341 |     fn eq(&self, other: &str) -> bool {
342 |         self.eq_str(other)
343 |     }
344 | }
345 | 
346 | // Needed for assert_eq!(&rope, String::from("Hi there"));
347 | impl PartialEq<String> for &JumpRopeBuf {
348 |     fn eq(&self, other: &String) -> bool {
349 |         self.eq_str(other.as_str())
350 |     }
351 | }
352 | 
353 | impl PartialEq<JumpRope> for JumpRopeBuf {
354 |     fn eq(&self, other: &JumpRope) -> bool {
355 |         self.borrow().eq(other)
356 |     }
357 | }
358 | 
359 | impl PartialEq<JumpRopeBuf> for JumpRopeBuf {
360 |     fn eq(&self, other: &JumpRopeBuf) -> bool {
361 |         // This check is important because we can't borrow the Cell twice at runtime.
362 |         std::ptr::eq(self as *const _, other as *const _)
363 |             || self.borrow().eq(other.borrow().deref())
364 |     }
365 | }
366 | 
367 | impl Eq for JumpRopeBuf {}
368 | 
#[cfg(test)]
mod test {
    use crate::JumpRopeBuf;

    // TODO: More targeted tests would be welcome here. JumpRopeBuf currently gets most of its
    // coverage from a fuzzer.

    /// `is_empty` has to account for a pending insert as well as a pending delete.
    #[test]
    fn is_empty() {
        let mut rope = JumpRopeBuf::new();
        assert!(rope.is_empty());

        rope.insert(0, "hi");
        assert!(!rope.is_empty());

        // Borrowing forces the pending insert to be flushed into the rope.
        drop(rope.borrow());

        rope.remove(0..2);
        assert!(rope.is_empty());
    }

    /// Comparing a rope with itself must not panic. This was a regression.
    #[test]
    fn eq_reflexive() {
        let rope = JumpRopeBuf::new();
        assert_eq!(rope, rope);
    }
}
398 | 


--------------------------------------------------------------------------------
/src/fast_str_tools.rs:
--------------------------------------------------------------------------------
  1 | //! Utility functions for utf8 string slices.
  2 | //!
  3 | //! This file mostly defers to str_indices but overrides some methods because the compiler is
  4 | //! smart.
  5 | 
  6 | /// Converts from byte-index to char-index in a string slice.
  7 | ///
  8 | /// If the byte is in the middle of a multi-byte char, returns the index of
  9 | /// the char that the byte belongs to.
 10 | ///
 11 | /// Any past-the-end index will return the one-past-the-end char index.
 12 | ///
 13 | /// Runs in O(N) time.
 14 | #[inline]
 15 | #[allow(unused)]
 16 | pub fn byte_to_char_idx(text: &str, byte_idx: usize) -> usize {
 17 |     let count = count_chars_in_bytes(&text.as_bytes()[0..(byte_idx + 1).min(text.len())]);
 18 |     if byte_idx < text.len() {
 19 |         count - 1
 20 |     } else {
 21 |         count
 22 |     }
 23 | }
 24 | 
 25 | /// Converts from char-index to byte-index in a string slice.
 26 | ///
 27 | /// Any past-the-end index will return the one-past-the-end byte index.
 28 | ///
 29 | /// Runs in O(N) time.
 30 | #[inline]
 31 | pub fn char_to_byte_idx(text: &str, char_idx: usize) -> usize {
 32 |     if cfg!(not(miri)) {
 33 |         str_indices::chars::to_byte_idx(text, char_idx)
 34 |     } else {
 35 |         // Naive version.
 36 |         let mut byte_count = 0;
 37 |         let mut char_count = 0;
 38 | 
 39 |         let mut i = 0;
 40 |         let text = text.as_bytes();
 41 |         while i < text.len() && char_count <= char_idx {
 42 |             char_count += ((text[i] & 0xC0) != 0x80) as usize;
 43 |             i += 1;
 44 |         }
 45 |         byte_count += i;
 46 | 
 47 |         if byte_count == text.len() && char_count <= char_idx {
 48 |             byte_count
 49 |         } else {
 50 |             byte_count - 1
 51 |         }
 52 |     }
 53 | }
 54 | 
 55 | // #[allow(unused)]
 56 | // #[inline(always)]
 57 | // fn char_to_byte_idx_naive(text: &[u8], char_idx: usize) -> usize {
 58 | //     let mut byte_count = 0;
 59 | //     let mut char_count = 0;
 60 | //
 61 | //     let mut i = 0;
 62 | //     while i < text.len() && char_count <= char_idx {
 63 | //         char_count += ((text[i] & 0xC0) != 0x80) as usize;
 64 | //         i += 1;
 65 | //     }
 66 | //     byte_count += i;
 67 | //
 68 | //     if byte_count == text.len() && char_count <= char_idx {
 69 | //         byte_count
 70 | //     } else {
 71 | //         byte_count - 1
 72 | //     }
 73 | // }
 74 | 
 75 | /// Counts the utf16 surrogate pairs that would be in `text` if it were encoded
 76 | /// as utf16.
 77 | #[inline]
 78 | pub(crate) fn count_utf16_surrogates(text: &str) -> usize {
 79 |     unsafe { count_utf16_surrogates_in_bytes(text.as_bytes()) }
 80 | }
 81 | 
 82 | /// SAFETY: Passed text array must be a valid UTF8 string. This will not be checked at runtime.
 83 | #[inline]
 84 | pub(crate) unsafe fn count_utf16_surrogates_in_bytes(text: &[u8]) -> usize {
 85 |     if cfg!(miri) {
 86 |         // Naive version
 87 |         let mut utf16_surrogate_count = 0;
 88 | 
 89 |         for byte in text.iter() {
 90 |             utf16_surrogate_count += ((byte & 0xf0) == 0xf0) as usize;
 91 |         }
 92 | 
 93 |         utf16_surrogate_count
 94 |     } else {
 95 |         str_indices::utf16::count_surrogates(std::str::from_utf8_unchecked(text))
 96 |     }
 97 | }
 98 | 
 99 | // This is an alternate naive method which may make sense later.
100 | // #[inline]
101 | // #[allow(unused)]
102 | // pub(crate) fn count_utf16_surrogates_in_bytes_naive(text: &[u8]) -> usize {
103 | //     let mut utf16_surrogate_count = 0;
104 | //
105 | //     for byte in text.iter() {
106 | //         utf16_surrogate_count += ((byte & 0xf0) == 0xf0) as usize;
107 | //     }
108 | //
109 | //     utf16_surrogate_count
110 | // }
111 | 
112 | #[inline(always)]
113 | #[allow(unused)]
114 | pub(crate) fn byte_to_utf16_surrogate_idx(text: &str, byte_idx: usize) -> usize {
115 |     count_utf16_surrogates(&text[..byte_idx])
116 | }
117 | 
/// Converts an index counted in utf16 code units into a char index.
///
/// An index pointing at the second half of a surrogate pair maps to the char
/// that pair encodes. Any past-the-end index maps to the number of chars in
/// `text`.
#[inline(always)]
#[allow(unused)]
pub(crate) fn utf16_code_unit_to_char_idx(text: &str, utf16_idx: usize) -> usize {
    // TODO: optimized version.  This is pretty slow.  It isn't expected to be
    // used in performance critical functionality, so this isn't urgent.  But
    // might as well make it faster when we get the chance.
    let mut chars_seen = 0;
    let mut units_seen = 0;

    for ch in text.chars() {
        if units_seen >= utf16_idx {
            // The index sits exactly on the boundary before `ch`.
            break;
        }
        units_seen += ch.len_utf16();
        if units_seen > utf16_idx {
            // The index falls inside `ch`, so `ch` is the answer and is not
            // counted as passed.
            break;
        }
        chars_seen += 1;
    }

    chars_seen
}
140 | 
141 | //===========================================================================
142 | // Internal
143 | //===========================================================================
144 | 
145 | /// Uses bit-fiddling magic to count utf8 chars really quickly.
146 | /// We actually count the number of non-starting utf8 bytes, since
147 | /// they have a consistent starting two-bit pattern.  We then
148 | /// subtract from the byte length of the text to get the final
149 | /// count.
150 | #[inline]
151 | #[allow(unused)]
152 | pub(crate) fn count_chars(text: &str) -> usize {
153 |     count_chars_in_bytes(text.as_bytes())
154 | }
155 | 
156 | #[inline]
157 | pub(crate) fn count_chars_in_bytes(text: &[u8]) -> usize {
158 |     if text.len() <= 1 { text.len() }
159 |     else if !cfg!(miri) {
160 |         unsafe { str_indices::chars::count(std::str::from_utf8_unchecked(text)) }
161 |     } else {
162 |         let mut inv_count = 0;
163 |         for byte in text.iter() {
164 |             inv_count += ((byte & 0xC0) != 0x80) as usize;
165 |         }
166 |         inv_count
167 |     }
168 | }
169 | 
170 | #[cfg(test)]
171 | mod tests {
172 |     use super::*;
173 | 
174 |     // 124 bytes, 100 chars, 4 lines
175 |     const TEXT_LINES: &str = "Hello there!  How're you doing?\nIt's \
176 |                               a fine day, isn't it?\nAren't you glad \
177 |                               we're alive?\nこんにちは、みんなさん！";
178 | 
179 |     #[test]
180 |     fn count_chars_01() {
181 |         let text = "Hello せかい! Hello せかい! Hello せかい! Hello せかい! Hello せかい!";
182 | 
183 |         assert_eq!(54, count_chars(text));
184 |     }
185 | 
186 |     #[test]
187 |     fn count_chars_02() {
188 |         assert_eq!(100, count_chars(TEXT_LINES));
189 |     }
190 | 
191 |     #[test]
192 |     fn byte_to_char_idx_01() {
193 |         let text = "Hello せかい!";
194 |         assert_eq!(0, byte_to_char_idx(text, 0));
195 |         assert_eq!(1, byte_to_char_idx(text, 1));
196 |         assert_eq!(6, byte_to_char_idx(text, 6));
197 |         assert_eq!(6, byte_to_char_idx(text, 7));
198 |         assert_eq!(6, byte_to_char_idx(text, 8));
199 |         assert_eq!(7, byte_to_char_idx(text, 9));
200 |         assert_eq!(7, byte_to_char_idx(text, 10));
201 |         assert_eq!(7, byte_to_char_idx(text, 11));
202 |         assert_eq!(8, byte_to_char_idx(text, 12));
203 |         assert_eq!(8, byte_to_char_idx(text, 13));
204 |         assert_eq!(8, byte_to_char_idx(text, 14));
205 |         assert_eq!(9, byte_to_char_idx(text, 15));
206 |         assert_eq!(10, byte_to_char_idx(text, 16));
207 |         assert_eq!(10, byte_to_char_idx(text, 17));
208 |         assert_eq!(10, byte_to_char_idx(text, 18));
209 |         assert_eq!(10, byte_to_char_idx(text, 19));
210 |     }
211 | 
212 |     #[test]
213 |     fn byte_to_char_idx_02() {
214 |         let text = "";
215 |         assert_eq!(0, byte_to_char_idx(text, 0));
216 |         assert_eq!(0, byte_to_char_idx(text, 1));
217 | 
218 |         let text = "h";
219 |         assert_eq!(0, byte_to_char_idx(text, 0));
220 |         assert_eq!(1, byte_to_char_idx(text, 1));
221 |         assert_eq!(1, byte_to_char_idx(text, 2));
222 | 
223 |         let text = "hi";
224 |         assert_eq!(0, byte_to_char_idx(text, 0));
225 |         assert_eq!(1, byte_to_char_idx(text, 1));
226 |         assert_eq!(2, byte_to_char_idx(text, 2));
227 |         assert_eq!(2, byte_to_char_idx(text, 3));
228 |     }
229 | 
230 |     #[test]
231 |     fn byte_to_char_idx_03() {
232 |         let text = "せかい";
233 |         assert_eq!(0, byte_to_char_idx(text, 0));
234 |         assert_eq!(0, byte_to_char_idx(text, 1));
235 |         assert_eq!(0, byte_to_char_idx(text, 2));
236 |         assert_eq!(1, byte_to_char_idx(text, 3));
237 |         assert_eq!(1, byte_to_char_idx(text, 4));
238 |         assert_eq!(1, byte_to_char_idx(text, 5));
239 |         assert_eq!(2, byte_to_char_idx(text, 6));
240 |         assert_eq!(2, byte_to_char_idx(text, 7));
241 |         assert_eq!(2, byte_to_char_idx(text, 8));
242 |         assert_eq!(3, byte_to_char_idx(text, 9));
243 |         assert_eq!(3, byte_to_char_idx(text, 10));
244 |         assert_eq!(3, byte_to_char_idx(text, 11));
245 |         assert_eq!(3, byte_to_char_idx(text, 12));
246 |     }
247 | 
248 |     #[test]
249 |     fn byte_to_char_idx_04() {
250 |         // Ascii range
251 |         for i in 0..88 {
252 |             assert_eq!(i, byte_to_char_idx(TEXT_LINES, i));
253 |         }
254 | 
255 |         // Hiragana characters
256 |         for i in 88..125 {
257 |             assert_eq!(88 + ((i - 88) / 3), byte_to_char_idx(TEXT_LINES, i));
258 |         }
259 | 
260 |         // Past the end
261 |         for i in 125..130 {
262 |             assert_eq!(100, byte_to_char_idx(TEXT_LINES, i));
263 |         }
264 |     }
265 | 
266 |     #[test]
267 |     fn char_to_byte_idx_01() {
268 |         let text = "Hello せかい!";
269 |         assert_eq!(0, char_to_byte_idx(text, 0));
270 |         assert_eq!(1, char_to_byte_idx(text, 1));
271 |         assert_eq!(2, char_to_byte_idx(text, 2));
272 |         assert_eq!(5, char_to_byte_idx(text, 5));
273 |         assert_eq!(6, char_to_byte_idx(text, 6));
274 |         assert_eq!(12, char_to_byte_idx(text, 8));
275 |         assert_eq!(15, char_to_byte_idx(text, 9));
276 |         assert_eq!(16, char_to_byte_idx(text, 10));
277 |     }
278 | 
279 |     #[test]
280 |     fn char_to_byte_idx_02() {
281 |         let text = "せかい";
282 |         assert_eq!(0, char_to_byte_idx(text, 0));
283 |         assert_eq!(3, char_to_byte_idx(text, 1));
284 |         assert_eq!(6, char_to_byte_idx(text, 2));
285 |         assert_eq!(9, char_to_byte_idx(text, 3));
286 |     }
287 | 
288 |     #[test]
289 |     fn char_to_byte_idx_03() {
290 |         let text = "Hello world!";
291 |         assert_eq!(0, char_to_byte_idx(text, 0));
292 |         assert_eq!(1, char_to_byte_idx(text, 1));
293 |         assert_eq!(8, char_to_byte_idx(text, 8));
294 |         assert_eq!(11, char_to_byte_idx(text, 11));
295 |         assert_eq!(12, char_to_byte_idx(text, 12));
296 |     }
297 | 
298 |     #[test]
299 |     fn char_to_byte_idx_04() {
300 |         let text = "Hello world! Hello せかい! Hello world! Hello せかい! \
301 |                     Hello world! Hello せかい! Hello world! Hello せかい! \
302 |                     Hello world! Hello せかい! Hello world! Hello せかい! \
303 |                     Hello world! Hello せかい! Hello world! Hello せかい!";
304 |         assert_eq!(0, char_to_byte_idx(text, 0));
305 |         assert_eq!(30, char_to_byte_idx(text, 24));
306 |         assert_eq!(60, char_to_byte_idx(text, 48));
307 |         assert_eq!(90, char_to_byte_idx(text, 72));
308 |         assert_eq!(115, char_to_byte_idx(text, 93));
309 |         assert_eq!(120, char_to_byte_idx(text, 96));
310 |         assert_eq!(150, char_to_byte_idx(text, 120));
311 |         assert_eq!(180, char_to_byte_idx(text, 144));
312 |         assert_eq!(210, char_to_byte_idx(text, 168));
313 |         assert_eq!(239, char_to_byte_idx(text, 191));
314 |     }
315 | 
316 |     #[test]
317 |     fn char_to_byte_idx_05() {
318 |         // Ascii range
319 |         for i in 0..88 {
320 |             assert_eq!(i, char_to_byte_idx(TEXT_LINES, i));
321 |         }
322 | 
323 |         // Hiragana characters
324 |         for i in 88..100 {
325 |             assert_eq!(88 + ((i - 88) * 3), char_to_byte_idx(TEXT_LINES, i));
326 |         }
327 | 
328 |         // Past the end
329 |         for i in 100..110 {
330 |             assert_eq!(124, char_to_byte_idx(TEXT_LINES, i));
331 |         }
332 |     }
333 | }
334 | 


--------------------------------------------------------------------------------
/src/gapbuffer.rs:
--------------------------------------------------------------------------------
  1 | use crate::fast_str_tools::*;
  2 | #[cfg(feature = "line_conversion")]
  3 | use crate::utils::count_lines;
  4 | use crate::utils::str_chars_to_bytes_rev;
  5 | 
/// A fixed-capacity gap buffer holding up to `LEN` bytes of UTF-8 text.
///
/// The text is stored as two runs inside `data`: `data[..gap_start_bytes]` (see
/// `start_as_str`) and `data[gap_start_bytes + gap_len..]` (see `end_as_str`). The `gap_len`
/// bytes in between are unused space that inserts write into.
///
/// `PartialEq` is implemented by hand (rather than derived) so that two buffers with the same
/// text compare equal regardless of where their gaps are.
#[derive(Debug, Clone, Eq)]
pub struct GapBuffer<const LEN: usize> {
    /// Backing storage: the text before the gap, the gap itself, then the text after the gap.
    data: [u8; LEN],

    /// Byte offset in `data` at which the gap starts.
    pub(crate) gap_start_bytes: u16,
    /// Number of unicode characters stored before the gap.
    pub(crate) gap_start_chars: u16,

    /// The number of UTF16 surrogate pairs before the gap.
    #[cfg(feature = "wchar_conversion")]
    pub(crate) gap_start_surrogate_pairs: u16,

    /// The number of lines before the gap
    #[cfg(feature = "line_conversion")]
    pub(crate) gap_start_lines: u16,

    /// Size of the gap in bytes. The stored text is `LEN - gap_len` bytes long.
    pub(crate) gap_len: u16,
    /// Starts out `true` and is cleared by `insert_in_gap` as soon as text whose byte length
    /// differs from its char count is inserted. While it is set, byte and char offsets are
    /// interchangeable, which lets the helpers below skip counting.
    all_ascii: bool,
}
 24 | 
 25 | #[inline]
 26 | unsafe fn slice_to_str(arr: &[u8]) -> &str {
 27 |     if cfg!(debug_assertions) {
 28 |         std::str::from_utf8(arr).unwrap()
 29 |     } else {
 30 |         std::str::from_utf8_unchecked(arr)
 31 |     }
 32 | }
 33 | 
 34 | impl<const LEN: usize> GapBuffer<LEN> {
 35 |     pub fn new() -> Self {
 36 |         Self {
 37 |             data: [0; LEN],
 38 |             gap_start_bytes: 0,
 39 |             gap_start_chars: 0,
 40 |             #[cfg(feature = "wchar_conversion")]
 41 |             gap_start_surrogate_pairs: 0,
 42 |             #[cfg(feature = "line_conversion")]
 43 |             gap_start_lines: 0,
 44 |             gap_len: LEN as u16,
 45 |             all_ascii: true,
 46 |         }
 47 |     }
 48 | 
 49 |     pub fn new_from_str(s: &str) -> Self {
 50 |         let mut val = Self::new();
 51 |         val.try_insert(0, s).unwrap();
 52 |         val
 53 |     }
 54 | 
 55 |     // #[allow(unused)]
 56 |     // pub fn len_space(&self) -> usize {
 57 |     //     self.gap_len as usize
 58 |     // }
 59 | 
 60 |     /// In bytes.
 61 |     pub fn len_bytes(&self) -> usize {
 62 |         LEN - self.gap_len as usize
 63 |     }
 64 | 
 65 |     // #[allow(unused)]
 66 |     // pub fn char_len(&self) -> usize {
 67 |     //     count_chars(self.start_as_str()) + count_chars(self.end_as_str())
 68 |     // }
 69 | 
 70 |     pub fn is_empty(&self) -> bool {
 71 |         self.gap_len as usize == LEN
 72 |     }
 73 | 
 74 |     fn count_internal_chars(&self, s: &str) -> usize {
 75 |         if self.all_ascii { s.len() } else { count_chars(s) }
 76 |     }
 77 | 
 78 |     #[cfg(feature = "wchar_conversion")]
 79 |     fn int_count_surrogate_pairs(&self, s: &str) -> usize {
 80 |         if self.all_ascii { 0 } else { count_utf16_surrogates(s) }
 81 |     }
 82 | 
 83 |     fn int_str_get_byte_offset(&self, s: &str, char_pos: usize) -> usize {
 84 |         if self.all_ascii { char_pos } else { char_to_byte_idx(s, char_pos) }
 85 |     }
 86 |     fn int_chars_to_bytes_backwards(&self, s: &str, char_len: usize) -> usize {
 87 |         if self.all_ascii { char_len } else { str_chars_to_bytes_rev(s, char_len) }
 88 |     }
 89 | 
 90 |     pub fn move_gap(&mut self, new_start_bytes: usize) {
 91 |         let current_start = self.gap_start_bytes as usize;
 92 | 
 93 |         if new_start_bytes != current_start {
 94 |             let len = self.gap_len as usize;
 95 |             debug_assert!(new_start_bytes <= LEN-len);
 96 | 
 97 |             #[allow(clippy::comparison_chain)]
 98 |             if new_start_bytes < current_start {
 99 |                 // move characters to the right (gap to the left)
100 |                 let moved_chars = new_start_bytes..current_start;
101 |                 let s = unsafe { slice_to_str(&self.data[moved_chars.clone()]) };
102 |                 let char_len = self.count_internal_chars(s);
103 | 
104 |                 #[cfg(feature = "wchar_conversion")] {
105 |                     let surrogate_pairs = self.int_count_surrogate_pairs(s);
106 |                     self.gap_start_surrogate_pairs -= surrogate_pairs as u16;
107 |                 }
108 | 
109 |                 #[cfg(feature = "line_conversion")] {
110 |                     self.gap_start_lines -= count_lines(s) as u16;
111 |                 }
112 | 
113 |                 self.gap_start_chars -= char_len as u16;
114 | 
115 |                 self.data.copy_within(moved_chars, new_start_bytes + len);
116 |             } else if current_start < new_start_bytes {
117 |                 // Move characters to the left (gap to the right)
118 |                 let moved_chars = current_start+len..new_start_bytes +len;
119 |                 let s = unsafe { slice_to_str(&self.data[moved_chars.clone()]) };
120 |                 let char_len = self.count_internal_chars(s);
121 | 
122 |                 #[cfg(feature = "wchar_conversion")] {
123 |                     let surrogate_pairs = self.int_count_surrogate_pairs(s);
124 |                     self.gap_start_surrogate_pairs += surrogate_pairs as u16;
125 |                 }
126 | 
127 |                 #[cfg(feature = "line_conversion")] {
128 |                     self.gap_start_lines += count_lines(s) as u16;
129 |                 }
130 | 
131 |                 self.gap_start_chars += char_len as u16;
132 | 
133 |                 self.data.copy_within(moved_chars, current_start);
134 |             }
135 | 
136 |             if cfg!(debug_assertions) {
137 |                 // This is unnecessary but tidy, and makes debugging easier.
138 |                 self.data[new_start_bytes..new_start_bytes +len].fill(0);
139 |             }
140 | 
141 |             self.gap_start_bytes = new_start_bytes as u16;
142 |         }
143 |     }
144 | 
145 |     /// Panics if there's no room. This inserts at the start of the gap (and moves the gap after the
146 |     /// inserted text).
147 |     pub fn insert_in_gap(&mut self, s: &str) {
148 |         let len = s.len();
149 |         let char_len = count_chars(s);
150 |         assert!(len <= self.gap_len as usize);
151 | 
152 |         let start = self.gap_start_bytes as usize;
153 |         self.data[start..start+len].copy_from_slice(s.as_bytes());
154 |         self.gap_start_bytes += len as u16;
155 |         self.gap_start_chars += char_len as u16;
156 |         self.gap_len -= len as u16;
157 | 
158 |         #[cfg(feature = "wchar_conversion")]
159 |         if len != char_len {
160 |             self.gap_start_surrogate_pairs += count_utf16_surrogates(s) as u16;
161 |         }
162 | 
163 |         #[cfg(feature = "line_conversion")] {
164 |             self.gap_start_lines += count_lines(s) as u16;
165 |         }
166 | 
167 |         if len != char_len { self.all_ascii = false; }
168 |     }
169 | 
170 |     pub fn try_insert(&mut self, byte_pos: usize, s: &str) -> Result<(), ()> {
171 |         let len = s.len();
172 |         if len > self.gap_len as usize {
173 |             // No space in this node!
174 |             Result::Err(())
175 |         } else {
176 |             self.move_gap(byte_pos);
177 |             self.insert_in_gap(s);
178 |             Result::Ok(())
179 |         }
180 |     }
181 | 
182 |     /// Remove chars after the gap (ie, at gap .. gap+del_len)
183 |     pub fn remove_after_gap(&mut self, del_bytes: usize) {
184 |         if cfg!(debug_assertions) {
185 |             // Zero out the deleted bytes in debug mode.
186 |             self.data[
187 |                 (self.gap_start_bytes +self.gap_len) as usize..(self.gap_start_bytes +self.gap_len) as usize + del_bytes
188 |                 ].fill(0);
189 |         }
190 |         self.gap_len += del_bytes as u16;
191 |     }
192 | 
193 |     // Returns the number of items actually removed.
194 |     #[allow(unused)]
195 |     pub fn remove(&mut self, pos: usize, del_len: usize) -> usize {
196 |         let len = self.len_bytes();
197 | 
198 |         if pos >= len { return 0; }
199 |         let del_len = del_len.min(len - pos);
200 | 
201 |         self.move_gap(pos);
202 | 
203 |         self.remove_after_gap(del_len);
204 |         del_len
205 |     }
206 | 
    /// Remove `del_len` chars starting at char offset `pos`.
    ///
    /// Both `pos` and `del_len` are measured in unicode characters (they are compared against
    /// `gap_start_chars`). After the call the gap sits at `pos`.
    ///
    /// Returns the number of bytes removed.
    pub fn remove_chars(&mut self, pos: usize, mut del_len: usize) -> usize {
        // This function is longer than it needs to be; but having it be a bit longer makes the
        // code faster. I think the trade-off is worth it.
        // self.move_gap(self.count_bytes(pos));
        // let removed_bytes = str_get_byte_offset(s.end_as_str(), del_len);
        // self.remove_at_gap(removed_bytes);
        // removed_bytes

        if del_len == 0 { return 0; }
        // NOTE(review): this compares a char count against a byte length, so it is only a loose
        // upper bound for non-ASCII content.
        debug_assert!(del_len <= self.len_bytes() - pos);
        // Bytes removed from the text before the gap (stays 0 unless the branch below runs).
        let mut rm_start_bytes = 0;

        let gap_chars = self.gap_start_chars as usize;
        #[cfg(any(feature = "wchar_conversion", feature = "line_conversion"))]
        let gap_start_bytes = self.gap_start_bytes as usize;
        // Fast path: the deleted range touches or spans the gap, so the gap can simply be grown
        // to the left and/or right without moving any text.
        if pos <= gap_chars && pos+del_len >= gap_chars {
            if pos < gap_chars {
                // Delete the bit from pos..gap.
                // TODO: It would be better to count backwards here.
                // let pos_bytes = str_get_byte_offset(self.start_as_str(), pos) as u16;
                // rm_start_bytes = self.gap_start_bytes - pos_bytes;
                rm_start_bytes = self.int_chars_to_bytes_backwards(self.start_as_str(), gap_chars - pos);

                // The removed text no longer sits before the gap, so take its surrogate pairs
                // out of the cached count.
                #[cfg(feature = "wchar_conversion")]
                if !self.all_ascii {
                    self.gap_start_surrogate_pairs -= unsafe {
                        count_utf16_surrogates_in_bytes(&self.data[gap_start_bytes - rm_start_bytes..gap_start_bytes]) as u16
                    }
                }

                // Likewise for the cached line count.
                #[cfg(feature = "line_conversion")] {
                    unsafe {
                        let s = std::str::from_utf8_unchecked(&self.data[gap_start_bytes - rm_start_bytes..gap_start_bytes]);
                        self.gap_start_lines -= count_lines(s) as u16;
                    }
                }

                // What is left of del_len now lies entirely after the gap.
                del_len -= self.gap_start_chars as usize - pos;
                let rm_start_bytes = rm_start_bytes as u16;
                // Grow the gap leftwards over the removed text.
                self.gap_len += rm_start_bytes;
                self.gap_start_chars = pos as u16;
                self.gap_start_bytes -= rm_start_bytes;
                // self.gap_start_bytes = pos_bytes;
                if del_len == 0 { return rm_start_bytes as usize; }
            }

            debug_assert!(del_len > 0);
            debug_assert!(pos >= self.gap_start_chars as usize);
        } else {
            // The range does not touch the gap: move the gap to pos first.
            // This is equivalent to self.count_bytes() (below), but for some reason manually
            // inlining it here results in both faster and smaller executables.
            let gap_bytes = if pos < gap_chars {
                self.int_str_get_byte_offset(self.start_as_str(), pos)
            } else {
                self.int_str_get_byte_offset(self.end_as_str(), pos - gap_chars) + self.gap_start_bytes as usize
            };
            self.move_gap(gap_bytes);
        }

        // At this point the gap is guaranteed to be directly after pos.
        // Convert the remaining char count into bytes and grow the gap rightwards over them.
        let rm_end_bytes = self.int_str_get_byte_offset(self.end_as_str(), del_len);
        self.remove_after_gap(rm_end_bytes);
        rm_start_bytes as usize + rm_end_bytes
    }
272 | 
273 |     pub fn start_as_str(&self) -> &str {
274 |         unsafe {
275 |             slice_to_str(&self.data[0..self.gap_start_bytes as usize])
276 |         }
277 |     }
278 |     pub fn end_as_str(&self) -> &str {
279 |         unsafe {
280 |             slice_to_str(&self.data[(self.gap_start_bytes +self.gap_len) as usize..LEN])
281 |         }
282 |     }
283 | 
284 |     pub fn count_bytes(&self, char_pos: usize) -> usize {
285 |         if self.all_ascii { return char_pos; }
286 | 
287 |         let gap_chars = self.gap_start_chars as usize;
288 |         let gap_bytes = self.gap_start_bytes as usize;
289 |         // Clippy complains about this but if I swap to a match expression, performance drops by 1%.
290 |         #[allow(clippy::comparison_chain)]
291 |         if char_pos == gap_chars {
292 |             gap_bytes
293 |         } else if char_pos < gap_chars {
294 |             self.int_str_get_byte_offset(self.start_as_str(), char_pos)
295 |         } else { // char_pos > start_char_len.
296 |             gap_bytes + self.int_str_get_byte_offset(self.end_as_str(), char_pos - gap_chars)
297 |         }
298 |     }
299 | 
300 |     /// Calculate & return the number of surrogate pairs in `[0..char_pos]`
301 |     #[cfg(feature = "wchar_conversion")]
302 |     pub(crate) fn count_chars_in_wchars(&self, wchar_pos: usize) -> usize {
303 |         if self.all_ascii { wchar_pos }
304 |         else {
305 |             let gap_chars = self.gap_start_chars as usize;
306 |             let gap_pairs = self.gap_start_surrogate_pairs as usize;
307 |             let gap_wchars = gap_chars + gap_pairs;
308 | 
309 |             if wchar_pos == gap_wchars {
310 |                 gap_chars
311 |             } else if wchar_pos < gap_wchars {
312 |                 // In start.
313 |                 if self.gap_start_surrogate_pairs == 0 { wchar_pos }
314 |                 else {
315 |                     utf16_code_unit_to_char_idx(self.start_as_str(), wchar_pos)
316 |                 }
317 |             } else {
318 |                 // In end.
319 |                 gap_chars + utf16_code_unit_to_char_idx(self.end_as_str(), wchar_pos - gap_wchars)
320 |             }
321 |         }
322 |     }
323 | 
324 |     #[cfg(feature = "wchar_conversion")]
325 |     pub(crate) fn count_surrogate_pairs(&self, char_pos: usize) -> usize {
326 |         if self.all_ascii {
327 |             0
328 |         } else {
329 |             let gap_chars = self.gap_start_chars as usize;
330 |             if char_pos == gap_chars {
331 |                 self.gap_start_surrogate_pairs as usize
332 |             } else if char_pos < gap_chars {
333 |                 if self.gap_start_surrogate_pairs == 0 { 0 }
334 |                 else {
335 |                     let bytes = self.int_str_get_byte_offset(self.start_as_str(), char_pos);
336 |                     unsafe { count_utf16_surrogates_in_bytes(&self.data[..bytes]) }
337 |                 }
338 |             } else {
339 |                 // Right stuff.
340 |                 let bytes = self.int_str_get_byte_offset(self.end_as_str(), char_pos - gap_chars);
341 |                 let base = (self.gap_start_bytes + self.gap_len) as usize;
342 |                 let slice = &self.data[base..base + bytes];
343 |                 unsafe { self.gap_start_surrogate_pairs as usize + count_utf16_surrogates_in_bytes(slice) }
344 |             }
345 |         }
346 |     }
347 | 
348 |     /// Take the remaining contents in the gap buffer. Mark them as deleted, but return them.
349 |     /// This will leave those items non-zero, but that doesn't matter.
350 |     pub fn take_rest(&mut self) -> &str {
351 |         let last_idx = (self.gap_start_bytes + self.gap_len) as usize;
352 |         self.gap_len = LEN as u16 - self.gap_start_bytes;
353 |         unsafe { slice_to_str(&self.data[last_idx..LEN]) }
354 |     }
355 | 
356 |     pub(crate) fn check(&self) {
357 |         let char_len = count_chars(self.start_as_str());
358 |         assert_eq!(char_len, self.gap_start_chars as usize);
359 | 
360 |         #[cfg(feature = "wchar_conversion")] {
361 |             let pairs = count_utf16_surrogates(self.start_as_str());
362 |             assert_eq!(pairs, self.gap_start_surrogate_pairs as usize);
363 |         }
364 | 
365 |         #[cfg(feature = "line_conversion")] {
366 |             let lines = count_lines(self.start_as_str());
367 |             assert_eq!(lines, self.gap_start_lines as usize);
368 |         }
369 | 
370 |         if self.all_ascii {
371 |             assert_eq!(self.gap_start_bytes, self.gap_start_chars);
372 |             #[cfg(feature = "wchar_conversion")] {
373 |                 assert_eq!(self.gap_start_surrogate_pairs, 0);
374 |             }
375 |         }
376 |     }
377 | }
378 | 
379 | impl<const LEN: usize> ToString for GapBuffer<LEN> {
380 |     fn to_string(&self) -> String {
381 |         let mut result = String::with_capacity(self.len_bytes());
382 |         result.push_str(self.start_as_str());
383 |         result.push_str(self.end_as_str());
384 |         result
385 |     }
386 | }
387 | 
388 | impl<const LEN: usize> PartialEq for GapBuffer<LEN> {
389 |     // Eq is interesting because we need to ignore where the gap is.
390 |     fn eq(&self, other: &Self) -> bool {
391 |         if self.gap_len != other.gap_len { return false; }
392 |         // There's 3 sections to check:
393 |         // - Before our gap
394 |         // - The inter-gap part
395 |         // - The last, common part.
396 |         let (a, b) = if self.gap_start_bytes < other.gap_start_bytes {
397 |             (self, other)
398 |         } else {
399 |             (other, self)
400 |         };
401 |         // a has its gap first (or the gaps are at the same time).
402 |         let a_start = a.gap_start_bytes as usize;
403 |         let b_start = b.gap_start_bytes as usize;
404 |         let gap_len = a.gap_len as usize;
405 | 
406 |         // Section before the gaps
407 |         if a.data[0..a_start] != b.data[0..a_start] { return false; }
408 | 
409 |         // Gappy bit
410 |         if a.data[a_start+gap_len..b_start+gap_len] != b.data[a_start..b_start] { return false; }
411 | 
412 |         // Last bit
413 |         let end_idx = b_start + gap_len;
414 |         a.data[end_idx..LEN] == b.data[end_idx..LEN]
415 |     }
416 | }
417 | 
#[cfg(test)]
mod test {
    use crate::gapbuffer::GapBuffer;

    /// Assert that `b` holds exactly the text `s`, and that its length and emptiness
    /// accessors agree with that.
    fn check_eq<const LEN: usize>(b: &GapBuffer<LEN>, s: &str) {
        assert_eq!(b.to_string(), s);
        assert_eq!(b.len_bytes(), s.len());
        assert_eq!(s.is_empty(), b.is_empty());
    }

    #[test]
    fn smoke_test() {
        let mut buf = GapBuffer::<5>::new();

        // Insert at the end, at the start, then in the middle.
        buf.try_insert(0, "hi").unwrap();
        buf.try_insert(0, "x").unwrap(); // 'xhi'
        buf.try_insert(2, "x").unwrap(); // 'xhxi'
        check_eq(&buf, "xhxi");
    }

    #[test]
    fn remove() {
        let mut buf = GapBuffer::<5>::new_from_str("hi");

        // Removing at the very end removes nothing.
        assert_eq!(buf.remove(2, 2), 0);
        check_eq(&buf, "hi");

        assert_eq!(buf.remove(0, 1), 1);
        check_eq(&buf, "i");

        // An over-long delete is clamped to what is actually there.
        assert_eq!(buf.remove(0, 1000), 1);
        check_eq(&buf, "");
    }

    #[test]
    fn eq() {
        let hi = GapBuffer::<5>::new_from_str("hi");
        let yo = GapBuffer::<5>::new_from_str("yo");
        assert_ne!(hi, yo);
        assert_eq!(hi, hi);

        // Equality must not depend on where the gap currently is.
        let mut moved = GapBuffer::<5>::new_from_str("hi");
        moved.move_gap(1);
        assert_eq!(hi, moved);

        moved.move_gap(0);
        assert_eq!(hi, moved);
    }
}


--------------------------------------------------------------------------------
/src/iter.rs:
--------------------------------------------------------------------------------
  1 | use std::ops::Range;
  2 | use crate::jumprope::*;
  3 | use crate::utils::str_chars_to_bytes;
  4 | 
  5 | /// An iterator over chunks (nodes) in the list.
  6 | pub(crate) struct NodeIter<'a>(Option<&'a Node>);
  7 | 
  8 | impl<'a> Iterator for NodeIter<'a> {
  9 |     type Item = &'a Node;
 10 | 
 11 |     fn next(&mut self) -> Option<&'a Node> {
 12 |         let prev = self.0;
 13 |         if let Some(n) = self.0 {
 14 |             // TODO: What?
 15 |             *self = NodeIter(unsafe { n.next_ptr().as_ref() });
 16 |         }
 17 |         prev
 18 |     }
 19 | }
 20 | 
 21 | /// A content iterator iterates over the strings in the rope
 22 | pub struct ContentIter<'a> {
 23 |     next: Option<&'a Node>,
 24 |     /// Are we at the start or the end of the gap buffer?
 25 |     at_start: bool,
 26 | }
 27 | 
 28 | impl<'a> ContentIter<'a> {
 29 |     pub fn substrings(self) -> Substrings<'a> {
 30 |         Substrings(self)
 31 |     }
 32 | 
 33 |     pub fn chars(self) -> Chars<'a> {
 34 |         self.into()
 35 |     }
 36 | }
 37 | 
 38 | impl<'a> Iterator for ContentIter<'a> {
 39 |     type Item = (&'a str, usize);
 40 | 
 41 |     fn next(&mut self) -> Option<Self::Item> {
 42 |         while let Some(n) = self.next {
 43 |             let s = if self.at_start {
 44 |                 self.at_start = false;
 45 |                 (n.str.start_as_str(), n.str.gap_start_chars as usize)
 46 |             } else {
 47 |                 self.next = unsafe { n.next_ptr().as_ref() };
 48 |                 self.at_start = true;
 49 |                 (n.str.end_as_str(), n.num_chars() - n.str.gap_start_chars as usize)
 50 |             };
 51 | 
 52 |             if s.1 > 0 {
 53 |                 return Some(s);
 54 |             }
 55 |         }
 56 | 
 57 |         None
 58 |     }
 59 | }
 60 | 
 61 | /// Iterator over the substrings in some content. This is just a hand-written .map(|s, len| s)
 62 | /// iterator to make it possible to embed a jumprope iterator inside another iterator.
 63 | pub struct Substrings<'a, I: Iterator<Item=(&'a str, usize)> = ContentIter<'a>>(I);
 64 | 
 65 | impl<'a, I: Iterator<Item=(&'a str, usize)>> Substrings<'a, I> {
 66 |     /// Convert this content into a string
 67 |     pub fn into_string(self) -> String {
 68 |         self.collect::<String>()
 69 |     }
 70 | }
 71 | 
 72 | impl<'a, I: Iterator<Item=(&'a str, usize)>> Iterator for Substrings<'a, I> {
 73 |     type Item = &'a str;
 74 | 
 75 |     fn next(&mut self) -> Option<Self::Item> {
 76 |         self.0.next().map(|(s, _)| s)
 77 |     }
 78 | }
 79 | 
 80 | /// Iterator over the individual characters in a rope (or rope slice).
 81 | pub struct Chars<'a, I: Iterator<Item=(&'a str, usize)> = ContentIter<'a>> {
 82 |     inner: I,
 83 |     current: std::str::Chars<'a>,
 84 | }
 85 | 
 86 | impl<'a, I: Iterator<Item=(&'a str, usize)>> From<I> for Chars<'a, I> {
 87 |     fn from(inner: I) -> Self {
 88 |         Self {
 89 |             inner,
 90 |             current: "".chars()
 91 |         }
 92 |     }
 93 | }
 94 | 
 95 | impl<'a, I: Iterator<Item=(&'a str, usize)>> Iterator for Chars<'a, I> {
 96 |     type Item = char;
 97 | 
 98 |     fn next(&mut self) -> Option<Self::Item> {
 99 |         self.current.next().or_else(|| {
100 |             self.current = self.inner.next()?.0.chars();
101 |             let next = self.current.next();
102 |             // None of the items returned from our inner iterator should be empty.
103 |             debug_assert!(next.is_some());
104 |             next
105 |         })
106 |     }
107 | }
108 | 
109 | /// Iterate over a sub-range of the rope.
110 | pub struct SliceIter<'a> {
111 |     inner: ContentIter<'a>,
112 |     skip: usize,
113 |     take_len: usize,
114 | }
115 | 
116 | pub type SubstringsInRange<'a> = Substrings<'a, SliceIter<'a>>;
117 | pub type CharsInRange<'a> = Chars<'a, SliceIter<'a>>;
118 | 
119 | impl<'a> SliceIter<'a> {
120 |     pub fn substrings(self) -> SubstringsInRange<'a> {
121 |         Substrings(self)
122 |     }
123 | 
124 |     pub fn chars(self) -> CharsInRange<'a> {
125 |         self.into()
126 |     }
127 | }
128 | 
129 | impl<'a> Iterator for SliceIter<'a> {
130 |     type Item = (&'a str, usize);
131 | 
132 |     fn next(&mut self) -> Option<Self::Item> {
133 |         if self.take_len == 0 { return None; }
134 | 
135 |         self.inner.next().map(|(mut s, mut char_len)| {
136 |             if self.skip > 0 {
137 |                 let byte = str_chars_to_bytes(s, self.skip);
138 |                 assert!(byte < s.len());
139 | 
140 |                 s = &s[byte..];
141 |                 char_len -= self.skip;
142 |                 self.skip = 0;
143 |             }
144 | 
145 |             if self.take_len < char_len {
146 |                 let byte = str_chars_to_bytes(s, self.take_len);
147 |                 s = &s[0..byte];
148 |                 char_len = self.take_len;
149 |             }
150 | 
151 |             self.take_len -= char_len;
152 | 
153 |             (s, char_len)
154 |         })
155 |     }
156 | }
157 | 
158 | impl JumpRope {
159 |     pub(crate) fn node_iter_at_start(&self) -> NodeIter { NodeIter(Some(&self.head)) }
160 | 
161 |     /// Iterate over the rope, visiting each substring in [`str`] chunks. Whenever possible, this is
162 |     /// the best way for a program to read back the contents of a rope, because it avoids allocating
163 |     /// memory or copying the characters themselves (as you get with .to_string() or .chars()).
164 |     ///
165 |     /// ## Stability Warning
166 |     ///
167 |     /// This iterator will always return all the characters in document order, but the particular
168 |     /// way characters are grouped together is based on internal implementation details. Thus it
169 |     /// might change in arbitrary ways at any time. Your application should not depend on the
170 |     /// specifics of this chunking.
171 |     ///
172 |     /// # Example
173 |     ///
174 |     /// ```
175 |     /// # use jumprope::*;
176 |     /// let rope = JumpRope::from("oh hai");
177 |     /// let mut string = String::new();
178 |     /// for str in rope.substrings() {
179 |     ///     string.push_str(str);
180 |     /// }
181 |     /// assert_eq!(string, "oh hai");
182 |     /// ```
183 |     pub fn substrings(&self) -> Substrings<'_> {
184 |         self.substrings_with_len().substrings()
185 |     }
186 | 
187 |     /// Iterate over all substrings in the rope, but also yield the unicode character length for
188 |     /// each item. A caller could obviously recalculate these lengths from the provided &str
189 |     /// objects, but since the unicode lengths are known this allows small optimizations.
190 |     ///
191 |     /// The iterator yields pairs of (str, char_len).
192 |     ///
193 |     /// ## Stability Warning
194 |     ///
195 |     /// This iterator will always return all the characters in document order, but the particular
196 |     /// way characters are grouped together is based on internal implementation details. Thus it
197 |     /// might change in arbitrary ways at any time. Your application should not depend on the
198 |     /// specifics of this chunking.
199 |     ///
200 |     /// # Example
201 |     ///
202 |     /// ```
203 |     /// # use jumprope::*;
204 |     /// let rope = JumpRope::from("oh hai");
205 |     /// let mut string = String::new();
206 |     /// for (str, char_len) in rope.substrings_with_len() {
207 |     ///     assert_eq!(str.chars().count(), char_len);
208 |     ///     string.push_str(str);
209 |     /// }
210 |     /// assert_eq!(string, "oh hai");
211 |     /// ```
212 |     pub fn substrings_with_len(&self) -> ContentIter {
213 |         ContentIter {
214 |             next: Some(&self.head),
215 |             at_start: true
216 |         }
217 |     }
218 | 
219 |     /// Get an iterator over all the unicode characters in the rope, in document order.
220 |     ///
221 |     /// In most cases this will be less efficient than using [`substrings`](Self::substrings) to
222 |     /// iterate over all the `&str` chunks contained in the rope.
223 |     ///
224 |     /// # Example
225 |     ///
226 |     /// ```
227 |     /// # use jumprope::*;
228 |     /// let rope = JumpRope::from("oh hai");
229 |     /// assert_eq!("oh hai", rope.chars().collect::<String>());
230 |     /// ```
231 |     pub fn chars(&self) -> Chars {
232 |         self.substrings_with_len().chars()
233 |     }
234 | 
235 | 
236 | 
237 |     /// Iterate through all the substrings within the specified range of the document. The
238 |     /// range is measured in unicode characters, not bytes.
239 |     ///
240 |     /// # Example
241 |     ///
242 |     /// ```
243 |     /// # use jumprope::*;
244 |     /// let rope = JumpRope::from("xxxGreetings!xxx");
245 |     /// let mut string = String::new();
246 |     /// for s in rope.slice_substrings(3..rope.len_chars() - 3) {
247 |     ///     string.push_str(s);
248 |     /// }
249 |     /// assert_eq!(string, "Greetings!");
250 |     /// ```
251 |     pub fn slice_substrings(&self, range: Range<usize>) -> SubstringsInRange {
252 |         self.slice_substrings_with_len(range).substrings()
253 |     }
254 | 
255 |     /// Iterate through chunks across a character range in the document.
256 |     ///
257 |     /// The iterator yields `(&str, char_len)` pairs. `range` is measured in unicode characters,
258 |     /// not bytes. An empty or reversed range (`range.end <= range.start`) is given a length of
259 |     /// zero rather than overflowing.
260 |     ///
261 |     /// # Example
262 |     ///
263 |     /// ```
264 |     /// # use jumprope::*;
265 |     /// let rope = JumpRope::from("xxxGreetings!xxx");
266 |     /// let mut string = String::new();
267 |     /// for (str, char_len) in rope.slice_substrings_with_len(3..rope.len_chars() - 3) {
268 |     ///     assert_eq!(str.chars().count(), char_len);
269 |     ///     string.push_str(str);
270 |     /// }
271 |     /// assert_eq!(string, "Greetings!");
272 |     /// ```
273 |     ///
274 |     /// Or more simply:
275 |     ///
276 |     /// ```
277 |     /// # use jumprope::*;
278 |     /// let rope = JumpRope::from("xxxGreetings!xxx");
279 |     /// let string = rope.slice_substrings_with_len(3..13).map(|(str, _len)| str).collect::<String>();
280 |     /// assert_eq!(string, "Greetings!");
281 |     /// ```
282 |     pub fn slice_substrings_with_len(&self, range: Range<usize>) -> SliceIter {
283 |         let cursor = self.read_cursor_at_char(range.start, false);
284 |         let node_gap_start = cursor.node.str.gap_start_chars as usize;
285 |         let local_pos = cursor.offset_chars;
286 | 
287 |         // Work out which side of the node's gap the range starts on, and how many characters
288 |         // on that side come before the start of the range.
289 |         let (at_start, skip) = if local_pos >= node_gap_start {
290 |             (false, local_pos - node_gap_start)
291 |         } else {
292 |             (true, local_pos)
293 |         };
294 | 
295 |         SliceIter {
296 |             inner: ContentIter {
297 |                 next: Some(cursor.node), at_start
298 |             },
299 |             skip,
300 |             // `Range::len` is 0 for an empty or reversed range, where `end - start` would
301 |             // overflow.
302 |             take_len: range.len()
303 |         }
304 |     }
297 | 
298 |     /// Iterate through the characters of the rope that fall within the specified range. The
299 |     /// range is measured in unicode characters, not bytes.
300 |     ///
301 |     /// # Example
302 |     ///
303 |     /// ```
304 |     /// # use jumprope::*;
305 |     /// let rope = JumpRope::from("xxxGreetings!xxx");
306 |     ///
307 |     /// assert_eq!("Greetings!",
308 |     ///     rope.slice_chars(3..rope.len_chars() - 3).collect::<String>()
309 |     /// );
310 |     /// ```
311 |     pub fn slice_chars(&self, range: Range<usize>) -> CharsInRange {
312 |         self.slice_substrings_with_len(range).chars()
313 |     }
314 | 
315 |     /// Copy the contents of the rope into a new `String`.
316 |     ///
317 |     /// `Display` gives us a `to_string` as well, but that version doesn't provide size hints.
318 |     /// This one reserves `len_bytes()` bytes up front.
319 |     pub fn to_string(&self) -> String {
320 |         let mut out = String::with_capacity(self.len_bytes());
321 |         out.extend(self.substrings());
322 |         out
323 |     }
323 | }
324 | 
325 | #[cfg(test)]
326 | mod tests {
327 |     use crate::fast_str_tools::*;
328 |     use crate::JumpRope;
329 |     use crate::jumprope::NODE_STR_SIZE;
330 | 
331 |     fn check(rope: &JumpRope) { // Cross-checks each iterator against rope.to_string().
332 |         for (s, len) in rope.substrings_with_len() {
333 |             assert_eq!(count_chars(s), len);
334 |             assert_ne!(len, 0); // Returned items may not be empty.
335 |         }
336 |         // The full-range slice iterator must uphold the same invariants.
337 |         for (s, len) in rope.slice_substrings_with_len(0..rope.len_chars()) {
338 |             assert_eq!(count_chars(s), len);
339 |             assert_ne!(len, 0); // Returned items may not be empty.
340 |         }
341 |         // Every way of iterating over characters must reproduce the rope's contents.
342 |         assert_eq!(rope.substrings_with_len().chars().collect::<String>(), rope.to_string());
343 |         assert_eq!(rope.chars().collect::<String>(), rope.to_string());
344 |         assert_eq!(rope.slice_chars(0..rope.len_chars()).collect::<String>(), rope.to_string());
345 |         // Slicing from each start position must yield the matching suffix of the string.
346 |         let s = rope.to_string();
347 |         for start in 0..=rope.len_chars() {
348 |             let iter = rope.slice_chars(start..rope.len_chars());
349 |             let str = iter.collect::<String>();
350 |             // `start` counts characters, so convert it before slicing the String by bytes.
351 |             let byte_start = char_to_byte_idx(&s, start);
352 |             assert_eq!(str, &s[byte_start..]);
353 |         }
354 |     }
355 | 
356 |     #[test]
357 |     fn iter_smoke_tests() {
358 |         check(&JumpRope::new());
359 |         check(&JumpRope::from("hi there"));
360 | 
361 |         let mut rope = JumpRope::from("aaaa");
362 |         rope.insert(2, "b"); // This will force a gap.
363 |         assert_eq!(rope.substrings_with_len().count(), 2);
364 |         check(&rope);
365 | 
366 |         // Long enough that in debugging mode we'll spill into multiple items.
367 |         let s = "XXXaaaaaaaaaaaaaaaaaaaaaaaaaaXXX";
368 |         let rope = JumpRope::from(s);
369 |         assert!(rope.substrings_with_len().count() > 1);
370 |         check(&rope);
371 |         // `s` is ASCII, so its byte length can stand in for its character length here.
372 |         assert_eq!(
373 |             rope.slice_substrings_with_len(3..s.len() - 3).chars().collect::<String>(),
374 |             &s[3..s.len() - 3]
375 |         );
376 |     }
377 | 
378 |     #[test]
379 |     fn iter_non_ascii() {
380 |         check(&JumpRope::from("κό𝕐𝕆😘σμε"));
381 |     }
382 | 
383 |     #[test]
384 |     fn iter_chars_tricky() {
385 |         let mut rope = JumpRope::new(); // Filled below with NODE_STR_SIZE * 2 one-byte chars.
386 |         rope.extend(std::iter::repeat("x").take(NODE_STR_SIZE * 2));
387 |         check(&rope);
388 |     }
389 | }


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! # JumpRope
  2 | //!
  3 | //! A small, fast rope library for rust built on a skip list of gap buffers
  4 | //!
  5 | //! This library enables super fast in-memory string editing, where an edit might insert, delete
  6 | //! or modify text from anywhere in the string. Unlike inserting and deleting in a String directly,
  7 | //! jumprope avoids expensive memcopy / memmove operations. All editing operations are O(log n)
  8 | //! based on the size of the string.
  9 | //!
 10 | //! ## Example
 11 | //!
 12 | //! ```
 13 | //! use jumprope::JumpRope;
 14 | //!
 15 | //! let mut rope = JumpRope::from("Some large text document");
 16 | //! rope.insert(5, "really "); // "Some really large text document"
 17 | //! rope.replace(0..4, "My rad");  // "My rad really large text document"
 18 | //! assert_eq!(rope, "My rad really large text document");
 19 | //!
 20 | //! // Extract to a string
 21 | //! let s: String = rope.to_string();
 22 | //! assert_eq!(s, "My rad really large text document");
 23 | //! ```
 24 | //!
 25 | //! See the [`JumpRope`] type for more usage details.
 26 | //!
 27 | //! # Random numbers, Determinism and DoS protection
 28 | //!
 29 | //! Jumprope is built on top of [skip lists](https://en.wikipedia.org/wiki/Skip_list), which are a
 30 | //! probabilistic data structure. Each node in the list uses a random number generator to decide
 31 | //! its "height". To do this well, skip lists depend on a random number generator for performance.
 32 | //! If a pathologically bad RNG source was used, the skip list would degrade to a linked list (with
 33 | //! `O(n)` performance).
 34 | //!
 35 | //! ## Security
 36 | //!
 37 | //! We have plenty of high quality RNGs available in rust. However, the bad news is that if a
 38 | //! malicious actor can:
 39 | //!
 40 | //! - Predict the sequence of random numbers, and
 41 | //! - Control a sequence of insert & removal operations in the rope
 42 | //!
 43 | //! Then they can *force* the rope to degrade to `O(n)` performance.
 44 | //!
 45 | //! The obvious protection against this is to use a good RNG, seeded with a good entropy source.
 46 | //! This makes the random sequence impossible to predict. Luckily jumprope isn't sensitive to the
 47 | //! performance of the RNG used. The only downside is that using a CSRNG + a good entropy source
 48 | //! makes the compiled binary bigger.
 49 | //!
 50 | //! So there's a feature flag: `["ddos_protection"]`. This flag configures jumprope to use a larger
 51 | //! CSRNG instead of a PRNG. To disable it (eg for WASM), you need to compile jumprope with default
 52 | //! features turned off:
 53 | //!
 54 | //! ```toml
 55 | //! jumprope = { default-features = false }
 56 | //! ```
 57 | //!
 58 | //!
 59 | //!
 60 | //! # A rant on character lengths
 61 | //!
 62 | //! There are 3 different, useful ways to measure string lengths. All of them are useful in certain
 63 | //! situations:
 64 | //!
 65 | //! - The number of bytes needed to represent the string, in some specific encoding (eg UTF8)
 66 | //! - The number of unicode characters contained within
 67 | //! - The number of grapheme clusters in the string. This is the number of characters drawn to
 68 | //! the screen.
 69 | //!
 70 | //! For example, the unicode polar bear ("🐻‍❄️") has a single grapheme cluster (only one
 71 | //! character is drawn). It contains 4 unicode characters (Bear emoji + zero width joiner + snow
 72 | //! emoji + variation selector). And it takes 13 bytes to store in UTF8.
 73 | //!
 74 | //! ```
 75 | //! # use jumprope::*;
 76 | //! assert_eq!("🐻‍❄️".len(), 13);
 77 | //! assert_eq!("🐻‍❄️".chars().count(), 4);
 78 | //!
 79 | //! let rope = JumpRope::from("🐻‍❄️"); // One grapheme cluster
 80 | //! assert_eq!(rope.len_bytes(), 13); // 13 UTF8 bytes
 81 | //! assert_eq!(rope.len_chars(), 4); // 4 unicode characters
 82 | //! ```
 83 | //!
 84 | //! Worse, many popular languages (including javascript and C#) use UCS2 internally and thus their
 85 | //! `string.length` property doesn't give you a useful value for any application. Javascript reports
 86 | //! a polar bear's length as 5 - which is useless:
 87 | //!
 88 | //! ```shell
 89 | //! $ node
 90 | //! Welcome to Node.js v16.6.1.
 91 | //! > "🐻‍❄️".length
 92 | //! 5
 93 | //! ```
 94 | //!
 95 | //! But there is no perfect "length" property for a string anyway:
 96 | //!
 97 | //! - The number of bytes is encoding-specific. The polar bear takes 13 bytes in UTF8, but only 10
 98 | //! bytes in UTF16.
 99 | //! - The number of grapheme clusters varies by device, font and software version. The conversion
100 | //! from characters to grapheme clusters is complex, and changes all the time. The polar bear
101 | //! icon was only added in May 2019. If your software is older than that (or uses a text library
102 | //! older than that), you will just see "🐻❄️".
103 | //!
104 | //! Most CRDTs and OT systems are slowly standardizing on counting unicode character positions as
105 | //! the default "length" property. The number of unicode characters isn't human-meaningful, but it
106 | //! has a number of useful properties:
107 | //!
108 | //! - It's simple and easy to define
109 | //! - It's stable across time (unlike grapheme clusters)
110 | //! - It's rarely convenient, but it's very portable across different programming languages,
111 | //! regardless of that language's character encoding system.
112 | //!
113 | //! Jumprope follows this approach, using unicode character positions everywhere internally:
114 | //!
115 | //! ```
116 | //! # use jumprope::*;
117 | //! let mut rope = JumpRope::from("🐻‍❄️");
118 | //! rope.remove(1..4); // Remove "polar" from our polar bear
119 | //! assert_eq!(rope, "🐻");
120 | //! ```
121 | 
122 | #![cfg_attr(doc_cfg, feature(doc_cfg))]
123 | 
124 | mod jumprope;
125 | mod gapbuffer;
126 | mod utils;
127 | mod iter;
128 | mod fast_str_tools;
129 | 
130 | pub use crate::jumprope::JumpRope;
131 | 
132 | mod buffered;
133 | pub use crate::buffered::JumpRopeBuf;


--------------------------------------------------------------------------------
/src/utils.rs:
--------------------------------------------------------------------------------
 1 | use crate::fast_str_tools::*;
 2 | 
 3 | /// Get the byte offset in `s` that comes after its first `char_pos` characters.
 4 | pub(crate) fn str_chars_to_bytes(s: &str, char_pos: usize) -> usize {
 5 |     // Delegates to the helper from fast_str_tools. Presumably equivalent to this std-only
 6 |     // version, which is kept for reference:
 7 |     //
 8 |     //     s.char_indices().nth(char_pos).map_or_else(|| s.len(), |(i, _)| i)
 9 | 
10 |     char_to_byte_idx(s, char_pos)
11 | }
12 | 
13 | // pub(crate) fn str_bytes_to_chars(s: &str, bytes: usize) -> usize {
14 | //     byte_to_char_idx(s, bytes)
15 | // }
16 | //
17 | // pub(crate) fn count_chars(s: &str) -> usize {
18 | //     str_bytes_to_chars(s, s.len())
19 | // }
20 | 
21 | /// Get the number of bytes taken up by the last `char_len` characters of `s`.
22 | ///
23 | /// # Panics
24 | ///
25 | /// Panics if `s` contains fewer than `char_len` characters.
26 | pub(crate) fn str_chars_to_bytes_rev(s: &str, char_len: usize) -> usize {
27 |     if char_len == 0 { return 0; }
28 | 
29 |     // Scan backwards, looking for utf8 start bytes - that is, any byte which isn't a
30 |     // continuation byte (0b10xx_xxxx). The char_len'th start byte we meet begins the suffix, so
31 |     // the suffix length is that byte's distance from the end of the string, plus one.
32 |     s.bytes()
33 |         .rev()
34 |         .enumerate()
35 |         .filter(|&(_, byte)| (byte & 0b11_00_0000) != 0b10_00_0000)
36 |         .nth(char_len - 1)
37 |         .map(|(rev_idx, _)| rev_idx + 1)
38 |         .unwrap_or_else(|| panic!("Insufficient characters in string"))
39 | }
34 | 
35 | // #[cfg(feature = "wchar_conversion")]
36 | // pub(crate) fn count_wchars(s: &str) -> usize {
37 | //     // TODO: There's a better way to write this.
38 | //     s.chars()
39 | //         .map(|c| c.len_utf16())
40 | //         .sum()
41 | // }
42 | //
43 | // #[cfg(feature = "wchar_conversion")]
44 | // pub(crate) fn str_chars_to_wchars(s: &str, char_len: usize) -> usize {
45 | //     // TODO: There's a better way to write this.
46 | //     // TODO: Compare this with char_len + filter + count.
47 | //     s.chars()
48 | //         .take(char_len)
49 | //         .map(|c| c.len_utf16())
50 | //         .sum()
51 | // }
52 | 
53 | /// Count the newline (`\n`) bytes in `s`.
54 | #[cfg(feature = "line_conversion")]
55 | pub(crate) fn count_lines(s: &str) -> usize {
56 |     // I'm sure there's faster implementations of this but this will do for now.
57 |     s.bytes().filter(|&b| b == b'\n').count()
58 | }
58 | 
59 | #[cfg(test)]
60 | mod tests {
61 |     use crate::utils::*;
62 | 
63 |     fn check_counts(s: &str) {
64 |         let num_chars = s.chars().count();
65 |         assert_eq!(count_chars(s), num_chars);
66 |         // Split the string at every character boundary and measure both halves.
67 |         for i in 0..=num_chars {
68 |             let byte_offset = str_chars_to_bytes(s, i);
69 |             assert_eq!(count_chars(&s[..byte_offset]), i);
70 |             // Measured from the end, the remaining characters must cover the remaining bytes.
71 |             let end_offset = str_chars_to_bytes_rev(s, num_chars - i);
72 |             assert_eq!(end_offset, s.len() - byte_offset);
73 |         }
74 |     }
75 | 
76 |     #[test]
77 |     fn backwards_smoke_tests() {
78 |         check_counts("hi there");
79 |         check_counts("κό𝕐𝕆😘σμε");
80 |     }
81 | 
82 |     #[test]
83 |     #[cfg(feature = "line_conversion")]
84 |     fn count_lines_tests() {
85 |         assert_eq!(count_lines(""), 0);
86 |         assert_eq!(count_lines("\n"), 1);
87 |         assert_eq!(count_lines("fop\n\n"), 2);
88 |     }
89 | }
90 | 


--------------------------------------------------------------------------------
/tests/test.rs:
--------------------------------------------------------------------------------
  1 | // These tests are also adapted from the C code tests here:
  2 | // https://github.com/josephg/librope/blob/master/test/tests.c
  3 | 
  4 | use rand::prelude::*;
  5 | 
  6 | use std::cmp::min;
  7 | use std::ops::Range;
  8 | use std::ptr;
  9 | use jumprope::JumpRope;
 10 | use jumprope::JumpRopeBuf;
 11 | 
 12 | const UNI_CHARS: [char; 24] = [
 13 |   '\n', 'a', 'b', 'c', '1', '2', '3', ' ', '_', // ASCII.
 14 |   '©', '¥', '½', // The Latin-1 supplement (U+0080 - U+00FF)
 15 |   'Ύ', 'Δ', 'δ', 'Ϡ', // Greek (U+0370 - U+03FF)
 16 |   '←', '↯', '↻', '⇈', // Arrows (U+2190 – U+21FF)
 17 |   '𐆐', '𐆔', '𐆘', '𐆚', // Ancient roman symbols (U+10190 – U+101CF)
 18 | ];
 19 | 
 20 | /// Build a string of `len` characters, each drawn at random (using `rng`) from `UNI_CHARS`.
 21 | fn random_unicode_string(len: usize, rng: &mut SmallRng) -> String {
 22 |     // UNI_CHARS already holds `char`s, so they can be collected without a cast.
 23 |     (0..len)
 24 |         .map(|_| UNI_CHARS[rng.gen_range(0 .. UNI_CHARS.len())])
 25 |         .collect()
 26 | }
 27 | 
 28 | const ASCII_CHARS: &[u8; 83] = b" ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()[]{}<>?,./";
 29 | 
 30 | // Build a string of `len` characters, each drawn at random (using `rng`) from ASCII_CHARS.
 31 | // Gross. Find a way to reuse the code from random_unicode_string.
 32 | #[allow(unused)]
 33 | fn random_ascii_string(len: usize, rng: &mut SmallRng) -> String {
 34 |     (0..len)
 35 |         .map(|_| char::from(ASCII_CHARS[rng.gen_range(0 .. ASCII_CHARS.len())]))
 36 |         .collect()
 37 | }
 39 | 
 40 | fn check(r: &JumpRope, expected: &str) {
 41 |     // Asserts that `r` passes its own self-check and matches `expected` in contents, lengths
 42 |     // and equality. When debugging a failure, r.print() can be called here.
 43 | 
 44 |     r.check();
 45 |     assert_eq!(r.to_string(), expected);
 46 |     assert_eq!(r.len_bytes(), expected.len());
 47 |     assert_eq!(r.len_chars(), expected.chars().count());
 48 |     #[cfg(feature = "wchar_conversion")] {
 49 |         assert_eq!(r.len_wchars(), expected.chars().map(|c| c.len_utf16()).sum());
 50 |         // Converting between chars and wchars must agree at both ends of the rope.
 51 |         assert_eq!(r.chars_to_wchars(r.len_chars()), r.len_wchars());
 52 |         assert_eq!(r.chars_to_wchars(0), 0);
 53 |         assert!(r.len_wchars() >= r.len_chars());
 54 | 
 55 |         // And if we convert back, we should get the number of characters.
 56 |         assert_eq!(r.wchars_to_chars(r.len_wchars()), r.len_chars());
 57 |     }
 58 |     assert_eq!(*r, JumpRope::from(expected), "Rope comparison fails");
 59 | 
 60 |     let clone = r.clone();
 61 |     // The clone must pass its own self-check and compare equal to the original.
 62 |     // clone.print();
 63 |     clone.check();
 64 |     assert_eq!(*r, clone, "Rope does not equal its clone");
 65 | }
 66 | 
 67 | #[test]
 68 | fn empty_rope_has_no_contents() {
 69 |     let mut r = JumpRope::new();
 70 |     check(&r, "");
 71 |     // Inserting an empty string must leave the rope empty.
 72 |     r.insert(0, "");
 73 |     check(&r, "");
 74 | }
 75 | 
 76 | #[test]
 77 | fn from_str_and_string() {
 78 |     let r1 = JumpRope::from("hi");
 79 |     check(&r1, "hi");
 80 |     // Building from an owned String must give the same result as building from a &str.
 81 |     let r2 = JumpRope::from(String::from("hi"));
 82 |     check(&r2, "hi");
 83 | }
 84 | 
 85 | #[test]
 86 | fn insert_at_location() {
 87 |     // Each step is (char position, text to insert, expected contents afterwards).
 88 |     let steps = [
 89 |         (0, "AAA", "AAA"),          // Into an empty rope.
 90 |         (0, "BBB", "BBBAAA"),       // At the start.
 91 |         (6, "CCC", "BBBAAACCC"),    // At the end.
 92 |         (5, "DDD", "BBBAADDDACCC"), // In the middle.
 93 |     ];
 94 | 
 95 |     let mut rope = JumpRope::new();
 96 |     for (pos, text, expected) in steps {
 97 |         rope.insert(pos, text);
 98 |         check(&rope, expected);
 99 |     }
100 | }
101 | 
102 | #[test]
103 | fn new_string_has_content() {
104 |     let r = JumpRope::from("hi there");
105 |     check(&r, "hi there");
106 |     // Multi-byte text: the insert position below counts unicode characters, not bytes.
107 |     let mut r = JumpRope::from("κόσμε");
108 |     check(&r, "κόσμε");
109 |     r.insert(2, "𝕐𝕆😘");
110 |     check(&r, "κό𝕐𝕆😘σμε");
111 | }
112 | 
113 | #[test]
114 | fn del_at_location() {
115 |     let mut rope = JumpRope::from("012345678");
116 |     check(&rope, "012345678");
117 | 
118 |     // Each step is (char range to remove, expected contents afterwards).
119 |     let steps = [
120 |         (8..9, "01234567"), // The last character.
121 |         (0..1, "1234567"),  // The first character.
122 |         (5..6, "123457"),   // One from the middle.
123 |         (5..6, "12345"),    // The last character again.
124 |         (0..5, ""),         // Everything that is left.
125 |     ];
126 | 
127 |     for (range, expected) in steps {
128 |         rope.remove(range);
129 |         check(&rope, expected);
130 |     }
131 | }
133 | 
134 | #[test]
135 | fn del_past_end_of_string() {
136 |     let mut r = JumpRope::new();
137 |     // Removing an out-of-bounds range from an empty rope must leave it empty.
138 |     r.remove(0..100);
139 |     check(&r, "");
140 |     // A range that runs past the end removes only the characters that exist.
141 |     r.insert(0, "hi there");
142 |     r.remove(3..13);
143 |     check(&r, "hi ");
144 | }
145 | 
146 | #[test]
147 | fn really_long_ascii_string() {
148 |     // Fixed seed, so the generated string is the same on every run.
149 |     let mut rng = SmallRng::seed_from_u64(1234);
150 |     let len = 2000;
151 |     let s = random_ascii_string(len, &mut rng);
152 | 
153 |     let mut r = JumpRope::from(s.as_str());
154 |     check(&r, s.as_str());
155 | 
156 |     // Delete everything but the first and last characters
157 |     r.remove(1..len - 1);
158 |     let first = s.chars().next().unwrap();
159 |     let last = s.chars().next_back().unwrap();
160 |     let expect = format!("{first}{last}");
161 |     check(&r, expect.as_str());
162 | }
161 | 
162 | /// Insert `contents` into `s` at character (not byte) position `char_pos`.
163 | ///
164 | /// This is the plain-`String` reference implementation that the fuzz tests compare the rope
165 | /// against.
166 | fn string_insert_at(s: &mut String, char_pos: usize, contents: &str) {
167 |     // If you try to write past the end of the string for now I'll just write at the end.
168 |     // Panicking might be a better policy.
169 |     let byte_pos = s.char_indices().nth(char_pos).map_or(s.len(), |(p, _)| p);
170 | 
171 |     // byte_pos always lands on a character boundary (or the end of the string), so this
172 |     // cannot panic. insert_str shifts the tail and copies the new bytes in for us.
173 |     s.insert_str(byte_pos, contents);
174 | }
192 | 
193 | /// Convert a range of character positions in `s` into the matching range of byte offsets.
194 | ///
195 | /// Positions at or past the end of the string map to `s.len()`.
196 | fn char_range_to_byte_range(s: &str, range: Range<usize>) -> Range<usize> {
197 |     // Walk the byte offset of each character once: peek at the start of the range, then skip
198 |     // ahead by the length of the range and peek again for the end.
199 |     let mut iter = s.char_indices().map(|(p, _)| p).skip(range.start).peekable();
200 |     let start = iter.peek().copied().unwrap_or(s.len());
201 | 
202 |     let mut iter = iter.skip(range.end - range.start).peekable();
203 |     let end = iter.peek().copied().unwrap_or(s.len());
204 | 
205 |     start..end
206 | }
202 | 
203 | /// Delete `length` characters from `s`, starting at character position `pos`.
204 | fn string_del_at(s: &mut String, pos: usize, length: usize) {
205 |     let doomed_bytes = char_range_to_byte_range(s, pos..pos+length);
206 |     s.replace_range(doomed_bytes, "");
207 | }
208 | 
209 | fn random_edits(seed: u64, verbose: bool) {
210 |     let mut r = JumpRope::new();
211 |     let mut s = String::new();
212 | 
213 |     // Apply the same 400 random edits to a rope and a String. Seeded, so runs are repeatable.
214 |     let mut rng = SmallRng::seed_from_u64(seed);
215 | 
216 |     for _i in 0..400 {
217 |         if verbose { println!("{_i} s: '{s}'"); }
218 |         // r.print();
219 | 
220 |         let len = s.chars().count();
221 | 
222 |         // Pick the next edit. An empty string can only be inserted into, and a string of
223 |         // 1000 or more characters is only ever deleted from. In between it's a coin flip.
224 |         //
225 |         // println!("i {}: {}", i, len);
226 | 
227 |         if len == 0 || (len < 1000 && rng.gen::<f32>() < 0.5) {
228 |             // Insert.
229 |             let pos = rng.gen_range(0..len+1);
230 |             // Sometimes generate strings longer than a single node to stress everything.
231 |             let text = random_unicode_string(rng.gen_range(0..20), &mut rng);
232 |             if verbose {
233 |                 println!("Inserting '{text}' at char {pos} (Byte length: {}, char len: {}, wchar len: {})",
234 |                          text.len(), text.chars().count(),
235 |                          text.chars().map(|c| c.len_utf16()).sum::<usize>()
236 |                 );
237 |             }
238 | 
239 |             r.insert(pos, text.as_str());
240 |             string_insert_at(&mut s, pos, text.as_str());
241 |         } else {
242 |             // Delete
243 |             let pos = rng.gen_range(0..len);
244 |             let dlen = min(rng.gen_range(0..10), len - pos);
245 |             if verbose {
246 |                 println!("Removing {dlen} characters at {pos}");
247 |             }
248 | 
249 |             r.remove(pos..pos+dlen);
250 |             string_del_at(&mut s, pos, dlen);
251 |         }
252 | 
253 |         // Calling check() is super slow with miri, and it doesn't matter much so long as we test
254 |         // for correctness normally.
255 |         if !cfg!(miri) {
256 |             check(&r, s.as_str());
257 |         }
258 |     }
259 |     // Under miri, validate just once at the end instead.
260 |     if cfg!(miri) {
261 |         check(&r, s.as_str());
262 |     }
263 | }
264 | 
265 | #[test]
266 | fn fuzz_once() {
267 |     random_edits(10, false); // Fixed seed, so this run is repeatable.
268 | }
269 | 
270 | // Fuzzes with one seed after another until something panics. Run with:
271 | // cargo test --release fuzz_forever -- --ignored --nocapture
272 | #[test]
273 | #[ignore]
274 | fn fuzz_forever() {
275 |     for seed in 0.. {
276 |         if seed % 100 == 0 { println!("seed: {seed}"); }
277 |         random_edits(seed, false);
278 |     }
279 | }
280 | 
281 | /// Same idea as `random_edits`, but every edit is applied to the rope through the wchar
282 | /// (UTF-16 code unit) API, while the reference `String` is edited by character position.
283 | #[cfg(feature = "wchar_conversion")]
284 | fn random_edits_wchar(seed: u64, verbose: bool) {
285 |     let mut r = JumpRope::new();
286 |     let mut s = String::new();
287 | 
288 |     // Seeded, so that a failing run can be replayed from its seed.
289 |     let mut rng = SmallRng::seed_from_u64(seed);
290 | 
291 |     for _i in 0..400 {
292 |         if verbose { println!("{_i} s: '{s}'"); }
293 |         // r.print();
294 |         let len_chars = s.chars().count();
295 | 
296 |         if len_chars == 0 || (len_chars < 1000 && rng.gen::<f32>() < 0.5) {
297 |             // Insert.
298 |             let pos_chars = rng.gen_range(0..len_chars + 1);
299 |             // Convert pos to wchars
300 |             let pos_wchar = s
301 |                 .chars()
302 |                 .take(pos_chars)
303 |                 .map(|c| c.len_utf16())
304 |                 .sum();
305 |             // Sometimes generate strings longer than a single node to stress everything.
306 |             let text = random_unicode_string(rng.gen_range(0..20), &mut rng);
307 |             if verbose {
308 |                 println!("Inserting '{text}' at char {pos_chars} / wchar {pos_wchar}");
309 |                 println!("Byte length {} char len {} / wchar len {}",
310 |                          text.len(), text.chars().count(), text.chars().map(|c| c.len_utf16()).sum::<usize>());
311 |             }
312 |             r.insert_at_wchar(pos_wchar, text.as_str());
313 |             // r.print();
314 |             string_insert_at(&mut s, pos_chars, text.as_str());
315 |         } else {
316 |             // Delete
317 |             let pos_chars = rng.gen_range(0..len_chars);
318 |             let dlen_chars = min(rng.gen_range(0..10), len_chars - pos_chars);
319 |             let char_range = pos_chars..pos_chars+dlen_chars;
320 |             let byte_range = char_range_to_byte_range(&s, char_range.clone());
321 |             // Now convert it to a wchar range :p
322 |             let start_wchar = s[..byte_range.start].chars().map(|c| c.len_utf16()).sum::<usize>();
323 |             let len_wchar = s[byte_range.clone()].chars().map(|c| c.len_utf16()).sum::<usize>();
324 |             let wchar_range = start_wchar..start_wchar + len_wchar;
325 | 
326 |             if verbose {
327 |                 println!("Removing {}..{} (wchar {}..{})",
328 |                          char_range.start, char_range.end,
329 |                          wchar_range.start, wchar_range.end
330 |                 );
331 |             }
332 | 
333 |             r.remove_at_wchar(wchar_range);
334 |             // r.print();
335 |             s.drain(byte_range);
336 |         }
337 | 
338 |         // Calling check() is super slow with miri, so there we only check once at the end.
339 |         if !cfg!(miri) {
340 |             check(&r, s.as_str());
341 |         }
342 |     }
343 | 
344 |     // Matches random_edits: without this, a miri run would never validate the rope at all.
345 |     if cfg!(miri) {
346 |         check(&r, s.as_str());
347 |     }
348 | }
345 | 
346 | #[cfg(feature = "wchar_conversion")]
347 | #[test]
348 | fn fuzz_wchar_once() {
349 |     random_edits_wchar(22, false); // Fixed seed, so this run is repeatable.
350 | }
351 | 
352 | // Run with:
353 | // cargo test --release --features wchar_conversion fuzz_wchar_forever -- --ignored --nocapture
354 | #[cfg(feature = "wchar_conversion")]
355 | #[test]
356 | #[ignore]
357 | fn fuzz_wchar_forever() {
358 |     for seed in 0.. {
359 |         if seed % 100 == 0 { println!("seed: {seed}"); }
360 |         random_edits_wchar(seed, false);
361 |     }
362 | }
363 | 
364 | fn random_edits_buffered(seed: u64, verbose: bool) {
365 |     let mut r = JumpRopeBuf::new();
366 |     let mut s = String::new();
367 | 
368 |     // Apply the same 400 random edits to a JumpRopeBuf and a String. Seeded for repeatability.
369 |     let mut rng = SmallRng::seed_from_u64(seed);
370 | 
371 |     for _i in 0..400 {
372 |     // for _i in 0..19 {
373 |         if verbose { println!("{_i} s: '{s}'"); }
374 |         // r.print();
375 | 
376 |         let len = s.chars().count();
377 | 
378 |         // Pick the next edit. An empty string can only be inserted into, and a string of
379 |         // 1000 or more characters is only ever deleted from. In between it's a coin flip.
380 |         //
381 |         // println!("i {}: {}", i, len);
382 | 
383 |         if len == 0 || (len < 1000 && rng.gen::<f32>() < 0.5) {
384 |             // Insert.
385 |             let pos = rng.gen_range(0..len+1);
386 |             // Sometimes generate strings longer than a single node to stress everything.
387 |             let text = random_unicode_string(rng.gen_range(0..20), &mut rng);
388 |             if verbose {
389 |                 println!("Inserting '{text}' at char {pos} (Byte length: {}, char len: {}, wchar len: {})",
390 |                          text.len(), text.chars().count(),
391 |                          text.chars().map(|c| c.len_utf16()).sum::<usize>()
392 |                 );
393 |             }
394 | 
395 |             r.insert(pos, text.as_str());
396 |             string_insert_at(&mut s, pos, text.as_str());
397 |         } else {
398 |             // Delete
399 |             let pos = rng.gen_range(0..len);
400 |             let dlen = min(rng.gen_range(0..10), len - pos);
401 |             if verbose {
402 |                 println!("Removing {dlen} characters at {pos}");
403 |             }
404 | 
405 |             r.remove(pos..pos+dlen);
406 |             string_del_at(&mut s, pos, dlen);
407 |         }
408 |         // dbg!(&r);
409 | 
410 |         assert_eq!(r.is_empty(), s.is_empty());
411 | 
412 |         // Checking the length flushes the buffered op - which is a useful test, but if we do it
413 |         // every time, the buffer won't build up and the test won't have the right coverage.
414 |         if rng.gen_bool(0.05) {
415 |             assert_eq!(r.len_chars(), s.chars().count());
416 |         }
417 |     }
418 |     // Unwrap the underlying rope and validate the final state in full.
419 |     let rope = r.into_inner();
420 |     check(&rope, s.as_str());
421 | }
422 | 
423 | #[test]
424 | fn fuzz_buffered_once() {
425 |     random_edits_buffered(0, false); // Fixed seed, so this run is repeatable.
426 | }
427 | 
428 | #[test]
429 | #[ignore]
430 | fn fuzz_buffered_forever() {
431 |     for seed in 0.. { // Unbounded: runs until a seed makes random_edits_buffered panic.
432 |         if seed % 1000 == 0 { println!("seed: {seed}"); }
433 |         random_edits_buffered(seed, false);
434 |     }
435 | }
436 | 
437 | #[test]
438 | fn eq_variants() {
439 |     let rope = JumpRope::from("Hi there");
440 |     // An owned rope must compare equal to a &str, a String and a &String.
441 |     assert_eq!(rope.clone(), "Hi there");
442 |     assert_eq!(rope.clone(), String::from("Hi there"));
443 |     assert_eq!(rope.clone(), &String::from("Hi there"));
444 |     // And so must a borrowed rope.
445 |     assert_eq!(&rope, "Hi there");
446 |     assert_eq!(&rope, String::from("Hi there"));
447 |     assert_eq!(&rope, &String::from("Hi there"));
448 | }
449 | 
450 | #[test]
451 | fn buffered_eq_variants() {
452 |     let rope = JumpRopeBuf::from("Hi there");
453 |     // An owned JumpRopeBuf must compare equal to a &str, a String and a &String.
454 |     assert_eq!(rope.clone(), "Hi there");
455 |     assert_eq!(rope.clone(), String::from("Hi there"));
456 |     assert_eq!(rope.clone(), &String::from("Hi there"));
457 |     // And so must a borrowed one.
458 |     assert_eq!(&rope, "Hi there");
459 |     assert_eq!(&rope, String::from("Hi there"));
460 |     assert_eq!(&rope, &String::from("Hi there"));
461 | }


--------------------------------------------------------------------------------