├── .github
    ├── ISSUE_TEMPLATE
    │   └── bug_report.md
    └── workflows
    │   └── ci.yml
├── .gitignore
├── CHANGELOG.md
├── Cargo.toml
├── LICENSE.md
├── README.md
├── benches
    ├── create.rs
    ├── hash.rs
    ├── insert.rs
    ├── iterators.rs
    ├── large.txt
    ├── lf.txt
    ├── medium.txt
    ├── queries.rs
    ├── remove.rs
    ├── small.txt
    └── tiny.txt
├── design
    ├── design.md
    └── images
    │   ├── bad.png
    │   ├── bad.svg
    │   ├── bad_jumping.png
    │   ├── bad_jumping.svg
    │   ├── good.png
    │   ├── good.svg
    │   ├── naive.png
    │   └── naive.svg
├── examples
    ├── front_page.rs
    ├── graphemes_iter.rs
    ├── graphemes_step.rs
    ├── read_latin_1.rs
    ├── search_and_replace.rs
    └── simple_buffer.rs
├── fuzz
    ├── .gitignore
    ├── Cargo.toml
    ├── README.md
    └── fuzz_targets
    │   ├── medium.txt
    │   ├── mutation.rs
    │   ├── mutation_small_chunks.rs
    │   └── small.txt
├── src
    ├── crlf.rs
    ├── iter.rs
    ├── lib.rs
    ├── rope.rs
    ├── rope_builder.rs
    ├── slice.rs
    ├── str_utils.rs
    └── tree
    │   ├── mod.rs
    │   ├── node.rs
    │   ├── node_children.rs
    │   ├── node_text.rs
    │   └── text_info.rs
└── tests
    ├── clone_rope.rs
    ├── clone_rope_to_thread.rs
    ├── crlf.rs
    ├── fix_tree.rs
    ├── from_reader.rs
    ├── from_str.rs
    ├── hash.rs
    ├── lifetimes.rs
    ├── medium.txt
    ├── non_ascii.txt
    ├── non_ascii_comparison.rs
    ├── proptest_tests.proptest-regressions
    ├── proptest_tests.rs
    ├── shrink_to_fit.rs
    ├── small_ascii.txt
    ├── small_random_inserts.rs
    └── test_text.txt


/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Report bugs and crashes
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Ropey Version**
11 | 
12 | Full version number (e.g. "2.0.0-alpha-1", not "2").
13 | 
14 | **Describe the bug**
15 | 
16 | A description of the bug, including how to reproduce it (if possible).
17 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: ci
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 |   schedule:
 9 |     # Every-other month, to make sure new Rust
10 |     # releases don't break things.
11 |     - cron: '15 12 5 1,3,5,7,9,11 *'
12 | 
13 | env:
14 |   CARGO_TERM_COLOR: always
15 | 
16 | jobs:
17 |   build-and-test:
18 |     name: Build and test
19 |     strategy:
20 |       matrix:
21 |         toolchain:
22 |           - stable
23 |           - beta
24 |           - "1.65"
25 |     runs-on: ubuntu-latest
26 |     steps:
27 |       # Get a checkout and rust toolchain.
28 |       - uses: actions/checkout@v2
29 |       - uses: actions-rs/toolchain@v1
30 |         with:
31 |           profile: minimal
32 |           toolchain: ${{matrix.toolchain}}
33 |           override: true
34 | 
35 |       # Build and test
36 |       - run: cargo +${{matrix.toolchain}} build
37 |       - run: cargo +${{matrix.toolchain}} test
38 |       - run: cargo +${{matrix.toolchain}} test --no-default-features line
39 |       - run: cargo +${{matrix.toolchain}} test --no-default-features --features=cr_lines line
40 |       - run: cargo +${{matrix.toolchain}} bench --no-run
41 | 
42 |   run-miri:
43 |     name: Run Miri
44 |     runs-on: ubuntu-latest
45 |     steps:
46 |       # Get a checkout and rust toolchain.
47 |       - uses: actions/checkout@v2
48 |       - uses: hecrj/setup-rust-action@v1
49 |         with:
50 |           rust-version: nightly
51 |           components: miri
52 | 
53 |       # Build and test
54 |       # --no-default-features to get rid of the simd feature
55 |       - run: cargo miri test --no-default-features
56 |         env:
57 |           MIRIFLAGS: "-Zmiri-strict-provenance"
58 | 
59 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target/
2 | **/*.rs.bk
3 | Cargo.lock
4 | perf.data*
5 | cachegrind.out*
6 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | 
  3 | 
  4 | ## [Unreleased]
  5 | 
  6 | 
  7 | ## [1.6.1] - 2023-10-18
  8 | 
  9 | - Fixed test code that was incorrect on some platforms / with some configurations.
 10 | - Minor documentation improvements.
 11 | 
 12 | 
 13 | ## [1.6.0] - 2023-02-01
 14 | 
 15 | ### New features
 16 | - Added `is_instance()` method, which checks if two ropes are same-memory instances of each other.
 17 | 
 18 | ### Bug fixes
 19 | - Ropey would panic when trying to create a `Lines` iterator for an empty rope or rope slice.
 20 | 
 21 | 
 22 | ## [1.5.1] - 2023-01-01
 23 | 
 24 | ### Performance
 25 | - A much faster `Lines` iterator, thanks to @pascalkuthe (PR #70).
 26 | 
 27 | ### Bug fixes
 28 | - Ropey's `Hash` impl was incorrect, due to making incorrect assumptions about the guaranteed behavior of `Hasher`s.  This didn't cause any problems with Rust's default hasher, but was incorrect in the general case.
 29 | - Comparing ropes for equality would panic when the two ropes had chunk boundaries that weren't mutually aligned at char boundaries.
 30 | - `len_lines()` could give incorrect counts on `RopeSlice`s that split CRLF pairs.
 31 | - Ropey's internal B-Tree representation could (rarely) end up in a state that violated some invariants.  This didn't affect anything in practice, because no code currently depends on the violated invariant.  But future code might.
 32 | 
 33 | 
 34 | ## 1.5.1-alpha - 2022-11-27
 35 | 
 36 | - Special early release, mainly to accommodate the Helix project.  It is not recommended to use this release outside of Helix.
 37 | 
 38 | 
 39 | ## [1.5.0] - 2022-05-29
 40 | 
 41 | ### New features
 42 | - Added a `reversed()` method for Ropey's iterators.  This is the same as `reverse()` except instead of mutating in-place, it consumes the iterator and returns it reversed.  This is more convenient when chaining iterator method calls.
 43 | - Added a `simd` cargo feature flag.  It's enabled by default, but can be disabled to use only scalar code (no simd intrinsics).
 44 | 
 45 | ### Bug fixes
 46 | - Fix a theoretical memory safety issue found via running Ropey's tests through miri.  Thanks to @Nilstrieb!
 47 | - Fix (unintentionally) depending on Rust memory layout to achieve precise node sizes in memory.  We now use `repr(C)`.
 48 | 
 49 | 
 50 | ## [1.4.1] - 2022-03-16
 51 | 
 52 | ### Bug fixes
 53 | - Fix a stupid copy/paste typo in the previous line break feature flag implementation that caused the wrong line break code to be used.
 54 | 
 55 | 
 56 | ## [1.4.0] - 2022-03-16
 57 | 
 58 | ### New features
 59 | - Added `byte_slice()` and `get_byte_slice()` methods to `Rope` and `RopeSlice` to slice by byte index instead of char index.  This can allow optimizations in client code in some cases.
 60 | - Added `cr_lines` and `unicode_lines` feature flags to the crate, to manage what line endings are recognized and tracked.  This allows, for example, building Ropey to only recognize line feed as a line break.  `unicode_lines` is on by default, and corresponds to the original behavior.
 61 | - Implemented `std::hash::Hash` for `Rope` and `RopeSlice`.
 62 | 
 63 | ### Misc
 64 | - Split `str_utils` module out into a separate crate, `str_indices`.  The `str_utils` module still exists, but is now mostly just a re-export of the new crate.
 65 | 
 66 | 
 67 | ## [1.3.2] - 2021-12-30
 68 | 
 69 | ### Bug fixes
 70 | - Relax the lifetime requirements of various `RopeSlice` methods.  They were unintentionally strict.
 71 | 
 72 | 
 73 | ## [1.3.1] - 2021-06-22
 74 | 
 75 | ### Bug fixes
 76 | - Fix unnecessary rope fragmentation when using `Rope::append()` to append many small ropes together.
 77 | - Fix contiguous `RopeSlices` occasionally failing to convert to a `&str` with `RopeSlice::as_str()`.
 78 | 
 79 | 
 80 | ## [1.3.0] - 2021-06-16
 81 | 
 82 | ### New features
 83 | - Added non-panicking versions of all methods on `Rope` and `RopeSlice`.
 84 | - All iterators can now be reversed, swapping the behavior of `prev()` and `next()`.
 85 | 
 86 | ### Bug fixes
 87 | - The in-memory node size wasn't being computed properly, potentially resulting in unnecessary memory fragmentation.
 88 | 
 89 | 
 90 | ## [1.2.0] - 2020-06-14
 91 | 
 92 | ### New features
 93 | - `Rope` and `RopeSlice` can now convert between char indices and utf16 code unit indices.  This is useful when interacting with external APIs that use utf16 code units as their text indexing scheme.
 94 | 
 95 | ### Dependencies
 96 | - Updated smallvec to minimum version 1.0.
 97 | 
 98 | 
 99 | ## [1.1.0] - 2019-09-01
100 | 
101 | ### New features
102 | - Iterators can now be created directly to start at any position in the `Rope` or `RopeSlice`.
103 | - All iterators can now iterate backwards via a new `prev()` method.
104 | - All iterators now implement `Clone` and `Debug` traits.
105 | - `Bytes`, `Chars`, and `Lines` iterators now implement `ExactSizeIterator`.
106 | 
107 | ### Changes
108 | - The `Chunks` iterator no longer yields empty chunks, for example if the `Rope` or `RopeSlice` it was created from is also empty.
109 | 
110 | 
111 | ## [1.0.1] - 2019-05-01
112 | 
113 | ### Other
114 | - Converted a lot of unsafe code to safe code, with minimal performance impact.
115 | 
116 | 
117 | ## [1.0.0] - 2019-01-03
118 | 
119 | ### New features
120 | - Implemented `Eq`, `Ord`, and `PartialOrd` traits for `Rope` and `RopeSlice`.
121 | 
122 | 
123 | ## [0.9.2] - 2018-10-04
124 | 
125 | ### Bug fixes
126 | - Turns out the previous Line iterator bug fix introduced a different bug.  Fixed!
127 | 
128 | 
129 | ## [0.9.1] - 2018-10-03
130 | 
131 | ### Bug fixes
132 | - The Lines iterator would sometimes emit an extra blank line when created from a small rope slice.
133 | - The `write_to()` convenience method could potentially write only part of the rope, without any error indication.
134 | 
135 | 
136 | ## [0.9.0] - 2018-09-04
137 | 
138 | ### Performance improvements
139 | - Minor performance improvements to a few methods on `Rope` and `RopeSlice`.
140 | 
141 | ### New features
142 | - Added `Rope::byte()` for fetching individual bytes by index.
143 | - Added more conversion functions for `Rope` and `RopeSlice`, in the form of `From` impls.
144 | 
145 | ### Breaking changes
146 | - Removed `Rope::to_string()`, `RopeSlice::from_str()`, `RopeSlice::to_string()`, and `RopeSlice::to_rope()` in favor of `From` impls that do the same thing.
147 | 
148 | 
149 | ## [0.8.4] - 2018-07-28
150 | 
151 | ### Performance improvements
152 | - Minor across-the-board speedups by using SIMD better.
153 | - Significant speedups for Rope::insert()/remove() by being more clever about node info updates.
154 | - Further significant speedup to Rope::remove() due to a (performance-only) bug fix.
155 | 
156 | ### Bug fixes
157 | - Ropey wouldn't compile on non-x86/64 platforms after the introduction of SSE2 optimizations in v0.8.3.  They are now wrapped properly so that Ropey again compiles on other platforms as well.
158 | 
159 | 
160 | ## [0.8.3] - 2018-07-26
161 | 
162 | ### Performance improvements
163 | - Significant speedups across the board by using SIMD for index conversions.
164 | - Loading texts from files or creating Ropes from strings is now significantly faster.
165 | 
166 | ### Memory usage improvements
167 | - Memory overhead reduced from 17% to 10% for freshly loaded text.
168 | 
169 | ### Bug fixes
170 | - The low-level line -> byte conversion function would sometimes return a byte index in the middle of the line break for multi-byte line break characters.
171 | 
172 | 
173 | ## [0.8.2] - 2018-07-22
174 | 
175 | ### Performance improvements
176 | - File loading is slightly faster.
177 | 
178 | ### Bug fixes
179 | - The low-level line break counting functions could return an incorrect count under certain circumstances.  This also affected the higher-level methods in Ropey, although it was somewhat difficult to trigger in practice.
180 | 
181 | 
182 | ## [0.8.1] - 2018-07-20
183 | 
184 | ### Performance improvements
185 | - Increased Rope::insert() speed by roughly 1.4x for small insertion strings.
186 | - Increased Rope::remove() speed by roughly 1.75x.
187 | 
188 | ### Other
189 | - General documentation improvements, based on feedback.
190 | 
191 | 
192 | ## [0.8.0] - 2018-07-14
193 | 
194 | ### Performance improvements
195 | - Building new ropes via RopeBuilder or Rope::from_str() is now about 15% faster.
196 | - Slicing is now almost twice as fast.
197 | - Fetching lines is now almost twice as fast.
198 | - Significant speedups for byte/char -> line index conversion methods.
199 | - Significant speedups for line -> byte/char index conversion methods.
200 | 
201 | ### New features
202 | - Chunk fetching can now be done by line break index as well as byte/char index.
203 | - Some previously-internal utility functions for working with string slices are now part of Ropey's public API.
204 | - Added Rope::write_to() convenience function for writing a Rope's data to a writer.
205 | 
206 | ### Breaking changes
207 | - Conversion from byte/char indices to line indices has been changed to be more intuitive.  It is now equivalent to counting the line endings before the given byte/char index.
208 | - Chunk fetching now returns the starting byte/char/line of the chunk, which is generally easier to work with.
209 | 
210 | 
211 | ## [0.7.1] - 2018-07-09
212 | 
213 | ### Bug fixes
214 | - The chunk fetching methods on slices returned bogus starting char indices.
215 | 
216 | 
217 | ## [0.7.0] - 2018-07-05
218 | 
219 | ### Performance improvements
220 | - `RopeSlice`s have been given a major speed boost for small slices: for contiguous slices of text in memory, they will simply point at the text without any tree structure.  This makes it feasible to use `RopeSlice`s to yield e.g. graphemes or words, even in tight inner loops, while maintaining performance.
221 | 
222 | ### New features
223 | - You can now fetch contiguous chunks of text directly from `Rope`s and `RopeSlice`s, via byte or char index.  The chunk containing the given byte or char will be returned along with offset information.
224 | - Added more index conversion methods.  For both `Rope`s and `RopeSlice`s, you can now convert between any of: byte, char, and line indices.
225 | - Added a method to directly create `RopeSlice`s from string slices.  This isn't terribly useful when using Ropey's standard APIs, but it allows for much more efficient implementations of things like custom iterators.
226 | - Added a method to directly access a `RopeSlice`'s text as a contiguous string slice when possible.  This is useful for client code to be able to make a fast-path branch for small slices that happen to be contiguous.  Like the above item, this can result in significant performance gains for certain use-cases.
227 | 
228 | ### API breaking-changes
229 | - All grapheme related APIs have been removed.  However, new APIs have been added that allow the efficient implementation of those same APIs on top of Ropey.  See the grapheme examples in the `examples` directory of the repo for working implementations.
230 | 
231 | 
232 | ## [0.6.3] - 2018-01-28
233 | 
234 | ### Features
235 | - Added a new `Rope::insert_char()` convenience method for inserting a single Unicode scalar value.
236 | 
237 | ### Documentation
238 | - Updated the Chunks iterator docs to accurately reflect the new segmentation API in 0.6.x.
239 | 
240 | 
241 | ## [0.6.2] - 2018-01-11
242 | 
243 | ### Fixes
244 | - 0.6.0 and 0.6.1 had an API regression where you now had to specify the
245 |   segmenter in the type parameters of RopeSlice and the various iterators.
246 | 
247 | 
248 | ## [0.6.1] - 2018-01-11
249 | 
250 | - No functional changes.  Just updated the readme to render properly on crates.io.
251 | 
252 | 
253 | ## [0.6.0] - 2018-01-11
254 | 
255 | ### New features
256 | - Grapheme segmentation can now be customized if needed.
257 | 
258 | ### API changes
259 | - `Rope::remove()`, `Rope::slice()`, and `RopeSlice::slice()` now take range syntax to specify
260 |   their ranges.
261 | 
262 | 
263 | ## [0.5.6] - 2018-01-05
264 | 
265 | ### Documentation
266 | - Added a design overview document to the repo, explaining Ropey's design.  Mainly targeted at potential contributors.
267 | - Added a more integrated example of usage to the front page of the library docs.
268 | 
269 | ### Features
270 | - Fleshed out the `PartialEq` impls.  `Rope` and `RopeSlice` can now be compared for equality with not just `&str`, but also `String` and `Cow<str>`.
271 | 
272 | ### Performance
273 | - `Rope::char()`, which fetches a single Unicode scalar value as a `char`, is now several times faster.
274 | 
275 | ### Misc
276 | - This changelog had the wrong year on some of its dates.  Heh...
277 | 
278 | 
279 | ## [0.5.5] - 2017-12-30
280 | 
281 | ### Bug fixes
282 | - Comparing two empty ropes for equality would panic.
283 | 
284 | ### New features
285 | - Added Rope::capacity() and Rope::shrink_to_fit() methods.  Although these are probably of limited use, they may be useful in especially memory-constrained environments.
286 | 
287 | 
288 | ## [0.5.4] - 2017-12-30
289 | 
290 | ### Bug fixes
291 | - Rope::remove() didn't always merge graphemes between chunks properly.
292 | 
293 | ### Performance and memory
294 | - Inserting large texts into a rope now degrades in performance more gracefully as the insertion text becomes larger, rather than hitting a sudden performance cliff.
295 | - `Rope::remove()` got a nice speed boost.
296 | - Memory overhead has been reduced across the board.  Freshly loaded files now only have ~17% overhead, and the worst-case (built up from lots of small random-location inserts) is now ~60% overhead.
297 | 
298 | ### Misc
299 | - 100% unit test coverage of public APIs.
300 | - Added randomized testing via [QuickCheck](https://crates.io/crates/quickcheck).
301 | - Added benchmarks to the library.
302 | 
303 | 
304 | ## [0.5.3] - 2017-12-28
305 | 
306 | ### Performance and memory
307 | - Massive speed boost for small insertions: between 40% - 50% faster.
308 | - `Rope::from_str()` now only uses stack memory for strings smaller than ~3MB. (Aside from the resulting Rope itself, of course.)
309 | 
310 | ### Misc
311 | - Better unit test coverage of public APIs.  Still not 100%, but getting there!
312 | 
313 | 
314 | ## [0.5.2] - 2017-12-25
315 | 
316 | ### Bug fixes
317 | - There were occasionally unnecessary heap allocations that took up a small amount of extra space in the rope.
318 | 
319 | ### Misc
320 | - Memory overhead has been significantly reduced for ropes built up by many small coherent insertions.
321 | 
322 | 
323 | ## [0.5.1] - 2017-12-24
324 | 
325 | ### Bug fixes
326 | - Calling `Rope::line_to_char()` with a line index one-past-the-end would panic.  This wasn't consistent with other indexing, and has been fixed and now returns the one-past-the-end char index.
327 | - Had accidentally left some asserts in the `Rope::remove()` code that were put in during debugging.  They were causing significant slow downs for removes.
328 | 
329 | ### Misc
330 | - Added a changelog file.
331 | 
332 | 
333 | [Unreleased]: https://github.com/cessen/ropey/compare/v1.6.1...HEAD
334 | [1.6.1]: https://github.com/cessen/ropey/compare/v1.6.0...v1.6.1
335 | [1.6.0]: https://github.com/cessen/ropey/compare/v1.5.1...v1.6.0
336 | [1.5.1]: https://github.com/cessen/ropey/compare/v1.5.0...v1.5.1
337 | [1.5.0]: https://github.com/cessen/ropey/compare/v1.4.1...v1.5.0
338 | [1.4.1]: https://github.com/cessen/ropey/compare/v1.4.0...v1.4.1
339 | [1.4.0]: https://github.com/cessen/ropey/compare/v1.3.2...v1.4.0
340 | [1.3.2]: https://github.com/cessen/ropey/compare/v1.3.1...v1.3.2
341 | [1.3.1]: https://github.com/cessen/ropey/compare/v1.3.0...v1.3.1
342 | [1.3.0]: https://github.com/cessen/ropey/compare/v1.2.0...v1.3.0
343 | [1.2.0]: https://github.com/cessen/ropey/compare/v1.1.0...v1.2.0
344 | [1.1.0]: https://github.com/cessen/ropey/compare/v1.0.1...v1.1.0
345 | [1.0.1]: https://github.com/cessen/ropey/compare/v1.0.0...v1.0.1
346 | [1.0.0]: https://github.com/cessen/ropey/compare/v0.9.2...v1.0.0
347 | [0.9.2]: https://github.com/cessen/ropey/compare/v0.9.1...v0.9.2
348 | [0.9.1]: https://github.com/cessen/ropey/compare/v0.9.0...v0.9.1
349 | [0.9.0]: https://github.com/cessen/ropey/compare/v0.8.4...v0.9.0
350 | [0.8.4]: https://github.com/cessen/ropey/compare/v0.8.3...v0.8.4
351 | [0.8.3]: https://github.com/cessen/ropey/compare/v0.8.2...v0.8.3
352 | [0.8.2]: https://github.com/cessen/ropey/compare/v0.8.1...v0.8.2
353 | [0.8.1]: https://github.com/cessen/ropey/compare/v0.8.0...v0.8.1
354 | [0.8.0]: https://github.com/cessen/ropey/compare/v0.7.1...v0.8.0
355 | [0.7.1]: https://github.com/cessen/ropey/compare/v0.7.0...v0.7.1
356 | [0.7.0]: https://github.com/cessen/ropey/compare/v0.6.3...v0.7.0
357 | [0.6.3]: https://github.com/cessen/ropey/compare/v0.6.2...v0.6.3
358 | [0.6.2]: https://github.com/cessen/ropey/compare/v0.6.1...v0.6.2
359 | [0.6.1]: https://github.com/cessen/ropey/compare/v0.6.0...v0.6.1
360 | [0.6.0]: https://github.com/cessen/ropey/compare/v0.5.6...v0.6.0
361 | [0.5.6]: https://github.com/cessen/ropey/compare/v0.5.5...v0.5.6
362 | [0.5.5]: https://github.com/cessen/ropey/compare/v0.5.4...v0.5.5
363 | [0.5.4]: https://github.com/cessen/ropey/compare/v0.5.3...v0.5.4
364 | [0.5.3]: https://github.com/cessen/ropey/compare/v0.5.2...v0.5.3
365 | [0.5.2]: https://github.com/cessen/ropey/compare/v0.5.1...v0.5.2
366 | [0.5.1]: https://github.com/cessen/ropey/releases/tag/v0.5.1
367 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "ropey"
 3 | version = "1.6.1"
 4 | authors = ["Nathan Vegdahl <cessen@cessen.com>"]
 5 | description = "A fast and robust text rope for Rust"
 6 | documentation = "https://docs.rs/ropey"
 7 | repository = "https://github.com/cessen/ropey"
 8 | readme = "README.md"
 9 | license = "MIT"
10 | keywords = ["rope", "text", "edit", "buffer"]
11 | categories = ["text-processing", "data-structures"]
12 | exclude = ["/design/*", "/benches/*.txt", "/fuzz/**", "/.github/*"]
13 | 
14 | [features]
15 | default = ["unicode_lines", "simd"]
16 | cr_lines = [] # Enable recognizing carriage returns as line breaks.
17 | unicode_lines = ["cr_lines"] # Enable recognizing all Unicode line breaks.
18 | simd = ["str_indices/simd"]
19 | 
20 | # Internal feature: not part of the public stable API.
21 | # Enables a much smaller chunk size, which makes it
22 | # easier to catch bugs without requiring huge text sizes during fuzzing.
23 | small_chunks = []
24 | 
25 | [dependencies]
26 | smallvec = "1.0.0"
27 | str_indices = { version = "0.4", default-features = false }
28 | 
29 | [dev-dependencies]
30 | rand = "0.8"
31 | proptest = "1.0"
32 | criterion = { version = "0.3", features = ["html_reports"] }
33 | unicode-segmentation = "1.3"
34 | fnv = "1"
35 | fxhash = "0.2"
36 | 
37 | # This is a transitive dependency of criterion--we don't use it directly. We
38 | # lock it to this exact version because newer versions don't work on our MSRV,
39 | # and thus CI fails if it's on a newer version.
40 | unicode-width = "=0.1.13"
41 | 
42 | #-----------------------------------------
43 | 
44 | [[bench]]
45 | name = "create"
46 | harness = false
47 | 
48 | [[bench]]
49 | name = "insert"
50 | harness = false
51 | 
52 | [[bench]]
53 | name = "hash"
54 | harness = false
55 | 
56 | [[bench]]
57 | name = "remove"
58 | harness = false
59 | 
60 | [[bench]]
61 | name = "queries"
62 | harness = false
63 | 
64 | [[bench]]
65 | name = "iterators"
66 | harness = false
67 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2017 Nathan Vegdahl
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 7 | of the Software, and to permit persons to whom the Software is furnished to do
 8 | so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Ropey
  2 | 
  3 | [![CI Build Status][github-ci-img]][github-ci]
  4 | [![Latest Release][crates-io-badge]][crates-io-url]
  5 | [![Documentation][docs-rs-img]][docs-rs-url]
  6 | 
  7 | Ropey is a utf8 text rope for Rust, designed to be the backing text-buffer for
  8 | applications such as text editors.  Ropey is fast, robust, and can handle huge
  9 | texts and memory-incoherent edits with ease.
 10 | 
 11 | 
 12 | ## Example Usage
 13 | 
 14 | ```rust
 15 | // Load a text file.
 16 | let mut text = ropey::Rope::from_reader(
 17 |     File::open("my_great_book.txt")?
 18 | )?;
 19 | 
 20 | // Print the 516th line (zero-indexed).
 21 | println!("{}", text.line(515));
 22 | 
 23 | // Get the start/end char indices of the line.
 24 | let start_idx = text.line_to_char(515);
 25 | let end_idx = text.line_to_char(516);
 26 | 
 27 | // Remove the line...
 28 | text.remove(start_idx..end_idx);
 29 | 
 30 | // ...and replace it with something better.
 31 | text.insert(start_idx, "The flowers are... so... dunno.\n");
 32 | 
 33 | // Print the changes, along with the previous few lines for context.
 34 | let start_idx = text.line_to_char(511);
 35 | let end_idx = text.line_to_char(516);
 36 | println!("{}", text.slice(start_idx..end_idx));
 37 | 
 38 | // Write the file back out to disk.
 39 | text.write_to(
 40 |     BufWriter::new(File::create("my_great_book.txt")?)
 41 | )?;
 42 | ```
 43 | 
 44 | ## When Should I Use Ropey?
 45 | 
 46 | Ropey is designed and built to be the backing text buffer for applications
 47 | such as text editors, and its design trade-offs reflect that.  Ropey is good
 48 | at:
 49 | 
 50 | - Handling frequent edits to medium-to-large texts.  Even on texts that are
 51 |   multiple gigabytes large, edits are measured in single-digit microseconds.
 52 | - Handling Unicode correctly.  It is impossible to create invalid utf8 through
 53 |   Ropey, and all Unicode line endings are correctly tracked including CRLF.
 54 | - Having flat, predictable performance characteristics.  Ropey will never be
 55 |   the source of hiccups or stutters in your software.
 56 | 
 57 | On the other hand, Ropey is _not_ good at:
 58 | 
 59 | - Handling texts smaller than a couple of kilobytes or so.  That is to say,
 60 |   Ropey will handle them fine, but Ropey allocates space in kilobyte chunks,
 61 |   which introduces unnecessary bloat if your texts are almost always small.
 62 | - Handling texts that are larger than available memory.  Ropey is an in-memory
 63 |   data structure.
 64 | - Getting the best performance for every possible use-case.  Ropey puts work
 65 |   into tracking both line endings and unicode scalar values, which is
 66 |   performance overhead you may not need depending on your use-case.
 67 | 
 68 | Keep this in mind when selecting Ropey for your project.  Ropey is very good
 69 | at what it does, but like all software it is designed with certain
 70 | applications in mind.
 71 | 
 72 | 
 73 | ## Features
 74 | 
 75 | ### Strong Unicode support
 76 | Ropey's atomic unit of text is
 77 | [Unicode scalar values](https://www.unicode.org/glossary/#unicode_scalar_value)
 78 | (or [`char`](https://doc.rust-lang.org/std/primitive.char.html)s in Rust)
 79 | encoded as utf8.  All of Ropey's editing and slicing operations are done
 80 | in terms of char indices, which prevents accidental creation of invalid
 81 | utf8 data.
 82 | 
 83 | Ropey also supports converting between scalar value indices and utf16 code unit
 84 | indices, for interoperation with external APIs that may still use utf16.
 85 | 
 86 | ### Line-aware
 87 | 
 88 | Ropey knows about line breaks, allowing you to index into and iterate over
 89 | lines of text.
 90 | 
 91 | The line breaks Ropey recognizes are also configurable at build time via
 92 | feature flags.  See Ropey's documentation for details.
 93 | 
 94 | ### Rope slices
 95 | 
 96 | Ropey has rope slices that allow you to work with just parts of a rope, using
 97 | all the read-only operations of a full rope including iterators and making
 98 | sub-slices.
 99 | 
100 | ### Flexible APIs with low-level access
101 | 
102 | Although Ropey is intentionally limited in scope, it also provides APIs for
103 | efficiently accessing and working with its internal text chunk
104 | representation, allowing additional functionality to be efficiently
105 | implemented by client code with minimal overhead.
106 | 
107 | ### Efficient
108 | 
109 | Ropey is fast and minimizes memory usage:
110 | 
111 | - On a recent mobile i7 Intel CPU, Ropey performed over 1.8 million small
112 |   incoherent insertions per second while building up a text roughly 100 MB
113 |   large.  Coherent insertions (i.e. all near the same place in the text) are
114 |   even faster, doing the same task at over 3.3 million insertions per
115 |   second.
116 | - Freshly loading a file from disk only incurs about 10% memory overhead.  For
117 |   example, a 100 MB text file will occupy about 110 MB of memory when loaded
118 |   by Ropey.
119 | - Cloning ropes is _extremely_ cheap.  Rope clones share data, so an initial
120 |   clone only takes 8 bytes of memory.  After that, memory usage will grow
121 |   incrementally as the clones diverge due to edits.
122 | 
123 | ### Thread safe
124 | 
125 | Ropey ensures that even though clones share memory, everything is thread-safe.
126 | Clones can be sent to other threads for both reading and writing.
127 | 
128 | 
129 | ## Unsafe code
130 | 
131 | Ropey uses unsafe code to help achieve some of its space and performance
132 | characteristics.  Although effort has been put into keeping the unsafe code
133 | compartmentalized and making it correct, please be cautious about using Ropey
134 | in software that may face adversarial conditions.
135 | 
136 | Auditing, fuzzing, etc. of the unsafe code in Ropey is extremely welcome.
137 | If you find any unsoundness, _please_ file an issue!  Also welcome are
138 | recommendations for how to remove any of the unsafe code without introducing
139 | significant space or performance regressions, or how to compartmentalize the
140 | unsafe code even better.
141 | 
142 | 
143 | ## License
144 | 
145 | Ropey is licensed under the MIT license (LICENSE.md or http://opensource.org/licenses/MIT)
146 | 
147 | 
148 | ## Contributing
149 | 
150 | Contributions are absolutely welcome!  However, please open an issue to discuss
151 | larger changes, to avoid doing a lot of work that may get rejected.  Also note
152 | that PRs that add dependencies are very likely to be rejected (Ropey aims to
153 | have minimal dependencies).
154 | 
155 | An overview of Ropey's design can be found [here](https://github.com/cessen/ropey/blob/master/design/design.md).
156 | 
157 | Unless you explicitly state otherwise, any contribution intentionally submitted
158 | for inclusion in Ropey by you will be licensed as above, without any additional
159 | terms or conditions.
160 | 
161 | [crates-io-badge]: https://img.shields.io/crates/v/ropey.svg
162 | [crates-io-url]: https://crates.io/crates/ropey
163 | [github-ci-img]: https://github.com/cessen/ropey/workflows/ci/badge.svg
164 | [github-ci]: https://github.com/cessen/ropey/actions?query=workflow%3Aci
165 | [docs-rs-img]: https://docs.rs/ropey/badge.svg
166 | [docs-rs-url]: https://docs.rs/ropey
167 | 


--------------------------------------------------------------------------------
/benches/create.rs:
--------------------------------------------------------------------------------
 1 | extern crate criterion;
 2 | extern crate ropey;
 3 | 
 4 | use criterion::{black_box, criterion_group, criterion_main, Criterion};
 5 | use ropey::Rope;
 6 | 
 7 | const TEXT_SMALL: &str = include_str!("small.txt");
 8 | const TEXT_MEDIUM: &str = include_str!("medium.txt");
 9 | const TEXT_LARGE: &str = include_str!("large.txt");
10 | const TEXT_LF: &str = include_str!("lf.txt");
11 | 
12 | //----
13 | 
14 | /// Benchmarks `Rope::from_str` on each of the bundled sample texts.
15 | ///
16 | /// One benchmark per text is registered under the "from_str" group.
17 | fn from_str(c: &mut Criterion) {
18 |     let mut group = c.benchmark_group("from_str");
19 | 
20 |     // Benchmark label paired with the text the rope is built from.
21 |     let cases = [
22 |         ("small", TEXT_SMALL),
23 |         ("medium", TEXT_MEDIUM),
24 |         ("large", TEXT_LARGE),
25 |         ("linefeeds", TEXT_LF),
26 |     ];
27 | 
28 |     for &(label, text) in cases.iter() {
29 |         group.bench_function(label, |b| {
30 |             b.iter(|| {
31 |                 Rope::from_str(black_box(text));
32 |             })
33 |         });
34 |     }
35 | }
41 | 
42 | /// Benchmarks cloning a rope built from the large sample text.
43 | fn rope_clone(c: &mut Criterion) {
44 |     let source = Rope::from_str(TEXT_LARGE);
45 |     c.bench_function("rope_clone", |b| {
46 |         // The clone is created and dropped again within each iteration.
47 |         b.iter(|| drop(black_box(&source).clone()))
48 |     });
49 | }
50 | 
51 | //----
52 | 
53 | criterion_group!(benches, from_str, rope_clone,);
54 | criterion_main!(benches);
55 | 


--------------------------------------------------------------------------------
/benches/hash.rs:
--------------------------------------------------------------------------------
  1 | extern crate criterion;
  2 | extern crate fnv;
  3 | extern crate fxhash;
  4 | extern crate ropey;
  5 | 
  6 | use std::collections::hash_map::DefaultHasher;
  7 | use std::hash::{Hash, Hasher};
  8 | 
  9 | use criterion::{black_box, criterion_group, criterion_main, Criterion};
 10 | use fnv::FnvHasher;
 11 | use fxhash::FxHasher;
 12 | use ropey::Rope;
 13 | 
 14 | const TEXT: &str = include_str!("large.txt");
 15 | const TEXT_SMALL: &str = include_str!("small.txt");
 16 | const TEXT_TINY: &str = "hello";
 17 | 
 18 | //----
 19 | 
 20 | /// Benchmarks hashing a rope built from `TEXT` with three different hashers.
 21 | fn hash_large(c: &mut Criterion) {
 22 |     let mut group = c.benchmark_group("hash_large");
 23 | 
 24 |     // Registers one benchmark that feeds the rope into a fresh `$hasher`
 25 |     // on every iteration and reads back the finished hash.
 26 |     macro_rules! bench_hasher {
 27 |         ($label:expr, $hasher:ty) => {
 28 |             group.bench_function($label, |b| {
 29 |                 let rope = Rope::from_str(TEXT);
 30 |                 b.iter(|| {
 31 |                     let mut state = <$hasher>::default();
 32 |                     rope.hash(black_box(&mut state));
 33 |                     black_box(state.finish());
 34 |                 })
 35 |             });
 36 |         };
 37 |     }
 38 | 
 39 |     bench_hasher!("default", DefaultHasher);
 40 |     bench_hasher!("fnv", FnvHasher);
 41 |     bench_hasher!("fxhash", FxHasher);
 42 | }
 50 | 
 51 | /// Benchmarks hashing a rope built from `TEXT_SMALL` with three different
 52 | /// hashers.
 53 | fn hash_small(c: &mut Criterion) {
 54 |     let mut group = c.benchmark_group("hash_small");
 55 | 
 56 |     // Registers one benchmark that feeds the rope into a fresh `$hasher`
 57 |     // on every iteration and reads back the finished hash.
 58 |     macro_rules! bench_hasher {
 59 |         ($label:expr, $hasher:ty) => {
 60 |             group.bench_function($label, |b| {
 61 |                 let rope = Rope::from_str(TEXT_SMALL);
 62 |                 b.iter(|| {
 63 |                     let mut state = <$hasher>::default();
 64 |                     rope.hash(black_box(&mut state));
 65 |                     black_box(state.finish());
 66 |                 })
 67 |             });
 68 |         };
 69 |     }
 70 | 
 71 |     bench_hasher!("default", DefaultHasher);
 72 |     bench_hasher!("fnv", FnvHasher);
 73 |     bench_hasher!("fxhash", FxHasher);
 74 | }
 81 | 
 82 | /// Benchmarks hashing a rope built from `TEXT_TINY` with three different
 83 | /// hashers.
 84 | fn hash_tiny(c: &mut Criterion) {
 85 |     let mut group = c.benchmark_group("hash_tiny");
 86 | 
 87 |     // Registers one benchmark that feeds the rope into a fresh `$hasher`
 88 |     // on every iteration and reads back the finished hash.
 89 |     macro_rules! bench_hasher {
 90 |         ($label:expr, $hasher:ty) => {
 91 |             group.bench_function($label, |b| {
 92 |                 let rope = Rope::from_str(TEXT_TINY);
 93 |                 b.iter(|| {
 94 |                     let mut state = <$hasher>::default();
 95 |                     rope.hash(black_box(&mut state));
 96 |                     black_box(state.finish());
 97 |                 })
 98 |             });
 99 |         };
100 |     }
101 | 
102 |     bench_hasher!("default", DefaultHasher);
103 |     bench_hasher!("fnv", FnvHasher);
104 |     bench_hasher!("fxhash", FxHasher);
105 | }
112 | 
113 | //----
114 | 
115 | criterion_group!(benches, hash_large, hash_small, hash_tiny,);
116 | criterion_main!(benches);
117 | 


--------------------------------------------------------------------------------
/benches/insert.rs:
--------------------------------------------------------------------------------
  1 | extern crate criterion;
  2 | extern crate rand;
  3 | extern crate ropey;
  4 | 
  5 | use criterion::{criterion_group, criterion_main, Criterion};
  6 | use rand::random;
  7 | use ropey::Rope;
  8 | 
  9 | const TEXT: &str = include_str!("large.txt");
 10 | 
 11 | //----
 12 | 
 13 | /// Benchmarks `Rope::insert_char` at random, start, middle and end positions.
 14 | ///
 15 | /// Each benchmark builds its rope from `TEXT` once; the rope then keeps
 16 | /// growing as the timed iterations insert into it.
 17 | fn insert_char(c: &mut Criterion) {
 18 |     let mut group = c.benchmark_group("insert_char");
 19 | 
 20 |     group.bench_function("random", |b| {
 21 |         let mut target = Rope::from_str(TEXT);
 22 |         b.iter(|| {
 23 |             let char_count = target.len_chars();
 24 |             let at = random::<usize>() % char_count;
 25 |             target.insert_char(at, 'a')
 26 |         })
 27 |     });
 28 | 
 29 |     group.bench_function("start", |b| {
 30 |         let mut target = Rope::from_str(TEXT);
 31 |         b.iter(|| {
 32 |             target.insert_char(0, 'a');
 33 |         })
 34 |     });
 35 | 
 36 |     group.bench_function("middle", |b| {
 37 |         let mut target = Rope::from_str(TEXT);
 38 |         b.iter(|| {
 39 |             let char_count = target.len_chars();
 40 |             let midpoint = char_count / 2;
 41 |             target.insert_char(midpoint, 'a');
 42 |         })
 43 |     });
 44 | 
 45 |     group.bench_function("end", |b| {
 46 |         let mut target = Rope::from_str(TEXT);
 47 |         b.iter(|| {
 48 |             let tail = target.len_chars();
 49 |             target.insert_char(tail, 'a');
 50 |         })
 51 |     });
 52 | }
 47 | 
 48 | /// Benchmarks inserting the one-character string "a" at random, start,
 49 | /// middle and end positions.
 50 | ///
 51 | /// Each benchmark builds its rope from `TEXT` once; the rope then keeps
 52 | /// growing as the timed iterations insert into it.
 53 | fn insert_small(c: &mut Criterion) {
 54 |     let mut group = c.benchmark_group("insert_small");
 55 | 
 56 |     group.bench_function("random", |b| {
 57 |         let mut target = Rope::from_str(TEXT);
 58 |         b.iter(|| {
 59 |             let char_count = target.len_chars();
 60 |             let at = random::<usize>() % char_count;
 61 |             target.insert(at, "a");
 62 |         })
 63 |     });
 64 | 
 65 |     group.bench_function("start", |b| {
 66 |         let mut target = Rope::from_str(TEXT);
 67 |         b.iter(|| {
 68 |             target.insert(0, "a");
 69 |         })
 70 |     });
 71 | 
 72 |     group.bench_function("middle", |b| {
 73 |         let mut target = Rope::from_str(TEXT);
 74 |         b.iter(|| {
 75 |             let char_count = target.len_chars();
 76 |             let midpoint = char_count / 2;
 77 |             target.insert(midpoint, "a");
 78 |         })
 79 |     });
 80 | 
 81 |     group.bench_function("end", |b| {
 82 |         let mut target = Rope::from_str(TEXT);
 83 |         b.iter(|| {
 84 |             let tail = target.len_chars();
 85 |             target.insert(tail, "a");
 86 |         })
 87 |     });
 88 | }
 82 | 
 83 | /// Benchmarks inserting a short sentence at random, start, middle and end
 84 | /// positions.
 85 | ///
 86 | /// Each benchmark builds its rope from `TEXT` once; the rope then keeps
 87 | /// growing as the timed iterations insert into it.
 88 | fn insert_medium(c: &mut Criterion) {
 89 |     // The text inserted by every benchmark in this group.
 90 |     const SNIPPET: &str = "This is some text.";
 91 | 
 92 |     let mut group = c.benchmark_group("insert_medium");
 93 | 
 94 |     group.bench_function("random", |b| {
 95 |         let mut target = Rope::from_str(TEXT);
 96 |         b.iter(|| {
 97 |             let char_count = target.len_chars();
 98 |             let at = random::<usize>() % char_count;
 99 |             target.insert(at, SNIPPET);
100 |         })
101 |     });
102 | 
103 |     group.bench_function("start", |b| {
104 |         let mut target = Rope::from_str(TEXT);
105 |         b.iter(|| {
106 |             target.insert(0, SNIPPET);
107 |         })
108 |     });
109 | 
110 |     group.bench_function("middle", |b| {
111 |         let mut target = Rope::from_str(TEXT);
112 |         b.iter(|| {
113 |             let char_count = target.len_chars();
114 |             let midpoint = char_count / 2;
115 |             target.insert(midpoint, SNIPPET);
116 |         })
117 |     });
118 | 
119 |     group.bench_function("end", |b| {
120 |         let mut target = Rope::from_str(TEXT);
121 |         b.iter(|| {
122 |             let tail = target.len_chars();
123 |             target.insert(tail, SNIPPET);
124 |         })
125 |     });
126 | }
117 | 
118 | const INSERT_TEXT: &str = include_str!("small.txt");
119 | 
120 | /// Benchmarks inserting the whole of `INSERT_TEXT` at random, start, middle
121 | /// and end positions.
122 | ///
123 | /// Each benchmark builds its rope from `TEXT` once; the rope then keeps
124 | /// growing as the timed iterations insert into it.
125 | fn insert_large(c: &mut Criterion) {
126 |     let mut group = c.benchmark_group("insert_large");
127 | 
128 |     group.bench_function("random", |b| {
129 |         let mut target = Rope::from_str(TEXT);
130 |         b.iter(|| {
131 |             let char_count = target.len_chars();
132 |             let at = random::<usize>() % char_count;
133 |             target.insert(at, INSERT_TEXT);
134 |         })
135 |     });
136 | 
137 |     group.bench_function("start", |b| {
138 |         let mut target = Rope::from_str(TEXT);
139 |         b.iter(|| {
140 |             target.insert(0, INSERT_TEXT);
141 |         })
142 |     });
143 | 
144 |     group.bench_function("middle", |b| {
145 |         let mut target = Rope::from_str(TEXT);
146 |         b.iter(|| {
147 |             let char_count = target.len_chars();
148 |             let midpoint = char_count / 2;
149 |             target.insert(midpoint, INSERT_TEXT);
150 |         })
151 |     });
152 | 
153 |     group.bench_function("end", |b| {
154 |         let mut target = Rope::from_str(TEXT);
155 |         b.iter(|| {
156 |             let tail = target.len_chars();
157 |             target.insert(tail, INSERT_TEXT);
158 |         })
159 |     });
160 | }
154 | 
155 | //----
156 | 
157 | /// Benchmarks random single-character insertions into a clone of a rope.
158 | ///
159 | /// The working clone is replaced by a fresh clone of the pristine rope
160 | /// whenever the insertion counter has passed 32.
161 | fn insert_after_clone(c: &mut Criterion) {
162 |     c.bench_function("insert_after_clone", |b| {
163 |         let pristine = Rope::from_str(TEXT);
164 |         let mut working = pristine.clone();
165 |         let mut inserts_since_clone = 0;
166 |         b.iter(|| {
167 |             if inserts_since_clone > 32 {
168 |                 inserts_since_clone = 0;
169 |                 working = pristine.clone();
170 |             }
171 |             let char_count = working.len_chars();
172 |             let at = random::<usize>() % char_count;
173 |             working.insert(at, "a");
174 |             inserts_since_clone += 1;
175 |         })
176 |     });
177 | }
173 | 
174 | //----
175 | 
176 | criterion_group!(
177 |     benches,
178 |     insert_char,
179 |     insert_small,
180 |     insert_medium,
181 |     insert_large,
182 |     insert_after_clone
183 | );
184 | criterion_main!(benches);
185 | 


--------------------------------------------------------------------------------
/benches/iterators.rs:
--------------------------------------------------------------------------------
  1 | extern crate criterion;
  2 | extern crate ropey;
  3 | 
  4 | use criterion::{criterion_group, criterion_main, Criterion};
  5 | use ropey::Rope;
  6 | 
  7 | const TEXT: &str = include_str!("large.txt");
  8 | const TEXT_TINY: &str = include_str!("tiny.txt");
  9 | 
 10 | //----
 11 | 
 12 | /// Benchmarks stepping backwards through byte, char and chunk iterators.
 13 | ///
 14 | /// Each iterator is created at the end of a rope built from `TEXT` and is
 15 | /// replaced by a clone of that starting iterator once `prev()` yields `None`.
 16 | fn iter_prev(c: &mut Criterion) {
 17 |     let mut group = c.benchmark_group("iter_prev");
 18 | 
 19 |     group.bench_function("bytes", |b| {
 20 |         let rope = Rope::from_str(TEXT);
 21 |         let at_end = rope.bytes_at(rope.len_bytes());
 22 |         let mut cursor = at_end.clone();
 23 |         b.iter(|| {
 24 |             if cursor.prev().is_none() {
 25 |                 cursor = at_end.clone();
 26 |             }
 27 |         })
 28 |     });
 29 | 
 30 |     group.bench_function("chars", |b| {
 31 |         let rope = Rope::from_str(TEXT);
 32 |         let at_end = rope.chars_at(rope.len_chars());
 33 |         let mut cursor = at_end.clone();
 34 |         b.iter(|| {
 35 |             if cursor.prev().is_none() {
 36 |                 cursor = at_end.clone();
 37 |             }
 38 |         })
 39 |     });
 40 | 
 41 |     group.bench_function("chunks", |b| {
 42 |         let rope = Rope::from_str(TEXT);
 43 |         // Only the iterator (first tuple element) is needed here.
 44 |         let (at_end, ..) = rope.chunks_at_char(rope.len_chars());
 45 |         let mut cursor = at_end.clone();
 46 |         b.iter(|| {
 47 |             if cursor.prev().is_none() {
 48 |                 cursor = at_end.clone();
 49 |             }
 50 |         })
 51 |     });
 52 | }
 48 | 
 49 | /// Benchmarks stepping backwards through the line iterator for ropes built
 50 | /// from `TEXT` and `TEXT_TINY`.
 51 | ///
 52 | /// The iterator starts at `len_lines()` and is replaced by a clone of that
 53 | /// starting iterator once `prev()` yields `None`.
 54 | fn iter_prev_lines(c: &mut Criterion) {
 55 |     let mut group = c.benchmark_group("iter_prev_lines");
 56 | 
 57 |     for &(label, text) in [("lines", TEXT), ("lines_tiny", TEXT_TINY)].iter() {
 58 |         group.bench_function(label, |b| {
 59 |             let rope = Rope::from_str(text);
 60 |             let at_end = rope.lines_at(rope.len_lines());
 61 |             let mut cursor = at_end.clone();
 62 |             b.iter(|| {
 63 |                 if cursor.prev().is_none() {
 64 |                     cursor = at_end.clone();
 65 |                 }
 66 |             })
 67 |         });
 68 |     }
 69 | }
 74 | 
 75 | /// Benchmarks `next()` on endlessly cycling byte, char and chunk iterators
 76 | /// over a rope built from `TEXT`.
 77 | fn iter_next(c: &mut Criterion) {
 78 |     let mut group = c.benchmark_group("iter_next");
 79 | 
 80 |     // Registers one benchmark that advances `rope.$make().cycle()` by a
 81 |     // single item per iteration.
 82 |     macro_rules! bench_next {
 83 |         ($label:expr, $make:ident) => {
 84 |             group.bench_function($label, |b| {
 85 |                 let rope = Rope::from_str(TEXT);
 86 |                 let mut cycled = rope.$make().cycle();
 87 |                 b.iter(|| {
 88 |                     cycled.next();
 89 |                 })
 90 |             });
 91 |         };
 92 |     }
 93 | 
 94 |     bench_next!("bytes", bytes);
 95 |     bench_next!("chars", chars);
 96 |     bench_next!("chunks", chunks);
 97 | }
102 | 
103 | /// Benchmarks `next()` on an endlessly cycling line iterator for ropes built
104 | /// from `TEXT` and `TEXT_TINY`.
105 | fn iter_next_lines(c: &mut Criterion) {
106 |     let mut group = c.benchmark_group("iter_next_lines");
107 | 
108 |     for &(label, text) in [("lines", TEXT), ("lines_tiny", TEXT_TINY)].iter() {
109 |         group.bench_function(label, |b| {
110 |             let rope = Rope::from_str(text);
111 |             let mut cycled = rope.lines().cycle();
112 |             b.iter(|| {
113 |                 cycled.next();
114 |             })
115 |         });
116 |     }
117 | }
122 | 
123 | /// Benchmarks creating byte, char, line and chunk iterators at the start of
124 | /// a rope built from `TEXT`.
125 | fn iter_create(c: &mut Criterion) {
126 |     let mut group = c.benchmark_group("iter_create");
127 | 
128 |     // Registers one benchmark that calls `rope.$make()` and discards the
129 |     // resulting iterator on every iteration.
130 |     macro_rules! bench_create {
131 |         ($label:expr, $make:ident) => {
132 |             group.bench_function($label, |b| {
133 |                 let rope = Rope::from_str(TEXT);
134 |                 b.iter(|| {
135 |                     let _ = rope.$make();
136 |                 })
137 |             });
138 |         };
139 |     }
140 | 
141 |     bench_create!("bytes", bytes);
142 |     bench_create!("chars", chars);
143 |     bench_create!("lines", lines);
144 |     bench_create!("chunks", chunks);
145 | }
154 | 
155 | /// Benchmarks creating iterators at successive positions of a rope built
156 | /// from `TEXT`.
157 | ///
158 | /// The position advances by one per iteration and wraps around after
159 | /// reaching the relevant length (the length itself is a valid position).
160 | fn iter_create_at(c: &mut Criterion) {
161 |     let mut group = c.benchmark_group("iter_create_at");
162 | 
163 |     // Registers one benchmark that calls `rope.$at_fn(pos)`, where `pos`
164 |     // walks through `0..=rope.$len_fn()` repeatedly.
165 |     macro_rules! bench_create_at {
166 |         ($label:expr, $len_fn:ident, $at_fn:ident) => {
167 |             group.bench_function($label, |b| {
168 |                 let rope = Rope::from_str(TEXT);
169 |                 let limit = rope.$len_fn();
170 |                 let mut step = 0;
171 |                 b.iter(|| {
172 |                     rope.$at_fn(step % (limit + 1));
173 |                     step += 1;
174 |                 })
175 |             });
176 |         };
177 |     }
178 | 
179 |     bench_create_at!("bytes", len_bytes, bytes_at);
180 |     bench_create_at!("chars", len_chars, chars_at);
181 |     bench_create_at!("lines", len_lines, lines_at);
182 |     bench_create_at!("chunks_at_byte", len_bytes, chunks_at_byte);
183 |     bench_create_at!("chunks_at_char", len_chars, chunks_at_char);
184 |     bench_create_at!("chunks_at_line_break", len_lines, chunks_at_line_break);
185 | }
218 | 
219 | //----
220 | 
221 | criterion_group!(
222 |     benches,
223 |     iter_prev,
224 |     iter_prev_lines,
225 |     iter_next,
226 |     iter_next_lines,
227 |     iter_create,
228 |     iter_create_at,
229 | );
230 | criterion_main!(benches);
231 | 


--------------------------------------------------------------------------------
/benches/queries.rs:
--------------------------------------------------------------------------------
  1 | extern crate criterion;
  2 | extern crate rand;
  3 | extern crate ropey;
  4 | 
  5 | use criterion::{criterion_group, criterion_main, Criterion};
  6 | use rand::random;
  7 | use ropey::Rope;
  8 | 
  9 | const TEXT: &str = include_str!("large.txt");
 10 | const SMALL_TEXT: &str = include_str!("small.txt");
 11 | 
 12 | //----
 13 | 
 14 | /// Benchmarks the byte/char/line index conversion methods at random
 15 | /// positions of a rope built from `TEXT`.
 16 | fn index_convert(c: &mut Criterion) {
 17 |     let mut group = c.benchmark_group("index_convert");
 18 | 
 19 |     // Registers one benchmark that calls `rope.$convert_fn(idx)` with a
 20 |     // random `idx` in `0..=rope.$len_fn()`.
 21 |     macro_rules! bench_convert {
 22 |         ($label:expr, $len_fn:ident, $convert_fn:ident) => {
 23 |             group.bench_function($label, |b| {
 24 |                 let rope = Rope::from_str(TEXT);
 25 |                 let limit = rope.$len_fn();
 26 |                 b.iter(|| {
 27 |                     rope.$convert_fn(random::<usize>() % (limit + 1));
 28 |                 })
 29 |             });
 30 |         };
 31 |     }
 32 | 
 33 |     bench_convert!("byte_to_char", len_bytes, byte_to_char);
 34 |     bench_convert!("byte_to_line", len_bytes, byte_to_line);
 35 |     bench_convert!("char_to_byte", len_chars, char_to_byte);
 36 |     bench_convert!("char_to_line", len_chars, char_to_line);
 37 |     bench_convert!("line_to_byte", len_lines, line_to_byte);
 38 |     bench_convert!("line_to_char", len_lines, line_to_char);
 39 | }
 65 | 
 66 | /// Benchmarks element and chunk lookups at random positions, both on a rope
 67 | /// built from `TEXT` and on a slice of that rope.
 68 | fn get(c: &mut Criterion) {
 69 |     let mut group = c.benchmark_group("get");
 70 | 
 71 |     // Element lookup on the rope: random index in `0..rope.$len_fn()`.
 72 |     macro_rules! bench_element {
 73 |         ($label:expr, $len_fn:ident, $get_fn:ident) => {
 74 |             group.bench_function($label, |b| {
 75 |                 let rope = Rope::from_str(TEXT);
 76 |                 let limit = rope.$len_fn();
 77 |                 b.iter(|| {
 78 |                     rope.$get_fn(random::<usize>() % limit);
 79 |                 })
 80 |             });
 81 |         };
 82 |     }
 83 | 
 84 |     // Chunk lookup on the rope: random index in `0..=rope.$len_fn()`.
 85 |     macro_rules! bench_rope_chunk {
 86 |         ($label:expr, $len_fn:ident, $chunk_fn:ident) => {
 87 |             group.bench_function($label, |b| {
 88 |                 let rope = Rope::from_str(TEXT);
 89 |                 let limit = rope.$len_fn();
 90 |                 b.iter(|| {
 91 |                     rope.$chunk_fn(random::<usize>() % (limit + 1));
 92 |                 })
 93 |             });
 94 |         };
 95 |     }
 96 | 
 97 |     // Chunk lookup on a slice that trims 324 chars from the front and 213
 98 |     // chars from the back of the rope: random index in `0..=view.$len_fn()`.
 99 |     macro_rules! bench_slice_chunk {
100 |         ($label:expr, $len_fn:ident, $chunk_fn:ident) => {
101 |             group.bench_function($label, |b| {
102 |                 let rope = Rope::from_str(TEXT);
103 |                 let view = rope.slice(324..(rope.len_chars() - 213));
104 |                 let limit = view.$len_fn();
105 |                 b.iter(|| {
106 |                     view.$chunk_fn(random::<usize>() % (limit + 1));
107 |                 })
108 |             });
109 |         };
110 |     }
111 | 
112 |     bench_element!("byte", len_bytes, byte);
113 |     bench_element!("char", len_chars, char);
114 |     bench_element!("line", len_lines, line);
115 | 
116 |     bench_rope_chunk!("chunk_at_byte", len_bytes, chunk_at_byte);
117 |     bench_slice_chunk!("chunk_at_byte_slice", len_bytes, chunk_at_byte);
118 | 
119 |     bench_rope_chunk!("chunk_at_char", len_chars, chunk_at_char);
120 |     bench_slice_chunk!("chunk_at_char_slice", len_chars, chunk_at_char);
121 | 
122 |     bench_rope_chunk!("chunk_at_line_break", len_lines, chunk_at_line_break);
123 |     bench_slice_chunk!("chunk_at_line_break_slice", len_lines, chunk_at_line_break);
124 | }
144 | 
145 | /// Benchmarks creating slices of ropes: random ranges, fixed 64-char
146 | /// ranges, ranges of a small rope, and full-range slices of a rope and of
147 | /// a slice.
148 | fn slice(c: &mut Criterion) {
149 |     let mut group = c.benchmark_group("slice");
150 | 
151 |     group.bench_function("slice", |b| {
152 |         let rope = Rope::from_str(TEXT);
153 |         let char_count = rope.len_chars();
154 |         b.iter(|| {
155 |             let first = random::<usize>() % (char_count + 1);
156 |             let second = random::<usize>() % (char_count + 1);
157 |             // Order the two random endpoints so the range is never reversed.
158 |             let (lo, hi) = if first > second {
159 |                 (second, first)
160 |             } else {
161 |                 (first, second)
162 |             };
163 |             rope.slice(lo..hi);
164 |         })
165 |     });
166 | 
167 |     group.bench_function("slice_small", |b| {
168 |         let rope = Rope::from_str(TEXT);
169 |         let char_count = rope.len_chars();
170 |         b.iter(|| {
171 |             // Clamp the start so the 64-char range stays inside the rope.
172 |             let lo = (random::<usize>() % (char_count + 1)).min(char_count - 65);
173 |             rope.slice(lo..lo + 64);
174 |         })
175 |     });
176 | 
177 |     group.bench_function("slice_from_small_rope", |b| {
178 |         let rope = Rope::from_str(SMALL_TEXT);
179 |         let char_count = rope.len_chars();
180 |         b.iter(|| {
181 |             let first = random::<usize>() % (char_count + 1);
182 |             let second = random::<usize>() % (char_count + 1);
183 |             let (lo, hi) = if first > second {
184 |                 (second, first)
185 |             } else {
186 |                 (first, second)
187 |             };
188 |             rope.slice(lo..hi);
189 |         })
190 |     });
191 | 
192 |     group.bench_function("slice_whole_rope", |b| {
193 |         let rope = Rope::from_str(TEXT);
194 |         b.iter(|| {
195 |             rope.slice(..);
196 |         })
197 |     });
198 | 
199 |     group.bench_function("slice_whole_slice", |b| {
200 |         let rope = Rope::from_str(TEXT);
201 |         let char_count = rope.len_chars();
202 |         let inner = rope.slice(1..char_count - 1);
203 |         b.iter(|| {
204 |             inner.slice(..);
205 |         })
206 |     });
207 | }
203 | 
204 | //----
205 | 
206 | criterion_group!(benches, index_convert, get, slice,);
207 | criterion_main!(benches);
208 | 


--------------------------------------------------------------------------------
/benches/remove.rs:
--------------------------------------------------------------------------------
  1 | extern crate criterion;
  2 | extern crate rand;
  3 | extern crate ropey;
  4 | 
  5 | use criterion::{criterion_group, criterion_main, Criterion};
  6 | use rand::random;
  7 | use ropey::Rope;
  8 | 
  9 | const TEXT: &str = include_str!("large.txt");
 10 | const TEXT_SMALL: &str = include_str!("small.txt");
 11 | 
 12 | /// Returns `text` concatenated with itself `n` times.
 13 | ///
 14 | /// `n == 0` yields an empty string.
 15 | fn mul_string_length(text: &str, n: usize) -> String {
 16 |     // `str::repeat` gives the same result as pushing `text` in a loop.
 17 |     text.repeat(n)
 18 | }
 19 | 
 20 | //----
 21 | 
 22 | const LEN_MUL_SMALL: usize = 1;
 23 | 
 24 | /// Benchmarks removing a single char at random, start, middle and end
 25 | /// positions of a rope.
 26 | ///
 27 | /// The rope is rebuilt from the original text once it has shrunk to half
 28 | /// of `TEXT`'s byte length or less.  The check uses `<=` rather than `==`
 29 | /// because one removal can take out more than one byte and step over the
 30 | /// exact halfway point; the rope would then drain to empty and the
 31 | /// benchmark would time no-op removals.
 32 | fn remove_small(c: &mut Criterion) {
 33 |     let mut group = c.benchmark_group("remove_small");
 34 | 
 35 |     group.bench_function("random", |bench| {
 36 |         let text = mul_string_length(TEXT, LEN_MUL_SMALL);
 37 |         let mut rope = Rope::from_str(&text);
 38 | 
 39 |         bench.iter(|| {
 40 |             let len = rope.len_chars();
 41 |             let start = random::<usize>() % (len + 1);
 42 |             let end = (start + 1).min(len);
 43 |             rope.remove(start..end);
 44 | 
 45 |             if rope.len_bytes() <= TEXT.len() / 2 {
 46 |                 rope = Rope::from_str(&text);
 47 |             }
 48 |         })
 49 |     });
 50 | 
 51 |     group.bench_function("start", |bench| {
 52 |         let text = mul_string_length(TEXT, LEN_MUL_SMALL);
 53 |         let mut rope = Rope::from_str(&text);
 54 | 
 55 |         bench.iter(|| {
 56 |             let len = rope.len_chars();
 57 |             let start = 0;
 58 |             let end = (start + 1).min(len);
 59 |             rope.remove(start..end);
 60 | 
 61 |             if rope.len_bytes() <= TEXT.len() / 2 {
 62 |                 rope = Rope::from_str(&text);
 63 |             }
 64 |         })
 65 |     });
 66 | 
 67 |     group.bench_function("middle", |bench| {
 68 |         let text = mul_string_length(TEXT, LEN_MUL_SMALL);
 69 |         let mut rope = Rope::from_str(&text);
 70 | 
 71 |         bench.iter(|| {
 72 |             let len = rope.len_chars();
 73 |             let start = len / 2;
 74 |             let end = (start + 1).min(len);
 75 |             rope.remove(start..end);
 76 | 
 77 |             if rope.len_bytes() <= TEXT.len() / 2 {
 78 |                 rope = Rope::from_str(&text);
 79 |             }
 80 |         })
 81 |     });
 82 | 
 83 |     group.bench_function("end", |bench| {
 84 |         let text = mul_string_length(TEXT, LEN_MUL_SMALL);
 85 |         let mut rope = Rope::from_str(&text);
 86 | 
 87 |         bench.iter(|| {
 88 |             let len = rope.len_chars();
 89 |             let end = len;
 90 |             // `min(len)` keeps the subtraction from underflowing.
 91 |             let start = end - (1).min(len);
 92 |             rope.remove(start..end);
 93 | 
 94 |             if rope.len_bytes() <= TEXT.len() / 2 {
 95 |                 rope = Rope::from_str(&text);
 96 |             }
 97 |         })
 98 |     });
 99 | }
 91 | 
 92 | const LEN_MUL_MEDIUM: usize = 1;
 93 | 
 94 | /// Benchmarks removing up to 15 chars at random, start, middle and end
 95 | /// positions of a rope.
 96 | ///
 97 | /// The rope is rebuilt from the original text once it has shrunk to half
 98 | /// of `TEXT`'s byte length or less.  The check uses `<=` rather than `==`
 99 | /// because each removal takes out many bytes at once and will usually step
100 | /// over the exact halfway point; the rope would then drain to empty and
101 | /// the benchmark would time no-op removals.
102 | fn remove_medium(c: &mut Criterion) {
103 |     let mut group = c.benchmark_group("remove_medium");
104 | 
105 |     group.bench_function("random", |bench| {
106 |         let text = mul_string_length(TEXT, LEN_MUL_MEDIUM);
107 |         let mut rope = Rope::from_str(&text);
108 | 
109 |         bench.iter(|| {
110 |             let len = rope.len_chars();
111 |             let start = random::<usize>() % (len + 1);
112 |             let end = (start + 15).min(len);
113 |             rope.remove(start..end);
114 | 
115 |             if rope.len_bytes() <= TEXT.len() / 2 {
116 |                 rope = Rope::from_str(&text);
117 |             }
118 |         })
119 |     });
120 | 
121 |     group.bench_function("start", |bench| {
122 |         let text = mul_string_length(TEXT, LEN_MUL_MEDIUM);
123 |         let mut rope = Rope::from_str(&text);
124 | 
125 |         bench.iter(|| {
126 |             let len = rope.len_chars();
127 |             let start = 0;
128 |             let end = (start + 15).min(len);
129 |             rope.remove(start..end);
130 | 
131 |             if rope.len_bytes() <= TEXT.len() / 2 {
132 |                 rope = Rope::from_str(&text);
133 |             }
134 |         })
135 |     });
136 | 
137 |     group.bench_function("middle", |bench| {
138 |         let text = mul_string_length(TEXT, LEN_MUL_MEDIUM);
139 |         let mut rope = Rope::from_str(&text);
140 | 
141 |         bench.iter(|| {
142 |             let len = rope.len_chars();
143 |             let start = len / 2;
144 |             let end = (start + 15).min(len);
145 |             rope.remove(start..end);
146 | 
147 |             if rope.len_bytes() <= TEXT.len() / 2 {
148 |                 rope = Rope::from_str(&text);
149 |             }
150 |         })
151 |     });
152 | 
153 |     group.bench_function("end", |bench| {
154 |         let text = mul_string_length(TEXT, LEN_MUL_MEDIUM);
155 |         let mut rope = Rope::from_str(&text);
156 | 
157 |         bench.iter(|| {
158 |             let len = rope.len_chars();
159 |             let end = len;
160 |             // `min(len)` keeps the subtraction from underflowing.
161 |             let start = end - (15).min(len);
162 |             rope.remove(start..end);
163 | 
164 |             if rope.len_bytes() <= TEXT.len() / 2 {
165 |                 rope = Rope::from_str(&text);
166 |             }
167 |         })
168 |     });
169 | }
161 | 
162 | const LEN_MUL_LARGE: usize = 4; // Second argument to `mul_string_length` in the `remove_large` benches.
163 | /// Benchmarks removing up to `TEXT_SMALL.len()` chars per iteration at four positions of a rope.
164 | fn remove_large(c: &mut Criterion) {
165 |     let mut group = c.benchmark_group("remove_large");
166 |     // Every case rebuilds its rope from `source` as soon as it has been emptied.
167 |     group.bench_function("random", |b| {
168 |         let source = mul_string_length(TEXT, LEN_MUL_LARGE);
169 |         let mut rope = Rope::from_str(&source);
170 |         // The removed span begins at a random char offset in `0..=char_count`.
171 |         b.iter(|| {
172 |             let char_count = rope.len_chars();
173 |             let from = random::<usize>() % (char_count + 1);
174 |             let to = char_count.min(from + TEXT_SMALL.len());
175 |             rope.remove(from..to);
176 | 
177 |             if rope.len_bytes() == 0 {
178 |                 rope = Rope::from_str(&source);
179 |             }
180 |         })
181 |     });
182 | 
183 |     group.bench_function("start", |b| {
184 |         let source = mul_string_length(TEXT, LEN_MUL_LARGE);
185 |         let mut rope = Rope::from_str(&source);
186 | 
187 |         b.iter(|| {
188 |             let char_count = rope.len_chars();
189 |             // Always cut from the very beginning of the rope.
190 |             let to = char_count.min(TEXT_SMALL.len());
191 |             rope.remove(0..to);
192 | 
193 |             if rope.len_bytes() == 0 {
194 |                 rope = Rope::from_str(&source);
195 |             }
196 |         })
197 |     });
198 | 
199 |     group.bench_function("middle", |b| {
200 |         let source = mul_string_length(TEXT, LEN_MUL_LARGE);
201 |         let mut rope = Rope::from_str(&source);
202 |         // The removed span begins at the rope's midpoint (in chars).
203 |         b.iter(|| {
204 |             let char_count = rope.len_chars();
205 |             let from = char_count / 2;
206 |             let to = char_count.min(from + TEXT_SMALL.len());
207 |             rope.remove(from..to);
208 | 
209 |             if rope.len_bytes() == 0 {
210 |                 rope = Rope::from_str(&source);
211 |             }
212 |         })
213 |     });
214 | 
215 |     group.bench_function("end", |b| {
216 |         let source = mul_string_length(TEXT, LEN_MUL_LARGE);
217 |         let mut rope = Rope::from_str(&source);
218 | 
219 |         b.iter(|| {
220 |             let char_count = rope.len_chars();
221 |             // Cut the trailing span, clamped so it never exceeds the rope.
222 |             let span = char_count.min(TEXT_SMALL.len());
223 |             rope.remove(char_count - span..char_count);
224 | 
225 |             if rope.len_bytes() == 0 {
226 |                 rope = Rope::from_str(&source);
227 |             }
228 |         })
229 |     });
230 | }
231 | /// Benchmarks one-char removals at random positions on a clone of a rope, re-cloning periodically.
232 | fn remove_initial_after_clone(c: &mut Criterion) {
233 |     c.bench_function("remove_initial_after_clone", |b| {
234 |         let pristine = Rope::from_str(TEXT);
235 |         let mut working = pristine.clone();
236 |         let mut edits = 0;
237 |         b.iter(|| {
238 |             // Take a fresh clone once more than 32 removals have been applied.
239 |             if edits > 32 {
240 |                 working = pristine.clone();
241 |                 edits = 0;
242 |             }
243 |             let char_count = working.len_chars();
244 |             let from = random::<usize>() % (char_count + 1);
245 |             working.remove(from..char_count.min(from + 1));
246 |             edits += 1;
247 |         })
248 |     });
249 | }
250 | 
251 | //----
252 | // Collect the four benchmark functions into the `benches` group and pass it to `criterion_main!`.
253 | criterion_group!(
254 |     benches,
255 |     remove_small,
256 |     remove_medium,
257 |     remove_large,
258 |     remove_initial_after_clone
259 | );
260 | criterion_main!(benches);
261 | 


--------------------------------------------------------------------------------
/benches/small.txt:
--------------------------------------------------------------------------------
 1 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas sit amet tellus 
 2 | nec turpis feugiat semper. Nam at nulla laoreet, finibus eros sit amet, fringilla 
 3 | mauris. Fusce vestibulum nec ligula efficitur laoreet. Nunc orci leo, varius eget 
 4 | ligula vulputate, consequat eleifend nisi. Cras justo purus, imperdiet a augue 
 5 | malesuada, convallis cursus libero. Fusce pretium arcu in elementum laoreet. Duis 
 6 | mauris nulla, suscipit at est nec, malesuada pellentesque eros. Quisque semper porta 
 7 | malesuada. Nunc hendrerit est ac faucibus mollis. Nam fermentum id libero sed 
 8 | egestas. Duis a accumsan sapien. Nam neque diam, congue non erat et, porta sagittis 
 9 | turpis. Vivamus vitae mauris sit amet massa mollis molestie. Morbi scelerisque, 
10 | augue id congue imperdiet, felis lacus euismod dui, vitae facilisis massa dui quis 
11 | sapien. Vivamus hendrerit a urna a lobortis.
12 | 
13 | Donec ut suscipit risus. Vivamus dictum auctor vehicula. Sed lacinia ligula sit amet 
14 | urna tristique commodo. Sed sapien risus, egestas ac tempus vel, pellentesque sed 
15 | velit. Duis pulvinar blandit suscipit. Curabitur viverra dignissim est quis ornare. 
16 | Nam et lectus purus. Integer sed augue vehicula, volutpat est vel, convallis justo. 
17 | Suspendisse a convallis nibh, pulvinar rutrum nisi. Fusce ultrices accumsan mauris 
18 | vitae ornare. Cras elementum et ante at tincidunt. Sed luctus scelerisque lobortis. 
19 | Sed vel dictum enim. Fusce quis arcu euismod, iaculis mi id, placerat nulla. 
20 | Pellentesque porttitor felis elementum justo porttitor auctor.
21 | 


--------------------------------------------------------------------------------
/benches/tiny.txt:
--------------------------------------------------------------------------------
1 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas sit amet tellus 
2 | nec turpis feugiat semper. Nam at nulla laoreet, finibus eros sit amet, fringilla 
3 | mauris. Fusce vestibulum nec ligula efficitur laoreet. Nunc orci leo, varius eget 
4 | ligula vulputate, consequat eleifend nisi. Cras justo purus, imperdiet a augue 
5 | malesuada, convallis cursus libero. Fusce pretium arcu in elementum laoreet. Duis 
6 | mauris nulla, suscipit at est nec, malesuada pellentesque eros. Quisque semper porta 
7 | malesuada. Nunc hendrerit est ac faucibus mollis. Nam fermentum id libero sed 
8 | egestas. Duis a accumsan sapien. Nam neque diam, congue non erat et, porta sagittis 
9 | turpis.


--------------------------------------------------------------------------------
/design/design.md:
--------------------------------------------------------------------------------
  1 | # Ropey's Design
  2 | 
  3 | This document is an overview of Ropey's technical design.  It is primarily targeted at potential contributors, to help get up-to-speed with the codebase.  But it may also be of interest to users of the library and the generally-curious.
  4 | 
  5 | This document assumes prior knowledge of [ropes](https://en.wikipedia.org/wiki/Rope_(data_structure)) and [b-trees](https://en.wikipedia.org/wiki/B-tree).
  6 | 
  7 | 
  8 | ## Directory Structure
  9 | 
 10 | Public-facing:
 11 | 
 12 | - `src/rope.rs`: the high-level implementation of `Rope`.
 13 | - `src/slice.rs`: implementation of `RopeSlice`.
 14 | - `src/iter.rs`: implementations of all the iterators.
 15 | - `src/rope_builder.rs`: implementation of `RopeBuilder`.
 16 | - `src/str_utils.rs`: utility functions that operate on `&str` slices.  For example, functions to count chars and line endings.
 17 | 
 18 | Internal-only:
 19 | - `src/tree/`: the low-level implementation of `Rope`'s internals, where most of the meat of the b-tree rope is.
 20 | 
 21 | 
 22 | ## B-tree Rope
 23 | 
 24 | The core data structure in Ropey is a b-tree rope.  This data-structure was chosen for several reasons:
 25 | 
 26 | - It has good random-access editing performance.  [Gap buffers](https://en.wikipedia.org/wiki/Gap_buffer) are another popular choice of data structure for text editing, but they perform poorly on random-access edits.  Multiple-cursor support is common in code editors, so being able to efficiently edit at many locations simultaneously is important.
 27 | - It can naturally track `char` indices and line endings.  Most other data structures require additional external data structures to track such information.
 28 | - B-trees minimize pointer indirection and make traversal patterns more coherent when properly implemented. This is important for performing well with modern memory architectures and large data sets.
 29 | 
 30 | Ropey's b-tree nodes are primarily implemented in three files:
 31 | 
 32 | - `src/tree/node_text.rs`: implementation of `NodeText`, a small string for storing text in leaf nodes.
 33 | - `src/tree/node_children.rs`: implementation of `NodeChildren`, a fixed-capacity vec for storing child meta-data and child pointers in internal nodes.
 34 | - `src/tree/node.rs`: the main `Node` implementation, which is just an `enum` with the above two types as its leaf node and internal node variants.
 35 | 
 36 | 
 37 | ## Memory Layout
 38 | 
 39 | The structures in Ropey's rope implementation have been carefully designed to:
 40 | 
 41 | 1. Minimize pointer chasing.
 42 | 2. Make it easy for memory allocators to compactly store nodes.
 43 | 3. Make it easy for memory allocators to compactly place new nodes in the space vacated by old freed nodes.
 44 | 
 45 | To motivate the design a bit, let's explore what a naive implementation of a b-tree rope might look like.  And for simplicity, let's only track bytes in the rope, not chars or line endings.  It could be implemented as a single `enum`, like this:
 46 | 
 47 | ``` Rust
 48 | enum Node {
 49 |     Internal {
 50 |         byte_count: usize,
 51 |         Vec<Node>,
 52 |     },
 53 |     Leaf(String),
 54 | }
 55 | ```
 56 | 
 57 | And in memory, it looks something like this (touching boxes represent contiguous areas of memory, arrows represent pointers):
 58 | 
 59 | ![Diagram of naive rope](images/naive.png)
 60 | 
 61 | There are a couple of things about this that aren't great:
 62 | 
 63 | - Leaf nodes have an extra level of indirection to get to their text data.
 64 | - Although the children of a node are all stored contiguously in memory (good for memory locality), the size of that contiguous space varies because of the varying number of children.  The memory allocated for leaf text also varies in size.  This can lead to memory fragmentation when holes in memory are left that are too small to use for subsequent sets of children or leaf text.
 65 | 
 66 | Having said that, this is actually a pretty decent design!  The fragmentation is (fingers crossed) unlikely to be a major issue with a decent allocator.  And the extra level of indirection only happens at the very bottom of the tree, so you won't accumulate additional unnecessary indirection as the tree grows.
 67 | 
 68 | But one of Ropey's design goals is to share data between Rope clones to make cloning cheap.  This means that every child needs to be kept under something like a reference-counted smart pointer so it can be shared between trees.  What happens if we do that?
 69 | 
 70 | ``` Rust
 71 | enum Node {
 72 |     Internal {
 73 |         byte_count: usize,
 74 |         Vec<Rc<Node>>,
 75 |     },
 76 |     Leaf(String),
 77 | }
 78 | ```
 79 | 
 80 | ![Diagram of bad rope](images/bad.png)
 81 | 
 82 | Suddenly things are a lot worse off:
 83 | 
 84 | - We have an extra level of memory indirection _at every level of the tree_, and _two_ levels of indirection at the leaf nodes.
 85 | - We no longer have good memory locality: children are all over the place along with their meta-data.
 86 | 
 87 | This is bad for both performance and compact memory usage.  Even just choosing which child to traverse into requires jumping all over the place in memory, because you have to check the children's metadata:
 88 | 
 89 | ![Diagram of bad jumping](images/bad_jumping.png)
 90 | 
 91 | Ropey addresses this in two ways.  First, it stores child meta-data in the _parent_ node in a coherent array.  This allows fast scanning to decide which child to traverse into.  And second, it inlines the leaf's string data into the enum.  A simplified (and not-quite-actually-workable) version of that approach looks like this:
 92 | 
 93 | ``` Rust
 94 | enum Node {
 95 |     Internal {
 96 |         child_count: u8,
 97 |         child_byte_counts: [usize; 4],  // Max four children
 98 |         children: [Rc<Node>; 4],  // Max four children
 99 |     },
100 |     Leaf {
101 |         len: usize,
102 |         [u8; 100],  // Max 100 bytes of text per leaf
103 |     },
104 | }
105 | ```
106 | 
107 | ![Diagram of good rope](images/good.png)
108 | 
109 | Now we've eliminated all unnecessary indirection and we've kept all metadata coherent in memory for fast scanning.  And (bonus!) all nodes are exactly the same size, including leaf text data.  This makes things really easy for the memory allocator.
110 | 
111 | This is essentially Ropey's design, but implemented with a bit more sophistication.  The main differences from Ropey's actual implementation are:
112 | 
113 | - Ropey tracks char count and line-ending count in addition to byte count.
114 | - Ropey wraps the details of the internal and leaf variants in separate types that both have a bit more sophistication.
115 | - Ropey uses `Arc` instead of `Rc`, so clones can be sent between threads.
116 | 
117 | But by-and-large, Ropey's memory layout is essentially identical to the code snippet above.
118 | 
119 | One final piece of the puzzle: since the inlined leaf text and the child pointers/metadata are both crammed into the same enum, they should both be sized to take up roughly the same amount of space to minimize unused bytes.  Moreover, allocators work best with sizes in the multiples of large-ish powers of two.  That's what all the weird calculations for the `MAX_*` constants in `src/tree/mod.rs` are doing.
120 | 
121 | Phew!  Hopefully that all made sense.
122 | 
123 | 
124 | ## Traversal and Mutation
125 | 
126 | Most of the logic for traversing and modifying the tree is implemented in `node.rs` as part of `Node`.  As a general rule, code that needs to know which node is root is implemented in `Rope`, code that otherwise traverses or modifies the tree is implemented in `Node`, and code that only deals with a single node is implemented in `NodeText` and `NodeChildren`.
127 | 
128 | The five main functions to really pay attention to are part of `Node`:
129 | 
130 | - `Node::edit_chunk_at_char()`
131 | - `Node::remove_char_range()`
132 | - `Node::split()`
133 | - `Node::prepend_at_depth()`
134 | - `Node::append_at_depth()`.
135 | 
136 | These are by far the most complex code in Ropey, and are the core editing operations which the `Rope` type uses to implement its own editing operations.  Be very careful when modifying them and their helper methods, as there are many invariants that must be held for everything to work properly.  Ropey has a lot of unit tests, so running `cargo test` is a useful way to help minimize the chances that you break something, but don't depend on that entirely.
137 | 
138 | 
139 | ## Tree Invariants
140 | 
141 | The following invariants must hold true for the tree to operate correctly:
142 | 
143 | - The standard b-tree invariants:
144 |     - All leaf nodes must be at the same depth.
145 |     - Internal nodes must have at least `MIN_CHILDREN` and at most `MAX_CHILDREN` children, except for the root which can have as few as two children.
146 | - All child meta-data must be accurate.
147 | - Leaf nodes must never be empty, except for the root node when it is a leaf.
148 | - CRLF pairs must never be split by a leaf node boundary.  Otherwise the code for counting line endings won't work properly.
149 | 
150 | There are some hidden-from-documentation methods on `Rope` that check for and assert these invariants:
151 | 
152 | - `Rope::assert_integrity()`: checks for basic child meta-data integrity.  This is _the most important_ check, as things will break in crazy ways if this isn't true.  If you get really strange behavior from the tree, this is the first thing to check.
153 | - `Rope::assert_invariants()`: checks that the rest of the invariants listed above hold true.  If you get panics or weird performance degradation, this is the second thing to check.
154 | 
155 | (Note: these methods are for debugging while working on Ropey, and are explicitly _not_ part of Ropey's external API promise.  They are doc-hidden for a reason.)
156 | 
157 | There is one final "invariant" that should _generally_ hold true, but doesn't strictly need to for correct operation and _may_ be violated under some circumstances:
158 | 
159 | - Leaf nodes should _generally_ not contain less than `MIN_BYTES` of text or more than `MAX_BYTES` of text.
160 | 
161 | There are two cases where this invariant might not hold true:
162 | 
163 | 1. When the root node is a leaf, it may contain less than `MIN_BYTES` of text.
164 | 2. If the text begins or ends with a CRLF pair that just pushed it over the edge in specific circumstances, then the leaf node that contains it must exceed `MAX_BYTES` to avoid splitting it.  This should be extremely rare.
165 | 
166 | In practice, case 2 is vanishingly unlikely to ever happen in real (and non-broken) text.  Nevertheless, it needs to be handled correctly by all code.
167 | 
168 | 
169 | ## Rope Clones and Thread Safety
170 | 
171 | Ropey shares data between `Rope` clones.  The approach taken for this is very simple: all nodes are wrapped in an `Arc`, and when we need to modify a node we always use `Arc::make_mut()` to access it for mutation.  `Arc::make_mut()` will provide mutable access to the node if there is only one owner, but will make a clone of the node and have you mutate that instead if ownership is shared.  This automatically leads to a thread-safe shared-data model, where nodes have copy-on-write semantics but do the more efficient in-place mutation when their ownership is not shared.
172 | 
173 | The main implication of this when working on Ropey's codebase is pretty simple: make sure to always use `Arc::make_mut()` when accessing nodes for mutation, never `Arc::get_mut()`.
174 | 
175 | 
176 | ## Unsafe Code
177 | 
178 | Ropey uses a fair bit of unsafe code.  The vast majority of it is in the implementations of `NodeChildren`, `NodeText`, and a handful of hot functions in `str_utils.rs`.
179 | 
180 | Reducing and/or better isolating the unsafe code in Ropey would be great, and pull requests along those lines are more than welcome.  However, it shouldn't be at the expense of performance or memory overhead--at least, not significantly so.
181 | 
182 | Reviews, fuzz testing, etc. of the existing unsafe code are also extremely welcome.  The more eyes and tests we have on it, the better!
183 | 


--------------------------------------------------------------------------------
/design/images/bad.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cessen/ropey/4df57d6998270cd2b64612bd5635f4eef1e3c67b/design/images/bad.png


--------------------------------------------------------------------------------
/design/images/bad_jumping.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cessen/ropey/4df57d6998270cd2b64612bd5635f4eef1e3c67b/design/images/bad_jumping.png


--------------------------------------------------------------------------------
/design/images/good.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cessen/ropey/4df57d6998270cd2b64612bd5635f4eef1e3c67b/design/images/good.png


--------------------------------------------------------------------------------
/design/images/naive.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cessen/ropey/4df57d6998270cd2b64612bd5635f4eef1e3c67b/design/images/naive.png


--------------------------------------------------------------------------------
/examples/front_page.rs:
--------------------------------------------------------------------------------
 1 | //! This is the example from the front page of Ropey's documentation.
 2 | 
 3 | extern crate ropey;
 4 | 
 5 | use std::io::Result;
 6 | 
 7 | use ropey::Rope;
 8 | use std::fs::File;
 9 | use std::io::{BufReader, BufWriter};
10 | 
11 | fn main() {
12 |     do_stuff().unwrap(); // Panics if `do_stuff` returns an error.
13 | }
14 | 
15 | /// Runs the example; split out of `main` so `?` can propagate I/O errors.
16 | fn do_stuff() -> Result<()> {
17 |     // Read the whole book into a rope.
18 |     let reader = BufReader::new(File::open("my_great_book.txt")?);
19 |     let mut text = Rope::from_reader(reader)?;
20 | 
21 |     // Show the 516th line (index 515; line indices are zero-based).
22 |     println!("{}", text.line(515));
23 | 
24 |     // Char range covered by that line.
25 |     let line_start = text.line_to_char(515);
26 |     let line_end = text.line_to_char(516);
27 | 
28 |     // Swap the line out for a better one: delete it, then insert
29 |     // the replacement at the same position.
30 |     text.remove(line_start..line_end);
31 |     text.insert(line_start, "The flowers are... so... dunno.\n");
32 | 
33 |     // Print the new line together with the four lines before it.
34 |     let context_start = text.line_to_char(511);
35 |     let context_end = text.line_to_char(516);
36 |     println!("{}", text.slice(context_start..context_end));
37 | 
38 |     // Persist the edited text back to the same file.
39 |     let writer = BufWriter::new(File::create("my_great_book.txt")?);
40 |     text.write_to(writer)?;
41 | 
42 |     Ok(())
43 | }
44 | 


--------------------------------------------------------------------------------
/examples/graphemes_iter.rs:
--------------------------------------------------------------------------------
  1 | //! This example shows how to implement a grapheme iterator over the contents
  2 | //! of a `Rope` or `RopeSlice`.  This also serves as a good starting point for
  3 | //! iterators for other kinds of segmentation, such as word boundaries.
  4 | 
  5 | #![allow(clippy::redundant_field_names)]
  6 | #![allow(dead_code)]
  7 | 
  8 | extern crate ropey;
  9 | extern crate unicode_segmentation;
 10 | 
 11 | use ropey::{iter::Chunks, RopeSlice};
 12 | use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
 13 | 
 14 | fn main() {}
 15 | 
 16 | /// An implementation of a graphemes iterator, for iterating over
 17 | /// the graphemes of a RopeSlice.
 18 | struct RopeGraphemes<'a> {
 19 |     text: RopeSlice<'a>,    // The whole slice being iterated.
 20 |     chunks: Chunks<'a>,     // Chunk iterator over `text`; yields the chunks after `cur_chunk`.
 21 |     cur_chunk: &'a str,     // Chunk currently handed to the cursor.
 22 |     cur_chunk_start: usize, // Byte offset of `cur_chunk` within `text`.
 23 |     cursor: GraphemeCursor, // Boundary finder; its current offset is where the next grapheme starts.
 24 | }
 25 | 
 26 | impl<'a> RopeGraphemes<'a> {
 27 |     /// Creates a grapheme iterator positioned at the start of `slice`.
 28 |     fn new<'b>(slice: &RopeSlice<'b>) -> RopeGraphemes<'b> {
 29 |         let mut chunks = slice.chunks();
 30 |         RopeGraphemes {
 31 |             text: *slice,
 32 |             cur_chunk: chunks.next().unwrap_or(""),
 33 |             cur_chunk_start: 0,
 34 |             cursor: GraphemeCursor::new(0, slice.len_bytes(), true),
 35 |             chunks,
 36 |         }
 37 |     }
 38 | }
 39 | 
 40 | impl<'a> Iterator for RopeGraphemes<'a> {
 41 |     type Item = RopeSlice<'a>;
 42 | 
 43 |     /// Returns the next grapheme cluster as a `RopeSlice`, or `None` when
 44 |     /// the cursor reports that there are no more boundaries.
 45 |     fn next(&mut self) -> Option<RopeSlice<'a>> {
 46 |         // The grapheme begins at the cursor's current byte offset.
 47 |         let start = self.cursor.cur_cursor();
 48 |         let end = loop {
 49 |             let step = self.cursor.next_boundary(self.cur_chunk, self.cur_chunk_start);
 50 |             match step {
 51 |                 Ok(Some(boundary)) => break boundary,
 52 |                 // No further boundary: iteration is finished.
 53 |                 Ok(None) => return None,
 54 |                 // Move on to the following chunk and try again.
 55 |                 Err(GraphemeIncomplete::NextChunk) => {
 56 |                     self.cur_chunk_start += self.cur_chunk.len();
 57 |                     self.cur_chunk = self.chunks.next().unwrap_or("");
 58 |                 }
 59 |                 // Supply the chunk containing the byte just before `idx` as context.
 60 |                 Err(GraphemeIncomplete::PreContext(idx)) => {
 61 |                     let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
 62 |                     self.cursor.provide_context(chunk, byte_idx);
 63 |                 }
 64 |                 // Any remaining error variant is treated as impossible here.
 65 |                 _ => unreachable!(),
 66 |             }
 67 |         };
 68 | 
 69 |         if start >= self.cur_chunk_start {
 70 |             // Whole grapheme lies in the current chunk: borrow it directly.
 71 |             let lo = start - self.cur_chunk_start;
 72 |             let hi = end - self.cur_chunk_start;
 73 |             Some((&self.cur_chunk[lo..hi]).into())
 74 |         } else {
 75 |             // It started in an earlier chunk: slice the rope by char index.
 76 |             let start_char = self.text.byte_to_char(start);
 77 |             let end_char = self.text.byte_to_char(end);
 78 |             Some(self.text.slice(start_char..end_char))
 79 |         }
 80 |     }
 81 | }
 82 | 
 83 | #[cfg(test)]
 84 | #[rustfmt::skip] // Because of the crazy long graphemes
 85 | mod tests {
 86 |     use super::*;
 87 |     use ropey::Rope;
 88 | 
 89 |     #[test]
 90 |     fn iter_huge_graphemes() {
 91 |         let r = Rope::from_str("Hẽ̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃llõ̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃ 
wõ̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃rld!");
 92 |         let mut grph = RopeGraphemes::new(&r.slice(..));
 93 | 
 94 |         assert_eq!(grph.next().unwrap(), "H");
 95 |         assert_eq!(grph.next().unwrap(), "ẽ̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃");
 96 |         assert_eq!(grph.next().unwrap(), "l");
 97 |         assert_eq!(grph.next().unwrap(), "l");
 98 |         assert_eq!(grph.next().unwrap(), "õ̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃");
 99 |         assert_eq!(grph.next().unwrap(), " ");
100 |         assert_eq!(grph.next().unwrap(), "w");
101 |         assert_eq!(grph.next().unwrap(), "õ̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃̃");
102 |         assert_eq!(grph.next().unwrap(), "r");
103 |         assert_eq!(grph.next().unwrap(), "l");
104 |         assert_eq!(grph.next().unwrap(), "d");
105 |         assert_eq!(grph.next().unwrap(), "!");
106 |         assert_eq!(grph.next(), None);
107 |     }
108 | 
109 |     #[test]
110 |     fn iter_regional_symbols() {
111 |         let r = Rope::from_str("🇬🇧🇯🇵🇺🇸🇫🇷🇷🇺🇨🇳🇩🇪🇪🇸🇬🇧🇯🇵🇺🇸🇫🇷🇷🇺🇨🇳🇩🇪🇪🇸🇬🇧🇯🇵🇺🇸🇫🇷🇷🇺🇨🇳🇩🇪🇪🇸");
112 |         let mut grph = RopeGraphemes::new(&r.slice(..));
113 |         // Every flag must come out as a single grapheme, in input order (8 flags, repeated 3 times).
114 |         assert_eq!(grph.next().unwrap(), "🇬🇧");
115 |         assert_eq!(grph.next().unwrap(), "🇯🇵");
116 |         assert_eq!(grph.next().unwrap(), "🇺🇸");
117 |         assert_eq!(grph.next().unwrap(), "🇫🇷");
118 |         assert_eq!(grph.next().unwrap(), "🇷🇺");
119 |         assert_eq!(grph.next().unwrap(), "🇨🇳");
120 |         assert_eq!(grph.next().unwrap(), "🇩🇪");
121 |         assert_eq!(grph.next().unwrap(), "🇪🇸");
122 |         assert_eq!(grph.next().unwrap(), "🇬🇧");
123 |         assert_eq!(grph.next().unwrap(), "🇯🇵");
124 |         assert_eq!(grph.next().unwrap(), "🇺🇸");
125 |         assert_eq!(grph.next().unwrap(), "🇫🇷");
126 |         assert_eq!(grph.next().unwrap(), "🇷🇺");
127 |         assert_eq!(grph.next().unwrap(), "🇨🇳");
128 |         assert_eq!(grph.next().unwrap(), "🇩🇪");
129 |         assert_eq!(grph.next().unwrap(), "🇪🇸");
130 |         assert_eq!(grph.next().unwrap(), "🇬🇧");
131 |         assert_eq!(grph.next().unwrap(), "🇯🇵");
132 |         assert_eq!(grph.next().unwrap(), "🇺🇸");
133 |         assert_eq!(grph.next().unwrap(), "🇫🇷");
134 |         assert_eq!(grph.next().unwrap(), "🇷🇺");
135 |         assert_eq!(grph.next().unwrap(), "🇨🇳");
136 |         assert_eq!(grph.next().unwrap(), "🇩🇪");
137 |         assert_eq!(grph.next().unwrap(), "🇪🇸");
138 |         assert_eq!(grph.next(), None); // Nothing may follow the 24th flag.
139 |     }
140 | }
141 | 


--------------------------------------------------------------------------------
/examples/read_latin_1.rs:
--------------------------------------------------------------------------------
 1 | //! Example of decoding from another text encoding on-the-fly while reading.
 2 | //! In this case, we're decoding from ISO/IEC 8859-1, which conveniently
 3 | //! happens to map 1-to-1 to the first 256 unicode scalar values.
 4 | 
 5 | extern crate ropey;
 6 | 
 7 | use std::fs::File;
 8 | use std::io;
 9 | use std::io::Read;
10 | 
11 | use ropey::RopeBuilder;
12 | 
13 | fn main() {
14 |     // Get the file path from the command line (first argument after the program name).
15 |     let filepath = if std::env::args().count() > 1 {
16 |         std::env::args().nth(1).unwrap()
17 |     } else {
18 |         eprintln!(
19 |             "You must pass a filepath!  Only received {} arguments.",
20 |             std::env::args().count()
21 |         );
22 |         panic!()
23 |     };
24 | 
25 |     // Get everything set up to begin reading and decoding.
26 |     let mut buf = vec![0u8; 1 << 14]; // Buffer for raw bytes.
27 |     let mut buf_str = String::with_capacity(1 << 15); // Buffer for decoded utf8 (bytes >= 0x80 take 2 bytes each).
28 |     let mut builder = RopeBuilder::new();
29 |     let mut file = io::BufReader::new(File::open(&filepath).unwrap());
30 | 
31 |     // Read the data in chunks, decoding and appending to the rope builder
32 |     // as we go.  The loop simply stops at the first read error.
33 |     // (Note: in real code you should handle errors from the reader!)
34 |     while let Ok(n) = file.read(&mut buf) {
35 |         if n == 0 {
36 |             break;
37 |         }
38 | 
39 |         // Decode (each byte maps to the scalar value of the same number) and append the chunk.
40 |         buf_str.clear();
41 |         for &byte in &buf[..n] {
42 |             buf_str.push(byte as char);
43 |         }
44 |         builder.append(&buf_str);
45 |     }
46 | 
47 |     // Build rope.
48 |     let _rope = builder.finish();
49 | }
50 | 


--------------------------------------------------------------------------------
/examples/search_and_replace.rs:
--------------------------------------------------------------------------------
  1 | //! Example of basic search-and-replace functionality implemented on top
  2 | //! of Ropey.
  3 | //!
  4 | //! Usage:
  5 | //!     search_and_replace <search_pattern> <replacement_text> <input_filepath>
  6 | //!
  7 | //! The file contents with the search-and-replace performed on it is sent to
  8 | //! stdout.
  9 | 
 10 | #![allow(clippy::redundant_field_names)]
 11 | 
 12 | extern crate ropey;
 13 | 
 14 | use std::fs::File;
 15 | use std::io;
 16 | 
 17 | use ropey::{iter::Chars, Rope, RopeSlice};
 18 | 
 19 | fn main() {
 20 |     // Get arguments from the command line; all three are required.
 21 |     let (search_pattern, replacement_text, filepath) = if std::env::args().count() > 3 {
 22 |         (
 23 |             std::env::args().nth(1).unwrap(),
 24 |             std::env::args().nth(2).unwrap(),
 25 |             std::env::args().nth(3).unwrap(),
 26 |         )
 27 |     } else {
 28 |         eprintln!(
 29 |             "Usage:\n    search_and_replace <search_pattern> <replacement_text> <input_filepath>"
 30 |         );
 31 |         std::process::exit(1); // A usage error must not report success to the caller.
 32 |     };
 33 | 
 34 |     // Load file contents into a rope.
 35 |     let mut text = Rope::from_reader(io::BufReader::new(File::open(&filepath).unwrap())).expect("Cannot read file: either it doesn't exist, file permissions don't allow reading, or is not utf8 text.");
 36 | 
 37 |     // Do the search-and-replace (this panics if the search pattern is empty).
 38 |     search_and_replace(&mut text, &search_pattern, &replacement_text);
 39 | 
 40 |     // Print the new text to stdout.
 41 |     println!("{}", text);
 42 | }
 43 | 
 44 | /// Searches the rope for `search_pattern` and replaces all matches with
 45 | /// `replacement_text`.
 46 | ///
 47 | /// There are several ways this could be done:  
 48 | ///
 49 | /// 1. Clone the rope and then do the search on the original while replacing
 50 | ///    on the clone.  This isn't as awful as it sounds because the clone
 51 | ///    operation is constant-time and the two ropes will share most of their
 52 | ///    storage in typical cases.  However, this probably isn't the best
 53 | ///    general solution because it will use a lot of additional space if a
 54 | ///    large percentage of the text is being replaced.
 55 | ///
 56 | /// 2. A two-stage approach: first find and collect all the matches, then
 57 | ///    do the replacements on the original rope.  This is a good solution
 58 | ///    when a relatively small number of matches are expected.  However, if
 59 | ///    there are a large number of matches then the space to store the
 60 | ///    matches themselves can become large.
 61 | ///
 62 | /// 3. A piece-meal approach: search for the first match, replace it, then
 63 | ///    restart the search from there, repeat.  This is a good solution for
 64 | ///    memory-constrained situations.  However, computationally it is likely
 65 | ///    the most expensive when there are a large number of matches and there
 66 | ///    are costs associated with repeatedly restarting the search.
 67 | ///
 68 | /// 4. Combine approaches #2 and #3: collect a fixed number of matches and
 69 | ///    replace them, then collect another batch of matches and replace them,
 70 | ///    and so on.  This is probably the best general solution, because it
 71 | ///    combines the best of both #2 and #3: it allows you to collect the
 72 | ///    matches in a bounded amount of space, and any costs associated with
 73 | ///    restarting the search are amortized over multiple matches.
 74 | ///
 75 | /// In this implementation we take approach #4 because it seems the
 76 | /// all-around best.
 77 | fn search_and_replace(rope: &mut Rope, search_pattern: &str, replacement_text: &str) {
 78 |     const BATCH_SIZE: usize = 256;
 79 |     let replacement_text_len = replacement_text.chars().count();
 80 | 
 81 |     let mut head = 0; // Char index in `rope` where the next search begins.
 82 |     let mut matches = Vec::with_capacity(BATCH_SIZE);
 83 |     loop {
 84 |         // Collect the next batch of matches.  Note that we don't use
 85 |         // `Iterator::collect()` to collect the batch because we want to
 86 |         // re-use the same Vec to avoid unnecessary allocations.
 87 |         matches.clear();
 88 |         matches.extend(
 89 |             SearchIter::from_rope_slice(&rope.slice(head..), search_pattern).take(BATCH_SIZE),
 90 |         );
 91 | 
 92 |         // If there are no matches, we're done!
 93 |         if matches.is_empty() {
 94 |             break;
 95 |         }
 96 | 
 97 |         // Replace the collected matches.
 98 |         let mut index_diff: isize = 0;
 99 |         for &(start, end) in matches.iter() {
100 |             // Matches are relative to `head` in the pre-edit text; shift them by the edits made so far.
101 |             let start_d = (head as isize + start as isize + index_diff) as usize;
102 |             let end_d = (head as isize + end as isize + index_diff) as usize;
103 | 
104 |             // Do the replacement.
105 |             rope.remove(start_d..end_d);
106 |             rope.insert(start_d, replacement_text);
107 | 
108 |             // Update the index offset by the net change in length (in chars).
109 |             let match_len = (end - start) as isize;
110 |             index_diff += replacement_text_len as isize - match_len;
111 |         }
112 | 
113 |         // Resume the next search just past the last replacement (in post-edit indices).
114 |         head = (head as isize + index_diff + matches.last().unwrap().1 as isize) as usize;
115 |     }
116 | }
117 | 
118 | /// An iterator over simple textual matches in a RopeSlice.
119 | ///
120 | /// This implementation is somewhat naive, and could be sped up by using a
121 | /// more sophisticated text searching algorithm such as Boyer-Moore or
122 | /// Knuth-Morris-Pratt.
123 | ///
124 | /// The important thing, however, is the interface.  For example, a regex
125 | /// implementation providing an equivalent interface could easily be dropped
126 | /// in, and the search-and-replace function above would work with it quite
127 | /// happily.
128 | struct SearchIter<'a> {
129 |     char_iter: Chars<'a>, // Chars of the slice being searched.
130 |     search_pattern: &'a str, // The text being searched for (asserted non-empty on construction).
131 |     search_pattern_char_len: usize, // Length of `search_pattern` in chars, not bytes.
132 |     cur_index: usize, // The current char index of the search head.
133 |     possible_matches: Vec<std::str::Chars<'a>>, // Tracks where we are in the search pattern for the current possible matches.
134 | }
135 | 
136 | impl<'a> SearchIter<'a> {
137 |     fn from_rope_slice<'b>(slice: &'b RopeSlice, search_pattern: &'b str) -> SearchIter<'b> {
138 |         assert!(
139 |             !search_pattern.is_empty(), // Panics (with the message below) on an empty pattern.
140 |             "Can't search using an empty search pattern."
141 |         );
142 |         SearchIter {
143 |             char_iter: slice.chars(),
144 |             search_pattern: search_pattern,
145 |             search_pattern_char_len: search_pattern.chars().count(), // Counted once up front.
146 |             cur_index: 0, // Reported match indices are relative to the start of `slice`.
147 |             possible_matches: Vec::new(), // No partial matches in progress yet.
148 |         }
149 |     }
150 | }
151 | 
152 | impl<'a> Iterator for SearchIter<'a> {
153 |     type Item = (usize, usize);
154 | 
155 |     // Return the start/end char indices of the next match, or `None` once the text is exhausted.
156 |     fn next(&mut self) -> Option<(usize, usize)> {
157 |         #[allow(clippy::while_let_on_iterator)]
158 |         while let Some(next_char) = self.char_iter.next() {
159 |             self.cur_index += 1;
160 | 
161 |             // Push new potential match, for a possible match starting at the
162 |             // current char.
163 |             self.possible_matches.push(self.search_pattern.chars());
164 | 
165 |             // Check the rope's char against the next character in each of
166 |             // the potential matches, removing the potential matches that
167 |             // don't match.  We're using indexing instead of iteration here
168 |             // so that we can remove the possible matches as we go.
169 |             let mut i = 0;
170 |             while i < self.possible_matches.len() {
171 |                 let pattern_char = self.possible_matches[i].next().unwrap();
172 |                 if next_char == pattern_char {
173 |                     if self.possible_matches[i].clone().next().is_none() {
174 |                         // Pattern exhausted (checked by peeking at a clone): we have a match!
175 |                         // Reset possible matches and return the match's char indices.
176 |                         let char_match_range = (
177 |                             self.cur_index - self.search_pattern_char_len,
178 |                             self.cur_index,
179 |                         );
180 |                         self.possible_matches.clear();
181 |                         return Some(char_match_range);
182 |                     } else {
183 |                         // Match isn't complete yet, move on to the next.
184 |                         i += 1;
185 |                     }
186 |                 } else {
187 |                     // Doesn't match, remove it.  `i` stays put because
188 |                     let _ = self.possible_matches.swap_remove(i); // swap_remove moves the last entry into slot `i`.
189 |                 }
190 |             }
191 |         }
192 | 
193 |         None
194 |     }
195 | }
196 | 


--------------------------------------------------------------------------------
/examples/simple_buffer.rs:
--------------------------------------------------------------------------------
 1 | #![allow(clippy::redundant_field_names)]
 2 | #![allow(dead_code)]
 3 | 
 4 | extern crate ropey;
 5 | 
 6 | use std::fs::File;
 7 | use std::io;
 8 | 
 9 | use ropey::iter::{Bytes, Chars, Chunks, Lines};
10 | use ropey::{Rope, RopeSlice};
11 | 
12 | struct TextBuffer {
13 |     text: Rope, // The buffer's contents.
14 |     path: String, // Path the contents were loaded from.
15 |     dirty: bool, // False after loading; set to true by `edit()`.
16 | }
17 | 
18 | impl TextBuffer {
19 |     fn from_path(path: &str) -> io::Result<TextBuffer> {
20 |         let text = Rope::from_reader(&mut io::BufReader::new(File::open(&path)?))?; // Open/read errors propagate to the caller.
21 |         Ok(TextBuffer {
22 |             text: text,
23 |             path: path.to_string(),
24 |             dirty: false,
25 |         })
26 |     }
27 |     /// Returns line `idx` (zero-indexed) of the text as a slice.
28 |     fn get_line(&self, idx: usize) -> RopeSlice {
29 |         self.text.line(idx)
30 |     }
31 |     /// Returns an iterator over the bytes of the text.
32 |     fn bytes(&self) -> Bytes {
33 |         self.text.bytes()
34 |     }
35 |     /// Returns an iterator over the chars of the text.
36 |     fn chars(&self) -> Chars {
37 |         self.text.chars()
38 |     }
39 |     /// Returns an iterator over the lines of the text.
40 |     fn lines(&self) -> Lines {
41 |         self.text.lines()
42 |     }
43 |     /// Returns an iterator over the chunks of the text.
44 |     fn chunks(&self) -> Chunks {
45 |         self.text.chunks()
46 |     }
47 |     /// Replaces the range `start..end` with `text` and marks the buffer dirty.
48 |     fn edit(&mut self, start: usize, end: usize, text: &str) {
49 |         if start != end {
50 |             self.text.remove(start..end);
51 |         }
52 |         if !text.is_empty() {
53 |             self.text.insert(start, text);
54 |         }
55 |         self.dirty = true; // Set even when the edit was a no-op.
56 |     }
57 | }
58 | 
59 | fn main() {
60 |     // Get the file path from the command line (first argument after the program name).
61 |     let filepath = if std::env::args().count() > 1 {
62 |         std::env::args().nth(1).unwrap()
63 |     } else {
64 |         eprintln!(
65 |             "You must pass a filepath!  Only received {} arguments.",
66 |             std::env::args().count()
67 |         );
68 |         panic!()
69 |     };
70 | 
71 |     let mut buf = TextBuffer::from_path(&filepath).unwrap();
72 |     // Demo edit: replace the range 3..5 with "Hello!", then print the third line.
73 |     buf.edit(3, 5, "Hello!");
74 |     println!("{}", buf.get_line(2));
75 | }
76 | 


--------------------------------------------------------------------------------
/fuzz/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | target
3 | corpus
4 | artifacts
5 | 


--------------------------------------------------------------------------------
/fuzz/Cargo.toml:
--------------------------------------------------------------------------------
 1 | 
 2 | [package]
 3 | name = "ropey-fuzz"
 4 | version = "0.0.0"
 5 | authors = ["Automatically generated"]
 6 | publish = false
 7 | edition = "2018"
 8 | 
 9 | [package.metadata]
10 | cargo-fuzz = true
11 | 
12 | [dependencies]
13 | libfuzzer-sys = { version = "0.4", features = ["arbitrary-derive"] }
14 | 
15 | [dependencies.ropey]
16 | path = ".."
17 | 
18 | [features]
19 | small_chunks = ["ropey/small_chunks"]
20 | 
21 | # Prevent this from interfering with workspaces
22 | [workspace]
23 | members = ["."]
24 | 
25 | 
26 | [[bin]]
27 | name = "mutation"
28 | path = "fuzz_targets/mutation.rs"
29 | test = false
30 | doc = false
31 | 
32 | [[bin]]
33 | name = "mutation_small_chunks"
34 | path = "fuzz_targets/mutation_small_chunks.rs"
35 | test = false
36 | doc = false
37 | required-features = ["small_chunks"]


--------------------------------------------------------------------------------
/fuzz/README.md:
--------------------------------------------------------------------------------
1 | The fuzz tests in this directory can be run via cargo fuzz.
2 | See https://rust-fuzz.github.io/book/ for how to set it up and use it.
3 | 


--------------------------------------------------------------------------------
/fuzz/fuzz_targets/mutation.rs:
--------------------------------------------------------------------------------
 1 | #![no_main]
 2 | 
 3 | use libfuzzer_sys::{
 4 |     arbitrary::{self, Arbitrary},
 5 |     fuzz_target,
 6 | };
 7 | use ropey::Rope;
 8 | 
 9 | const SMALL_TEXT: &str = include_str!("small.txt");
10 | const MEDIUM_TEXT: &str = include_str!("medium.txt");
11 | 
12 | #[derive(Arbitrary, Copy, Clone, Debug)]
13 | enum Op<'a> {
14 |     Insert(usize, &'a str), // (idx, text), passed to `Rope::try_insert`.
15 |     InsertChar(usize, char), // (idx, ch), passed to `Rope::try_insert_char`.
16 |     Remove(usize, usize), // (start, end), passed to `Rope::try_remove` as `start..end`.
17 |     SplitOff(usize, bool), // (idx, keep_right): split at `idx`, optionally keeping the right half.
18 |     Append(&'a str), // Text appended to the rope as a freshly built `Rope`.
19 | }
20 | 
21 | #[derive(Arbitrary, Copy, Clone, Debug)]
22 | enum StartingText<'a> {
23 |     Small, // Start from the bundled `small.txt`.
24 |     Medium, // Start from the bundled `medium.txt`.
25 |     Custom(&'a str), // Start from text supplied by the fuzzer.
26 | }
27 | 
28 | fuzz_target!(|data: (StartingText, Vec<Op>)| {
29 |     let mut r = Rope::from_str(match data.0 {
30 |         StartingText::Small => SMALL_TEXT,
31 |         StartingText::Medium => MEDIUM_TEXT,
32 |         StartingText::Custom(s) => s,
33 |     });
34 |     // Apply every op in order.  Results of the `try_*` calls are discarded.
35 |     for op in data.1 {
36 |         match op {
37 |             Op::Insert(idx, s) => {
38 |                 let _ = r.try_insert(idx, s);
39 |             }
40 |             Op::InsertChar(idx, c) => {
41 |                 let _ = r.try_insert_char(idx, c);
42 |             }
43 |             Op::Remove(idx_1, idx_2) => {
44 |                 let _ = r.try_remove(idx_1..idx_2);
45 |             }
46 |             Op::SplitOff(idx, keep_right) => match r.try_split_off(idx) {
47 |                 Ok(right) => {
48 |                     if keep_right {
49 |                         r = right; // Continue fuzzing with the split-off right half.
50 |                     }
51 |                 }
52 |                 Err(_) => {} // A failed split leaves nothing to do.
53 |             },
54 |             Op::Append(s) => {
55 |                 r.append(Rope::from_str(s));
56 |             }
57 |         }
58 |     }
59 |     // Validate the rope once, after all ops have been applied.
60 |     r.assert_integrity();
61 |     r.assert_invariants();
62 | });
63 | 


--------------------------------------------------------------------------------
/fuzz/fuzz_targets/mutation_small_chunks.rs:
--------------------------------------------------------------------------------
 1 | #![no_main]
 2 | 
 3 | use libfuzzer_sys::{
 4 |     arbitrary::{self, Arbitrary},
 5 |     fuzz_target,
 6 | };
 7 | use ropey::Rope;
 8 | 
 9 | const SMALL_TEXT: &str = include_str!("small.txt");
10 | 
11 | #[derive(Arbitrary, Copy, Clone, Debug)]
12 | enum Op<'a> {
13 |     Insert(usize, &'a str), // (idx, text), passed to `Rope::try_insert`.
14 |     InsertChar(usize, char), // (idx, ch), passed to `Rope::try_insert_char`.
15 |     Remove(usize, usize), // (start, end), passed to `Rope::try_remove` as `start..end`.
16 |     SplitOff(usize, bool), // (idx, keep_right): split at `idx`, optionally keeping the right half.
17 |     Append(&'a str), // Text appended to the rope as a freshly built `Rope`.
18 | }
19 | 
20 | #[derive(Arbitrary, Copy, Clone, Debug)]
21 | enum StartingText<'a> {
22 |     Small, // Start from the bundled `small.txt`.
23 |     Custom(&'a str), // Start from text supplied by the fuzzer.
24 | }
25 | 
26 | fuzz_target!(|data: (StartingText, Vec<Op>)| {
27 |     let mut r = Rope::from_str(match data.0 {
28 |         StartingText::Small => SMALL_TEXT,
29 |         StartingText::Custom(s) => s,
30 |     });
31 |     // Apply every op in order.  Results of the `try_*` calls are discarded.
32 |     for op in data.1 {
33 |         match op {
34 |             Op::Insert(idx, s) => {
35 |                 let _ = r.try_insert(idx, s);
36 |             }
37 |             Op::InsertChar(idx, c) => {
38 |                 let _ = r.try_insert_char(idx, c);
39 |             }
40 |             Op::Remove(idx_1, idx_2) => {
41 |                 let _ = r.try_remove(idx_1..idx_2);
42 |             }
43 |             Op::SplitOff(idx, keep_right) => match r.try_split_off(idx) {
44 |                 Ok(right) => {
45 |                     if keep_right {
46 |                         r = right; // Continue fuzzing with the split-off right half.
47 |                     }
48 |                 }
49 |                 Err(_) => {} // A failed split leaves nothing to do.
50 |             },
51 |             Op::Append(s) => {
52 |                 r.append(Rope::from_str(s));
53 |             }
54 |         }
55 |     }
56 |     // Validate the rope once, after all ops have been applied.
57 |     r.assert_integrity();
58 |     r.assert_invariants();
59 | });
60 | 


--------------------------------------------------------------------------------
/fuzz/fuzz_targets/small.txt:
--------------------------------------------------------------------------------
 1 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas sit amet tellus 
 2 | nec turpis feugiat semper. Nam at nulla laoreet, finibus eros sit amet, fringilla 
 3 | mauris. Fusce vestibulum nec ligula efficitur laoreet. Nunc orci leo, varius eget 
 4 | ligula vulputate, consequat eleifend nisi. Cras justo purus, imperdiet a augue 
 5 | malesuada, convallis cursus libero. Fusce pretium arcu in elementum laoreet. Duis 
 6 | mauris nulla, suscipit at est nec, malesuada pellentesque eros. Quisque semper porta 
 7 | malesuada. Nunc hendrerit est ac faucibus mollis. Nam fermentum id libero sed 
 8 | egestas. Duis a accumsan sapien. Nam neque diam, congue non erat et, porta sagittis 
 9 | turpis. Vivamus vitae mauris sit amet massa mollis molestie. Morbi scelerisque, 
10 | augue id congue imperdiet, felis lacus euismod dui, vitae facilisis massa dui quis 
11 | sapien. Vivamus hendrerit a urna a lobortis.
12 | 
13 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas sit amet tellus 
14 | nec turpis feugiat semper. Nam at nulla laoreet, finibus eros sit amet, fringilla 
15 | mauris. Fusce vestibulum nec ligula efficitur laoreet. Nunc orci leo, varius eget 
16 | ligula vulputate, consequat eleifend nisi. Cras justo purus, imperdiet a augue 
17 | malesuada, convallis cursus libero. Fusce pretium arcu in elementum laoreet. Duis 
18 | mauris nulla, suscipit at est nec, malesuada pellentesque eros. Quisque semper porta 
19 | malesuada. Nunc hendrerit est ac faucibus mollis. Nam fermentum id libero sed 
20 | egestas. Duis a accumsan sapien. Nam neque diam, congue non erat et, porta sagittis 
21 | turpis. Vivamus vitae mauris sit amet massa mollis molestie. Morbi scelerisque, 
22 | augue id congue imperdiet, felis lacus euismod dui, vitae facilisis massa dui quis 
23 | sapien. Vivamus hendrerit a urna a lobortis.
24 | 
25 | 


--------------------------------------------------------------------------------
/src/crlf.rs:
--------------------------------------------------------------------------------
  1 | /// Reports whether `byte_idx` is a position in `text` where it is safe
  2 | /// to split.  A position is safe when it sits on a utf8 code point
  3 | /// boundary and does not fall between the CR and LF of a CRLF pair.
  4 | /// The very start and very end of `text` are always safe.
  5 | #[inline]
  6 | pub fn is_break(byte_idx: usize, text: &[u8]) -> bool {
  7 |     debug_assert!(byte_idx <= text.len());
  8 |     // Both edges of the text are unconditionally valid split points.
  9 |     if byte_idx == 0 || byte_idx == text.len() {
 10 |         return true;
 11 |     }
 12 |     let (cur, prev) = (text[byte_idx], text[byte_idx - 1]);
 13 |     ((cur & 0xC0) != 0x80) && ((prev != b'\r') | (cur != b'\n'))
 14 | }
 15 | 
 16 | /// Reports whether the junction between the end of `left` and the start
 17 | /// of `right` is safe to split at: it must be a utf8 code point boundary
 18 | /// and must not separate the CR and LF of a CRLF pair.
 19 | #[inline]
 20 | pub fn seam_is_break(left: &[u8], right: &[u8]) -> bool {
 21 |     debug_assert!(!left.is_empty() && !right.is_empty());
 22 |     let first = right[0];
 23 |     ((first & 0xC0) != 0x80) && ((left[left.len() - 1] != b'\r') | (first != b'\n'))
 24 | }
 25 | 
 26 | /// Returns the closest segment break strictly before the given byte
 27 | /// boundary.
 28 | ///
 29 | /// If the boundary is already the start of the text (index 0), 0 is
 30 | /// returned, since there is nothing before it.
 31 | #[inline]
 32 | pub fn prev_break(byte_idx: usize, text: &[u8]) -> usize {
 33 |     // Bounds check
 34 |     debug_assert!(byte_idx <= text.len());
 35 | 
 36 |     // Scan backwards over the indices strictly below `byte_idx` and
 37 |     // stop at the first one that is a break.  Index 0 always counts
 38 |     // as a break, so the search can only come up empty when
 39 |     // `byte_idx` itself is 0, in which case the start of the text is
 40 |     // handed back.
 41 |     (0..byte_idx)
 42 |         .rev()
 43 |         .find(|&idx| is_break(idx, text))
 44 |         .unwrap_or(0)
 45 | }
 46 | 
 47 | /// Returns the closest segment break strictly after the given byte
 48 | /// boundary.
 49 | ///
 50 | /// If the boundary is already the end of the text, the end of the text
 51 | /// is returned, since there is nothing after it.
 52 | #[inline]
 53 | pub fn next_break(byte_idx: usize, text: &[u8]) -> usize {
 54 |     // Bounds check
 55 |     debug_assert!(byte_idx <= text.len());
 56 |     // Nothing lies beyond the end of the text.
 57 |     if byte_idx == text.len() {
 58 |         return byte_idx;
 59 |     }
 60 |     // Step forward a byte at a time; the end of the text always counts as a break.
 61 |     let mut candidate = byte_idx + 1;
 62 |     while !is_break(candidate, text) {
 63 |         candidate += 1;
 64 |     }
 65 |     candidate
 66 | }
 67 | 
 68 | /// Finds the segment break nearest to the given byte that is not the
 69 | /// left or right edge of the text.
 70 | ///
 71 | /// There is only one circumstance where the left or right edge will be
 72 | /// returned: if the entire text is a single unbroken segment, then the
 73 | /// right edge of the text is returned (which is 0 for an empty text).
 74 | #[inline]
 75 | pub fn nearest_internal_break(byte_idx: usize, text: &[u8]) -> usize {
 76 |     // Bounds check
 77 |     debug_assert!(byte_idx <= text.len());
 78 | 
 79 |     // Find the two nearest segment boundaries.  `byte_idx` itself may
 80 |     let left = if is_break(byte_idx, text) && byte_idx != text.len() {
 81 |         byte_idx // serve as the left candidate, unless it is the right edge.
 82 |     } else {
 83 |         prev_break(byte_idx, text)
 84 |     };
 85 |     let right = next_break(byte_idx, text);
 86 | 
 87 |     // Pick the closer of the two (ties go to `right`), skipping a
 88 |     // candidate that is the start or end of the text when possible.
 89 |     if left == 0 || (right != text.len() && (byte_idx - left) >= (right - byte_idx)) {
 90 |         right
 91 |     } else {
 92 |         left
 93 |     }
 94 | }
 95 | 
 96 | /// Finds a valid split point at or near `byte_idx`.
 97 | ///
 98 | /// If `byte_idx` is already a break it is returned unchanged.  Otherwise
 99 | /// the break on the side selected by `bias_left` is preferred, falling
100 | /// back to the break on the other side when the preferred one would be
101 | /// the very start (left bias) or very end (right bias) of `text`.
102 | #[inline]
103 | pub fn find_good_split(byte_idx: usize, text: &[u8], bias_left: bool) -> usize {
104 |     // Bounds check
105 |     debug_assert!(byte_idx <= text.len());
106 | 
107 |     if is_break(byte_idx, text) {
108 |         return byte_idx;
109 |     }
110 | 
111 |     let prev = prev_break(byte_idx, text);
112 |     let next = next_break(byte_idx, text);
113 |     match bias_left {
114 |         // Prefer the earlier break, unless it is the very start of the text.
115 |         true if prev > 0 => prev,
116 |         true => next,
117 |         // Prefer the later break, unless it is the very end of the text.
118 |         false if next < text.len() => next,
119 |         false => prev,
120 |     }
121 | }
122 | 
123 | //===========================================================================
124 | 
125 | #[cfg(test)]
126 | mod tests {
127 |     use super::*;
128 | 
129 |     #[test]
130 |     fn crlf_segmenter_01() {
131 |         let text = b"Hello world!\r\nHow's it going?";
132 | 
133 |         assert!(is_break(0, b""));
134 |         assert!(is_break(0, text));
135 |         assert!(is_break(12, text));
136 |         assert!(!is_break(13, text));
137 |         assert!(is_break(14, text));
138 |         assert!(is_break(19, text));
139 |     }
140 | 
141 |     #[test]
142 |     fn crlf_segmenter_02() {
143 |         let l = b"Hello world!\r";
144 |         let r = b"\nHow's it going?";
145 | 
146 |         assert!(!seam_is_break(l, r));
147 |         assert!(!seam_is_break(l, b"\n"));
148 |         assert!(!seam_is_break(b"\r", r));
149 |         assert!(!seam_is_break(b"\r", b"\n"));
150 |         assert!(seam_is_break(r, l));
151 |         assert!(seam_is_break(b"\n", b"\r"));
152 |     }
153 | 
154 |     #[test]
155 |     fn nearest_internal_break_01() {
156 |         let text = b"Hello world!";
157 |         assert_eq!(1, nearest_internal_break(0, text));
158 |         assert_eq!(6, nearest_internal_break(6, text));
159 |         assert_eq!(11, nearest_internal_break(12, text));
160 |     }
161 | 
162 |     #[test]
163 |     fn nearest_internal_break_02() {
164 |         let text = b"Hello\r\n world!";
165 |         assert_eq!(5, nearest_internal_break(5, text));
166 |         assert_eq!(7, nearest_internal_break(6, text));
167 |         assert_eq!(7, nearest_internal_break(7, text));
168 |     }
169 | 
170 |     #[test]
171 |     fn nearest_internal_break_03() {
172 |         let text = b"\r\nHello world!\r\n";
173 |         assert_eq!(2, nearest_internal_break(0, text));
174 |         assert_eq!(2, nearest_internal_break(1, text));
175 |         assert_eq!(2, nearest_internal_break(2, text));
176 |         assert_eq!(14, nearest_internal_break(14, text));
177 |         assert_eq!(14, nearest_internal_break(15, text));
178 |         assert_eq!(14, nearest_internal_break(16, text));
179 |     }
180 | 
181 |     #[test]
182 |     fn nearest_internal_break_04() {
183 |         let text = b"\r\n";
184 |         assert_eq!(2, nearest_internal_break(0, text));
185 |         assert_eq!(2, nearest_internal_break(1, text));
186 |         assert_eq!(2, nearest_internal_break(2, text));
187 |     }
188 | 
189 |     #[test]
190 |     fn is_break_01() {
191 |         let text = b"\n\r\n\r\n\r\n\r\n\r\n\r";
192 | 
193 |         assert!(is_break(0, text));
194 |         assert!(is_break(12, text));
195 |         assert!(is_break(3, text));
196 |         assert!(!is_break(6, text));
197 |     }
198 | 
199 |     #[test]
200 |     fn seam_is_break_01() {
201 |         let text1 = b"\r\n\r\n\r\n";
202 |         let text2 = b"\r\n\r\n";
203 | 
204 |         assert!(seam_is_break(text1, text2));
205 |     }
206 | 
207 |     #[test]
208 |     fn seam_is_break_02() {
209 |         let text1 = b"\r\n\r\n\r";
210 |         let text2 = b"\n\r\n\r\n";
211 | 
212 |         assert!(!seam_is_break(text1, text2));
213 |     }
214 | }
215 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! Ropey is a utf8 text rope for Rust.  It is fast, robust, and can handle
  2 | //! huge texts and memory-incoherent edits with ease.
  3 | //!
  4 | //! Ropey's atomic unit of text is Unicode scalar values (or `char`s in Rust)
  5 | //! encoded as utf8.  All of Ropey's editing and slicing operations are done
  6 | //! in terms of char indices, which prevents accidental creation of invalid
  7 | //! utf8 data.
  8 | //!
  9 | //! The library is made up of four main components:
 10 | //!
 11 | //! - [`Rope`]: the main rope type.
 12 | //! - [`RopeSlice`]: an immutable view into part of a
 13 | //!   `Rope`.
 14 | //! - [`iter`]: iterators over `Rope`/`RopeSlice` data.
 15 | //! - [`RopeBuilder`]: an efficient incremental
 16 | //!   `Rope` builder.
 17 | //!
 18 | //!
 19 | //! # A Basic Example
 20 | //!
 21 | //! Let's say we want to open up a text file, replace the 516th line (the
 22 | //! writing was terrible!), and save it back to disk.  It's contrived, but will
 23 | //! give a good sampling of the APIs and how they work together.
 24 | //!
 25 | //! ```no_run
 26 | //! # use std::io::Result;
 27 | //! use std::fs::File;
 28 | //! use std::io::{BufReader, BufWriter};
 29 | //! use ropey::Rope;
 30 | //!
 31 | //! # fn do_stuff() -> Result<()> {
 32 | //! // Load a text file.
 33 | //! let mut text = Rope::from_reader(
 34 | //!     BufReader::new(File::open("my_great_book.txt")?)
 35 | //! )?;
 36 | //!
 37 | //! // Print the 516th line (zero-indexed) to see the terrible
 38 | //! // writing.
 39 | //! println!("{}", text.line(515));
 40 | //!
 41 | //! // Get the start/end char indices of the line.
 42 | //! let start_idx = text.line_to_char(515);
 43 | //! let end_idx = text.line_to_char(516);
 44 | //!
 45 | //! // Remove the line...
 46 | //! text.remove(start_idx..end_idx);
 47 | //!
 48 | //! // ...and replace it with something better.
 49 | //! text.insert(start_idx, "The flowers are... so... dunno.\n");
 50 | //!
 51 | //! // Print the changes, along with the previous few lines for context.
 52 | //! let start_idx = text.line_to_char(511);
 53 | //! let end_idx = text.line_to_char(516);
 54 | //! println!("{}", text.slice(start_idx..end_idx));
 55 | //!
 56 | //! // Write the file back out to disk.
 57 | //! text.write_to(
 58 | //!     BufWriter::new(File::create("my_great_book.txt")?)
 59 | //! )?;
 60 | //! # Ok(())
 61 | //! # }
 62 | //! # do_stuff().unwrap();
 63 | //! ```
 64 | //!
 65 | //! More examples can be found in the `examples` directory of the git
 66 | //! repository.  Many of those examples demonstrate doing non-trivial things
 67 | //! with Ropey such as grapheme handling, search-and-replace, and streaming
 68 | //! loading of non-utf8 text files.
 69 | //!
 70 | //!
 71 | //! # Low-level APIs
 72 | //!
 73 | //! Ropey also provides access to some of its low-level APIs, enabling client
 74 | //! code to efficiently work with a `Rope`'s data and implement new
 75 | //! functionality.  The most important of those APIs are:
 76 | //!
 77 | //! - The [`chunk_at_*()`](Rope::chunk_at_byte)
 78 | //!   chunk-fetching methods of `Rope` and `RopeSlice`.
 79 | //! - The [`Chunks`](iter::Chunks) iterator.
 80 | //! - The functions in [`str_utils`] for operating on
 81 | //!   `&str` slices.
 82 | //!
 83 | //! Internally, each `Rope` stores text as a segmented collection of utf8
 84 | //! strings.  The chunk-fetching methods and `Chunks` iterator provide direct
 85 | //! access to those strings (or "chunks") as `&str` slices, allowing client
 86 | //! code to work directly with the underlying utf8 data.
 87 | //!
 88 | //! The chunk-fetching methods and `str_utils` functions are the basic
 89 | //! building blocks that Ropey itself uses to build much of its functionality.
 90 | //! For example, the [`Rope::byte_to_char()`]
 91 | //! method can be reimplemented as a free function like this:
 92 | //!
 93 | //! ```no_run
 94 | //! use ropey::{
 95 | //!     Rope,
 96 | //!     str_utils::byte_to_char_idx
 97 | //! };
 98 | //!
 99 | //! fn byte_to_char(rope: &Rope, byte_idx: usize) -> usize {
100 | //!     let (chunk, b, c, _) = rope.chunk_at_byte(byte_idx);
101 | //!     c + byte_to_char_idx(chunk, byte_idx - b)
102 | //! }
103 | //! ```
104 | //!
105 | //! And this will be just as efficient as Ropey's implementation.
106 | //!
107 | //! The chunk-fetching methods in particular are among the fastest functions
108 | //! that Ropey provides, generally operating in the sub-hundred nanosecond
109 | //! range for medium-sized (~200kB) documents on recent-ish computer systems.
110 | //!
111 | //!
112 | //! # A Note About Line Breaks
113 | //!
114 | //! Some of Ropey's APIs use the concept of line breaks or lines of text.
115 | //!
116 | //! Ropey considers the start of the rope and positions immediately
117 | //! _after_ line breaks to be the start of new lines.  And it treats
118 | //! line breaks as being a part of the lines they mark the end of.
119 | //!
120 | //! For example, the rope `"Hello"` has a single line: `"Hello"`.  The
121 | //! rope `"Hello\nworld"` has two lines: `"Hello\n"` and `"world"`.  And
122 | //! the rope `"Hello\nworld\n"` has three lines: `"Hello\n"`,
123 | //! `"world\n"`, and `""`.
124 | //!
125 | //! Ropey can be configured at build time via feature flags to recognize
126 | //! different line breaks.  Ropey always recognizes:
127 | //!
128 | //! - `U+000A`          &mdash; LF (Line Feed)
129 | //! - `U+000D` `U+000A` &mdash; CRLF (Carriage Return + Line Feed)
130 | //!
131 | //! With the `cr_lines` feature, the following are also recognized:
132 | //!
133 | //! - `U+000D`          &mdash; CR (Carriage Return)
134 | //!
135 | //! With the `unicode_lines` feature, in addition to all of the
136 | //! above, the following are also recognized (bringing Ropey into
137 | //! conformance with
138 | //! [Unicode Annex #14](https://www.unicode.org/reports/tr14/#BK)):
139 | //!
140 | //! - `U+000B`          &mdash; VT (Vertical Tab)
141 | //! - `U+000C`          &mdash; FF (Form Feed)
142 | //! - `U+0085`          &mdash; NEL (Next Line)
143 | //! - `U+2028`          &mdash; Line Separator
144 | //! - `U+2029`          &mdash; Paragraph Separator
145 | //!
146 | //! (Note: `unicode_lines` is enabled by default, and always implies
147 | //! `cr_lines`.)
148 | //!
149 | //! CRLF pairs are always treated as a single line break, and are never split
150 | //! across chunks.  Note, however, that slicing can still split them.
151 | //!
152 | //!
153 | //! # A Note About SIMD Acceleration
154 | //!
155 | //! Ropey has a `simd` feature flag (enabled by default) that enables
156 | //! explicit SIMD on supported platforms to improve performance.
157 | //!
158 | //! There is a bit of a footgun here: if you disable default features to
159 | //! configure line break behavior (as per the section above) then SIMD
160 | //! will also get disabled, and performance will suffer.  So be careful
161 | //! to explicitly re-enable the `simd` feature flag (if desired) when
162 | //! doing that.
163 | 
164 | #![allow(clippy::collapsible_if)]
165 | #![allow(clippy::inline_always)]
166 | #![allow(clippy::needless_return)]
167 | #![allow(clippy::redundant_field_names)]
168 | #![allow(clippy::type_complexity)]
169 | 
170 | extern crate smallvec;
171 | extern crate str_indices;
172 | 
173 | mod crlf;
174 | mod rope;
175 | mod rope_builder;
176 | mod slice;
177 | mod tree;
178 | 
179 | pub mod iter;
180 | pub mod str_utils;
181 | 
182 | use std::ops::Bound;
183 | 
184 | pub use crate::rope::Rope;
185 | pub use crate::rope_builder::RopeBuilder;
186 | pub use crate::slice::RopeSlice;
187 | 
188 | /// NOT PART OF THE PUBLIC API (hidden from docs for a reason!)
189 | /// These are only exposed for tests that live in the `tests` directory.
190 | #[doc(hidden)]
191 | pub use crate::tree::{MAX_BYTES, MAX_CHILDREN, MIN_BYTES, MIN_CHILDREN};
192 | 
193 | //==============================================================
194 | // Error reporting types.
195 | 
196 | /// Ropey's result type.
197 | pub type Result<T> = std::result::Result<T, Error>;
198 | 
199 | /// Ropey's error type.
200 | #[derive(Clone, Copy)]
201 | #[non_exhaustive]
202 | pub enum Error {
203 |     /// Indicates that the passed byte index was out of bounds.
204 |     ///
205 |     /// Contains the index attempted and the actual length of the
206 |     /// `Rope`/`RopeSlice` in bytes, in that order.
207 |     ByteIndexOutOfBounds(usize, usize),
208 | 
209 |     /// Indicates that the passed char index was out of bounds.
210 |     ///
211 |     /// Contains the index attempted and the actual length of the
212 |     /// `Rope`/`RopeSlice` in chars, in that order.
213 |     CharIndexOutOfBounds(usize, usize),
214 | 
215 |     /// Indicates that the passed line index was out of bounds.
216 |     ///
217 |     /// Contains the index attempted and the actual length of the
218 |     /// `Rope`/`RopeSlice` in lines, in that order.
219 |     LineIndexOutOfBounds(usize, usize),
220 | 
221 |     /// Indicates that the passed utf16 code-unit index was out of
222 |     /// bounds.
223 |     ///
224 |     /// Contains the index attempted and the actual length of the
225 |     /// `Rope`/`RopeSlice` in utf16 code units, in that order.
226 |     Utf16IndexOutOfBounds(usize, usize),
227 | 
228 |     /// Indicates that the passed byte index was not a char boundary.
229 |     ///
230 |     /// Contains the passed byte index.
231 |     ByteIndexNotCharBoundary(usize),
232 | 
233 |     /// Indicates that the passed byte range didn't line up with char
234 |     /// boundaries.
235 |     ///
236 |     /// Contains the [start, end) byte indices of the range, in that order.
237 |     /// When either the start or end is `None`, the range is unbounded on
238 |     /// that side.
239 |     ByteRangeNotCharBoundary(
240 |         Option<usize>, // Start.
241 |         Option<usize>, // End.
242 |     ),
243 | 
244 |     /// Indicates that a reversed byte-index range (end < start) was
245 |     /// encountered.
246 |     ///
247 |     /// Contains the [start, end) byte indices of the range, in that order.
248 |     ByteRangeInvalid(
249 |         usize, // Start.
250 |         usize, // End.
251 |     ),
252 | 
253 |     /// Indicates that a reversed char-index range (end < start) was
254 |     /// encountered.
255 |     ///
256 |     /// Contains the [start, end) char indices of the range, in that order.
257 |     CharRangeInvalid(
258 |         usize, // Start.
259 |         usize, // End.
260 |     ),
261 | 
262 |     /// Indicates that the passed byte-index range was partially or fully
263 |     /// out of bounds.
264 |     ///
265 |     /// Contains the [start, end) byte indices of the range and the actual
266 |     /// length of the `Rope`/`RopeSlice` in bytes, in that order.  When
267 |     /// either the start or end is `None`, the range is unbounded on that side.
268 |     ByteRangeOutOfBounds(
269 |         Option<usize>, // Start.
270 |         Option<usize>, // End.
271 |         usize,         // Rope byte length.
272 |     ),
273 | 
274 |     /// Indicates that the passed char-index range was partially or fully
275 |     /// out of bounds.
276 |     ///
277 |     /// Contains the [start, end) char indices of the range and the actual
278 |     /// length of the `Rope`/`RopeSlice` in chars, in that order.  When
279 |     /// either the start or end is `None`, the range is unbounded on that side.
280 |     CharRangeOutOfBounds(
281 |         Option<usize>, // Start.
282 |         Option<usize>, // End.
283 |         usize,         // Rope char length.
284 |     ),
285 | }
286 | 
287 | impl std::error::Error for Error {
288 |     fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
289 |         None
290 |     }
291 | 
292 |     // Deprecated in std.
293 |     fn description(&self) -> &str {
294 |         ""
295 |     }
296 | 
297 |     // Deprecated in std.
298 |     fn cause(&self) -> Option<&dyn std::error::Error> {
299 |         None
300 |     }
301 | }
302 | 
303 | impl std::fmt::Debug for Error {
304 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
305 |         match *self {
306 |             Error::ByteIndexOutOfBounds(index, len) => {
307 |                 write!(
308 |                     f,
309 |                     "Byte index out of bounds: byte index {}, Rope/RopeSlice byte length {}",
310 |                     index, len
311 |                 )
312 |             }
313 |             Error::CharIndexOutOfBounds(index, len) => {
314 |                 write!(
315 |                     f,
316 |                     "Char index out of bounds: char index {}, Rope/RopeSlice char length {}",
317 |                     index, len
318 |                 )
319 |             }
320 |             Error::LineIndexOutOfBounds(index, len) => {
321 |                 write!(
322 |                     f,
323 |                     "Line index out of bounds: line index {}, Rope/RopeSlice line count {}",
324 |                     index, len
325 |                 )
326 |             }
327 |             Error::Utf16IndexOutOfBounds(index, len) => {
328 |                 write!(f, "Utf16 code-unit index out of bounds: utf16 index {}, Rope/RopeSlice utf16 length {}", index, len)
329 |             }
330 |             Error::ByteIndexNotCharBoundary(index) => {
331 |                 write!(
332 |                     f,
333 |                     "Byte index is not a valid char boundary: byte index {}",
334 |                     index
335 |                 )
336 |             }
337 |             Error::ByteRangeNotCharBoundary(start_idx_opt, end_idx_opt) => {
338 |                 write!(f, "Byte range does not align with char boundaries: range ")?;
339 |                 write_range(f, start_idx_opt, end_idx_opt)
340 |             }
341 |             Error::ByteRangeInvalid(start_idx, end_idx) => {
342 |                 write!(
343 |                     f,
344 |                     "Invalid byte range {}..{}: start must be <= end",
345 |                     start_idx, end_idx
346 |                 )
347 |             }
348 |             Error::CharRangeInvalid(start_idx, end_idx) => {
349 |                 write!(
350 |                     f,
351 |                     "Invalid char range {}..{}: start must be <= end",
352 |                     start_idx, end_idx
353 |                 )
354 |             }
355 |             Error::ByteRangeOutOfBounds(start_idx_opt, end_idx_opt, len) => {
356 |                 write!(f, "Byte range out of bounds: byte range ")?;
357 |                 write_range(f, start_idx_opt, end_idx_opt)?;
358 |                 write!(f, ", Rope/RopeSlice byte length {}", len)
359 |             }
360 |             Error::CharRangeOutOfBounds(start_idx_opt, end_idx_opt, len) => {
361 |                 write!(f, "Char range out of bounds: char range ")?;
362 |                 write_range(f, start_idx_opt, end_idx_opt)?;
363 |                 write!(f, ", Rope/RopeSlice char length {}", len)
364 |             }
365 |         }
366 |     }
367 | }
368 | 
369 | impl std::fmt::Display for Error {
370 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
371 |         // Just re-use the debug impl.
372 |         std::fmt::Debug::fmt(self, f)
373 |     }
374 | }
375 | 
376 | fn write_range(
377 |     f: &mut std::fmt::Formatter<'_>,
378 |     start_idx: Option<usize>,
379 |     end_idx: Option<usize>,
380 | ) -> std::fmt::Result {
381 |     match (start_idx, end_idx) {
382 |         (None, None) => {
383 |             write!(f, "..")
384 |         }
385 | 
386 |         (Some(start), None) => {
387 |             write!(f, "{}..", start)
388 |         }
389 | 
390 |         (None, Some(end)) => {
391 |             write!(f, "..{}", end)
392 |         }
393 | 
394 |         (Some(start), Some(end)) => {
395 |             write!(f, "{}..{}", start, end)
396 |         }
397 |     }
398 | }
399 | 
400 | //==============================================================
401 | // Range handling utilities.
402 | 
403 | #[inline(always)]
404 | pub(crate) fn start_bound_to_num(b: Bound<&usize>) -> Option<usize> {
405 |     match b {
406 |         Bound::Included(n) => Some(*n),
407 |         Bound::Excluded(n) => Some(*n + 1),
408 |         Bound::Unbounded => None,
409 |     }
410 | }
411 | 
412 | #[inline(always)]
413 | pub(crate) fn end_bound_to_num(b: Bound<&usize>) -> Option<usize> {
414 |     match b {
415 |         Bound::Included(n) => Some(*n + 1),
416 |         Bound::Excluded(n) => Some(*n),
417 |         Bound::Unbounded => None,
418 |     }
419 | }
420 | 


--------------------------------------------------------------------------------
/src/rope_builder.rs:
--------------------------------------------------------------------------------
  1 | use std::sync::Arc;
  2 | 
  3 | use smallvec::SmallVec;
  4 | 
  5 | use crate::crlf;
  6 | use crate::rope::Rope;
  7 | use crate::tree::{Node, NodeChildren, NodeText, MAX_BYTES, MAX_CHILDREN, MIN_BYTES};
  8 | 
  9 | /// An efficient incremental `Rope` builder.
 10 | ///
 11 | /// This is used to efficiently build ropes from sequences of text
 12 | /// chunks.  It is useful for creating ropes from:
 13 | ///
 14 | /// - ...large text files, without pre-loading their entire contents into
 15 | ///   memory (but see [`from_reader()`](Rope::from_reader) for a convenience
 16 | ///   function that does this for casual use-cases).
 17 | /// - ...streaming data sources.
 18 | /// - ...non-utf8 text data, doing the encoding conversion incrementally
 19 | ///   as you go.
 20 | ///
 21 | /// Unlike repeatedly calling `Rope::insert()` on the end of a rope,
 22 | /// this API runs in time linear to the amount of data fed to it, and
 23 | /// is overall much faster.
 24 | ///
 25 | /// # Example
 26 | /// ```
 27 | /// # use ropey::RopeBuilder;
 28 | /// #
 29 | /// let mut builder = RopeBuilder::new();
 30 | ///
 31 | /// builder.append("Hello ");
 32 | /// builder.append("world!\n");
 33 | /// builder.append("How's ");
 34 | /// builder.append("it goin");
 35 | /// builder.append("g?");
 36 | ///
 37 | /// let rope = builder.finish();
 38 | ///
 39 | /// assert_eq!(rope, "Hello world!\nHow's it going?");
 40 | /// ```
 41 | #[derive(Debug, Clone)]
 42 | pub struct RopeBuilder {
 43 |     stack: SmallVec<[Arc<Node>; 4]>,
 44 |     buffer: String,
 45 |     last_chunk_len_bytes: usize,
 46 | }
 47 | 
 48 | impl RopeBuilder {
 49 |     /// Creates a new RopeBuilder, ready for input.
 50 |     pub fn new() -> Self {
 51 |         RopeBuilder {
 52 |             stack: {
 53 |                 let mut stack = SmallVec::new();
 54 |                 stack.push(Arc::new(Node::new()));
 55 |                 stack
 56 |             },
 57 |             buffer: String::new(),
 58 |             last_chunk_len_bytes: 0,
 59 |         }
 60 |     }
 61 | 
 62 |     /// Appends `chunk` to the end of the in-progress `Rope`.
 63 |     ///
 64 |     /// Call this method repeatedly to incrementally build up a
 65 |     /// `Rope`.  The passed text chunk can be as large or small as
 66 |     /// desired, but larger chunks are more efficient.
 67 |     ///
 68 |     /// `chunk` must be valid utf8 text.
 69 |     pub fn append(&mut self, chunk: &str) {
 70 |         self.append_internal(chunk, false);
 71 |     }
 72 | 
 73 |     /// Finishes the build, and returns the `Rope`.
 74 |     ///
 75 |     /// Note: this method consumes the builder.  If you want to continue
 76 |     /// building other ropes with the same prefix, you can clone the builder
 77 |     /// before calling `finish()`.
 78 |     pub fn finish(mut self) -> Rope {
 79 |         // Append the last leaf
 80 |         self.append_internal("", true);
 81 |         self.finish_internal(true)
 82 |     }
 83 | 
 84 |     /// Builds a rope all at once from a single string slice.
 85 |     ///
 86 |     /// This avoids the creation and use of the internal buffer.  This is
 87 |     /// for internal use only, because the public-facing API has
 88 |     /// Rope::from_str(), which actually uses this for its implementation.
 89 |     pub(crate) fn build_at_once(mut self, chunk: &str) -> Rope {
 90 |         self.append_internal(chunk, true);
 91 |         self.finish_internal(true)
 92 |     }
 93 | 
 94 |     /// NOT PART OF THE PUBLIC API (hidden from docs for a reason!).
 95 |     ///
 96 |     /// Appends `contents` to the in-progress rope as a single leaf
 97 |     /// node (chunk).  This is useful for building ropes with specific
 98 |     /// chunk configurations for testing purposes.  It will happily append
 99 |     /// both empty and more-than-max-size chunks.
100 |     ///
101 |     /// This makes no attempt to be consistent with the standard `append()`
102 |     /// method, and should not be used in conjunction with it.
103 |     #[doc(hidden)]
104 |     pub fn _append_chunk(&mut self, contents: &str) {
105 |         self.append_leaf_node(Arc::new(Node::Leaf(NodeText::from_str(contents))));
106 |     }
107 | 
108 |     /// NOT PART OF THE PUBLIC API (hidden from docs for a reason!).
109 |     ///
110 |     /// Finishes the build without doing any tree fixing to adhere
111 |     /// to the btree invariants. To be used with `_append_chunk()` to
112 |     /// construct ropes with specific chunk boundaries for testing.
113 |     #[doc(hidden)]
114 |     pub fn _finish_no_fix(self) -> Rope {
115 |         self.finish_internal(false)
116 |     }
117 | 
118 |     //-----------------------------------------------------------------
119 | 
120 |     // Internal workings of `append()`.
121 |     fn append_internal(&mut self, chunk: &str, is_last_chunk: bool) {
122 |         let mut chunk = chunk;
123 | 
124 |         // Repeatedly chop text off the front of the input, creating
125 |         // leaf nodes out of them and appending them to the tree.
126 |         while !chunk.is_empty() || (!self.buffer.is_empty() && is_last_chunk) {
127 |             // Get the text for the next leaf
128 |             let (leaf_text, remainder) = self.get_next_leaf_text(chunk, is_last_chunk);
129 |             chunk = remainder;
130 | 
131 |             self.last_chunk_len_bytes = chunk.len();
132 | 
133 |             // Append the leaf to the rope
134 |             match leaf_text {
135 |                 NextText::None => break,
136 |                 NextText::UseBuffer => {
137 |                     let leaf_text = NodeText::from_str(&self.buffer);
138 |                     self.append_leaf_node(Arc::new(Node::Leaf(leaf_text)));
139 |                     self.buffer.clear();
140 |                 }
141 |                 NextText::String(s) => {
142 |                     self.append_leaf_node(Arc::new(Node::Leaf(NodeText::from_str(s))));
143 |                 }
144 |             }
145 |         }
146 |     }
147 | 
148 |     // Internal workings of `finish()`.
149 |     //
150 |     // When `fix_tree` is false, the resulting node tree is NOT fixed up
151 |     // to adhere to the btree invariants.  This is useful for some testing
152 |     // code.  But generally, `fix_tree` should be set to true.
153 |     fn finish_internal(mut self, fix_tree: bool) -> Rope {
154 |         // Zip up all the remaining nodes on the stack
155 |         let mut stack_idx = self.stack.len() - 1;
156 |         while stack_idx >= 1 {
157 |             let node = self.stack.pop().unwrap();
158 |             if let Node::Internal(ref mut children) = *Arc::make_mut(&mut self.stack[stack_idx - 1])
159 |             {
160 |                 children.push((node.text_info(), node));
161 |             } else {
162 |                 unreachable!();
163 |             }
164 |             stack_idx -= 1;
165 |         }
166 | 
167 |         // Create the rope.
168 |         let mut rope = Rope {
169 |             root: self.stack.pop().unwrap(),
170 |         };
171 | 
172 |         // Fix up the tree to be well-formed.
173 |         if fix_tree {
174 |             Arc::make_mut(&mut rope.root).zip_fix_right();
175 |             if self.last_chunk_len_bytes < MIN_BYTES
176 |                 && self.last_chunk_len_bytes != rope.len_bytes()
177 |             {
178 |                 // Merge the last chunk if it was too small.
179 |                 let idx = rope.len_chars()
180 |                     - rope.byte_to_char(rope.len_bytes() - self.last_chunk_len_bytes);
181 |                 Arc::make_mut(&mut rope.root).fix_tree_seam(idx);
182 |             }
183 |             rope.pull_up_singular_nodes();
184 |         }
185 | 
186 |         return rope;
187 |     }
188 | 
189 |     // Returns (next_leaf_text, remaining_text)
190 |     #[inline(always)]
191 |     fn get_next_leaf_text<'a>(
192 |         &mut self,
193 |         text: &'a str,
194 |         is_last_chunk: bool,
195 |     ) -> (NextText<'a>, &'a str) {
196 |         assert!(
197 |             self.buffer.len() < MAX_BYTES,
198 |             "RopeBuilder: buffer is already full when receiving a chunk! \
199 |              This should never happen!",
200 |         );
201 | 
202 |         // Simplest case: empty buffer and enough in `text` for a full
203 |         // chunk, so just chop a chunk off from `text` and use that.
204 |         if self.buffer.is_empty() && text.len() >= MAX_BYTES {
205 |             let split_idx = crlf::find_good_split(
206 |                 MAX_BYTES.min(text.len() - 1), // - 1 to avoid CRLF split.
207 |                 text.as_bytes(),
208 |                 true,
209 |             );
210 |             return (NextText::String(&text[..split_idx]), &text[split_idx..]);
211 |         }
212 |         // If the buffer + `text` is enough for a full chunk, push enough
213 |         // of `text` onto the buffer to fill it and use that.
214 |         else if (text.len() + self.buffer.len()) >= MAX_BYTES {
215 |             let mut split_idx =
216 |                 crlf::find_good_split(MAX_BYTES - self.buffer.len(), text.as_bytes(), true);
217 |             if split_idx == text.len() && text.as_bytes()[text.len() - 1] == 0x0D {
218 |                 // Avoid CRLF split.
219 |                 split_idx -= 1;
220 |             };
221 |             self.buffer.push_str(&text[..split_idx]);
222 |             return (NextText::UseBuffer, &text[split_idx..]);
223 |         }
224 |         // If we don't have enough text for a full chunk.
225 |         else {
226 |             // If it's our last chunk, wrap it all up!
227 |             if is_last_chunk {
228 |                 if self.buffer.is_empty() {
229 |                     return if text.is_empty() {
230 |                         (NextText::None, "")
231 |                     } else {
232 |                         (NextText::String(text), "")
233 |                     };
234 |                 } else {
235 |                     self.buffer.push_str(text);
236 |                     return (NextText::UseBuffer, "");
237 |                 }
238 |             }
239 |             // Otherwise, just push to the buffer.
240 |             else {
241 |                 self.buffer.push_str(text);
242 |                 return (NextText::None, "");
243 |             }
244 |         }
245 |     }
246 | 
247 |     fn append_leaf_node(&mut self, leaf: Arc<Node>) {
248 |         let last = self.stack.pop().unwrap();
249 |         match *last {
250 |             Node::Leaf(_) => {
251 |                 if last.leaf_text().is_empty() {
252 |                     self.stack.push(leaf);
253 |                 } else {
254 |                     let mut children = NodeChildren::new();
255 |                     children.push((last.text_info(), last));
256 |                     children.push((leaf.text_info(), leaf));
257 |                     self.stack.push(Arc::new(Node::Internal(children)));
258 |                 }
259 |             }
260 | 
261 |             Node::Internal(_) => {
262 |                 self.stack.push(last);
263 |                 let mut left = leaf;
264 |                 let mut stack_idx = (self.stack.len() - 1) as isize;
265 |                 loop {
266 |                     if stack_idx < 0 {
267 |                         // We're above the root, so do a root split.
268 |                         let mut children = NodeChildren::new();
269 |                         children.push((left.text_info(), left));
270 |                         self.stack.insert(0, Arc::new(Node::Internal(children)));
271 |                         break;
272 |                     } else if self.stack[stack_idx as usize].child_count() < (MAX_CHILDREN - 1) {
273 |                         // There's room to add a child, so do that.
274 |                         Arc::make_mut(&mut self.stack[stack_idx as usize])
275 |                             .children_mut()
276 |                             .push((left.text_info(), left));
277 |                         break;
278 |                     } else {
279 |                         // Not enough room to fit a child, so split.
280 |                         left = Arc::new(Node::Internal(
281 |                             Arc::make_mut(&mut self.stack[stack_idx as usize])
282 |                                 .children_mut()
283 |                                 .push_split((left.text_info(), left)),
284 |                         ));
285 |                         std::mem::swap(&mut left, &mut self.stack[stack_idx as usize]);
286 |                         stack_idx -= 1;
287 |                     }
288 |                 }
289 |             }
290 |         }
291 |     }
292 | }
293 | 
294 | impl Default for RopeBuilder {
295 |     fn default() -> Self {
296 |         Self::new()
297 |     }
298 | }
299 | 
300 | enum NextText<'a> {
301 |     None,
302 |     UseBuffer,
303 |     String(&'a str),
304 | }
305 | 
306 | //===========================================================================
307 | 
308 | #[cfg(test)]
309 | mod tests {
310 |     use super::*;
311 | 
312 |     // 127 bytes, 103 chars, 4 lines
313 |     const TEXT: &str = "Hello there!  How're you doing?\r\nIt's \
314 |                         a fine day, isn't it?\r\nAren't you glad \
315 |                         we're alive?\r\nこんにちは、みんなさん！";
316 | 
317 |     #[test]
318 |     fn rope_builder_01() {
319 |         let mut b = RopeBuilder::new();
320 | 
321 |         b.append("Hello there!  How're you doing?\r");
322 |         b.append("\nIt's a fine ");
323 |         b.append("d");
324 |         b.append("a");
325 |         b.append("y,");
326 |         b.append(" ");
327 |         b.append("isn't it?");
328 |         b.append("\r");
329 |         b.append("\nAren't you ");
330 |         b.append("glad we're alive?\r");
331 |         b.append("\n");
332 |         b.append("こんにち");
333 |         b.append("は、みんなさ");
334 |         b.append("ん！");
335 | 
336 |         let r = b.finish();
337 | 
338 |         assert_eq!(r, TEXT);
339 | 
340 |         r.assert_integrity();
341 |         r.assert_invariants();
342 |     }
343 | 
344 |     #[test]
345 |     fn rope_builder_default_01() {
346 |         let mut b = RopeBuilder::default();
347 | 
348 |         b.append("Hello there!  How're you doing?\r");
349 |         b.append("\nIt's a fine day, isn't it?\r\nAren't you ");
350 |         b.append("glad we're alive?\r\nこんにちは、みんなさん！");
351 | 
352 |         let r = b.finish();
353 | 
354 |         assert_eq!(r, TEXT);
355 | 
356 |         r.assert_integrity();
357 |         r.assert_invariants();
358 |     }
359 | }
360 | 


--------------------------------------------------------------------------------
/src/str_utils.rs:
--------------------------------------------------------------------------------
  1 | //! Utility functions for utf8 string slices.
  2 | //!
  3 | //! This module provides various utility functions that operate on string
  4 | //! slices in ways compatible with Ropey.  They may be useful when building
  5 | //! additional functionality on top of Ropey.
  6 | 
  7 | pub(crate) use str_indices::chars::count as count_chars;
  8 | pub use str_indices::chars::from_byte_idx as byte_to_char_idx;
  9 | pub use str_indices::chars::to_byte_idx as char_to_byte_idx;
 10 | pub(crate) use str_indices::utf16::count_surrogates as count_utf16_surrogates;
 11 | 
 12 | // Determine which line implementation to use.
 13 | #[cfg(feature = "unicode_lines")]
 14 | use str_indices::lines;
 15 | #[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
 16 | use str_indices::lines_crlf as lines;
 17 | #[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
 18 | use str_indices::lines_lf as lines;
 19 | 
 20 | pub(crate) use self::lines::count_breaks as count_line_breaks;
 21 | pub use self::lines::from_byte_idx as byte_to_line_idx;
 22 | pub use self::lines::to_byte_idx as line_to_byte_idx;
 23 | 
 24 | /// Converts from char-index to line-index in a string slice.
 25 | ///
 26 | /// This is equivalent to counting the line endings before the given char.
 27 | ///
 28 | /// Any past-the-end index will return the last line index.
 29 | ///
 30 | /// Runs in O(N) time.
 31 | #[inline]
 32 | pub fn char_to_line_idx(text: &str, char_idx: usize) -> usize {
 33 |     lines::from_byte_idx(text, str_indices::chars::to_byte_idx(text, char_idx))
 34 | }
 35 | 
 36 | /// Maps a line index in a string slice to a char index.
 37 | ///
 38 | /// The result is the index of the first char of the given line.  Any
 39 | /// past-the-end index yields the one-past-the-end char index.
 40 | ///
 41 | /// Runs in O(N) time.
 42 | #[inline]
 43 | pub fn line_to_char_idx(text: &str, line_idx: usize) -> usize {
 44 |     let byte_idx = lines::to_byte_idx(text, line_idx);
 45 |     str_indices::chars::from_byte_idx(text, byte_idx)
 46 | }
 47 | 
 48 | //-------------------------------------------------------------
 49 | 
 50 | pub(crate) fn byte_to_utf16_surrogate_idx(text: &str, byte_idx: usize) -> usize {
 51 |     // Snap down to the nearest char boundary; index 0 always qualifies.
 52 |     let boundary = (0..=byte_idx).rev().find(|&i| text.is_char_boundary(i)).unwrap_or(0);
 53 |     str_indices::utf16::count_surrogates(&text[..boundary])
 54 | }
 55 | 
 56 | pub(crate) fn utf16_code_unit_to_char_idx(text: &str, utf16_idx: usize) -> usize {
 57 |     // Translate UTF-16 code units to bytes first, then bytes to chars.
 58 |     let byte_idx = str_indices::utf16::to_byte_idx(text, utf16_idx);
 59 |     str_indices::chars::from_byte_idx(text, byte_idx)
 60 | }
 61 | 
 62 | /// Finds the byte index at which the last line of `text` begins.
 63 | ///
 64 | /// The text is scanned backwards for the final line break, and the index
 65 | /// just past that break is returned.  If `text` contains no line break at
 66 | /// all, the whole text is a single line and `0` is returned.
 67 | ///
 68 | /// Note: when the text ends in a line break, the last line is an empty
 69 | /// line that starts at the very end of the text.
 70 | ///
 71 | /// Which byte sequences count as line breaks depends on the enabled
 72 | /// features:
 73 | ///
 74 | /// - always: LF (`0A`).
 75 | /// - `cr_lines` or `unicode_lines`: additionally CR (`0D`).
 76 | /// - `unicode_lines`: additionally VT (`0B`), FF (`0C`), NEL (`C2 85`),
 77 | ///   LS (`E2 80 A8`), and PS (`E2 80 A9`).
 78 | pub(crate) fn last_line_start_byte_idx(text: &str) -> usize {
 79 |     let bytes = text.as_bytes();
 80 | 
 81 |     // Candidate line starts, from the end of the text towards the front.
 82 |     for start in (1..=bytes.len()).rev() {
 83 |         // A line starts at `start` if a line break ends right before it.
 84 |         let preceded_by_break = match bytes[start - 1] {
 85 |             0x0A => true,
 86 |             #[cfg(any(feature = "cr_lines", feature = "unicode_lines"))]
 87 |             0x0D => true,
 88 |             #[cfg(feature = "unicode_lines")]
 89 |             0x0B | 0x0C => true,
 90 |             #[cfg(feature = "unicode_lines")]
 91 |             0x85 => bytes[..start - 1].ends_with(&[0xC2]),
 92 |             #[cfg(feature = "unicode_lines")]
 93 |             0xA8 | 0xA9 => bytes[..start - 1].ends_with(&[0xE2, 0x80]),
 94 |             _ => false,
 95 |         };
 96 |         if preceded_by_break {
 97 |             return start;
 98 |         }
 99 |     }
100 | 
101 |     // No line break anywhere: the text is one single line.
102 |     0
103 | }
104 | 
105 | /// Trims a single trailing line break (if any) off the end of the passed string.
106 | ///
107 | /// If the string doesn't end in a line break, returns the string unchanged.
108 | ///
109 | /// LF is always a line break.  CR and CRLF (trimmed as a whole) are line
110 | /// breaks with `cr_lines` or `unicode_lines`, and VT, FF, NEL, LS, and PS
111 | /// with `unicode_lines`.
112 | #[inline]
113 | pub(crate) fn trim_line_break(text: &str) -> &str {
114 |     // Split off the last codepoint; empty text has nothing to trim.
115 |     let (i, tail) = match text.char_indices().next_back() {
116 |         Some((i, _)) => (i, &text[i..]),
117 |         None => return "",
118 |     };
119 | 
120 |     // Check if it's one of the fancy unicode line breaks.
121 |     #[cfg(feature = "unicode_lines")]
122 |     if matches!(
123 |         tail,
124 |         "\u{000B}" | "\u{000C}" | "\u{0085}" | "\u{2028}" | "\u{2029}"
125 |     ) {
126 |         return &text[..i];
127 |     }
128 | 
129 |     // A lone CR.  Gated on either feature, like the other helpers in this
130 |     // file, so the result doesn't rely on `unicode_lines` enabling `cr_lines`.
131 |     #[cfg(any(feature = "cr_lines", feature = "unicode_lines"))]
132 |     if tail == "\u{000D}" {
133 |         return &text[..i];
134 |     }
135 | 
136 |     if tail == "\u{000A}" {
137 |         // Treat a CRLF pair as a single line break.
138 |         #[cfg(any(feature = "cr_lines", feature = "unicode_lines"))]
139 |         if i > 0 && text.as_bytes()[i - 1] == 0xd {
140 |             return &text[..(i - 1)];
141 |         }
142 |         return &text[..i];
143 |     }
144 | 
145 |     text
146 | }
147 | 
148 | /// Checks whether the last codepoint of `text` is a line break.
149 | ///
150 | /// Empty text never ends in a line break, and only the very last
151 | /// codepoint is examined.
152 | ///
153 | /// The set of codepoints that count as line breaks is chosen at compile
154 | /// time from the enabled features.
155 | #[inline]
156 | pub(crate) fn ends_with_line_break(text: &str) -> bool {
157 |     // All line breaks: LF, VT, FF, CR, NEL, LS, and PS.
158 |     #[cfg(feature = "unicode_lines")]
159 |     const LINE_BREAKS: &[char] = &[
160 |         '\u{000A}', '\u{000B}', '\u{000C}', '\u{000D}', '\u{0085}', '\u{2028}', '\u{2029}',
161 |     ];
162 | 
163 |     // Only LF and CR.
164 |     #[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
165 |     const LINE_BREAKS: &[char] = &['\u{000A}', '\u{000D}'];
166 | 
167 |     // Only LF.
168 |     #[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
169 |     const LINE_BREAKS: &[char] = &['\u{000A}'];
170 | 
171 |     // A char-slice pattern matches if the final char is any of its members.
172 |     text.ends_with(LINE_BREAKS)
173 | }
174 | 
175 | //======================================================================
176 | 
177 | #[cfg(test)]
178 | mod tests {
179 |     use super::*;
180 | 
181 |     #[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
182 |     #[test]
183 |     fn last_line_start_byte_idx_lf_01() {
184 |         assert_eq!(0, last_line_start_byte_idx(""));
185 |         assert_eq!(0, last_line_start_byte_idx("Hi"));
186 |         // In LF-only mode, U+000A is the only recognized line break.
187 |         assert_eq!(3, last_line_start_byte_idx("Hi\u{000A}there."));
188 |         assert_eq!(0, last_line_start_byte_idx("Hi\u{000B}there."));
189 |         assert_eq!(0, last_line_start_byte_idx("Hi\u{000C}there."));
190 |         assert_eq!(0, last_line_start_byte_idx("Hi\u{000D}there."));
191 |         assert_eq!(0, last_line_start_byte_idx("Hi\u{0085}there."));
192 |         assert_eq!(0, last_line_start_byte_idx("Hi\u{2028}there."));
193 |         assert_eq!(0, last_line_start_byte_idx("Hi\u{2029}there."));
194 |     }
195 | 
196 |     #[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
197 |     #[test]
198 |     fn last_line_start_byte_idx_lf_02() {
199 |         let mut text = "\u{000A}Hello\u{000D}\u{000A}\u{000D}せ\u{000B}か\u{000C}い\u{0085}. \
200 |                         There\u{2028}is something.\u{2029}";
201 |         // Each step trims one trailing line break, then cuts off the last line.
202 |         assert_eq!(48, text.len());
203 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
204 |         assert_eq!(8, text.len());
205 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
206 |         assert_eq!(1, text.len());
207 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
208 |         assert_eq!(0, text.len());
209 |     }
210 | 
211 |     #[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
212 |     #[test]
213 |     fn last_line_start_byte_idx_crlf_01() {
214 |         assert_eq!(0, last_line_start_byte_idx(""));
215 |         assert_eq!(0, last_line_start_byte_idx("Hi"));
216 |         // With only `cr_lines`, U+000A and U+000D are the recognized line breaks.
217 |         assert_eq!(3, last_line_start_byte_idx("Hi\u{000A}there."));
218 |         assert_eq!(0, last_line_start_byte_idx("Hi\u{000B}there."));
219 |         assert_eq!(0, last_line_start_byte_idx("Hi\u{000C}there."));
220 |         assert_eq!(3, last_line_start_byte_idx("Hi\u{000D}there."));
221 |         assert_eq!(0, last_line_start_byte_idx("Hi\u{0085}there."));
222 |         assert_eq!(0, last_line_start_byte_idx("Hi\u{2028}there."));
223 |         assert_eq!(0, last_line_start_byte_idx("Hi\u{2029}there."));
224 |     }
225 | 
226 |     #[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
227 |     #[test]
228 |     fn last_line_start_byte_idx_crlf_02() {
229 |         let mut text = "\u{000A}Hello\u{000D}\u{000A}\u{000D}せ\u{000B}か\u{000C}い\u{0085}. \
230 |                         There\u{2028}is something.\u{2029}";
231 |         // Each step trims one trailing line break, then cuts off the last line.
232 |         assert_eq!(48, text.len());
233 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
234 |         assert_eq!(9, text.len());
235 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
236 |         assert_eq!(8, text.len());
237 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
238 |         assert_eq!(1, text.len());
239 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
240 |         assert_eq!(0, text.len());
241 |     }
242 | 
243 |     #[cfg(feature = "unicode_lines")]
244 |     #[test]
245 |     fn last_line_start_byte_idx_unicode_01() {
246 |         assert_eq!(0, last_line_start_byte_idx(""));
247 |         assert_eq!(0, last_line_start_byte_idx("Hi"));
248 |         // With `unicode_lines`, all seven line break codepoints are recognized.
249 |         assert_eq!(3, last_line_start_byte_idx("Hi\u{000A}there."));
250 |         assert_eq!(3, last_line_start_byte_idx("Hi\u{000B}there."));
251 |         assert_eq!(3, last_line_start_byte_idx("Hi\u{000C}there."));
252 |         assert_eq!(3, last_line_start_byte_idx("Hi\u{000D}there."));
253 |         assert_eq!(4, last_line_start_byte_idx("Hi\u{0085}there."));
254 |         assert_eq!(5, last_line_start_byte_idx("Hi\u{2028}there."));
255 |         assert_eq!(5, last_line_start_byte_idx("Hi\u{2029}there."));
256 |     }
257 | 
258 |     #[cfg(feature = "unicode_lines")]
259 |     #[test]
260 |     fn last_line_start_byte_idx_unicode_02() {
261 |         let mut text = "\u{000A}Hello\u{000D}\u{000A}\u{000D}せ\u{000B}か\u{000C}い\u{0085}. \
262 |                         There\u{2028}is something.\u{2029}";
263 |         // Each step trims one trailing line break, then cuts off the last line.
264 |         assert_eq!(48, text.len());
265 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
266 |         assert_eq!(32, text.len());
267 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
268 |         assert_eq!(22, text.len());
269 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
270 |         assert_eq!(17, text.len());
271 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
272 |         assert_eq!(13, text.len());
273 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
274 |         assert_eq!(9, text.len());
275 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
276 |         assert_eq!(8, text.len());
277 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
278 |         assert_eq!(1, text.len());
279 |         text = &text[..last_line_start_byte_idx(trim_line_break(text))];
280 |         assert_eq!(0, text.len());
281 |     }
282 | 
283 |     #[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
284 |     #[test]
285 |     fn trim_line_break_lf_01() {
286 |         assert_eq!("", trim_line_break(""));
287 |         assert_eq!("Hi", trim_line_break("Hi"));
288 |         // In LF-only mode, only a trailing U+000A is trimmed.
289 |         assert_eq!("Hi", trim_line_break("Hi\u{000A}"));
290 |         assert_eq!("Hi\u{000B}", trim_line_break("Hi\u{000B}"));
291 |         assert_eq!("Hi\u{000C}", trim_line_break("Hi\u{000C}"));
292 |         assert_eq!("Hi\u{000D}", trim_line_break("Hi\u{000D}"));
293 |         assert_eq!("Hi\u{0085}", trim_line_break("Hi\u{0085}"));
294 |         assert_eq!("Hi\u{2028}", trim_line_break("Hi\u{2028}"));
295 |         assert_eq!("Hi\u{2029}", trim_line_break("Hi\u{2029}"));
296 |         // Without CR support, only the LF of a CRLF pair is trimmed.
297 |         assert_eq!("\r", trim_line_break("\r\n"));
298 |         assert_eq!("Hi\r", trim_line_break("Hi\r\n"));
299 |     }
300 | 
301 |     #[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
302 |     #[test]
303 |     fn trim_line_break_crlf_01() {
304 |         assert_eq!("", trim_line_break(""));
305 |         assert_eq!("Hi", trim_line_break("Hi"));
306 |         // With only `cr_lines`, trailing U+000A and U+000D are trimmed.
307 |         assert_eq!("Hi", trim_line_break("Hi\u{000A}"));
308 |         assert_eq!("Hi\u{000B}", trim_line_break("Hi\u{000B}"));
309 |         assert_eq!("Hi\u{000C}", trim_line_break("Hi\u{000C}"));
310 |         assert_eq!("Hi", trim_line_break("Hi\u{000D}"));
311 |         assert_eq!("Hi\u{0085}", trim_line_break("Hi\u{0085}"));
312 |         assert_eq!("Hi\u{2028}", trim_line_break("Hi\u{2028}"));
313 |         assert_eq!("Hi\u{2029}", trim_line_break("Hi\u{2029}"));
314 |         // A CRLF pair is trimmed as a single line break.
315 |         assert_eq!("", trim_line_break("\r\n"));
316 |         assert_eq!("Hi", trim_line_break("Hi\r\n"));
317 |     }
318 | 
319 |     #[cfg(feature = "unicode_lines")]
320 |     #[test]
321 |     fn trim_line_break_unicode_01() {
322 |         assert_eq!("", trim_line_break(""));
323 |         assert_eq!("Hi", trim_line_break("Hi"));
324 |         // With `unicode_lines`, every line break codepoint is trimmed.
325 |         assert_eq!("Hi", trim_line_break("Hi\u{000A}"));
326 |         assert_eq!("Hi", trim_line_break("Hi\u{000B}"));
327 |         assert_eq!("Hi", trim_line_break("Hi\u{000C}"));
328 |         assert_eq!("Hi", trim_line_break("Hi\u{000D}"));
329 |         assert_eq!("Hi", trim_line_break("Hi\u{0085}"));
330 |         assert_eq!("Hi", trim_line_break("Hi\u{2028}"));
331 |         assert_eq!("Hi", trim_line_break("Hi\u{2029}"));
332 |         // A CRLF pair is trimmed as a single line break.
333 |         assert_eq!("", trim_line_break("\r\n"));
334 |         assert_eq!("Hi", trim_line_break("Hi\r\n"));
335 |     }
336 | 
337 |     #[test]
338 |     fn ends_with_line_break_01() {
339 |         assert!(ends_with_line_break("\n"));
340 | 
341 |         #[cfg(any(feature = "cr_lines", feature = "unicode_lines"))]
342 |         assert!(ends_with_line_break("\r"));
343 | 
344 |         #[cfg(feature = "unicode_lines")]
345 |         {
346 |             assert!(ends_with_line_break("\u{000A}"));
347 |             assert!(ends_with_line_break("\u{000B}"));
348 |             assert!(ends_with_line_break("\u{000C}"));
349 |             assert!(ends_with_line_break("\u{000D}"));
350 |             assert!(ends_with_line_break("\u{0085}"));
351 |             assert!(ends_with_line_break("\u{2028}"));
352 |             assert!(ends_with_line_break("\u{2029}"));
353 |         }
354 |     }
355 | 
356 |     #[test]
357 |     fn ends_with_line_break_02() {
358 |         assert!(ends_with_line_break("Hi there!\n"));
359 | 
360 |         #[cfg(any(feature = "cr_lines", feature = "unicode_lines"))]
361 |         assert!(ends_with_line_break("Hi there!\r"));
362 | 
363 |         #[cfg(feature = "unicode_lines")]
364 |         {
365 |             assert!(ends_with_line_break("Hi there!\u{000A}"));
366 |             assert!(ends_with_line_break("Hi there!\u{000B}"));
367 |             assert!(ends_with_line_break("Hi there!\u{000C}"));
368 |             assert!(ends_with_line_break("Hi there!\u{000D}"));
369 |             assert!(ends_with_line_break("Hi there!\u{0085}"));
370 |             assert!(ends_with_line_break("Hi there!\u{2028}"));
371 |             assert!(ends_with_line_break("Hi there!\u{2029}"));
372 |         }
373 |     }
374 | 
375 |     #[test]
376 |     fn ends_with_line_break_03() {
377 |         assert!(!ends_with_line_break(""));
378 |         assert!(!ends_with_line_break("a"));
379 |         assert!(!ends_with_line_break("Hi there!"));
380 |     }
381 | 
382 |     #[test]
383 |     fn ends_with_line_break_04() {
384 |         assert!(!ends_with_line_break("\na"));
385 |         assert!(!ends_with_line_break("\ra"));
386 |         assert!(!ends_with_line_break("\u{000A}a"));
387 |         assert!(!ends_with_line_break("\u{000B}a"));
388 |         assert!(!ends_with_line_break("\u{000C}a"));
389 |         assert!(!ends_with_line_break("\u{000D}a"));
390 |         assert!(!ends_with_line_break("\u{0085}a"));
391 |         assert!(!ends_with_line_break("\u{2028}a"));
392 |         assert!(!ends_with_line_break("\u{2029}a"));
393 |     }
394 | 
395 |     #[test]
396 |     fn char_to_line_idx_01() {
397 |         let text = "\u{000A}Hello\u{000D}\u{000A}\u{000D}せ\u{000B}か\u{000C}い\u{0085}. \
398 |                     There\u{2028}is something.\u{2029}";
399 |         // The expected line indices differ per line break mode, one block for each.
400 |         #[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
401 |         {
402 |             assert_eq!(0, char_to_line_idx(text, 0));
403 |             assert_eq!(1, char_to_line_idx(text, 1));
404 |             assert_eq!(2, char_to_line_idx(text, 8));
405 |             assert_eq!(2, char_to_line_idx(text, 38));
406 |         }
407 | 
408 |         #[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
409 |         {
410 |             assert_eq!(0, char_to_line_idx(text, 0));
411 |             assert_eq!(1, char_to_line_idx(text, 1));
412 |             assert_eq!(2, char_to_line_idx(text, 8));
413 |             assert_eq!(3, char_to_line_idx(text, 9));
414 |             assert_eq!(3, char_to_line_idx(text, 38));
415 |         }
416 | 
417 |         #[cfg(feature = "unicode_lines")]
418 |         {
419 |             assert_eq!(0, char_to_line_idx(text, 0));
420 |             assert_eq!(1, char_to_line_idx(text, 1));
421 |             assert_eq!(2, char_to_line_idx(text, 8));
422 |             assert_eq!(3, char_to_line_idx(text, 9));
423 |             assert_eq!(4, char_to_line_idx(text, 11));
424 |             assert_eq!(5, char_to_line_idx(text, 13));
425 |             assert_eq!(6, char_to_line_idx(text, 15));
426 |             assert_eq!(7, char_to_line_idx(text, 23));
427 |             assert_eq!(8, char_to_line_idx(text, 37));
428 |             assert_eq!(8, char_to_line_idx(text, 38));
429 |         }
430 |     }
431 | 
432 |     #[test]
433 |     fn line_to_char_idx_01() {
434 |         let text = "\u{000A}Hello\u{000D}\u{000A}\u{000D}せ\u{000B}か\u{000C}い\u{0085}. \
435 |                     There\u{2028}is something.\u{2029}";
436 |         // The expected char indices differ per line break mode, one block for each.
437 |         #[cfg(not(any(feature = "cr_lines", feature = "unicode_lines")))]
438 |         {
439 |             assert_eq!(0, line_to_char_idx(text, 0));
440 |             assert_eq!(1, line_to_char_idx(text, 1));
441 |             assert_eq!(8, line_to_char_idx(text, 2));
442 |             assert_eq!(37, line_to_char_idx(text, 3));
443 |         }
444 | 
445 |         #[cfg(all(feature = "cr_lines", not(feature = "unicode_lines")))]
446 |         {
447 |             assert_eq!(0, line_to_char_idx(text, 0));
448 |             assert_eq!(1, line_to_char_idx(text, 1));
449 |             assert_eq!(8, line_to_char_idx(text, 2));
450 |             assert_eq!(9, line_to_char_idx(text, 3));
451 |             assert_eq!(37, line_to_char_idx(text, 4));
452 |         }
453 | 
454 |         #[cfg(feature = "unicode_lines")]
455 |         {
456 |             assert_eq!(0, line_to_char_idx(text, 0));
457 |             assert_eq!(1, line_to_char_idx(text, 1));
458 |             assert_eq!(8, line_to_char_idx(text, 2));
459 |             assert_eq!(9, line_to_char_idx(text, 3));
460 |             assert_eq!(11, line_to_char_idx(text, 4));
461 |             assert_eq!(13, line_to_char_idx(text, 5));
462 |             assert_eq!(15, line_to_char_idx(text, 6));
463 |             assert_eq!(23, line_to_char_idx(text, 7));
464 |             assert_eq!(37, line_to_char_idx(text, 8));
465 |             assert_eq!(37, line_to_char_idx(text, 9));
466 |         }
467 |     }
468 | }
469 | 


--------------------------------------------------------------------------------
/src/tree/mod.rs:
--------------------------------------------------------------------------------
  1 | mod node;
  2 | mod node_children;
  3 | mod node_text;
  4 | mod text_info;
  5 | 
  6 | pub(crate) use self::node::Node;
  7 | pub(crate) use self::node_children::NodeChildren;
  8 | pub(crate) use self::node_text::NodeText;
  9 | pub(crate) use self::text_info::TextInfo;
 10 | 
 11 | // Integer type used for storing tree metadata, such as byte and char lengths.
 12 | pub(crate) type Count = u64;
 13 | 
 14 | // Real constants, used unless building tests or with the `small_chunks` feature.
 15 | #[cfg(not(any(test, feature = "small_chunks")))]
 16 | mod constants {
 17 |     use super::{Node, TextInfo};
 18 |     use smallvec::SmallVec;
 19 |     use std::{
 20 |         mem::{align_of, size_of},
 21 |         sync::Arc,
 22 |     };
 23 | 
 24 |     // Because stdlib's max is not const for some reason.
 25 |     // TODO: replace with stdlib max once it's const.
 26 |     const fn cmax(a: usize, b: usize) -> usize {
 27 |         if a > b {
 28 |             a
 29 |         } else {
 30 |             b
 31 |         }
 32 |     }
 33 | 
 34 |     // Aim for Node + Arc counters to be 1024 bytes.  Keeping the nodes
 35 |     // multiples of large powers of two makes it easier for the memory
 36 |     // allocator to avoid fragmentation.
 37 |     const TARGET_TOTAL_SIZE: usize = 1024;
 38 | 
 39 |     // Space that the strong and weak Arc counters take up in `ArcInner`.
 40 |     const ARC_COUNTERS_SIZE: usize = size_of::<std::sync::atomic::AtomicUsize>() * 2;
 41 | 
 42 |     // Alignment info and the resulting start offset, needed by the maximums below.
 43 |     const NODE_CHILDREN_ALIGN: usize = cmax(align_of::<Arc<u8>>(), align_of::<TextInfo>());
 44 |     const NODE_TEXT_ALIGN: usize = align_of::<SmallVec<[u8; 16]>>();
 45 |     const START_OFFSET: usize = {
 46 |         const NODE_INNER_ALIGN: usize = cmax(NODE_CHILDREN_ALIGN, NODE_TEXT_ALIGN);
 47 |         // The +NODE_INNER_ALIGN is because of Node's enum discriminant.
 48 |         ARC_COUNTERS_SIZE + NODE_INNER_ALIGN
 49 |     };
 50 | 
 51 |     // Node maximums.
 52 |     #[doc(hidden)] // NOT PART OF THE PUBLIC API!
 53 |     pub const MAX_CHILDREN: usize = {
 54 |         let node_list_align = align_of::<Arc<u8>>();
 55 |         let info_list_align = align_of::<TextInfo>();
 56 |         let field_gap = if node_list_align >= info_list_align {
 57 |             0
 58 |         } else {
 59 |             // This is over-conservative, because in reality it depends
 60 |             // on the number of elements.  But handling that is probably
 61 |             // more complexity than it's worth.
 62 |             info_list_align - node_list_align
 63 |         };
 64 | 
 65 |         // The -NODE_CHILDREN_ALIGN is for the `len` field in `NodeChildrenInternal`.
 66 |         let target_size = TARGET_TOTAL_SIZE - START_OFFSET - NODE_CHILDREN_ALIGN - field_gap;
 67 | 
 68 |         target_size / (size_of::<Arc<u8>>() + size_of::<TextInfo>())
 69 |     };
 70 |     #[doc(hidden)] // NOT PART OF THE PUBLIC API!
 71 |     pub const MAX_BYTES: usize = {
 72 |         let smallvec_overhead = size_of::<SmallVec<[u8; 16]>>() - 16;
 73 |         TARGET_TOTAL_SIZE - START_OFFSET - smallvec_overhead
 74 |     };
 75 | 
 76 |     // Node minimums.
 77 |     // Note: MIN_BYTES is intentionally a little smaller than half
 78 |     // MAX_BYTES, to give a little wiggle room when on the edge of
 79 |     // merging/splitting.
 80 |     #[doc(hidden)] // NOT PART OF THE PUBLIC API!
 81 |     pub const MIN_CHILDREN: usize = MAX_CHILDREN / 2;
 82 |     #[doc(hidden)] // NOT PART OF THE PUBLIC API!
 83 |     pub const MIN_BYTES: usize = (MAX_BYTES / 2) - (MAX_BYTES / 32);
 84 | 
 85 |     // Compile-time assertion that `Node` plus the Arc counters is exactly the target size.
 86 |     const _: () = {
 87 |         assert!(
 88 |             (ARC_COUNTERS_SIZE + size_of::<Node>()) == TARGET_TOTAL_SIZE,
 89 |             "`Node` is not the target size in memory.",
 90 |         );
 91 |     };
 92 | }
 93 | 
 94 | // Smaller constants used in test builds and with the `small_chunks` feature.
 95 | // These are different from the real ones in order to trigger deeper trees
 96 | // without having to use huge text data in the tests.
 97 | #[cfg(any(test, feature = "small_chunks"))]
 98 | mod test_constants {
 99 |     #[doc(hidden)] // NOT PART OF THE PUBLIC API!
100 |     pub const MAX_CHILDREN: usize = 5;
101 |     #[doc(hidden)] // NOT PART OF THE PUBLIC API!
102 |     pub const MIN_CHILDREN: usize = MAX_CHILDREN / 2;
103 | 
104 |     // MAX_BYTES must be >= 4 to allow for 4-byte utf8 characters.
105 |     #[doc(hidden)] // NOT PART OF THE PUBLIC API!
106 |     pub const MAX_BYTES: usize = 9; // Note: can't be 8, because 3-byte characters.
107 |     #[doc(hidden)] // NOT PART OF THE PUBLIC API!
108 |     pub const MIN_BYTES: usize = (MAX_BYTES / 2) - (MAX_BYTES / 32);
109 | }
110 | 
111 | #[cfg(not(any(test, feature = "small_chunks")))]
112 | pub use self::constants::{MAX_BYTES, MAX_CHILDREN, MIN_BYTES, MIN_CHILDREN};
113 | 
114 | #[cfg(any(test, feature = "small_chunks"))]
115 | pub use self::test_constants::{MAX_BYTES, MAX_CHILDREN, MIN_BYTES, MIN_CHILDREN};
116 | 


--------------------------------------------------------------------------------
/src/tree/node_text.rs:
--------------------------------------------------------------------------------
  1 | use std::borrow::Borrow;
  2 | use std::ops::Deref;
  3 | use std::str;
  4 | 
  5 | use crate::crlf;
  6 | 
  7 | /// A custom small string.  The unsafe guts of this are in `NodeSmallString`,
  8 | /// inside the `inner` module further down in this file.
  9 | #[derive(Clone, Default)]
 10 | #[repr(C)]
 11 | pub(crate) struct NodeText(inner::NodeSmallString);
 12 | 
 13 | impl NodeText {
 14 |     /// Creates a new empty `NodeText`
 15 |     #[inline(always)]
 16 |     pub fn new() -> Self {
 17 |         NodeText(inner::NodeSmallString::new())
 18 |     }
 19 | 
 20 |     /// Creates a new `NodeText` with the same contents as the given `&str`.
 21 |     pub fn from_str(string: &str) -> Self {
 22 |         NodeText(inner::NodeSmallString::from_str(string))
 23 |     }
 24 | 
 25 |     /// Inserts a `&str` at byte offset `byte_idx`.
 26 |     pub fn insert_str(&mut self, byte_idx: usize, string: &str) {
 27 |         self.0.insert_str(byte_idx, string);
 28 |     }
 29 | 
 30 |     /// Inserts `string` at `byte_idx` and splits the resulting string in half,
 31 |     /// returning the right half.
 32 |     ///
 33 |     /// Only splits on code point boundaries and will never split CRLF pairs,
 34 |     /// so if the whole string is a single code point or CRLF pair, the split
 35 |     /// will fail and the returned string will be empty.
 36 |     pub fn insert_str_split(&mut self, byte_idx: usize, string: &str) -> Self {
 37 |         debug_assert!(self.is_char_boundary(byte_idx));
 38 | 
 39 |         let tot_len = self.len() + string.len();
 40 |         let mid_idx = tot_len / 2;
 41 |         let a = byte_idx;
 42 |         let b = byte_idx + string.len();
 43 | 
 44 |         // Figure out the split index, accounting for code point
 45 |         // boundaries and CRLF pairs.
 46 |         // We first copy the bytes in the area of the proposed split point into
 47 |         // a small 8-byte buffer.  We then use that buffer to look for the
 48 |         // real split point.
 49 |         let split_idx = {
 50 |             let mut buf = [0u8; 8];
 51 |             let start = mid_idx - 4.min(mid_idx);
 52 |             let end = (mid_idx + 4).min(tot_len);
 53 |             for i in start..end {
 54 |                 buf[i - start] = if i < a {
 55 |                     self.as_bytes()[i]
 56 |                 } else if i < b {
 57 |                     string.as_bytes()[i - a]
 58 |                 } else {
 59 |                     self.as_bytes()[i - string.len()]
 60 |                 };
 61 |             }
 62 |             // The search runs on `buf`, so indices are shifted by `start` going in and out.
 63 |             crlf::nearest_internal_break(mid_idx - start, &buf[..(end - start)]) + start
 64 |         };
 65 |         // Three cases: the split falls before, inside, or after the inserted string.
 66 |         let mut right = NodeText::new();
 67 |         if split_idx <= a {
 68 |             right.push_str(&self[split_idx..a]);
 69 |             right.push_str(string);
 70 |             right.push_str(&self[a..]);
 71 |             self.truncate(split_idx);
 72 |         } else if split_idx <= b {
 73 |             right.push_str(&string[(split_idx - a)..]);
 74 |             right.push_str(&self[a..]);
 75 |             self.truncate(a);
 76 |             self.push_str(&string[..(split_idx - a)]);
 77 |         } else {
 78 |             right.push_str(&self[(split_idx - string.len())..]);
 79 |             self.truncate(split_idx - string.len());
 80 |             self.insert_str(a, string);
 81 |         }
 82 | 
 83 |         self.0.inline_if_possible();
 84 |         right
 85 |     }
 86 | 
 87 |     /// Appends a `&str` to the end of the `NodeText`.
 88 |     pub fn push_str(&mut self, string: &str) {
 89 |         let len = self.len();
 90 |         self.0.insert_str(len, string);
 91 |     }
 92 | 
 93 |     /// Appends a `&str` and splits the resulting string in half, returning
 94 |     /// the right half.
 95 |     ///
 96 |     /// Only splits on code point boundaries and will never split CRLF pairs,
 97 |     /// so if the whole string is a single code point or CRLF pair, the split
 98 |     /// will fail and the returned string will be empty.
 99 |     pub fn push_str_split(&mut self, string: &str) -> Self {
100 |         let len = self.len();
101 |         self.insert_str_split(len, string)
102 |     }
103 | 
104 |     /// Drops the text after byte index `byte_idx`.
105 |     pub fn truncate(&mut self, byte_idx: usize) {
106 |         self.0.truncate(byte_idx);
107 |         self.0.inline_if_possible();
108 |     }
109 | 
110 |     /// Drops the text before byte index `byte_idx`, shifting the
111 |     /// rest of the text to fill in the space.
112 |     pub fn truncate_front(&mut self, byte_idx: usize) {
113 |         self.0.remove_range(0, byte_idx);
114 |         self.0.inline_if_possible();
115 |     }
116 | 
117 |     /// Removes the text in the byte index interval `[byte_start, byte_end)`.
118 |     pub fn remove_range(&mut self, byte_start: usize, byte_end: usize) {
119 |         self.0.remove_range(byte_start, byte_end);
120 |         self.0.inline_if_possible();
121 |     }
122 | 
123 |     /// Splits the `NodeText` at `byte_idx`.
124 |     ///
125 |     /// The left part remains in the original, and the right part is
126 |     /// returned in a new `NodeText`.
127 |     pub fn split_off(&mut self, byte_idx: usize) -> Self {
128 |         let other = NodeText(self.0.split_off(byte_idx));
129 |         self.0.inline_if_possible();
130 |         other
131 |     }
132 | }
133 | 
134 | impl std::cmp::PartialEq for NodeText {
135 |     fn eq(&self, other: &Self) -> bool {
136 |         // Equality is defined purely by the string contents.
137 |         self.0.as_str() == other.0.as_str()
138 |     }
139 | }
140 | 
141 | impl<'a> PartialEq<NodeText> for &'a str {
142 |     fn eq(&self, other: &NodeText) -> bool {
143 |         other.0.as_str() == *self
144 |     }
145 | }
146 | 
147 | impl<'a> PartialEq<&'a str> for NodeText {
148 |     fn eq(&self, other: &&'a str) -> bool {
149 |         *other == self.0.as_str()
150 |     }
151 | }
152 | 
153 | impl std::fmt::Display for NodeText {
154 |     fn fmt(&self, fm: &mut std::fmt::Formatter) -> std::fmt::Result {
155 |         std::fmt::Display::fmt(self.0.as_str(), fm)
156 |     }
157 | }
158 | 
159 | impl std::fmt::Debug for NodeText {
160 |     fn fmt(&self, fm: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
161 |         std::fmt::Debug::fmt(self.0.as_str(), fm)
162 |     }
163 | }
164 | 
165 | impl<'a> From<&'a str> for NodeText {
166 |     fn from(s: &str) -> Self {
167 |         NodeText(inner::NodeSmallString::from_str(s))
168 |     }
169 | }
170 | 
171 | impl Deref for NodeText {
172 |     type Target = str;
173 | 
174 |     fn deref(&self) -> &Self::Target {
175 |         self.0.as_str()
176 |     }
177 | }
178 | 
179 | impl AsRef<str> for NodeText {
180 |     fn as_ref(&self) -> &str {
181 |         self.deref()
182 |     }
183 | }
184 | 
185 | impl Borrow<str> for NodeText {
186 |     fn borrow(&self) -> &str {
187 |         self.deref()
188 |     }
189 | }
190 | 
191 | //=======================================================================
192 | 
193 | /// Repairs the seam between two adjacent `NodeText`s if it splits a CRLF pair.
194 | ///
195 | /// Note: if the whole combined text is just a single CRLF pair, one of the
196 | /// two strings is left empty.
197 | pub(crate) fn fix_segment_seam(l: &mut NodeText, r: &mut NodeText) {
198 |     // Nothing to do if the seam is already a valid break.
199 |     if crlf::seam_is_break(l.as_bytes(), r.as_bytes()) {
200 |         return;
201 |     }
202 | 
203 |     let left_len = l.len();
204 |     let combined_len = left_len + r.len();
205 | 
206 |     // Candidate breaks on either side of the seam, as offsets into the combined text.
207 |     let before_seam = crlf::prev_break(left_len, l.as_bytes());
208 |     let after_seam = left_len + crlf::next_break(0, r.as_bytes());
209 | 
210 |     // Take the earlier candidate only if it is non-zero and either the later
211 |     // one sits at the very end or the left text is the longer of the two.
212 |     let use_before = before_seam != 0 && (after_seam == combined_len || left_len > r.len());
213 |     let split = if use_before { before_seam } else { after_seam };
214 | 
215 |     if split < left_len {
216 |         // Hand the tail of the left text over to the right text.
217 |         r.insert_str(0, &l[split..]);
218 |         l.truncate(split);
219 |     } else {
220 |         // Hand the head of the right text over to the left text.
221 |         let moved = split - left_len;
222 |         l.push_str(&r[..moved]);
223 |         r.truncate_front(moved);
224 |     }
225 | }
226 | 
227 | //=======================================================================
228 | 
229 | /// The unsafe guts of `NodeText`, exposed through a safe API.
230 | ///
231 | /// Try to keep this as small as possible, and implement functionality on
232 | /// `NodeText` via the safe APIs whenever possible.
233 | mod inner {
234 |     use crate::tree::MAX_BYTES;
235 |     use smallvec::{Array, SmallVec};
236 |     use std::str;
237 | 
238 |     /// The backing internal buffer type for `NodeText`: `MAX_BYTES` bytes.
239 |     #[derive(Copy, Clone)]
240 |     #[repr(transparent)]
241 |     struct BackingArray([u8; MAX_BYTES]);
242 | 
243 |     /// We need a very specific size of array, which is not necessarily
244 |     /// supported directly by the impls in the smallvec crate.  We therefore
245 |     /// have to implement this unsafe trait for our specific array size.
246 |     /// TODO: once integer const generics land, and smallvec updates its APIs
247 |     /// to use them, switch over and get rid of this unsafe impl.
248 |     unsafe impl Array for BackingArray {
249 |         type Item = u8;
250 |         fn size() -> usize {
251 |             MAX_BYTES // the length of the wrapped `[u8; MAX_BYTES]`
252 |         }
253 |     }
254 | 
255 |     /// Internal small string for `NodeText`, stored as UTF-8 bytes.
256 |     #[derive(Clone, Default)]
257 |     #[repr(C)]
258 |     pub struct NodeSmallString {
259 |         buffer: SmallVec<BackingArray>, // kept valid UTF-8 by the methods below
260 |     }
261 | 
262 |     impl NodeSmallString {
263 |         #[inline(always)]
264 |         pub fn new() -> Self {
265 |             NodeSmallString {
266 |                 buffer: SmallVec::new(), // starts out empty
267 |             }
268 |         }
269 | 
270 |         #[inline(always)]
271 |         pub fn with_capacity(capacity: usize) -> Self {
272 |             NodeSmallString {
273 |                 buffer: SmallVec::with_capacity(capacity), // empty, capacity requested up front
274 |             }
275 |         }
276 | 
277 |         #[inline(always)]
278 |         pub fn from_str(string: &str) -> Self {
279 |             let mut nodetext = NodeSmallString::with_capacity(string.len());
280 |             nodetext.insert_str(0, string); // copy all of `string` into the empty buffer
281 |             nodetext
282 |         }
283 | 
284 |         #[inline(always)]
285 |         pub fn len(&self) -> usize {
286 |             self.buffer.len() // length in bytes, not chars
287 |         }
288 | 
289 |         #[inline(always)]
290 |         pub fn as_str(&self) -> &str {
291 |             // SAFETY: NodeSmallString's methods don't allow `buffer` to become
292 |             // invalid utf8, so skipping the validity check here is sound.
293 |             unsafe { str::from_utf8_unchecked(self.buffer.as_ref()) }
294 |         }
295 | 
296 |         /// Inserts `string` at `byte_idx`.
297 |         ///
298 |         /// Panics on out-of-bounds or if `byte_idx` isn't a char boundary.
299 |         #[inline(always)]
300 |         pub fn insert_str(&mut self, byte_idx: usize, string: &str) {
301 |             assert!(self.as_str().is_char_boundary(byte_idx)); // also rejects `byte_idx > len`
302 | 
303 |             // Copy the bytes of `string` into the buffer at `byte_idx`; both
304 |             // sides of the insertion point stay valid utf8.
305 |             self.buffer.insert_from_slice(byte_idx, string.as_bytes());
306 |         }
307 | 
308 |         /// Removes text in range `[start_byte_idx, end_byte_idx)`.
309 |         ///
310 |         /// Panics on out-of-bounds or non-char-boundary indices.
311 |         #[inline(always)]
312 |         pub fn remove_range(&mut self, start_byte_idx: usize, end_byte_idx: usize) {
313 |             assert!(start_byte_idx <= end_byte_idx);
314 |             // Redundant with the checks in copy_within/is_char_boundary below.
315 |             debug_assert!(end_byte_idx <= self.len());
316 |             assert!(self.as_str().is_char_boundary(start_byte_idx));
317 |             assert!(self.as_str().is_char_boundary(end_byte_idx));
318 |             let len = self.len();
319 |             let amt = end_byte_idx - start_byte_idx;
320 | 
321 |             self.buffer.copy_within(end_byte_idx..len, start_byte_idx); // slide the tail down
322 | 
323 |             self.buffer.truncate(len - amt); // drop the stale bytes left at the end
324 |         }
325 | 
326 |         /// Removes text after `byte_idx`.  Panics on out-of-bounds or a non-char-boundary index.
327 |         #[inline(always)]
328 |         pub fn truncate(&mut self, byte_idx: usize) {
329 |             // Redundant with the is_char_boundary check below.
330 |             debug_assert!(byte_idx <= self.len());
331 |             assert!(self.as_str().is_char_boundary(byte_idx));
332 |             self.buffer.truncate(byte_idx);
333 |         }
334 | 
335 |         /// Splits at `byte_idx`, returning the right part and leaving the
336 |         /// left part in the original.
337 |         ///
338 |         /// Panics on out-of-bounds or if `byte_idx` isn't a char boundary.
339 |         #[inline(always)]
340 |         pub fn split_off(&mut self, byte_idx: usize) -> Self {
341 |             // Redundant with the is_char_boundary check below.
342 |             debug_assert!(byte_idx <= self.len());
343 |             assert!(self.as_str().is_char_boundary(byte_idx));
344 |             let len = self.len();
345 |             let mut other = NodeSmallString::with_capacity(len - byte_idx);
346 |             other.buffer.extend_from_slice(&self.buffer[byte_idx..]); // copy the right part out
347 |             self.buffer.truncate(byte_idx); // keep only the left part here
348 |             other
349 |         }
350 | 
351 |         /// Re-inlines the data if it's been heap allocated but can
352 |         /// fit inline.
353 |         #[inline(always)]
354 |         pub fn inline_if_possible(&mut self) {
355 |             if self.buffer.spilled() && (self.buffer.len() <= self.buffer.inline_size()) {
356 |                 self.buffer.shrink_to_fit();
357 |             }
358 |         }
359 |     }
360 | 
361 |     //-----------------------------------------------------------------------
362 | 
363 |     #[cfg(test)]
364 |     mod tests {
365 |         use super::*;
366 | 
367 |         #[test]
368 |         fn small_string_basics() {
369 |             let s = NodeSmallString::from_str("Hello!");
370 |             assert_eq!("Hello!", s.as_str());
371 |             assert_eq!(6, s.len());
372 |         }
373 | 
374 |         #[test]
375 |         fn insert_str_01() {
376 |             let mut s = NodeSmallString::from_str("Hello!");
377 |             s.insert_str(3, "oz");
378 |             assert_eq!("Helozlo!", s.as_str());
379 |         }
380 | 
381 |         #[test]
382 |         #[should_panic]
383 |         fn insert_str_02() {
384 |             let mut s = NodeSmallString::from_str("Hello!");
385 |             s.insert_str(7, "oz"); // 7 is past the end of the 6-byte string
386 |         }
387 | 
388 |         #[test]
389 |         #[should_panic]
390 |         fn insert_str_03() {
391 |             let mut s = NodeSmallString::from_str("こんにちは");
392 |             s.insert_str(4, "oz"); // byte 4 falls inside the second 3-byte char
393 |         }
394 | 
395 |         #[test]
396 |         fn remove_range_01() {
397 |             let mut s = NodeSmallString::from_str("Hello!");
398 |             s.remove_range(2, 4);
399 |             assert_eq!("Heo!", s.as_str());
400 |         }
401 | 
402 |         #[test]
403 |         #[should_panic]
404 |         fn remove_range_02() {
405 |             let mut s = NodeSmallString::from_str("Hello!");
406 |             s.remove_range(4, 2); // start is after end
407 |         }
408 | 
409 |         #[test]
410 |         #[should_panic]
411 |         fn remove_range_03() {
412 |             let mut s = NodeSmallString::from_str("Hello!");
413 |             s.remove_range(2, 7); // end is past the 6-byte string
414 |         }
415 | 
416 |         #[test]
417 |         #[should_panic]
418 |         fn remove_range_04() {
419 |             let mut s = NodeSmallString::from_str("こんにちは");
420 |             s.remove_range(2, 4); // neither index is a char boundary
421 |         }
422 | 
423 |         #[test]
424 |         fn truncate_01() {
425 |             let mut s = NodeSmallString::from_str("Hello!");
426 |             s.truncate(4);
427 |             assert_eq!("Hell", s.as_str());
428 |         }
429 | 
430 |         #[test]
431 |         #[should_panic]
432 |         fn truncate_02() {
433 |             let mut s = NodeSmallString::from_str("Hello!");
434 |             s.truncate(7); // 7 is past the end of the 6-byte string
435 |         }
436 | 
437 |         #[test]
438 |         #[should_panic]
439 |         fn truncate_03() {
440 |             let mut s = NodeSmallString::from_str("こんにちは");
441 |             s.truncate(4); // byte 4 is not a char boundary
442 |         }
443 | 
444 |         #[test]
445 |         fn split_off_01() {
446 |             let mut s1 = NodeSmallString::from_str("Hello!");
447 |             let s2 = s1.split_off(4);
448 |             assert_eq!("Hell", s1.as_str());
449 |             assert_eq!("o!", s2.as_str());
450 |         }
451 | 
452 |         #[test]
453 |         #[should_panic]
454 |         fn split_off_02() {
455 |             let mut s1 = NodeSmallString::from_str("Hello!");
456 |             s1.split_off(7); // 7 is past the end of the 6-byte string
457 |         }
458 | 
459 |         #[test]
460 |         #[should_panic]
461 |         fn split_off_03() {
462 |             let mut s1 = NodeSmallString::from_str("こんにちは");
463 |             s1.split_off(4); // byte 4 is not a char boundary
464 |         }
465 |     }
466 | }
467 | 


--------------------------------------------------------------------------------
/src/tree/text_info.rs:
--------------------------------------------------------------------------------
 1 | use std::ops::{Add, AddAssign, Sub, SubAssign};
 2 | 
 3 | use crate::str_utils::{count_chars, count_line_breaks, count_utf16_surrogates};
 4 | use crate::tree::Count;
 5 | 
 6 | #[derive(Debug, Copy, Clone, PartialEq)]
 7 | pub struct TextInfo {
 8 |     pub(crate) bytes: Count, // length of the text in bytes
 9 |     pub(crate) chars: Count, // as counted by `count_chars`
10 |     pub(crate) utf16_surrogates: Count, // as counted by `count_utf16_surrogates`
11 |     pub(crate) line_breaks: Count, // as counted by `count_line_breaks`
12 | }
13 | 
14 | impl TextInfo {
15 |     #[inline]
16 |     pub fn new() -> TextInfo {
17 |         Self {
18 |             bytes: 0, // every count starts out at zero
19 |             chars: 0,
20 |             utf16_surrogates: 0,
21 |             line_breaks: 0,
22 |         }
23 |     }
24 | 
25 |     #[inline]
26 |     pub fn from_str(text: &str) -> TextInfo {
27 |         let mut info = Self::new(); // all zeros; each count is then measured from `text`
28 |         info.bytes = text.len() as Count;
29 |         info.chars = count_chars(text) as Count;
30 |         info.utf16_surrogates = count_utf16_surrogates(text) as Count;
31 |         info.line_breaks = count_line_breaks(text) as Count;
32 |         info
33 |     }
34 | }
35 | 
36 | impl Add for TextInfo {
37 |     type Output = Self;
38 |     #[inline]
39 |     fn add(mut self, rhs: TextInfo) -> TextInfo {
40 |         // Accumulate `rhs` into our own copy, one counter at a time.
41 |         self.bytes += rhs.bytes;
42 |         self.chars += rhs.chars;
43 |         self.utf16_surrogates += rhs.utf16_surrogates;
44 |         self.line_breaks += rhs.line_breaks;
45 |         self
46 |     }
47 | }
48 | 
49 | impl AddAssign for TextInfo {
50 |     #[inline]
51 |     fn add_assign(&mut self, other: TextInfo) {
52 |         *self = *self + other; // `TextInfo` is `Copy`; this reuses the `Add` impl
53 |     }
54 | }
55 | 
56 | impl Sub for TextInfo {
57 |     type Output = Self;
58 |     #[inline]
59 |     fn sub(mut self, rhs: TextInfo) -> TextInfo {
60 |         // Take `rhs` away from our own copy, one counter at a time.
61 |         self.bytes -= rhs.bytes;
62 |         self.chars -= rhs.chars;
63 |         self.utf16_surrogates -= rhs.utf16_surrogates;
64 |         self.line_breaks -= rhs.line_breaks;
65 |         self
66 |     }
67 | }
68 | 
69 | impl SubAssign for TextInfo {
70 |     #[inline]
71 |     fn sub_assign(&mut self, other: TextInfo) {
72 |         *self = *self - other; // `TextInfo` is `Copy`; this reuses the `Sub` impl
73 |     }
74 | }
75 | 


--------------------------------------------------------------------------------
/tests/clone_rope.rs:
--------------------------------------------------------------------------------
 1 | extern crate ropey;
 2 | 
 3 | use std::iter::Iterator;
 4 | 
 5 | use ropey::Rope;
 6 | 
 7 | const TEXT: &str = include_str!("test_text.txt");
 8 | 
 9 | #[test]
10 | #[cfg_attr(miri, ignore)]
11 | fn clone_rope() {
12 |     // Edits that are applied, in this order, to the original and to its
13 |     // clone.
14 |     let edits = [
15 |         (432, "Hello "),
16 |         (2345, "world! "),
17 |         (5256, "How are "),
18 |         (53, "you "),
19 |         (768, "doing?\r\n"),
20 |     ];
21 | 
22 |     let mut rope1 = Rope::from_str(TEXT);
23 |     let mut rope2 = rope1.clone();
24 |     for rope in [&mut rope1, &mut rope2] {
25 |         for (char_idx, text) in edits {
26 |             rope.insert(char_idx, text);
27 |         }
28 |     }
29 | 
30 |     // True when every pair of chars from zipping the two ropes is equal.
31 |     let chars_match =
32 |         |a: &Rope, b: &Rope| Iterator::zip(a.chars(), b.chars()).all(|(x, y)| x == y);
33 | 
34 |     // Identical edits must leave the two ropes matching.
35 |     assert!(chars_match(&rope1, &rope2));
36 | 
37 |     // Insert something into the clone only, and make sure they don't match
38 |     // afterwards.
39 |     rope2.insert(3891, "I'm doing fine, thanks!");
40 |     assert!(!chars_match(&rope1, &rope2));
41 | }
42 | 


--------------------------------------------------------------------------------
/tests/clone_rope_to_thread.rs:
--------------------------------------------------------------------------------
 1 | extern crate ropey;
 2 | 
 3 | use std::sync::mpsc;
 4 | use std::thread;
 5 | 
 6 | use std::iter::Iterator;
 7 | 
 8 | use ropey::Rope;
 9 | 
10 | const TEXT: &str = include_str!("test_text.txt");
11 | 
12 | #[test]
13 | #[cfg_attr(miri, ignore)]
14 | fn clone_rope_to_thread() {
15 |     // First batch of edits: applied to the original on this thread and to
16 |     // the clone on the worker thread.
17 |     let edits = [
18 |         (432, "Hello "),
19 |         (2345, "world! "),
20 |         (5256, "How are "),
21 |         (53, "you "),
22 |         (768, "doing?\r\n"),
23 |     ];
24 | 
25 |     // True when every pair of chars from zipping the two ropes is equal.
26 |     let chars_match =
27 |         |a: &Rope, b: &Rope| Iterator::zip(a.chars(), b.chars()).all(|(x, y)| x == y);
28 | 
29 |     let mut rope1 = Rope::from_str(TEXT);
30 |     let rope2 = rope1.clone();
31 | 
32 |     // Spawn a worker that receives a rope, edits it and sends it back, twice.
33 |     let (tx1, rx1) = mpsc::channel::<Rope>();
34 |     let (tx2, rx2) = mpsc::channel::<Rope>();
35 |     thread::spawn(move || {
36 |         // Round one: the shared batch of edits.
37 |         let mut rope = rx1.recv().unwrap();
38 |         for (char_idx, text) in edits {
39 |             rope.insert(char_idx, text);
40 |         }
41 |         tx2.send(rope).unwrap();
42 | 
43 |         // Round two: one more edit that `rope1` never gets.
44 |         let mut rope = rx1.recv().unwrap();
45 |         rope.insert(3891, "I'm doing fine, thanks!");
46 |         tx2.send(rope).unwrap();
47 |     });
48 | 
49 |     // Send the clone to the worker, then make the same first-round edits to
50 |     // `rope1` on this thread.
51 |     tx1.send(rope2).unwrap();
52 |     for (char_idx, text) in edits {
53 |         rope1.insert(char_idx, text);
54 |     }
55 | 
56 |     // Get the clone back and make sure the two ropes match.
57 |     let rope2 = rx2.recv().unwrap();
58 |     assert!(chars_match(&rope1, &rope2));
59 | 
60 |     // Send the clone over again for round two...
61 |     tx1.send(rope2).unwrap();
62 | 
63 |     // ...and make sure that it no longer matches when it comes back.
64 |     let rope2 = rx2.recv().unwrap();
65 |     assert!(!chars_match(&rope1, &rope2));
66 | }
67 | 


--------------------------------------------------------------------------------
/tests/crlf.rs:
--------------------------------------------------------------------------------
 1 | //! Randomized tests to try to catch crlf seam errors.
 2 | 
 3 | extern crate rand;
 4 | extern crate ropey;
 5 | 
 6 | use rand::Rng;
 7 | use ropey::Rope;
 8 | 
 9 | #[test]
10 | #[cfg_attr(miri, ignore)]
11 | fn crlf_inserts() {
12 |     let mut rng = rand::thread_rng();
13 |     let mut tree = Rope::new();
14 | 
15 |     // One round of inserts: CR/LF-heavy fragments with some multi-byte text
16 |     // mixed in.
17 |     let (crlf, lfcr) = ("\r\n\r\n", "\n\r\n\r");
18 |     let round = [
19 |         crlf, lfcr, crlf, lfcr, crlf, "こんいちは、",
20 |         lfcr, crlf, lfcr, crlf, lfcr, "みんなさん！",
21 |     ];
22 | 
23 |     // Do a bunch of random incoherent inserts, one round at a time.
24 |     for _ in 0..(1 << 12) {
25 |         // All inserts of a round pick their position from the length measured
26 |         // at the start of that round.
27 |         let len = tree.len_chars().max(1);
28 |         for text in round {
29 |             tree.insert(rng.gen::<usize>() % len, text);
30 |         }
31 | 
32 |         // Make sure the tree is sound
33 |         tree.assert_invariants();
34 |     }
35 | }
36 | 
37 | #[test]
38 | #[cfg_attr(miri, ignore)]
39 | fn crlf_removals() {
40 |     let mut rng = rand::thread_rng();
41 |     let mut tree = Rope::new();
42 | 
43 |     // Build the tree out of many copies of one CR/LF-heavy fragment.
44 |     for _ in 0..(1 << 9) {
45 |         let len = tree.len_chars().max(1);
46 |         tree.insert(rng.gen::<usize>() % len, "\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\nこんいちは、\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\nこんいちは、r\n\r\n\r\n\r\nみんなさん！\n\r\n\r\n\r\nこんいちは、\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\nみんなさん！\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\r\n\r\n\r\n\r\n\r\n\r\nみんなさん！\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\rみんなさん！\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r");
47 |     }
48 | 
49 |     // Do a bunch of random incoherent removals: each round removes up to 5
50 |     // and then up to 9 chars, each time starting at a random position.
51 |     for _ in 0..(1 << 11) {
52 |         for span in [5, 9] {
53 |             let start = rng.gen::<usize>() % tree.len_chars().max(1);
54 |             let end = (start + span).min(tree.len_chars());
55 |             tree.remove(start..end);
56 |         }
57 | 
58 |         // Make sure the tree is sound
59 |         // after every round.
60 |         tree.assert_invariants();
61 |     }
62 | }
63 | 


--------------------------------------------------------------------------------
/tests/fix_tree.rs:
--------------------------------------------------------------------------------
 1 | extern crate ropey;
 2 | 
 3 | use ropey::Rope;
 4 | 
 5 | const MEDIUM_TEXT: &str = include_str!("medium.txt");
 6 | 
 7 | #[test]
 8 | #[cfg_attr(miri, ignore)]
 9 | fn remove_at_chunk_boundery() {
10 |     let mut rope = Rope::from_str(MEDIUM_TEXT);
11 |     // Remove a range that falls exactly on a chunk boundary, to trigger an
12 |     // edge case in `fix_tree_seam`.
13 |     rope.remove(31354..58881);
14 | 
15 |     // The rope must still be sound afterwards.
16 |     rope.assert_integrity();
17 |     rope.assert_invariants();
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/from_reader.rs:
--------------------------------------------------------------------------------
 1 | extern crate rand;
 2 | extern crate ropey;
 3 | 
 4 | use std::io::Cursor;
 5 | 
 6 | use ropey::Rope;
 7 | 
 8 | const TEXT: &str = include_str!("test_text.txt");
 9 | 
10 | #[test]
11 | #[cfg_attr(miri, ignore)]
12 | fn from_reader_01() {
13 |     // Read the rope straight out of a reader over our in-memory text.
14 |     let rope = Rope::from_reader(Cursor::new(TEXT)).unwrap();
15 | 
16 |     // The contents must come through unchanged...
17 |     assert_eq!(rope, TEXT);
18 | 
19 |     // ...and the tree must be sound.
20 |     // (Both checks are provided by the rope itself.)
21 |     rope.assert_integrity();
22 |     rope.assert_invariants();
23 | }
24 | 
25 | #[test]
26 | #[cfg_attr(miri, ignore)]
27 | fn from_reader_02() {
28 |     // Read the rope out of a reader over blank text.
29 |     let rope = Rope::from_reader(Cursor::new("")).unwrap();
30 | 
31 |     // Blank input must give a rope equal to the empty string...
32 |     assert_eq!(rope, "");
33 | 
34 |     // ...and the tree must be sound.
35 |     // (Both checks are provided by the rope itself.)
36 |     rope.assert_integrity();
37 |     rope.assert_invariants();
38 | }
39 | 
40 | #[test]
41 | #[cfg_attr(miri, ignore)]
42 | fn from_reader_03() {
43 |     // Copy the text's bytes and plant a utf8-invalid byte sequence in the
44 |     // copy.
45 |     let mut bytes = TEXT.as_bytes().to_vec();
46 |     bytes[6132] = 0b1100_0000;
47 |     bytes[6133] = 0b0100_0000;
48 | 
49 |     // Make a reader from the invalid data
50 |     let text_reader = Cursor::new(bytes);
51 | 
52 |     // Try to read the data, and verify that it fails with the right kind
53 |     // of error.
54 |     match Rope::from_reader(text_reader) {
55 |         Err(e) => assert_eq!(e.kind(), std::io::ErrorKind::InvalidData),
56 |         Ok(_) => panic!("Should have returned an invalid data error."),
57 |     }
58 | }
59 | 


--------------------------------------------------------------------------------
/tests/from_str.rs:
--------------------------------------------------------------------------------
 1 | extern crate ropey;
 2 | 
 3 | use ropey::Rope;
 4 | 
 5 | const TEXT: &str = include_str!("test_text.txt");
 6 | 
 7 | #[test]
 8 | #[cfg_attr(miri, ignore)]
 9 | fn from_str() {
10 |     // Build a rope from the contents of the included text file
11 |     let rope = Rope::from_str(TEXT);
12 | 
13 |     // Verify rope integrity before looking at the contents
14 |     rope.assert_integrity();
15 |     rope.assert_invariants();
16 | 
17 |     // Verify that the rope's contents equal the source text
18 |     assert_eq!(rope, TEXT);
19 | }
20 | 


--------------------------------------------------------------------------------
/tests/hash.rs:
--------------------------------------------------------------------------------
  1 | extern crate ropey;
  2 | 
  3 | use std::hash::{Hash, Hasher};
  4 | 
  5 | use ropey::RopeBuilder;
  6 | 
  7 | const SMALL_TEXT: &str = include_str!("small_ascii.txt");
  8 | 
  9 | /// This is an example `Hasher` to demonstrate a property guaranteed by
 10 | /// the documentation that is not exploited by the default `Hasher` (SipHash).
 11 | /// Relevant excerpt from the `Hasher` documentation:
 12 | /// > Nor can you assume that adjacent
 13 | /// > `write` calls are merged, so it's possible, for example, that
 14 | /// > ```
 15 | /// > # fn foo(hasher: &mut impl std::hash::Hasher) {
 16 | /// > hasher.write(&[1, 2]);
 17 | /// > hasher.write(&[3, 4, 5, 6]);
 18 | /// > # }
 19 | /// > ```
 20 | /// > and
 21 | /// > ```
 22 | /// > # fn foo(hasher: &mut impl std::hash::Hasher) {
 23 | /// > hasher.write(&[1, 2, 3, 4]);
 24 | /// > hasher.write(&[5, 6]);
 25 | /// > # }
 26 | /// > ```
 27 | /// > end up producing different hashes.
 28 | ///
 29 | /// This dummy hasher forwards all bytes to a wrapped `DefaultHasher` and adds a separator byte (0xFF) after each `write`.
 30 | /// While this hasher might seem a little silly, it is perfectly in line with the std documentation.
 31 | /// Many other commonly used high performance `Hasher`s (fxhash, ahash, fnvhash) exploit the same property
 32 | /// to improve the performance of `write`, so violating this property will cause issues in practice.
 33 | #[derive(Default)]
 34 | struct TestHasher(std::collections::hash_map::DefaultHasher);
 35 | impl Hasher for TestHasher {
 36 |     fn finish(&self) -> u64 {
 37 |         self.0.finish() // the wrapped `DefaultHasher` produces the final value
 38 |     }
 39 | 
 40 |     fn write(&mut self, bytes: &[u8]) {
 41 |         self.0.write(bytes);
 42 |         self.0.write_u8(0xFF); // separator byte, fed in after every `write` call
 43 |     }
 44 | }
 45 | 
 46 | #[test]
 47 | #[cfg_attr(miri, ignore)]
 48 | fn hash_1() {
 49 |     // Builds a rope by appending each of the given pieces as its own chunk.
 50 |     let build = |pieces: &[&str]| {
 51 |         let mut builder = RopeBuilder::new();
 52 |         for piece in pieces {
 53 |             builder._append_chunk(piece);
 54 |         }
 55 |         builder._finish_no_fix()
 56 |     };
 57 | 
 58 |     // Two ropes with the same contents but different chunk boundaries.
 59 |     let r1 = build(&["Hello w", "orld"]);
 60 |     let r2 = build(&["Hell", "o world"]);
 61 | 
 62 |     // Hash each rope with its own fresh `TestHasher`.
 63 |     let mut hasher1 = TestHasher::default();
 64 |     let mut hasher2 = TestHasher::default();
 65 |     r1.hash(&mut hasher1);
 66 |     r2.hash(&mut hasher2);
 67 | 
 68 |     assert_eq!(hasher1.finish(), hasher2.finish());
 69 | }
 70 | 
 71 | #[test]
 72 | #[cfg_attr(miri, ignore)]
 73 | fn hash_2() {
 74 |     // Builds a rope from `SMALL_TEXT`, appended in pieces of at most `size`
 75 |     // bytes.
 76 |     let build = |size: usize| {
 77 |         let mut builder = RopeBuilder::new();
 78 |         for piece in SMALL_TEXT.as_bytes().chunks(size) {
 79 |             builder._append_chunk(std::str::from_utf8(piece).unwrap());
 80 |         }
 81 |         builder._finish_no_fix()
 82 |     };
 83 | 
 84 |     // Two ropes with the same contents but different chunk boundaries.
 85 |     let r1 = build(5);
 86 |     let r2 = build(7);
 87 | 
 88 |     // Line by line, the two ropes must produce identical hashes.
 89 |     for (l1, l2) in r1.lines().zip(r2.lines()) {
 90 |         // A fresh pair of hashers for every pair of lines.
 91 |         let mut hasher1 = TestHasher::default();
 92 |         let mut hasher2 = TestHasher::default();
 93 |         l1.hash(&mut hasher1);
 94 |         l2.hash(&mut hasher2);
 95 | 
 96 |         assert_eq!(hasher1.finish(), hasher2.finish());
 97 |     }
 98 | }
 99 | 
100 | #[test]
101 | #[cfg_attr(miri, ignore)]
102 | fn hash_3() {
103 |     // Builds a rope from `SMALL_TEXT`, appended in pieces of at most `size` bytes.
104 |     let build = |size: usize| {
105 |         let mut builder = RopeBuilder::new();
106 |         for piece in SMALL_TEXT.as_bytes().chunks(size) {
107 |             builder._append_chunk(std::str::from_utf8(piece).unwrap());
108 |         }
109 |         builder._finish_no_fix()
110 |     };
111 | 
112 |     // Two ropes with the same contents but different chunk boundaries.
113 |     let r1 = build(521);
114 |     let r2 = build(547);
115 | 
116 |     // Hash both ropes in full, each with its own fresh `TestHasher`; the
117 |     // results must be identical.
118 | 
119 |     let mut hasher1 = TestHasher::default();
120 |     let mut hasher2 = TestHasher::default();
121 |     r1.hash(&mut hasher1);
122 |     r2.hash(&mut hasher2);
123 | 
124 |     assert_eq!(hasher1.finish(), hasher2.finish());
125 | }
126 | 


--------------------------------------------------------------------------------
/tests/lifetimes.rs:
--------------------------------------------------------------------------------
 1 | //! This test file ensures that all of the lifetimes work the way we
 2 | //! want, and that there are no regressions. It's a "does this compile?"
 3 | //! test.
 4 | 
 5 | extern crate ropey;
 6 | 
 7 | use ropey::{Rope, RopeSlice};
 8 | 
 9 | const TEXT: &str = include_str!("test_text.txt");
10 | 
11 | fn main() {
12 |     if cfg!(miri) {
13 |         return; // nothing below runs under Miri
14 |     }
15 | 
16 |     let rope = Rope::from_str(TEXT);
17 | 
18 |     let (a, b, c, d, e, f, g, count, line, string) = {
19 |         // The lifetimes of intermediate slices shouldn't matter.  The
20 |         // lifetimes of the things produced by the calls below should be
21 |         // tied to the lifetime of the original rope, not the lifetimes of
22 |         // the slices they were created from.  Therefore, this should all
23 |         // compile.
24 | 
25 |         let a = rope.slice(4..500).slice(4..400).slice(4..300);
26 |         let b = rope.slice(4..500).slice(4..400).as_str();
27 |         let c = rope.slice(4..500).slice(4..400).line(1);
28 |         let d = rope.line(1).slice(4..20).slice(4..10);
29 |         let e = rope.slice(4..500).slice(4..400).chunk_at_byte(50);
30 |         let f = rope.slice(4..500).slice(4..400).chunk_at_char(50);
31 |         let g = rope.slice(4..500).slice(4..400).chunk_at_line_break(3);
32 | 
33 |         // Same for iterators.  In addition, the items _yielded_ by the
34 |         // iterators should also be tied to the lifetime of the original
35 |         // rope, not to the iterators or slices they came from.
36 | 
37 |         let mut count = 0;
38 |         for _ in rope.slice(4..500).slice(4..400).bytes() {
39 |             count += 1;
40 |         }
41 |         for _ in rope.slice(4..500).slice(4..400).chars() {
42 |             count += 1;
43 |         }
44 | 
45 |         let mut line: RopeSlice = "".into();
46 |         for l in rope.slice(4..500).slice(4..400).lines() {
47 |             line = l; // the yielded slice is stored past the end of the loop
48 |         }
49 |         line = line.slice(..).slice(..);
50 | 
51 |         let mut string = "";
52 |         for c in rope.slice(4..500).slice(4..400).chunks() {
53 |             string = c; // likewise for the yielded chunk
54 |         }
55 | 
56 |         (a, b, c, d, e, f, g, count, line, string) // all of it leaves the inner block
57 |     };
58 | 
59 |     println!(
60 |         "{} {:?} {} {} {:?} {:?} {:?} {} {} {}",
61 |         a, b, c, d, e, f, g, count, line, string
62 |     );
63 | }
64 | 


--------------------------------------------------------------------------------
/tests/non_ascii.txt:
--------------------------------------------------------------------------------
 1 | _____________
 2 | 
 3 | ______________ㅇ_________ㅇㅇㅇ____ㅇㅇㅇㅇㅇㅇㅇㅇㅇㅇㅇㅇㅇㅇㅇㅇㅇㅇ__________
 4 | ________________
 5 | _____________________________________________
 6 | ____________________________________________
 7 | _______________
 8 | _____________________________________
 9 | ____________________________________________________________________
10 | ______________________
11 | ________________________
12 | _________________________________________________
13 | ______________________________________________________________
14 | ____________________
15 | ______________________________________________
16 | _________________
17 | ________________
18 | 
19 | __________________________________________________________
20 | ____________________________________________________
21 | _______________
22 | 
23 | ________________________________________________
24 | ______________________
25 | ________________________
26 | ________________________________________________________________________
27 | ______________________________
28 | ____________________
29 | __________________________________________
30 | __________________
31 | ____________________________________________
32 | ________________
33 | 
34 | __________________
35 | _____________________
36 | _____________________________________________________
37 | ____________________________________________
38 | _________________
39 | ___________________________________________
40 | __________________________________________
41 | ___________________________________________________
42 | ___________________________________________________________________
43 | _______________________
44 | _______________
45 | _____________
46 | 
47 | ______________ㅇㅇㅇㅇㅇㅇ____ㅇ_ㅇㅇㅇ____
48 | ______________ㅇㅇㅇㅇ_____________ㅇㅇㅇㅇㅇㅇㅇㅇ____
49 | ____________________________________
50 | ________________________________________________________________________________
51 | ______________________________________
52 | ______________
53 | 
54 | ______________ㅇㅇㅇㅇ_______ㅇㅇㅇㅇㅇㅇ_______ㅇㅇ_____________ㅇㅇㅇㅇㅇ____
55 | ________________________________
56 | __________________________________________________________________
57 | _________________________
58 | ______________
59 | 
60 | __________________________________________________________________
61 | _______________________________________________
62 | 
63 | ______________________________________
64 | ________________________________________________
65 | 
66 | _______________________________________________
67 | __________________________________________________________________
68 | ___________
69 | _________
70 | _______________________
71 | ________________________________________________
72 | _______
73 | ______
74 | 
75 | ________________________________
76 | _______________________________________
77 | ______
78 | 


--------------------------------------------------------------------------------
/tests/non_ascii_comparison.rs:
--------------------------------------------------------------------------------
 1 | extern crate ropey;
 2 | 
 3 | use ropey::Rope;
 4 | 
 5 | const TEXT1: &str = include_str!("non_ascii.txt");
 6 | 
 7 | #[test]
 8 | #[allow(clippy::cmp_owned)]
 9 | #[cfg_attr(miri, ignore)]
10 | fn non_ascii_eq() {
11 |     // Two ropes built from the same file contents...
12 |     let original = Rope::from_str(TEXT1);
13 |     // ...the second of which then has a range of chars removed.
14 |     let mut edited = Rope::from_str(TEXT1);
15 |     edited.remove(1467..1827);
16 |     for line1 in original.lines() {
17 |         for line2 in edited.lines() {
18 |             println!("lines1: {line1} line2: {line2}");
19 |             println!("{}", line1.to_string() == line2);
20 |             println!("{}", line1 == line2);
21 |         }
22 |     }
23 | }
24 | 
25 | #[test]
26 | #[cfg_attr(miri, ignore)]
27 | fn non_ascii_ord() {
28 |     // Reference rope holding the untouched file contents.
29 |     let rope1 = Rope::from_str(TEXT1);
30 | 
31 |     // Second rope: the same contents with the range 1467..1827 removed.
32 |     let rope2 = {
33 |         let mut edited = Rope::from_str(TEXT1);
34 |         edited.remove(1467..1827);
35 |         edited
36 |     };
37 | 
38 |     // Order each line of the edited rope against each line of the reference;
39 |     // nothing is asserted here, the comparison results are only printed.
40 |     rope1.lines().for_each(|line1| {
41 |         rope2.lines().for_each(|line2| {
42 |             println!("lines1: {line1} line2: {line2}");
43 |             println!("{:?}", line2.partial_cmp(&line1));
44 |         });
45 |     });
46 | }
47 | 


--------------------------------------------------------------------------------
/tests/proptest_tests.proptest-regressions:
--------------------------------------------------------------------------------
 1 | # Seeds for failure cases proptest has generated in the past. It is
 2 | # automatically read and these particular cases re-run before any
 3 | # novel cases are generated.
 4 | #
 5 | # It is recommended to check this file in to source control so that
 6 | # everyone who runs the test benefits from these saved cases.
 7 | xs 3540557325 1860947506 820813434 2332024384 # shrinks to ref char_idxs = []
 8 | xs 3233894212 2188982698 1827697686 2168070367 # shrinks to ref char_idxs = [0]
 9 | xs 3424326027 3025228200 341421083 1156727702 # shrinks to ref text = "0𑊏טּ𑰀®00𑙐AA 🢐0Aⶠ🇦  A0A ຍ \u{11da0} ", idx = 80
10 | xs 3459659596 2914190641 2301470235 1246147682 # shrinks to ref char_idxs = []
11 | xs 1766171267 3090483113 1561813983 1680450853 # shrinks to idx1 = 0, idx2 = 18268
12 | xs 3414034384 4148684142 2271524827 1599993371 # shrinks to idx = 11953
13 | cc 4eb27f71f7d1f5c5c97eaaac4b74805ef091f7205b89fe8fee4eb666a7072913 # shrinks to range = (16564, 17055)
14 | cc c263569742fedaf3cdfee5398cbe1c0d761179ff67d54984695e17b8ac601e12 # shrinks to ref text = "\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r
\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n", idx = 0
15 | cc 92cbac03b8c705ff1c62e10d689efaee7363e8ca27da4dee474f80e9e9fdb7cc # shrinks to char_idx = 17630, ref ins_text = "A"
16 | cc 530b4c5540b55e91808e710667a15d18081849b99ce32f42aa5c0739a362c7d0 # shrinks to idx = 10494
17 | cc 3f41757544c527c968ee0bd9c6941025b092acab605a2d228176209af312862e # shrinks to idx = 16928
18 | 


--------------------------------------------------------------------------------
/tests/shrink_to_fit.rs:
--------------------------------------------------------------------------------
 1 | extern crate rand;
 2 | extern crate ropey;
 3 | 
 4 | use rand::Rng;
 5 | use ropey::Rope;
 6 | 
 7 | #[test]
 8 | #[cfg_attr(miri, ignore)]
 9 | fn shrink_to_fit() {
10 |     // Fragments inserted, in this order, on every round of the loop below.
11 |     const FRAGMENTS: [&str; 12] = [
12 |         "Hello ",
13 |         "world! ",
14 |         "How are ",
15 |         "you ",
16 |         "doing?\r\n",
17 |         "Let's ",
18 |         "keep ",
19 |         "inserting ",
20 |         "more ",
21 |         "items.\r\n",
22 |         "こんいちは、",
23 |         "みんなさん！",
24 |     ];
25 | 
26 |     let mut rng = rand::thread_rng();
27 |     let mut rope = Rope::new();
28 | 
29 |     // Scatter the fragments at random positions, 1 << 12 rounds in total.
30 |     for _ in 0..(1 << 12) {
31 |         // The bound is read once per round (`len_chars()`, or 1 when that is 0),
32 |         // so every index drawn within the round is below that bound.
33 |         let bound = rope.len_chars().max(1);
34 |         for &fragment in FRAGMENTS.iter() {
35 |             let at = rng.gen::<usize>() % bound;
36 |             rope.insert(at, fragment);
37 |         }
38 |     }
39 | 
40 |     // Keep a clone around, then shrink only the first rope.
41 |     let rope2 = rope.clone();
42 |     rope.shrink_to_fit();
43 | 
44 |     // Contents must compare equal while the reported capacity must be smaller.
45 |     assert_eq!(rope, rope2);
46 |     assert!(rope.capacity() < rope2.capacity());
47 | 
48 |     // Run the integrity and invariant checks on the shrunk rope, then on the clone.
49 |     for checked in [&rope, &rope2].iter() {
50 |         checked.assert_integrity();
51 |         checked.assert_invariants();
52 |     }
53 | }
54 | 


--------------------------------------------------------------------------------
/tests/small_ascii.txt:
--------------------------------------------------------------------------------
 1 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas sit amet tellus 
 2 | nec turpis feugiat semper. Nam at nulla laoreet, finibus eros sit amet, fringilla 
 3 | mauris. Fusce vestibulum nec ligula efficitur laoreet. Nunc orci leo, varius eget 
 4 | ligula vulputate, consequat eleifend nisi. Cras justo purus, imperdiet a augue 
 5 | malesuada, convallis cursus libero. Fusce pretium arcu in elementum laoreet. Duis 
 6 | mauris nulla, suscipit at est nec, malesuada pellentesque eros. Quisque semper porta 
 7 | malesuada. Nunc hendrerit est ac faucibus mollis. Nam fermentum id libero sed 
 8 | egestas. Duis a accumsan sapien. Nam neque diam, congue non erat et, porta sagittis 
 9 | turpis. Vivamus vitae mauris sit amet massa mollis molestie. Morbi scelerisque, 
10 | augue id congue imperdiet, felis lacus euismod dui, vitae facilisis massa dui quis 
11 | sapien. Vivamus hendrerit a urna a lobortis.
12 | 
13 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas sit amet tellus 
14 | nec turpis feugiat semper. Nam at nulla laoreet, finibus eros sit amet, fringilla 
15 | mauris. Fusce vestibulum nec ligula efficitur laoreet. Nunc orci leo, varius eget 
16 | ligula vulputate, consequat eleifend nisi. Cras justo purus, imperdiet a augue 
17 | malesuada, convallis cursus libero. Fusce pretium arcu in elementum laoreet. Duis 
18 | mauris nulla, suscipit at est nec, malesuada pellentesque eros. Quisque semper porta 
19 | malesuada. Nunc hendrerit est ac faucibus mollis. Nam fermentum id libero sed 
20 | egestas. Duis a accumsan sapien. Nam neque diam, congue non erat et, porta sagittis 
21 | turpis. Vivamus vitae mauris sit amet massa mollis molestie. Morbi scelerisque, 
22 | augue id congue imperdiet, felis lacus euismod dui, vitae facilisis massa dui quis 
23 | sapien. Vivamus hendrerit a urna a lobortis.
24 | 
25 | 


--------------------------------------------------------------------------------
/tests/small_random_inserts.rs:
--------------------------------------------------------------------------------
 1 | extern crate rand;
 2 | extern crate ropey;
 3 | 
 4 | use rand::Rng;
 5 | use ropey::Rope;
 6 | 
 7 | #[test]
 8 | #[cfg_attr(miri, ignore)]
 9 | fn small_random_inserts() {
10 |     // Fragments inserted, in this order, on every round of the loop below.
11 |     let fragments = [
12 |         "Hello ",
13 |         "world! ",
14 |         "How are ",
15 |         "you ",
16 |         "doing?\r\n",
17 |         "Let's ",
18 |         "keep ",
19 |         "inserting ",
20 |         "more ",
21 |         "items.\r\n",
22 |         "こんいちは、",
23 |         "みんなさん！",
24 |     ];
25 | 
26 |     let mut rng = rand::thread_rng();
27 |     let mut rope = Rope::new();
28 | 
29 |     // Scatter the fragments at random positions, 1 << 10 rounds in total.
30 |     for _ in 0..(1 << 10) {
31 |         // The bound is read once per round (`len_chars()`, or 1 when that is 0),
32 |         // so every index drawn within the round is below that bound.
33 |         let bound = rope.len_chars().max(1);
34 |         for &fragment in fragments.iter() {
35 |             let at = rng.gen::<usize>() % bound;
36 |             rope.insert(at, fragment);
37 |         }
38 |     }
39 | 
40 |     // Finish by running the integrity and invariant checks on the result.
41 |     rope.assert_integrity();
42 |     rope.assert_invariants();
43 | }
44 | 


--------------------------------------------------------------------------------