├── .gitignore
├── .travis.yml
├── Cargo.toml
├── LICENSE
├── README.md
├── build.rs
├── src
    ├── grapheme.rs
    ├── lib.rs
    └── util.rs
└── update-docs.py


/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | Cargo.lock
3 | .settings
4 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: rust
 2 | rust:
 3 |   - 1.1.0
 4 |   - 1.2.0
 5 |   - 1.3.0
 6 |   - 1.4.0
 7 |   - 1.5.0
 8 |   - 1.6.0
 9 |   - 1.7.0
10 |   - stable
11 |   - beta
12 |   - nightly
13 | matrix:
14 |   allow_failures:
15 |     - rust: nightly
16 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "strcursor"
 3 | version = "0.2.4"
 4 | authors = ["Daniel Keep <daniel.keep@gmail.com>"]
 5 | 
 6 | description = "Provides a string cursor type for seeking through a string whilst respecting grapheme cluster and code point boundaries."
 7 | repository = "https://github.com/DanielKeep/strcursor"
 8 | documentation = "https://danielkeep.github.io/strcursor/doc/strcursor/index.html"
 9 | readme = "README.md"
10 | keywords = ["str", "string", "cursor", "grapheme", "unicode"]
11 | license = "MIT/Apache-2.0"
12 | 
13 | build = "build.rs"
14 | 
15 | exclude = [
16 |     "update-docs.py",
17 | ]
18 | 
19 | [dependencies]
20 | # 0.1.3 breaks semver
21 | unicode-segmentation = "0.1.0, <0.1.3"
22 | 
23 | [build-dependencies]
24 | rustc_version = "0.1.4"
25 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Copyright ⓒ 2015, 2016 Daniel Keep.
  2 | 
  3 | Licensed under either of:
  4 | 
  5 | * MIT license, or
  6 | * Apache License, Version 2.0
  7 | 
  8 | at your option.
  9 | 
 10 | Unless you explicitly state otherwise, any contribution intentionally
 11 | submitted for inclusion in the work by you shall be dual licensed as
 12 | above, without any additional terms or conditions.
 13 | 
 14 | # MIT License
 15 | 
 16 | Permission is hereby granted, free of charge, to any person obtaining
 17 | a copy of this software and associated documentation files (the
 18 | "Software"), to deal in the Software without restriction, including
 19 | without limitation the rights to use, copy, modify, merge, publish,
 20 | distribute, sublicense, and/or sell copies of the Software, and to
 21 | permit persons to whom the Software is furnished to do so, subject
 22 | to the following conditions:
 23 | 
 24 | The above copyright notice and this permission notice shall be included
 25 | in all copies or substantial portions of the Software.
 26 | 
 27 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 28 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 29 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 30 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 31 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 32 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 33 | OTHER DEALINGS IN THE SOFTWARE.
 34 | 
 35 | # Apache License, Version 2.0
 36 | 
 37 |                               Apache License
 38 |                         Version 2.0, January 2004
 39 |                      http://www.apache.org/licenses/
 40 | 
 41 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
 42 | 
 43 | 1. Definitions.
 44 | 
 45 |    "License" shall mean the terms and conditions for use, reproduction,
 46 |    and distribution as defined by Sections 1 through 9 of this document.
 47 | 
 48 |    "Licensor" shall mean the copyright owner or entity authorized by
 49 |    the copyright owner that is granting the License.
 50 | 
 51 |    "Legal Entity" shall mean the union of the acting entity and all
 52 |    other entities that control, are controlled by, or are under common
 53 |    control with that entity. For the purposes of this definition,
 54 |    "control" means (i) the power, direct or indirect, to cause the
 55 |    direction or management of such entity, whether by contract or
 56 |    otherwise, or (ii) ownership of fifty percent (50%) or more of the
 57 |    outstanding shares, or (iii) beneficial ownership of such entity.
 58 | 
 59 |    "You" (or "Your") shall mean an individual or Legal Entity
 60 |    exercising permissions granted by this License.
 61 | 
 62 |    "Source" form shall mean the preferred form for making modifications,
 63 |    including but not limited to software source code, documentation
 64 |    source, and configuration files.
 65 | 
 66 |    "Object" form shall mean any form resulting from mechanical
 67 |    transformation or translation of a Source form, including but
 68 |    not limited to compiled object code, generated documentation,
 69 |    and conversions to other media types.
 70 | 
 71 |    "Work" shall mean the work of authorship, whether in Source or
 72 |    Object form, made available under the License, as indicated by a
 73 |    copyright notice that is included in or attached to the work
 74 |    (an example is provided in the Appendix below).
 75 | 
 76 |    "Derivative Works" shall mean any work, whether in Source or Object
 77 |    form, that is based on (or derived from) the Work and for which the
 78 |    editorial revisions, annotations, elaborations, or other modifications
 79 |    represent, as a whole, an original work of authorship. For the purposes
 80 |    of this License, Derivative Works shall not include works that remain
 81 |    separable from, or merely link (or bind by name) to the interfaces of,
 82 |    the Work and Derivative Works thereof.
 83 | 
 84 |    "Contribution" shall mean any work of authorship, including
 85 |    the original version of the Work and any modifications or additions
 86 |    to that Work or Derivative Works thereof, that is intentionally
 87 |    submitted to Licensor for inclusion in the Work by the copyright owner
 88 |    or by an individual or Legal Entity authorized to submit on behalf of
 89 |    the copyright owner. For the purposes of this definition, "submitted"
 90 |    means any form of electronic, verbal, or written communication sent
 91 |    to the Licensor or its representatives, including but not limited to
 92 |    communication on electronic mailing lists, source code control systems,
 93 |    and issue tracking systems that are managed by, or on behalf of, the
 94 |    Licensor for the purpose of discussing and improving the Work, but
 95 |    excluding communication that is conspicuously marked or otherwise
 96 |    designated in writing by the copyright owner as "Not a Contribution."
 97 | 
 98 |    "Contributor" shall mean Licensor and any individual or Legal Entity
 99 |    on behalf of whom a Contribution has been received by Licensor and
100 |    subsequently incorporated within the Work.
101 | 
102 | 2. Grant of Copyright License. Subject to the terms and conditions of
103 |    this License, each Contributor hereby grants to You a perpetual,
104 |    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
105 |    copyright license to reproduce, prepare Derivative Works of,
106 |    publicly display, publicly perform, sublicense, and distribute the
107 |    Work and such Derivative Works in Source or Object form.
108 | 
109 | 3. Grant of Patent License. Subject to the terms and conditions of
110 |    this License, each Contributor hereby grants to You a perpetual,
111 |    worldwide, non-exclusive, no-charge, royalty-free, irrevocable
112 |    (except as stated in this section) patent license to make, have made,
113 |    use, offer to sell, sell, import, and otherwise transfer the Work,
114 |    where such license applies only to those patent claims licensable
115 |    by such Contributor that are necessarily infringed by their
116 |    Contribution(s) alone or by combination of their Contribution(s)
117 |    with the Work to which such Contribution(s) was submitted. If You
118 |    institute patent litigation against any entity (including a
119 |    cross-claim or counterclaim in a lawsuit) alleging that the Work
120 |    or a Contribution incorporated within the Work constitutes direct
121 |    or contributory patent infringement, then any patent licenses
122 |    granted to You under this License for that Work shall terminate
123 |    as of the date such litigation is filed.
124 | 
125 | 4. Redistribution. You may reproduce and distribute copies of the
126 |    Work or Derivative Works thereof in any medium, with or without
127 |    modifications, and in Source or Object form, provided that You
128 |    meet the following conditions:
129 | 
130 |    (a) You must give any other recipients of the Work or
131 |        Derivative Works a copy of this License; and
132 | 
133 |    (b) You must cause any modified files to carry prominent notices
134 |        stating that You changed the files; and
135 | 
136 |    (c) You must retain, in the Source form of any Derivative Works
137 |        that You distribute, all copyright, patent, trademark, and
138 |        attribution notices from the Source form of the Work,
139 |        excluding those notices that do not pertain to any part of
140 |        the Derivative Works; and
141 | 
142 |    (d) If the Work includes a "NOTICE" text file as part of its
143 |        distribution, then any Derivative Works that You distribute must
144 |        include a readable copy of the attribution notices contained
145 |        within such NOTICE file, excluding those notices that do not
146 |        pertain to any part of the Derivative Works, in at least one
147 |        of the following places: within a NOTICE text file distributed
148 |        as part of the Derivative Works; within the Source form or
149 |        documentation, if provided along with the Derivative Works; or,
150 |        within a display generated by the Derivative Works, if and
151 |        wherever such third-party notices normally appear. The contents
152 |        of the NOTICE file are for informational purposes only and
153 |        do not modify the License. You may add Your own attribution
154 |        notices within Derivative Works that You distribute, alongside
155 |        or as an addendum to the NOTICE text from the Work, provided
156 |        that such additional attribution notices cannot be construed
157 |        as modifying the License.
158 | 
159 |    You may add Your own copyright statement to Your modifications and
160 |    may provide additional or different license terms and conditions
161 |    for use, reproduction, or distribution of Your modifications, or
162 |    for any such Derivative Works as a whole, provided Your use,
163 |    reproduction, and distribution of the Work otherwise complies with
164 |    the conditions stated in this License.
165 | 
166 | 5. Submission of Contributions. Unless You explicitly state otherwise,
167 |    any Contribution intentionally submitted for inclusion in the Work
168 |    by You to the Licensor shall be under the terms and conditions of
169 |    this License, without any additional terms or conditions.
170 |    Notwithstanding the above, nothing herein shall supersede or modify
171 |    the terms of any separate license agreement you may have executed
172 |    with Licensor regarding such Contributions.
173 | 
174 | 6. Trademarks. This License does not grant permission to use the trade
175 |    names, trademarks, service marks, or product names of the Licensor,
176 |    except as required for reasonable and customary use in describing the
177 |    origin of the Work and reproducing the content of the NOTICE file.
178 | 
179 | 7. Disclaimer of Warranty. Unless required by applicable law or
180 |    agreed to in writing, Licensor provides the Work (and each
181 |    Contributor provides its Contributions) on an "AS IS" BASIS,
182 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
183 |    implied, including, without limitation, any warranties or conditions
184 |    of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
185 |    PARTICULAR PURPOSE. You are solely responsible for determining the
186 |    appropriateness of using or redistributing the Work and assume any
187 |    risks associated with Your exercise of permissions under this License.
188 | 
189 | 8. Limitation of Liability. In no event and under no legal theory,
190 |    whether in tort (including negligence), contract, or otherwise,
191 |    unless required by applicable law (such as deliberate and grossly
192 |    negligent acts) or agreed to in writing, shall any Contributor be
193 |    liable to You for damages, including any direct, indirect, special,
194 |    incidental, or consequential damages of any character arising as a
195 |    result of this License or out of the use or inability to use the
196 |    Work (including but not limited to damages for loss of goodwill,
197 |    work stoppage, computer failure or malfunction, or any and all
198 |    other commercial damages or losses), even if such Contributor
199 |    has been advised of the possibility of such damages.
200 | 
201 | 9. Accepting Warranty or Additional Liability. While redistributing
202 |    the Work or Derivative Works thereof, You may choose to offer,
203 |    and charge a fee for, acceptance of support, warranty, indemnity,
204 |    or other liability obligations and/or rights consistent with this
205 |    License. However, in accepting such obligations, You may act only
206 |    on Your own behalf and on Your sole responsibility, not on behalf
207 |    of any other Contributor, and only if You agree to indemnify,
208 |    defend, and hold each Contributor harmless for any liability
209 |    incurred by, or claims asserted against, such Contributor by reason
210 |    of your accepting any such warranty or additional liability.
211 | 
212 | END OF TERMS AND CONDITIONS
213 | 
214 | APPENDIX: How to apply the Apache License to your work.
215 | 
216 |    To apply the Apache License to your work, attach the following
217 |    boilerplate notice, with the fields enclosed by brackets "[]"
218 |    replaced with your own identifying information. (Don't include
219 |    the brackets!)  The text should be enclosed in the appropriate
220 |    comment syntax for the file format. We also recommend that a
221 |    file or class name and description of purpose be included on the
222 |    same "printed page" as the copyright notice for easier
223 |    identification within third-party archives.
224 | 
225 | Copyright [yyyy] [name of copyright owner]
226 | 
227 | Licensed under the Apache License, Version 2.0 (the "License");
228 | you may not use this file except in compliance with the License.
229 | You may obtain a copy of the License at
230 | 
231 |     http://www.apache.org/licenses/LICENSE-2.0
232 | 
233 | Unless required by applicable law or agreed to in writing, software
234 | distributed under the License is distributed on an "AS IS" BASIS,
235 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
236 | See the License for the specific language governing permissions and
237 | limitations under the License.
238 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # `strcursor`
 2 | 
 3 | **Note**: This is something of a work-in-progress.  It has tests, but hasn't been exhaustively vetted.
 4 | 
 5 | This crate provides a "cursor" type for string slices.  It provides the ability to safely seek back and forth through a string without worrying about producing invalid UTF-8 sequences, or splitting grapheme clusters.
 6 | 
 7 | In addition, it provides types to represent single grapheme clusters (`Gc` and `GcBuf`) as distinct from arbitrary string slices.
 8 | 
 9 | See the `StrCursor` type for details.
10 | 
11 | **Links**
12 | 
13 | * [Latest Release](https://crates.io/crates/strcursor/)
14 | * [Latest Docs](https://danielkeep.github.io/strcursor/doc/strcursor/index.html)
15 | * [Repository](https://github.com/DanielKeep/strcursor)
16 | 
17 | ## Compatibility
18 | 
19 | `strcursor` is currently supported on `rustc` version 1.1.0 and higher.
20 | 
21 | * `rustc` < 1.4 will use a larger, less space-efficient implementation of `GcBuf`; rather than being the same size as `Box<str>`, it will be the same size as `String`.
22 | 
23 | * `rustc` < 1.1 is not supported, due to a mysterious compiler crash.
24 | 
25 | ## License
26 | 
27 | Licensed under either of
28 | 
29 | * MIT license (see [LICENSE](LICENSE) or <http://opensource.org/licenses/MIT>)
30 | * Apache License, Version 2.0 (see [LICENSE](LICENSE) or <http://www.apache.org/licenses/LICENSE-2.0>)
31 | 
32 | at your option.
33 | 
34 | ### Contribution
35 | 
36 | Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you shall be dual licensed as above, without any additional terms or conditions.
37 | 


--------------------------------------------------------------------------------
/build.rs:
--------------------------------------------------------------------------------
 1 | extern crate rustc_version;
 2 | use rustc_version::{version_matches};
 3 | 
 4 | fn main() {
 5 |     println!("cargo:rerun-if-changed=build.rs");
 6 |     // Enable the `has_string_into_boxed_string` cfg only when rustc satisfies the `1.4.0` requirement.
 7 |     if version_matches("1.4.0") {
 8 |         println!("cargo:rustc-cfg=has_string_into_boxed_string");
 9 |     }
10 | }
11 | 


--------------------------------------------------------------------------------
/src/grapheme.rs:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright ⓒ 2015, 2016 Daniel Keep.
  3 | 
  4 | Licensed under the MIT license (see LICENSE or <http://opensource.org
  5 | /licenses/MIT>) or the Apache License, Version 2.0 (see LICENSE or
  6 | <http://www.apache.org/licenses/LICENSE-2.0>), at your option. All
  7 | files in the project carrying such notice may not be copied, modified,
  8 | or distributed except according to those terms.
  9 | */
 10 | /*!
 11 | Defines types for representing single grapheme clusters.
 12 | */
 13 | use std::borrow::{Borrow, Cow, ToOwned};
 14 | use std::convert::AsRef;
 15 | use std::cmp::Ordering;
 16 | use std::fmt::{self, Debug, Display};
 17 | use std::mem::transmute;
 18 | use std::ops::Deref;
 19 | use uniseg::UnicodeSegmentation as UniSeg;
 20 | 
 21 | /**
 22 | An iterator over the code points of the lower case mapping of a given grapheme cluster, returned from [`Gc::to_lowercase`](struct.Gc.html#method.to_lowercase).
 23 | */
 24 | pub type ToLowercase<'a> = ::std::iter::FlatMap<::std::str::Chars<'a>, ::std::char::ToLowercase, fn(char) -> ::std::char::ToLowercase>;
 25 | 
 26 | /**
 27 | An iterator over the code points of the upper case mapping of a given grapheme cluster, returned from [`Gc::to_uppercase`](struct.Gc.html#method.to_uppercase).
 28 | */
 29 | pub type ToUppercase<'a> = ::std::iter::FlatMap<::std::str::Chars<'a>, ::std::char::ToUppercase, fn(char) -> ::std::char::ToUppercase>;
 30 | 
 31 | /**
 32 | A slice of a single Unicode grapheme cluster (GC) (akin to `str`).
 33 | 
 34 | A grapheme cluster is a single visual "unit" in Unicode text, and is composed of *at least* one Unicode code point, possibly more.
 35 | 
 36 | This type is a wrapper around `str` that enforces the additional invariant that it will *always* contain *exactly* one grapheme cluster.  This makes some operations (such as extracting the base code point) simpler.
 37 | 
 38 | ## Why Grapheme Clusters?
 39 | 
 40 | The simplest example is the distinction between "é" ("Latin Small Letter E with Acute") and "é" ("Latin Small Letter E", "Combining Acute Accent"): the first is *one* code point, the second is *two*.
 41 | 
 42 | In Rust, the `char` type is a single code point.  As a result, treating it as a "character" is incorrect for the same reason that using `u8` is: it excludes many legitimate characters.  It can also cause issues whereby naive algorithms may corrupt text by considering components of a grapheme cluster separately.  For example, truncating a string to "10 characters" using `char`s can lead to logical characters being broken apart, potentially changing their meaning.
 43 | 
 44 | One inconvenience when dealing with grapheme clusters in Rust is that they are not accurately represented by any type more-so than a regular `&str`.  However, operations that might make sense on an individual character (such as asking whether it is in the ASCII range, or is numeric) don't make sense on a full string.  In addition, a `&str` can be empty or contain more than one grapheme cluster.
 45 | 
 46 | Hence, this type guarantees that it always represents *exactly* one Unicode grapheme cluster.
 47 | */
 48 | #[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] // derived impls simply forward to the wrapped `str`
 49 | pub struct Gc(str);
 50 | 
 51 | impl Gc {
 52 |     /**
 53 |     Create a new `Gc` from the given string slice.
 54 | 
 55 |     The slice must contain *exactly* one grapheme cluster.  In the event that the input is empty, or contains more than one grapheme cluster, this function will return `None`.
 56 | 
 57 |     See: [`split_from`](#method.split_from).
 58 |     */
 59 |     pub fn from_str(s: &str) -> Option<&Gc> {
 60 |         match Gc::split_from(s) {
 61 |             Some((gc, tail)) => if tail.is_empty() { Some(gc) } else { None },
 62 |             None => None
 63 |         }
 64 |     }
 65 | 
 66 |     /**
 67 |     Create a new `Gc` from the given string slice.
 68 | 
 69 |     This function *does not* check to ensure the provided slice is a single, valid grapheme cluster; upholding that invariant is the caller's responsibility.
 70 |     */
 71 |     pub unsafe fn from_str_unchecked(s: &str) -> &Gc {
 72 |         transmute(s)
 73 |     }
 74 | 
 75 |     /**
 76 |     Try to split a single grapheme cluster from the start of `s`.
 77 | 
 78 |     Returns `None` if the given string was empty; otherwise returns the leading cluster and the remainder of `s`.
 79 |     */
 80 |     pub fn split_from(s: &str) -> Option<(&Gc, &str)> {
 81 |         unsafe {
 82 |             let gr = match UniSeg::graphemes(s, /*is_extended:*/true).next() {
 83 |                 Some(gr) => gr,
 84 |                 None => return None,
 85 |             };
 86 |             Some((Gc::from_str_unchecked(gr), s.slice_unchecked(gr.len(), s.len())))
 87 |         }
 88 |     }
 89 | 
 90 |     /**
 91 |     Returns the length of this grapheme cluster in bytes.
 92 |     */
 93 |     pub fn len(&self) -> usize {
 94 |         self.0.len()
 95 |     }
 96 | 
 97 |     /**
 98 |     Does this grapheme cluster have additional marks applied to it?
 99 | 
100 |     This is `true` if the cluster is comprised of more than a single code point.
101 |     */
102 |     pub fn has_marks(&self) -> bool {
103 |         self.base_char().len_utf8() != self.as_str().len()
104 |     }
105 | 
106 |     /**
107 |     Converts this to a byte slice.
108 |     */
109 |     pub fn as_bytes(&self) -> &[u8] {
110 |         self.0.as_bytes()
111 |     }
112 | 
113 |     /**
114 |     Converts this to a string slice.
115 |     */
116 |     pub fn as_str(&self) -> &str {
117 |         &self.0
118 |     }
119 | 
120 |     /**
121 |     Returns the "base" code point.
122 | 
123 |     That is, this returns the first code point in the cluster.
124 |     */
125 |     pub fn base_char(&self) -> char {
126 |         unsafe {
127 |             match self.0.chars().next() {
128 |                 Some(cp) => cp,
129 |                 None => debug_unreachable!(),
130 |             }
131 |         }
132 |     }
133 | 
134 |     /**
135 |     Returns the "base" code point as a grapheme cluster.
136 | 
137 |     This is equivalent to converting this GC into a string slice, then keeping only the bytes that make up the first code point.
138 |     */
139 |     pub fn base(&self) -> &Gc {
140 |         unsafe {
141 |             // The base is the *leading* code point; everything after it is marks (see `mark_str`).
142 |             let base_len = self.base_char().len_utf8();
143 |             Gc::from_str_unchecked(self.0.slice_unchecked(0, base_len))
144 |         }
145 |     }
146 | 
147 |     /**
148 |     Returns the combining marks as a string slice.
149 | 
150 |     The result of this method may be empty, or of arbitrary length.
151 |     */
152 |     pub fn mark_str(&self) -> &str {
153 |         unsafe {
154 |             let base_cp = self.base_char();
155 |             let base_len = base_cp.len_utf8();
156 |             self.0.slice_unchecked(base_len, self.0.len())
157 |         }
158 |     }
159 | 
160 |     /**
161 |     An iterator over the code points of this grapheme cluster.
162 |     */
163 |     pub fn chars(&self) -> ::std::str::Chars {
164 |         self.0.chars()
165 |     }
166 | 
167 |     /**
168 |     An iterator over the code points of this grapheme cluster, and their associated byte offsets.
169 |     */
170 |     pub fn char_indices(&self) -> ::std::str::CharIndices {
171 |         self.0.char_indices()
172 |     }
173 | 
174 |     /**
175 |     An iterator over the bytes of this grapheme cluster.
176 |     */
177 |     pub fn bytes(&self) -> ::std::str::Bytes {
178 |         self.0.bytes()
179 |     }
180 | 
181 |     /**
182 |     Returns an iterator over the code points in the lower case equivalent of this grapheme cluster.
183 |     */
184 |     pub fn to_lowercase(&self) -> ToLowercase {
185 |         self.0.chars().flat_map(char::to_lowercase)
186 |     }
187 | 
188 |     /**
189 |     Returns an iterator over the code points in the upper case equivalent of this grapheme cluster.
190 |     */
191 |     pub fn to_uppercase(&self) -> ToUppercase {
192 |         self.0.chars().flat_map(char::to_uppercase)
193 |     }
194 | }
195 | 
196 | impl AsRef<str> for Gc {
197 |     fn as_ref(&self) -> &str {
198 |         &self.0 // borrow the wrapped string slice directly
199 |     }
200 | }
201 | 
202 | impl AsRef<[u8]> for Gc {
203 |     fn as_ref(&self) -> &[u8] {
204 |         self.0.as_bytes() // raw UTF-8 bytes of the cluster
205 |     }
206 | }
207 | 
208 | impl Debug for Gc {
209 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
210 |         Debug::fmt(self.as_str(), f) // same output as the underlying `str`
211 |     }
212 | }
213 | 
214 | impl Display for Gc {
215 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
216 |         Display::fmt(self.as_str(), f) // same output as the underlying `str`
217 |     }
218 | }
219 | 
220 | impl<'a> PartialEq<&'a Gc> for Gc {
221 |     fn eq(&self, rhs: &&'a Gc) -> bool {
222 |         *self == **rhs // peel the extra reference, then compare `Gc` with `Gc`
223 |     }
224 | }
225 | 
226 | impl<'a> PartialEq<Gc> for &'a Gc {
227 |     fn eq(&self, rhs: &Gc) -> bool {
228 |         **self == *rhs // peel the extra reference, then compare `Gc` with `Gc`
229 |     }
230 | }
231 | 
232 | impl PartialEq<char> for Gc {
233 |     fn eq(&self, rhs: &char) -> bool {
234 |         self.base_char() == *rhs && !self.has_marks() // equal only when the cluster is exactly that code point
235 |     }
236 | }
237 | 
238 | impl PartialEq<str> for Gc {
239 |     fn eq(&self, rhs: &str) -> bool {
240 |         self.as_str() == rhs // plain string comparison
241 |     }
242 | }
243 | 
244 | impl<'a> PartialEq<&'a str> for Gc {
245 |     fn eq(&self, rhs: &&'a str) -> bool {
246 |         self.as_str() == *rhs // plain string comparison
247 |     }
248 | }
249 | 
250 | impl PartialEq<GcBuf> for Gc {
251 |     fn eq(&self, rhs: &GcBuf) -> bool {
252 |         self.0 == *rhs.as_gc() // compare against the buffer's borrowed cluster
253 |     }
254 | }
255 | 
256 | impl PartialEq<String> for Gc {
257 |     fn eq(&self, rhs: &String) -> bool {
258 |         self.0 == rhs[..] // compare as string slices
259 |     }
260 | }
261 | 
262 | impl<'a> PartialEq<Cow<'a, Gc>> for Gc {
263 |     fn eq(&self, rhs: &Cow<'a, Gc>) -> bool {
264 |         self.0 == **rhs // look through the `Cow` to the cluster it holds
265 |     }
266 | }
267 | 
268 | impl<'a> PartialEq<char> for &'a Gc {
269 |     fn eq(&self, rhs: &char) -> bool {
270 |         self.base_char() == *rhs && !self.has_marks() // equal only when the cluster is exactly that code point
271 |     }
272 | }
273 | 
274 | impl<'a> PartialEq<str> for &'a Gc {
275 |     fn eq(&self, rhs: &str) -> bool {
276 |         self.as_str() == rhs // plain string comparison
277 |     }
278 | }
279 | 
280 | impl<'a> PartialEq<GcBuf> for &'a Gc {
281 |     fn eq(&self, rhs: &GcBuf) -> bool {
282 |         self.0 == *rhs.as_gc() // compare against the buffer's borrowed cluster
283 |     }
284 | }
285 | 
286 | impl<'a> PartialEq<String> for &'a Gc {
287 |     fn eq(&self, rhs: &String) -> bool {
288 |         self.0 == rhs[..] // compare as string slices
289 |     }
290 | }
291 | 
292 | impl<'a> PartialEq<Cow<'a, Gc>> for &'a Gc {
293 |     fn eq(&self, rhs: &Cow<'a, Gc>) -> bool {
294 |         self.0 == **rhs // look through the `Cow` to the cluster it holds
295 |     }
296 | }
297 | 
298 | impl PartialEq<Gc> for char {
299 |     fn eq(&self, other: &Gc) -> bool {
300 |         *other == *self // defer to `Gc == char`, which also rejects clusters carrying extra marks
301 |     }
302 | }
303 | 
304 | impl PartialEq<Gc> for str {
305 |     fn eq(&self, rhs: &Gc) -> bool {
306 |         *self == rhs.0 // plain string comparison
307 |     }
308 | }
309 | 
310 | impl<'a> PartialEq<Gc> for &'a str {
311 |     fn eq(&self, rhs: &Gc) -> bool {
312 |         *self == rhs.as_str() // plain string comparison
313 |     }
314 | }
315 | 
316 | impl PartialEq<Gc> for String {
317 |     fn eq(&self, rhs: &Gc) -> bool {
318 |         &self[..] == rhs.as_str() // compare as string slices
319 |     }
320 | }
321 | 
322 | impl<'a> PartialEq<Gc> for Cow<'a, Gc> {
323 |     fn eq(&self, rhs: &Gc) -> bool {
324 |         **self == *rhs // look through the `Cow`, then compare `Gc` with `Gc`
325 |     }
326 | }
327 | 
328 | impl<'a> PartialEq<&'a Gc> for char {
329 |     fn eq(&self, other: &&'a Gc) -> bool {
330 |         **other == *self // defer to `Gc == char`, which also rejects clusters carrying extra marks
331 |     }
332 | }
333 | 
334 | impl<'a> PartialEq<&'a Gc> for str {
335 |     fn eq(&self, rhs: &&'a Gc) -> bool {
336 |         *self == rhs.0 // plain string comparison
337 |     }
338 | }
339 | 
340 | impl<'a> PartialEq<&'a Gc> for String {
341 |     fn eq(&self, rhs: &&'a Gc) -> bool {
342 |         &self[..] == rhs.as_str() // compare as string slices
343 |     }
344 | }
345 | 
346 | impl<'a> PartialEq<&'a Gc> for Cow<'a, Gc> {
347 |     fn eq(&self, rhs: &&'a Gc) -> bool {
348 |         **self == **rhs // look through both wrappers, then compare `Gc` with `Gc`
349 |     }
350 | }
351 | 
352 | impl<'a> PartialOrd<&'a Gc> for Gc {
353 |     fn partial_cmp(&self, rhs: &&'a Gc) -> Option<Ordering> {
354 |         PartialOrd::partial_cmp(self, *rhs) // `Gc` vs `Gc` ordering
355 |     }
356 | }
357 | 
358 | impl<'a> PartialOrd<Gc> for &'a Gc {
359 |     fn partial_cmp(&self, rhs: &Gc) -> Option<Ordering> {
360 |         PartialOrd::partial_cmp(*self, rhs) // `Gc` vs `Gc` ordering
361 |     }
362 | }
363 | 
364 | impl PartialOrd<char> for Gc {
365 |     /// Orders the cluster against `other` as though `other` were a one-code-point string.
366 |     fn partial_cmp(&self, other: &char) -> Option<Ordering> {
367 |         match self.base_char().partial_cmp(other) {
368 |             // Same base code point, but this cluster carries extra marks: it is the
369 |             // longer sequence, so it sorts *after* the lone code point (this matches
370 |             // the ordering the `str` comparisons produce for a string and its prefix).
371 |             Some(Ordering::Equal) if self.has_marks() => Some(Ordering::Greater),
372 |             ordering => ordering,
373 |         }
374 |     }
375 | }
376 | 
377 | impl PartialOrd<str> for Gc {
378 |     fn partial_cmp(&self, rhs: &str) -> Option<Ordering> {
379 |         PartialOrd::partial_cmp(self.as_str(), rhs) // plain string ordering
380 |     }
381 | }
382 | 
383 | impl<'a> PartialOrd<&'a str> for Gc {
384 |     fn partial_cmp(&self, rhs: &&'a str) -> Option<Ordering> {
385 |         PartialOrd::partial_cmp(self.as_str(), *rhs) // plain string ordering
386 |     }
387 | }
388 | 
389 | impl PartialOrd<GcBuf> for Gc {
390 |     fn partial_cmp(&self, rhs: &GcBuf) -> Option<Ordering> {
391 |         PartialOrd::partial_cmp(&self.0, rhs.as_gc()) // order against the buffer's borrowed cluster
392 |     }
393 | }
394 | 
395 | impl PartialOrd<String> for Gc {
396 |     fn partial_cmp(&self, rhs: &String) -> Option<Ordering> {
397 |         self.0.partial_cmp(&rhs[..]) // order as string slices
398 |     }
399 | }
400 | 
401 | impl<'a> PartialOrd<Cow<'a, Gc>> for Gc {
402 |     fn partial_cmp(&self, rhs: &Cow<'a, Gc>) -> Option<Ordering> {
403 |         self.0.partial_cmp(&**rhs) // look through the `Cow` to the cluster it holds
404 |     }
405 | }
406 | 
407 | impl<'a> PartialOrd<char> for &'a Gc {
408 |     fn partial_cmp(&self, other: &char) -> Option<Ordering> {
409 |         (**self).partial_cmp(other) // share `Gc: PartialOrd<char>` so extra marks are not ignored
410 |     }
411 | }
412 | 
413 | impl<'a> PartialOrd<str> for &'a Gc {
414 |     fn partial_cmp(&self, rhs: &str) -> Option<Ordering> {
415 |         PartialOrd::partial_cmp(self.as_str(), rhs) // plain string ordering
416 |     }
417 | }
418 | 
419 | impl<'a> PartialOrd<GcBuf> for &'a Gc {
420 |     fn partial_cmp(&self, other: &GcBuf) -> Option<Ordering> {
421 |         self.0.partial_cmp(other.as_gc())
422 |     }
423 | }
424 | 
425 | impl<'a> PartialOrd<String> for &'a Gc {
426 |     fn partial_cmp(&self, other: &String) -> Option<Ordering> {
427 |         self.0.partial_cmp(&**other)
428 |     }
429 | }
430 | 
431 | impl<'a> PartialOrd<Cow<'a, Gc>> for &'a Gc {
432 |     fn partial_cmp(&self, other: &Cow<'a, Gc>) -> Option<Ordering> {
433 |         self.0.partial_cmp((*other).deref())
434 |     }
435 | }
436 | 
437 | impl PartialOrd<Gc> for char {
438 |     fn partial_cmp(&self, other: &Gc) -> Option<Ordering> {
439 |         self.partial_cmp(&other.base_char())
440 |     }
441 | }
442 | 
443 | impl PartialOrd<Gc> for str {
444 |     fn partial_cmp(&self, other: &Gc) -> Option<Ordering> {
445 |         self.partial_cmp(&other.0)
446 |     }
447 | }
448 | 
449 | impl<'a> PartialOrd<Gc> for &'a str {
450 |     fn partial_cmp(&self, other: &Gc) -> Option<Ordering> {
451 |         self.partial_cmp(&&other.0)
452 |     }
453 | }
454 | 
455 | impl PartialOrd<Gc> for String {
456 |     fn partial_cmp(&self, other: &Gc) -> Option<Ordering> {
457 |         (&**self).partial_cmp(other.as_str())
458 |     }
459 | }
460 | 
461 | impl<'a> PartialOrd<Gc> for Cow<'a, Gc> {
462 |     fn partial_cmp(&self, other: &Gc) -> Option<Ordering> {
463 |         (**self).partial_cmp(other)
464 |     }
465 | }
466 | 
467 | impl<'a> PartialOrd<&'a Gc> for char {
468 |     fn partial_cmp(&self, other: &&'a Gc) -> Option<Ordering> {
469 |         self.partial_cmp(&other.base_char())
470 |     }
471 | }
472 | 
473 | impl<'a> PartialOrd<&'a Gc> for str {
474 |     fn partial_cmp(&self, other: &&'a Gc) -> Option<Ordering> {
475 |         self.partial_cmp(&other.0)
476 |     }
477 | }
478 | 
479 | impl<'a> PartialOrd<&'a Gc> for String {
480 |     fn partial_cmp(&self, other: &&'a Gc) -> Option<Ordering> {
481 |         (&**self).partial_cmp(other.as_str())
482 |     }
483 | }
484 | 
485 | impl<'a> PartialOrd<&'a Gc> for Cow<'a, Gc> {
486 |     fn partial_cmp(&self, other: &&'a Gc) -> Option<Ordering> {
487 |         (**self).partial_cmp(*other)
488 |     }
489 | }
490 | 
491 | impl ToOwned for Gc {
492 |     type Owned = GcBuf;
493 |     fn to_owned(&self) -> Self::Owned {
494 |         unsafe {
495 |             GcBuf::from_string_unchecked(self.0.to_owned())
496 |         }
497 |     }
498 | }
499 | 
/**
An owned, single Unicode grapheme cluster (akin to `String`).

This definition is compiled when the `has_string_into_boxed_string` cfg is set, and stores the cluster's text as a `Box<str>`.

See [`Gc`](struct.Gc.html) for more details.
*/
#[cfg(has_string_into_boxed_string)]
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct GcBuf(Box<str>);
508 | 
/**
An owned, single Unicode grapheme cluster (akin to `String`).

This definition is compiled when the `has_string_into_boxed_string` cfg is *not* set, and stores the cluster's text as a `String`.

See [`Gc`](struct.Gc.html) for more details.
*/
#[cfg(not(has_string_into_boxed_string))]
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct GcBuf(String);
517 | 
impl GcBuf {
    /**
    Create a new `GcBuf` from the given `String`.

    This function *does not* check to ensure the provided string is a single, valid grapheme cluster.

    # Safety

    The caller is responsible for ensuring `s` is exactly one grapheme cluster; `as_gc` later hands the stored text to `Gc::from_str_unchecked` without re-checking it.
    */
    pub unsafe fn from_string_unchecked(s: String) -> GcBuf {
        // Dispatch to whichever storage-specific constructor was compiled in.
        Self::from_string_unchecked_impl(s)
    }

    // `Box<str>` storage: convert the `String` into a boxed slice.
    #[cfg(has_string_into_boxed_string)]
    unsafe fn from_string_unchecked_impl(s: String) -> GcBuf {
        GcBuf(s.into_boxed_str())
    }

    // `String` storage: keep the `String` as-is.
    #[cfg(not(has_string_into_boxed_string))]
    unsafe fn from_string_unchecked_impl(s: String) -> GcBuf {
        GcBuf(s)
    }

    /**
    Returns a borrowed grapheme cluster slice.
    */
    pub fn as_gc(&self) -> &Gc {
        // No validation is performed here; this relies on whoever constructed
        // the `GcBuf` having supplied a single grapheme cluster.
        unsafe {
            Gc::from_str_unchecked(&self.0)
        }
    }
}
547 | 
548 | impl AsRef<Gc> for GcBuf {
549 |     fn as_ref(&self) -> &Gc {
550 |         self.as_gc()
551 |     }
552 | }
553 | 
554 | impl AsRef<str> for GcBuf {
555 |     fn as_ref(&self) -> &str {
556 |         self.as_str()
557 |     }
558 | }
559 | 
560 | impl AsRef<[u8]> for GcBuf {
561 |     fn as_ref(&self) -> &[u8] {
562 |         self.as_str().as_bytes()
563 |     }
564 | }
565 | 
566 | impl Borrow<Gc> for GcBuf {
567 |     fn borrow(&self) -> &Gc {
568 |         self.as_gc()
569 |     }
570 | }
571 | 
572 | impl Debug for GcBuf {
573 |     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
574 |         Debug::fmt(&self.0, fmt)
575 |     }
576 | }
577 | 
578 | impl Default for GcBuf {
579 |     fn default() -> Self {
580 |         unsafe {
581 |             GcBuf::from_string_unchecked(String::from("\u{0}"))
582 |         }
583 |     }
584 | }
585 | 
586 | impl Deref for GcBuf {
587 |     type Target = Gc;
588 |     fn deref(&self) -> &Gc {
589 |         self.as_gc()
590 |     }
591 | }
592 | 
593 | impl Display for GcBuf {
594 |     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
595 |         Display::fmt(&self.0, fmt)
596 |     }
597 | }
598 | 
599 | impl<'a> From<&'a Gc> for GcBuf {
600 |     fn from(v: &'a Gc) -> Self {
601 |         unsafe {
602 |             GcBuf::from_string_unchecked(v.as_str().to_owned())
603 |         }
604 |     }
605 | }
606 | 
607 | impl From<char> for GcBuf {
608 |     fn from(v: char) -> Self {
609 |         unsafe {
610 |             let mut buf = [0; 4];
611 |             let bs = match ::util::encode_utf8_raw(v as u32, &mut buf) {
612 |                 Some(len) => {
613 |                     if len < 4 {
614 |                         &buf[..len]
615 |                     } else {
616 |                         debug_unreachable!();
617 |                     }
618 |                 },
619 |                 None => debug_unreachable!(),
620 |             };
621 |             let s: &str = transmute(bs);
622 |             let s = s.to_owned();
623 |             GcBuf::from_string_unchecked(s)
624 |         }
625 |     }
626 | }
627 | 
628 | #[cfg(has_string_into_boxed_string)]
629 | impl Into<Box<str>> for GcBuf {
630 |     fn into(self) -> Box<str> {
631 |         self.0
632 |     }
633 | }
634 | 
635 | #[cfg(has_string_into_boxed_string)]
636 | impl Into<String> for GcBuf {
637 |     fn into(self) -> String {
638 |         self.0.into_string()
639 |     }
640 | }
641 | 
642 | #[cfg(has_string_into_boxed_string)]
643 | impl Into<Vec<u8>> for GcBuf {
644 |     fn into(self) -> Vec<u8> {
645 |         self.0.into_string().into()
646 |     }
647 | }
648 | 
649 | #[cfg(not(has_string_into_boxed_string))]
650 | impl Into<String> for GcBuf {
651 |     fn into(self) -> String {
652 |         self.0
653 |     }
654 | }
655 | 
656 | #[cfg(not(has_string_into_boxed_string))]
657 | impl Into<Vec<u8>> for GcBuf {
658 |     fn into(self) -> Vec<u8> {
659 |         self.0.into()
660 |     }
661 | }
662 | 
// Parses its input as a single item and emits it unchanged.  The forwarding
// macros below wrap their generic `impl<$lt> ...` expansions in this.
// NOTE(review): presumably a workaround for how older compilers parse
// `impl<$lt>` inside a macro expansion - confirm before removing.
macro_rules! as_item {
    ($i:item) => { $i };
}
666 | 
// Generates a `PartialEq<$rhs> for $lhs` impl that forwards to a `Gc` comparison.
//
// Forms:
//   forward_partial_eq! { Lhs, Rhs }            - `self.as_gc() == other`
//   forward_partial_eq! { <'a> Lhs, Rhs }       - same, with one lifetime parameter
//   forward_partial_eq! { ~ Lhs, Rhs }          - flipped: `other.as_gc() == self`
//   forward_partial_eq! { ~ <'a> Lhs, Rhs }     - flipped, with one lifetime parameter
//
// The side that `as_gc()` is called on must provide that method.
macro_rules! forward_partial_eq {
    // Flipped, generic over a lifetime: the right-hand side supplies the `Gc`.
    (~ <$lt:tt> $lhs:ty, $rhs:ty) => {
        as_item! {
            impl<$lt> PartialEq<$rhs> for $lhs {
                fn eq(&self, other: &$rhs) -> bool {
                    other.as_gc().eq(self)
                }
            }
        }
    };

    // Flipped, no generics: the right-hand side supplies the `Gc`.
    (~ $lhs:ty, $rhs:ty) => {
        impl PartialEq<$rhs> for $lhs {
            fn eq(&self, other: &$rhs) -> bool {
                other.as_gc().eq(self)
            }
        }
    };

    // Direct, generic over a lifetime: the left-hand side supplies the `Gc`.
    (<$lt:tt> $lhs:ty, $rhs:ty) => {
        as_item! {
            impl<$lt> PartialEq<$rhs> for $lhs {
                fn eq(&self, other: &$rhs) -> bool {
                    self.as_gc().eq(other)
                }
            }
        }
    };

    // Direct, no generics: the left-hand side supplies the `Gc`.
    ($lhs:ty, $rhs:ty) => {
        impl PartialEq<$rhs> for $lhs {
            fn eq(&self, other: &$rhs) -> bool {
                self.as_gc().eq(other)
            }
        }
    };
}
704 | 
// `GcBuf == X`: forwarded to `Gc == X`.
forward_partial_eq! { GcBuf, char }
forward_partial_eq! { GcBuf, str }
forward_partial_eq! { GcBuf, Gc }
forward_partial_eq! { GcBuf, String }
forward_partial_eq! { <'a> GcBuf, &'a str }
forward_partial_eq! { <'a> GcBuf, &'a Gc }
forward_partial_eq! { <'a> GcBuf, Cow<'a, Gc> }

// `X == GcBuf`: forwarded, with the operands swapped, to `Gc == X`.
forward_partial_eq! { ~ char, GcBuf }
forward_partial_eq! { ~ str, GcBuf }
forward_partial_eq! { ~ String, GcBuf }
forward_partial_eq! { ~ <'a> &'a str, GcBuf }
forward_partial_eq! { ~ <'a> Cow<'a, Gc>, GcBuf }
718 | 
// Generates a `PartialOrd<$rhs> for $lhs` impl that forwards to a `Gc` comparison.
//
// Forms:
//   forward_partial_ord! { Lhs, Rhs }           - `self.as_gc().partial_cmp(other)`
//   forward_partial_ord! { <'a> Lhs, Rhs }      - same, with one lifetime parameter
//   forward_partial_ord! { ~ Lhs, Rhs }         - flipped: compares `other.as_gc()`
//                                                 against `self`, then reverses the result
//   forward_partial_ord! { ~ <'a> Lhs, Rhs }    - flipped, with one lifetime parameter
//
// The side that `as_gc()` is called on must provide that method.
macro_rules! forward_partial_ord {
    // Flipped, generic over a lifetime.  The operands are swapped for the
    // comparison, so the resulting ordering is reversed to compensate.
    (~ <$lt:tt> $lhs:ty, $rhs:ty) => {
        as_item! {
            impl<$lt> PartialOrd<$rhs> for $lhs {
                fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
                    other.as_gc().partial_cmp(self).map(Ordering::reverse)
                }
            }
        }
    };

    // Flipped, no generics.  Reversed for the same reason as above.
    (~ $lhs:ty, $rhs:ty) => {
        impl PartialOrd<$rhs> for $lhs {
            fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
                other.as_gc().partial_cmp(self).map(Ordering::reverse)
            }
        }
    };

    // Direct, generic over a lifetime: the left-hand side supplies the `Gc`.
    (<$lt:tt> $lhs:ty, $rhs:ty) => {
        as_item! {
            impl<$lt> PartialOrd<$rhs> for $lhs {
                fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
                    self.as_gc().partial_cmp(other)
                }
            }
        }
    };

    // Direct, no generics: the left-hand side supplies the `Gc`.
    ($lhs:ty, $rhs:ty) => {
        impl PartialOrd<$rhs> for $lhs {
            fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
                self.as_gc().partial_cmp(other)
            }
        }
    };
}
756 | 
// `GcBuf` vs `X`: forwarded to `Gc` vs `X`.
forward_partial_ord! { GcBuf, char }
forward_partial_ord! { GcBuf, str }
forward_partial_ord! { GcBuf, Gc }
forward_partial_ord! { GcBuf, String }
forward_partial_ord! { <'a> GcBuf, &'a str }
forward_partial_ord! { <'a> GcBuf, &'a Gc }
forward_partial_ord! { <'a> GcBuf, Cow<'a, Gc> }

// `X` vs `GcBuf`: forwarded, with the operands swapped and the result reversed, to `Gc` vs `X`.
forward_partial_ord! { ~ char, GcBuf }
forward_partial_ord! { ~ str, GcBuf }
forward_partial_ord! { ~ String, GcBuf }
forward_partial_ord! { ~ <'a> &'a str, GcBuf }
forward_partial_ord! { ~ <'a> Cow<'a, Gc>, GcBuf }
770 | 
#[cfg(test)]
mod gc_tests {
    use super::Gc;

    // Inputs whose exact code points matter are spelled with `\u{..}` escapes
    // so that editors and tools cannot silently normalise them:
    //
    //   "\u{e1}"    precomposed "a with acute" (a single code point, no marks)
    //   "a\u{308}"  "a" followed by a combining diaeresis (base plus one mark)
    //   "\u{308}"   a lone combining diaeresis

    // Wraps a string that is known to be exactly one grapheme cluster.
    fn gc(s: &str) -> &Gc {
        Gc::from_str(s).unwrap()
    }

    #[test]
    fn test_from_str() {
        // Exactly one cluster is accepted and round-trips unchanged.
        assert_eq!(Gc::from_str("a").map(Gc::as_str), Some("a"));
        assert_eq!(Gc::from_str("\u{e1}").map(Gc::as_str), Some("\u{e1}"));
        assert_eq!(Gc::from_str("a\u{308}").map(Gc::as_str), Some("a\u{308}"));
        assert_eq!(Gc::from_str("\u{308}").map(Gc::as_str), Some("\u{308}"));
        assert_eq!(Gc::from_str("字").map(Gc::as_str), Some("字"));
        // Zero clusters and two clusters are both rejected.
        assert_eq!(Gc::from_str("").map(Gc::as_str), None);
        assert_eq!(Gc::from_str("ab").map(Gc::as_str), None);
    }

    #[test]
    fn test_split_from() {
        // Flattens the `(cluster, rest)` pair into plain string slices for comparison.
        fn map<'a>((gr, s): (&'a Gc, &'a str)) -> (&'a str, &'a str) {
            (gr.as_str(), s)
        }

        assert_eq!(Gc::split_from("a").map(map), Some(("a", "")));
        assert_eq!(Gc::split_from("\u{e1}").map(map), Some(("\u{e1}", "")));
        assert_eq!(Gc::split_from("a\u{308}").map(map), Some(("a\u{308}", "")));
        assert_eq!(Gc::split_from("\u{308}").map(map), Some(("\u{308}", "")));
        assert_eq!(Gc::split_from("字").map(map), Some(("字", "")));
        assert_eq!(Gc::split_from("").map(map), None);
        // Only the first cluster is split off; the remainder is returned as-is.
        assert_eq!(Gc::split_from("ab").map(map), Some(("a", "b")));
    }

    #[test]
    fn test_has_marks() {
        assert!(!gc("a").has_marks());
        assert!(!gc("\u{e1}").has_marks());
        assert!(gc("a\u{308}").has_marks());
        // A lone combining mark is its own base, so it has no *additional* marks.
        assert!(!gc("\u{308}").has_marks());
        assert!(!gc("字").has_marks());
    }

    #[test]
    fn test_base_char() {
        assert_eq!(gc("a").base_char(), 'a');
        assert_eq!(gc("\u{e1}").base_char(), '\u{e1}');
        assert_eq!(gc("a\u{308}").base_char(), 'a');
        assert_eq!(gc("\u{308}").base_char(), '\u{308}');
        assert_eq!(gc("字").base_char(), '字');
    }

    #[test]
    fn test_mark_str() {
        assert_eq!(gc("a").mark_str(), "");
        assert_eq!(gc("\u{e1}").mark_str(), "");
        assert_eq!(gc("a\u{308}").mark_str(), "\u{308}");
        assert_eq!(gc("\u{308}").mark_str(), "");
        assert_eq!(gc("字").mark_str(), "");
    }
}


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
   1 | /*
   2 | Copyright ⓒ 2015, 2016 Daniel Keep.
   3 | 
   4 | Licensed under the MIT license (see LICENSE or <http://opensource.org
/licenses/MIT>) or the Apache License, Version 2.0 (see LICENSE or
   6 | <http://www.apache.org/licenses/LICENSE-2.0>), at your option. All
   7 | files in the project carrying such notice may not be copied, modified,
   8 | or distributed except according to those terms.
   9 | */
  10 | /*!
  11 | This crate provides a "cursor" type for string slices.  It provides the ability to safely seek back and forth through a string without worrying about producing invalid UTF-8 sequences, or splitting grapheme clusters.
  12 | 
  13 | In addition, it provides types to represent single grapheme clusters ([`Gc`](struct.Gc.html) and [`GcBuf`](struct.GcBuf.html)) as distinct from arbitrary string slices.
  14 | 
  15 | See the [`StrCursor`](struct.StrCursor.html) type for details.
  16 | 
  17 | <style type="text/css">
  18 | .link-block { font-family: "Fira Sans"; }
  19 | .link-block > p { display: inline-block; }
  20 | .link-block > p > strong { font-weight: 500; margin-right: 1em; }
  21 | .link-block > ul { display: inline-block; padding: 0; list-style: none; }
  22 | .link-block > ul > li {
  23 |   font-size: 0.8em;
  24 |   background-color: #eee;
  25 |   border: 1px solid #ccc;
  26 |   padding: 0.3em;
  27 |   display: inline-block;
  28 | }
  29 | </style>
  30 | <span></span><div class="link-block">
  31 | 
  32 | **Links**
  33 | 
  34 | * [Latest Release](https://crates.io/crates/strcursor/)
  35 | * [Latest Docs](https://danielkeep.github.io/strcursor/doc/strcursor/index.html)
  36 | * [Repository](https://github.com/DanielKeep/strcursor)
  37 | 
  38 | <span></span></div>
  39 | 
  40 | ## Compatibility
  41 | 
  42 | `strcursor` is currently supported on `rustc` version 1.1.0 and higher.
  43 | 
  44 | * `rustc` < 1.4 will use a larger, less space-efficient implementation of `GcBuf`; rather than being the same size as `Box<str>`, it will be the same size as `String`.
  45 | 
  46 | * `rustc` < 1.1 is not supported, due to a mysterious compiler crash.
  47 | 
  48 | */
  49 | extern crate unicode_segmentation as uniseg;
  50 | 
/**
Inserts a panic in debug builds, an optimisation hint in release builds.

The choice is made on `cfg!(ndebug)`: when that cfg is set the macro expands to a call to `::util::unreachable()`, otherwise to a `panic!`.

NOTE(review): `ndebug` does not appear to be a cfg that current compilers set on their own (they expose `debug_assertions` instead); unless the build script sets it, this always takes the `panic!` branch.  Confirm against `build.rs` before relying on the release-mode behaviour.

**Do not replace this with the `debug_unreachable` crate.**  Recent versions of that crate do not build under Rust < 1.6, and old versions that used to no longer will, as they have sufficiently vague dependency version specifiers.
*/
#[doc(hidden)]
macro_rules! debug_unreachable {
    () => {
        if cfg!(ndebug) {
            ::util::unreachable()
        } else {
            panic!("entered unreachable code")
        }
    };
}
  66 | 
  67 | pub use grapheme::{Gc, GcBuf};
  68 | 
  69 | pub mod grapheme;
  70 | mod util;
  71 | 
  72 | use uniseg::UnicodeSegmentation as UniSeg;
  73 | 
  74 | /**
  75 | This type represents a cursor into a string slice; that is, in addition to having a beginning and end, it also has a current position between those two.  This position can be seeked left and right within those bounds.
  76 | 
  77 | > **Note**: the cursor may validly be positioned *at* the end of the string.  That is, in a position where there are no code points or grapheme clusters to the right of the cursor, and the entire contents of the string is to the left of the cursor.
  78 | 
  79 | The main reason for this is that *sometimes*, you want the ability to do things like "advance a character", and the existing APIs for this can be somewhat verbose.
  80 | 
  81 | In addition, *unstable* support for grapheme clusters is exposed by the standard library, which conflicts with the *stable* support provided by the `unicode-segmentation` crate, which makes doing "the right thing" painful.  `StrCursor` exposes grapheme clusters by default, and makes them cleaner to work with.
  82 | 
  83 | The cursor guarantees the following at all times:
  84 | 
  85 | * The cursor position *cannot* be outside of the original string slice it was constructed with.
  86 | * The cursor position *cannot* lie between Unicode code points, meaning that you *cannot* generate an invalid string slice from a cursor.
  87 | * If the code point-specific methods are *not* used, the cursor will always lie between grapheme clusters.
  88 | 
  89 | This last point is somewhat important: the cursor is designed to favour operating on grapheme clusters, rather than code points.  If you misalign the cursor with respect to grapheme clusters, the behaviour of methods that deal with grapheme clusters is officially *undefined*, but is generally well-behaved.
  90 | 
  91 | The methods that operate on the cursor will either return a fresh `Option<StrCursor>` (depending on whether the seek operation is valid or not), or mutate the existing cursor (in which case, they will *panic* if the seek operation is not valid).
  92 | */
pub struct StrCursor<'a> {
    // The whole string slice the cursor moves within.
    s: &'a str,
    // The current position, as a pointer into `s`.  `byte_pos` recovers the
    // offset by subtracting `s.as_ptr()` from this.
    at: *const u8,
}
  97 | 
  98 | impl<'a> StrCursor<'a> {
  99 |     /**
 100 |     Create a new cursor at the start of `s`.
 101 |     */
 102 |     #[inline]
 103 |     pub fn new_at_start(s: &'a str) -> StrCursor<'a> {
 104 |         StrCursor {
 105 |             s: s,
 106 |             at: s.as_ptr(),
 107 |         }
 108 |     }
 109 | 
 110 |     /**
    Create a new cursor at the end of `s`.
 112 |     */
 113 |     #[inline]
 114 |     pub fn new_at_end(s: &'a str) -> StrCursor<'a> {
 115 |         StrCursor {
 116 |             s: s,
 117 |             at: byte_pos_to_ptr(s, s.len()),
 118 |         }
 119 |     }
 120 | 
 121 |     /**
 122 |     Create a new cursor at the first grapheme cluster which begins at or to the left of the given byte position.
 123 |     */
 124 |     #[inline]
 125 |     pub fn new_at_left_of_byte_pos(s: &'a str, byte_pos: usize) -> StrCursor<'a> {
 126 |         // Start at a codepoint.
 127 |         let cur = StrCursor::new_at_cp_left_of_byte_pos(s, byte_pos);
 128 | 
 129 |         // Seek back to the previous grapheme.
 130 |         let prev = cur.at_prev();
 131 | 
 132 |         let prev = match prev {
 133 |             None => return cur, // We were already at the start.
 134 |             Some(c) => c
 135 |         };
 136 | 
 137 |         // unwrap should be OK here.
 138 |         if prev.byte_pos() + prev.after().unwrap().len() > byte_pos {
 139 |             prev
 140 |         } else {
 141 |             cur
 142 |         }
 143 |     }
 144 | 
 145 |     /**
 146 |     Create a new cursor at the first grapheme cluster which begins at or to the right of the given byte position.
 147 |     */
 148 |     #[inline]
 149 |     pub fn new_at_right_of_byte_pos(s: &'a str, byte_pos: usize) -> StrCursor<'a> {
 150 |         // I don't know how robust the grapheme iteration rules are when trying to step forward from a (potentially) invalid position.  As such, I'm *instead* going to start from a known-good position.
 151 |         let cur = StrCursor::new_at_left_of_byte_pos(s, byte_pos);
 152 |         if cur.byte_pos() == byte_pos {
 153 |             return cur;
 154 |         }
 155 | 
 156 |         // This unwrap shouldn't be able to fail.
 157 |         cur.at_next().unwrap()
 158 |     }
 159 | 
 160 |     /**
 161 |     Create a new cursor at the first code point which begins at or to the left of the given byte position.
 162 | 
 163 |     # Note
 164 | 
 165 |     Where possible, you should prefer `new_at_left_of_byte_pos`.
 166 |     */
 167 |     #[inline]
 168 |     pub fn new_at_cp_left_of_byte_pos(s: &'a str, byte_pos: usize) -> StrCursor<'a> {
 169 |         StrCursor {
 170 |             s: s,
 171 |             at: unsafe { seek_utf8_cp_start_left(s, byte_pos_to_ptr(s, byte_pos)) },
 172 |         }
 173 |     }
 174 | 
 175 |     /**
 176 |     Create a new cursor at the first code point which begins at or to the right of the given byte position.
 177 | 
 178 |     # Note
 179 | 
 180 |     Where possible, you should prefer `new_at_right_of_byte_pos`.
 181 |     */
 182 |     #[inline]
 183 |     pub fn new_at_cp_right_of_byte_pos(s: &'a str, byte_pos: usize) -> StrCursor<'a> {
 184 |         StrCursor {
 185 |             s: s,
 186 |             at: unsafe { seek_utf8_cp_start_right(s, byte_pos_to_ptr(s, byte_pos)) },
 187 |         }
 188 |     }
 189 | 
 190 |     /**
 191 |     Returns a new cursor at the beginning of the previous grapheme cluster, or `None` if the cursor is currently positioned at the beginning of the string.
 192 |     */
 193 |     #[inline]
 194 |     pub fn at_prev(mut self) -> Option<StrCursor<'a>> {
 195 |         match self.try_seek_left_gr() {
 196 |             true => Some(self),
 197 |             false => None
 198 |         }
 199 |     }
 200 | 
 201 |     /**
 202 |     Returns a new cursor at the beginning of the next grapheme cluster, or `None` if the cursor is currently positioned at the end of the string.
 203 |     */
 204 |     #[inline]
 205 |     pub fn at_next(mut self) -> Option<StrCursor<'a>> {
 206 |         match self.try_seek_right_gr() {
 207 |             true => Some(self),
 208 |             false => None
 209 |         }
 210 |     }
 211 | 
 212 |     /**
 213 |     Returns a new cursor at the beginning of the previous code point, or `None` if the cursor is currently positioned at the beginning of the string.
 214 | 
 215 |     # Note
 216 | 
 217 |     Where possible, you should prefer `at_prev`.
 218 |     */
 219 |     #[inline]
 220 |     pub fn at_prev_cp(mut self) -> Option<StrCursor<'a>> {
 221 |         match self.try_seek_left_cp() {
 222 |             true => Some(self),
 223 |             false => None
 224 |         }
 225 |     }
 226 | 
 227 |     /**
 228 |     Returns a new cursor at the beginning of the next code point, or `None` if the cursor is currently positioned at the end of the string.
 229 | 
 230 |     # Note
 231 | 
 232 |     Where possible, you should prefer `at_next`.
 233 |     */
 234 |     #[inline]
 235 |     pub fn at_next_cp(mut self) -> Option<StrCursor<'a>> {
 236 |         match self.try_seek_right_cp() {
 237 |             true => Some(self),
 238 |             false => None
 239 |         }
 240 |     }
 241 | 
 242 |     /**
 243 |     Seeks the cursor to the beginning of the previous grapheme cluster.
 244 | 
 245 |     # Panics
 246 | 
 247 |     If the cursor is currently at the start of the string, then this function will panic.
 248 |     */
 249 |     #[inline]
 250 |     pub fn seek_prev(&mut self) {
 251 |         if !self.try_seek_right_gr() {
 252 |             panic!("cannot seek past the beginning of a string");
 253 |         }
 254 |     }
 255 | 
 256 |     /**
 257 |     Seeks the cursor to the beginning of the next grapheme cluster.
 258 | 
 259 |     # Panics
 260 | 
 261 |     If the cursor is currently at the end of the string, then this function will panic.
 262 |     */
 263 |     #[inline]
 264 |     pub fn seek_next(&mut self) {
 265 |         if !self.try_seek_right_gr() {
 266 |             panic!("cannot seek past the end of a string");
 267 |         }
 268 |     }
 269 | 
 270 |     /**
 271 |     Seeks the cursor to the beginning of the previous code point.
 272 | 
 273 |     # Panics
 274 | 
 275 |     If the cursor is currently at the start of the string, then this function will panic.
 276 | 
 277 |     # Note
 278 | 
 279 |     Where possible, you should prefer `seek_prev`.
 280 |     */
 281 |     #[inline]
 282 |     pub fn seek_prev_cp(&mut self) {
 283 |         if !self.try_seek_left_cp() {
 284 |             panic!("cannot seek past the beginning of a string");
 285 |         }
 286 |     }
 287 | 
 288 |     /**
 289 |     Seeks the cursor to the beginning of the next code point.
 290 | 
 291 |     # Panics
 292 | 
 293 |     If the cursor is currently at the end of the string, then this function will panic.
 294 | 
 295 |     # Note
 296 | 
 297 |     Where possible, you should prefer `seek_next`.
 298 |     */
 299 |     #[inline]
 300 |     pub fn seek_next_cp(&mut self) {
 301 |         if !self.try_seek_right_cp() {
 302 |             panic!("cannot seek past the end of a string");
 303 |         }
 304 |     }
 305 | 
 306 |     /**
 307 |     Returns both the previous grapheme cluster and the cursor having seeked before it.
 308 | 
 309 |     This may be more efficient than doing both operations individually.
 310 |     */
 311 |     #[inline]
 312 |     pub fn prev(mut self) -> Option<(&'a Gc, StrCursor<'a>)> {
 313 |         unsafe {
 314 |             let g = match self.before() {
 315 |                 Some(g) => g,
 316 |                 None => return None,
 317 |             };
 318 |             self.unsafe_set_at(g.as_str());
 319 |             Some((g, self))
 320 |         }
 321 |     }
 322 | 
 323 |     /**
 324 |     Returns both the previous code point and the cursor having seeked before it.
 325 | 
 326 |     This may be more efficient than doing both operations individually.
 327 | 
 328 |     # Note
 329 | 
 330 |     Where possible, you should prefer `prev`.
 331 |     */
 332 |     #[inline]
 333 |     pub fn prev_cp(mut self) -> Option<(char, StrCursor<'a>)> {
 334 |         unsafe {
 335 |             let cp = match self.cp_before() {
 336 |                 Some(cp) => cp,
 337 |                 None => return None,
 338 |             };
 339 |             self.unsafe_seek_left(cp.len_utf8());
 340 |             Some((cp, self))
 341 |         }
 342 |     }
 343 | 
 344 |     /**
 345 |     Returns both the next grapheme cluster and the cursor having seeked past it.
 346 | 
 347 |     This may be more efficient than doing both operations individually.
 348 |     */
 349 |     #[inline]
 350 |     pub fn next(mut self) -> Option<(&'a Gc, StrCursor<'a>)> {
 351 |         unsafe {
 352 |             let g = match self.after() {
 353 |                 Some(g) => g,
 354 |                 None => return None,
 355 |             };
 356 |             self.unsafe_seek_right(g.len());
 357 |             Some((g, self))
 358 |         }
 359 |     }
 360 | 
 361 |     /**
 362 |     Returns both the next code point and the cursor having seeked past it.
 363 | 
 364 |     This may be more efficient than doing both operations individually.
 365 | 
 366 |     # Note
 367 | 
 368 |     Where possible, you should prefer `next`.
 369 |     */
 370 |     #[inline]
 371 |     pub fn next_cp(mut self) -> Option<(char, StrCursor<'a>)> {
 372 |         unsafe {
 373 |             let cp = match self.cp_after() {
 374 |                 Some(cp) => cp,
 375 |                 None => return None,
 376 |             };
 377 |             self.unsafe_seek_right(cp.len_utf8());
 378 |             Some((cp, self))
 379 |         }
 380 |     }
 381 | 
 382 |     /**
    Returns the grapheme cluster immediately to the left of the cursor, or `None` if the cursor is at the start of the string.
 384 |     */
 385 |     #[inline]
 386 |     pub fn before(&self) -> Option<&'a Gc> {
 387 |         self.at_prev().and_then(|cur| cur.after())
 388 |     }
 389 | 
 390 |     /**
    Returns the grapheme cluster immediately to the right of the cursor, or `None` if the cursor is at the end of the string.
 392 |     */
 393 |     #[inline]
 394 |     pub fn after(&self) -> Option<&'a Gc> {
 395 |         Gc::split_from(self.slice_after()).map(|(gc, _)| gc)
 396 |     }
 397 | 
 398 |     /**
 399 |     Returns the contents of the string to the left of the cursor.
 400 |     */
 401 |     #[inline]
 402 |     pub fn slice_before(&self) -> &'a str {
 403 |         unsafe {
 404 |             self.s.slice_unchecked(0, self.byte_pos())
 405 |         }
 406 |     }
 407 | 
 408 |     /**
 409 |     Returns the contents of the string to the right of the cursor.
 410 |     */
 411 |     #[inline]
 412 |     pub fn slice_after(&self) -> &'a str {
 413 |         unsafe {
 414 |             self.s.slice_unchecked(self.byte_pos(), self.s.len())
 415 |         }
 416 |     }
 417 | 
 418 |     /**
 419 |     Returns the contents of the string *between* this cursor and another cursor.
 420 | 
 421 |     Returns `None` if the cursors are from different strings (even different subsets of the same string).
 422 |     */
 423 |     #[inline]
 424 |     pub fn slice_between(&self, until: StrCursor<'a>) -> Option<&'a str> {
 425 |         if !str_eq_literal(self.s, until.s) {
 426 |             None
 427 |         } else {
 428 |             use std::cmp::{max, min};
 429 |             unsafe {
 430 |                 let beg = min(self.at, until.at);
 431 |                 let end = max(self.at, until.at);
 432 |                 let len = end as usize - beg as usize;
 433 |                 let bytes = ::std::slice::from_raw_parts(beg, len);
 434 |                 Some(::std::str::from_utf8_unchecked(bytes))
 435 |             }
 436 |         }
 437 |     }
 438 | 
 439 |     /**
    Returns the code point immediately to the left of the cursor, or `None` if the cursor is at the start of the string.
 441 |     */
 442 |     #[inline]
 443 |     pub fn cp_before(&self) -> Option<char> {
 444 |         self.at_prev_cp().and_then(|cur| cur.cp_after())
 445 |     }
 446 | 
 447 |     /**
    Returns the code point immediately to the right of the cursor, or `None` if the cursor is at the end of the string.
 449 |     */
 450 |     #[inline]
 451 |     pub fn cp_after(&self) -> Option<char> {
 452 |         self.slice_after().chars().next()
 453 |     }
 454 | 
 455 |     /**
 456 |     Returns the entire string slice behind the cursor.
 457 |     */
 458 |     #[inline]
    pub fn slice_all(&self) -> &'a str {
        // The slice the cursor was built over; the cursor position plays no part.
        self.s
    }
 462 | 
 463 |     /**
 464 |     Returns the cursor's current position within the string as the number of UTF-8 code units from the beginning of the string.
 465 |     */
 466 |     #[inline]
 467 |     pub fn byte_pos(&self) -> usize {
 468 |         self.at as usize - self.s.as_ptr() as usize
 469 |     }
 470 | 
 471 |     #[inline]
 472 |     fn try_seek_left_cp(&mut self) -> bool {
 473 |         unsafe {
 474 |             // We just have to ensure that offsetting the `at` pointer *at all* is safe.
 475 |             if self.byte_pos() == 0 {
 476 |                 return false;
 477 |             }
 478 |             self.at = seek_utf8_cp_start_left(self.s, self.at.offset(-1));
 479 |             true
 480 |         }
 481 |     }
 482 | 
 483 |     #[inline]
 484 |     fn try_seek_right_cp(&mut self) -> bool {
 485 |         unsafe {
 486 |             // We just have to ensure that offsetting the `at` pointer *at all* is safe.
 487 |             if self.byte_pos() == self.s.len() {
 488 |                 return false;
 489 |             }
 490 |             self.at = seek_utf8_cp_start_right(self.s, self.at.offset(1));
 491 |             true
 492 |         }
 493 |     }
 494 | 
 495 |     #[inline]
 496 |     fn try_seek_left_gr(&mut self) -> bool {
 497 |         let len = {
 498 |             let gr = UniSeg::graphemes(self.slice_before(), /*is_extended:*/true).next_back();
 499 |             gr.map(|gr| gr.len())
 500 |         };
 501 |         match len {
 502 |             Some(len) => {
 503 |                 unsafe {
 504 |                     self.at = self.at.offset(-(len as isize));
 505 |                 }
 506 |                 true
 507 |             },
 508 |             None => false
 509 |         }
 510 |     }
 511 | 
 512 |     #[inline]
 513 |     fn try_seek_right_gr(&mut self) -> bool {
 514 |         let len = {
 515 |             let gr = UniSeg::graphemes(self.slice_after(), /*is_extended:*/true).next();
 516 |             gr.map(|gr| gr.len())
 517 |         };
 518 |         match len {
 519 |             Some(len) => {
 520 |                 unsafe {
 521 |                     self.at = self.at.offset(len as isize);
 522 |                 }
 523 |                 true
 524 |             },
 525 |             None => false
 526 |         }
 527 |     }
 528 | 
 529 |     /**
 530 |     Seeks exactly `bytes` left, without performing any bounds or validity checks.
 531 |     */
 532 |     #[inline]
 533 |     pub unsafe fn unsafe_seek_left(&mut self, bytes: usize) {
 534 |         self.at = self.at.offset(-(bytes as isize));
 535 |     }
 536 | 
 537 |     /**
 538 |     Seeks exactly `bytes` right, without performing any bounds or validity checks.
 539 |     */
 540 |     #[inline]
 541 |     pub unsafe fn unsafe_seek_right(&mut self, bytes: usize) {
 542 |         self.at = self.at.offset(bytes as isize);
 543 |     }
 544 | 
 545 |     /**
 546 |     Seeks to the start of `s`, without performing any bounds or validity checks.
 547 |     */
 548 |     #[inline]
 549 |     pub unsafe fn unsafe_set_at(&mut self, s: &'a str) {
 550 |         self.at = s.as_bytes().as_ptr();
 551 |     }
 552 | }
 553 | 
// Cursors are plain values: copying one yields an independent cursor at the same position.
impl<'a> Copy for StrCursor<'a> {}
 555 | 
 556 | impl<'a> Clone for StrCursor<'a> {
 557 |     fn clone(&self) -> StrCursor<'a> {
 558 |         *self
 559 |     }
 560 | }
 561 | 
 562 | impl<'a> std::fmt::Debug for StrCursor<'a> {
 563 | 	fn fmt(&self, fmt: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
 564 |         write!(fmt, "StrCursor({:?} | {:?})", self.slice_before(), self.slice_after())
 565 |     }
 566 | }
 567 | 
// Marks the `PartialEq` implementation below as a full equivalence relation.
impl<'a> Eq for StrCursor<'a> {}
 569 | 
 570 | impl<'a> PartialEq for StrCursor<'a> {
 571 |     fn eq(&self, other: &StrCursor<'a>) -> bool {
 572 |         (self.at == other.at)
 573 |         && (self.s.as_ptr() == other.s.as_ptr())
 574 |         && (self.s.len() == other.s.len())
 575 |     }
 576 | 
 577 |     fn ne(&self, other: &StrCursor<'a>) -> bool {
 578 |         (self.at != other.at)
 579 |         || (self.s.as_ptr() != other.s.as_ptr())
 580 |         || (self.s.len() != other.s.len())
 581 |     }
 582 | }
 583 | 
 584 | impl<'a> PartialOrd for StrCursor<'a> {
 585 |     fn partial_cmp(&self, other: &StrCursor<'a>) -> Option<std::cmp::Ordering> {
 586 |         // If the cursors are from different strings, they are unordered.
 587 |         if (self.s.as_ptr() != other.s.as_ptr()) || (self.s.len() != other.s.len()) {
 588 |             None
 589 |         } else {
 590 |             self.at.partial_cmp(&other.at)
 591 |         }
 592 |     }
 593 | }
 594 | 
 595 | impl<'a> std::hash::Hash for StrCursor<'a> {
 596 |     fn hash<H>(&self, state: &mut H)
 597 |     where H: std::hash::Hasher {
 598 |         self.s.as_ptr().hash(state);
 599 |         self.s.len().hash(state);
 600 |         self.at.hash(state);
 601 |     }
 602 | }
 603 | 
 604 | #[cfg(test)]
 605 | #[test]
 606 | fn test_new_at_start() {
 607 |     let cur = StrCursor::new_at_start("abcdef");
 608 |     assert_eq!(cur.slice_before(), "");
 609 |     assert_eq!(cur.slice_after(), "abcdef");
 610 | }
 611 | 
 612 | #[cfg(test)]
 613 | #[test]
 614 | fn test_new_at_end() {
 615 |     let cur = StrCursor::new_at_end("abcdef");
 616 |     assert_eq!(cur.slice_before(), "abcdef");
 617 |     assert_eq!(cur.slice_after(), "");
 618 | }
 619 | 
 620 | #[cfg(test)]
 621 | #[test]
 622 | fn test_new_at_cp_left_of_byte_pos() {
 623 |     let s = "This is a 本当 test.";
 624 |     let cur = StrCursor::new_at_cp_left_of_byte_pos(s, 11);
 625 |     assert_eq!(cur.slice_before(), "This is a ");
 626 |     assert_eq!(cur.slice_after(), "本当 test.");
 627 | }
 628 | 
 629 | #[cfg(test)]
 630 | #[test]
 631 | fn test_new_at_cp_right_of_byte_pos() {
 632 |     let s = "This is a 本当 test.";
 633 |     let cur = StrCursor::new_at_cp_right_of_byte_pos(s, 11);
 634 |     assert_eq!(cur.slice_before(), "This is a 本");
 635 |     assert_eq!(cur.slice_after(), "当 test.");
 636 | }
 637 | 
 638 | #[cfg(test)]
 639 | #[test]
 640 | fn test_new_at_left_of_byte_pos() {
 641 |     let s = "Jäger,Jäger,大嫌い,💪❤!";
 642 |     let r = (0..s.len()+1).map(|i| (i, StrCursor::new_at_left_of_byte_pos(s, i)))
 643 |         .map(|(i, cur)| (i, cur.byte_pos(), cur.after().map(Gc::as_str)))
 644 |         .collect::<Vec<_>>();
 645 |     assert_eq!(r, vec![
 646 |         (0, 0, Some("J")),
 647 |         (1, 1, Some("ä")),
 648 |         (2, 1, Some("ä")),
 649 |         (3, 3, Some("g")),
 650 |         (4, 4, Some("e")),
 651 |         (5, 5, Some("r")),
 652 |         (6, 6, Some(",")),
 653 |         (7, 7, Some("J")),
 654 |         (8, 8, Some("ä")),
 655 |         (9, 8, Some("ä")),
 656 |         (10, 8, Some("ä")),
 657 |         (11, 11, Some("g")),
 658 |         (12, 12, Some("e")),
 659 |         (13, 13, Some("r")),
 660 |         (14, 14, Some(",")),
 661 |         (15, 15, Some("大")),
 662 |         (16, 15, Some("大")),
 663 |         (17, 15, Some("大")),
 664 |         (18, 18, Some("嫌")),
 665 |         (19, 18, Some("嫌")),
 666 |         (20, 18, Some("嫌")),
 667 |         (21, 21, Some("い")),
 668 |         (22, 21, Some("い")),
 669 |         (23, 21, Some("い")),
 670 |         (24, 24, Some(",")),
 671 |         (25, 25, Some("💪")),
 672 |         (26, 25, Some("💪")),
 673 |         (27, 25, Some("💪")),
 674 |         (28, 25, Some("💪")),
 675 |         (29, 29, Some("❤")),
 676 |         (30, 29, Some("❤")),
 677 |         (31, 29, Some("❤")),
 678 |         (32, 32, Some("!")),
 679 |         (33, 33, None),
 680 |     ]);
 681 | }
 682 | 
 683 | #[cfg(test)]
 684 | #[test]
 685 | fn test_new_at_right_of_byte_pos() {
 686 |     let s = "Jäger,Jäger,大嫌い,💪❤!";
 687 |     let r = (0..s.len()+1).map(|i| (i, StrCursor::new_at_right_of_byte_pos(s, i)))
 688 |         .map(|(i, cur)| (i, cur.byte_pos(), cur.after().map(Gc::as_str)))
 689 |         .collect::<Vec<_>>();
 690 |     assert_eq!(r, vec![
 691 |         (0, 0, Some("J")),
 692 |         (1, 1, Some("ä")),
 693 |         (2, 3, Some("g")),
 694 |         (3, 3, Some("g")),
 695 |         (4, 4, Some("e")),
 696 |         (5, 5, Some("r")),
 697 |         (6, 6, Some(",")),
 698 |         (7, 7, Some("J")),
 699 |         (8, 8, Some("ä")),
 700 |         (9, 11, Some("g")),
 701 |         (10, 11, Some("g")),
 702 |         (11, 11, Some("g")),
 703 |         (12, 12, Some("e")),
 704 |         (13, 13, Some("r")),
 705 |         (14, 14, Some(",")),
 706 |         (15, 15, Some("大")),
 707 |         (16, 18, Some("嫌")),
 708 |         (17, 18, Some("嫌")),
 709 |         (18, 18, Some("嫌")),
 710 |         (19, 21, Some("い")),
 711 |         (20, 21, Some("い")),
 712 |         (21, 21, Some("い")),
 713 |         (22, 24, Some(",")),
 714 |         (23, 24, Some(",")),
 715 |         (24, 24, Some(",")),
 716 |         (25, 25, Some("💪")),
 717 |         (26, 29, Some("❤")),
 718 |         (27, 29, Some("❤")),
 719 |         (28, 29, Some("❤")),
 720 |         (29, 29, Some("❤")),
 721 |         (30, 32, Some("!")),
 722 |         (31, 32, Some("!")),
 723 |         (32, 32, Some("!")),
 724 |         (33, 33, None),
 725 |     ]);
 726 | }
 727 | 
 728 | #[cfg(test)]
 729 | #[test]
 730 | fn test_at_prev_cp() {
 731 |     let s = "大嫌い,💪❤";
 732 |     let cur = StrCursor::new_at_end(s);
 733 |     let bps = test_util::finite_iterate(cur, StrCursor::at_prev_cp)
 734 |         .map(|cur| cur.byte_pos())
 735 |         .collect::<Vec<_>>();
 736 |     assert_eq!(bps, vec![14, 10, 9, 6, 3, 0]);
 737 | }
 738 | 
 739 | #[cfg(test)]
 740 | #[test]
 741 | fn test_at_next_cp() {
 742 |     let s = "大嫌い,💪❤";
 743 |     let cur = StrCursor::new_at_start(s);
 744 |     let bps = test_util::finite_iterate(cur, StrCursor::at_next_cp)
 745 |         .map(|cur| cur.byte_pos())
 746 |         .collect::<Vec<_>>();
 747 |     assert_eq!(bps, vec![3, 6, 9, 10, 14, 17]);
 748 | }
 749 | 
 750 | #[cfg(test)]
 751 | #[test]
 752 | fn test_at_prev_and_before() {
 753 |     let s = "noe\u{0308}l";
 754 |     let cur = StrCursor::new_at_end(s);
 755 |     let bps = test_util::finite_iterate_lead(cur, StrCursor::at_prev)
 756 |         .map(|cur| (cur.byte_pos(), cur.after().map(Gc::as_str)))
 757 |         .collect::<Vec<_>>();
 758 |     assert_eq!(bps, vec![
 759 |         (6, None),
 760 |         (5, Some("l")),
 761 |         (2, Some("e\u{0308}")),
 762 |         (1, Some("o")),
 763 |         (0, Some("n")),
 764 |     ]);
 765 | }
 766 | 
 767 | #[cfg(test)]
 768 | #[test]
 769 | fn test_at_next_and_after() {
 770 |     let s = "noe\u{0308}l";
 771 |     let cur = StrCursor::new_at_start(s);
 772 |     let bps = test_util::finite_iterate_lead(cur, StrCursor::at_next)
 773 |         .map(|cur| (cur.byte_pos(), cur.after().map(Gc::as_str)))
 774 |         .collect::<Vec<_>>();
 775 |     assert_eq!(bps, vec![
 776 |         (0, Some("n")),
 777 |         (1, Some("o")),
 778 |         (2, Some("e\u{0308}")),
 779 |         (5, Some("l")),
 780 |         (6, None),
 781 |     ]);
 782 | }
 783 | 
 784 | #[cfg(test)]
 785 | #[test]
 786 | fn test_prev() {
 787 |     let s = "Jäger,Jäger,大嫌い,💪❤!";
 788 |     let cur = StrCursor::new_at_end(s);
 789 |     let r = test_util::finite_iterate_lead(cur, StrCursor::at_prev)
 790 |         .map(|cur| cur.prev().map(|(gr, cur)| (gr.as_str(), cur.byte_pos())))
 791 |         .collect::<Vec<_>>();
 792 |     assert_eq!(r, vec![
 793 |         Some(("!", 32)),
 794 |         Some(("❤", 29)),
 795 |         Some(("💪", 25)),
 796 |         Some((",", 24)),
 797 |         Some(("い", 21)),
 798 |         Some(("嫌", 18)),
 799 |         Some(("大", 15)),
 800 |         Some((",", 14)),
 801 |         Some(("r", 13)),
 802 |         Some(("e", 12)),
 803 |         Some(("g", 11)),
 804 |         Some(("ä", 8)),
 805 |         Some(("J", 7)),
 806 |         Some((",", 6)),
 807 |         Some(("r", 5)),
 808 |         Some(("e", 4)),
 809 |         Some(("g", 3)),
 810 |         Some(("ä", 1)),
 811 |         Some(("J", 0)),
 812 |         None,
 813 |     ]);
 814 | }
 815 | 
 816 | #[cfg(test)]
 817 | #[test]
 818 | fn test_prev_cp() {
 819 |     let s = "Jäger,Jäger,大嫌い,💪❤!";
 820 |     let cur = StrCursor::new_at_end(s);
 821 |     let r = test_util::finite_iterate_lead(cur, StrCursor::at_prev_cp)
 822 |         .map(|cur| cur.prev_cp().map(|(cp, cur)| (cp, cur.byte_pos())))
 823 |         .collect::<Vec<_>>();
 824 |     assert_eq!(r, vec![
 825 |         Some(('!', 32)),
 826 |         Some(('❤', 29)),
 827 |         Some(('💪', 25)),
 828 |         Some((',', 24)),
 829 |         Some(('い', 21)),
 830 |         Some(('嫌', 18)),
 831 |         Some(('大', 15)),
 832 |         Some((',', 14)),
 833 |         Some(('r', 13)),
 834 |         Some(('e', 12)),
 835 |         Some(('g', 11)),
 836 |         Some(('̈', 9)),
 837 |         Some(('a', 8)),
 838 |         Some(('J', 7)),
 839 |         Some((',', 6)),
 840 |         Some(('r', 5)),
 841 |         Some(('e', 4)),
 842 |         Some(('g', 3)),
 843 |         Some(('ä', 1)),
 844 |         Some(('J', 0)),
 845 |         None,
 846 |     ]);
 847 | }
 848 | 
 849 | #[cfg(test)]
 850 | #[test]
 851 | fn test_next() {
 852 |     let s = "Jäger,Jäger,大嫌い,💪❤!";
 853 |     let cur = StrCursor::new_at_start(s);
 854 |     let r = test_util::finite_iterate_lead(cur, StrCursor::at_next)
 855 |         .map(|cur| cur.next().map(|(gr, cur)| (gr.as_str(), cur.byte_pos())))
 856 |         .collect::<Vec<_>>();
 857 |     assert_eq!(r, vec![
 858 |         Some(("J", 1)),
 859 |         Some(("ä", 3)),
 860 |         Some(("g", 4)),
 861 |         Some(("e", 5)),
 862 |         Some(("r", 6)),
 863 |         Some((",", 7)),
 864 |         Some(("J", 8)),
 865 |         Some(("ä", 11)),
 866 |         Some(("g", 12)),
 867 |         Some(("e", 13)),
 868 |         Some(("r", 14)),
 869 |         Some((",", 15)),
 870 |         Some(("大", 18)),
 871 |         Some(("嫌", 21)),
 872 |         Some(("い", 24)),
 873 |         Some((",", 25)),
 874 |         Some(("💪", 29)),
 875 |         Some(("❤", 32)),
 876 |         Some(("!", 33)),
 877 |         None,
 878 |     ]);
 879 | }
 880 | 
 881 | #[cfg(test)]
 882 | #[test]
 883 | fn test_next_cp() {
 884 |     let s = "Jäger,Jäger,大嫌い,💪❤!";
 885 |     let cur = StrCursor::new_at_start(s);
 886 |     let r = test_util::finite_iterate_lead(cur, StrCursor::at_next_cp)
 887 |         .map(|cur| cur.next_cp().map(|(cp, cur)| (cp, cur.byte_pos())))
 888 |         .collect::<Vec<_>>();
 889 |     assert_eq!(r, vec![
 890 |         Some(('J', 1)),
 891 |         Some(('ä', 3)),
 892 |         Some(('g', 4)),
 893 |         Some(('e', 5)),
 894 |         Some(('r', 6)),
 895 |         Some((',', 7)),
 896 |         Some(('J', 8)),
 897 |         Some(('a', 9)),
 898 |         Some(('̈', 11)),
 899 |         Some(('g', 12)),
 900 |         Some(('e', 13)),
 901 |         Some(('r', 14)),
 902 |         Some((',', 15)),
 903 |         Some(('大', 18)),
 904 |         Some(('嫌', 21)),
 905 |         Some(('い', 24)),
 906 |         Some((',', 25)),
 907 |         Some(('💪', 29)),
 908 |         Some(('❤', 32)),
 909 |         Some(('!', 33)),
 910 |         None,
 911 |     ]);
 912 | }
 913 | 
 914 | #[cfg(test)]
 915 | #[test]
 916 | fn test_char_before_and_after() {
 917 |     let s = "大嫌い,💪❤";
 918 |     let cur = StrCursor::new_at_start(s);
 919 |     let r = test_util::finite_iterate_lead(cur, StrCursor::at_next_cp)
 920 |         .map(|cur| (cur.byte_pos(), cur.cp_before(), cur.cp_after()))
 921 |         .collect::<Vec<_>>();
 922 |     assert_eq!(r, vec![
 923 |         (0, None, Some('大')),
 924 |         (3, Some('大'), Some('嫌')),
 925 |         (6, Some('嫌'), Some('い')),
 926 |         (9, Some('い'), Some(',')),
 927 |         (10, Some(','), Some('💪')),
 928 |         (14, Some('💪'), Some('❤')),
 929 |         (17, Some('❤'), None)
 930 |     ]);
 931 | }
 932 | 
 933 | #[cfg(test)]
 934 | #[test]
 935 | fn test_slice_between() {
 936 |     let s = "they hit, fight, kick, wreak havoc, and rejoice";
 937 |     let cur0 = StrCursor::new_at_start(s);
 938 |     let cur1 = StrCursor::new_at_end(s);
 939 |     let cur2 = StrCursor::new_at_end("nobody knows what they're lookin' for");
 940 |     let cur3 = StrCursor::new_at_end(&s[1..]);
 941 |     assert_eq!(cur0.slice_between(cur1), Some(s));
 942 |     assert_eq!(cur1.slice_between(cur0), Some(s));
 943 |     assert_eq!(cur0.slice_between(cur2), None);
 944 |     assert_eq!(cur0.slice_between(cur3), None);
 945 | }
 946 | 
 947 | #[inline]
 948 | fn byte_pos_to_ptr(s: &str, byte_pos: usize) -> *const u8 {
 949 |     if s.len() < byte_pos {
 950 |         panic!("byte position out of bounds: the len is {} but the position is {}",
 951 |             s.len(), byte_pos);
 952 |     }
 953 |     unsafe { s.as_ptr().offset(byte_pos as isize) }
 954 | }
 955 | 
 956 | #[inline]
 957 | unsafe fn seek_utf8_cp_start_left(s: &str, mut from: *const u8) -> *const u8 {
 958 |     let beg = s.as_ptr();
 959 |     while from > beg && (*from & 0b11_00_0000 == 0b10_00_0000) {
 960 |         from = from.offset(-1);
 961 |     }
 962 |     from
 963 | }
 964 | 
 965 | #[cfg(test)]
 966 | #[test]
 967 | fn test_seek_utf8_cp_start_left() {
 968 |     let s = "カブム！";
 969 |     let b = s.as_bytes();
 970 |     assert_eq!(unsafe { seek_utf8_cp_start_left(s, &b[0]) }, &b[0]);
 971 |     assert_eq!(unsafe { seek_utf8_cp_start_left(s, &b[1]) }, &b[0]);
 972 |     assert_eq!(unsafe { seek_utf8_cp_start_left(s, &b[2]) }, &b[0]);
 973 |     assert_eq!(unsafe { seek_utf8_cp_start_left(s, &b[3]) }, &b[3]);
 974 |     assert_eq!(unsafe { seek_utf8_cp_start_left(s, &b[4]) }, &b[3]);
 975 |     assert_eq!(unsafe { seek_utf8_cp_start_left(s, &b[5]) }, &b[3]);
 976 | }
 977 | 
 978 | #[inline]
 979 | unsafe fn seek_utf8_cp_start_right(s: &str, mut from: *const u8) -> *const u8 {
 980 |     let end = s.as_ptr().offset(s.len() as isize);
 981 |     while from < end && (*from & 0b11_00_0000 == 0b10_00_0000) {
 982 |         from = from.offset(1);
 983 |     }
 984 |     from
 985 | }
 986 | 
 987 | #[cfg(test)]
 988 | #[test]
 989 | fn test_seek_utf8_cp_start_right() {
 990 |     let s = "カブム！";
 991 |     let b = s.as_bytes();
 992 |     assert_eq!(unsafe { seek_utf8_cp_start_right(s, &b[0]) }, &b[0]);
 993 |     assert_eq!(unsafe { seek_utf8_cp_start_right(s, &b[1]) }, &b[3]);
 994 |     assert_eq!(unsafe { seek_utf8_cp_start_right(s, &b[2]) }, &b[3]);
 995 |     assert_eq!(unsafe { seek_utf8_cp_start_right(s, &b[3]) }, &b[3]);
 996 |     assert_eq!(unsafe { seek_utf8_cp_start_right(s, &b[4]) }, &b[6]);
 997 |     assert_eq!(unsafe { seek_utf8_cp_start_right(s, &b[5]) }, &b[6]);
 998 | }
 999 | 
// Returns `true` when `a` and `b` are the very same slice: same starting address
// and same length.  The string contents are never compared.
#[inline]
fn str_eq_literal(a: &str, b: &str) -> bool {
    let same_start = a.as_ptr() == b.as_ptr();
    let same_len = a.len() == b.len();
    same_start && same_len
}
1005 | 
#[cfg(test)]
#[test]
fn test_str_eq_literal() {
    let text = "hare hare yukai";
    let first = &text[0..4];
    // Identical slices compare equal.
    assert!(str_eq_literal(text, text));
    assert!(str_eq_literal(first, &text[0..4]));
    // Same contents at a different address ("hare" twice) is not the same slice.
    assert!(!str_eq_literal(first, &text[5..9]));
    // Same address with a different length is not the same slice either.
    assert!(!str_eq_literal(first, &text[0..3]));
}
1015 | 
#[cfg(test)]
mod test_util {
    // Iterator that repeatedly applies a step function to the previous value,
    // yielding each result until the function returns `None`.  The seed itself
    // is not yielded.
    pub struct FiniteIter<T, F>(Option<T>, F);

    impl<T, F> Iterator for FiniteIter<T, F>
    where
        F: FnMut(T) -> Option<T>,
        T: Clone,
    {
        type Item = T;

        fn next(&mut self) -> Option<Self::Item> {
            // Once the state has been consumed without replacement, we stay exhausted.
            let last = match self.0.take() {
                Some(last) => last,
                None => return None,
            };
            match (self.1)(last) {
                Some(stepped) => {
                    self.0 = Some(stepped);
                    self.0.clone()
                },
                None => None,
            }
        }
    }

    // Builds a `FiniteIter` starting from `seed`.
    pub fn finite_iterate<T, F>(seed: T, f: F) -> FiniteIter<T, F>
    where
        F: FnMut(T) -> Option<T>,
        T: Clone,
    {
        FiniteIter(Some(seed), f)
    }

    // Like `FiniteIter`, but yields the seed first; the flag records whether the
    // seed has already been handed out.
    pub struct FiniteIterLead<T, F>(Option<T>, F, bool);

    impl<T, F> Iterator for FiniteIterLead<T, F>
    where
        F: FnMut(T) -> Option<T>,
        T: Clone,
    {
        type Item = T;

        fn next(&mut self) -> Option<Self::Item> {
            // First call: hand out the seed without advancing.
            if !self.2 {
                self.2 = true;
                return self.0.clone();
            }

            let last = match self.0.take() {
                Some(last) => last,
                None => return None,
            };
            match (self.1)(last) {
                Some(stepped) => {
                    self.0 = Some(stepped);
                    self.0.clone()
                },
                None => None,
            }
        }
    }

    // Builds a `FiniteIterLead` starting from `seed`.
    pub fn finite_iterate_lead<T, F>(seed: T, f: F) -> FiniteIterLead<T, F>
    where
        F: FnMut(T) -> Option<T>,
        T: Clone,
    {
        FiniteIterLead(Some(seed), f, false)
    }
}
1082 | 


--------------------------------------------------------------------------------
/src/util.rs:
--------------------------------------------------------------------------------
 1 | /*!
 2 | Miscellaneous stuff.
 3 | */
 4 | 
/**
Tells the compiler that the call site can never be reached.

This works by transmuting `()` into a value of the empty enum `Knowledge` and matching on it with no arms.  The function is `unsafe` because it must never actually execute.  NOTE(review): conjuring a value of an uninhabited type is presumably undefined behaviour if reached, so callers must guarantee the path is dead.
*/
#[inline(always)]
pub unsafe fn unreachable() -> ! {
    // An enum with no variants: no valid value of this type exists.
    enum Knowledge {}
    #[inline(always)]
    fn nirvana(knowledge: Knowledge) -> ! {
        // An empty match on an uninhabited type type-checks as diverging.
        match knowledge {}
    }
    nirvana(::std::mem::transmute(()))
}
14 | 
15 | /*
16 | 
17 | TODO: The following code is nicked from libcore, owing to `encode_utf8` not being stable yet.  Specifically, <https://github.com/rust-lang/rust/blob/3d7cd77e442ce34eaac8a176ae8be17669498ebc/src/libcore/char.rs>.
18 | 
19 | This should all be removed as soon as `encode_utf8` *is* stable.
20 | 
21 | */
22 | 
23 | // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
24 | // file at the top-level directory of this distribution and at
25 | // http://rust-lang.org/COPYRIGHT.
26 | //
27 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
28 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
29 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
30 | // option. This file may not be copied, modified, or distributed
31 | // except according to those terms.
32 | 
33 | // UTF-8 ranges and tags for encoding characters
34 | const TAG_CONT: u8    = 0b1000_0000;
35 | const TAG_TWO_B: u8   = 0b1100_0000;
36 | const TAG_THREE_B: u8 = 0b1110_0000;
37 | const TAG_FOUR_B: u8  = 0b1111_0000;
38 | const MAX_ONE_B: u32   =     0x80;
39 | const MAX_TWO_B: u32   =    0x800;
40 | const MAX_THREE_B: u32 =  0x10000;
41 | 
42 | pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> Option<usize> {
43 |     // Marked #[inline] to allow llvm optimizing it away
44 |     if code < MAX_ONE_B && !dst.is_empty() {
45 |         dst[0] = code as u8;
46 |         Some(1)
47 |     } else if code < MAX_TWO_B && dst.len() >= 2 {
48 |         dst[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
49 |         dst[1] = (code & 0x3F) as u8 | TAG_CONT;
50 |         Some(2)
51 |     } else if code < MAX_THREE_B && dst.len() >= 3  {
52 |         dst[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
53 |         dst[1] = (code >>  6 & 0x3F) as u8 | TAG_CONT;
54 |         dst[2] = (code & 0x3F) as u8 | TAG_CONT;
55 |         Some(3)
56 |     } else if dst.len() >= 4 {
57 |         dst[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
58 |         dst[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
59 |         dst[2] = (code >>  6 & 0x3F) as u8 | TAG_CONT;
60 |         dst[3] = (code & 0x3F) as u8 | TAG_CONT;
61 |         Some(4)
62 |     } else {
63 |         None
64 |     }
65 | }
66 | 


--------------------------------------------------------------------------------
/update-docs.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # coding: utf-8
  3 | 
  4 | # Copyright ⓒ 2016 Daniel Keep.
  5 | #
  6 | # Licensed under the MIT license (see LICENSE or <http://opensource.org
  7 | # /licenses/MIT>) or the Apache License, Version 2.0 (see LICENSE of
  8 | # <http://www.apache.org/licenses/LICENSE-2.0>), at your option. All
  9 | # files in the project carrying such notice may not be copied, modified,
 10 | # or distributed except according to those terms.
 11 | 
 12 | import distutils.dir_util
 13 | import os
 14 | import shutil
 15 | import subprocess
 16 | import sys
 17 | import tempfile
 18 | import time
 19 | 
 20 | DOC_ARGS = '--no-deps'
 21 | DOC_FEATURES = ""
 22 | DOC_TARGET_BRANCH = 'gh-pages'
 23 | TEMP_CHECKOUT_PREFIX = 'gh-pages-checkout-'
 24 | TEMP_OUTPUT_PREFIX = 'gh-pages-generated-'
 25 | 
 26 | USE_ANSI = True if sys.platform != 'win32' else os.environ.get('FORCE_ANSI', '') != ''
 27 | TRACE_UPDATE_DOCS = os.environ.get('TRACE_UPDATE_DOCS', '') != ''
 28 | 
 29 | def sh(cmd):
 30 |     msg_trace('sh(%r)' % cmd)
 31 |     try:
 32 |         subprocess.check_call(cmd, shell=True)
 33 |     except:
 34 |         msg_trace('FAILED!')
 35 |         raise
 36 | 
 37 | def sh_eval(cmd, codec='utf-8', dont_strip=False):
 38 |     msg_trace('sh_eval(%r)' % cmd)
 39 |     result = None
 40 |     try:
 41 |         result = subprocess.check_output(cmd, shell=True).decode(codec)
 42 |         if not dont_strip:
 43 |             result = result.strip()
 44 |     except:
 45 |         msg_trace('FAILED!')
 46 |         raise
 47 |     return result
 48 | 
 49 | def msg(*args):
 50 |     if USE_ANSI: sys.stdout.write('\x1b[1;34m')
 51 |     sys.stdout.write('> ')
 52 |     if USE_ANSI: sys.stdout.write('\x1b[1;32m')
 53 |     for arg in args:
 54 |         sys.stdout.write(str(arg))
 55 |     if USE_ANSI: sys.stdout.write('\x1b[0m')
 56 |     sys.stdout.write('\n')
 57 |     sys.stdout.flush()
 58 | 
 59 | def msg_trace(*args):
 60 |     if TRACE_UPDATE_DOCS:
 61 |         if USE_ANSI: sys.stderr.write('\x1b[1;31m')
 62 |         sys.stderr.write('$ ')
 63 |         if USE_ANSI: sys.stderr.write('\x1b[0m')
 64 |         for arg in args:
 65 |             sys.stderr.write(str(arg))
 66 |         sys.stderr.write('\n')
 67 |         sys.stderr.flush()
 68 | 
 69 | def copytree(src, dst):
 70 |     msg_trace('copytree(%r, %r)' % (src, dst))
 71 |     distutils.dir_util.copy_tree(src=src, dst=dst)
 72 | 
 73 | def really_rmtree(path):
 74 |     msg_trace('really_rmtree(%r)' % path)
 75 | 
 76 |     WAIT_TIME_SECS = 1.0
 77 |     MAX_TRIES = 10
 78 | 
 79 |     def on_error(func, path, exc_info):
 80 |         """
 81 |         Error handler for ``shutil.rmtree``.
 82 | 
 83 |         If the error is due to an access error (read only file)
 84 |         it attempts to add write permission and then retries.
 85 | 
 86 |         If the error is for another reason it re-raises the error.
 87 | 
 88 |         Usage: ``shutil.rmtree(path, onerror=on_error)``
 89 | 
 90 |         From <http://stackoverflow.com/a/2656405>_.
 91 |         """
 92 |         import stat
 93 |         if not os.access(path, os.W_OK):
 94 |             # Is the error an access error ?
 95 |             os.chmod(path, stat.S_IWUSR)
 96 |             func(path)
 97 |         else:
 98 |             raise
 99 | 
100 |     for _ in range(MAX_TRIES):
101 |         failed = True
102 |         try:
103 |             msg_trace('shutil.rmtree(%r)' % path)
104 |             shutil.rmtree(path, onerror=on_error)
105 |             failed = False
106 |         except WindowsError:
107 |             time.sleep(WAIT_TIME_SECS)
108 |         if not failed: return
109 | 
110 |     msg('Warning: failed to remove directory %r' % path)
111 | 
def init_doc_branch():
    """Create the documentation branch in the current repository.

    An empty orphan commit on ``DOC_TARGET_BRANCH`` is made in a temporary
    repository and pushed back into the repository in the current working
    directory.  The temporary directory is removed and the working directory
    restored even if a step fails.
    """
    msg("Initialising %s branch" % DOC_TARGET_BRANCH)

    repo_dir = os.getcwd()
    msg_trace('dir = %r' % repo_dir)

    tmp = tempfile.mkdtemp(prefix=TEMP_CHECKOUT_PREFIX)
    msg_trace('tmp = %r' % tmp)

    try:
        msg("Cloning into a temporary directory...")
        sh('git init -q "%s"' % tmp)
        msg_trace('os.chdir(%r)' % tmp)
        os.chdir(tmp)
        sh('git checkout -q --orphan "%s"' % DOC_TARGET_BRANCH)
        sh('git commit -qm "Initial commit." --allow-empty')
        sh('git remote add origin "%s"' % repo_dir)
        # Push the branch that was just created, not a hard-coded name, so that
        # changing DOC_TARGET_BRANCH keeps working.
        sh('git push -q origin "%s"' % DOC_TARGET_BRANCH)

    finally:
        msg('Cleaning up...')
        msg_trace('os.chdir(%r)' % repo_dir)
        os.chdir(repo_dir)
        msg_trace('shutil.rmtree(%r)' % tmp)
        really_rmtree(tmp)

    msg('%s is ready.  Continuing.' % DOC_TARGET_BRANCH)
139 | 
def main():
    """Regenerate the crate documentation and publish it to the doc branch.

    Does nothing (and returns 0) unless the current branch is ``master``.
    Otherwise the repository is cloned into a temporary directory, the docs
    are built with ``cargo doc``, copied onto ``DOC_TARGET_BRANCH``, committed
    (amending the branch's last commit) and pushed back, after which the
    branch is force-pushed to ``origin``.  Temporary directories are removed
    even if a step fails.
    """
    if sh_eval('git symbolic-ref --short HEAD') != u'master':
        msg('Not on master; doing nothing.')
        return 0

    # Sanity check: does the doc branch exist at all?
    # `git branch` prefixes each name with two characters ("* " or "  ").
    branches = {b[2:].strip() for b in sh_eval('git branch', dont_strip=True).splitlines()}
    msg_trace('branches = %r' % branches)
    if DOC_TARGET_BRANCH not in branches:
        init_doc_branch()

    last_rev = sh_eval('git rev-parse HEAD')
    last_msg = sh_eval('git log -1 --pretty=%B')
    msg_trace('last_rev = %r' % last_rev)
    msg_trace('last_msg = %r' % last_msg)

    repo_dir = os.getcwd()
    msg_trace('dir = %r' % repo_dir)

    tmp1 = tempfile.mkdtemp(prefix=TEMP_CHECKOUT_PREFIX)
    tmp2 = tempfile.mkdtemp(prefix=TEMP_OUTPUT_PREFIX)
    msg_trace('tmp1 = %r' % tmp1)
    msg_trace('tmp2 = %r' % tmp2)

    try:
        msg("Cloning into a temporary directory...")
        sh('git clone -qb "%s" "%s" "%s"' % (DOC_TARGET_BRANCH, repo_dir, tmp1))
        msg_trace('os.chdir(%r)' % tmp1)
        os.chdir(tmp1)
        sh('git checkout -q master')

        msg("Generating documentation...")
        args = '%s --features="%s"' % (DOC_ARGS, DOC_FEATURES)
        sh('cargo doc %s' % args)
        tmp1_target_doc = os.path.join(tmp1, 'target', 'doc')
        msg_trace('shutil.move(%r, %r)' % (tmp1_target_doc, tmp2))
        # tmp2 already exists, so the tree ends up at <tmp2>/doc.
        shutil.move(tmp1_target_doc, tmp2)

        msg('Updating %s...' % DOC_TARGET_BRANCH)
        sh('git checkout -q "%s"' % DOC_TARGET_BRANCH)
        sh('git clean -dfq')
        tmp2_doc = os.path.join(tmp2, 'doc')

        msg_trace('copytree(%r, %r)' % (tmp2_doc, './doc'))
        copytree(tmp2_doc, './doc')

        msg('Committing changes...')
        sh('git add .')
        # The last commit message is arbitrary text: pass it as a separate
        # argument rather than splicing it into a shell string, so quotes, `$`
        # or backticks in it cannot break or alter the command.
        commit_cmd = [
            'git', 'commit', '--amend',
            '-m', 'Update docs for %s' % last_rev[:7],
            '-m', last_msg,
        ]
        msg_trace('check_call(%r)' % (commit_cmd,))
        try:
            subprocess.check_call(commit_cmd)
        except BaseException:
            msg_trace('FAILED!')
            raise

        sh('git push -fqu origin "%s"' % DOC_TARGET_BRANCH)

    finally:
        msg('Cleaning up...')
        msg_trace('os.chdir(%r)' % repo_dir)
        os.chdir(repo_dir)
        msg_trace('shutil.rmtree(%r)' % tmp2)
        really_rmtree(tmp2)
        msg_trace('shutil.rmtree(%r)' % tmp1)
        really_rmtree(tmp1)

    msg('Publishing...')
    sh('git push -f origin "%s"' % DOC_TARGET_BRANCH)

    msg('Done.')
205 | 
206 | 
# Script entry point: run main() and use its return value as the exit status
# (None, returned on the normal path, exits with status 0).
if __name__ == '__main__':
    sys.exit(main())
209 | 


--------------------------------------------------------------------------------