├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md └── src ├── lib.rs └── tests.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 1.0.9 2 | 3 | * Update Cargo metadata. 4 | * Update to Rust 2021 edition. 5 | 6 | # 1.0.8 7 | 8 | * Update to Rust 2018 edition. 9 | 10 | # 1.0.7 11 | 12 | * Refactoring to make safety invariants clearer. 13 | 14 | # 1.0.6 15 | 16 | * Replace an internal unsafe function with `str::as_bytes_mut` 17 | 18 | # 1.0.5 19 | 20 | * Link to README from Cargo manifest 21 | 22 | # 1.0.4 23 | 24 | * Minor refactoring to make code simpler 25 | 26 | # 1.0.3 27 | 28 | * Update to unicode-segmentation 1.0. 29 | 30 | # 1.0.2 31 | 32 | * Minor refactoring to make unsafe code more readable 33 | 34 | # 1.0.1 35 | 36 | * Improved documentation 37 | 38 | # 1.0.0 39 | 40 | * Initial release 41 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "1.1.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "cfg-if" 16 | version = "1.0.0" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 19 | 20 | [[package]] 21 | name = "env_logger" 22 | version = "0.8.4" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" 25 | dependencies = [ 26 | "log", 27 | "regex", 28 | ] 29 | 30 | [[package]] 31 | name = "getrandom" 32 | version = "0.2.12" 33 | source = "registry+https://github.com/rust-lang/crates.io-index" 34 | checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" 35 | dependencies = [ 36 | "cfg-if", 37 | "libc", 38 | "wasi", 39 | ] 40 | 41 | [[package]] 42 | name = "libc" 43 | version = "0.2.153" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" 46 | 47 | [[package]] 48 | name = "log" 49 | version = "0.4.21" 50 | source = "registry+https://github.com/rust-lang/crates.io-index" 51 | checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" 52 | 53 | [[package]] 54 | name = "memchr" 55 | version = "2.7.1" 56 | source = "registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" 58 | 59 | [[package]] 60 | name = "quickcheck" 61 | version = "1.0.3" 62 | source = "registry+https://github.com/rust-lang/crates.io-index" 63 | checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" 64 | dependencies = [ 65 | "env_logger", 66 | "log", 
67 | "rand", 68 | ] 69 | 70 | [[package]] 71 | name = "rand" 72 | version = "0.8.5" 73 | source = "registry+https://github.com/rust-lang/crates.io-index" 74 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 75 | dependencies = [ 76 | "rand_core", 77 | ] 78 | 79 | [[package]] 80 | name = "rand_core" 81 | version = "0.6.4" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 84 | dependencies = [ 85 | "getrandom", 86 | ] 87 | 88 | [[package]] 89 | name = "regex" 90 | version = "1.10.3" 91 | source = "registry+https://github.com/rust-lang/crates.io-index" 92 | checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" 93 | dependencies = [ 94 | "aho-corasick", 95 | "memchr", 96 | "regex-automata", 97 | "regex-syntax", 98 | ] 99 | 100 | [[package]] 101 | name = "regex-automata" 102 | version = "0.4.6" 103 | source = "registry+https://github.com/rust-lang/crates.io-index" 104 | checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" 105 | dependencies = [ 106 | "aho-corasick", 107 | "memchr", 108 | "regex-syntax", 109 | ] 110 | 111 | [[package]] 112 | name = "regex-syntax" 113 | version = "0.8.2" 114 | source = "registry+https://github.com/rust-lang/crates.io-index" 115 | checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" 116 | 117 | [[package]] 118 | name = "unicode-reverse" 119 | version = "1.0.9" 120 | dependencies = [ 121 | "quickcheck", 122 | "unicode-segmentation", 123 | ] 124 | 125 | [[package]] 126 | name = "unicode-segmentation" 127 | version = "1.11.0" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" 130 | 131 | [[package]] 132 | name = "wasi" 133 | version = "0.11.0+wasi-snapshot-preview1" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 
135 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 136 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "unicode-reverse" 3 | version = "1.0.9" 4 | authors = ["Matt Brubeck "] 5 | license = "MIT OR Apache-2.0" 6 | description = "Unicode-aware in-place string reversal" 7 | repository = "https://github.com/mbrubeck/unicode-reverse" 8 | documentation = "https://docs.rs/unicode-reverse" 9 | categories = ["text-processing"] 10 | keywords = ["unicode", "string", "reverse", "graphemes", "no_std"] 11 | edition = "2021" 12 | 13 | [dependencies] 14 | unicode-segmentation = "1.0" 15 | 16 | [dev-dependencies] 17 | quickcheck = "1.0" 18 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Matt Brubeck 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # unicode-reverse 2 | 3 | Unicode-aware in-place string reversal for Rust UTF-8 strings. 4 | 5 | * [Documentation](https://docs.rs/unicode-reverse) 6 | * [crates.io](https://crates.io/crates/unicode-reverse) 7 | * [Release notes](https://github.com/mbrubeck/unicode-reverse/blob/master/CHANGELOG.md) 8 | 9 | The [`reverse_grapheme_clusters_in_place`][0] function reverses a string slice in-place without 10 | allocating any memory on the heap. It correctly handles multi-byte UTF-8 sequences and 11 | grapheme clusters, including combining marks and astral characters such as Emoji. 12 | 13 | ## Example 14 | 15 | ```rust 16 | use unicode_reverse::reverse_grapheme_clusters_in_place; 17 | 18 | let mut x = "man\u{0303}ana".to_string(); 19 | println!("{}", x); // prints "mañana" 20 | 21 | reverse_grapheme_clusters_in_place(&mut x); 22 | println!("{}", x); // prints "anañam" 23 | ``` 24 | 25 | ## Background 26 | 27 | As described in [this article by Mathias Bynens][1], naively reversing a Unicode string can go 28 | wrong in several ways. 
For example, merely reversing the `chars` (Unicode Scalar Values) in a 29 | string can cause combining marks to become attached to the wrong characters: 30 | 31 | ```rust 32 | let x = "man\u{0303}ana"; 33 | println!("{}", x); // prints "mañana" 34 | 35 | let y: String = x.chars().rev().collect(); 36 | println!("{}", y); // prints "anãnam": Oops! The '~' is now applied to the 'a'. 37 | ``` 38 | 39 | Reversing the [grapheme clusters][2] of the string fixes this problem: 40 | 41 | ```rust 42 | extern crate unicode_segmentation; 43 | use unicode_segmentation::UnicodeSegmentation; 44 | 45 | fn main() { 46 | let x = "man\u{0303}ana"; 47 | let y: String = x.graphemes(true).rev().collect(); 48 | println!("{}", y); // prints "anañam" 49 | } 50 | ``` 51 | 52 | The `reverse_grapheme_clusters_in_place` function from this crate performs this same operation, 53 | but performs the reversal in-place rather than allocating a new string. 54 | 55 | Note: Even grapheme-level reversal may produce unexpected output if the input string contains 56 | certain non-printable control codes, such as directional formatting characters. Handling such 57 | characters is outside the scope of this crate. 58 | 59 | ## Algorithm 60 | 61 | The implementation is very simple. It makes two passes over the string's contents: 62 | 63 | 1. For each grapheme cluster, reverse the bytes within the grapheme cluster in-place. 64 | 2. Reverse the bytes of the entire string in-place. 65 | 66 | After the second pass, each grapheme cluster has been reversed twice, so its bytes are now back 67 | in their original order, but the clusters are now in the opposite order within the string. 68 | 69 | ## no_std 70 | 71 | This crate does not depend on libstd, so it can be used in [`no_std` projects][3]. 
72 | 73 | [0]: https://docs.rs/unicode-reverse/*/unicode_reverse/fn.reverse_grapheme_clusters_in_place.html 74 | [1]: https://mathiasbynens.be/notes/javascript-unicode 75 | [2]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries 76 | [3]: https://doc.rust-lang.org/book/no-stdlib.html 77 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | 3 | //! The [`reverse_grapheme_clusters_in_place`][0] function reverses a string slice in-place without 4 | //! allocating any memory on the heap. It correctly handles multi-byte UTF-8 sequences and 5 | //! grapheme clusters, including combining marks and astral characters such as Emoji. 6 | //! 7 | //! ## Example 8 | //! 9 | //! ```rust 10 | //! use unicode_reverse::reverse_grapheme_clusters_in_place; 11 | //! 12 | //! let mut x = "man\u{0303}ana".to_string(); 13 | //! println!("{}", x); // prints "mañana" 14 | //! 15 | //! reverse_grapheme_clusters_in_place(&mut x); 16 | //! println!("{}", x); // prints "anañam" 17 | //! ``` 18 | //! 19 | //! ## Background 20 | //! 21 | //! As described in [this article by Mathias Bynens][1], naively reversing a Unicode string can go 22 | //! wrong in several ways. For example, merely reversing the `chars` (Unicode Scalar Values) in a 23 | //! string can cause combining marks to become attached to the wrong characters: 24 | //! 25 | //! ```rust 26 | //! let x = "man\u{0303}ana"; 27 | //! println!("{}", x); // prints "mañana" 28 | //! 29 | //! let y: String = x.chars().rev().collect(); 30 | //! println!("{}", y); // prints "anãnam": Oops! The '~' is now applied to the 'a'. 31 | //! ``` 32 | //! 33 | //! Reversing the [grapheme clusters][2] of the string fixes this problem: 34 | //! 35 | //! ```rust 36 | //! extern crate unicode_segmentation; 37 | //! use unicode_segmentation::UnicodeSegmentation; 38 | //! 39 | //! # fn main() { 40 | //! 
let x = "man\u{0303}ana"; 41 | //! let y: String = x.graphemes(true).rev().collect(); 42 | //! println!("{}", y); // prints "anañam" 43 | //! # } 44 | //! ``` 45 | //! 46 | //! The `reverse_grapheme_clusters_in_place` function from this crate performs this same operation, 47 | //! but performs the reversal in-place rather than allocating a new string. 48 | //! 49 | //! Note: Even grapheme-level reversal may produce unexpected output if the input string contains 50 | //! certain non-printable control codes, such as directional formatting characters. Handling such 51 | //! characters is outside the scope of this crate. 52 | //! 53 | //! ## Algorithm 54 | //! 55 | //! The implementation is very simple. It makes two passes over the string's contents: 56 | //! 57 | //! 1. For each grapheme cluster, reverse the bytes within the grapheme cluster in-place. 58 | //! 2. Reverse the bytes of the entire string in-place. 59 | //! 60 | //! After the second pass, each grapheme cluster has been reversed twice, so its bytes are now back 61 | //! in their original order, but the clusters are now in the opposite order within the string. 62 | //! 63 | //! ## no_std 64 | //! 65 | //! This crate does not depend on libstd, so it can be used in [`no_std` projects][3]. 66 | //! 67 | //! [0]: fn.reverse_grapheme_clusters_in_place.html 68 | //! [1]: https://mathiasbynens.be/notes/javascript-unicode 69 | //! [2]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries 70 | //! [3]: https://doc.rust-lang.org/book/no-stdlib.html 71 | 72 | #[cfg(test)] 73 | mod tests; 74 | 75 | use core::str; 76 | use unicode_segmentation::UnicodeSegmentation; 77 | 78 | /// Reverse a Unicode string in-place without allocating. 79 | /// 80 | /// This function reverses a string slice in-place without allocating any memory on the heap. It 81 | /// correctly handles multi-byte UTF-8 sequences and grapheme clusters, including combining marks 82 | /// and astral characters such as Emoji. 
83 | /// 84 | /// See the [crate-level documentation](index.html) for more details. 85 | /// 86 | /// ## Example 87 | /// 88 | /// ```rust 89 | /// extern crate unicode_reverse; 90 | /// use unicode_reverse::reverse_grapheme_clusters_in_place; 91 | /// 92 | /// fn main() { 93 | /// let mut x = "man\u{0303}ana".to_string(); 94 | /// println!("{}", x); // prints "mañana" 95 | /// 96 | /// reverse_grapheme_clusters_in_place(&mut x); 97 | /// println!("{}", x); // prints "anañam" 98 | /// } 99 | /// ``` 100 | pub fn reverse_grapheme_clusters_in_place(s: &mut str) { 101 | unsafe { 102 | let v = s.as_bytes_mut(); 103 | 104 | // Part 1: Reverse the bytes within each grapheme cluster. 105 | // This does not preserve UTF-8 validity. 106 | { 107 | // Invariant: `tail` points to data we have not modified yet, so it is always valid UTF-8. 108 | let mut tail = &mut v[..]; 109 | while let Some(len) = str::from_utf8_unchecked(tail) 110 | .graphemes(true) 111 | .next() 112 | .map(str::len) 113 | { 114 | let (grapheme, new_tail) = tail.split_at_mut(len); 115 | grapheme.reverse(); 116 | tail = new_tail; 117 | } 118 | } 119 | 120 | // Part 2: Reverse all bytes. This restores multi-byte sequences to their original order. 121 | v.reverse(); 122 | 123 | // The string is now valid UTF-8 again. 
124 | debug_assert!(str::from_utf8(v).is_ok()); 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/tests.rs: -------------------------------------------------------------------------------- 1 | use super::reverse_grapheme_clusters_in_place; 2 | use quickcheck::quickcheck; 3 | use unicode_segmentation::UnicodeSegmentation; 4 | 5 | extern crate std; 6 | use self::std::string::String; 7 | use self::std::string::ToString; 8 | 9 | fn test_rev(a: &str, b: &str) { 10 | let mut a = a.to_string(); 11 | reverse_grapheme_clusters_in_place(&mut a); 12 | assert_eq!(a, b); 13 | } 14 | 15 | #[test] 16 | fn test_empty() { 17 | test_rev("", ""); 18 | } 19 | 20 | #[test] 21 | fn test_ascii() { 22 | test_rev("Hello", "olleH"); 23 | } 24 | 25 | #[test] 26 | fn test_utf8() { 27 | test_rev("¡Hola!", "!aloH¡"); 28 | } 29 | 30 | #[test] 31 | fn test_emoji() { 32 | test_rev("\u{1F36D}\u{1F36E}", "\u{1F36E}\u{1F36D}"); 33 | } 34 | 35 | #[test] 36 | fn test_combining_mark() { 37 | test_rev("man\u{0303}ana", "anan\u{0303}am"); 38 | } 39 | 40 | quickcheck! { 41 | fn quickchecks(s: String) -> bool { 42 | let mut in_place = s.clone(); 43 | reverse_grapheme_clusters_in_place(&mut in_place); 44 | let normal = s.graphemes(true).rev().collect::(); 45 | in_place == normal 46 | } 47 | } 48 | --------------------------------------------------------------------------------