├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── benchmark.rs ├── rustfmt.toml └── src └── lib.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | name: CI 4 | 5 | jobs: 6 | check: 7 | name: Check 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: actions-rs/toolchain@v1 12 | with: 13 | profile: minimal 14 | toolchain: stable 15 | override: true 16 | - uses: actions-rs/cargo@v1 17 | with: 18 | command: check 19 | 20 | test: 21 | name: Test 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v2 25 | - uses: actions-rs/toolchain@v1 26 | with: 27 | profile: minimal 28 | toolchain: stable 29 | override: true 30 | - uses: actions-rs/cargo@v1 31 | with: 32 | command: test 33 | 34 | fmt: 35 | name: Format 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v2 39 | - uses: actions-rs/toolchain@v1 40 | with: 41 | profile: minimal 42 | toolchain: nightly 43 | override: true 44 | components: rustfmt 45 | - uses: actions-rs/cargo@v1 46 | with: 47 | command: fmt 48 | args: -- --check 49 | 50 | clippy: 51 | name: Clippy 52 | runs-on: ubuntu-latest 53 | steps: 54 | - uses: actions/checkout@v2 55 | - uses: actions-rs/toolchain@v1 56 | with: 57 | profile: minimal 58 | toolchain: stable 59 | override: true 60 | components: clippy 61 | - uses: actions-rs/clippy-check@v1 62 | with: 63 | token: ${{ secrets.GITHUB_TOKEN }} 64 | args: -- -D warnings 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = 
"cow-utils" 3 | version = "0.1.3" 4 | authors = ["Ingvar Stepanyan "] 5 | edition = "2018" 6 | license = "MIT" 7 | description = "Copy-on-write string utilities for Rust" 8 | repository = "https://github.com/RReverser/cow-utils-rs" 9 | categories = ["no-std", "text-processing"] 10 | keywords = ["string", "str", "text", "cow"] 11 | readme = "README.md" 12 | 13 | [dev-dependencies] 14 | assert_matches = "1.3.0" 15 | criterion = "0.3.1" 16 | 17 | [[bench]] 18 | name = "benchmark" 19 | harness = false 20 | 21 | [features] 22 | default = [] 23 | nightly = [] 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ingvar Stepanyan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Copy-on-write string utils for Rust 2 | 3 | [![Crate docs](https://img.shields.io/crates/v/cow-utils)](https://docs.rs/cow-utils/latest/cow_utils/trait.CowUtils.html) 4 | [![License](https://img.shields.io/github/license/RReverser/cow-utils-rs)](LICENSE) 5 | 6 | Some [`str`](https://doc.rust-lang.org/std/primitive.str.html) methods 7 | perform destructive transformations and so they allocate, copy into and 8 | return a new 9 | [`String`](https://doc.rust-lang.org/std/string/struct.String.html) even 10 | when no modification is necessary. 11 | 12 | This crate provides a helper trait `CowUtils` with drop-in variants of 13 | such methods, which behave in the same way, but avoid extra copies and 14 | allocations when no modification is necessary. 15 | 16 | For now it's only implemented for `&str` and returns 17 | [`std::borrow::Cow`](https://doc.rust-lang.org/std/borrow/enum.Cow.html), 18 | but in the future might be extended to other types where even more 19 | efficient handling is possible (e.g. in-place modifications on mutable 20 | strings). 21 | 22 | ## Performance 23 | 24 | The primary motivation for this crate was the ability to perform zero-alloc replacements when no match is found, so results are shown only for `.replace` vs `.cow_replace` for now. 
25 | 26 | The actual results will vary depending on the inputs, but here is a taster based on `"a".repeat(40)` as an input and various modes (nothing matched, everything matched and replaced, everything matched from the start and deleted): 27 | 28 | | params | .replace (ns) | .cow_replace (ns) | difference (%) | 29 | |------------|---------------|-------------------|----------------| 30 | | ("a", "") | 408.59 | 290.27 | -29 | 31 | | ("b", "c") | 98.78 | 54.00 | -45 | 32 | | ("a", "b") | 985.99 | 1,000.70 | +1 | 33 | 34 | ## Usage 35 | 36 | First, you need to import `CowUtils` into the scope: 37 | 38 | ```rust 39 | use cow_utils::CowUtils; 40 | ``` 41 | 42 | Then you can start invoking the following `.cow_`-prefixed methods on 43 | strings instead of the regular ones: 44 | 45 | - `.cow_replace` instead of [`str::replace`](https://doc.rust-lang.org/std/primitive.str.html#method.replace) 46 | - `.cow_replacen` instead of [`str::replacen`](https://doc.rust-lang.org/std/primitive.str.html#method.replacen) 47 | - `.cow_to_ascii_lowercase` instead of [`str::to_ascii_lowercase`](https://doc.rust-lang.org/std/primitive.str.html#method.to_ascii_lowercase) 48 | - `.cow_to_ascii_uppercase` instead of [`str::to_ascii_uppercase`](https://doc.rust-lang.org/std/primitive.str.html#method.to_ascii_uppercase) 49 | - `.cow_to_lowercase` instead of [`str::to_lowercase`](https://doc.rust-lang.org/std/primitive.str.html#method.to_lowercase) 50 | - `.cow_to_uppercase` instead of [`str::to_uppercase`](https://doc.rust-lang.org/std/primitive.str.html#method.to_uppercase) 51 | 52 | Check out [the docs](https://docs.rs/cow-utils/latest/cow_utils/trait.CowUtils.html) for detailed examples. 
53 | -------------------------------------------------------------------------------- /benches/benchmark.rs: -------------------------------------------------------------------------------- 1 | use cow_utils::CowUtils; 2 | use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; 3 | 4 | fn bench_replace(c: &mut Criterion) { 5 | let input = "a".repeat(40); 6 | 7 | let mut g = c.benchmark_group(format!("Replace in {:?}", input)); 8 | for params in &[("a", ""), ("b", "c"), ("a", "b")] { 9 | g.bench_with_input( 10 | BenchmarkId::new("replace", format_args!("{params:?}")), 11 | params, 12 | |b, (from, to)| b.iter(|| input.replace(from, to)), 13 | ); 14 | g.bench_with_input( 15 | BenchmarkId::new("cow_replace", format_args!("{params:?}")), 16 | params, 17 | |b, (from, to)| b.iter(|| input.cow_replace(from, to)), 18 | ); 19 | } 20 | g.finish(); 21 | } 22 | 23 | fn bench_to_lowercase(c: &mut Criterion) { 24 | let mut g = c.benchmark_group("To Lowercase"); 25 | for (name, ref input) in [ 26 | ("Ax40", "A".repeat(40)), 27 | ("ax40", "a".repeat(40)), 28 | ("ax20 + Ax20", "a".repeat(20) + &"A".repeat(20)), 29 | ] { 30 | g.bench_with_input(BenchmarkId::new("to_lowercase", name), input, |b, input| { 31 | b.iter(|| input.to_lowercase()) 32 | }); 33 | g.bench_with_input( 34 | BenchmarkId::new("cow_to_lowercase", name), 35 | input, 36 | |b, input| b.iter(|| input.cow_to_lowercase()), 37 | ); 38 | } 39 | g.finish(); 40 | } 41 | 42 | criterion_group!(benches, bench_replace, bench_to_lowercase); 43 | criterion_main!(benches); 44 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | format_code_in_doc_comments = true 2 | wrap_comments = true 3 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Disable no_std in 
// doctests on stable Rust.
// See https://github.com/RReverser/cow-utils-rs/pull/1#issuecomment-586973518.
#![cfg_attr(any(not(doctest), feature = "nightly"), no_std)]
#![cfg_attr(feature = "nightly", feature(pattern))]

extern crate alloc as std;

use std::{
    borrow::{Cow, ToOwned},
    string::String,
};

/// This trait is a shim for the required functionality
/// normally provided directly by [`std::str::pattern::Pattern`]
/// (which is currently unstable).
///
/// On stable Rust it's implemented on the same standard types as
/// [`std::str::pattern::Pattern`], but on nightly you can enable
/// a `"nightly"` feature and any custom types implementing
/// [`std::str::pattern::Pattern`] will be supported as well.
///
/// Implementations are expected to yield `(byte_offset, matched_slice)`
/// pairs in ascending, non-overlapping order, exactly like
/// [`str::match_indices`] does. Because this trait is safe to implement,
/// the crate never relies on that expectation for memory safety: a
/// misbehaving implementation results in a panic, not undefined behaviour.
pub trait Pattern<'s> {
    /// This will always be [`std::str::MatchIndices<'s,
    /// Self>`](std::str::MatchIndices) but we can't spell it out because it
    /// requires `Self: `[`std::str::pattern::Pattern`] and that trait bound is
    /// currently unstable and can't be written in a stable Rust.
    ///
    /// Each item is the byte offset of a match within the haystack together
    /// with the matched slice itself.
    type MatchIndices: Iterator<Item = (usize, &'s str)>;

    /// A wrapper for [`&str::match_indices`] with a given pattern.
    fn match_indices_in(self, s: &'s str) -> Self::MatchIndices;
}

// Implements the `Pattern` shim for `$ty` by forwarding to
// `str::match_indices`. The optional `where` part is spliced into the
// generic parameter list of the generated `impl`.
macro_rules! impl_pattern {
    ($ty:ty $(where $($bound:tt)*)?) => {
        impl<'s $(, $($bound)*)?> Pattern<'s> for $ty {
            type MatchIndices = std::str::MatchIndices<'s, Self>;

            fn match_indices_in(self, s: &'s str) -> Self::MatchIndices {
                s.match_indices(self)
            }
        }
    };
}

// On stable Rust the unstable `Pattern` bound can't be named, so the shim is
// implemented for a fixed list of standard pattern types instead.
#[cfg(not(feature = "nightly"))]
const _: () = {
    impl_pattern!(char);
    impl_pattern!(&str);
    impl_pattern!(&String);
    impl_pattern!(&[char]);
    impl_pattern!(&&str);
    impl_pattern!(F where F: FnMut(char) -> bool);
};

// On nightly a single blanket implementation covers every standard pattern.
#[cfg(feature = "nightly")]
impl_pattern!(P where P: std::str::pattern::Pattern<'s>);

/// Some [`str`] methods perform destructive transformations and so
/// return [`String`] even when no modification is necessary.
///
/// This helper trait provides drop-in variants of such methods, but
/// instead avoids allocations when no modification is necessary.
///
/// For now only implemented for [`&str`](str) and returns
/// [`Cow`](std::borrow::Cow), but in the future might be extended
/// to other types.
pub trait CowUtils<'s> {
    /// The type returned by every method of this trait.
    type Output;

    /// Replaces all matches of a pattern with another string.
    fn cow_replace(self, pattern: impl Pattern<'s>, to: &str) -> Self::Output;
    /// Replaces first N matches of a pattern with another string.
    fn cow_replacen(self, pattern: impl Pattern<'s>, to: &str, count: usize) -> Self::Output;
    /// Returns a copy of this string where each character is mapped to its
    /// ASCII lower case equivalent.
    fn cow_to_ascii_lowercase(self) -> Self::Output;
    /// Returns the lowercase equivalent of this string slice.
    fn cow_to_lowercase(self) -> Self::Output;
    /// Returns a copy of this string where each character is mapped to its
    /// ASCII upper case equivalent.
    fn cow_to_ascii_uppercase(self) -> Self::Output;
    /// Returns the uppercase equivalent of this string slice.
    fn cow_to_uppercase(self) -> Self::Output;
}

/// Shared implementation of `cow_replace` and `cow_replacen`.
///
/// Walks over `match_indices` (pairs of byte offset and matched slice within
/// `src`), appending the untouched text between matches followed by `to`.
/// The result stays borrowed for as long as only a single non-empty piece of
/// `src` has been appended and nothing had to be inserted.
///
/// # Panics
///
/// Panics if `match_indices` yields a range that is out of bounds for `src`,
/// is not on a char boundary or goes backwards. The standard patterns never
/// do that, but `Pattern` is a safe trait that anyone can implement, so the
/// slicing here is deliberately checked instead of using `get_unchecked`.
fn cow_replace<'s>(
    src: &'s str,
    match_indices: impl Iterator<Item = (usize, &'s str)>,
    to: &str,
) -> Cow<'s, str> {
    let mut result = Cow::default();
    let mut last_start = 0;
    for (index, matched) in match_indices {
        // `+=` on an empty `Cow` simply re-borrows the right-hand side, so
        // no allocation happens until two non-empty pieces must be joined.
        result += &src[last_start..index];
        if !to.is_empty() {
            result.to_mut().push_str(to);
        }
        last_start = index + matched.len();
    }
    result += &src[last_start..];
    result
}

impl<'s> CowUtils<'s> for &'s str {
    type Output = Cow<'s, str>;

    /// This is similar to [`str::replace`](https://doc.rust-lang.org/std/primitive.str.html#method.replace), but returns
    /// a slice of the original string when possible:
    /// ```
    /// # use cow_utils::CowUtils;
    /// # use assert_matches::assert_matches;
    /// # use std::borrow::Cow;
    /// assert_matches!("abc".cow_replace("def", "ghi"), Cow::Borrowed("abc"));
    /// assert_matches!("$$str$$".cow_replace("$", ""), Cow::Borrowed("str"));
    /// assert_matches!("aaaaa".cow_replace("a", ""), Cow::Borrowed(""));
    /// assert_matches!("abc".cow_replace("b", "d"), Cow::Owned(s) if s == "adc");
    /// assert_matches!("$a$b$".cow_replace("$", ""), Cow::Owned(s) if s == "ab");
    /// ```
    fn cow_replace(self, pattern: impl Pattern<'s>, to: &str) -> Self::Output {
        cow_replace(self, pattern.match_indices_in(self), to)
    }

    /// This is similar to [`str::replacen`](https://doc.rust-lang.org/std/primitive.str.html#method.replacen), but returns
    /// a slice of the original string when possible:
    /// ```
    /// # use cow_utils::CowUtils;
    /// # use assert_matches::assert_matches;
    /// # use std::borrow::Cow;
    /// assert_matches!("abc".cow_replacen("def", "ghi", 10), Cow::Borrowed("abc"));
    /// assert_matches!("$$str$$".cow_replacen("$", "", 2), Cow::Borrowed("str$$"));
    /// assert_matches!("$a$b$".cow_replacen("$", "", 1), Cow::Borrowed("a$b$"));
    /// assert_matches!("aaaaa".cow_replacen("a", "", 10), Cow::Borrowed(""));
    /// assert_matches!("aaaaa".cow_replacen("a", "b", 0), Cow::Borrowed("aaaaa"));
    /// assert_matches!("abc".cow_replacen("b", "d", 1), Cow::Owned(s) if s == "adc");
    /// ```
    fn cow_replacen(self, pattern: impl Pattern<'s>, to: &str, count: usize) -> Self::Output {
        cow_replace(self, pattern.match_indices_in(self).take(count), to)
    }

    /// This is similar to [`str::to_ascii_lowercase`](https://doc.rust-lang.org/std/primitive.str.html#method.to_ascii_lowercase), but returns
    /// original slice when possible:
    /// ```
    /// # use cow_utils::CowUtils;
    /// # use assert_matches::assert_matches;
    /// # use std::borrow::Cow;
    /// assert_matches!("abcd123".cow_to_ascii_lowercase(), Cow::Borrowed("abcd123"));
    /// assert_matches!("ὀδυσσεύς".cow_to_ascii_lowercase(), Cow::Borrowed("ὀδυσσεύς"));
    /// assert_matches!("ὈΔΥΣΣΕΎΣ".cow_to_ascii_lowercase(), Cow::Borrowed("ὈΔΥΣΣΕΎΣ"));
    /// assert_matches!("AbCd".cow_to_ascii_lowercase(), Cow::Owned(s) if s == "abcd");
    /// ```
    fn cow_to_ascii_lowercase(self) -> Self::Output {
        match self.as_bytes().iter().position(u8::is_ascii_uppercase) {
            Some(pos) => {
                let mut output = self.to_owned();
                // We already know position of the first uppercase char,
                // so no need to rescan the part before it.
                //
                // SAFETY: `pos` is the index of an ASCII byte inside `self`
                // (and thus inside its copy `output`), so it is in bounds
                // and always lies on a char boundary.
                unsafe { output.get_unchecked_mut(pos..) }.make_ascii_lowercase();
                Cow::Owned(output)
            }
            None => Cow::Borrowed(self),
        }
    }

    /// This is similar to [`str::to_lowercase`](https://doc.rust-lang.org/std/primitive.str.html#method.to_lowercase), but returns
    /// original slice when possible:
    /// ```
    /// # use cow_utils::CowUtils;
    /// # use assert_matches::assert_matches;
    /// # use std::borrow::Cow;
    /// assert_matches!("abcd123".cow_to_lowercase(), Cow::Borrowed("abcd123"));
    /// assert_matches!("ὀδυσσεύς".cow_to_lowercase(), Cow::Borrowed("ὀδυσσεύς"));
    /// assert_matches!("ὈΔΥΣΣΕΎΣ".cow_to_lowercase(), Cow::Owned(s) if s == "ὀδυσσεύς");
    /// assert_matches!("AbCd".cow_to_lowercase(), Cow::Owned(s) if s == "abcd");
    /// assert_matches!("ᾈ".cow_to_lowercase(), Cow::Owned(s) if s == "ᾀ");
    /// ```
    fn cow_to_lowercase(self) -> Self::Output {
        // `str::to_lowercase` has a tricky edgecase with handling of Σ.
        // We could optimise this by duplicating some code from stdlib,
        // but it wouldn't be particularly clean, so for now just check
        // if the string contains any uppercase char and let
        // `str::to_lowercase` rescan it again.
        if self.chars().any(changes_when_lowercased) {
            Cow::Owned(self.to_lowercase())
        } else {
            Cow::Borrowed(self)
        }
    }

    /// This is similar to [`str::to_ascii_uppercase`](https://doc.rust-lang.org/std/primitive.str.html#method.to_ascii_uppercase), but returns
    /// original slice when possible:
    /// ```
    /// # use cow_utils::CowUtils;
    /// # use assert_matches::assert_matches;
    /// # use std::borrow::Cow;
    /// assert_matches!("ABCD123".cow_to_ascii_uppercase(), Cow::Borrowed("ABCD123"));
    /// assert_matches!("ὈΔΥΣΣΕΎΣ".cow_to_ascii_uppercase(), Cow::Borrowed("ὈΔΥΣΣΕΎΣ"));
    /// assert_matches!("ὀδυσσεύς".cow_to_ascii_uppercase(), Cow::Borrowed("ὀδυσσεύς"));
    /// assert_matches!("AbCd".cow_to_ascii_uppercase(), Cow::Owned(s) if s == "ABCD");
    /// ```
    fn cow_to_ascii_uppercase(self) -> Self::Output {
        match self.as_bytes().iter().position(u8::is_ascii_lowercase) {
            Some(pos) => {
                let mut output = self.to_owned();
                // We already know position of the first lowercase char,
                // so no need to rescan the part before it.
                //
                // SAFETY: `pos` is the index of an ASCII byte inside `self`
                // (and thus inside its copy `output`), so it is in bounds
                // and always lies on a char boundary.
                unsafe { output.get_unchecked_mut(pos..) }.make_ascii_uppercase();
                Cow::Owned(output)
            }
            None => Cow::Borrowed(self),
        }
    }

    /// This is similar to [`str::to_uppercase`](https://doc.rust-lang.org/std/primitive.str.html#method.to_uppercase), but returns
    /// original slice when possible:
    /// ```
    /// # use cow_utils::CowUtils;
    /// # use assert_matches::assert_matches;
    /// # use std::borrow::Cow;
    /// assert_matches!("ABCD123".cow_to_uppercase(), Cow::Borrowed("ABCD123"));
    /// assert_matches!("ὈΔΥΣΣΕΎΣ".cow_to_uppercase(), Cow::Borrowed("ὈΔΥΣΣΕΎΣ"));
    /// assert_matches!("ὀδυσσεύς".cow_to_uppercase(), Cow::Owned(s) if s == "ὈΔΥΣΣΕΎΣ");
    /// assert_matches!("AbCd".cow_to_uppercase(), Cow::Owned(s) if s == "ABCD");
    /// assert_matches!("ᾈ".cow_to_uppercase(), Cow::Owned(s) if s == "ἈΙ");
    /// ```
    fn cow_to_uppercase(self) -> Self::Output {
        match self.find(changes_when_uppercased) {
            Some(pos) => {
                let mut output = String::with_capacity(self.len());
                // We already know position of the first lowercase char,
                // so no need to rescan the part before it - just copy it.
                //
                // SAFETY: `pos` was returned by `str::find` on `self`, so it
                // is in bounds and is the start of a char.
                output.push_str(unsafe { self.get_unchecked(..pos) });
                output.extend(
                    // SAFETY: same `pos` as above - in bounds and on a char
                    // boundary of `self`.
                    unsafe { self.get_unchecked(pos..) }
                        .chars()
                        .flat_map(char::to_uppercase),
                );
                Cow::Owned(output)
            }
            None => Cow::Borrowed(self),
        }
    }
}

/// Returns `true` if lowercasing `c` yields anything other than `c` itself.
fn changes_when_lowercased(c: char) -> bool {
    !core::iter::once(c).eq(c.to_lowercase())
}

/// Returns `true` if uppercasing `c` yields anything other than `c` itself.
fn changes_when_uppercased(c: char) -> bool {
    !core::iter::once(c).eq(c.to_uppercase())
}