├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── feature-request.md │ └── filtering-error.md └── workflows │ └── build.yml ├── .gitignore ├── Cargo.toml ├── LICENSE-MIT ├── Makefile ├── README.md ├── examples ├── advanced.rs ├── analyze.rs └── censor.rs ├── fuzz ├── .gitignore ├── Cargo.toml └── fuzz_targets │ └── fuzz.rs ├── pages ├── .gitignore ├── Cargo.toml ├── Trunk.prod.toml ├── index.html └── src │ └── main.rs └── src ├── banned.rs ├── banned_chars.txt ├── buffer_proxy_iterator.rs ├── censor.rs ├── character_analyzer.rs ├── character_widths.bin ├── context.rs ├── dictionary_blacklist.txt ├── dictionary_common_valid_short.txt ├── dictionary_extra.txt ├── false_positive_finder.rs ├── false_positives.txt ├── feature_cell.rs ├── lib.rs ├── mtch.rs ├── pii.rs ├── profanity.csv ├── replacement_finder.rs ├── replacements.csv ├── replacements.rs ├── replacements_extra.csv ├── safe.txt ├── test_broken.txt ├── test_negative.txt ├── test_positive.txt ├── test_safe.txt ├── trace.rs ├── trie.rs ├── typ.rs ├── unicode_fonts.txt └── width.rs /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [finnbear] -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Request a new feature 4 | title: Feature request 5 | labels: feature 6 | assignees: finnbear 7 | 8 | --- 9 | 10 | ### Motivation 11 | 12 | ### Summary 13 | 14 | ### Alternatives 15 | 16 | 17 | 18 | ### Context 19 | 20 | I am using `rustrict` version `X.Y.Z` (if not latest version) 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/filtering-error.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Filtering Error 3 | about: Easily report false positive detections or false negative detections 4 | title: Filtering error (false positive and/or false negative) 5 | labels: bug 6 | assignees: finnbear 7 | 8 | --- 9 | 10 | ### False Positives 11 | The following shouldn't have been detected, but was: 12 | ``` 13 | 14 | ``` 15 | 16 | 17 | 18 | ### False Negatives 19 | The following should have been detected, but wasn't: 20 | ``` 21 | 22 | ``` 23 | 24 | 25 | ### Context 26 | 27 | I am using `rustrict` version `X.Y.Z` (if not latest version) 28 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | workflow_dispatch: 9 | 10 | permissions: 11 | contents: read 12 | pages: write 13 | id-token: write 14 | 15 | concurrency: 16 | group: "pages" 17 | cancel-in-progress: false 18 | 19 | env: 20 | CARGO_TERM_COLOR: always 21 | 22 | jobs: 23 | build: 24 | environment: 25 | name: github-pages 26 | url: ${{ steps.deployment.outputs.page_url }} 27 | runs-on: ubuntu-latest 28 | steps: 29 | - name: Checkout 30 | uses: actions/checkout@v3 31 | with: 32 | persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal access token. 33 | fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository. 
34 | - name: Install Rust 35 | uses: actions-rs/toolchain@v1 36 | with: 37 | toolchain: nightly 38 | override: true 39 | components: rustfmt, clippy 40 | - name: Download Testing Data 41 | run: curl https://raw.githubusercontent.com/vzhou842/profanity-check/master/profanity_check/data/clean_data.csv --output test.csv 42 | - name: Test (context, pii, serde) 43 | run: cargo test --release --features context,pii,serde 44 | - name: Test (context, width) 45 | run: cargo test --release --features context,width 46 | - name: Add wasm32 target 47 | run: rustup target add wasm32-unknown-unknown 48 | - name: Install Trunk 49 | uses: baptiste0928/cargo-install@v2 50 | with: 51 | crate: trunk 52 | version: 0.21.1 53 | - name: Build Pages 54 | run: cd pages && trunk --config Trunk.prod.toml build --release --filehash=false 55 | - name: Setup Pages 56 | uses: actions/configure-pages@v3 57 | - name: Upload artifact 58 | uses: actions/upload-pages-artifact@v3 59 | with: 60 | path: './pages/dist/' 61 | - name: Deploy to GitHub Pages 62 | id: deployment 63 | uses: actions/deploy-pages@v4 64 | fuzz: 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v2 68 | - uses: actions-rs/toolchain@v1 69 | with: 70 | toolchain: nightly 71 | override: true 72 | - name: Install cargo-fuzz 73 | uses: baptiste0928/cargo-install@v3 74 | with: 75 | crate: cargo-fuzz 76 | locked: false 77 | - name: Fuzz 78 | run: RUST_BACKTRACE=1 cargo fuzz run fuzz -- -max_total_time=900 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | # Downloads (run `make downloads` to get). Only required for testing and false positive finding. 
13 | test.csv 14 | src/dictionary.txt 15 | src/dictionary_common.txt 16 | src/unicode_confusables.txt 17 | 18 | # Downloads not covered under `make downloads` 19 | ttf/ 20 | 21 | .idea 22 | *.iml 23 | .vscode/ -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustrict" 3 | authors = ["Finn Bear"] 4 | version = "0.7.35" 5 | edition = "2021" 6 | license = "MIT OR Apache-2.0" 7 | repository = "https://github.com/finnbear/rustrict/" 8 | description = "rustrict is a profanity filter for Rust" 9 | exclude = ["fuzz/"] 10 | 11 | [lib] 12 | name = "rustrict" 13 | path = "src/lib.rs" 14 | 15 | [[bin]] 16 | name = "false_positive_finder" 17 | path = "src/false_positive_finder.rs" 18 | required-features = ["find_false_positives"] 19 | 20 | [[bin]] 21 | name = "replacement_finder" 22 | path = "src/replacement_finder.rs" 23 | required-features = ["find_replacements"] 24 | 25 | [[bin]] 26 | name = "character_analyzer" 27 | path = "src/character_analyzer.rs" 28 | required-features = ["imageproc", "image", "rusttype", "unicode-width", "walkdir", "rayon"] 29 | 30 | [[bin]] 31 | name = "trace" 32 | path = "src/trace.rs" 33 | required-features = ["trace"] 34 | 35 | [features] 36 | default = ["censor", "context"] 37 | censor = ["arrayvec", "bitflags", "lazy_static", "itertools", "unicode-normalization", "rustc-hash"] 38 | context = ["censor", "strsim"] 39 | customize = ["censor"] 40 | width = ["lazy_static", "itertools"] 41 | pii = ["lazy_static", "regex"] 42 | find_false_positives = ["censor", "regex", "indicatif", "rayon"] 43 | find_replacements = ["csv"] 44 | trace = ["censor"] 45 | trace_full = ["trace"] 46 | serde = ["dep:serde", "arrayvec/serde"] 47 | 48 | [package.metadata.docs.rs] 49 | features = ["censor", "context", "customize", "width"] 50 | 51 | [profile.release] 52 | panic = 'abort' 53 | 54 | [dependencies] 55 | arrayvec = {version = "0.7", optional = true} 56 | finl_unicode = "1.2" 57 | unicode-normalization = {version = "0.1.22", optional = true} 58 | unicode-width = {version = "0.1", optional = true} 59 | bitflags = {version = "1.3", optional = true} 60 | lazy_static = {version = "1.4", optional = true} 61 | itertools = {version = "0.10", optional = true} 62 | rustc-hash = {version = "1.1", optional = true} 63 | regex = {version = "1.5", optional = true} 64 | indicatif = {version = "0.17.0-beta.1", optional = true} 65 | rayon = {version = "1.5", optional = true} 66 | doc-comment = "0.3.3" 67 | strsim = {version = "0.10.0", optional = true} 68 | csv = {version="1.1", optional = true} 69 | imageproc = {version = "0.22", optional = true} 70 | rusttype = {version = "0.9", optional = true} 71 | image = {version = "0.23.14", optional = true} 72 | walkdir = {version = "2", optional = true} 73 | serde = {version = "1", features=["derive"], optional = true} 74 | 75 | [dev-dependencies] 76 | rand = "0.8" 77 | csv = "1.1" 78 | censor_crate = { package = "censor", version = "0.3.0" } 79 | rustrict_old = { package = "rustrict", version = "0.7.24" } 80 | serial_test = "0.5" 81 | stfu_crate = { package = "stfu", version = "0.1.0" } 82 | profane_rs_crate = { package = "profane-rs", version = "0.0.4" } 83 | bincode = "1.3.3" 84 | serde_json = "1" 85 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 
2021 Finn Bear 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: fuzz 2 | 3 | all: test 4 | 5 | downloads: 6 | wget -O test.csv https://raw.githubusercontent.com/vzhou842/profanity-check/master/profanity_check/data/clean_data.csv 7 | wget -O src/dictionary.txt https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt 8 | wget -O src/dictionary_common.txt https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english.txt 9 | wget -O src/unicode_confusables.txt https://www.unicode.org/Public/security/14.0.0/confusables.txt 10 | # TODO: ttf fonts 11 | 12 | false_positives: 13 | cargo run --bin false_positive_finder --release --features censor,regex,indicatif,rayon,find_false_positives 14 | 15 | replacements: 16 | cargo run --bin replacement_finder --features find_replacements 17 | 18 | widths: 19 | cargo run --bin character_analyzer --release --features imageproc,image,rusttype,walkdir,rayon,unicode-width 20 | 21 | test: 22 | cargo test --release --features width,pii,serde -- --nocapture 23 | 24 | compare: 25 | COMPARE=1 make test 26 | 27 | table: 28 | cargo test --release -- accuracy --nocapture 29 | 30 | # Skips accuracy analysis so finishes faster. 31 | test_debug: 32 | cargo test --features pii -- --nocapture 33 | 34 | fuzz: 35 | cargo fuzz run fuzz 36 | 37 | test_customize: 38 | cargo test --release --features customize --no-default-features -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rustrict 2 | 3 | [![Documentation](https://docs.rs/rustrict/badge.svg)](https://docs.rs/rustrict) 4 | [![crates.io](https://img.shields.io/crates/v/rustrict.svg)](https://crates.io/crates/rustrict) 5 | [![Build](https://github.com/finnbear/rustrict/actions/workflows/build.yml/badge.svg)](https://github.com/finnbear/rustrict/actions/workflows/build.yml) 6 | [![Test Page](https://img.shields.io/badge/Test-page-green)](https://finnbear.github.io/rustrict/) 7 | 8 | 9 | `rustrict` is a profanity filter for Rust. 10 | 11 | Disclaimer: Multiple source files (`.txt`, `.csv`, `.rs` test cases) contain profanity. Viewer discretion is advised. 
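To use the crate, add it to your `Cargo.toml`. A minimal dependency declaration is shown below; the `censor` and `context` features are enabled by default, and optional features such as `customize`, `width`, `pii`, and `serde` can be enabled as needed (see the `[features]` table in `Cargo.toml`):

```toml
[dependencies]
# Default features enable censoring and the per-user `context` helper.
rustrict = "0.7"

# Or opt into additional functionality explicitly, for example:
# rustrict = { version = "0.7", features = ["customize", "width", "pii"] }
```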
12 | 13 | ## Features 14 | 15 | - Multiple types (profane, offensive, sexual, mean, spam) 16 | - Multiple levels (mild, moderate, severe) 17 | - Resistant to evasion 18 | - Alternative spellings (like "fck") 19 | - Repeated characters (like "craaaap") 20 | - Confusable characters (like 'ᑭ', '𝕡', and '🅿') 21 | - Spacing (like "c r_a-p") 22 | - Accents (like "pÓöp") 23 | - Bidirectional Unicode ([related reading](https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html)) 24 | - Self-censoring (like "f*ck") 25 | - Safe phrase list for known bad actors 26 | - Censors invalid Unicode characters 27 | - Battle-tested in [Mk48.io](https://mk48.io) 28 | - Resistant to false positives 29 | - One word (like "**ass**assin") 30 | - Two words (like "pu**sh it**") 31 | - Flexible 32 | - Censor and/or analyze 33 | - Input `&str` or `Iterator<Item = char>` 34 | - Can track per-user state with `context` feature 35 | - Can add words with the `customize` feature 36 | - Accurately reports the width of Unicode via the `width` feature 37 | - Plenty of options 38 | - Performant 39 | - O(n) analysis and censoring 40 | - No `regex` (uses custom trie) 41 | - 3 MB/s in `release` mode 42 | - 100 KB/s in `debug` mode 43 | 44 | ## Limitations 45 | 46 | - Mostly English/emoji 47 | - Censoring removes most diacritics (accents) 48 | - Does not detect right-to-left profanity while analyzing, so... 49 | - Censoring forces Unicode to be left-to-right 50 | - Doesn't understand context 51 | - Not resistant to false positives affecting profanities added at runtime 52 | 53 | ## Usage 54 | 55 | ### Strings (`&str`) 56 | ```rust 57 | use rustrict::CensorStr; 58 | 59 | let censored: String = "hello crap".censor(); 60 | let inappropriate: bool = "f u c k".is_inappropriate(); 61 | 62 | assert_eq!(censored, "hello c***"); 63 | assert!(inappropriate); 64 | ``` 65 | 66 | ### Iterators (`Iterator<Item = char>`) 67 | 68 | ```rust 69 | use rustrict::CensorIter; 70 | 71 | let censored: String = "hello crap".chars().censor().collect(); 72 | 73 | assert_eq!(censored, "hello c***"); 74 | ``` 75 | 76 | ### Advanced 77 | 78 | By constructing a `Censor`, one can avoid scanning text multiple times to get a censored `String` and/or 79 | answer multiple `is` queries. This also opens up more customization options (defaults are below). 80 | 81 | ```rust 82 | use rustrict::{Censor, Type}; 83 | 84 | let (censored, analysis) = Censor::from_str("123 Crap") 85 | .with_censor_threshold(Type::INAPPROPRIATE) 86 | .with_censor_first_character_threshold(Type::OFFENSIVE & Type::SEVERE) 87 | .with_ignore_false_positives(false) 88 | .with_ignore_self_censoring(false) 89 | .with_censor_replacement('*') 90 | .censor_and_analyze(); 91 | 92 | assert_eq!(censored, "123 C***"); 93 | assert!(analysis.is(Type::INAPPROPRIATE)); 94 | assert!(analysis.isnt(Type::PROFANE & Type::SEVERE | Type::SEXUAL)); 95 | ``` 96 | 97 | If you cannot afford to let anything slip through, or have reason to believe a particular user 98 | is trying to evade the filter, you can check if their input matches a [short list of safe strings](src/safe.txt): 99 | 100 | ```rust 101 | use rustrict::{CensorStr, Type}; 102 | 103 | // Figure out if a user is trying to evade the filter. 104 | assert!("pron".is(Type::EVASIVE)); 105 | assert!("porn".isnt(Type::EVASIVE)); 106 | 107 | // Only let safe messages through.
108 | assert!("Hello there!".is(Type::SAFE)); 109 | assert!("nice work.".is(Type::SAFE)); 110 | assert!("yes".is(Type::SAFE)); 111 | assert!("NVM".is(Type::SAFE)); 112 | assert!("gtg".is(Type::SAFE)); 113 | assert!("not a common phrase".isnt(Type::SAFE)); 114 | ``` 115 | 116 | If you want to add custom profanities or safe words, enable the `customize` feature. 117 | 118 | ```rust 119 | #[cfg(feature = "customize")] 120 | { 121 | use rustrict::{add_word, CensorStr, Type}; 122 | 123 | // You must take care not to call these when the crate is being 124 | // used in any other way (to avoid concurrent mutation). 125 | unsafe { 126 | add_word("reallyreallybadword", (Type::PROFANE & Type::SEVERE) | Type::MEAN); 127 | add_word("mybrandname", Type::SAFE); 128 | } 129 | 130 | assert!("Reallllllyreallllllybaaaadword".is(Type::PROFANE)); 131 | assert!("MyBrandName".is(Type::SAFE)); 132 | } 133 | ``` 134 | 135 | If your use-case is chat moderation, and you store data on a per-user basis, you can use `rustrict::Context` as a reference implementation: 136 | 137 | ```rust 138 | #[cfg(feature = "context")] 139 | { 140 | use rustrict::{BlockReason, Context}; 141 | use std::time::Duration; 142 | 143 | pub struct User { 144 | context: Context, 145 | } 146 | 147 | let mut bob = User { 148 | context: Context::default() 149 | }; 150 | 151 | // Ok messages go right through. 152 | assert_eq!(bob.context.process(String::from("hello")), Ok(String::from("hello"))); 153 | 154 | // Bad words are censored. 155 | assert_eq!(bob.context.process(String::from("crap")), Ok(String::from("c***"))); 156 | 157 | // Can take user reports (After many reports or inappropriate messages, 158 | // will only let known safe messages through.) 159 | for _ in 0..5 { 160 | bob.context.report(); 161 | } 162 | 163 | // If many bad words are used or reports are made, the first letter of 164 | // future bad words starts getting censored too. 165 | assert_eq!(bob.context.process(String::from("crap")), Ok(String::from("****"))); 166 | 167 | // Can manually mute. 168 | bob.context.mute_for(Duration::from_secs(2)); 169 | assert!(matches!(bob.context.process(String::from("anything")), Err(BlockReason::Muted(_)))); 170 | } 171 | ``` 172 | 173 | ## Comparison 174 | 175 | To compare filters, the first 100,000 items of [this list](https://raw.githubusercontent.com/vzhou842/profanity-check/master/profanity_check/data/clean_data.csv) 176 | is used as a dataset. Positive accuracy is the percentage of profanity detected as profanity. Negative accuracy is the percentage of clean text detected as clean. 177 | 178 | | Crate | Accuracy | Positive Accuracy | Negative Accuracy | Time | 179 | |-------|----------|-------------------|-------------------|------| 180 | | [rustrict](https://crates.io/crates/rustrict) | 80.00% | 94.01% | 76.50% | 9s | 181 | | [censor](https://crates.io/crates/censor) | 76.16% | 72.76% | 77.01% | 23s | 182 | | [stfu](https://crates.io/crates/stfu) | 91.74% | 77.69% | 95.25% | 45s | 183 | | [profane-rs](https://crates.io/crates/profane-rs) | 80.47% | 73.79% | 82.14% | 52s | 184 | 185 | ## Development 186 | 187 | [![Build](https://github.com/finnbear/rustrict/actions/workflows/build.yml/badge.svg?branch=master)](https://github.com/finnbear/rustrict/actions/workflows/build.yml) 188 | 189 | If you make an adjustment that would affect false positives, such as adding profanity, 190 | you will need to run `false_positive_finder`: 191 | 1. Run `make downloads` to download the required word lists and dictionaries 192 | 2. 
Run `make false_positives` to automatically find false positives 193 | 194 | If you modify `replacements_extra.csv`, run `make replacements` to rebuild `replacements.csv`. 195 | 196 | Finally, run `make test` for a full test or `make test_debug` for a fast test. 197 | 198 | ## License 199 | 200 | Licensed under either of 201 | 202 | * Apache License, Version 2.0 203 | ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 204 | * MIT license 205 | ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 206 | 207 | at your option. 208 | 209 | ## Contribution 210 | 211 | Unless you explicitly state otherwise, any contribution intentionally submitted 212 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall be 213 | dual licensed as above, without any additional terms or conditions. 214 | -------------------------------------------------------------------------------- /examples/advanced.rs: -------------------------------------------------------------------------------- 1 | use rustrict::{Censor, Type}; 2 | 3 | fn main() { 4 | let (censored, analysis) = Censor::from_str("123 Crap") 5 | .with_censor_first_character_threshold(Type::OFFENSIVE & Type::SEVERE) 6 | .with_ignore_false_positives(false) 7 | .with_censor_replacement('?') 8 | .censor_and_analyze(); 9 | 10 | assert_eq!(censored, "123 C???"); 11 | assert!(analysis.is(Type::INAPPROPRIATE)); 12 | assert!(analysis.isnt(Type::PROFANE & Type::SEVERE | Type::SEXUAL)); 13 | } 14 | -------------------------------------------------------------------------------- /examples/analyze.rs: -------------------------------------------------------------------------------- 1 | use rustrict::{CensorStr, Type}; 2 | 3 | fn main() { 4 | show_analysis("Helló world!"); 5 | show_analysis("Hello shit world ass"); 6 | show_analysis("assassin push it"); 7 | show_analysis("$#1t f-u_c_k βιτ⊂η d u m b a s s"); 8 | } 9 | 10 | fn show_analysis(text: &str) { 11 | println!("\"{}\" is mean? {}", text, text.is(Type::MEAN)); 12 | } 13 | -------------------------------------------------------------------------------- /examples/censor.rs: -------------------------------------------------------------------------------- 1 | use rustrict::CensorStr; 2 | 3 | fn main() { 4 | // Okay words are unaffected (with the exception of having their accents removed). 5 | show_censor("Helló world!"); 6 | 7 | // Bad words are censored. 8 | show_censor("Hello shit world ass"); 9 | 10 | // False positives are avoided. 11 | show_censor("assassin push it"); 12 | 13 | // Obfuscation is mostly ignored. 14 | show_censor("$#1t f-u_c_k βιτ⊂η d u m b a s s"); 15 | } 16 | 17 | fn show_censor(text: &str) { 18 | println!("{} -> {}", text, text.censor()); 19 | } 20 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | corpus 3 | artifacts 4 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rustrict-fuzz" 3 | version = "0.0.0" 4 | authors = ["Automatically generated"] 5 | publish = false 6 | edition = "2018" 7 | 8 | [package.metadata] 9 | cargo-fuzz = true 10 | 11 | [dependencies] 12 | libfuzzer-sys = "0.4" 13 | 14 | [dependencies.rustrict] 15 | path = ".." 
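# Depend on the parent crate by path so fuzzing always exercises the local sources,
# and enable the optional features that the fuzz target calls into.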
16 | features = ["pii", "width"] 17 | 18 | # Prevent this from interfering with workspaces 19 | [workspace] 20 | members = ["."] 21 | 22 | [[bin]] 23 | name = "fuzz" 24 | path = "fuzz_targets/fuzz.rs" 25 | test = false 26 | doc = false 27 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/fuzz.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | use libfuzzer_sys::fuzz_target; 3 | use rustrict::{Censor, Context, Type}; 4 | 5 | fuzz_target!(|data: &[u8]| { 6 | if !data.is_empty() { 7 | let flags = data[0]; 8 | let input = &data[1..]; 9 | 10 | if let Ok(text) = std::str::from_utf8(input) { 11 | let _ = rustrict::width_str(text); 12 | let _ = rustrict::width_str_max_unbroken(text, rustrict::WordBreak::BreakAll); 13 | let _ = rustrict::trim_to_width(text, 10); 14 | let _ = rustrict::censor_and_analyze_pii(text); 15 | 16 | let (_censored, _analysis) = Censor::from_str(text) 17 | .with_ignore_self_censoring(flag(flags, 0)) 18 | .with_ignore_false_positives(flag(flags, 1)) 19 | .with_censor_threshold(if flag(flags, 2) { 20 | Type::INAPPROPRIATE 21 | } else { 22 | Type::SPAM 23 | }) 24 | .with_censor_first_character_threshold(if flag(flags, 3) { 25 | Type::INAPPROPRIATE 26 | } else { 27 | Type::SPAM 28 | }) 29 | .with_censor_replacement(if flag(flags, 4) { '#' } else { '*' }) 30 | .censor_and_analyze(); 31 | 32 | let mut ctx = Context::new(); 33 | 34 | for _ in 0..3 { 35 | let _ = ctx.process(String::from(text)); 36 | let _ = ctx.process(String::from("hi")); 37 | let _ = ctx.process(String::from(text)); 38 | } 39 | } 40 | } 41 | }); 42 | 43 | fn flag(flags: u8, index: u8) -> bool { 44 | ((flags >> index) & 1) == 1 45 | } 46 | -------------------------------------------------------------------------------- /pages/.gitignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | target/ -------------------------------------------------------------------------------- /pages/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pages" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | rustrict = { path = "..", features = ["trace_full", "width", "pii"] } 8 | yew = { version = "0.21", features = ["csr"] } 9 | 10 | [dependencies.web-sys] 11 | version = "0.3" 12 | features = [ 13 | 'HtmlInputElement', 14 | 'HtmlTextAreaElement', 15 | ] 16 | 17 | [profile.release] 18 | codegen-units = 1 19 | lto = true 20 | opt-level = "z" 21 | panic = "abort" 22 | strip = "debuginfo" -------------------------------------------------------------------------------- /pages/Trunk.prod.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | target = "index.html" 3 | release = true 4 | public_url = "/rustrict/" -------------------------------------------------------------------------------- /pages/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Rustrict 6 | 7 | 8 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /pages/src/main.rs: -------------------------------------------------------------------------------- 1 | use web_sys::{HtmlInputElement, window, InputEvent, HtmlTextAreaElement, wasm_bindgen::JsCast}; 2 | use yew::{html, Html, Callback, function_component, TargetCast}; 3 | use rustrict::{censor_and_analyze_pii, Censor, 
WordBreak}; 4 | 5 | #[function_component(App)] 6 | fn app() -> Html { 7 | let oninput = Callback::from(move |event: InputEvent| { 8 | if let Some(input) = event.target_dyn_into::() { 9 | let uncensored = input.value(); 10 | let (uncensored, pii) = censor_and_analyze_pii(&uncensored); 11 | let analysis_element = window().unwrap().document().unwrap().get_element_by_id("analysis").unwrap(); 12 | let censored_element = window().unwrap().document().unwrap().get_element_by_id("censored").unwrap().dyn_into::().unwrap(); 13 | if uncensored.is_empty() { 14 | analysis_element.set_inner_html("N/A"); 15 | censored_element.set_value(""); 16 | } else { 17 | let mut censor = Censor::from_str(&uncensored); 18 | let (censored, analysis) = censor.censor_and_analyze(); 19 | let count = censor.total_matches(); 20 | let detections = censor.detections(); 21 | let width = rustrict::width_str(&uncensored); 22 | let max_unbroken = rustrict::width_str_max_unbroken(&uncensored, WordBreak::BreakAll); 23 | let result = format!("{analysis:?} (width={width}, max-unbroken={max_unbroken}, count={count}, detections={detections:?}, pii={pii:?})"); 24 | analysis_element.set_inner_html(&result); 25 | censored_element.set_value(&censored); 26 | } 27 | } 28 | }); 29 | html! {<> 30 |

{"Rustrict"}

31 |

{"Input"}

32 | 38 |

{"Analysis"}

39 |

{"N/A"}

40 |

{"Output"}

41 | 49 | } 50 | } 51 | 52 | /* 53 | 75 | */ 76 | 77 | fn main() { 78 | yew::Renderer::::new().render(); 79 | } -------------------------------------------------------------------------------- /src/banned.rs: -------------------------------------------------------------------------------- 1 | use crate::feature_cell::FeatureCell; 2 | use crate::Set; 3 | use lazy_static::lazy_static; 4 | use std::ops::Deref; 5 | 6 | lazy_static! { 7 | pub(crate) static ref BANNED: FeatureCell = FeatureCell::new(Banned( 8 | include_str!("banned_chars.txt") 9 | .lines() 10 | .filter(|s| s.starts_with("U+")) 11 | .map(|s| { 12 | u32::from_str_radix(&s[2..], 16) 13 | .ok() 14 | .and_then(char::from_u32) 15 | .unwrap() 16 | }) 17 | // If you care about width, you probably also care about height. 18 | .chain(if cfg!(feature = "width") { 19 | ['\u{A9C1}', '\u{A9C2}'].as_slice().into_iter().copied() 20 | } else { 21 | [].as_slice().into_iter().copied() 22 | }) 23 | .collect() 24 | )); 25 | } 26 | 27 | /// Set of character to strip from input without replacement. 28 | #[derive(Clone, Debug)] 29 | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 30 | pub struct Banned(Set); 31 | 32 | impl Default for Banned { 33 | fn default() -> Self { 34 | BANNED.deref().deref().clone() 35 | } 36 | } 37 | 38 | impl Banned { 39 | /// Empty. 40 | pub fn new() -> Self { 41 | Self(Default::default()) 42 | } 43 | 44 | /// Allows direct mutable access to the global default set of banned characters. 45 | /// 46 | /// # Safety 47 | /// 48 | /// You must manually avoid concurrent access/censoring. 49 | #[cfg(feature = "customize")] 50 | #[cfg_attr(doc, doc(cfg(feature = "customize")))] 51 | pub unsafe fn customize_default() -> &'static mut Self { 52 | BANNED.get_mut() 53 | } 54 | 55 | pub(crate) fn contains(&self, c: char) -> bool { 56 | self.0.contains(&c) 57 | } 58 | 59 | /// Adds a banned character. 60 | pub fn insert(&mut self, c: char) { 61 | self.0.insert(c); 62 | } 63 | 64 | /// Removes a banned character. 65 | pub fn remove(&mut self, c: char) { 66 | self.0.remove(&c); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/banned_chars.txt: -------------------------------------------------------------------------------- 1 | # https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html 2 | U+202A 3 | U+202B 4 | U+202C 5 | U+202D 6 | U+202E 7 | U+2066 8 | U+2067 9 | U+2068 10 | U+2069 11 | 12 | # Very small Arabic ligature 13 | U+FC60 -------------------------------------------------------------------------------- /src/buffer_proxy_iterator.rs: -------------------------------------------------------------------------------- 1 | use std::collections::VecDeque; 2 | use std::ops::RangeInclusive; 3 | 4 | /// This iterator buffers characters until they can be determined to be clean of profanity. 5 | pub(crate) struct BufferProxyIterator> { 6 | iter: I, 7 | /// The index into iter of the start of buffer. 8 | buffer_start_position: usize, 9 | /// Staging area (to possibly censor). 10 | buffer: VecDeque, 11 | } 12 | 13 | impl> BufferProxyIterator { 14 | pub fn new(iter: I) -> Self { 15 | BufferProxyIterator { 16 | iter, 17 | buffer_start_position: 0, 18 | buffer: VecDeque::new(), 19 | } 20 | } 21 | 22 | /// Returns index of the last character read, or None if nothing has been read yet. 23 | pub fn index(&self) -> Option { 24 | if self.buffer_start_position + self.buffer.len() == 0 { 25 | // Didn't read anything yet. 
26 | return None; 27 | } 28 | Some(self.buffer_start_position + self.buffer.len() - 1) 29 | } 30 | 31 | /// Returns index of the next character that can be spied, or empty if no characters can be spied. 32 | pub fn spy_next_index(&self) -> Option { 33 | if self.buffer.is_empty() { 34 | None 35 | } else { 36 | Some(self.buffer_start_position) 37 | } 38 | } 39 | 40 | /// Spies one one more character. 41 | pub fn spy_next(&mut self) -> Option { 42 | let ret = self.buffer.pop_front(); 43 | if ret.is_some() { 44 | self.buffer_start_position += 1; 45 | } 46 | ret 47 | } 48 | 49 | /// Censors a given range (must be fully resident in the buffer). 50 | pub fn censor(&mut self, range: RangeInclusive, replacement: char) { 51 | let start = self.buffer_start_position; 52 | for i in range { 53 | self.buffer[i - start] = replacement; 54 | } 55 | } 56 | } 57 | 58 | impl> Iterator for BufferProxyIterator { 59 | type Item = I::Item; 60 | 61 | fn next(&mut self) -> Option { 62 | let ret = self.iter.next(); 63 | if let Some(val) = ret.as_ref() { 64 | self.buffer.push_back(*val); 65 | } 66 | ret 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/character_analyzer.rs: -------------------------------------------------------------------------------- 1 | #![feature(binary_heap_into_iter_sorted)] 2 | 3 | use image::{GrayImage, Luma, Rgb, RgbImage}; 4 | use imageproc::drawing::draw_text_mut; 5 | use rayon::prelude::{IntoParallelIterator, ParallelIterator}; 6 | use rusttype::{Font, Point, Scale}; 7 | use std::ffi::OsStr; 8 | use std::fs::OpenOptions; 9 | use std::io::{BufWriter, Write}; 10 | use std::sync::Mutex; 11 | use walkdir::WalkDir; 12 | 13 | /// Output file has the following format: 14 | /// - One byte storing the length in 10ths of an `m` of all omitted characters. 
15 | /// - For each character (sorted by character) 16 | /// - Character in UTF-8 17 | /// - Length in 10ths of an `m` as a byte 18 | fn main() { 19 | let fonts: Vec = WalkDir::new("./src/ttf") 20 | .into_iter() 21 | .map(|r| r.unwrap()) 22 | .filter(|d| d.path().extension() == Some(OsStr::new("ttf"))) 23 | .map(|d| { 24 | let bytes = std::fs::read(d.path()).unwrap(); 25 | Font::try_from_vec(bytes).unwrap() 26 | }) 27 | .collect(); 28 | 29 | struct Output { 30 | histogram: [usize; 256], 31 | tab: Vec<(char, u8)>, 32 | } 33 | 34 | impl Output { 35 | pub fn push(&mut self, c: char, max_width: u8) { 36 | self.histogram[max_width as usize] += 1; 37 | self.tab.push((c, max_width)); 38 | } 39 | } 40 | 41 | let output = Mutex::new(Output { 42 | histogram: [0; 256], 43 | tab: Vec::new(), 44 | }); 45 | 46 | (0..=char::MAX as u32).into_par_iter().for_each(|u| { 47 | if let Some(c) = char::from_u32(u) { 48 | let max_width = match c { 49 | '🐿' => 20, 50 | '𒐫' => 80, 51 | '𒈙' => 35, 52 | '༺' | '༻' => 25, 53 | _ => { 54 | let max_width = (max_width(c, &fonts) as f32 / 100f32).round() as u16; 55 | if max_width > u8::MAX as u16 { 56 | panic!("{}", c); 57 | } 58 | max_width as u8 59 | } 60 | }; 61 | 62 | output.lock().unwrap().push(c, max_width); 63 | 64 | //println!("{} -> {}", c, max_width); 65 | } 66 | }); 67 | 68 | let mut output = output.into_inner().unwrap(); 69 | 70 | output.tab.sort_by_key(|&(c, _)| c); 71 | 72 | let mut mode = 0; 73 | let mut mode_n = 0; 74 | for (i, &n) in output.histogram.iter().enumerate() { 75 | let i = i as u8; 76 | println!("{}, {}", i, n); 77 | if n > mode_n { 78 | mode = i; 79 | mode_n = n; 80 | } 81 | } 82 | 83 | println!("Mode: {}", mode); 84 | 85 | let output_file = OpenOptions::new() 86 | .create(true) 87 | .write(true) 88 | .open("./src/character_widths.bin") 89 | .unwrap(); 90 | let mut buffered = BufWriter::new(output_file); 91 | 92 | buffered.write_all(&[mode]).unwrap(); 93 | 94 | for (c, max_width) in output.tab { 95 | if max_width == mode { 96 | continue; 97 | } 98 | let mut tmp = [0u8; 4]; 99 | let s = c.encode_utf8(&mut tmp); 100 | buffered.write_all(s.as_bytes()).unwrap(); 101 | buffered.write_all(&[max_width as u8]).unwrap(); 102 | 103 | if max_width > 60 { 104 | println!("character '{}' has width {}", c, max_width); 105 | } 106 | } 107 | 108 | buffered.flush().unwrap(); 109 | } 110 | 111 | /// Computes max width in milli-m's. 112 | fn max_width(c: char, fonts: &[Font]) -> usize { 113 | use unicode_width::UnicodeWidthChar; 114 | let mut max_width = c.width().map(|w| w * 1000).unwrap_or(0); 115 | for font in fonts { 116 | let width = width(c, font); 117 | max_width = max_width.max(width); 118 | } 119 | max_width 120 | } 121 | 122 | /// Computes with in milli-m's. 
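/// (Measured as the horizontal extent of the glyph's pixel bounding box when laid out at a fixed scale.)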
123 | fn width(c: char, font: &Font) -> usize { 124 | let mut tmp = [0u8; 4]; 125 | let s = c.encode_utf8(&mut tmp); 126 | 127 | let mut min = i32::MAX; 128 | let mut max = i32::MIN; 129 | 130 | font.layout(s, Scale::uniform(1344.0), Point::default()) 131 | .for_each(|i| { 132 | if let Some(b) = i.pixel_bounding_box() { 133 | min = min.min(b.min.x); 134 | max = max.max(b.max.x); 135 | } else if false { 136 | i.draw(|x, _y, _c| { 137 | min = min.min(x as i32); 138 | max = max.max(x as i32); 139 | }) 140 | } 141 | }); 142 | 143 | max.checked_sub(min).unwrap_or(0) as usize 144 | } 145 | 146 | fn render(c: char, font: &Font, resolution: u32) { 147 | let mut image = GrayImage::new(resolution, resolution); 148 | 149 | let height = resolution as f32; 150 | let scale = Scale { 151 | x: height, 152 | y: height, 153 | }; 154 | 155 | let mut tmp = [0u8; 4]; 156 | let text = c.encode_utf8(&mut tmp); 157 | draw_text_mut(&mut image, Luma([255u8]), 0, 0, scale, &font, text); 158 | 159 | let _ = image.save("image.png").unwrap(); 160 | } 161 | -------------------------------------------------------------------------------- /src/context.rs: -------------------------------------------------------------------------------- 1 | use crate::{trim_whitespace, Censor, Type}; 2 | 3 | use crate::censor::should_skip_censor; 4 | use std::collections::VecDeque; 5 | use std::fmt::{self, Debug, Display, Formatter}; 6 | use std::num::{NonZeroU16, NonZeroUsize}; 7 | use std::time::{Duration, Instant}; 8 | 9 | /// Context is useful for taking moderation actions on a per-user basis i.e. each user would get 10 | /// their own Context. 11 | /// 12 | /// # Recommendation 13 | /// 14 | /// Use this as a reference implementation e.g. by copying and adapting it. 15 | #[derive(Clone)] 16 | #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] 17 | #[cfg_attr(doc, doc(cfg(feature = "context")))] 18 | pub struct Context { 19 | history: VecDeque<(String, Time)>, 20 | burst_used: u8, 21 | suspicion: u8, 22 | reports: u8, 23 | total: u16, 24 | total_inappropriate: u16, 25 | muted_until: Option