├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── Gemfile
├── Gemfile.lock
├── LICENSE.txt
├── README.md
├── Rakefile
├── bin
    ├── console
    └── setup
├── lib
    ├── profanity-dictionaries
    │   ├── en.yaml
    │   ├── es.yaml
    │   ├── leet_strategy_dictionary.yaml
    │   ├── partial_match.yaml
    │   └── pt.yaml
    ├── profanity-filter.rb
    └── profanity-filter
    │   ├── engines
    │       ├── allow_duplicate_characters_strategy.rb
    │       ├── allow_symbols_in_words_strategy.rb
    │       ├── component.rb
    │       ├── composite.rb
    │       ├── exact_match_strategy.rb
    │       ├── leet_exact_match_strategy.rb
    │       ├── partial_match_strategy.rb
    │       └── regexp_strategy.rb
    │   └── version.rb
├── profanity-filter.gemspec
└── test
    ├── profanity_filter_engine
        ├── allow_duplicate_characters_strategy_test.rb
        ├── allow_symbols_in_words_strategy_test.rb
        ├── component_test.rb
        ├── composite_test.rb
        ├── exact_match_strategy_test.rb
        ├── leet_exact_match_strategy_test.rb
        ├── partial_match_strategy_test.rb
        └── regexp_strategy_test.rb
    ├── profanity_filter_test.rb
    └── test_helper.rb


/.gitignore:
--------------------------------------------------------------------------------
 1 | /.bundle/
 2 | /.yardoc
 3 | /_yardoc/
 4 | /coverage/
 5 | /doc/
 6 | /pkg/
 7 | /spec/reports/
 8 | /tmp/
 9 | /.idea/
10 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | ---
2 | sudo: false
3 | language: ruby
4 | cache: bundler
5 | rvm:
6 |   - 2.5.5
7 | before_install: gem install bundler -v 2.0.2
8 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | ## Version 1.0
 2 | 
 3 | This version is not compatible with previous versions. The main changes and a migration guide follow:
 4 | 
 5 | 1. Keyword parameter `strictness` for both `profane?` and `profanity_count` is replaced by `strategies`.
 6 | 
 7 |     ```ruby
 8 |     # 'strict mode' before
 9 |     pf.profane?('text', strictness: :strict)
10 |   
11 |     # 'strict mode' now
12 |     pf.profane?('text', strategies: :all)
13 | 
14 |     # 'tolerant mode' before
15 |     pf.profane?('text', strictness: :tolerant)
16 |  
17 |     # 'tolerant mode' now
18 |     pf.profane?('text', strategies: :basic)
19 |     ``` 
20 | 2. We can compose our own strategies:
21 | 
22 |     ```ruby
23 |     # the below two are exactly the same:
24 |     pf.profane?('text', strategies: [:leet, :allow_symbol, :duplicate_characters, :partial_match])
25 |     pf.profane?('text', strategies: :all)
26 |     ```
27 | 3. The default mode now has full support for partial matching:
28 | 
29 |     ```ruby
30 |     # previously this passed our filter; now it is flagged as profane.
31 |     pf.profane?('youasshole')
32 |     ```
33 | 
34 | That's it. Enjoy!


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to making participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, gender identity and expression, level of experience,
 9 | nationality, personal appearance, race, religion, or sexual identity and
10 | orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at jinghua.shih@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at [http://contributor-covenant.org/version/1/4][version]
72 | 
73 | [homepage]: http://contributor-covenant.org
74 | [version]: http://contributor-covenant.org/version/1/4/
75 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source "https://rubygems.org"
2 | 
3 | gemspec
4 | 


--------------------------------------------------------------------------------
/Gemfile.lock:
--------------------------------------------------------------------------------
 1 | PATH
 2 |   remote: .
 3 |   specs:
 4 |     profanity-filter (1.0)
 5 |       webpurify
 6 | 
 7 | GEM
 8 |   remote: https://rubygems.org/
 9 |   specs:
10 |     coderay (1.1.2)
11 |     json (2.2.0)
12 |     method_source (0.9.2)
13 |     minitest (5.11.3)
14 |     pry (0.12.2)
15 |       coderay (~> 1.1.0)
16 |       method_source (~> 0.9.0)
17 |     rake (10.5.0)
18 |     rr (1.2.1)
19 |     webpurify (1.0.1)
20 |       json
21 | 
22 | PLATFORMS
23 |   ruby
24 | 
25 | DEPENDENCIES
26 |   bundler (~> 2.0)
27 |   minitest (~> 5.0)
28 |   profanity-filter!
29 |   pry (~> 0.12.2)
30 |   rake (~> 10.0)
31 |   rr
32 | 
33 | BUNDLED WITH
34 |    2.0.2
35 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2019 jennyshih
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![Gem Version](https://badge.fury.io/rb/profanity-filter.svg)](https://badge.fury.io/rb/profanity-filter)
  2 | 
  3 | ## Profanity Filter
  4 | Strategies to publish offensive texts online can be roughly grouped into 5 categories:
  5 | 1. Similarities, eg. b ⇔ 6
  6 | 2. Diacritics(sound alteration), eg. u ⇔ ü, ù, ú
  7 | 3. Constructions(multi-part), eg. W ⇔ VV, V ⇔ \/
  8 | 4. Injections, eg. s-h-i-t, shhhhhhhhhhhit
  9 | 5. Unicode(same shape but different unicode), eg ⒜, ⍺, ａ, 𝐚, 𝑎, 𝒂, 𝒶, 𝓪, 𝔞, 𝕒, 𝖆, 𝖺, 𝗮, 𝘢, 𝙖
 10 | 
 11 | This profanity filter implements:
 12 | - [Full Support] diacritics, injections, unicode
 13 | - [Partial Support] similarities, constructions
 14 | 
 15 | This gem is also integrated with [WebPurify](https://www.webpurify.com). Usage example below.
 16 | 
 17 | 
 18 | ## Installation
 19 | 
 20 | Add this line to your application's Gemfile:
 21 | 
 22 | ```ruby
 23 | gem 'profanity-filter', '~> 1.0'
 24 | ```
 25 | 
 26 | And then execute:
 27 | 
 28 |     $ bundle install
 29 | 
 30 | Or install it yourself as:
 31 | 
 32 |     $ gem install profanity-filter
 33 | 
 34 | ## Versioning
 35 | Version 1.0 onward is not compatible with previous versions. See the [changelog](https://github.com/cardinalblue/profanity-filter/blob/master/CHANGELOG.md) for details.
 36 | 
 37 | ## Usage
 38 | In your Ruby code,
 39 | 
 40 | ```ruby
 41 | # basic usage
 42 | pf = ProfanityFilter.new
 43 | 
 44 | pf.profane?('ssssshit')
 45 | # => true
 46 | 
 47 | pf.profanity_count('fjsdio fdsk fU_cK_THIS_shI_T')
 48 | # => 2
 49 | ```
 50 | 
 51 | If we want to integrate WebPurify,
 52 | 
 53 | ```ruby
 54 | # with WebPurify
 55 | pf = ProfanityFilter.new(web_purifier_api_key: [YOUR-API-KEY])
 56 | ```
 57 | 
 58 | With WebPurify enabled, texts sent to `profane?` and `profanity_count` will **first** be checked against the mechanism this gem provides, **then** against WebPurify if no positive results are returned.  
 59 | 
 60 | ## Strategies
 61 | There are four different `strategies` that we can compose to our heart's content. 
 62 | 
 63 | 1. `:partial_match`
 64 | will flag a text as profane if any substring of it is in our dictionary.
 65 | 
 66 | 2. `:allow_symbol`
 67 | will flag a text as profane if any word in the text matches our dictionary after removing the symbols.
 68 | 
 69 | 3. `:duplicate_characters`
 70 | will flag a text as profane if any word in the text matches our dictionary after removing duplications.
 71 | 
 72 | 4. `:leet`
 73 | will flag a text as profane if any word in the text matches our dictionary after substituting look-alike Unicode characters with their corresponding letters.
 74 | 
 75 | ## Config
 76 | By default, the profanity filter implements `:partial_match` and `:allow_symbol` strategies. But we can specify what strategies we want:
 77 | 
 78 | ```ruby
 79 | pf = ProfanityFilter.new
 80 | 
 81 | # type :basic is the default
 82 | pf.profane?('test_string', strategies: :basic)
 83 | pf.profanity_count('test_string', strategies: :basic)
 84 | 
 85 | # type :all includes all four strategies
 86 | pf.profane?('test_string', strategies: :all)
 87 | pf.profanity_count('test_string', strategies: :all)
 88 | 
 89 | # compose our own
 90 | pf.profane?('test_string', strategies: [:partial_match, :leet])
 91 | pf.profanity_count('test_string', strategies: [:partial_match, :leet])
 92 | ```
 93 | 
 94 | We also support a `whitelist`, specified at object creation.
 95 | 
 96 | ```ruby
 97 | pf_with_whitelist = ProfanityFilter.new(whitelist: ['asshole'])
 98 | pf_with_whitelist.profane?('asshole')
 99 | # => false
100 | 
101 | ```
102 | ## Development
103 | 
104 | After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
105 | 
106 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
107 | 
108 | ## Contributing
109 | 
110 | Bug reports and pull requests are welcome on GitHub at https://github.com/cardinalblue/profanity-filter. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
111 | 
112 | ## License
113 | 
114 | The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
115 | 
116 | ## Code of Conduct
117 | 
118 | Everyone interacting in the ProfanityFilter project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/cardinalblue/profanity-filter/blob/master/CODE_OF_CONDUCT.md).
119 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | require "bundler/gem_tasks"
 2 | require "rake/testtask"
 3 | 
 4 | Rake::TestTask.new(:test) do |t|
 5 |   t.libs << "test"
 6 |   t.libs << "lib"
 7 |   t.test_files = FileList["test/**/*_test.rb"]
 8 | end
 9 | 
10 | task :default => :test
11 | 


--------------------------------------------------------------------------------
/bin/console:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | 
3 | require "bundler/setup"
4 | require "profanity-filter"
5 | 
6 | require "pry"
7 | Pry.start
8 | 


--------------------------------------------------------------------------------
/bin/setup:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -euo pipefail
3 | IFS=$'\n\t'
4 | set -vx
5 | 
6 | bundle install
7 | 
8 | # Do any other automated setup that you need to do here
9 | 


--------------------------------------------------------------------------------
/lib/profanity-dictionaries/en.yaml:
--------------------------------------------------------------------------------
  1 | - asshole
  2 | - assholes
  3 | - adultvideo
  4 | - badmotherfucker
  5 | - bastard
  6 | - bdsm
  7 | - beastial
  8 | - beastiality
  9 | - beastility
 10 | - bestial
 11 | - bestiality
 12 | - bitch
 13 | - bitcher
 14 | - bitchers
 15 | - bitches
 16 | - bitchin
 17 | - bitching
 18 | - blowjob
 19 | - blowjobs
 20 | - clit
 21 | - cocksuck
 22 | - cocksucked
 23 | - cocksucker
 24 | - cocksucking
 25 | - cocksucks
 26 | - cummer
 27 | - cumming
 28 | - cumshot
 29 | - cunillingus
 30 | - cunnilingus
 31 | - cunt
 32 | - cuntlick
 33 | - cuntlicker
 34 | - cuntlicking
 35 | - cyberfuc
 36 | - cyberfuck
 37 | - cyberfucked
 38 | - cyberfucker
 39 | - cyberfuckers
 40 | - cyberfucking
 41 | - dildo
 42 | - dildos
 43 | - ejaculate
 44 | - ejaculated
 45 | - ejaculates
 46 | - ejaculating
 47 | - ejaculatings
 48 | - ejaculation
 49 | - fag
 50 | - fagging
 51 | - faggot
 52 | - fagots
 53 | - farted
 54 | - farting
 55 | - fartings
 56 | - felatio
 57 | - fellatio
 58 | - fingerfuck
 59 | - fingerfucked
 60 | - fingerfucker
 61 | - fingerfuckers
 62 | - fingerfucking
 63 | - fingerfucks
 64 | - fistfuck
 65 | - fistfucked
 66 | - fistfucker
 67 | - fistfuckers
 68 | - fistfucking
 69 | - fistfuckings
 70 | - fistfucks
 71 | - fixation
 72 | - fuck
 73 | - fucked
 74 | - fucker
 75 | - fuckers
 76 | - fuckin
 77 | - fucking
 78 | - fuckings
 79 | - fuckme
 80 | - fuckpolitics
 81 | - gangbang
 82 | - gangbanged
 83 | - gangbangs
 84 | - gaysex
 85 | - goddamn
 86 | - hardcoresex
 87 | - horniest
 88 | - horny
 89 | - hotsex
 90 | - kondum
 91 | - kondums
 92 | - kummer
 93 | - kumming
 94 | - kunilingus
 95 | - lusting
 96 | - mothafuck
 97 | - mothafucka
 98 | - mothafuckas
 99 | - mothafuckaz
100 | - mothafucked
101 | - mothafucker
102 | - mothafuckers
103 | - mothafuckin
104 | - mothafucking
105 | - mothafuckings
106 | - mothafucks
107 | - motherfuck
108 | - motherfucked
109 | - motherfucker
110 | - motherfuckers
111 | - motherfuckin
112 | - motherfucking
113 | - motherfuckings
114 | - motherfucks
115 | - mutilation
116 | - niger
117 | - nigger
118 | - niggers
119 | - orgasim
120 | - orgasims
121 | - orgasm
122 | - orgasms
123 | - phonesex
124 | - phuked
125 | - phuking
126 | - phukked
127 | - phukking
128 | - piss
129 | - pisser
130 | - pissed
131 | - pissers
132 | - pisses
133 | - pissin
134 | - pising
135 | - pissing
136 | - pissoff
137 | - porn
138 | - pornography
139 | - pornos
140 | - prick
141 | - pricks
142 | - pussies
143 | - pusies
144 | - pussy
145 | - pusy
146 | - pussys
147 | - shit
148 | - shited
149 | - shitfull
150 | - shiting
151 | - shitings
152 | - shitted
153 | - shitter
154 | - shitters
155 | - shitting
156 | - shittings
157 | - shitty
158 | - slut
159 | - smut
160 | - spunk
161 | - submission
162 | - twat
163 | - vagina
164 | - vegina
165 | - vogina
166 | 


--------------------------------------------------------------------------------
/lib/profanity-dictionaries/es.yaml:
--------------------------------------------------------------------------------
 1 | - Asesinato
 2 | - asno
 3 | - bastardo
 4 | - Bollera
 5 | - Cabron
 6 | - Cabrón
 7 | - Caca
 8 | - Chupada
 9 | - Chupapollas
10 | - Chupetón
11 | - concha
12 | - Concha de tu madre
13 | - Coño
14 | - Coprofagía
15 | - Culo
16 | - Drogas
17 | - Esperma
18 | - Fiesta de salchichas
19 | - Follador
20 | - Follar
21 | - Gilipichis
22 | - Gilipollas
23 | - Hacer una paja
24 | - Haciendo el amor
25 | - Heroína
26 | - Hija de puta
27 | - Hijaputa
28 | - Hijo de puta
29 | - Hijoputa
30 | - Idiota
31 | - Imbécil
32 | - infierno
33 | - Jilipollas
34 | - Kapullo
35 | - Lameculos
36 | - Maciza
37 | - Macizorra
38 | - maldito
39 | - Mamada
40 | - Marica
41 | - Maricón
42 | - Mariconazo
43 | - martillo
44 | - Mierda
45 | - Nazi
46 | - Orina
47 | - Pedo
48 | - Pervertido
49 | - Pezón
50 | - Pinche
51 | - Pis
52 | - Prostituta
53 | - Puta
54 | - Racista
55 | - Ramera
56 | - Sádico
57 | - Semen
58 | - Sexo
59 | - Sexo oral
60 | - Soplagaitas
61 | - Soplapollas
62 | - Tetas grandes
63 | - Tía buena
64 | - Travesti
65 | - Trio
66 | - Verga
67 | - vete a la mierda
68 | - Vulva
69 | 


--------------------------------------------------------------------------------
/lib/profanity-dictionaries/leet_strategy_dictionary.yaml:
--------------------------------------------------------------------------------
   1 | ---
   2 | a:
   3 | - a\.
   4 | - a\-
   5 | - '4'
   6 | - "@"
   7 | - Á
   8 | - á
   9 | - À
  10 | - à
  11 | - Â
  12 | - â
  13 | - Ä
  14 | - ä
  15 | - Ã
  16 | - ã
  17 | - Å
  18 | - å
  19 | - α
  20 | - Δ
  21 | - Λ
  22 | - λ
  23 | - A
  24 | - Ă
  25 | - Ắ
  26 | - Ặ
  27 | - Ằ
  28 | - Ẳ
  29 | - Ẵ
  30 | - Ǎ
  31 | - Ấ
  32 | - Ậ
  33 | - Ầ
  34 | - Ẩ
  35 | - Ẫ
  36 | - Ạ
  37 | - Ả
  38 | - Ā
  39 | - Ą
  40 | - Ǻ
  41 | - Æ
  42 | - Ǽ
  43 | - a
  44 | - ă
  45 | - ắ
  46 | - ặ
  47 | - ằ
  48 | - ẳ
  49 | - ẵ
  50 | - ǎ
  51 | - ấ
  52 | - ậ
  53 | - ầ
  54 | - ẩ
  55 | - ẫ
  56 | - ạ
  57 | - ả
  58 | - ā
  59 | - ą
  60 | - ǻ
  61 | - æ
  62 | - ǽ
  63 | - ɑ
  64 | - ɐ
  65 | - ɒ
  66 | - "⒜"
  67 | - "⍺"
  68 | - ａ
  69 | - "\U0001D41A"
  70 | - "\U0001D44E"
  71 | - "\U0001D482"
  72 | - "\U0001D4B6"
  73 | - "\U0001D4EA"
  74 | - "\U0001D51E"
  75 | - "\U0001D552"
  76 | - "\U0001D586"
  77 | - "\U0001D5BA"
  78 | - "\U0001D5EE"
  79 | - "\U0001D622"
  80 | - "\U0001D656"
  81 | - "\U0001D68A"
  82 | - "\U0001D6C2"
  83 | - "\U0001D6FC"
  84 | - "\U0001D736"
  85 | - "\U0001D770"
  86 | - "\U0001D7AA"
  87 | - а
  88 | - "⍶"
  89 | - "℀"
  90 | - "℁"
  91 | - ꜳ
  92 | - ӕ
  93 | - ꜵ
  94 | - ꜷ
  95 | - ꜹ
  96 | - ꜻ
  97 | - ꜽ
  98 | b:
  99 | - b\.
 100 | - b\-
 101 | - '8'
 102 | - "\\"
 103 | - '3'
 104 | - ß
 105 | - Β
 106 | - β
 107 | - B
 108 | - Ḅ
 109 | - Ɓ
 110 | - ʚ
 111 | - ɞ
 112 | - b
 113 | - ḅ
 114 | - ɓ
 115 | - ᑾ
 116 | - ᒀ
 117 | - "⒝"
 118 | - ъ
 119 | - ꙑ
 120 | - "\U0001D41B"
 121 | - "\U0001D44F"
 122 | - "\U0001D483"
 123 | - "\U0001D4B7"
 124 | - "\U0001D4EB"
 125 | - "\U0001D51F"
 126 | - "\U0001D553"
 127 | - "\U0001D587"
 128 | - "\U0001D5BB"
 129 | - "\U0001D5EF"
 130 | - "\U0001D623"
 131 | - "\U0001D657"
 132 | - "\U0001D68B"
 133 | - Ƅ
 134 | - Ь
 135 | - Ꮟ
 136 | - ᑲ
 137 | - ᖯ
 138 | - ᑳ
 139 | - ƃ
 140 | - Ƃ
 141 | - Б
 142 | - ƀ
 143 | - ҍ
 144 | - Ҍ
 145 | - ѣ
 146 | - Ѣ
 147 | - ᑿ
 148 | - ᒁ
 149 | - ᒈ
 150 | - Ы
 151 | - ᔎ
 152 | - ᖃ
 153 | - ᖄ
 154 | c:
 155 | - c\.
 156 | - c\-
 157 | - Ç
 158 | - ç
 159 | - "¢"
 160 | - "€"
 161 | - "<"
 162 | - "\\("
 163 | - "{"
 164 | - "©"
 165 | - C
 166 | - Ć
 167 | - Č
 168 | - Ĉ
 169 | - Ċ
 170 | - Ɔ
 171 | - ʗ
 172 | - c
 173 | - ć
 174 | - č
 175 | - ĉ
 176 | - ɕ
 177 | - ċ
 178 | - "⒞"
 179 | - "℀"
 180 | - ｃ
 181 | - ⅽ
 182 | - "\U0001D41C"
 183 | - "\U0001D450"
 184 | - "\U0001D484"
 185 | - "\U0001D4B8"
 186 | - "\U0001D4EC"
 187 | - "\U0001D520"
 188 | - "\U0001D554"
 189 | - "\U0001D588"
 190 | - "\U0001D5BC"
 191 | - "\U0001D5F0"
 192 | - "\U0001D624"
 193 | - "\U0001D658"
 194 | - "\U0001D68C"
 195 | - ᴄ
 196 | - ϲ
 197 | - ⲥ
 198 | - с
 199 | - ꮯ
 200 | - "\U0001043D"
 201 | - ȼ
 202 | - ҫ
 203 | - "℅"
 204 | - "℆"
 205 | d:
 206 | - d\.
 207 | - d\-
 208 | - "&part;"
 209 | - "\\"
 210 | - "\\)"
 211 | - Þ
 212 | - þ
 213 | - Ð
 214 | - ð
 215 | - D
 216 | - Ď
 217 | - Ḓ
 218 | - Ḍ
 219 | - Ɗ
 220 | - Ḏ
 221 | - ǲ
 222 | - ǅ
 223 | - Đ
 224 | - Ǳ
 225 | - Ǆ
 226 | - d
 227 | - ď
 228 | - ḓ
 229 | - ḍ
 230 | - ɗ
 231 | - ḏ
 232 | - đ
 233 | - ɖ
 234 | - ʤ
 235 | - ǳ
 236 | - ʣ
 237 | - ʥ
 238 | - ǆ
 239 | - ᑺ
 240 | - "⒟"
 241 | - ⅾ
 242 | - ⅆ
 243 | - "\U0001D41D"
 244 | - "\U0001D451"
 245 | - "\U0001D485"
 246 | - "\U0001D4B9"
 247 | - "\U0001D4ED"
 248 | - "\U0001D521"
 249 | - "\U0001D555"
 250 | - "\U0001D589"
 251 | - "\U0001D5BD"
 252 | - "\U0001D5F1"
 253 | - "\U0001D625"
 254 | - "\U0001D659"
 255 | - "\U0001D68D"
 256 | - ԁ
 257 | - Ꮷ
 258 | - ᑯ
 259 | - ꓒ
 260 | - ƌ
 261 | - "₫"
 262 | - ᑻ
 263 | - ᒇ
 264 | - ᖁ
 265 | e:
 266 | - e\.
 267 | - e\-
 268 | - '3'
 269 | - "€"
 270 | - È
 271 | - è
 272 | - É
 273 | - é
 274 | - Ê
 275 | - ê
 276 | - "∑"
 277 | - E
 278 | - Ĕ
 279 | - Ě
 280 | - Ế
 281 | - Ệ
 282 | - Ề
 283 | - Ể
 284 | - Ễ
 285 | - Ë
 286 | - Ė
 287 | - Ẹ
 288 | - Ẻ
 289 | - Ē
 290 | - Ę
 291 | - Ẽ
 292 | - Ɛ
 293 | - Ə
 294 | - e
 295 | - ĕ
 296 | - ě
 297 | - ế
 298 | - ệ
 299 | - ề
 300 | - ể
 301 | - ễ
 302 | - ë
 303 | - ė
 304 | - ẹ
 305 | - ẻ
 306 | - ē
 307 | - ę
 308 | - ẽ
 309 | - ʒ
 310 | - ǯ
 311 | - ʓ
 312 | - ɘ
 313 | - ɜ
 314 | - ɝ
 315 | - ə
 316 | - ɚ
 317 | - ʚ
 318 | - ɞ
 319 | - "⒠"
 320 | - æ
 321 | - ӕ
 322 | - "℮"
 323 | - ｅ
 324 | - ℯ
 325 | - ⅇ
 326 | - "\U0001D41E"
 327 | - "\U0001D452"
 328 | - "\U0001D486"
 329 | - "\U0001D4EE"
 330 | - "\U0001D522"
 331 | - "\U0001D556"
 332 | - "\U0001D58A"
 333 | - "\U0001D5BE"
 334 | - "\U0001D5F2"
 335 | - "\U0001D626"
 336 | - "\U0001D65A"
 337 | - "\U0001D68E"
 338 | - ꬲ
 339 | - е
 340 | - ҽ
 341 | - ɇ
 342 | - ҿ
 343 | - œ
 344 | - ꭢ
 345 | - ᵫ
 346 | f:
 347 | - f\.
 348 | - f\-
 349 | - ƒ
 350 | - F
 351 | - Ƒ
 352 | - f
 353 | - ſ
 354 | - ʩ
 355 | - ʃ
 356 | - ʆ
 357 | - ʅ
 358 | - ɟ
 359 | - ʄ
 360 | - "⒡"
 361 | - "\U0001D41F"
 362 | - "\U0001D453"
 363 | - "\U0001D487"
 364 | - "\U0001D4BB"
 365 | - "\U0001D4EF"
 366 | - "\U0001D523"
 367 | - "\U0001D557"
 368 | - "\U0001D58B"
 369 | - "\U0001D5BF"
 370 | - "\U0001D5F3"
 371 | - "\U0001D627"
 372 | - "\U0001D65B"
 373 | - "\U0001D68F"
 374 | - ẝ
 375 | - ք
 376 | - ᵮ
 377 | - ﬀ
 378 | - ﬃ
 379 | - ﬄ
 380 | - ꝷ
 381 | g:
 382 | - g\.
 383 | - g\-
 384 | - '6'
 385 | - '9'
 386 | - G
 387 | - Ǵ
 388 | - Ğ
 389 | - Ǧ
 390 | - Ģ
 391 | - Ĝ
 392 | - Ġ
 393 | - Ḡ
 394 | - ʛ
 395 | - g
 396 | - ǵ
 397 | - ğ
 398 | - ǧ
 399 | - ģ
 400 | - ĝ
 401 | - ġ
 402 | - ɠ
 403 | - ḡ
 404 | - ɡ
 405 | - ɣ
 406 | - "⒢"
 407 | - ｇ
 408 | - ℊ
 409 | - "\U0001D420"
 410 | - "\U0001D454"
 411 | - "\U0001D488"
 412 | - "\U0001D4F0"
 413 | - "\U0001D524"
 414 | - "\U0001D558"
 415 | - "\U0001D58C"
 416 | - "\U0001D5C0"
 417 | - "\U0001D5F4"
 418 | - "\U0001D628"
 419 | - "\U0001D65C"
 420 | - "\U0001D690"
 421 | - ᶃ
 422 | - ƍ
 423 | - ց
 424 | - ǥ
 425 | h:
 426 | - h\.
 427 | - h\-
 428 | - Η
 429 | - H
 430 | - Ḫ
 431 | - Ĥ
 432 | - Ḥ
 433 | - Ħ
 434 | - h
 435 | - ḫ
 436 | - ĥ
 437 | - ḥ
 438 | - ɦ
 439 | - ẖ
 440 | - ħ
 441 | - ɧ
 442 | - ɥ
 443 | - ʮ
 444 | - ʯ
 445 | - ų
 446 | - "⒣"
 447 | - ｈ
 448 | - ℎ
 449 | - "\U0001D421"
 450 | - "\U0001D489"
 451 | - "\U0001D4BD"
 452 | - "\U0001D4F1"
 453 | - "\U0001D525"
 454 | - "\U0001D559"
 455 | - "\U0001D58D"
 456 | - "\U0001D5C1"
 457 | - "\U0001D5F5"
 458 | - "\U0001D629"
 459 | - "\U0001D65D"
 460 | - "\U0001D691"
 461 | - һ
 462 | - հ
 463 | - Ꮒ
 464 | - ꚕ
 465 | - Ᏺ
 466 | - ℏ
 467 | - ћ
 468 | i:
 469 | - i\.
 470 | - i\-
 471 | - "!"
 472 | - "\\"
 473 | - "\\]\\["
 474 | - "\\]"
 475 | - '1'
 476 | - "∫"
 477 | - Ì
 478 | - Í
 479 | - Î
 480 | - Ï
 481 | - ì
 482 | - í
 483 | - î
 484 | - ï
 485 | - I
 486 | - Ĭ
 487 | - Ǐ
 488 | - İ
 489 | - Ị
 490 | - Ỉ
 491 | - Ī
 492 | - Į
 493 | - Ĩ
 494 | - Ĳ
 495 | - i
 496 | - ĭ
 497 | - ǐ
 498 | - ị
 499 | - ỉ
 500 | - ī
 501 | - į
 502 | - ɨ
 503 | - ĩ
 504 | - ɩ
 505 | - ı
 506 | - ĳ
 507 | - ɟ
 508 | - "⒤"
 509 | - ꙑ
 510 | - ﬃ
 511 | - ﬁ
 512 | - "˛"
 513 | - "⍳"
 514 | - ｉ
 515 | - ⅰ
 516 | - ℹ
 517 | - ⅈ
 518 | - "\U0001D422"
 519 | - "\U0001D456"
 520 | - "\U0001D48A"
 521 | - "\U0001D4BE"
 522 | - "\U0001D4F2"
 523 | - "\U0001D526"
 524 | - "\U0001D55A"
 525 | - "\U0001D58E"
 526 | - "\U0001D5C2"
 527 | - "\U0001D5F6"
 528 | - "\U0001D62A"
 529 | - "\U0001D65E"
 530 | - "\U0001D692"
 531 | - "\U0001D6A4"
 532 | - ɪ
 533 | - ι
 534 | - ι
 535 | - ͺ
 536 | - "\U0001D6CA"
 537 | - "\U0001D704"
 538 | - "\U0001D73E"
 539 | - "\U0001D778"
 540 | - "\U0001D7B2"
 541 | - і
 542 | - ꙇ
 543 | - ӏ
 544 | - ꭵ
 545 | - Ꭵ
 546 | - "\U000118C3"
 547 | - "⍸"
 548 | - ᵻ
 549 | - ᵼ
 550 | - ⅱ
 551 | - ⅲ
 552 | - ⅳ
 553 | - ⅸ
 554 | - ⅵ
 555 | - ⅶ
 556 | - ⅷ
 557 | - ⅺ
 558 | - ⅻ
 559 | - ы
 560 | j:
 561 | - j\.
 562 | - j\-
 563 | - J
 564 | - Ĵ
 565 | - j
 566 | - ǰ
 567 | - ĵ
 568 | - ʝ
 569 | - ȷ
 570 | - ɟ
 571 | - ʄ
 572 | - "⒥"
 573 | - ĳ
 574 | - ｊ
 575 | - ⅉ
 576 | - "\U0001D423"
 577 | - "\U0001D457"
 578 | - "\U0001D48B"
 579 | - "\U0001D4BF"
 580 | - "\U0001D4F3"
 581 | - "\U0001D527"
 582 | - "\U0001D55B"
 583 | - "\U0001D58F"
 584 | - "\U0001D5C3"
 585 | - "\U0001D5F7"
 586 | - "\U0001D62B"
 587 | - "\U0001D65F"
 588 | - "\U0001D693"
 589 | - ϳ
 590 | - ј
 591 | - ɉ
 592 | - ǉ
 593 | - ǈ
 594 | - ǌ
 595 | - ǋ
 596 | k:
 597 | - k\.
 598 | - k\-
 599 | - Κ
 600 | - κ
 601 | - K
 602 | - Ķ
 603 | - Ḳ
 604 | - Ƙ
 605 | - Ḵ
 606 | - k
 607 | - ķ
 608 | - ḳ
 609 | - ƙ
 610 | - ḵ
 611 | - ĸ
 612 | - ʞ
 613 | - "⒦"
 614 | - "\U0001D424"
 615 | - "\U0001D458"
 616 | - "\U0001D48C"
 617 | - "\U0001D4C0"
 618 | - "\U0001D4F4"
 619 | - "\U0001D528"
 620 | - "\U0001D55C"
 621 | - "\U0001D590"
 622 | - "\U0001D5C4"
 623 | - "\U0001D5F8"
 624 | - "\U0001D62C"
 625 | - "\U0001D660"
 626 | - "\U0001D694"
 627 | l:
 628 | - l\.
 629 | - l\-
 630 | - "!"
 631 | - "\\"
 632 | - "\\]\\["
 633 | - "\\]"
 634 | - "£"
 635 | - "∫"
 636 | - Ì
 637 | - Í
 638 | - Î
 639 | - Ï
 640 | - L
 641 | - Ĺ
 642 | - Ƚ
 643 | - Ľ
 644 | - Ļ
 645 | - Ḽ
 646 | - Ḷ
 647 | - Ḹ
 648 | - Ḻ
 649 | - Ŀ
 650 | - ǈ
 651 | - Ł
 652 | - Ǉ
 653 | - l
 654 | - ĺ
 655 | - ƚ
 656 | - ɬ
 657 | - ľ
 658 | - ļ
 659 | - ḽ
 660 | - ḷ
 661 | - ḹ
 662 | - ḻ
 663 | - ŀ
 664 | - ɫ
 665 | - ɭ
 666 | - ł
 667 | - ƛ
 668 | - ɮ
 669 | - ǉ
 670 | - ʪ
 671 | - ʫ
 672 | - "⑴"
 673 | - "\U0001F118"
 674 | - "⒧"
 675 | - "⑿"
 676 | - "⒀"
 677 | - "⒁"
 678 | - "⒂"
 679 | - "⒃"
 680 | - "⒄"
 681 | - "⒅"
 682 | - "⒆"
 683 | - "⑾"
 684 | - "⑽"
 685 | - "㏴"
 686 | - "㍭"
 687 | - "㏾"
 688 | - Ы
 689 | - ﬄ
 690 | - ﬂ
 691 | - "׀"
 692 | - "|"
 693 | - "∣"
 694 | - "⏽"
 695 | - "￨"
 696 | - '1'
 697 | - ١
 698 | - ۱
 699 | - "\U00010320"
 700 | - "\U0001E8C7"
 701 | - "\U0001D7CF"
 702 | - "\U0001D7D9"
 703 | - "\U0001D7E3"
 704 | - "\U0001D7ED"
 705 | - "\U0001D7F7"
 706 | - I
 707 | - Ｉ
 708 | - Ⅰ
 709 | - ℐ
 710 | - ℑ
 711 | - "\U0001D408"
 712 | - "\U0001D43C"
 713 | - "\U0001D470"
 714 | - "\U0001D4D8"
 715 | - "\U0001D540"
 716 | - "\U0001D574"
 717 | - "\U0001D5A8"
 718 | - "\U0001D5DC"
 719 | - "\U0001D610"
 720 | - "\U0001D644"
 721 | - "\U0001D678"
 722 | - Ɩ
 723 | - ｌ
 724 | - ⅼ
 725 | - ℓ
 726 | - "\U0001D425"
 727 | - "\U0001D459"
 728 | - "\U0001D48D"
 729 | - "\U0001D4C1"
 730 | - "\U0001D4F5"
 731 | - "\U0001D529"
 732 | - "\U0001D55D"
 733 | - "\U0001D591"
 734 | - "\U0001D5C5"
 735 | - "\U0001D5F9"
 736 | - "\U0001D62D"
 737 | - "\U0001D661"
 738 | - "\U0001D695"
 739 | - ǀ
 740 | - Ι
 741 | - "\U0001D6B0"
 742 | - "\U0001D6EA"
 743 | - "\U0001D724"
 744 | - "\U0001D75E"
 745 | - "\U0001D798"
 746 | - Ⲓ
 747 | - І
 748 | - Ӏ
 749 | - ו
 750 | - ן
 751 | - ا
 752 | - "\U0001EE00"
 753 | - "\U0001EE80"
 754 | - ﺎ
 755 | - ﺍ
 756 | - ߊ
 757 | - ⵏ
 758 | - ᛁ
 759 | - ꓲ
 760 | - "\U00016F28"
 761 | - "\U0001028A"
 762 | - "\U00010309"
 763 | - ﴼ
 764 | - ﴽ
 765 | - Ɨ
 766 | - إ
 767 | - ﺈ
 768 | - ﺇ
 769 | - ٳ
 770 | - ᒷ
 771 | - "\U0001F102"
 772 | - "⒈"
 773 | - ױ
 774 | - "⒓"
 775 | - "㏫"
 776 | - "㋋"
 777 | - "㍤"
 778 | - "⒔"
 779 | - "㏬"
 780 | - "㍥"
 781 | - "⒕"
 782 | - "㏭"
 783 | - "㍦"
 784 | - "⒖"
 785 | - "㏮"
 786 | - "㍧"
 787 | - "⒗"
 788 | - "㏯"
 789 | - "㍨"
 790 | - "⒘"
 791 | - "㏰"
 792 | - "㍩"
 793 | - "⒙"
 794 | - "㏱"
 795 | - "㍪"
 796 | - "⒚"
 797 | - "㏲"
 798 | - "㍫"
 799 | - Ĳ
 800 | - "‖"
 801 | - "∥"
 802 | - Ⅱ
 803 | - ǁ
 804 | - װ
 805 | - "\U00010199"
 806 | - "⒒"
 807 | - Ⅲ
 808 | - "\U00010198"
 809 | - "㏪"
 810 | - "㋊"
 811 | - "㍣"
 812 | - Ю
 813 | - "⒑"
 814 | - "㏩"
 815 | - "㋉"
 816 | - "㍢"
 817 | - "₶"
 818 | - Ⅳ
 819 | - Ⅸ
 820 | - أ
 821 | - ﺄ
 822 | - ﺃ
 823 | - ٲ
 824 | - ٵ
 825 | - ﷳ
 826 | - ﷲ
 827 | - "㏠"
 828 | - "㋀"
 829 | - "㍙"
 830 | - Ⅵ
 831 | - Ⅶ
 832 | - Ⅷ
 833 | - Ⅺ
 834 | - Ⅻ
 835 | - Ꙑ
 836 | - ﷻ
 837 | - "﷼"
 838 | - ﷺ
 839 | - ﲀ
 840 | - ﰷ
 841 | - ﻼ
 842 | - ﻻ
 843 | - ﻺ
 844 | - ﻹ
 845 | - ﻸ
 846 | - ﻷ
 847 | - ﲈ
 848 | - ﯫ
 849 | - ﯪ
 850 | m:
 851 | - m\.
 852 | - m\-
 853 | - M
 854 | - Ḿ
 855 | - Ṁ
 856 | - Ṃ
 857 | - m
 858 | - ḿ
 859 | - ṁ
 860 | - ṃ
 861 | - ɱ
 862 | - ɯ
 863 | - ɰ
 864 | n:
 865 | - n\.
 866 | - n\-
 867 | - η
 868 | - Ν
 869 | - Π
 870 | - N
 871 | - Ń
 872 | - Ň
 873 | - Ņ
 874 | - Ṋ
 875 | - Ṅ
 876 | - Ṇ
 877 | - Ǹ
 878 | - Ɲ
 879 | - Ṉ
 880 | - ǋ
 881 | - Ñ
 882 | - Ǌ
 883 | - n
 884 | - ŉ
 885 | - ń
 886 | - ň
 887 | - ņ
 888 | - ṋ
 889 | - ṅ
 890 | - ṇ
 891 | - ǹ
 892 | - ɲ
 893 | - ṉ
 894 | - ɳ
 895 | - ñ
 896 | - ǌ
 897 | - ŋ
 898 | - Ŋ
 899 | - "⒩"
 900 | - "⒨"
 901 | - "\U0001D427"
 902 | - "\U0001D45B"
 903 | - "\U0001D48F"
 904 | - "\U0001D4C3"
 905 | - "\U0001D4F7"
 906 | - "\U0001D52B"
 907 | - "\U0001D55F"
 908 | - "\U0001D593"
 909 | - "\U0001D5C7"
 910 | - "\U0001D5FB"
 911 | - "\U0001D62F"
 912 | - "\U0001D663"
 913 | - "\U0001D697"
 914 | - ո
 915 | - ռ
 916 | - ƞ
 917 | - "\U0001D6C8"
 918 | - "\U0001D702"
 919 | - "\U0001D73C"
 920 | - "\U0001D776"
 921 | - "\U0001D7B0"
 922 | - ᵰ
 923 | - "\U000118E3"
 924 | - m
 925 | - ⅿ
 926 | - "\U0001D426"
 927 | - "\U0001D45A"
 928 | - "\U0001D48E"
 929 | - "\U0001D4C2"
 930 | - "\U0001D4F6"
 931 | - "\U0001D52A"
 932 | - "\U0001D55E"
 933 | - "\U0001D592"
 934 | - "\U0001D5C6"
 935 | - "\U0001D5FA"
 936 | - "\U0001D62E"
 937 | - "\U0001D662"
 938 | - "\U0001D696"
 939 | - "\U00011700"
 940 | - "₥"
 941 | - ɱ
 942 | - ᵯ
 943 | o:
 944 | - o\.
 945 | - o\-
 946 | - '0'
 947 | - Ο
 948 | - ο
 949 | - Φ
 950 | - "¤"
 951 | - "°"
 952 | - ø
 953 | - O
 954 | - Ó
 955 | - Ŏ
 956 | - Ǒ
 957 | - Ô
 958 | - Ố
 959 | - Ộ
 960 | - Ồ
 961 | - Ổ
 962 | - Ỗ
 963 | - Ö
 964 | - Ọ
 965 | - Ő
 966 | - Ò
 967 | - Ỏ
 968 | - Ơ
 969 | - Ớ
 970 | - Ợ
 971 | - Ờ
 972 | - Ở
 973 | - Ỡ
 974 | - Ō
 975 | - Ɵ
 976 | - Ǫ
 977 | - Ø
 978 | - Ǿ
 979 | - Õ
 980 | - Œ
 981 | - ɶ
 982 | - o
 983 | - ó
 984 | - ŏ
 985 | - ǒ
 986 | - ô
 987 | - ố
 988 | - ộ
 989 | - ồ
 990 | - ổ
 991 | - ỗ
 992 | - ö
 993 | - ọ
 994 | - ő
 995 | - ò
 996 | - ỏ
 997 | - ơ
 998 | - ớ
 999 | - ợ
1000 | - ờ
1001 | - ở
1002 | - ỡ
1003 | - ō
1004 | - ǫ
1005 | - ǿ
1006 | - õ
1007 | - ɛ
1008 | - ɔ
1009 | - ɵ
1010 | - ʘ
1011 | - œ
1012 | - "⒪"
1013 | - ꜵ
1014 | - "℅"
1015 | - ᴔ
1016 | - ꭁ
1017 | - ꭂ
1018 | - ﷲ
1019 | - "№"
1020 | - ం
1021 | - ಂ
1022 | - ം
1023 | - ං
1024 | - ०
1025 | - ੦
1026 | - ૦
1027 | - ௦
1028 | - ౦
1029 | - ೦
1030 | - ൦
1031 | - ๐
1032 | - ໐
1033 | - ၀
1034 | - ٥
1035 | - ۵
1036 | - ｏ
1037 | - ℴ
1038 | - "\U0001D428"
1039 | - "\U0001D45C"
1040 | - "\U0001D490"
1041 | - "\U0001D4F8"
1042 | - "\U0001D52C"
1043 | - "\U0001D560"
1044 | - "\U0001D594"
1045 | - "\U0001D5C8"
1046 | - "\U0001D5FC"
1047 | - "\U0001D630"
1048 | - "\U0001D664"
1049 | - "\U0001D698"
1050 | - ᴏ
1051 | - ᴑ
1052 | - ꬽ
1053 | - "\U0001D6D0"
1054 | - "\U0001D70A"
1055 | - "\U0001D744"
1056 | - "\U0001D77E"
1057 | - "\U0001D7B8"
1058 | - σ
1059 | - "\U0001D6D4"
1060 | - "\U0001D70E"
1061 | - "\U0001D748"
1062 | - "\U0001D782"
1063 | - "\U0001D7BC"
1064 | - ⲟ
1065 | - о
1066 | - ჿ
1067 | - օ
1068 | - ס
1069 | - ه
1070 | - "\U0001EE24"
1071 | - "\U0001EE64"
1072 | - "\U0001EE84"
1073 | - ﻫ
1074 | - ﻬ
1075 | - ﻪ
1076 | - ﻩ
1077 | - ھ
1078 | - ﮬ
1079 | - ﮭ
1080 | - ﮫ
1081 | - ﮪ
1082 | - ہ
1083 | - ﮨ
1084 | - ﮩ
1085 | - ﮧ
1086 | - ﮦ
1087 | - ە
1088 | - ഠ
1089 | - ဝ
1090 | - "\U000104EA"
1091 | - "\U000118C8"
1092 | - "\U000118D7"
1093 | - "\U0001042C"
1094 | - ۿ
1095 | - ꬾ
1096 | - ꝋ
1097 | - ө
1098 | - ѳ
1099 | - ꮎ
1100 | - ꮻ
1101 | - ꭴ
1102 | - ﳙ
1103 | - "∞"
1104 | - ꝏ
1105 | - ꚙ
1106 | - ﳗ
1107 | - ﱑ
1108 | - ﳘ
1109 | - ﱒ
1110 | - ﶓ
1111 | - ﶔ
1112 | - ﱓ
1113 | - ﱔ
1114 | - ൟ
1115 | - တ
1116 | - ꭣ
1117 | - ﲠ
1118 | - ﳢ
1119 | - ﲥ
1120 | - ﳤ
1121 | - ﷻ
1122 | - ﴱ
1123 | - ﳨ
1124 | - ﴲ
1125 | - ﳪ
1126 | - ﷺ
1127 | - ﷷ
1128 | - ﳍ
1129 | - ﳖ
1130 | - ﳯ
1131 | - ﳞ
1132 | - ﳱ
1133 | - ﳦ
1134 | - ﲛ
1135 | - ﳠ
1136 | - ﯭ
1137 | - ﯬ
1138 | p:
1139 | - p\.
1140 | - p\-
1141 | - ρ
1142 | - Ρ
1143 | - "¶"
1144 | - þ
1145 | - P
1146 | - Þ
1147 | - p
1148 | - ɸ
1149 | - "⒫"
1150 | - "⍴"
1151 | - ｐ
1152 | - "\U0001D429"
1153 | - "\U0001D45D"
1154 | - "\U0001D491"
1155 | - "\U0001D4C5"
1156 | - "\U0001D4F9"
1157 | - "\U0001D52D"
1158 | - "\U0001D561"
1159 | - "\U0001D595"
1160 | - "\U0001D5C9"
1161 | - "\U0001D5FD"
1162 | - "\U0001D631"
1163 | - "\U0001D665"
1164 | - "\U0001D699"
1165 | - ϱ
1166 | - "\U0001D6D2"
1167 | - "\U0001D6E0"
1168 | - "\U0001D70C"
1169 | - "\U0001D71A"
1170 | - "\U0001D746"
1171 | - "\U0001D754"
1172 | - "\U0001D780"
1173 | - "\U0001D78E"
1174 | - "\U0001D7BA"
1175 | - "\U0001D7C8"
1176 | - ⲣ
1177 | - р
1178 | - ƥ
1179 | - ᵽ
1180 | - ᑷ
1181 | q:
1182 | - q\.
1183 | - q\-
1184 | - Q
1185 | - q
1186 | - ʠ
1187 | - "⒬"
1188 | - "\U0001D42A"
1189 | - "\U0001D45E"
1190 | - "\U0001D492"
1191 | - "\U0001D4C6"
1192 | - "\U0001D4FA"
1193 | - "\U0001D52E"
1194 | - "\U0001D562"
1195 | - "\U0001D596"
1196 | - "\U0001D5CA"
1197 | - "\U0001D5FE"
1198 | - "\U0001D632"
1199 | - "\U0001D666"
1200 | - "\U0001D69A"
1201 | - ԛ
1202 | - գ
1203 | - զ
1204 | r:
1205 | - r\.
1206 | - r\-
1207 | - "®"
1208 | - R
1209 | - Ŕ
1210 | - Ř
1211 | - Ŗ
1212 | - Ṙ
1213 | - Ṛ
1214 | - Ṝ
1215 | - Ṟ
1216 | - ʁ
1217 | - r
1218 | - ŕ
1219 | - ř
1220 | - ŗ
1221 | - ṙ
1222 | - ṛ
1223 | - ṝ
1224 | - ɾ
1225 | - ṟ
1226 | - ɼ
1227 | - ɽ
1228 | - ɿ
1229 | - ɹ
1230 | - ɻ
1231 | - ɺ
1232 | - "⒭"
1233 | - "⒨"
1234 | - "\U0001D42B"
1235 | - "\U0001D45F"
1236 | - "\U0001D493"
1237 | - "\U0001D4C7"
1238 | - "\U0001D4FB"
1239 | - "\U0001D52F"
1240 | - "\U0001D563"
1241 | - "\U0001D597"
1242 | - "\U0001D5CB"
1243 | - "\U0001D5FF"
1244 | - "\U0001D633"
1245 | - "\U0001D667"
1246 | - "\U0001D69B"
1247 | - ꭇ
1248 | - ꭈ
1249 | - ᴦ
1250 | - ⲅ
1251 | - г
1252 | - ꮁ
1253 | - ɍ
1254 | - ғ
1255 | - ᵲ
1256 | - ґ
1257 | - "\U000118E3"
1258 | - m
1259 | - ⅿ
1260 | - "\U0001D426"
1261 | - "\U0001D45A"
1262 | - "\U0001D48E"
1263 | - "\U0001D4C2"
1264 | - "\U0001D4F6"
1265 | - "\U0001D52A"
1266 | - "\U0001D55E"
1267 | - "\U0001D592"
1268 | - "\U0001D5C6"
1269 | - "\U0001D5FA"
1270 | - "\U0001D62E"
1271 | - "\U0001D662"
1272 | - "\U0001D696"
1273 | - "\U00011700"
1274 | - "₥"
1275 | - ɱ
1276 | - ᵯ
1277 | s:
1278 | - s\.
1279 | - s\-
1280 | - '5'
1281 | - "\\$"
1282 | - "§"
1283 | - S
1284 | - Ś
1285 | - Š
1286 | - Ş
1287 | - Ŝ
1288 | - Ș
1289 | - Ṡ
1290 | - Ṣ
1291 | - ẞ
1292 | - s
1293 | - ś
1294 | - š
1295 | - ş
1296 | - ŝ
1297 | - ș
1298 | - ṡ
1299 | - ṣ
1300 | - ʂ
1301 | - ſ
1302 | - ʃ
1303 | - ʆ
1304 | - ß
1305 | - ʅ
1306 | - "⒮"
1307 | - "℁"
1308 | - ʪ
1309 | - "₨"
1310 | - ｓ
1311 | - "\U0001D42C"
1312 | - "\U0001D460"
1313 | - "\U0001D494"
1314 | - "\U0001D4C8"
1315 | - "\U0001D4FC"
1316 | - "\U0001D530"
1317 | - "\U0001D564"
1318 | - "\U0001D598"
1319 | - "\U0001D5CC"
1320 | - "\U0001D600"
1321 | - "\U0001D634"
1322 | - "\U0001D668"
1323 | - "\U0001D69C"
1324 | - ꜱ
1325 | - ƽ
1326 | - ѕ
1327 | - ꮪ
1328 | - "\U000118C1"
1329 | - "\U00010448"
1330 | - ᵴ
1331 | - "\U0001F75C"
1332 | - ﬆ
1333 | - ʦ
1334 | t:
1335 | - t\.
1336 | - t\-
1337 | - Τ
1338 | - τ
1339 | - '7'
1340 | - T
1341 | - Ť
1342 | - Ţ
1343 | - Ṱ
1344 | - Ț
1345 | - Ṭ
1346 | - Ṯ
1347 | - Ŧ
1348 | - Þ
1349 | - Ð
1350 | - t
1351 | - ť
1352 | - ţ
1353 | - ṱ
1354 | - ț
1355 | - ẗ
1356 | - ṭ
1357 | - ṯ
1358 | - ʈ
1359 | - ŧ
1360 | - ʨ
1361 | - ʧ
1362 | - þ
1363 | - ð
1364 | - ʦ
1365 | - ʇ
1366 | - "⒯"
1367 | - "₶"
1368 | - ﬆ
1369 | - "\U0001D42D"
1370 | - "\U0001D461"
1371 | - "\U0001D495"
1372 | - "\U0001D4C9"
1373 | - "\U0001D4FD"
1374 | - "\U0001D531"
1375 | - "\U0001D565"
1376 | - "\U0001D599"
1377 | - "\U0001D5CD"
1378 | - "\U0001D601"
1379 | - "\U0001D635"
1380 | - "\U0001D669"
1381 | - "\U0001D69D"
1382 | - ƭ
1383 | - ᵵ
1384 | - ꝷ
1385 | - ꜩ
1386 | u:
1387 | - u\.
1388 | - u\-
1389 | - υ
1390 | - µ
1391 | - U
1392 | - Ú
1393 | - Ŭ
1394 | - Ǔ
1395 | - Û
1396 | - Ü
1397 | - Ǘ
1398 | - Ǚ
1399 | - Ǜ
1400 | - Ǖ
1401 | - Ụ
1402 | - Ű
1403 | - Ù
1404 | - Ủ
1405 | - Ư
1406 | - Ứ
1407 | - Ự
1408 | - Ừ
1409 | - Ử
1410 | - Ữ
1411 | - Ū
1412 | - Ų
1413 | - Ů
1414 | - Ũ
1415 | - u
1416 | - ʉ
1417 | - ú
1418 | - ŭ
1419 | - ǔ
1420 | - û
1421 | - ü
1422 | - ǘ
1423 | - ǚ
1424 | - ǜ
1425 | - ǖ
1426 | - ụ
1427 | - ű
1428 | - ù
1429 | - ủ
1430 | - ư
1431 | - ứ
1432 | - ự
1433 | - ừ
1434 | - ử
1435 | - ữ
1436 | - ū
1437 | - ų
1438 | - ů
1439 | - ũ
1440 | - ʊ
1441 | - "⒰"
1442 | - ꜷ
1443 | - "℆"
1444 | - "\U0001D42E"
1445 | - "\U0001D462"
1446 | - "\U0001D496"
1447 | - "\U0001D4CA"
1448 | - "\U0001D4FE"
1449 | - "\U0001D532"
1450 | - "\U0001D566"
1451 | - "\U0001D59A"
1452 | - "\U0001D5CE"
1453 | - "\U0001D602"
1454 | - "\U0001D636"
1455 | - "\U0001D66A"
1456 | - "\U0001D69E"
1457 | - ꞟ
1458 | - ᴜ
1459 | - ꭎ
1460 | - ꭒ
1461 | - ʋ
1462 | - "\U0001D6D6"
1463 | - "\U0001D710"
1464 | - "\U0001D74A"
1465 | - "\U0001D784"
1466 | - "\U0001D7BE"
1467 | - ս
1468 | - "\U000104F6"
1469 | - "\U000118D8"
1470 | - ᵾ
1471 | - ꮜ
1472 | - ᵫ
1473 | - ꭣ
1474 | v:
1475 | - v\.
1476 | - v\-
1477 | - υ
1478 | - ν
1479 | - V
1480 | - v
1481 | - ʋ
1482 | - ʌ
1483 | - "⒱"
1484 | - ꜹ
1485 | - ꜻ
1486 | - ⅳ
1487 | - "∨"
1488 | - "⋁"
1489 | - ｖ
1490 | - ⅴ
1491 | - "\U0001D42F"
1492 | - "\U0001D463"
1493 | - "\U0001D497"
1494 | - "\U0001D4CB"
1495 | - "\U0001D4FF"
1496 | - "\U0001D533"
1497 | - "\U0001D567"
1498 | - "\U0001D59B"
1499 | - "\U0001D5CF"
1500 | - "\U0001D603"
1501 | - "\U0001D637"
1502 | - "\U0001D66B"
1503 | - "\U0001D69F"
1504 | - ᴠ
1505 | - "\U0001D6CE"
1506 | - "\U0001D708"
1507 | - "\U0001D742"
1508 | - "\U0001D77C"
1509 | - "\U0001D7B6"
1510 | - ѵ
1511 | - ט
1512 | - "\U00011706"
1513 | - ꮩ
1514 | - "\U000118C0"
1515 | - ⅵ
1516 | - ⅶ
1517 | - ⅷ
1518 | w:
1519 | - w\.
1520 | - w\-
1521 | - ω
1522 | - ψ
1523 | - Ψ
1524 | - W
1525 | - Ẃ
1526 | - Ŵ
1527 | - Ẅ
1528 | - Ẁ
1529 | - ʬ
1530 | - w
1531 | - ẃ
1532 | - ŵ
1533 | - ẅ
1534 | - ẁ
1535 | - ʍ
1536 | - "⒲"
1537 | - ɯ
1538 | - "\U0001D430"
1539 | - "\U0001D464"
1540 | - "\U0001D498"
1541 | - "\U0001D4CC"
1542 | - "\U0001D500"
1543 | - "\U0001D534"
1544 | - "\U0001D568"
1545 | - "\U0001D59C"
1546 | - "\U0001D5D0"
1547 | - "\U0001D604"
1548 | - "\U0001D638"
1549 | - "\U0001D66C"
1550 | - "\U0001D6A0"
1551 | - ᴡ
1552 | - ѡ
1553 | - ԝ
1554 | - ա
1555 | - "\U0001170A"
1556 | - "\U0001170E"
1557 | - "\U0001170F"
1558 | - ꮃ
1559 | - ѽ
1560 | - "\U000114C5"
1561 | - ꝡ
1562 | x:
1563 | - x\.
1564 | - x\-
1565 | - Χ
1566 | - χ
1567 | - X
1568 | - x
1569 | - "⒳"
1570 | - ⅸ
1571 | - "᙮"
1572 | - "×"
1573 | - "⤫"
1574 | - "⤬"
1575 | - "⨯"
1576 | - ｘ
1577 | - ⅹ
1578 | - "\U0001D431"
1579 | - "\U0001D465"
1580 | - "\U0001D499"
1581 | - "\U0001D4CD"
1582 | - "\U0001D501"
1583 | - "\U0001D535"
1584 | - "\U0001D569"
1585 | - "\U0001D59D"
1586 | - "\U0001D5D1"
1587 | - "\U0001D605"
1588 | - "\U0001D639"
1589 | - "\U0001D66D"
1590 | - "\U0001D6A1"
1591 | - х
1592 | - ᕁ
1593 | - ᕽ
1594 | - "⨰"
1595 | - ⅺ
1596 | - ⅻ
1597 | y:
1598 | - y\.
1599 | - y\-
1600 | - "¥"
1601 | - γ
1602 | - ÿ
1603 | - ý
1604 | - Ÿ
1605 | - Ý
1606 | - Y
1607 | - Ŷ
1608 | - Ẏ
1609 | - Ỵ
1610 | - Ỳ
1611 | - Ƴ
1612 | - Ỷ
1613 | - Ȳ
1614 | - Ỹ
1615 | - y
1616 | - ŷ
1617 | - ẏ
1618 | - ỵ
1619 | - ỳ
1620 | - ƴ
1621 | - ỷ
1622 | - ȳ
1623 | - ỹ
1624 | - ʎ
1625 | - "⒴"
1626 | - ꜽ
1627 | - ɣ
1628 | - ᶌ
1629 | - ｙ
1630 | - "\U0001D432"
1631 | - "\U0001D466"
1632 | - "\U0001D49A"
1633 | - "\U0001D4CE"
1634 | - "\U0001D502"
1635 | - "\U0001D536"
1636 | - "\U0001D56A"
1637 | - "\U0001D59E"
1638 | - "\U0001D5D2"
1639 | - "\U0001D606"
1640 | - "\U0001D63A"
1641 | - "\U0001D66E"
1642 | - "\U0001D6A2"
1643 | - ʏ
1644 | - ỿ
1645 | - ꭚ
1646 | - ℽ
1647 | - "\U0001D6C4"
1648 | - "\U0001D6FE"
1649 | - "\U0001D738"
1650 | - "\U0001D772"
1651 | - "\U0001D7AC"
1652 | - у
1653 | - ү
1654 | - ყ
1655 | - "\U000118DC"
1656 | - ɏ
1657 | - ұ
1658 | z:
1659 | - z\.
1660 | - z\-
1661 | - Ζ
1662 | - Z
1663 | - Ź
1664 | - Ž
1665 | - Ż
1666 | - Ẓ
1667 | - Ẕ
1668 | - Ƶ
1669 | - z
1670 | - ź
1671 | - ž
1672 | - ʑ
1673 | - ż
1674 | - ẓ
1675 | - ẕ
1676 | - ʐ
1677 | - ƶ
1678 | - ǂ
1679 | - ʭ
1680 | - ʔ
1681 | - ʡ
1682 | - ʖ
1683 | - ǁ
1684 | - ʕ
1685 | - ǃ
1686 | - ʢ
1687 | - "⒵"
1688 | - ǳ
1689 | - ʣ
1690 | - ǲ
1691 | - ʫ
1692 | - "\U0001D433"
1693 | - "\U0001D467"
1694 | - "\U0001D49B"
1695 | - "\U0001D4CF"
1696 | - "\U0001D503"
1697 | - "\U0001D537"
1698 | - "\U0001D56B"
1699 | - "\U0001D59F"
1700 | - "\U0001D5D3"
1701 | - "\U0001D607"
1702 | - "\U0001D63B"
1703 | - "\U0001D66F"
1704 | - "\U0001D6A3"
1705 | - ᴢ
1706 | - ꮓ
1707 | - "\U000118C4"
1708 | - ȥ
1709 | - ᵶ
1710 | 


--------------------------------------------------------------------------------
/lib/profanity-dictionaries/partial_match.yaml:
--------------------------------------------------------------------------------
1 | - 🖕
2 | 


--------------------------------------------------------------------------------
/lib/profanity-dictionaries/pt.yaml:
--------------------------------------------------------------------------------
 1 | - aborto
 2 | - amador
 3 | - ânus
 4 | - aranha
 5 | - ariano
 6 | - balalao
 7 | - bastardo
 8 | - bicha
 9 | - biscate
10 | - bissexual
11 | - boceta
12 | - boob
13 | - bosta
14 | - braulio de borracha
15 | - bumbum
16 | - burro
17 | - cabrao
18 | - cacete
19 | - cagar
20 | - camisinha
21 | - caralho
22 | - cerveja
23 | - chochota
24 | - chupar
25 | - clitoris
26 | - cocaína
27 | - colhoes
28 | - comer
29 | - cona
30 | - consolo
31 | - corno
32 | - cu
33 | - dar o rabo
34 | - dum raio
35 | - esporra
36 | - fecal
37 | - filho da puta
38 | - foda
39 | - foda-se
40 | - foder
41 | - frango assado
42 | - gozar
43 | - grelho
44 | - heroína
45 | - heterosexual
46 | - homem gay
47 | - homoerótico
48 | - homosexual
49 | - inferno
50 | - lésbica
51 | - lolita
52 | - mama
53 | - merda
54 | - paneleiro
55 | - passar um cheque
56 | - pau
57 | - peidar
58 | - pênis
59 | - pinto
60 | - porra
61 | - puta
62 | - puta que pariu
63 | - puta que te pariu
64 | - queca
65 | - sacanagem
66 | - saco
67 | - torneira
68 | - transar
69 | - vai-te foder
70 | - vai tomar no cu
71 | - veado
72 | - vibrador
73 | - xana
74 | - xochota
75 | 


--------------------------------------------------------------------------------
/lib/profanity-filter.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require 'profanity-filter/version'
  4 | require 'profanity-filter/engines/composite'
  5 | require 'profanity-filter/engines/partial_match_strategy'
  6 | require 'profanity-filter/engines/allow_duplicate_characters_strategy'
  7 | require 'profanity-filter/engines/allow_symbols_in_words_strategy'
  8 | require 'profanity-filter/engines/leet_exact_match_strategy'
  9 | require 'web_purify'
 10 | 
 11 | class ProfanityFilter
 12 |   WP_DEFAULT_LANGS    = [:en].freeze
 13 |   WP_LANG_CONVERSIONS = { es: :sp, ko: :kr, ja: :jp }.freeze
 14 |   WP_AVAILABLE_LANGS  = [
 15 |     :en, :ar, :fr, :de, :hi, :jp, :it, :pt, :ru, :sp, :th, :tr, :zh, :kr, :pa
 16 |   ].freeze
 17 | 
 18 |   LEET_STRATEGY                 = :leet
 19 |   ALLOW_SYMBOL_STRATEGY         = :allow_symbol
 20 |   PARTIAL_MATCH_STRATEGY        = :partial_match
 21 |   DUPLICATE_CHARACTERS_STRATEGY = :duplicate_characters
 22 | 
 23 |   attr_reader :available_strategies
 24 | 
 25 |   def initialize(web_purifier_api_key: nil, whitelist: [])
 26 |     # If we are using Web Purifier
 27 |     @wp_client = web_purifier_api_key ? WebPurify::Client.new(web_purifier_api_key) : nil
 28 |     @whitelist = whitelist
 29 |     raise 'Whitelist should be an array' unless @whitelist.is_a?(Array)
 30 | 
 31 |     exact_match_dictionary = load_exact_match_dictionary
 32 |     partial_match_dictionary = load_partial_match_dictionary
 33 | 
 34 |     @available_strategies = {
 35 |       ALLOW_SYMBOL_STRATEGY => ::ProfanityFilterEngine::AllowSymbolsInWordsStrategy.new(
 36 |         dictionary:  exact_match_dictionary,
 37 |         ignore_case: true
 38 |       ),
 39 |       DUPLICATE_CHARACTERS_STRATEGY => ::ProfanityFilterEngine::AllowDuplicateCharactersStrategy.new(
 40 |         dictionary:  exact_match_dictionary,
 41 |         ignore_case: true
 42 |       ),
 43 |       LEET_STRATEGY => ::ProfanityFilterEngine::LeetExactMatchStrategy.new(
 44 |         dictionary:  exact_match_dictionary,
 45 |         ignore_case: true
 46 |       ),
 47 |       PARTIAL_MATCH_STRATEGY => ::ProfanityFilterEngine::PartialMatchStrategy.new(
 48 |         dictionary:  partial_match_dictionary + exact_match_dictionary,
 49 |         ignore_case: true
 50 |       ),
 51 |     }
 52 |   end
 53 | 
 54 |   def all_strategy_names
 55 |     available_strategies.keys
 56 |   end
 57 | 
 58 |   def basic_strategy_names
 59 |     [ALLOW_SYMBOL_STRATEGY, PARTIAL_MATCH_STRATEGY]
 60 |   end
 61 | 
 62 |   def profane?(phrase, lang: nil, strategies: :basic)
 63 |     return false if phrase == ''
 64 |     return false if @whitelist.include?(phrase)
 65 | 
 66 |     if use_webpurify?
 67 |       !!(pf_profane?(phrase, strategies: strategies) || wp_profane?(phrase, lang: lang))
 68 |     else
 69 |       !!pf_profane?(phrase, strategies: strategies)
 70 |     end
 71 |   end
 72 | 
 73 |   def profanity_count(phrase, lang: nil, strategies: :basic)
 74 |     return 0 if phrase == '' || phrase.nil?
 75 | 
 76 |     pf_count = pf_profanity_count(phrase, strategies: strategies)
 77 |     if use_webpurify?
 78 |       pf_count.zero? ? wp_profanity_count(phrase, lang: lang).to_i : pf_count
 79 |     else
 80 |       pf_count
 81 |     end
 82 |   end
 83 | 
 84 |   private
 85 | 
 86 |   def use_webpurify?
 87 |     !!@wp_client
 88 |   end
 89 | 
 90 |   def filter(strategies:)
 91 |     ::ProfanityFilterEngine::Composite.new.tap do |engine|
 92 |       case strategies
 93 |       when :all
 94 |         all_strategy_names.each { |s| engine.add_strategy(available_strategies[s]) }
 95 |       when :basic
 96 |         basic_strategy_names.each { |s| engine.add_strategy(available_strategies[s]) }
 97 |       else
 98 |         strategies.each do |s|
 99 |           raise "Strategy name \"#{s}\" not supported." unless all_strategy_names.include?(s)
100 | 
101 |           engine.add_strategy(available_strategies[s])
102 |         end
103 |       end
104 |     end
105 |   end
106 | 
107 |   def pf_profane?(phrase, strategies:)
108 |     filter(strategies: strategies).profane?(phrase)
109 |   end
110 | 
111 |   def pf_profanity_count(phrase, strategies:)
112 |     filter(strategies: strategies).profanity_count(phrase)
113 |   end
114 | 
115 |   def wp_profane?(phrase, lang: nil, timeout_duration: 5)
116 |     profanity_count = wp_profanity_count(phrase, lang: lang, timeout_duration: timeout_duration)
117 | 
118 |     if profanity_count.nil? || profanity_count == 0
119 |       false
120 |     else
121 |       true
122 |     end
123 |   end
124 | 
125 |   def wp_profanity_count(phrase, lang: nil, timeout_duration: 5)
126 |     Timeout::timeout(timeout_duration) do
127 |       @wp_client.check_count phrase, lang: wp_langs_list_with(lang)
128 |     end
129 |   rescue StandardError
130 |     nil
131 |   end
132 | 
133 |   def wp_langs_list_with(lang)
134 |     langs = Set.new(WP_DEFAULT_LANGS)
135 | 
136 |     if lang
137 |       lang = shorten_language(lang).to_sym
138 |       lang = WP_LANG_CONVERSIONS[lang] || lang
139 |       if WP_AVAILABLE_LANGS.include?(lang)
140 |         langs << lang
141 |       end
142 |     end
143 | 
144 |     langs.to_a.join(',')
145 |   end
146 | 
147 |   def load_dictionary(file_path)
148 |     dir = File.dirname(__FILE__)
149 |     YAML.load(File.read("#{dir}/profanity-dictionaries/#{file_path}.yaml"))
150 |   end
151 | 
152 |   def load_exact_match_dictionary
153 |     en_dictionary = load_dictionary('en')
154 |     es_dictionary = load_dictionary('es')
155 |     pt_dictionary = load_dictionary('pt')
156 |     en_dictionary + es_dictionary + pt_dictionary
157 |   end
158 | 
159 |   def load_partial_match_dictionary
160 |     load_dictionary('partial_match')
161 |   end
162 | 
163 |   def shorten_language(lang)
164 |     lang && lang.to_s.downcase[0, 2]
165 |   end
166 | end
167 | 


--------------------------------------------------------------------------------
/lib/profanity-filter/engines/allow_duplicate_characters_strategy.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative 'exact_match_strategy'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class AllowDuplicateCharactersStrategy < ExactMatchStrategy
 7 |     DEFAULT_IGNORE_CASE = true
 8 | 
 9 |     private
10 | 
11 |     def build_word_regexp(word)
12 |       word.chars.map { |char| Regexp.escape(char) + '+' }.join
13 |     end
14 |   end
15 | end
16 | 


--------------------------------------------------------------------------------
/lib/profanity-filter/engines/allow_symbols_in_words_strategy.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative 'exact_match_strategy'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class AllowSymbolsInWordsStrategy < ExactMatchStrategy
 7 |     SYMBOLS_REGEXP = '(?:\p{Mark}|\p{Separator}|\p{Symbol}|\p{Punctuation})*'
 8 |     DEFAULT_IGNORE_CASE = true
 9 | 
10 |     private
11 | 
12 |     def build_word_regexp(word)
13 |       word.chars.map { |char| Regexp.escape(char) }.join(SYMBOLS_REGEXP)
14 |     end
15 |   end
16 | end
17 | 


--------------------------------------------------------------------------------
/lib/profanity-filter/engines/component.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module ProfanityFilterEngine
 4 |   class Component
 5 |     def profane?(text)
 6 |       raise NotImplementedError
 7 |     end
 8 | 
 9 |     def profane_words(text)
10 |       raise NotImplementedError
11 |     end
12 | 
13 |     def profanity_count(text)
14 |       profane_words(text).size
15 |     end
16 |   end
17 | end
18 | 


--------------------------------------------------------------------------------
/lib/profanity-filter/engines/composite.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'yaml'
 4 | require_relative 'component'
 5 | 
 6 | module ProfanityFilterEngine
 7 |   class Composite < Component
 8 |     attr_reader :strategies
 9 | 
10 |     def initialize
11 |       @strategies = []
12 |     end
13 | 
14 |     def add_strategy(strategy)
15 |       strategies << strategy
16 |     end
17 | 
18 |     def add_strategies(*new_strategies)
19 |       strategies.concat(new_strategies)
20 |     end
21 | 
22 |     def delete_strategy(strategy)
23 |       strategies.delete(strategy)
24 |     end
25 | 
26 |     def profane?(text)
27 |       strategies.any? { |strategy| strategy.profane?(text) }
28 |     end
29 | 
30 |     def profane_words(text)
31 |       total_words = strategies.reduce([]) do |words, strategy|
32 |         words.concat(strategy.profane_words(text).map { |w| w.gsub(/[ _\-\.]/, '') })
33 |       end
34 |       total_words.uniq
35 |     end
36 |   end
37 | end
38 | 


--------------------------------------------------------------------------------
/lib/profanity-filter/engines/exact_match_strategy.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative 'regexp_strategy'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class ExactMatchStrategy < RegexpStrategy
 7 |     DELIMITER = '(?:\b|^|$|_)'
 8 |     DEFAULT_IGNORE_CASE = false
 9 | 
10 |     attr_reader :delimiter
11 |     attr_reader :ignore_case
12 | 
13 |     def initialize(dictionary:, ignore_case: DEFAULT_IGNORE_CASE)
14 |       @dictionary = dictionary
15 |       @delimiter = DELIMITER
16 |       @ignore_case = ignore_case
17 |       @profanity_regexp = build_profanity_regexp
18 |     end
19 | 
20 |     private
21 | 
22 |     def build_profanity_regexp
23 |       option = ignore_case ? Regexp::IGNORECASE : nil
24 |       regexp_list = dictionary.map do |word|
25 |         Regexp.new("#{delimiter}#{build_word_regexp(word)}#{delimiter}", option)
26 |       end
27 | 
28 |       Regexp.union(*regexp_list)
29 |     end
30 | 
31 |     def build_word_regexp(word)
32 |       Regexp.escape(word)
33 |     end
34 |   end
35 | end
36 | 


--------------------------------------------------------------------------------
/lib/profanity-filter/engines/leet_exact_match_strategy.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative 'exact_match_strategy'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class LeetExactMatchStrategy < ExactMatchStrategy
 7 |     DEFAULT_IGNORE_CASE = true
 8 | 
 9 |     private
10 | 
11 |     def build_word_regexp(word)
12 |       build_leet_dictionary unless defined? LEET_DICTIONARY
13 |       word.chars.map do |char|
14 |         downcase_char = char.downcase
15 |         if LEET_DICTIONARY.include?(downcase_char)
16 |           LEET_DICTIONARY[downcase_char]
17 |         else
18 |           Regexp.escape(char)
19 |         end
20 |       end.join
21 |     end
22 | 
23 |     def build_leet_dictionary
24 |       lib_dir  = File.expand_path('../../../', __FILE__)
25 |       file     = File.read("#{lib_dir}/profanity-dictionaries/leet_strategy_dictionary.yaml")
26 |       raw_data = YAML.safe_load(file)
27 |       dict     = transform_data_to_regex(raw_data)
28 |       ::ProfanityFilterEngine::LeetExactMatchStrategy.const_set('LEET_DICTIONARY', dict)
29 |     end
30 | 
31 |     def transform_data_to_regex(dict)
32 |       dict.map do |char, data|
33 |         data_str = data.join('|')
34 |         dict[char] = "(?:#{data_str})"
35 |       end
36 |       dict
37 |     end
38 |   end
39 | end
40 | 


--------------------------------------------------------------------------------
/lib/profanity-filter/engines/partial_match_strategy.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative 'regexp_strategy'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class PartialMatchStrategy < RegexpStrategy
 7 |     DEFAULT_IGNORE_CASE = false
 8 | 
 9 |     attr_reader :ignore_case
10 | 
11 |     def initialize(dictionary:, ignore_case: DEFAULT_IGNORE_CASE)
12 |       @dictionary = dictionary
13 |       @ignore_case = ignore_case
14 |       @profanity_regexp = build_profanity_regexp
15 |     end
16 | 
17 |     private
18 | 
19 |     def build_profanity_regexp
20 |       option = ignore_case ? Regexp::IGNORECASE : nil
21 |       regexp_list = dictionary.map do |word|
22 |         Regexp.new("#{Regexp.escape(word)}", option)
23 |       end
24 | 
25 |       Regexp.union(*regexp_list)
26 |     end
27 |   end
28 | end
29 | 


--------------------------------------------------------------------------------
/lib/profanity-filter/engines/regexp_strategy.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative 'component'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class RegexpStrategy < Component
 7 |     DEFAULT_DELIMITER = '(?:\b|^|$|_)'
 8 | 
 9 |     attr_reader :dictionary, :profanity_regexp
10 | 
11 |     attr_writer :profanity_regexp
12 |     private :profanity_regexp=
13 | 
14 |     def initialize(dictionary:, profanity_regexp: nil)
15 |       @dictionary = dictionary
16 |       @profanity_regexp = profanity_regexp || build_profanity_regexp
17 |     end
18 | 
19 |     def profane_words(text)
20 |       text.scan(profanity_regexp).uniq
21 |     end
22 | 
23 |     def profane?(text)
24 |       profanity_regexp.match?(text)
25 |     end
26 | 
27 |     private
28 | 
29 |     def build_profanity_regexp
30 |       regexp_list = dictionary.map do |word|
31 |         Regexp.new("#{DEFAULT_DELIMITER}#{Regexp.escape(word)}#{DEFAULT_DELIMITER}")
32 |       end
33 | 
34 |       Regexp.union(*regexp_list)
35 |     end
36 |   end
37 | end
38 | 


--------------------------------------------------------------------------------
/lib/profanity-filter/version.rb:
--------------------------------------------------------------------------------
class ProfanityFilter
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = '1.0'.freeze
end
4 | 


--------------------------------------------------------------------------------
/profanity-filter.gemspec:
--------------------------------------------------------------------------------
 1 | lib = File.expand_path('lib', __dir__)
 2 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 3 | require 'profanity-filter/version'
 4 | 
 5 | Gem::Specification.new do |spec|
 6 |   spec.name          = 'profanity-filter'
 7 |   spec.version       = ProfanityFilter::VERSION
 8 |   spec.authors       = ['Maso Lin', 'Jenny Shih', 'YenTing Chen']
 9 |   spec.email         = ['dev@cardinalblue.com']
10 | 
11 |   spec.summary       = 'To detect if a given string contains profane words.'
12 |   spec.description   = 'Detects profane words using multiple strategies,
13 |                         including similarities, diacritics(sound alterations),
14 |                         constructions (multi-part), injections and unicode.'
15 |   spec.homepage      = 'https://github.com/cardinalblue/profanity-filter'
16 |   spec.license       = 'MIT'
17 |   
18 |   spec.metadata['homepage_uri'] = spec.homepage
19 |   spec.metadata['source_code_uri'] = 'https://github.com/cardinalblue/profanity-filter'
20 |   spec.metadata['changelog_uri'] = 'https://github.com/cardinalblue/profanity-filter/blob/master/CHANGELOG.md'
21 | 
22 |   # Specify which files should be added to the gem when it is released.
23 |   # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
24 |   spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
25 |     `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
26 |   end
27 |   spec.bindir        = 'exe'
28 |   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
29 |   spec.require_paths = ['lib']
30 | 
31 |   spec.add_dependency 'webpurify'
32 |   spec.add_development_dependency 'bundler', '~> 2.0'
33 |   spec.add_development_dependency 'rake', '~> 10.0'
34 |   spec.add_development_dependency 'pry', '~> 0.12.2'
35 |   spec.add_development_dependency 'minitest', '~> 5.0'
36 |   spec.add_development_dependency 'rr'
37 | end
38 | 


--------------------------------------------------------------------------------
/test/profanity_filter_engine/allow_duplicate_characters_strategy_test.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative '../test_helper'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class AllowDuplicateCharactersStrategyTest < Minitest::Test
 7 |     def test_different_latin_like_characters
 8 |       varients_struct = Struct.new(:o, :a)
 9 |       o_vals = %w(o ó ø)
10 |       a_vals = %w(a á å)
11 |       o_vals.product(a_vals).each do |vars_arr|
12 |         vars = varients_struct.new(*vars_arr)
13 |         o = vars.o
14 |         big_o = o.upcase
15 |         a = vars.a
16 |         big_a = a.upcase
17 | 
18 |         strategy = ::ProfanityFilterEngine::AllowDuplicateCharactersStrategy.new(
19 |           dictionary: ["f#{o}#{o}", "b#{a}r"],
20 |           ignore_case: true
21 |         )
22 | 
23 |         %W(f f#{o} ff#{o}).each do |text|
24 |           assert_equal [], strategy.profane_words(text)
25 |           assert_equal 0, strategy.profanity_count(text)
26 |           refute strategy.profane?(text)
27 |         end
28 | 
29 |         %W(f#{o}#{o} ff#{o}#{o} f#{o}#{o}#{o}#{o} b#{a}r b#{a}#{a}r).each do |text|
30 |           assert_equal [text], strategy.profane_words(text)
31 |           assert_equal 1, strategy.profanity_count(text)
32 |           assert strategy.profane?(text)
33 |         end
34 | 
35 |         foo_bar_upcase_text = "f#{big_o}#{o} B#{big_a}R"
36 |         assert_equal ["f#{big_o}#{o}", "B#{big_a}R"], strategy.profane_words(foo_bar_upcase_text)
37 |         assert_equal 2, strategy.profanity_count(foo_bar_upcase_text)
38 |         assert strategy.profane?(foo_bar_upcase_text)
39 | 
40 |         # sub-string should not be matched
41 |         fooo_bbar_text = "f#{o}#{o}k xb#{a}r"
42 |         assert_empty strategy.profane_words(fooo_bbar_text)
43 |         assert_equal 0, strategy.profanity_count(fooo_bbar_text)
44 |         refute strategy.profane?(fooo_bbar_text)
45 | 
46 |         # space and symbol inside the word should not be matched
47 |         symbols = %w(~ ` ! @ # $ % ^ & * ( ) _ + { } | \ [ ] " ' : ; < > ? , . / ¿ ¡)
48 |         foo_with_symbol_text = symbols.reduce("f #{o}#{o}") do |meme, symbol|
49 |           meme + " f#{symbol}#{o}#{o}"
50 |         end
51 |         assert_empty strategy.profane_words(foo_with_symbol_text)
52 |         assert_equal 0, strategy.profanity_count(foo_with_symbol_text)
53 |         refute strategy.profane?(foo_with_symbol_text)
54 |       end
55 |     end
56 |   end
57 | end
58 | 


--------------------------------------------------------------------------------
/test/profanity_filter_engine/allow_symbols_in_words_strategy_test.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative '../test_helper'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class AllowSymbolsInWordsStrategyTest < Minitest::Test
 7 |     def test_different_latin_like_characters
 8 |       varients_struct = Struct.new(:o, :a)
 9 |       o_vals = %w(o ó ø)
10 |       a_vals = %w(a á å)
11 |       o_vals.product(a_vals).each do |vars_arr|
12 |         vars = varients_struct.new(*vars_arr)
13 |         o = vars.o
14 |         big_o = o.upcase
15 |         a = vars.a
16 |         big_a = a.upcase
17 | 
18 |         strategy = ::ProfanityFilterEngine::AllowSymbolsInWordsStrategy.new(
19 |           dictionary: ["f#{o}#{o}", "b#{a}r"],
20 |           ignore_case: true
21 |         )
22 | 
23 |         foo_text = "f f#{o}  f#{o}#{o}"
24 |         assert_equal ["f#{o}#{o}"], strategy.profane_words(foo_text)
25 |         assert_equal 1, strategy.profanity_count(foo_text)
26 |         assert strategy.profane?(foo_text)
27 | 
28 |         # it should ignore case
29 |         foo_bar_upcase_text = "f#{big_o}#{o} B#{big_a}R"
30 |         assert_equal ["f#{big_o}#{o}", "B#{big_a}R"], strategy.profane_words(foo_bar_upcase_text)
31 |         assert_equal 2, strategy.profanity_count(foo_bar_upcase_text)
32 |         assert strategy.profane?(foo_bar_upcase_text)
33 | 
34 |         # space inside the word should be matched
35 |         ["f #{o}#{o}", "f #{o} #{o}", "b   #{a} r"].each do |text|
36 |           assert_equal [text], strategy.profane_words(text)
37 |           assert_equal 1, strategy.profanity_count(text)
38 |           assert strategy.profane?(text)
39 |         end
40 | 
41 |         # symbol inside the word should be matched
42 |         %w(~ ` ! @ # $ % ^ & * ( ) _ + { } | \ [ ] " ' : ; < > ? , . / ¿ ¡).each do |symbol|
43 |           text = "f#{symbol}#{o}#{symbol * 2}#{o}"
44 |           assert_equal [text], strategy.profane_words(text)
45 |           assert_equal 1, strategy.profanity_count(text)
46 |           assert strategy.profane?(text)
47 |         end
48 | 
49 |         # sub-string should not be matched
50 |         fooo_bbar_text = "f#{o}#{o}#{o} bb#{a}r"
51 |         assert_empty strategy.profane_words(fooo_bbar_text)
52 |         assert_equal 0, strategy.profanity_count(fooo_bbar_text)
53 |         refute strategy.profane?(fooo_bbar_text)
54 | 
55 |         # character but non-English should not be matched
56 |         non_english_text = "f#{o}你好#{o} bニクキュウ#{a}r"
57 |         assert_empty strategy.profane_words(non_english_text)
58 |         assert_equal 0, strategy.profanity_count(non_english_text)
59 |         refute strategy.profane?(non_english_text)
60 |       end
61 |     end
62 |   end
63 | end
64 | 


--------------------------------------------------------------------------------
/test/profanity_filter_engine/component_test.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative '../test_helper'
 4 | require 'rr'
 5 | 
 6 | module ProfanityFilterEngine
 7 |   class ComponentTest < Minitest::Test
 8 |     def setup
 9 |       super
10 |       @c = ::ProfanityFilterEngine::Component.new
11 |     end
12 |     def test_profane?
13 |       assert_raises NotImplementedError do
14 |         @c.profane?('foo')
15 |       end
16 |     end
17 | 
18 |     def test_profane_words
19 |       assert_raises NotImplementedError do
20 |         @c.profane_words('foo')
21 |       end
22 |     end
23 | 
24 |     def test_profanity_count
25 |       mock(@c).profane_words('foo bar') { ['foo', 'bar'] }
26 |       assert_equal 2, @c.profanity_count('foo bar')
27 |     end
28 |   end
29 | end
30 | 


--------------------------------------------------------------------------------
/test/profanity_filter_engine/composite_test.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative '../test_helper'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class CompositeTest < Minitest::Test
 7 |     def setup
 8 |       super
 9 |       @exact_match_strategy = ::ProfanityFilterEngine::ExactMatchStrategy.new(
10 |         dictionary: %w(foo bar),
11 |         ignore_case: true
12 |       )
13 |       @partial_match_strategy = ::ProfanityFilterEngine::PartialMatchStrategy.new(
14 |         dictionary: %w(🖕),
15 |         ignore_case: true
16 |       )
17 |       @composite = ::ProfanityFilterEngine::Composite.new
18 |     end
19 | 
20 |     def test_add_strategy
21 |       @composite.add_strategy(@exact_match_strategy)
22 |       assert_equal [@exact_match_strategy], @composite.strategies
23 |     end
24 | 
25 |     def test_add_strategies
26 |       @composite.add_strategies(@exact_match_strategy, @partial_match_strategy)
27 |       assert_equal [@exact_match_strategy, @partial_match_strategy], @composite.strategies
28 | 
29 |       new_composite = Composite.new
30 |       new_composite.add_strategies([@exact_match_strategy, @partial_match_strategy])
31 |       assert_equal [@exact_match_strategy, @partial_match_strategy], @composite.strategies
32 |     end
33 | 
34 |     def test_delete_strategy
35 |       @composite.add_strategies(@exact_match_strategy, @partial_match_strategy)
36 |       assert_equal [@exact_match_strategy, @partial_match_strategy], @composite.strategies
37 |       @composite.delete_strategy(@exact_match_strategy)
38 |       assert_equal [@partial_match_strategy], @composite.strategies
39 |     end
40 | 
41 |     def test_profane?
42 |       @composite.add_strategies(@exact_match_strategy, @partial_match_strategy)
43 | 
44 |       profane_foo_text = 'foo is a foo.'
45 |       assert @composite.profane?(profane_foo_text)
46 | 
47 |       profane_emoji_text = '🖕 is a middle finger.'
48 |       assert @composite.profane?(profane_emoji_text)
49 | 
50 |       profane_foo_and_emoji_text = 'foo and 🖕 are profane.'
51 |       assert @composite.profane?(profane_foo_and_emoji_text)
52 | 
53 |       safe_text = 'I like 🐈.'
54 |       refute @composite.profane?(safe_text)
55 |     end
56 | 
57 |     def test_profane_words
58 |       @composite.add_strategies(@exact_match_strategy, @partial_match_strategy)
59 | 
60 |       profane_foo_text = 'foo is a foo.'
61 |       assert_equal %w(foo), @composite.profane_words(profane_foo_text)
62 | 
63 |       profane_emoji_text = '🖕 is a middle finger.'
64 |       assert_equal %w(🖕), @composite.profane_words(profane_emoji_text)
65 | 
66 |       profane_foo_and_emoji_text = 'foo and 🖕 are profane.'
67 |       assert_equal %w(foo 🖕), @composite.profane_words(profane_foo_and_emoji_text)
68 | 
69 |       safe_text = 'I like 🐈.'
70 |       assert_empty @composite.profane_words(safe_text)
71 |     end
72 | 
73 |     def test_profanity_count
74 |       @composite.add_strategies(@exact_match_strategy, @partial_match_strategy)
75 | 
76 |       profane_foo_text = 'foo is a foo.'
77 |       assert_equal 1, @composite.profanity_count(profane_foo_text)
78 | 
79 |       profane_emoji_text = '🖕 is a middle finger.'
80 |       assert_equal 1, @composite.profanity_count(profane_emoji_text)
81 | 
82 |       profane_foo_and_emoji_text = 'foo and 🖕 are profane.'
83 |       assert_equal 2, @composite.profanity_count(profane_foo_and_emoji_text)
84 | 
85 |       safe_text = 'I like 🐈.'
86 |       assert_equal 0, @composite.profanity_count(safe_text)
87 |     end
88 |   end
89 | end
90 | 


--------------------------------------------------------------------------------
/test/profanity_filter_engine/exact_match_strategy_test.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative '../test_helper'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class ExactMatchStrategyTest < Minitest::Test
 7 |     def test_different_latin_like_characters
 8 |       varients_struct = Struct.new(:o, :a)
 9 |       o_vals = %w(o ó ø)
10 |       a_vals = %w(a á å)
11 |       o_vals.product(a_vals).each do |vars_arr|
12 |         vars = varients_struct.new(*vars_arr)
13 |         o = vars.o
14 |         big_o = o.upcase
15 |         a = vars.a
16 |         big_a = a.upcase
17 | 
18 |         strategy = ::ProfanityFilterEngine::ExactMatchStrategy.new(
19 |           dictionary: ["f#{o}#{o}", "b#{a}r"],
20 |           ignore_case: true
21 |         )
22 | 
23 |         foo_text = "f f#{o}  f#{o}#{o}"
24 |         assert_equal ["f#{o}#{o}"], strategy.profane_words(foo_text)
25 |         assert_equal 1, strategy.profanity_count(foo_text)
26 |         assert strategy.profane?(foo_text)
27 | 
28 |         foo_bar_text = "f#{o}#{o}  ff#{o}#{o} f#{o}#{o}b#{a}rr  b#{a}rf#{o} b#{a}r"
29 |         assert_equal ["f#{o}#{o}", "b#{a}r"], strategy.profane_words(foo_bar_text)
30 |         assert_equal 2, strategy.profanity_count(foo_bar_text)
31 |         assert strategy.profane?(foo_bar_text)
32 | 
33 |         foo_bar_upcase_text = "f#{big_o}#{o} B#{big_a}R"
34 |         assert_equal ["f#{big_o}#{o}", "B#{big_a}R"], strategy.profane_words(foo_bar_upcase_text)
35 |         assert_equal 2, strategy.profanity_count(foo_bar_upcase_text)
36 |         assert strategy.profane?(foo_bar_upcase_text)
37 | 
38 |         # sub-string should not be matched
39 |         fooo_bbar_text = "f#{o}#{o}#{o} bb#{a}r"
40 |         assert_empty strategy.profane_words(fooo_bbar_text)
41 |         assert_equal 0, strategy.profanity_count(fooo_bbar_text)
42 |         refute strategy.profane?(fooo_bbar_text)
43 | 
44 |         # space and symbol inside the word should not be matched
45 |         symbols = %w(~ ` ! @ # $ % ^ & * ( ) _ + { } | \ [ ] " ' : ; < > ? , . / ¿ ¡)
46 |         foo_with_symbol_text = symbols.reduce("f #{o}#{o}") do |meme, symbol|
47 |           meme + " f#{symbol}#{o}#{o}"
48 |         end
49 | 
50 |         assert_empty strategy.profane_words(foo_with_symbol_text)
51 |         assert_equal 0, strategy.profanity_count(fooo_bbar_text)
52 |         refute strategy.profane?(foo_with_symbol_text)
53 |       end
54 |     end
55 |   end
56 | end
57 | 


--------------------------------------------------------------------------------
/test/profanity_filter_engine/leet_exact_match_strategy_test.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative '../test_helper'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class LeetExactMatchStrategyTest < Minitest::Test
 7 |     def test_different_latin_like_characters
 8 |       varients_struct = Struct.new(:o, :a)
 9 |       o_vals = %w(o ó ø)
10 |       a_vals = %w(a á å)
11 |       o_vals.product(a_vals).each do |vars_arr|
12 |         vars = varients_struct.new(*vars_arr)
13 |         o = vars.o
14 |         big_o = o.upcase
15 |         a = vars.a
16 |         big_a = a.upcase
17 | 
18 |         strategy = ::ProfanityFilterEngine::LeetExactMatchStrategy.new(
19 |           dictionary: %W(f#{o}#{o} b#{a}r),
20 |           ignore_case: true
21 |         )
22 | 
23 |         foo_text = "f f#{o}  f#{o}#{o}"
24 |         assert_equal ["f#{o}#{o}"], strategy.profane_words(foo_text)
25 |         assert_equal 1, strategy.profanity_count(foo_text)
26 |         assert strategy.profane?(foo_text)
27 | 
28 |         foo_bar_text = "f#{o}#{o}  ff#{o}#{o} f#{o}#{o}b#{a}rr  b#{a}rf#{o} b#{a}r"
29 |         assert_equal ["f#{o}#{o}", "ff#{o}#{o}", "b#{a}r"], strategy.profane_words(foo_bar_text)
30 |         assert_equal 3, strategy.profanity_count(foo_bar_text)
31 |         assert strategy.profane?(foo_bar_text)
32 | 
33 |         foo_bar_upcase_text = "f#{big_o}#{o} B#{big_a}R"
34 |         assert_equal ["f#{big_o}#{o}", "B#{big_a}R"], strategy.profane_words(foo_bar_upcase_text)
35 |         assert_equal 2, strategy.profanity_count(foo_bar_upcase_text)
36 |         assert strategy.profane?(foo_bar_upcase_text)
37 | 
38 |         # sub-string should not be matched
39 |         fooo_bbar_text = "f#{o}#{o}#{o} bb#{a}r"
40 |         assert_empty strategy.profane_words(fooo_bbar_text)
41 |         assert_equal 0, strategy.profanity_count(fooo_bbar_text)
42 |         refute strategy.profane?(fooo_bbar_text)
43 | 
44 |         # space and symbol inside the word should not be matched
45 |         symbols = %w(~ ` ! @ # $ % ^ & * ( ) _ + { } | \ [ ] " ' : ; < > ? , / ¿ ¡)
46 |         foo_with_symbol_text = symbols.reduce("f #{o}#{o}") do |meme, symbol|
47 |           meme + " f#{symbol}#{o}#{o}"
48 |         end
49 |         assert_empty strategy.profane_words(foo_with_symbol_text)
50 |         assert_equal 0, strategy.profanity_count(fooo_bbar_text)
51 |         refute strategy.profane?(foo_with_symbol_text)
52 |       end
53 |     end
54 | 
55 |     def test_leet_variations
56 |       varients_struct = Struct.new(:f, :o)
57 |       f_vals = %w(f f. f- ƒ)
58 |       o_vals = %w(o o. o- 0 Ο ο Φ ¤ ° ø)
59 |       f_vals.product(o_vals).each do |vars_arr|
60 |         vars = varients_struct.new(*vars_arr)
61 |         strategy = LeetExactMatchStrategy.new(dictionary: %W(FoO), ignore_case: true)
62 |         text = "#{vars.f}#{vars.o}#{vars.o}"
63 |         assert_equal [text], strategy.profane_words(text)
64 |         assert_equal 1, strategy.profanity_count(text)
65 |         assert strategy.profane?(text)
66 |       end
67 |     end
68 |   end
69 | end
70 | 


--------------------------------------------------------------------------------
/test/profanity_filter_engine/partial_match_strategy_test.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative '../test_helper'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class PartialMatchStrategyTest < Minitest::Test
 7 |     def test_different_latin_like_characters
 8 |       varients_struct = Struct.new(:o, :a)
 9 |       o_vals = %w(o ó ø)
10 |       a_vals = %w(a á å)
11 |       o_vals.product(a_vals).each do |vars_arr|
12 |         vars = varients_struct.new(*vars_arr)
13 |         o = vars.o
14 |         big_o = o.upcase
15 |         a = vars.a
16 |         big_a = a.upcase
17 | 
18 |         strategy = ::ProfanityFilterEngine::PartialMatchStrategy.new(
19 |           dictionary: ["f#{o}#{o}", "b#{a}r", "🖕"],
20 |           ignore_case: true
21 |         )
22 | 
23 |         foo_text = "f f#{o}  f#{o}#{o}"
24 |         assert_equal ["f#{o}#{o}"], strategy.profane_words(foo_text)
25 |         assert_equal 1, strategy.profanity_count(foo_text)
26 |         assert strategy.profane?(foo_text)
27 | 
28 |         # sub-string should be matched
29 |         ["ff#{o}#{o} b#{a}r", "f#{o}#{o}b#{a}rr", "f#{o}#{o}#{o} b#{a}rf#{o}"].each do |foo_bar_text|
30 |           assert_equal ["f#{o}#{o}", "b#{a}r"], strategy.profane_words(foo_bar_text)
31 |           assert_equal 2, strategy.profanity_count(foo_bar_text)
32 |           assert strategy.profane?(foo_bar_text)
33 |         end
34 | 
35 |         foo_bar_upcase_text = "f#{big_o}#{o} B#{big_a}R"
36 |         assert_equal ["f#{big_o}#{o}", "B#{big_a}R"], strategy.profane_words(foo_bar_upcase_text)
37 |         assert_equal 2, strategy.profanity_count(foo_bar_upcase_text)
38 |         assert strategy.profane?(foo_bar_upcase_text)
39 | 
40 |         ["🖕", "xx🖕", "xxx 🖕x", "x🖕x"].each do |emoji_text|
41 |           assert_equal %w(🖕), strategy.profane_words(emoji_text)
42 |           assert_equal 1, strategy.profanity_count(emoji_text)
43 |           assert strategy.profane?(emoji_text)
44 |         end
45 | 
46 |         # space and symbol inside the word should not be matched
47 |         symbols = %w(~ ` ! @ # $ % ^ & * ( ) _ + { } | \ [ ] " ' : ; < > ? , . / ¿ ¡)
48 |         foo_with_symbol_text = symbols.reduce("f #{o}#{o}") do |meme, symbol|
49 |           meme + " f#{symbol}#{o}#{o}"
50 |         end
51 |         assert_empty strategy.profane_words(foo_with_symbol_text)
52 |         assert_equal 0, strategy.profanity_count(foo_with_symbol_text)
53 |         refute strategy.profane?(foo_with_symbol_text)
54 |       end
55 |     end
56 |   end
57 | end
58 | 


--------------------------------------------------------------------------------
/test/profanity_filter_engine/regexp_strategy_test.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require_relative '../test_helper'
 4 | 
 5 | module ProfanityFilterEngine
 6 |   class RegexpStrategyTest < Minitest::Test
 7 |     FAKE_WORDS = %w(foo bar)
 8 | 
 9 |     def setup
10 |       @fake_strategy = ::ProfanityFilterEngine::RegexpStrategy.new(
11 |         dictionary: FAKE_WORDS,
12 |         profanity_regexp: /foo|bar/
13 |       )
14 |       @foo_bar_text = 'foo and bar are not animals.'
15 |       @cat_text = 'cat is a cute animal.'
16 |       super
17 |     end
18 | 
19 |     def test_profanity_regexp
20 |       expected_regexp = Regexp.union(FAKE_WORDS)
21 |       assert_equal expected_regexp, @fake_strategy.profanity_regexp
22 |     end
23 | 
24 |     def test_profane?
25 |       assert @fake_strategy.profane?(@foo_bar_text)
26 |       refute @fake_strategy.profane?(@cat_text)
27 |     end
28 | 
29 |     def test_profane_words
30 |       assert_equal %w(foo bar), @fake_strategy.profane_words(@foo_bar_text)
31 |       assert_equal [], @fake_strategy.profane_words(@cat_text)
32 |     end
33 | 
34 |     def test_profanity_count
35 |       assert_equal 2, @fake_strategy.profanity_count(@foo_bar_text)
36 |       assert_equal 0, @fake_strategy.profanity_count(@cat_text)
37 |     end
38 |   end
39 | end
40 | 


--------------------------------------------------------------------------------
/test/profanity_filter_test.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require 'test_helper'
  4 | require 'web_purify'
  5 | require 'rr'
  6 | require 'pry'
  7 | 
  8 | class ProfanityFilterTest < Minitest::Test
  9 |   def setup
 10 |     @filter = ProfanityFilter.new
 11 |     @filter_with_wp = ProfanityFilter.new(web_purifier_api_key: 'fake_api_key')
 12 |   end
 13 | 
 14 |   def test_that_it_has_a_version_number
 15 |     refute_nil ::ProfanityFilter::VERSION
 16 |   end
 17 | 
 18 |   def test_profanity_with_default_config
 19 |     prepare_profane_words
 20 | 
 21 |     @profanity_one_match.each do |word|
 22 |       assert @filter.profane?(word)
 23 |       assert_equal 1, @filter.profanity_count(word)
 24 |     end
 25 | 
 26 |     @profanity_two_matches.each do |word|
 27 |       assert @filter.profane?(word)
 28 |       assert_equal 2, @filter.profanity_count(word)
 29 |     end
 30 | 
 31 |     assert @filter.profane?(@profanity_three_matches_with_emoji)
 32 |     assert_equal 3, @filter.profanity_count(@profanity_three_matches_with_emoji)
 33 | 
 34 |     @not_profane_words.each do |word|
 35 |       refute @filter.profane? word
 36 |     end
 37 |   end
 38 | 
 39 |   def test_profanity_wp_enabled
 40 |     prepare_profane_words
 41 | 
 42 |     @not_profane_words.each do |word|
 43 |     any_instance_of(WebPurify::Client) do |wp_client|
 44 |           mock(wp_client).check_count(word, lang: expected_langs(:en)) { 0 }.once
 45 |         refute @filter_with_wp.profane? word
 46 |       end
 47 |     end
 48 |   end
 49 | 
 50 |   def test_profanity_with_unsupported_whitelist_format
 51 |     assert_raises do
 52 |       ProfanityFilter.new(whitelist: 'unsupported')
 53 |     end
 54 |   end
 55 | 
 56 |   def test_profanity_with_whitelist
 57 |     profane_word = 'shit'
 58 |     assert ProfanityFilter.new.profane?(profane_word)
 59 |     refute ProfanityFilter.new(whitelist: [profane_word]).profane?(profane_word)
 60 |   end
 61 | 
 62 |   def test_config_strategies_with_nonexistent_name_throws_exception
 63 |     assert_raises do
 64 |       @filter.profane?('_', strategies: 'nonexistent')
 65 |     end
 66 |   end
 67 | 
 68 |   def test_config_with_one_strategy
 69 |     duplicated_profane_word = 'shhhhhhit'
 70 |     refute @filter.profane?(duplicated_profane_word, strategies: [])
 71 |     assert @filter.profane?(duplicated_profane_word, strategies: [:duplicate_characters])
 72 | 
 73 |     allow_symbol_profane_word = 's@h@i@t'
 74 |     refute @filter.profane?(allow_symbol_profane_word, strategies: [])
 75 |     assert @filter.profane?(allow_symbol_profane_word, strategies: [:allow_symbol])
 76 | 
 77 |     leet_profane_word = 'sĦit'
 78 |     refute @filter.profane?(leet_profane_word, strategies: [])
 79 |     assert @filter.profane?(leet_profane_word, strategies: [:leet])
 80 | 
 81 |     partial_match_profane_word = 'random🖕 string'
 82 |     refute @filter.profane?(partial_match_profane_word, strategies: [])
 83 |     assert @filter.profane?(partial_match_profane_word, strategies: [:partial_match])
 84 |   end
 85 | 
 86 |   def test_config_with_basic_and_all_strategies
 87 |     # :basic strategies is the legacy 'tolerant' filter,
 88 |     # containing 'allow_symbol' and 'partial match' strategies.
 89 |     # :all strategies is the legacy 'strict' filter,
 90 |     # containing all four strategies.
 91 | 
 92 |     only_strict_profane_texts = [
 93 |       'You are s.h-!7!',
 94 |       'You are ssshiiittt!',
 95 |     ]
 96 | 
 97 |     only_strict_profane_texts.each do |word|
 98 |       assert_equal 0, @filter.profanity_count(word, strategies: :basic)
 99 |       assert_equal 1, @filter.profanity_count(word, strategies: :all)
100 | 
101 |       refute @filter.profane?(word, strategies: :basic)
102 |       assert @filter.profane?(word, strategies: :all)
103 |     end
104 |   end
105 | 
106 |   def test_wp_profanity_count
107 |     profanity_0 = 'hi'
108 |     profanity_1 = 'tits'
109 |     profanity_2 = 'tits fuck'
110 |     any_instance_of(WebPurify::Client) do |wp_client|
111 |       mock(wp_client).check_count(profanity_0, lang: expected_langs(:en)) { 0 }.once
112 |       mock(wp_client).check_count(profanity_1, lang: expected_langs(:en)) { 1 }.once
113 |       mock(wp_client).check_count(profanity_2, lang: expected_langs(:en)) { 2 }.once
114 |     end
115 | 
116 |     assert 0, @filter_with_wp.send('wp_profanity_count', profanity_0)
117 |     assert 1, @filter_with_wp.send('wp_profanity_count', profanity_1)
118 |     assert 2, @filter_with_wp.send('wp_profanity_count', profanity_2)
119 |   end
120 | 
121 |   def test_WebPurify_request_timeout
122 |     # Test if the process of checking profanity of words with WebPurify will
123 |     # auto-terminate, if it exceeds a fixed amount of time (5 seconds).
124 |     any_instance_of(WebPurify::Client) do |wp_client|
125 |       mock(wp_client).check_count('tits', lang: expected_langs(:en)) { sleep(0.2); 1 }
126 |     end
127 | 
128 |     assert !@filter_with_wp.send('wp_profane?', 'tits', lang: 'bogus', timeout_duration: 0.1)
129 | 
130 |     any_instance_of(WebPurify::Client) do |wp_client|
131 |       mock(wp_client).check_count('tits', lang: expected_langs(:en)) { 1 }
132 |     end
133 |     assert @filter_with_wp.send('wp_profane?', 'tits', lang: 'bogus', timeout_duration: 0.1)
134 |   end
135 | 
136 |   def test_wp_profane_default_language
137 |     any_instance_of(WebPurify::Client) do |wp_client|
138 |       mock(wp_client).check_count('tits', lang: expected_langs(:en)) { 1 }
139 |     end
140 |     assert @filter_with_wp.send('wp_profane?', 'tits', lang: 'bogus')
141 |   end
142 | 
143 |   def test_wp_profane_with_a_language_not_recognized_by_WebPurify
144 |     # Test if the language codes that are send to the WebPurify API are
145 |     # the ones that it recognized (e.g. :sp instead of :es).
146 |     any_instance_of(WebPurify::Client) do |wp_client|
147 |       mock(wp_client).check_count('el tittos', lang: expected_langs(:sp)) { 1 }
148 |     end
149 |     assert @filter_with_wp.send('wp_profane?', 'el tittos', lang: :es)
150 |   end
151 | 
152 |   def test_wp_profane_with_a_long_language_code
153 |     # Test if the language codes that are send to the WebPurify API are
154 |     # the ones that it recognized (e.g. :sp instead of :es).
155 |     any_instance_of(WebPurify::Client) do |wp_client|
156 |       mock(wp_client).check_count('tittou', lang: expected_langs(:zh)) { 1 }
157 |     end
158 |     assert @filter_with_wp.send('wp_profane?', 'tittou', lang: 'zh-Hant')
159 |   end
160 | 
161 |   def test_profane_should_not_fail_if_lang_nil
162 |     any_instance_of(WebPurify::Client) do |wp_client|
163 |       mock(wp_client).check_count('bogus', lang: expected_langs) { 1 }
164 |     end
165 |     assert_silent do
166 |       @filter_with_wp.profane? 'bogus', lang: nil
167 |     end
168 |   end
169 | 
170 |   def test_profane_and_profanity_count_when_web_purify_fails
171 |     mock.instance_of(WebPurify::Client).check_count(anything, anything) do
172 |       raise StandardError
173 |     end.times(2)
174 | 
175 |     assert_equal false, @filter_with_wp.profane?('foo')
176 |     assert_equal 0, @filter_with_wp.profanity_count('foo')
177 |   end
178 | 
179 |   private
180 | 
181 |   def prepare_profane_words
182 |     @profanity_one_match = [
183 |       'bullshit',
184 |       'fuck',
185 |       'f.u.c.k',
186 |       'f uck',
187 |       'FUCK',
188 |       'FU-CK',
189 |       'Fu-cK',
190 |       'badmotherfucker',
191 |       'bad.mother*fu-c_ker',
192 |       'fu-ckpolitics',
193 |       'fuc kpolitics',
194 |       'bull shit',
195 |       'bull-shit',
196 |     ]
197 | 
198 |     @profanity_two_matches = [
199 |       'FUCK_THIS_SHIT',
200 |       'FuCk_THiS_shIT',
201 |       'fU_cK_THIS_shI_T',
202 |       'bad mothe rfucker',
203 |       '`F:+![U__@C]?#-k.<$}t%H,"i^_S&|s{*H>(i)=~T;',
204 |     ]
205 | 
206 |     @profanity_three_matches_with_emoji = 'You areshit! 🖕  s*h!i-- t sh !7 sshhiiit sh!7'
207 | 
208 |     @not_profane_words = %w(basses phuket)
209 |   end
210 | 
211 |   def with_webpurify
212 |     begin
213 |       AppConfig[:webpurify_filtering] = true
214 |       yield
215 |     ensure
216 |       AppConfig.delete :webpurify_filtering
217 |     end
218 |   end
219 | 
220 |   def expected_langs lang = nil
221 |     (ProfanityFilter::WP_DEFAULT_LANGS + [lang]).to_a.uniq.compact.join(',')
222 |   end
223 | end
224 | 
225 | 


--------------------------------------------------------------------------------
/test/test_helper.rb:
--------------------------------------------------------------------------------
1 | $LOAD_PATH.unshift File.expand_path("../lib", __dir__)
2 | require "profanity-filter"
3 | 
4 | require "minitest/autorun"
5 | 


--------------------------------------------------------------------------------