├── .gitignore ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── config ├── exceptionlists │ ├── hate.txt │ ├── mccormick.txt │ ├── profanity.txt │ ├── sex.txt │ └── violence.txt └── matchlists │ ├── hate.txt │ ├── mccormick.txt │ ├── profanity.txt │ ├── sex.txt │ └── violence.txt ├── language_filter.gemspec ├── lib ├── language_filter.rb └── language_filter │ ├── error.rb │ └── version.rb └── test ├── lib └── language_filter │ ├── methods_test.rb │ └── version_test.rb ├── lists ├── simpsons-5000.txt └── wiktionary-50000.txt └── test_helper.rb /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | .yardoc 6 | Gemfile.lock 7 | InstalledFiles 8 | _yardoc 9 | coverage 10 | doc/ 11 | lib/bundler/man 12 | pkg 13 | rdoc 14 | spec/reports 15 | test/tmp 16 | test/version_tmp 17 | tmp 18 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in language_filter.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Chris Fritz 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial 
portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | - [LanguageFilter](#languagefilter) 2 | - [About](#about) 3 | - [Guiding Principles](#guiding-principles) 4 | - [TO-DO](#to-do) 5 | - [Installation](#installation) 6 | - [Usage](#usage) 7 | - [`:matchlist` and `:exceptionlist`](#matchlist-and-exceptionlist) 8 | - [Symbol signifying a pre-packaged list](#symbol-signifying-a-pre-packaged-list) 9 | - [An array of words and phrases to screen for](#an-array-of-words-and-phrases-to-screen-for) 10 | - [A filepath or string pointing to a filepath](#a-filepath-or-string-pointing-to-a-filepath) 11 | - [Formatting your lists](#formatting-your-lists) 12 | - [`:replacement`](#replacement) 13 | - [`:creative_letters`](#creative_letters) 14 | - [Methods to modify filters after creation](#methods-to-modify-filters-after-creation) 15 | - [ActiveModel integration](#activemodel-integration) 16 | - [Contributing](#contributing) 17 | 18 | 19 | # LanguageFilter 20 | 21 | ## About 22 | 23 | LanguageFilter is a Ruby gem to detect and optionally filter multiple categories of language. It was adapted from Thiago Jackiw's Obscenity gem for [FractalWriting.org](http://fractalwriting.org) and features many improvements, including: 24 | 25 | - The ability to create and independently configure multiple language filters. 
26 | - Comes pre-packaged with multiple matchlists (for hate, profanity, sex, and violence), for more fine-tuned language detection. I think this aligns much better with the real needs of communities that might need language filtering. For example, I probably want to flag and eventually ban users that use hateful language. Then for content featuring sex, profanity, and/or violence, I can let users know exactly what to expect before delving into content, much more so than with a single, all-encompassing "mature" tag. 27 | - Simpler, more intuitive configuration. 28 | - More neutral language to accommodate a wider variety of use cases. For example, LanguageFilter uses `matchlist` and `exceptionlist` instead of `blacklist` and `whitelist`, since the gem can be used not only for censorship, but also for content *type* identification (e.g. fantasy, sci-fi, historical, etc in the context of creative writing) 29 | - More robust exceptionlist (i.e. whitelist) handling. Given a simple example of a matchlist containing `cock` and an exceptionlist containing `game cock`, the other filtering gems I've seen will flag the `cock` in `game cock`, despite the exceptionlist. LanguageFilter is a little smarter and does what you would expect, so that when sanitizing the string `cock is usually sexual, but a game cock is just an animal`, the returned string will be `**** is usually sexual, but a game cock is just an animal`. 30 | 31 | It should be noted however, that if you'd like to use this gem or another language filtering library to replace human moderation, you should not, for [reasons outlined here](http://www.codinghorror.com/blog/2008/10/obscenity-filters-bad-idea-or-incredibly-intercoursing-bad-idea.html). The major takeaway is that content filtering is a very difficult problem and context is everything. 
You can keep refining your filters, but that can easily become a full-time job and it can be difficult to do these refinements without unintentionally creating more false positives, which is extremely frustrating from a user's point of view. This kind of tool is best used to *guide* users, rather than enforce rules on them. See the guiding principles below for more on this. 32 | 33 | ## Guiding Principles 34 | 35 | These are things I've learned from developing this gem that are good to keep in mind when using or contributing to the project. 36 | 37 | **It's better to under-match than over-match.** 38 | 39 | It's extremely frustrating, for example, if someone is prevented from entering a perfectly good username that just happens to contain the word "ass" in it - as many do. It's not nearly as frustrating to be exposed to profanity that you have to strain to make out. 40 | 41 | **Using filters for language detection that aid in self-categorization is a better idea than automatically forcing mature/profane/sexual/etc tags on user-generated content.** 42 | 43 | If someone uses language that could be considered profanity in many contexts, but is not profanity in their particular context, such as "bitch" to describe a female dog or "ass" to describe a donkey, they will be justifiably upset at the automatic categorization. It's better to say, "Your story contains the following words or phrases that we think might be profane: bitch, ass. Click on the `profane` tag if you'd like to add it." Then other users can flag content that still isn't correctly categorized and moderators can edit content tags and educate the user to further prevent miscategorization. 
44 | 45 | ## TO-DO 46 | 47 | - Expand the pre-packaged matchlists to be more exhaustive 48 | - Add some activemodel integration, a la something like: 49 | 50 | ``` ruby 51 | filter_language :content, matchlist: :hate, replacement: :garbled 52 | validate_language :username, matchlist: :profanity 53 | ``` 54 | 55 | ## Installation 56 | 57 | Add this line to your application's Gemfile: 58 | 59 | ``` ruby 60 | gem 'language_filter' 61 | ``` 62 | 63 | And then execute: 64 | 65 | ``` bash 66 | $ bundle 67 | ``` 68 | 69 | Or install it yourself as: 70 | 71 | ``` bash 72 | $ gem install language_filter 73 | ``` 74 | 75 | ## Usage 76 | 77 | Need a new language filter? Here's a quick usage example: 78 | 79 | ``` ruby 80 | sex_filter = LanguageFilter::Filter.new matchlist: :sex, replacement: :stars 81 | 82 | # returns true if any content matched the filter's matchlist, else false 83 | sex_filter.match?('This is some sexual content.') 84 | => true 85 | 86 | # returns a "cleaned up" version of the text, based on the replacement rule 87 | sex_filter.sanitize('This is some sexual content.') 88 | => "This is some ****** content." 89 | 90 | # returns an array of the words and phrases that matched an item in the matchlist 91 | sex_filter.matched('This is some sexual content.') 92 | => ["sexual"] 93 | ``` 94 | 95 | Now let's go over this a little more methodically. When you create a new LanguageFilter, you simply call LanguageFilter::Filter.new, with any of the following optional parameters. Below, you can see their defaults. 96 | 97 | ``` ruby 98 | LanguageFilter::Filter.new( 99 | matchlist: :profanity, 100 | exceptionlist: [], 101 | replacement: :stars 102 | ) 103 | ``` 104 | 105 | Now let's dive a little deeper into each parameter. 106 | 107 | ### `:matchlist` and `:exceptionlist` 108 | 109 | Both of these lists can take four different kinds of inputs. 
110 | 111 | #### Symbol signifying a pre-packaged list 112 | 113 | By default, LanguageFilter comes with four different matchlists, each screening for a different category of language. These filters are accessible via: 114 | 115 | - `matchlist: :hate` (for hateful language, like `f**k you`, `b***h`, or `f*g`) 116 | - `matchlist: :profanity` (for swear/cuss words and phrases) 117 | - `matchlist: :sex` (for content of a sexual nature) 118 | - `matchlist: :violence` (for language indicating violence, such as `stab`, `gun`, or `murder`) 119 | 120 | There's quite a bit of overlap between these lists, but they can be useful for communities that may want to self-monitor, giving them an idea of the kind of content in a story or article before clicking through. 121 | 122 | #### An array of words and phrases to screen for 123 | 124 | - `matchlist: ['giraffes?','rhino\w*','elephants?'] # a non-exhaustive list of African animals` 125 | 126 | As you may have noticed, you can include regex! However, if you do, keep in mind that the more complicated regex you include, the slower the matching will be. Also, if you're assigning an array directly to matchlist and want to use regex, be sure to use single quotes (`'like this'`), rather than double quotes (`"like this"`). Otherwise, Ruby will think your backslashes are to help it interpolate the string, rather than to be interpreted literally and passed into your regex, untouched. 127 | 128 | In the actual matching, each item you enter in the list is dumped into the middle of the following regex, through the `list_item` variable. 129 | 130 | ``` ruby 131 | /\b#{list_item}\b/i 132 | ``` 133 | 134 | There's not a whole lot going on there, but I'll quickly parse it for any who aren't very familiar with regex. 135 | 136 | - `#{list_item}` just dumps in the item from our list that we want to check. 
137 | - The two `\b` on either side ensure that only text surrounded by non-word characters (anything other than letters, numbers, and the underscore), or the beginning or end of a string, are matched. 138 | - The two `/` wrapping (almost) the whole statement let Ruby know that this is a regex statement. 139 | - The `i` right after the regex tells it to match case-insensitively, so that whether someone writes `giraffe`, `GIRAFFE`, or `gIrAffE`, the match won't fail. 140 | 141 | If you'd like to master some regex Rubyfu, I highly recommend stopping at [Rubular.com](http://rubular.com/). 142 | 143 | #### A filepath or string pointing to a filepath 144 | 145 | If you want to use your own lists, there are two ways to do it. 146 | 147 | 1) Pass in a filepath: 148 | 149 | ``` ruby 150 | matchlist: File.join(Rails.root,"/config/language_filters/my_custom_list.yml") 151 | ``` 152 | 153 | 2) Pass in a `Pathname`, like Rails.root. I'm honestly not sure when you'd do this, but it was an option in Obscenity and it's still an option now. 154 | 155 | ##### Formatting your lists 156 | 157 | Now when you're actually writing these lists, they both use the same, relatively simple format, which looks something like this: 158 | 159 | ``` regex 160 | giraffes? 161 | rhino\w* 162 | elephants? 163 | ``` 164 | 165 | It's a pretty simple pattern. Each word, phrase, or regex is on its own line - and that's it. 166 | 167 | ### `:replacement` 168 | 169 | If you're not using this gem to filter out potentially offensive content, then you don't have to worry about this part. For the rest of you, the `:replacement` parameter specifies what to replace matches with, when sanitizing text. 170 | 171 | Here are the options: 172 | 173 | `replacement: :stars` (this is the default replacement method) 174 | 175 | Example: This is some ****** up ****. 176 | 177 | `replacement: :garbled` 178 | 179 | Example: This is some $@!#% up $@!#%. 
180 | 181 | `replacement: :vowels` 182 | 183 | Example: This is some f*ck*d up sh*t. 184 | 185 | `replacement: :nonconsonants` (useful where letters might be replaced with numbers, for example in L3375P34|< - i.e. leetspeak) 186 | 187 | Example: 7|-|1$ 1$ $0/\/\3 Ph*****D UP ******. 188 | 189 | (**note: `creative_letters: true` must be set to match plain words to leetspeak**) 190 | 191 | ### `:creative_letters` 192 | 193 | If you want to match leetspeak or other creative lettering, figuring out all the possible variations of each letter in a word can be exhausting. *And* you don't want to go through the whole process for each and every word, creating complicated matchlists that humans will struggle to parse. 194 | 195 | That's why there's a :creative_letters option. When set to true, your filter will use a version of your matchlist that will catch common and not-so-common letterings for each word in your matchlist. The downside to this option is a significant hit to performance. 196 | 197 | Here's an example. Let's say you have a matchlist with a single word: 198 | 199 | ``` 200 | hippopotamus 201 | ``` 202 | 203 | But what if some smart aleck types in something like this? 
204 | 205 | ``` 206 | }{!|o|o[]|o()+4|\/|v$ 207 | ``` 208 | 209 | Well, if you have :creative_letters activated, the matchlist that your filtering engine will actually use looks more like this: 210 | 211 | ``` 212 | (?:(?:h|\\#|[\\|\\}\\{\\\\/\\(\\)\\[\\]]\\-?[\\|\\}\\{\\\\/\\(\\)\\[\\]])+)(?:(?:i|l|1|\\!|\\u00a1|\\||\\]|\\[|\\\\|/|[^a-z]eye[^a-z]|\\u00a3|[\\|li1\\!\\u00a1\\[\\]\\(\\)\\{\\}]_|\\u00ac|[^a-z]el+[^a-z]))(?:(?:p|\\u00b6|[\\|li1\\[\\]\\!\\u00a1/\\\\][\\*o\\u00b0\\\"\\>7\\^]|[^a-z]pee+[^a-z])+)(?:(?:p|\\u00b6|[\\|li1\\[\\]\\!\\u00a1/\\\\][\\*o\\u00b0\\\"\\>7\\^]|[^a-z]pee+[^a-z])+)(?:(?:o|0|\\(\\)|\\[\\]|\\u00b0|[^a-z]oh+[^a-z])+)(?:(?:p|\\u00b6|[\\|li1\\[\\]\\!\\u00a1/\\\\][\\*o\\u00b0\\\"\\>7\\^]|[^a-z]pee+[^a-z])+)(?:(?:o|0|\\(\\)|\\[\\]|\\u00b0|[^a-z]oh+[^a-z])+)(?:(?:t|7|\\+|\\u2020|\\-\\|\\-|\\'\\]\\[\\')+)(?:(?:a|@|4|\\^|/\\\\|/\\-\\\\|aye?)+)(?:(?:m|[\\|\\(\\)/](?:\\\\/|v|\\|)[\\|\\(\\)\\\\]|\\^\\^|[^a-z]em+[^a-z])+)(?:(?:u|v|\\u00b5|[\\|\\(\\)\\[\\]\\{\\}]_[\\|\\(\\)\\[\\]\\{\\}]|\\L\\||\\/|[^a-z]you[^a-z]|[^a-z]yoo+[^a-z]|[^a-z]vee+[^a-z]))(?:(?:s|\\$|5|\\u00a7|[^a-z]es+[^a-z]|z|2|7_|\\~/_|\\>_|\\%|[^a-z]zee+[^a-z])+) 213 | ``` 214 | 215 | And that barely legible mess can be made completely illegible by the `sanitize` method. Even *this* crazy string of regex can be beaten though. People *will* have to get quite creative, but people *are* creative. And making it difficult to enter banned content can make it quite an attractive challenge. For this reason and because of the aforementioned performance hit, **this option is not recommended for production systems**. 216 | 217 | ### Methods to modify filters after creation 218 | 219 | If you ever want to change the matchlist, exceptionlist, or replacement type, each parameter is accessible via an assignment method. 
220 | 221 | For example: 222 | 223 | ``` ruby 224 | my_filter = LanguageFilter::Filter.new( 225 | matchlist: ['dogs?'], 226 | exceptionlist: ['dogs drool'], 227 | replacement: :garbled 228 | ) 229 | 230 | my_filter.sanitize('Dogs rule, cats drool!') 231 | => "$@!#% rule, cats drool!" 232 | my_filter.sanitize('Cats rule, dogs drool!') 233 | => "Cats rule, dogs drool!" 234 | 235 | my_filter.matchlist = ['dogs?','cats drool'] 236 | my_filter.exceptionlist = ['dogs drool','dogs are cruel'] 237 | my_filter.replacement = :stars 238 | 239 | my_filter.sanitize('Dogs rule, cats drool!') 240 | => "**** rule, **********!" 241 | my_filter.sanitize('Cats rule, dogs drool!') 242 | => "Cats rule, dogs drool!" 243 | ``` 244 | 245 | In the above case though, we just wanted to add items to the existing lists, so there's actually a better solution. They're stored as arrays, so treat them as such. Any array methods are fair game. 246 | 247 | For example: 248 | 249 | ``` ruby 250 | my_filter.matchlist.pop 251 | my_filter.matchlist << "cats are liars" << "don't listen to( the)? cats" << "why does no one heed my warnings about the cats?! aren't you getting my messages?" 252 | my_filter.matchlist.uniq! 253 | # etc... 254 | ``` 255 | 256 | ### ActiveModel integration 257 | 258 | There's not yet any built-in ActiveModel integration, but that doesn't mean it isn't a breeze to work with filters in your model. The examples below should help get you started. 
259 | 260 | ```ruby 261 | # garbles any hateful language in the content attribute before any save to the database 262 | before_save :remove_hateful_language 263 | 264 | def remove_hateful_language 265 | hate_filter = LanguageFilter::Filter.new matchlist: :hate, replacement: :garbled 266 | self.content = hate_filter.sanitize(content) 267 | end 268 | ``` 269 | 270 | ``` ruby 271 | # yells at users if they try to sneak in a dirty username, letting them know exactly why the username they wanted was rejected 272 | validate :clean_username 273 | 274 | def clean_username 275 | profanity_filter = LanguageFilter::Filter.new matchlist: :profanity 276 | if profanity_filter.match? username then 277 | errors.add(:username, "The following language is inappropriate in a username: #{profanity_filter.matched(username).join(', ')}") 278 | end 279 | end 280 | ``` 281 | 282 | ## Contributing 283 | 284 | 1. Fork it 285 | 2. Create your feature branch (`git checkout -b my-new-feature`) 286 | 3. Commit your changes (`git commit -am 'Add some feature'`) 287 | 4. Push to the branch (`git push origin my-new-feature`) 288 | 5. 
Create new Pull Request 289 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rake 2 | require "bundler/gem_tasks" 3 | 4 | require 'rake/testtask' 5 | 6 | Rake::TestTask.new do |t| 7 | t.libs << 'lib' 8 | t.test_files = FileList['test/lib/language_filter/*_test.rb'] 9 | t.verbose = true 10 | end 11 | 12 | task :default => :test -------------------------------------------------------------------------------- /config/exceptionlists/hate.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisvfritz/language_filter/084fe0654e4cb18e0a145fad1006dcf98bf4011c/config/exceptionlists/hate.txt -------------------------------------------------------------------------------- /config/exceptionlists/mccormick.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chrisvfritz/language_filter/084fe0654e4cb18e0a145fad1006dcf98bf4011c/config/exceptionlists/mccormick.txt -------------------------------------------------------------------------------- /config/exceptionlists/profanity.txt: -------------------------------------------------------------------------------- 1 | confucius -------------------------------------------------------------------------------- /config/exceptionlists/sex.txt: -------------------------------------------------------------------------------- 1 | sexton 2 | sextus 3 | bonner 4 | tittles? 5 | puzzles? 
-------------------------------------------------------------------------------- /config/exceptionlists/violence.txt: -------------------------------------------------------------------------------- 1 | x+l*i+ 2 | cilicia 3 | gunther 4 | gunnar 5 | gunwale -------------------------------------------------------------------------------- /config/matchlists/hate.txt: -------------------------------------------------------------------------------- 1 | \w*fuck[ae]r?s? 2 | fag\w* 3 | cunt\w* 4 | as*hole\w* 5 | \w*bitch\w* 6 | fudge ?pack\w* 7 | bastards? -------------------------------------------------------------------------------- /config/matchlists/mccormick.txt: -------------------------------------------------------------------------------- 1 | 2g1c 2 | 2 girls 1 cup 3 | acrotomophilia 4 | anal 5 | anilingus 6 | anus 7 | arsehole 8 | ass 9 | asshole 10 | assmunch 11 | auto erotic 12 | autoerotic 13 | babeland 14 | baby batter 15 | ball gag 16 | ball gravy 17 | ball kicking 18 | ball licking 19 | ball sack 20 | ball sucking 21 | bangbros 22 | bareback 23 | barely legal 24 | barenaked 25 | bastardo 26 | bastinado 27 | bbw 28 | bdsm 29 | beaver cleaver 30 | beaver lips 31 | bestiality 32 | bi curious 33 | big black 34 | big breasts 35 | big knockers 36 | big tits 37 | bimbos 38 | birdlock 39 | bitch 40 | black cock 41 | blonde action 42 | blonde on blonde action 43 | blow j 44 | blow your l 45 | blue waffle 46 | blumpkin 47 | bollocks 48 | bondage 49 | boner 50 | boob 51 | boobs 52 | booty call 53 | brown showers 54 | brunette action 55 | bukkake 56 | bulldyke 57 | bullet vibe 58 | bung hole 59 | bunghole 60 | busty 61 | butt 62 | buttcheeks 63 | butthole 64 | camel toe 65 | camgirl 66 | camslut 67 | camwhore 68 | carpet muncher 69 | carpetmuncher 70 | chocolate rosebuds 71 | circlejerk 72 | cleveland steamer 73 | clit 74 | clitoris 75 | clover clamps 76 | clusterfuck 77 | cock 78 | cocks 79 | coprolagnia 80 | coprophilia 81 | cornhole 82 | cum 83 | cumming 84 | 
cunnilingus 85 | cunt 86 | darkie 87 | date rape 88 | daterape 89 | deep throat 90 | deepthroat 91 | dick 92 | dildo 93 | dirty pillows 94 | dirty sanchez 95 | dog style 96 | doggie style 97 | doggiestyle 98 | doggy style 99 | doggystyle 100 | dolcett 101 | domination 102 | dominatrix 103 | dommes 104 | donkey punch 105 | double dong 106 | double penetration 107 | dp action 108 | eat my ass 109 | ecchi 110 | ejaculation 111 | erotic 112 | erotism 113 | escort 114 | ethical slut 115 | eunuch 116 | faggot 117 | fecal 118 | felch 119 | fellatio 120 | feltch 121 | female squirting 122 | femdom 123 | figging 124 | fingering 125 | fisting 126 | foot fetish 127 | footjob 128 | frotting 129 | fuck 130 | fuck buttons 131 | fudge packer 132 | fudgepacker 133 | futanari 134 | g-spot 135 | gang bang 136 | gay sex 137 | genitals 138 | giant cock 139 | girl on 140 | girl on top 141 | girls gone wild 142 | goatcx 143 | goatse 144 | gokkun 145 | golden shower 146 | goo girl 147 | goodpoop 148 | goregasm 149 | grope 150 | group sex 151 | guro 152 | hand job 153 | handjob 154 | hard core 155 | hardcore 156 | hentai 157 | homoerotic 158 | honkey 159 | hooker 160 | hot chick 161 | how to kill 162 | how to murder 163 | huge fat 164 | humping 165 | incest 166 | intercourse 167 | jack off 168 | jail bait 169 | jailbait 170 | jerk off 171 | jigaboo 172 | jiggaboo 173 | jiggerboo 174 | jizz 175 | juggs 176 | kike 177 | kinbaku 178 | kinkster 179 | kinky 180 | knobbing 181 | leather restraint 182 | leather straight jacket 183 | lemon party 184 | lolita 185 | lovemaking 186 | make me come 187 | male squirting 188 | masturbate 189 | menage a trois 190 | milf 191 | missionary position 192 | motherfucker 193 | mound of venus 194 | mr hands 195 | muff diver 196 | muffdiving 197 | nambla 198 | nawashi 199 | negro 200 | neonazi 201 | nig nog 202 | nigga 203 | nigger 204 | nimphomania 205 | nipple 206 | nipples 207 | nsfw images 208 | nude 209 | nudity 210 | nympho 211 | nymphomania 212 | octopussy 
213 | omorashi 214 | one cup two girls 215 | one guy one jar 216 | orgasm 217 | orgy 218 | paedophile 219 | panties 220 | panty 221 | pedobear 222 | pedophile 223 | pegging 224 | penis 225 | phone sex 226 | piece of shit 227 | piss pig 228 | pissing 229 | pisspig 230 | playboy 231 | pleasure chest 232 | pole smoker 233 | ponyplay 234 | poof 235 | poop chute 236 | poopchute 237 | porn 238 | porno 239 | pornography 240 | prince albert piercing 241 | pthc 242 | pubes 243 | pussy 244 | queaf 245 | raghead 246 | raging boner 247 | rape 248 | raping 249 | rapist 250 | rectum 251 | reverse cowgirl 252 | rimjob 253 | rimming 254 | rosy palm 255 | rosy palm and her 5 sisters 256 | rusty trombone 257 | s&m 258 | sadism 259 | scat 260 | schlong 261 | scissoring 262 | semen 263 | sex 264 | sexo 265 | sexy 266 | shaved beaver 267 | shaved pussy 268 | shemale 269 | shibari 270 | shit 271 | shota 272 | shrimping 273 | slanteye 274 | slut 275 | smut 276 | snatch 277 | snowballing 278 | sodomize 279 | sodomy 280 | spic 281 | spooge 282 | spread legs 283 | strap on 284 | strapon 285 | strappado 286 | strip club 287 | style doggy 288 | suck 289 | sucks 290 | suicide girls 291 | sultry women 292 | swastika 293 | swinger 294 | tainted love 295 | taste my 296 | tea bagging 297 | threesome 298 | throating 299 | tied up 300 | tight white 301 | tit 302 | tits 303 | titties 304 | titty 305 | tongue in a 306 | topless 307 | tosser 308 | towelhead 309 | tranny 310 | tribadism 311 | tub girl 312 | tubgirl 313 | tushy 314 | twat 315 | twink 316 | twinkie 317 | two girls one cup 318 | undressing 319 | upskirt 320 | urethra play 321 | urophilia 322 | vagina 323 | venus mound 324 | vibrator 325 | violet wand 326 | vorarephilia 327 | voyeur 328 | vulva 329 | wank 330 | wet dream 331 | wetback 332 | white power 333 | women rapping 334 | wrapping men 335 | wrinkled starfish 336 | xx 337 | xxx 338 | yaoi 339 | yellow showers 340 | yiffy 341 | zoophilia 342 | 
-------------------------------------------------------------------------------- /config/matchlists/profanity.txt: -------------------------------------------------------------------------------- 1 | \w*fuck\w* 2 | \w*fcuk\w* 3 | \w*fuk\w* 4 | \w*shit\w* 5 | ass+(es)? 6 | asshol\w* 7 | bastards? 8 | \w*bitch\w* 9 | cunt\w* 10 | fag\w* -------------------------------------------------------------------------------- /config/matchlists/sex.txt: -------------------------------------------------------------------------------- 1 | sex\w* 2 | blow ?job\w* 3 | fellat\w* 4 | felch\w* 5 | \w*fuck\w* 6 | wank\w* 7 | cocks? 8 | cock suck\w* 9 | poll ?smok\w* 10 | dicks? 11 | fudge ?pack\w* 12 | rim ?job\w* 13 | knob ?gobbl\w* 14 | anal 15 | rectums? 16 | ass+ 17 | as*hole\w* 18 | ballsacks? 19 | scrotums? 20 | bollocks 21 | penis(es)? 22 | boners? 23 | pricks? 24 | knobends? 25 | manhoods? 26 | wieners? 27 | breasts? 28 | tit(t(ie|y))?s? 29 | boob\w* 30 | honkers? 31 | cleavages? 32 | vagina\w* 33 | puss(y|ies|ee) 34 | muffs? 35 | cunt\w* 36 | twats? 37 | clit\w* 38 | quims? 39 | labias? 40 | buttplugs? 41 | dildos? 42 | heteros? 43 | homos? 44 | sluts? 45 | whor\w* 46 | skank\w* 47 | g+h?[ae]ys? 48 | dykes? 49 | fag\w* 50 | cumm?(ing|er)? 51 | jizz\w* 52 | pubes? 53 | puberty 54 | pubic 55 | smegma 56 | boy ?butter 57 | -------------------------------------------------------------------------------- /config/matchlists/violence.txt: -------------------------------------------------------------------------------- 1 | stab(ing|ed|s|ber)? 2 | kill\w* 3 | beat ?up 4 | beat the \w+ out of 5 | beat the \w+ out of 6 | fuck ?\w* up 7 | murder\w* 8 | genocide 9 | shoot (him|her|it|me|us|them) 10 | shot (him|her|it|me|us|them) 11 | gun\w* 12 | phasers? 13 | death( ray)? 
-------------------------------------------------------------------------------- /language_filter.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | lib = File.expand_path('../lib', __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require 'language_filter/version' 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = "language_filter" 8 | spec.version = LanguageFilter::VERSION 9 | spec.authors = ["Chris Fritz"] 10 | spec.email = ["chrisvfritz@gmail.com"] 11 | spec.description = %q{LanguageFilter is a Ruby gem to detect and optionally filter various categories of language.} 12 | spec.summary = %q{LanguageFilter is a Ruby gem to detect and optionally filter various categories of language.} 13 | spec.homepage = "http://github.com/chrisvfritz/language_filter" 14 | spec.license = "MIT" 15 | 16 | spec.files = `git ls-files`.split($/) 17 | spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } 18 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) 19 | spec.require_paths = ["lib"] 20 | 21 | spec.add_development_dependency "bundler", "~> 1.3" 22 | spec.add_development_dependency "rake" 23 | end 24 | -------------------------------------------------------------------------------- /lib/language_filter.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | require 'pathname' 4 | require 'language_filter/error' 5 | require 'language_filter/version' 6 | 7 | module LanguageFilter 8 | class Filter 9 | attr_accessor :matchlist, :exceptionlist, :replacement, :creative_letters 10 | attr_reader :creative_matchlist 11 | 12 | CREATIVE_BEG_REGEX = '(?<=\\s|\\A|_|\\-|\\.)' 13 | CREATIVE_END_REGEX = '(?=\\b|\\s|\\z|_|\\-|\\.)' 14 | 15 | DEFAULT_EXCEPTIONLIST = [] 16 | DEFAULT_MATCHLIST = File.dirname(__FILE__) + "/../config/matchlists/profanity.txt" 17 | DEFAULT_REPLACEMENT = :stars 18 | DEFAULT_CREATIVE_LETTERS 
= false 19 | 20 | def initialize(options={}) 21 | @creative_letters = if options[:creative_letters] then 22 | options[:creative_letters] 23 | else DEFAULT_CREATIVE_LETTERS end 24 | 25 | @matchlist = if options[:matchlist] then 26 | validate_list_content(options[:matchlist]) 27 | set_list_content(options[:matchlist]) 28 | else set_list_content(DEFAULT_MATCHLIST) end 29 | @creative_matchlist = @matchlist.map {|list_item| use_creative_letters(list_item)} 30 | 31 | @exceptionlist = if options[:exceptionlist] then 32 | validate_list_content(options[:exceptionlist]) 33 | set_list_content(options[:exceptionlist]) 34 | elsif options[:matchlist].class == Symbol then 35 | set_list_content(options[:matchlist],folder: "exceptionlists") 36 | else set_list_content(DEFAULT_EXCEPTIONLIST) end 37 | 38 | @replacement = options[:replacement] || DEFAULT_REPLACEMENT 39 | validate_replacement 40 | end 41 | 42 | # SETTERS 43 | 44 | def matchlist=(content) 45 | validate_list_content(content) 46 | @matchlist = case content 47 | when :default then set_list_content(DEFAULT_MATCHLIST) 48 | else set_list_content(content) 49 | end 50 | @exceptionlist = set_list_content(content,folder: "exceptionlists") if content.class == Symbol and @exceptionlist.empty? 
51 | @creative_matchlist = @matchlist.map {|list_item| use_creative_letters(list_item)} 52 | end 53 | 54 | def exceptionlist=(content) 55 | validate_list_content(content) 56 | @exceptionlist = case content 57 | when :default then set_list_content(DEFAULT_EXCEPTIONLIST) 58 | else set_list_content(content) 59 | end 60 | end 61 | 62 | def replacement=(value) 63 | @replacement = case value 64 | when :default then :stars 65 | else value 66 | end 67 | validate_replacement 68 | end 69 | 70 | # LANGUAGE 71 | 72 | def match?(text) 73 | return false unless text.to_s.size >= 3 74 | chosen_matchlist = case @creative_letters 75 | when true then @creative_matchlist 76 | else @matchlist 77 | end 78 | chosen_matchlist.each do |list_item| 79 | start_at = 0 80 | text.scan(%r"#{beg_regex}#{list_item}#{end_regex}"i) do |match| 81 | unless @exceptionlist.empty? then 82 | match_start = text[start_at..-1].index(%r"#{beg_regex}#{list_item}#{end_regex}"i) + start_at 83 | match_end = match_start + match.size-1 84 | end 85 | return true if @exceptionlist.empty? or not protected_by_exceptionlist?(match_start,match_end,text,start_at) 86 | start_at = match_end + 1 unless @exceptionlist.empty? 87 | end 88 | end 89 | false 90 | end 91 | 92 | def matched(text) 93 | words = [] 94 | return words unless text.to_s.size >= 3 95 | chosen_matchlist = case @creative_letters 96 | when true then @creative_matchlist 97 | else @matchlist 98 | end 99 | chosen_matchlist.each do |list_item| 100 | start_at = 0 101 | text.scan(%r"#{beg_regex}#{list_item}#{end_regex}"i) do |match| 102 | unless @exceptionlist.empty? then 103 | match_start = text[start_at..-1].index(%r"#{beg_regex}#{list_item}#{end_regex}"i) + start_at 104 | match_end = match_start + match.size-1 105 | end 106 | words << match if @exceptionlist.empty? or not protected_by_exceptionlist?(match_start,match_end,text,start_at) 107 | start_at = match_end + 1 unless @exceptionlist.empty? 
108 | end 109 | end 110 | words.uniq 111 | end 112 | 113 | def sanitize(text) 114 | return text unless text.to_s.size >= 3 115 | chosen_matchlist = case @creative_letters 116 | when true then @creative_matchlist 117 | else @matchlist 118 | end 119 | chosen_matchlist.each do |list_item| 120 | start_at = 0 121 | text.gsub!(%r"#{beg_regex}#{list_item}#{end_regex}"i) do |match| 122 | unless @exceptionlist.empty? then 123 | match_start = text[start_at..-1].index(%r"#{beg_regex}#{list_item}#{end_regex}"i) + start_at 124 | match_end = match_start + match.size-1 125 | end 126 | unless @exceptionlist.empty? or not protected_by_exceptionlist?(match_start,match_end,text,start_at) then 127 | start_at = match_end + 1 unless @exceptionlist.empty? 128 | match 129 | else 130 | start_at = match_end + 1 unless @exceptionlist.empty? 131 | replace(match) 132 | end 133 | end 134 | end 135 | text 136 | end 137 | 138 | private 139 | 140 | # VALIDATIONS 141 | 142 | def validate_list_content(content) 143 | case content 144 | when Array then content.all? {|c| c.class == String} || raise(LanguageFilter::EmptyContentList.new("List content array is empty.")) 145 | when String then File.exists?(content) || raise(LanguageFilter::UnkownContentFile.new("List content file \"#{content}\" can't be found.")) 146 | when Pathname then content.exist? 
|| raise(LanguageFilter::UnkownContentFile.new("List content file \"#{content}\" can't be found.")) 147 | when Symbol then 148 | case content 149 | when :default, :hate, :profanity, :sex, :violence then true 150 | else raise(LanguageFilter::UnkownContent.new("The only accepted symbols are :default, :hate, :profanity, :sex, and :violence.")) 151 | end 152 | else raise LanguageFilter::UnkownContent.new("The list content can be either an Array, Pathname, or String path to a file.") 153 | end 154 | end 155 | 156 | def validate_replacement 157 | case @replacement 158 | when :default, :garbled, :vowels, :stars, :nonconsonants 159 | else raise LanguageFilter::UnknownReplacement.new("This is not a known replacement type.") 160 | end 161 | end 162 | 163 | # HELPERS 164 | 165 | def set_list_content(list,options={}) 166 | case list 167 | when :hate then load_list File.dirname(__FILE__) + "/../config/#{options[:folder] || "matchlists"}/hate.txt" 168 | when :profanity then load_list File.dirname(__FILE__) + "/../config/#{options[:folder] || "matchlists"}/profanity.txt" 169 | when :sex then load_list File.dirname(__FILE__) + "/../config/#{options[:folder] || "matchlists"}/sex.txt" 170 | when :violence then load_list File.dirname(__FILE__) + "/../config/#{options[:folder] || "matchlists"}/violence.txt" 171 | when Array then list.map {|list_item| list_item.gsub(/(?<=[^\\]|\A)\((?=[^(\?\:)])/,'(?:')} 172 | when String, Pathname then load_list list.to_s 173 | else [] 174 | end 175 | end 176 | 177 | def load_list(filepath) 178 | IO.readlines(filepath).each {|line| line.gsub!(/\n/,''); line.gsub!(/(?<=[^\\]|\A)\((?=[^(\?\:)])/,'(?:')} 179 | end 180 | 181 | def use_creative_letters(text) 182 | new_text = "" 183 | last_char = "" 184 | first_char_done = false 185 | text.each_char do |char| 186 | if last_char != '\\' 187 | # new_text += '[\\-_\\s\\*\\.\\,\\`\\:\\\']*' if last_char != "" and char =~ /[A-Za-z]/ and first_char_done 188 | new_text += case char.downcase 189 | when 'a' then 
first_char_done = true; '(?:(?:a|@|4|\\^|/\\\\|/\\-\\\\|aye?)+)' 190 | when 'b' then first_char_done = true; '(?:(?:b|i3|l3|13|\\|3|/3|\\\\3|3|8|6|\\u00df|p\\>|\\|\\:|[^a-z]bee+[^a-z])+)' 191 | when 'c','k' then first_char_done = true; '(?:(?:c|\\u00a9|\\u00a2|\\(|\\[|[^a-z]cee+[^a-z]|[^a-z]see+[^a-z]|k|x|[\\|\\[\\]\\)\\(li1\\!\\u00a1][\\<\\{\\(]|[^a-z][ck]ay+[^a-z])+)' 192 | when 'd' then first_char_done = true; '(?:(?:d|\\)|\\|\\)|\\[\\)|\\?|\\|\\>|\\|o|[^a-z]dee+[^a-z])+)' 193 | when 'e' then first_char_done = true; '(?:(?:e|3|\\&|\\u20ac|\\u00eb|\\[\\-)+)' 194 | when 'f' then first_char_done = true; '(?:(?:f|ph|\\u0192|[\\|\\}\\{\\\\/\\(\\)\\[\\]1il\\!][\\=\\#]|[^a-z]ef+[^a-z])+)' 195 | when 'g' then first_char_done = true; '(?:(?:g|6|9|\\&|c\\-|\\(_\\+|[^a-z]gee+[^a-z])+)' 196 | when 'h' then first_char_done = true; '(?:(?:h|\\#|[\\|\\}\\{\\\\/\\(\\)\\[\\]]\\-?[\\|\\}\\{\\\\/\\(\\)\\[\\]])+)' 197 | when 'i','l' then first_char_done = true; '(?:(?:i|l|1|\\!|\\u00a1|\\||\\]|\\[|\\\\|/|[^a-z]eye[^a-z]|\\u00a3|[\\|li1\\!\\u00a1\\[\\]\\(\\)\\{\\}]_|\\u00ac|[^a-z]el+[^a-z]))' 198 | when 'j' then first_char_done = true; '(?:(?:j|\\]|\\u00bf|_\\||_/|\\]\\\\[\\|/\\[\\]\\<\\>]|/v|\\^/|[^a-z]en+[^a-z])+)' 201 | when 'o' then first_char_done = true; '(?:(?:o|0|\\(\\)|\\[\\]|\\u00b0|[^a-z]oh+[^a-z])+)' 202 | when 'p' then first_char_done = true; '(?:(?:p|\\u00b6|[\\|li1\\[\\]\\!\\u00a1/\\\\][\\*o\\u00b0\\"\\>7\\^]|[^a-z]pee+[^a-z])+)' 203 | when 'q' then first_char_done = true; '(?:(?:q|9|(?:0|\\(\\)|\\[\\])_|\\(_\\,\\)|\\<\\||[^a-z][ck]ue*|qu?eue*[^a-z])+)' 204 | when 'r' then first_char_done = true; '(?:(?:r|[/1\\|li]?[2\\^\\?z]|\\u00ae|[^a-z]ar+[^a-z])+)' 205 | when 's','z' then first_char_done = true; '(?:(?:s|\\$|5|\\u00a7|[^a-z]es+[^a-z]|z|2|7_|\\~/_|\\>_|\\%|[^a-z]zee+[^a-z])+)' 206 | when 't' then first_char_done = true; '(?:(?:t|7|\\+|\\u2020|\\-\\|\\-|\\\'\\]\\[\\\')+)' 207 | when 'u','v' then first_char_done = true; 
'(?:(?:u|v|\\u00b5|[\\|\\(\\)\\[\\]\\{\\}]_[\\|\\(\\)\\[\\]\\{\\}]|\\L\\||\\/|[^a-z]you[^a-z]|[^a-z]yoo+[^a-z]|[^a-z]vee+[^a-z]))' 208 | when 'w' then first_char_done = true; '(?:(?:w|vv|\\\\/\\\\/|\\\\\\|/|\\\\\\\\\\\'|\\\'//|\\\\\\^/|\\(n\\)|[^a-z]do?u+b+l+e*[^a-z]?(?:u+|you|yoo+)[^a-z])+)' 209 | when 'x' then first_char_done = true; '(?:(?:x|\\>\\<|\\%|\\*|\\}\\{|\\)\\(|[^a-z]e[ck]+s+[^a-z]|[^a-z]ex+[^a-z])+)' 210 | when 'y' then first_char_done = true; '(?:(?:y|\\u00a5|j|\\\'/|[^a-z]wh?(?:y+|ie+)[^a-z])+)' 211 | else char 212 | end 213 | elsif char.downcase == 'w' then 214 | new_text += 'S' 215 | else 216 | new_text += char 217 | end 218 | last_char = char 219 | end 220 | new_text 221 | end 222 | 223 | def protected_by_exceptionlist?(match_start,match_end,text,start_at) 224 | @exceptionlist.each do |list_item| 225 | current_start_at = start_at 226 | done_searching = false 227 | until done_searching do 228 | # puts "#{current_start_at}" 229 | text_snippet = text[current_start_at..-1] 230 | exception_start = text_snippet.index(%r"\b#{list_item}\b"i) 231 | # puts "#{text_snippet[%r`\b#{list_item}\b`i]}, #{text[match_start..match_end]} :: #{current_start_at}, #{text.size} :: #{match_start}, #{match_end}" if text[match_start..match_end] == "XIII" 232 | if exception_start then 233 | exception_start += current_start_at 234 | # puts "#{text_snippet[%r`\b#{list_item}\b`i]}, #{text[match_start..match_end]} :: #{current_start_at}, #{text.size} :: #{match_start}, #{match_end} :: #{exception_start}, #{text[exception_start,20]}" if text[match_start..match_end] == "XIII" 235 | if exception_start <= match_start then 236 | exception_end = exception_start + text_snippet[%r"\b#{list_item}\b"i].size-1 237 | # puts "#{text_snippet[%r`\b#{list_item}\b`i]}, #{text[match_start..match_end]} :: #{current_start_at}, #{text.size} :: #{match_start}, #{match_end} :: #{exception_start}, #{exception_end}" 238 | if exception_end >= match_end 239 | return true 240 | elsif 
text[exception_end+1..-1].index(%r"\b#{list_item}\b"i) 241 | current_start_at = exception_end+1 242 | else 243 | done_searching = true 244 | end 245 | else 246 | done_searching = true 247 | end 248 | else 249 | done_searching = true 250 | end 251 | # puts text[exception_end+1..-1].index(%r"\b#{list_item}\b"i).inspect 252 | end 253 | end 254 | return false 255 | end 256 | 257 | # This was moved to private because users should just use sanitize for any content 258 | def replace(word) 259 | case @replacement 260 | when :vowels then word.gsub(/[aeiou]/i, '*') 261 | when :stars then '*' * word.size 262 | when :nonconsonants then word.gsub(/[^bcdfghjklmnpqrstvwxyz]/i, '*') 263 | when :default, :garbled then '$@!#%' 264 | else raise LanguageFilter::UnknownReplacement.new("#{@replacement} is not a known replacement type.") 265 | end 266 | end 267 | 268 | def beg_regex 269 | if @creative_letters then 270 | CREATIVE_BEG_REGEX 271 | else 272 | '\\b' 273 | end 274 | end 275 | 276 | def end_regex 277 | if @creative_letters then 278 | CREATIVE_END_REGEX 279 | else 280 | '\\b' 281 | end 282 | end 283 | end 284 | end -------------------------------------------------------------------------------- /lib/language_filter/error.rb: -------------------------------------------------------------------------------- 1 | module LanguageFilter 2 | class Error < RuntimeError; end 3 | 4 | class UnkownContent < Error; end 5 | class UnkownContentFile < Error; end 6 | class EmptyContentList < Error; end 7 | end -------------------------------------------------------------------------------- /lib/language_filter/version.rb: -------------------------------------------------------------------------------- 1 | module LanguageFilter 2 | VERSION = "0.3.01" 3 | end -------------------------------------------------------------------------------- /test/lib/language_filter/methods_test.rb: -------------------------------------------------------------------------------- 1 | require_relative 
'../../test_helper' 2 | 3 | describe LanguageFilter do 4 | 5 | it "must be successfully created with correct defaults" do 6 | filter = LanguageFilter::Filter.new 7 | filter.must_be_kind_of LanguageFilter::Filter 8 | valid_non_empty_list? filter.matchlist 9 | filter.exceptionlist.must_be_kind_of Array 10 | filter.exceptionlist.must_be_empty 11 | filter.exceptionlist.each {|list_item| list_item.must_be_kind_of String} 12 | filter.creative_letters.must_be :==, false 13 | valid_non_empty_list? filter.creative_matchlist 14 | end 15 | 16 | it "must work with custom params and assignments" do 17 | # MATCHLIST 18 | # pre-packaged lists 19 | [:hate,:profanity,:sex,:violence].each do |list| 20 | filter = LanguageFilter::Filter.new matchlist: list 21 | filter.must_be_kind_of LanguageFilter::Filter 22 | valid_non_empty_list? filter.matchlist 23 | end 24 | # array of strings 25 | list = ['blah\\w*','test'] 26 | filter = LanguageFilter::Filter.new matchlist: list 27 | valid_non_empty_list? filter.matchlist 28 | filter.matchlist.must_be :==, list 29 | # filepath 30 | list = File.dirname(__FILE__) + '/../../../config/matchlists/profanity.txt' 31 | filter = LanguageFilter::Filter.new matchlist: list 32 | valid_non_empty_list? filter.matchlist 33 | 34 | # EXCEPTIONLIST 35 | # pre-packaged lists 36 | [:hate,:profanity,:sex,:violence].each do |list| 37 | filter = LanguageFilter::Filter.new exceptionlist: list 38 | filter.must_be_kind_of LanguageFilter::Filter 39 | valid_non_empty_list? filter.exceptionlist 40 | end 41 | # array of strings 42 | list = ['blah\\w*','test'] 43 | filter = LanguageFilter::Filter.new exceptionlist: list 44 | filter.exceptionlist.must_be_kind_of Array 45 | valid_non_empty_list? filter.exceptionlist 46 | # filepath 47 | list = File.dirname(__FILE__) + '/../../../config/matchlists/profanity.txt' 48 | filter = LanguageFilter::Filter.new exceptionlist: list 49 | valid_non_empty_list? 
filter.exceptionlist 50 | 51 | # CREATIVE_LETTERS 52 | [true,false].each do |creative_boolean| 53 | filter = LanguageFilter::Filter.new creative_letters: creative_boolean 54 | filter.creative_letters.must_be :==, creative_boolean 55 | valid_non_empty_list? filter.creative_matchlist 56 | filter.creative_matchlist.must_be :!=, filter.matchlist 57 | filter.creative_matchlist.size.must_be :==, filter.matchlist.size 58 | filter.creative_matchlist.join("").size.must_be :>, filter.matchlist.join("").size 59 | end 60 | end 61 | 62 | it "must correctly detect bad words without false positives" do 63 | test_against_word_lists 64 | end 65 | 66 | end -------------------------------------------------------------------------------- /test/lib/language_filter/version_test.rb: -------------------------------------------------------------------------------- 1 | require_relative '../../test_helper' 2 | 3 | describe LanguageFilter do 4 | 5 | it "must be defined" do 6 | LanguageFilter::VERSION.wont_be_nil 7 | end 8 | 9 | end -------------------------------------------------------------------------------- /test/lists/simpsons-5000.txt: -------------------------------------------------------------------------------- 1 | the you i a to and of it in my that is this me your for oh i'm on we what no all have be but don't it's do are just with not like so well now was get can you're know homer out here one at that's up hey right if go got bart he how our about there i'll good come yeah see will they want marge man from as think an little can't look why him back did okay who dad some us time when gonna uh take could simpson his never lisa going yes we're i've say he's by make would more been or love let's really there's has her way down mr too off them boy give something need where were only people what's then over tell am sorry had these let those two sure didn't day thing kids please wait new mom hmm should she first ever better stop great sir again god old help maybe big any thank i'd into 
home life even they're than very gasps put school said their mean work show you've laughing made son ooh we'll much hello because huh last you'll springfield call always still money must name family night groans find doing before thought won't house every next ah guy anything thanks bad isn't guess feel away burns three d'oh nothing things baby keep wow years nice other gotta does place we've world both real after mmm believe lot best guys car ow remember hear own long dog around kill moe fun whoa krusty another may stupid doesn't grunting grunts talk done here's happy today singing use play which she's children kind job wrong fine getting screams everything girl someone chuckles honey through coming eat father most everyone milhouse mother while worry many screaming hi enough looks leave watch hell listen left shut minute tonight tv maggie simpsons try talking turn town friend cool being friends beer live kid might homie without miss flanders who's room care everybody once book wanna stay seen aw hope told boys free head um such used ya wouldn't run young whole game pay dead pretty heard check continues start tires together door yourself sweet course wanted smithers sighs actually woman says read trying buy already 'em wife idea skinner beautiful problem dear forget hard cheering five face welcome found hot gone eh came you'd called groaning party crazy happened mind wish myself food anyone makes laughs movie lost stuff mrs where's die making same true ned since 'cause lady bring cut ask looking excuse music year word open hair hate eyes morning playing end special number screeching heart story chief fat tomorrow hold yet christmas shh went having damn four thinking funny afraid haven't comes ain't win finally aren't voice lord saying part else also class days each business couldn't under meet knew week phone ready least second few perfect save wasn't saw point easy bob understand fight rock took power taking gets minutes fire till full beeping goes until change 
move bell sounds dinner hit hand wants bed pick its gave deal supposed break stand humming ball ringing men drink daddy song anymore uh-oh grampa news means water needs bart's brother dr sleep hurt set times bar million ha married bet city husband gasping air mm-hmm brought attention quit chattering sit hands rest alone dream whatever chuckling plant answer learn matter line shouting sound enjoy important store team close nelson president sign hours principal kiss death drive watching side anyway mad mine apu working top pants wonderful seems quite probably birthday sister chance test trouble ice poor crowd he'll late clown itchy ago country dollars seymour secret whoo-hoo glad box case ladies inside police nobody front later fair america bus happen shot card six marriage alive throw won except exactly behind eating gentlemen soon women walk killed american telling question high bit girls order dance shall picture lenny parents spend candy beat ralph sick ho butt learned person clean plan lives yours truth started somebody rings cat send catch write against murmuring forever knows far they'll tree child good-bye different fish doctor fast folks almost terrible proud snoring eight tried war favorite scratchy laugh red earth sideshow saved barney sell nuclear promise teach along lucky wedding couple it'll wiggum bucks rich blood worse ten dogs gun future quiet throat ride eye date horn kent we'd feeling half less ends lose bought living moment return lie church ahead bill roll blue whistle human laughter words selma pie become wonder state willie body outside loud king using panting machine small smell cream hat perhaps soul mouth giving sobbing quick seven uh-huh ohh reason brain paper whimpering whoo evil cold instead office excellent dude shouldn't sweetie worst fired monkey hour paid wear act floor la stick company light greatest boss super chorus street sometimes pass yelling smart fault join safe met wake relax lousy gay honor lunch books turned film sex fact 
takes hang moe's anybody ones taken lisa's law touch either aah likes edna bag seem gas extra running feet applause pull step present luck how's reading leaving surprise sold angry star gasp worth piece sad unless speak forgot bear land tired trip space sugar homer's crap problems drunk afford blowing drop known club thinks longer key drinking table english cake one's idiot shoot daughter between kidding clears monster yo certainly low nose calling fly group works mayor evening past boring letter fall radio driving weren't cute clear white movies pop teacher ma'am window coffee worked growling black ass horrible martin himself stuck happens blow audience choice though loved nuts hurry seat prison burn gee l missed lou died heaven history college mm doorbell cheese sort wrote ring wearing dreams build brockman straight broke duff art announcer caught awesome boat krabappel report lise ruined starting questions hero absolutely age sense public lesson shoes fresh plus nah toilet camera trust chair gift visit grow months pain milk tickets park deserve cost share asked bird dad's knock crying meeting vote sea glass service blows swear breakfast ahh suppose safety system yea peace clothes sing somewhere court weeks pizza tough band buddy rather meat babies sent early third changed local realize meant rules chocolate patty science crime french geez waiting theme finish follow felt bowl summer blah missing field gives bite magic marry lots animals loves tongue ha-ha slow speaking ay born respect cash green cover deep comic chicken punch carl fell taste mistake seeing kick fool charge forgive weird month ate judge killing steal able moaning honking although fear solo taught wind teeth muttering student fill building they've bunch grade bottle handle exciting hundred sitting awful video control blame santa suit none center queen students entire sun raise scared today's ew names count moon dollar explain credit grand during simple note bathroom cannot jerk nine loser busy & 
whistling winner captain suck single hungry shirt dress trick ground price owe admit stole lis murder siren that'll jazzy list whimpers animal short santa's yep walking agree holy double wall dump road afternoon tape normal shelbyville garbage quimby bye otto record troy gold giant popular vacation broken reverend careful named prove original putting ticket congratulations joke fellow o/ george paying fix seconds cry gotten shop football scoffs smoke interesting computer sunday amazing dangerous character jail pig dumb channel smells train giggling lovely feelings hole ugly interested asking joe larry sleeping totally truly hall message jazz cents serious cell romantic plays action final guy's weekend world's wailing helper biggest push fake dare bank buzzing springfield's alarm apple yells angel spent fighting shows waste wine personal fox stars treat promised squeaking given weight middle genius john baseball honest saturday innocent bowling form sucks army hide calm ugh favor race usually de passed snow spot lived abe island museum engine dark outta lead delicious keys finished uncle famous closed o following shame plane hug nothin' bible pal naked cartoon sports offer station dirty worried upon figure government stopped built tour cheap rid dancing goin' pool completely magazine '' precious kissing oil created feels foot corn tony scary ways planet horse advice sandwich professor strong fit how'd strike arm sale whose god's spirit plenty photo truck father's cheer legs she'll private size listening played writing grave program decided should've stomach stacy elephant mail faster moans warm kitchen kwik-e-mart cookies poison needed upset others round paint beloved destroy blind moving neck according hasn't stealing tom starts washington calls fourth daddy's robot invented danger button games tie ' ran expect smile happening juice begin dig stops season heads toy leader b possibly ourselves beauty eggs wild liked shake st lights revenge nature security language 
episode accent heh experience fortune zero lying gum keeping aye feed across silly hospital everywhere sauce handsome butter freedom board miracle cable goodness appreciate hibbert common ¶ media america's bread yellow code settle doin' flying leg da filled fancy bees growls everyone's ye helping cards comedy neighbor aunt nervous arms homework saving fruit mister jump wha especially year's knife he'd twice wet malibu award mother's accept north spending shouts maude man's squealing monty classic television contest officer beach girlfriend apologize jesus bigger barking cow stories snake national cars would've gem choose wondering hollywood bottom montgomery guns mary holding remind internet stink drugs buck dying miles allowed energy cruel wash aaah coughing accident complete mel notice powerful cup thousand restaurant mess pictures career rod yelps besides chittering fans ship mama pounds radioactive male rat van grab near spit elementary standing darling keeps prize mommy pleasure losers rule series adult bones glasses tight pork someday wheel became what'd looked buying yesterday doll regular clock egg license realized cops mcclure york cartoons fingers turning possible princess project commercial heck ridiculous scene learning ideas protect library invited brilliant natural trash nerd camp factory lies bless area anywhere lock devil guard cares screw letters model yay nope chinese americans capital sexy insurance finger k loving spare empty where'd tune halloween continue someone's chalmers balls violence dry freak guest pony 'bout brown lazy score sky mountain pa warn festival level piano total parking saxophone cop force situation operation professional due press tip bother speech kept yard papa health turkey pretend treasure community guilty ma junk birds clearly switch opening hurts could've legal dirt rats jobs master sweetheart excited krusty's faith scream threw selling closer plastic everything's ding track d arrest brings darn heavy study bully 
information turns mystery jimbo covered support silence sergeant held moved whee j children's seriously electric page pet salad bone interest jack burning responsible square impossible color losing partner screech ba skin spring pink shrieks gimme allow imagine friday trapped tax ¶ son's bringing electricity steak loudly muffled whirring tea couch mood burger ray testing soup higher department certain whenever states fabulous leaves attitude whispering christian member oops roof footsteps caramba fan fellas mall pair crack goodbye poochie france falling peanut lovejoy mom's ghost panic gettin' sigh blues religion bouvier sisters surprised insane picked gross mark suckers south shopping booze explosion s thoughts desk smoking bath behold noise hanging league conversation numbers older duck hm customers further yell garage sighing hiding ocean acting taxes duty themselves bacon employee flag phony wives cause presents general rent odd cook something's success difference ''i expensive pure figured tale giggles county simpson's neighborhood perfectly ours strange hoover sucker lines release underwear clicks society apart pile bike potato written match lied kinda thy stage reach solve smooth sight superintendent boo load prepare illegal lay meaning buried network helen math noticed orange sport witch woo-hoo ears hearing sniffing chop hip shoe planning above wise escape chewing bridge hired winning drug hadn't beans split rocks clamoring organ frank parade bat issue celebrate tiny chapter happiness lonely penny pack places practice closet chomping education female gym detention tells killer serve belches disco stolen neither chanting grease consider driver crash rumbling coughs reminds healthy bomb they'd harder brothers inspector england forward c cleaning map spy spread narrator subject tall evidence search traffic definitely merry ballet breaking collection reward teachers scare entertainment wwwopensubtitlesorg yourselves anniversary prepared beef ad eww rubber 
schools bye-bye metal itself hearts failed mighty brains colonel major blaring onto powers ear alcohol indeed social pills lately gambling answers pipe bullies thee you| lookin' blimp cookie costume plow pardon manager blew robots i-i tub trade memory assistant unfortunately battle steve carry cats attack lame asleep knocking enter theater todd sticks chosen woods basketball boxes johnny training belt bush cigarettes lawn david several dessert speed british lurleen successful reached brakes weak transcript runs prank apartment name's player boyfriend simply peanuts growing dings mate value medicine lottery beating sector doughnuts often invite cap monday weather flight irish liquor plate version discuss heat row soft river people's mcbain fail wheels express decent eve talkin' slurping bust hotel nurse grumbling doubt locked golden toys ancient lemon foreign faces secrets medical rights doc lake sometime post murdered amen al tastes add behavior percent main whip beginning usa latest march correct memories wave lower peach skinner's exclaiming property decision gosh regret meal throwing doomed within minds cast awfully emergency draw straining m grandma fifth german block brave tooth nap skip item bears pathetic somehow remain mob rocket shower japanese slams gulping monkeys solution willing whale quickly warning mention led aha large among dating twins based groan arr moron kid's rescue gulps tear points easier dreamed enemy borrow millions customer clicking pumpkin difficult basement criminal jury justice mouse former begins agreed toast laundry teaching tied puzzle roger songs marge's hardly statue clever bored funeral sinister thousands charity embarrassing prefer chips rain alley lifetime aside prince guts zone finding forest tavern enjoying huge modern teddy relationship hook tears breath destroyed choking discovered charlie ii touched jokes seemed opened exercise address monsters belly lips ham vampire birth banana curse letting challenge flavor gunshot 
exclaims suggest crush rage average example shock records jar pour whoops forced hilarious drove lemonade e bald comin' budget sniffs players screwed r concert flowers united fanfare champagne changes cruise sack pregnant knowing picking grandpa bills chain airport changing fallen fella nicely comfortable whoever boy's nobody's breathe mission earned imitating shooting syrup hits dropped ruin neighbors stinks eaten buzz diet stampy nerds pulled shine reality paul hoping sync films remembered whistles hereby don opens workers ultimate showed sentence pen somebody's talked ashamed bow homer| surgery subtitles richard cross tone old-fashioned decide quietly union spell mud beats wife's kicking slowly hissing bucket introduce babe pressure wasted toss caused solid cousin stone yawns billy t-shirt herb surely fantastic duh dressed staying painting everyman finest condition artist bloody copy burglar romance somethin' setting view whooping hutz trial glue there'll bingo staring freeze coat dvd winter filthy slide product indian worker backwards brand farm rough soccer aww flaming guide stranger jimmy bleeding idiots grades east artie travel plans apology annual valuable doughnut wars bars gagging sending wiener trap gang ordered robbed pride dignity frozen chest texas gotcha awards creature punk title snack highly flower blast midnight type hidden anger punish talent seats golf immediately west advantage cabin punishment characters motor pee maggie's dave drill hippie painted moments tiger kinds opportunity mysterious usual repeat suspect exchange checks indistinct newspaper howling silver drawing groundskeeper holiday folk squeeze f barn concerned vehicle wagon guitar jacket suddenly event macgyver kicked drinks beeps shorts happier dial everybody's scratch rope adults prom closes alien quarter sucking deer corner sees snakes friendship oy apples whether noises james belong mike survive joy swing sin socks agent account canada snap applauding lick fence gentle downtown 
grumbles ol' nick junior shoo wee mix rare hire squeal smash contact vegetables tattoo strength jerks showing chick smiling watched celebrity shrimp duffman charles girl's kitty raceman dust quality dudes arts magical voices adorable nearly site delightful michaeljackson heading however file celebrities jebediah tank lincoln bug dang rusty aboard bells sake civil whew jealous disease bedroom labor honks chew earn rotten remove streets angels greetings hated defense strawberry hail ''the bubble musical position friendly 'tis dish impressed religious election delivery barely including forth replace climb pity yale risk pocket violent burned picnic nightmare cutting provide returned lamb pound therapy basket kisses obvious noble followed rub bum casino deadly liar neat avoid vision chuckle helicopter crappy shape hunting howdy riding spray oven liberty trees burns's mount receive pillow chicks holidays laws diamond tonight's sneak planned stands ended style market loose burns' results pro electrical carnival cuts coach imagination this'll owner host hopeless belongs happiest physical wolf cecil failure yup bastard school's hush stamp sand lad savings must've hop crackling ketchup scam suffering tap shadow governor grant hates supply abraham doors bags circle chug slap believed shirts below paradise obviously rice stock cracks fries slice chops babysitter fraud prices material who'd bake complain cheating kirk balloon why'd surrender twin walls forgotten barbecue progress cherry research ja dip nonsense rise hideous bright familiar opinion demand y tim fireworks wire synchro windows damage mental china steady mixed skull lift sparkle hobo reporter available slip baby's crew thud ought citizens el clancy wide electronic puppies pieces helped corpse ireland show's upstairs studio bury meow incredible michael herself gray received italian cancel nothing's patch wherever corporation grunt rabbit announce jewish stuffed g steel it| register permission debt comics vocalizing 
guards political members spanish wore snowball pleasant tube mercy owns confused booing iron finds role academy smarter pot wood ending earlier cracking becoming lawyer likely bee declare thou fishing terrace bid colors moral screen federal knocked falls roses dean parties dog's grew garden rattling calendar thumb eddie loaded cheated awake canceled crisis pleased sacred mild pole shove annoying bunny pin competition approaching vegas grown fairy hose hawaii prime sock manjula parent access beers grampa's heroes marvin gibberish bond wayne term que max served horns gums divorce ho-hi employees meals rapture ape monorail schedule normally actor tricks lessons pray tire minus offense abandoned twisted century brunch fallout costs silent products assure coast military thanksgiving perform theory chloe witness cocktail daughter's microwave man| gentleman sweat stanley gil nickel nation bride drag retirement fires boogie hundreds comet cage vey prayer rip beyond ohhh boom contract equipment p performance landing x buttons riot began booth jam rude enemies roaring wipe headed dame cans soda imitates oscar period backyard cameras wasting pudding sexual lion mistakes signed sdi lab machines corrected storm seal hills claim fate enjoyed acid beaten orchestra application dishes kissed loss useless items agreement appears glory exist raised joined matters humans stu tail policy float fantasy fever dies snarling beep tennis woke frankly accepted photos pinch package unlike assume pitch purchase valentine's disappointed merely slept otherwise recipe interrupt crashing mint knees false sat official cackling april paris breathing lobster superman ach sounded jeez deliver edison town's legend h daily effects range bail kidney childhood washing clue pump swimming foolish checked championship plain drew y'all wax sax superior generation bang drunken towards impressive cure writers blown families slipped a's chamber executive sputtering courage luann path appear holds lane speaker 
supplies gravy someplace horses bobbins cheat evergreen mm-mmm moves director novelty counts seek isotopes sour battery desperate sue cannon humor circus magazines per chirping bravo international t shary sensitive passes glove bump becomes native worthless weapon sober underpants skills olympics charged commercials separate instrumental construction uh-uh dime sleepy chip opera roosevelt wins root hats satan backup damned unit chimes shoulder rap cloud liver carefully frame commit thomas nowhere 't dolphins orders claus andy treatment stress actual brand-new adventure banner cereal log twist album cue walked unfair destruction cletus selfish web rag job22 brazil ceiling wallet current whack navy chase telephone beg softball sheep apparently pins sidekick stays skeleton thunder resist sweater mirror week's embarrassed intelligence foam pilot chairs zaius nor charm announcement castle pit noah menu pulling kids' amusement pencil alaska direct waylon crummy voted fart ted invention makeup purpose mutters kidnapped dates desert olympic stadium request ants reasons sniffles sandwiches brief flash billionaire family's louder delighted spider gurgling hockey whom farewell warned loan hurting dental bea benefit gag jerky tests stunt squawking bull sooner designed hamster cared choke design heavens thursday hammer marvelous create spin ms dad| frink majesty recycling whining karate eats financial bullet tradition hitting rush chatter bedtime beast sharp hugh '60s creative sink southern universe louis banned zoo id lighten tag boobs badly bottles aware sherman thin estate fried nervously gorge opposite jacques stores cheers represent miserable process bartender soap wrapped expression outfit sobs mumbling cigarette boxing cone snuggle kearney replaced creepy signs puppy burps suicide guests arrived discount entirely sheet clearing considered neddy digging tuck proof australia badge wing shelter anytime murmurs cupcakes crushed sticky foul ron cafeteria clinton expecting 
click solved parked denver adil lack poop basic harm swallow jones firing entitled brush babbling vice artists dummy congress core youth worship granted rolling tuesday nelson's sucked global dna tatum excitement district trunk warren command disgusting hill maniac baron responsibility mars marty trained splashmore simon arrested gate supervisor classy grandfather failing recall environment muntz motion henry lounge gumble generous wicked convention salt feast sirens incident poem thunderclap pickle nest pops thirsty hitler crawl wednesday village conference jell-o western puts laid towel chose collect horror monroe parts me| robert echoing needle monitor newspapers mexico carrying insist minister vest peter cola wealthy hiya string toes salary remains virus buffalo online slave cooking champion meltdown catholic victory praise ed dining bob's concentrate route triple guilt wreck mt highest entertain poetry spits stayed gifts closing understanding tomato cleaned wings vacuum gather mindy wig thief stroke 'n' trophy pounding purse husband's sacrifice returning washed prayers fixed image grace legally purple attempt passengers focus orleans lee technically monument computers spirits article helps gary grass hell's narrating massage steps doctors fudge attractive crossword stood burgers starring fits honeymoon biting rex oldest undercover polite nails fools troll racket apply cubes freddy madness high-pitched happily breaks potatoes pill twelve disturbing brad recommend popcorn spoon lowest vegetarian skipped raspberry deserves routine involved laddie murphy ahoy explode tools cube fame pageant relief loyal chili not| exact rabbi knee influence recent radiation nail dropping ex hiccups tv's sister's curly disaster shack bullets pointing bikes swallowed sir| warming forbidden ba-ba un hamburger goo confession supreme helpful shudders noon cakes palace forgetting blanket europe creaking bonnie harvard sting cuckoo jim suspicious nut checking freaks straighten thrill 
kicks patient section authority lance coincidence {yi}singing pages eclipse scores swim tragic scar standard millionaire dome jay tool queer notes settled typical victim bitch taco appearance martin's nature's judging motel cries snacks stammering soy clowns rate lawyers retired lenny's economy drank featuring nanny loaf waited darryl roots behave treated crank flies fifty activity safely rooms glorious footage citizen suspended journey russian pancakes ivory flesh proposition sincerely grapes knight filling newest mustard hunt cheech tasty eveybody vampires weapons goat howard quarters liquid knowledge gutter immigrants ignore passion confidence wears pushing practically fee filth personally luckily betty wrap breeze fought bachelor combination so-called balance essay nights psst programming troubled butts coins net houten deeply szyslak pronounce defend campaign complaining puke locker punching production spoken infernal inner ruining bailey beard mexican halfway pushed winners increase forms problemo pigs jet it'd sausage eternal session billion rainbow clattering fashion attempted chef commissioner captured angrily ogdenville 7-g teacher's joint tapping applesauce mortgage chong pretzels easily necessary gummi surface satellite text pretending vance fridge burst gunfire pies tying university jessica california arranged towels bart| writer script softly stripes thick portrait buzzes topic couples vicious appetite ken robbery impress ashes experienced orphanage explanation trumpets oughta stab girls' vodka expected groin poke scoop counting squirt homemade pockets principal's sweetest charges require skinny fur bothering marrying virgil hip-hop cracker greater haircut avenue hung husbands braces worms activities embarrass thrown maya fatty desire painful sketch serving arnie closest pm spine trail dictionary bicycle recognize ambulance distance shakes microphone honored sec celebrating shuddering sarah stinking rob joining response versus hog neddie source aisle 
council caring william eliza creatures baloney particular ralphie mattress papers bonus broadcast taylor else's threat awkward asks refrigerator busted roller stickers yoink na payment bounty waters murderer chill lionel harsh erotic shown dinosaurs eventually flanders' elves prohibition suffer men's urge outstanding ceremony chat pranks deed uhh maze other's waverly shave punks ford chortling she'd mustache walks signal necklace defeat anyhoo sushi frosting tries direction directions homes facts hooked milhouse's stretch trumpeting frightened money's therefore franchise inventor shortcut franklin los upbeat what'll armed reporting exit ducks juliet timmy hulk culture remote ironic inspired herman fatso t-shirts dynamite cooties priceless committee bergstrom commission dive hut donut madam haw smith torture blonde banging younger cave jumping sponge substitute bathing encourage woman's hooting odds popularity twenty nagging occasion cotton haunted mere gabbo gig novel perfume soldier privacy holes off| batman sneaking jelly drum mask voyage hounds procedure barley spill discussion crunching pretzel merchandise improve complex frisbee events drawer dough angelica bold frosty gut previous chuck bites gently poker san becky einstein con gorilla escaped bound ballad freezer heroic extreme ticking gathered yak broadway melody scheme wisdom here| flame permanent era hunk industry workin' cured y'ello blessed bobo freakin' zap producers replacement effort betrayed swings conditioner dealer autograph screeches fooling doggie arthur gore review dan time's pirate tragedy indians buddies jerry dork selected ehh clouds burnsie goliath flush le florida mentioned steam humanity radical jingle flat bands confess volunteer author lover dumped amount whines ankle scientific creating kang martha plates cemetery suggestions cows curious myers base mmm-hmm bodies melon thing's caller dot pete touchdown entered ability swallowing fetch darkness cleaner flushing traveling shy bananas 
fog demon magnificent launch leak bunk amber literally greedy sloppy plug when's car's eighth sample virgin technology nicer haw-haw interview fears plutonium bitter bend shrieking framed impression settlement ratings promises worn coaster behalf cliff cord presentation pirates officials matlock compete envy naturally tower propose cups marijuana discipline revving raw saint drives mold praying hardest cooler pedal uses christ hah baked senior syndrome result dam ages angle coke baby-sitter allowance comfort floating roman canadian dice stonecutters crackers kennedy clip peaceful sings mailman rip-off barks snitch shout roars quest moleman troubles managed teamwork woo lewis tense dozen determine display whacking sam convinced squishy witches derby bushes jungle terrific classroom rv phones mom| coolest avenge studies v rack diaper wally sissy homeless carried cement vomit life's assignment spice toe nixon oak mothers exhales tsk degree musician judgment explore elected senator india visiting object attacked shakespeare bribe mule lobby goose fed spaceship alert patented zing bases et ordinary walt fred what| thuds shark passing brandine danish cases disappeared gunshots funky souls aliens flew doh profits mostly scale outrageous paste strip slurps president's abuse crimes hid racing fry grammar kidnapping je proper describe poster dumpster developed bench management humble shush effect fascinating understood largest drivers daredevil ruth depressed observe chasing joan luxury reindeer baking teen eliminate piggy overweight gags constant desserts miami extremely pep mock crossed ribs reactor fights uniform fbi stake expert grinding fully shalt positive crow pearls turtle night's thudding sunshine lamp bait combat swell raven hercules boston stare quitting generations oi goorsha atlanta cart whipped mansion mechanical tons grizzlies species ingredient snickering directly scraping cocks nebraska jenny spoil imaginary bartholomew wolfcastle catching spaghetti 
invisible leprechaun homerj belts sprinkle octopus protection rug barney's baboon squawks recording dawn hippo permit hike corporate uniforms dismissed sane teresa medium causing smiles equals upside agnes pointless whisper weakness candle sleeps popping hopes advance mascot patrol answered sharks tummy joey weakly shampoo controversial yuck attached tonic twenty-five utility ovenfresh ok lf institute dreaming eternity ripped static moustache honk cram units tuna violin coffin right| self-esteem pipes carrot recently pepper messages wrestling july ungrateful hippies yeah| relieved starving location requires zombies possum marching secretary massive jeans orphan laughed legendary coin rank cupcake pets jolly exhibit fort bony technical allison ouch client toxic mop cities squares printed dentist drops souvenir correction sermon owned slay furniture policeman lettuce women's pace zinc studying goal goods creation shots struggle target edge balloons kindly dwight hallelujah penalty rumors treats wacky roast highway bugs objection clerk cigars safer elderly worries crashes marge| criminals jewel placed strategy listened minor peas speaks kansas goir lard limbo wizard paw scent poisoning kills dedicate pointy grandma's fountain astronaut roy proceed fuzzy dryer shovel adventures landed batteries grimes idol background yawning horny betting witty no| london strangle envelope restraining powersauce trailer cd jaw symphony coney struck volume boot gain covers farmer jr self whiskey mac you|to suffered goofy brother's hans reverse bah venus arrive sunk genuine voters charming marjorie germans lovers recital bigfoot vent rose ben karma gloria nasty animated snake's strap downstairs zoom tones graffiti entry ty jaws it'' focused shocking approve tender melted mutants vs plants pan protest presenting swept alternative morons sadly spinning rolls farts schemes despite occasional spells models tearing chemicals sharing released barrel tab hunter underground cowboy wedded shiny 
connie hyah emotional willie's aid sticking do| reunion yogurt trusted presence engaged wuss boots assistance wound begging globe anchor include dye resumes tribute precisely stan shady complicated sideburns actors casual lunchtime ease dee salesman bumper 100% nobel louie deck witnesses cheek murderous imagined marketing expired catfish objects mush sheriff entertaining prisoner monte tips korean disney constitution capture exclusive teenagers luke motorcycle aim sleigh vietnam images halftime allen guarantee patrick's carpet moist scientist lightning eugene shattering joes dinosaur profit clumsy johnson added attorney refreshing buns sprinkles steamed hmph shocked meaningless shining lunches votes pledge caramel scientists respected michigan exotic conscience scum bulletin typing disgrace snickers oliver offering experiment pine dolly dramatic oui feedback banks terms sends donuts practical limit republican batter wishes throughout claims rainier isolated weekly sells abbey fold beware zombie mill assembly phase snaps sizzling rodeo sheets toward opposed domestic frog firm myth supervising oaf stepped puff rented riviera pawn committed adopted crooked stereo marks blamed cocoa compared ribbon worthy dimoxinil destroying bellowing dancer hooray elbow concludes mummy stairs corners shauna attacks degrees shift denis drederick ski italy ta royal nearby concern nancy leary yello authorities game's answering buttocks w exception cracked stones coal mischief hisses cawing eyeballs broadcasting weigh lets unbelievable print waffles crotch differences stupidest collar honeybunny smartest downloaded tournament ten-year-old slim slight honestly refer broad solar bombardment connection grocery whiz that'd inch officially determined proven whoop-dee-do ink discover pencils fooled wooden stopping bra annie offered boil goodman repay classical 2 | -------------------------------------------------------------------------------- /test/test_helper.rb: 
require 'minitest/autorun'
require 'minitest/pride'
require File.expand_path('../../lib/language_filter.rb', __FILE__)

# Asserts (via minitest expectations) that +list+ is a non-empty Array
# whose items are all Strings.
#
# @param list [Object] the value under test
# @return [Array] +list+ itself (the result of +each+) when every expectation holds
def valid_non_empty_list?(list)
  list.must_be_kind_of Array
  list.wont_be_empty
  list.each { |list_item| list_item.must_be_kind_of String }
end

# Runs every pre-packaged matchlist against the bundled word lists and checks
# that the matched words equal the recorded expectations, first with
# +creative_letters+ disabled (+:normal+) and then enabled (+:creative+).
#
# Results are compared with +==+ on arrays, so both the words and their order
# must match the recorded lists exactly.
def test_against_word_lists
  # Reads a fixture from test/lists/<name>.txt, next to this helper.
  read_list = lambda do |name|
    File.read(File.join(File.dirname(__FILE__), 'lists', "#{name}.txt"))
  end

  word_lists = [
    { # The simpsons word list
      name: "simpsons-5000",
      contents: read_list.call("simpsons-5000"),
      expected: {
        normal: [
          {
            name: :hate,
            results: ["bitch","bastard"]
          },
          {
            name: :profanity,
            results: ["ass","bastard","bitch"]
          },
          {
            name: :sex,
            results: ["sex","sexy","sexual","cocks","ass","wiener","boobs","gay"]
          },
          {
            name: :violence,
            results: ["stab","kill","killed","killing","killer","kills","murder","murdered","murderer","murderous","gun","guns","gunshot","gunfire","gunshots","death"]
          }
        ],
        creative: [
          {
            name: :hate,
            results: ["bitch","bastard"]
          },
          {
            name: :profanity,
            results: ["ass","bastard","bitch"]
          },
          {
            name: :sex,
            results: ["sex","sexy","sexual","cocks","ass","wiener","boobs","gay"]
          },
          {
            name: :violence,
            results: ["stab","kill","killed","killing","killer","kills","murder","murdered","murderer","murderous","gun","guns","gunshot","gunfire","gunshots","death"]
          }
        ]
      }
    },
    { # Wiktionary's 50000 most commons words
      name: "wiktionary-50000",
      contents: read_list.call("wiktionary-50000"),
      expected: {
        normal: [
          {
            name: :hate,
            results: ["fagots","fagged","faggots","bitch","bastard","Bastard"]
          },
          {
            name: :profanity,
            results: ["ass","asses","Ass","bastard","Bastard","bitch","fagots","fagged","faggots"]
          },
          {
            name: :sex,
            results: ["sex","sexual","sexes","Sex","Sexual","cock","cocks","Cock","Dick","DICK","ass","Ass","penis","prick","pricks","manhood","breast","breasts","cleavage","muff","Homo","homo","slut","whore","gay","Gay","dyke","Dyke","dykes","fagots","fagged","faggots","puberty"]
          },
          {
            name: :violence,
            results: ["stab","stabs","killed","kill","killing","kills","Kill","Killed","murder","murdered","murderer","murderous","murderers","murders","Murder","murdering","guns","gun","gunpowder","gunners","gunboats","gunner","Gun","gunboat","Guns","death","Death","DEATH"]
          }
        ],
        creative: [
          {
            name: :hate,
            results: ["fagots","fagged","faggots","Kunti","kunt","bitch","bastard","Bastard"]
          },
          {
            name: :profanity,
            results: ["assez","ass","asses","Ass","bastard","Bastard","bitch","Kunti","kunt","fagots","fagged","faggots"]
          },
          {
            name: :sex,
            results: ["sex","sexual","sexes","Sex","Sexual","cock","cocks","Cock","Dick","DICK","ass","Ass","penis","prick","pricks","manhood","breast","breasts","cleavage","muff","Kunti","kunt","Homo","homo","slut","whore","gay","Gay","dyke","Dyke","dykes","fagots","fagged","faggots","puberty"]
          },
          {
            name: :violence,
            results: ["stabbed","stab","stabbing","stabs","killed","kill","killing","kills","Kill","Killed","kill'd","murder","murdered","murderer","murderous","murderers","murders","Murder","murdering","guns","gun","gunpowder","gunners","gunboats","gunner","Gun","gunboat","Guns","death","Death","DEATH"]
          }
        ]
      }
    }
  ]

  # Expectation group => value passed as creative_letters. Hash insertion order
  # keeps the original sequence: all :normal checks, then all :creative checks.
  modes = { normal: false, creative: true }

  word_lists.each do |wordlist|
    modes.each do |mode, creative_letters|
      wordlist[:expected][mode].each do |matchlist|
        filter = LanguageFilter::Filter.new(matchlist: matchlist[:name], creative_letters: creative_letters)
        # Match once and reuse the result for both the assertion and its
        # failure message; the message string is built eagerly, so calling
        # filter.matched inside it would scan the whole word list a second time.
        matched = filter.matched(wordlist[:contents])
        matched.must_be :==, matchlist[:results],
          "expected\n#{matched}\nto be\n#{matchlist[:results]}\nwhile testing #{mode} #{matchlist[:name]} against #{wordlist[:name]}"
      end
    end
  end
end