├── install.sh ├── install.bat ├── .idea ├── vcs.xml ├── .gitignore ├── modules.xml └── wordlist-sanitizer.iml ├── .gitignore ├── LICENSE ├── README.md ├── main.go └── bad-words.txt /install.sh: -------------------------------------------------------------------------------- 1 | go install 2 | cp bad-words.txt $GOPATH/bin/bad-words.txt -------------------------------------------------------------------------------- /install.bat: -------------------------------------------------------------------------------- 1 | go install main.go 2 | copy bad-words.txt %GOPATH%\bin\bad-words.txt 3 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | test/ 15 | 16 | # Dependency directories (remove the comment below to include it) 17 | # vendor/ 18 | 
-------------------------------------------------------------------------------- /.idea/wordlist-sanitizer.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Gabe Rust 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # wordlist-sanitizer 2 | Remove Offensive and Profane Words from Wordlists 3 | 4 | # About 5 | `wordlist-sanitizer` will create a copy of a file appended with`-clean` that has had a specified list of bad words removed. 
6 | If the input is a directory, `wordlist-sanitizer` will recursively create a clone of the directory (directory names also appended with `-clean`) with all files inside sanitized. 7 | The default bad words list came from [https://www.cs.cmu.edu/~biglou/resources/](https://www.cs.cmu.edu/~biglou/resources/) and was programmatically reduced in size (by removing redundant entries). 8 | 9 | # Installation 10 | Ensure that Golang is installed, and the GOPATH variable is in your PATH 11 | 12 | ```bash 13 | git clone https://github.com/BishopFox/wordlist-sanitizer 14 | cd wordlist-sanitizer 15 | ``` 16 | 17 | Windows: 18 | ```ps 19 | .\install.bat 20 | ``` 21 | 22 | *nix: 23 | ```bash 24 | chmod +x install.sh 25 | ./install.sh 26 | ``` 27 | 28 | # Usage 29 | ```bash 30 | $ wordlist-sanitizer -h 31 | Usage of wordlist-sanitizer: 32 | -bad string 33 | The list of words to be stripped. (default "[EXE_PATH]\\bad-words.txt") 34 | -out string 35 | The output directory. (default ".") 36 | -path string 37 | The path of the target file or directory. 38 | May also be passed after all flags as a positional argument. (default ".") 39 | -threads int 40 | Concurrent worker count. (default 100) 41 | ``` 42 | 43 | Example: 44 | ```bash 45 | $ wordlist-sanitizer -threads 100000 SecLists\Usernames\xato-net-10-million-usernames.txt 46 | SecLists\Usernames\xato-net-10-million-usernames.txt 47 | 1101033 bad words were removed out of 8295455 words. 
// badWords is the single, process-wide list of substrings that disqualify a
// word. checkWord compares it against lowercased input, so entries are
// expected to be lowercase already.
var badWords []string

// badCount is the running total of words removed across all files.
var badCount uint64

// totalWords is the running total of words examined across all files.
var totalWords uint64

// check aborts the program by panicking with e when e is non-nil.
// A nil error is a no-op.
func check(e error) {
	if e == nil {
		return
	}
	panic(e)
}

// checkWord reports whether word is acceptable, i.e. whether its lowercased
// form contains none of the entries in badWords.
//
// It returns false as soon as one entry is found as a substring, and true
// when no entry matches (including when badWords is empty).
func checkWord(word string) bool {
	candidate := strings.ToLower(word)
	for i := range badWords {
		if strings.Contains(candidate, badWords[i]) {
			return false
		}
	}
	return true
}
47 | // `fpath` string: The filepath of the input file 48 | // `opath` string: The output directory path 49 | // `threads` int: The maximum number of concurrent goroutines processing the file 50 | func sanitizeList(fpath string, opath string, threads int) { 51 | // Print the current filepath cause leet 52 | fmt.Println(fpath) 53 | 54 | // Obtain file information for the current path 55 | info, err := os.Stat(fpath) 56 | check(err) 57 | 58 | // Check if file is directory (Base Case Check) 59 | if info.IsDir() { 60 | // File is directory, obtain directory contents 61 | dir, err := ioutil.ReadDir(fpath) 62 | check(err) 63 | 64 | // Call `sanitizeList` recursively on each listing in current path 65 | for _, f := range dir { 66 | sanitizeList(filepath.Join(fpath, f.Name()), opath, threads) 67 | } 68 | } else { 69 | // File is NOT a directory: Base Case Reached 70 | 71 | // Read file into memory 72 | content, err := ioutil.ReadFile(fpath) 73 | check(err) 74 | 75 | // Split content of file into array of whitespace separated words 76 | words := strings.Fields(string(content)) 77 | 78 | // Append file word count to global word count 79 | totalWords += uint64(len(words)) 80 | 81 | // Create channels for passing strings and queueing work 82 | results := make(chan string) 83 | queue := make(chan string) 84 | 85 | // If `threads` is greater than file word count, 86 | // Reduce threads to word count to remove excessive resource allocation 87 | if threads > len(words) { 88 | threads = len(words) 89 | } 90 | 91 | // Create Blocking WaitGroup for worker goroutines 92 | // Add number of threads to WaitGroup 93 | var waitGroup sync.WaitGroup 94 | waitGroup.Add(threads) 95 | 96 | // Create a goroutine for each "thread" 97 | for i := 0; i < threads; i++ { 98 | go func() { 99 | // Decrease WorkGroup before function exits 100 | defer waitGroup.Done() 101 | 102 | // Wait for words from work queue, breaks when `queue` closes 103 | for s := range queue { 104 | // Push word to results if 
good, otherwise increment global bad word counter 105 | if checkWord(s) { 106 | results <- s 107 | } else { 108 | badCount++ 109 | } 110 | } 111 | }() 112 | } 113 | 114 | // Lock mutex to prevent parent from exiting prematurely 115 | var mutex sync.Mutex 116 | mutex.Lock() 117 | 118 | // Goroutine creating new file and processing results from workers 119 | go func() { 120 | // Unlock mutex when function is finished 121 | defer mutex.Unlock() 122 | 123 | // Split filepath into array of directory names 124 | tempPath := fpath 125 | if opath != "." { 126 | tempPath = filepath.Join(opath, fpath) 127 | } 128 | dirs := strings.Split(strings.ReplaceAll(tempPath, "\\", "/"), "/") 129 | 130 | // Append -clean to each directory and filename 131 | for i := 0; i < len(dirs); i++ { 132 | if dirs[i] != "." { 133 | dirs[i] = dirs[i] + "-clean" 134 | } 135 | } 136 | 137 | // Create the new directory structure 138 | if len(dirs) > 1 { 139 | err := os.MkdirAll(filepath.Join(dirs[:len(dirs)-1]...), os.ModePerm) 140 | check(err) 141 | } 142 | 143 | // Create and open the new file 144 | f, err := os.Create(filepath.Join(dirs...)) 145 | check(err) 146 | defer f.Close() 147 | 148 | // Create buffer for new file 149 | w := bufio.NewWriter(f) 150 | defer w.Flush() 151 | 152 | // Wait for words from results channel, and write them to the new file. 
153 | // Breaks when `results` closes 154 | for s := range results { 155 | _, err := w.WriteString(s + "\n") 156 | check(err) 157 | } 158 | }() 159 | 160 | // Add all words to work queue and then immediately close queue channel 161 | for _, s := range words { 162 | queue <- s 163 | } 164 | close(queue) 165 | 166 | // Wait for workers to finish, then close results channel 167 | waitGroup.Wait() 168 | close(results) 169 | 170 | // Obtain lock on mutex 171 | // Prevents function from exiting while results are still being processed and file is still open 172 | mutex.Lock() 173 | mutex.Unlock() 174 | } 175 | } 176 | 177 | // Entry point 178 | func main() { 179 | // Obtain filepath of executable to find path of default bad words list 180 | ex, err := os.Executable() 181 | check(err) 182 | defaultBadPath := filepath.Join(filepath.Dir(ex), "bad-words.txt") 183 | 184 | // Parse command line arguments with `flag` package 185 | var inPath string 186 | flag.StringVar(&inPath, "path", ".", "The path of the target file or directory.\n"+ 187 | "May also be passed after all flags as a positional argument.") 188 | 189 | var outPath string 190 | flag.StringVar(&outPath, "out", ".", "The output directory.") 191 | 192 | var badPath string 193 | flag.StringVar(&badPath, "bad", defaultBadPath, "The list of words to be stripped.") 194 | 195 | var threads int 196 | flag.IntVar(&threads, "threads", 100, "Concurrent worker count.") 197 | 198 | flag.Parse() 199 | 200 | // If extra arguments tail flags, use as `inPath` 201 | if len(flag.Args()) > 0 { 202 | inPath = strings.Join(flag.Args(), " ") 203 | } 204 | 205 | // Read bad words into memory 206 | badWordsContent, err := ioutil.ReadFile(badPath) 207 | check(err) 208 | 209 | // Split bad words into lowercase whitespace separated array (available globally) 210 | badWords = strings.Fields(string(badWordsContent)) 211 | for i := 0; i < len(badWords); i++ { 212 | badWords[i] = strings.ToLower(badWords[i]) 213 | } 214 | 215 | // Call 
`sanitizeList`. If the input path is a directory, `sanitizeList` will handle the recursion internally 216 | sanitizeList(inPath, outPath, threads) 217 | 218 | // After `sanitizeList` is done, print the number of removed/processed words cause leet 219 | fmt.Printf("%d bad words were removed out of %d words.", badCount, totalWords) 220 | } 221 | 222 | // BUY DOGECOIN 223 | -------------------------------------------------------------------------------- /bad-words.txt: -------------------------------------------------------------------------------- 1 | abbo 2 | abo 3 | abuse 4 | addict 5 | adult 6 | africa 7 | alla 8 | amateur 9 | american 10 | anal 11 | angie 12 | angry 13 | anus 14 | arab 15 | areola 16 | argie 17 | aroused 18 | arse 19 | asian 20 | ass 21 | athletesfoot 22 | attack 23 | australian 24 | babe 25 | backdoor 26 | backseat 27 | balls 28 | baptist 29 | barelylegal 30 | barf 31 | bast 32 | bazongas 33 | bazooms 34 | beaner 35 | beast 36 | beatoff 37 | beat-off 38 | beatyourmeat 39 | beaver 40 | bestial 41 | bi 42 | black 43 | blind 44 | blow 45 | boang 46 | bogan 47 | bohunk 48 | bollick 49 | bollock 50 | bomb 51 | bomd 52 | bondage 53 | boner 54 | bong 55 | boob 56 | boody 57 | boom 58 | boong 59 | boonie 60 | booty 61 | bountybar 62 | bra 63 | brea5t 64 | breast 65 | brothel 66 | bugger 67 | bunga 68 | buried 69 | burn 70 | butt 71 | byatch 72 | cacker 73 | cameljockey 74 | cameltoe 75 | canadian 76 | cancer 77 | carruth 78 | cemetery 79 | chav 80 | cherrypopper 81 | chickslick 82 | children's 83 | chin 84 | christ 85 | church 86 | cigarette 87 | cigs 88 | clamdigger 89 | clit 90 | cocaine 91 | cock 92 | cohee 93 | coitus 94 | color 95 | coloured 96 | commie 97 | communist 98 | condom 99 | conservative 100 | conspiracy 101 | coolie 102 | cooly 103 | coon 104 | copulate 105 | corruption 106 | cra5h 107 | crabs 108 | crack 109 | crap 110 | crash 111 | creamy 112 | crime 113 | criminal 114 | crotch 115 | cum 116 | cunilingus 117 | cunillingus 118 | cunn 
119 | cunt 120 | dago 121 | dahmer 122 | dammit 123 | damn 124 | darkie 125 | dead 126 | deapthroat 127 | death 128 | deepthroat 129 | defecate 130 | dego 131 | demon 132 | deposit 133 | desire 134 | destroy 135 | deth 136 | devil 137 | dick 138 | diddle 139 | die 140 | dike 141 | dildo 142 | dingleberry 143 | dink 144 | dipstick 145 | dirty 146 | disease 147 | disturbed 148 | dive 149 | dix 150 | doggiestyle 151 | doggystyle 152 | dong 153 | doodoo 154 | doo-doo 155 | doom 156 | dope 157 | dragqueen 158 | dragqween 159 | drug 160 | drunk 161 | dumb 162 | dyefly 163 | dyke 164 | eatme 165 | ecstacy 166 | ejaculate 167 | ejaculation 168 | enema 169 | enemy 170 | erect 171 | ero 172 | escort 173 | ethiopian 174 | ethnic 175 | european 176 | evl 177 | excrement 178 | execute 179 | execution 180 | explosion 181 | faeces 182 | fag 183 | failed 184 | failure 185 | fairies 186 | fairy 187 | faith 188 | fart 189 | fat 190 | fear 191 | feces 192 | felatio 193 | felch 194 | fellatio 195 | feltch 196 | fetish 197 | fight 198 | filipina 199 | filipino 200 | fingerfood 201 | fire 202 | firing 203 | fister 204 | fisting 205 | flange 206 | flasher 207 | flatulence 208 | floo 209 | flydye 210 | fok 211 | fondle 212 | footaction 213 | footstar 214 | fore 215 | forni 216 | foursome 217 | fourtwenty 218 | fraud 219 | fu 220 | gangbang 221 | gangsta 222 | gatorbait 223 | gay 224 | geez 225 | geni 226 | german 227 | gin 228 | gipp 229 | girls 230 | givehead 231 | glazeddonut 232 | gob 233 | god 234 | gonorrehea 235 | gonzagas 236 | gook 237 | goy 238 | greaseball 239 | gringo 240 | groe 241 | gross 242 | grostulation 243 | gubba 244 | gummer 245 | gun 246 | gyp 247 | hamas 248 | handjob 249 | hapa 250 | harder 251 | hardon 252 | harem 253 | headlights 254 | hebe 255 | heeb 256 | hell 257 | herpes 258 | hijack 259 | hindoo 260 | hitler 261 | hiv 262 | ho 263 | hummer 264 | hussy 265 | hustler 266 | hymen 267 | hymie 268 | idiot 269 | ikey 270 | illegal 271 | incest 272 | insest 273 | 
intercourse 274 | inthebuff 275 | israel 276 | italiano 277 | itch 278 | jackoff 279 | jacktheripper 280 | jade 281 | jap 282 | jebus 283 | jeez 284 | jerkoff 285 | jesus 286 | jew 287 | jiga 288 | jigg 289 | jihad 290 | jijjiboo 291 | jimfish 292 | jism 293 | jiz 294 | joint 295 | juggalo 296 | jugs 297 | junglebunny 298 | kaffer 299 | kaffir 300 | kaffre 301 | kafir 302 | kanake 303 | kid 304 | kigger 305 | kike 306 | kill 307 | kink 308 | kkk 309 | knife 310 | knockers 311 | kock 312 | kondum 313 | koon 314 | kotex 315 | krap 316 | kraut 317 | kum 318 | kunilingus 319 | kunnilingus 320 | kunt 321 | ky 322 | lactate 323 | laid 324 | lapdance 325 | latin 326 | lesbain 327 | lesbayn 328 | lesbo 329 | lez 330 | liberal 331 | licker 332 | lickme 333 | lies 334 | limey 335 | limy 336 | lingerie 337 | liquor 338 | lolita 339 | looser 340 | loser 341 | lotion 342 | lovebone 343 | lovegoo 344 | lovejuice 345 | lovemuscle 346 | lowlife 347 | lsd 348 | lubejob 349 | lucifer 350 | lugan 351 | lynch 352 | macaca 353 | mad 354 | mafia 355 | magicwand 356 | mams 357 | manhater 358 | manpaste 359 | marijuana 360 | mastabate 361 | masterbate 362 | masterblaster 363 | mastrabator 364 | masturbate 365 | masturbating 366 | mattressprincess 367 | meatbeatter 368 | meatrack 369 | meth 370 | mexican 371 | mgger 372 | mggor 373 | mickeyfinn 374 | mideast 375 | milf 376 | minority 377 | mockey 378 | mockie 379 | mofo 380 | moles 381 | mooncricket 382 | mormon 383 | moron 384 | moslem 385 | mosshead 386 | muff 387 | mulatto 388 | muncher 389 | munt 390 | murder 391 | muslim 392 | naked 393 | narcotic 394 | nasty 395 | nazi 396 | necro 397 | negro 398 | nig 399 | nip 400 | nlgger 401 | nlggor 402 | nook 403 | noonan 404 | nooner 405 | nude 406 | nudger 407 | nymph 408 | ontherag 409 | oral 410 | orga 411 | orgies 412 | orgy 413 | osama 414 | paki 415 | palesimian 416 | palestinian 417 | pansies 418 | pansy 419 | panti 420 | payo 421 | pearlnecklace 422 | peck 423 | pee 424 | pendy 425 | 
penetration 426 | peni5 427 | penile 428 | penis 429 | period 430 | perv 431 | phuq 432 | pi55 433 | picaninny 434 | piccaninny 435 | pickaninny 436 | piker 437 | pimp 438 | piss 439 | pistol 440 | pixie 441 | pixy 442 | playboy 443 | playgirl 444 | pocha 445 | pohm 446 | polack 447 | pom 448 | poo 449 | porchmonkey 450 | porn 451 | pot 452 | poverty 453 | premature 454 | pric 455 | primetime 456 | propaganda 457 | pros 458 | protestant 459 | pu55i 460 | pu55y 461 | pube 462 | pud 463 | puss 464 | pusy 465 | quashie 466 | queef 467 | queer 468 | quickie 469 | quim 470 | ra8s 471 | racial 472 | racist 473 | radical 474 | raghead 475 | randy 476 | rape 477 | rapist 478 | rearend 479 | rearentry 480 | rectum 481 | redlight 482 | redneck 483 | reefer 484 | reestie 485 | reject 486 | remains 487 | republican 488 | rere 489 | ribbed 490 | rigger 491 | rimjob 492 | rimming 493 | roach 494 | robber 495 | roundeye 496 | rump 497 | russki 498 | sadis 499 | sadom 500 | samckdaddy 501 | sandm 502 | satan 503 | scag 504 | scallywag 505 | scat 506 | schlong 507 | screw 508 | scrotum 509 | semen 510 | seppo 511 | servant 512 | sex 513 | shag 514 | shat 515 | shav 516 | sheeney 517 | shhit 518 | shinola 519 | shit 520 | sick 521 | sissy 522 | sixsixsix 523 | sixtynine 524 | skank 525 | skinflute 526 | slant 527 | slapper 528 | slaughter 529 | slav 530 | sleezebag 531 | sleezeball 532 | slideitin 533 | slime 534 | slopehead 535 | slopey 536 | slopy 537 | slut 538 | smack 539 | smut 540 | snatch 541 | snot 542 | snowback 543 | sob 544 | sodom 545 | sooty 546 | sos 547 | soviet 548 | spaghettibender 549 | spank 550 | sperm 551 | spic 552 | spig 553 | spik 554 | spit 555 | splittail 556 | spreadeagle 557 | spunk 558 | squaw 559 | stagg 560 | stiffy 561 | strapon 562 | stringer 563 | stripclub 564 | stroke 565 | stroking 566 | stupid 567 | suicide 568 | swallow 569 | swalow 570 | swastika 571 | sweetness 572 | syphilis 573 | taff 574 | tampon 575 | tang 576 | tantra 577 | tarbaby 578 | 
tard 579 | teat 580 | terror 581 | teste 582 | testicle 583 | thicklips 584 | thirdeye 585 | thirdleg 586 | threesome 587 | threeway 588 | tinkle 589 | tit 590 | tnt 591 | toilet 592 | tongethruster 593 | tongue 594 | tortur 595 | tosser 596 | towelhead 597 | trailertrash 598 | tramp 599 | trannie 600 | tranny 601 | triplex 602 | trojan 603 | trots 604 | tunneloflove 605 | turd 606 | turnon 607 | twat 608 | twink 609 | uck 610 | uk 611 | upskirt 612 | urinary 613 | urinate 614 | urine 615 | usama 616 | uterus 617 | vatican 618 | vibr 619 | vietcong 620 | violence 621 | vomit 622 | vulva 623 | wab 624 | wank 625 | waysted 626 | weapon 627 | weenie 628 | weewee 629 | welcher 630 | welfare 631 | wetb 632 | whacker 633 | whash 634 | whigger 635 | whiskey 636 | whit 637 | whiz 638 | wigger 639 | willie 640 | willy 641 | wn 642 | wog 643 | women's 644 | wop 645 | wtf 646 | wuss 647 | wuzzie 648 | xtc 649 | xxx 650 | yankee 651 | yellowman 652 | zipperhead 653 | --------------------------------------------------------------------------------