├── install.sh
├── install.bat
├── .idea
├── vcs.xml
├── .gitignore
├── modules.xml
└── wordlist-sanitizer.iml
├── .gitignore
├── LICENSE
├── README.md
├── main.go
└── bad-words.txt
/install.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Build and install wordlist-sanitizer, then copy the default bad-words list
# next to the installed binary, which is where the program looks for it when
# no -bad flag is given.

# Stop at the first failing command so a failed build is not followed by a copy.
set -e

go install

# Ask the go tool where binaries are installed instead of trusting an exported
# GOPATH: GOBIN wins when it is set, otherwise the binary lands in GOPATH/bin.
bin_dir="$(go env GOBIN)"
if [ -z "$bin_dir" ]; then
    bin_dir="$(go env GOPATH)/bin"
fi

# Quoted so that install locations containing spaces are handled.
cp bad-words.txt "$bin_dir/bad-words.txt"
--------------------------------------------------------------------------------
/install.bat:
--------------------------------------------------------------------------------
REM Build and install the program; stop if the build fails so the word list
REM is not copied next to a binary that does not exist.
go install main.go
if errorlevel 1 exit /b 1

REM Copy the default bad-words list into the install directory.
REM The destination is quoted so a GOPATH containing spaces still works.
copy bad-words.txt "%GOPATH%\bin\bad-words.txt"
3 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binaries for programs and plugins
2 | *.exe
3 | *.exe~
4 | *.dll
5 | *.so
6 | *.dylib
7 |
8 | # Test binary, built with `go test -c`
9 | *.test
10 |
11 | # Output of the go coverage tool, specifically when used with LiteIDE
12 | *.out
13 |
14 | test/
15 |
16 | # Dependency directories (remove the comment below to include it)
17 | # vendor/
18 |
--------------------------------------------------------------------------------
/.idea/wordlist-sanitizer.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Gabe Rust
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # wordlist-sanitizer
2 | Remove Offensive and Profane Words from Wordlists
3 |
4 | # About
5 | `wordlist-sanitizer` creates a copy of a file, with `-clean` appended to its name, from which every word containing an entry of a specified bad-words list has been removed.
6 | If the input is a directory, `wordlist-sanitizer` will recursively create a clone of the directory (directory names also appended with `-clean`) with all files inside sanitized.
7 | The default bad words list came from [https://www.cs.cmu.edu/~biglou/resources/](https://www.cs.cmu.edu/~biglou/resources/) and was programmatically reduced in size (by removing redundant entries).
8 |
9 | # Installation
10 | Ensure that Go is installed, that the `GOPATH` environment variable is set, and that `$GOPATH/bin` is in your `PATH`.
11 |
12 | ```bash
13 | git clone https://github.com/BishopFox/wordlist-sanitizer
14 | cd wordlist-sanitizer
15 | ```
16 |
17 | Windows:
18 | ```ps
19 | .\install.bat
20 | ```
21 |
22 | *nix:
23 | ```bash
24 | chmod +x install.sh
25 | ./install.sh
26 | ```
27 |
28 | # Usage
29 | ```bash
30 | $ wordlist-sanitizer -h
31 | Usage of wordlist-sanitizer:
32 | -bad string
33 | The list of words to be stripped. (default "[EXE_PATH]\\bad-words.txt")
34 | -out string
35 | The output directory. (default ".")
36 | -path string
37 | The path of the target file or directory.
38 | May also be passed after all flags as a positional argument. (default ".")
39 | -threads int
40 | Concurrent worker count. (default 100)
41 | ```
42 |
43 | Example:
44 | ```bash
45 | $ wordlist-sanitizer -threads 100000 SecLists\Usernames\xato-net-10-million-usernames.txt
46 | SecLists\Usernames\xato-net-10-million-usernames.txt
47 | 1101033 bad words were removed out of 8295455 words.
48 | ```
49 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bufio"
5 | "flag"
6 | "fmt"
7 | "io/ioutil"
8 | "os"
9 | "path/filepath"
10 | "strings"
11 | "sync"
12 | )
13 |
// Package-level state shared by the functions in this file.
var (
	// badWords holds the single global list of "bad words"; it is filled
	// once in main and only read afterwards.
	badWords []string

	// badCount is the running total of words removed across all files.
	badCount uint64

	// totalWords is the running total of words processed across all files.
	totalWords uint64
)
22 |
// check panics with the given error unless it is nil.
// `e` error: the error to inspect; a nil value makes check a no-op
func check(e error) {
	if e == nil {
		return
	}
	panic(e)
}
30 |
31 | // Check if the word in question contains any word from `badWords`
32 | // `word` string: The word in question
33 | //
34 | // Returns: bool (false if contains bad word, otherwise true)
35 | func checkWord(word string) bool {
36 | lowerStr := strings.ToLower(word)
37 | for _, s := range badWords {
38 | if strings.Contains(lowerStr, s) {
39 | return false
40 | }
41 | }
42 | return true
43 | }
44 |
45 | // Remove "bad words" from a file.
46 | // Recurse when the filepath is a directory, with an actual file being the base case.
47 | // `fpath` string: The filepath of the input file
48 | // `opath` string: The output directory path
49 | // `threads` int: The maximum number of concurrent goroutines processing the file
50 | func sanitizeList(fpath string, opath string, threads int) {
51 | // Print the current filepath cause leet
52 | fmt.Println(fpath)
53 |
54 | // Obtain file information for the current path
55 | info, err := os.Stat(fpath)
56 | check(err)
57 |
58 | // Check if file is directory (Base Case Check)
59 | if info.IsDir() {
60 | // File is directory, obtain directory contents
61 | dir, err := ioutil.ReadDir(fpath)
62 | check(err)
63 |
64 | // Call `sanitizeList` recursively on each listing in current path
65 | for _, f := range dir {
66 | sanitizeList(filepath.Join(fpath, f.Name()), opath, threads)
67 | }
68 | } else {
69 | // File is NOT a directory: Base Case Reached
70 |
71 | // Read file into memory
72 | content, err := ioutil.ReadFile(fpath)
73 | check(err)
74 |
75 | // Split content of file into array of whitespace separated words
76 | words := strings.Fields(string(content))
77 |
78 | // Append file word count to global word count
79 | totalWords += uint64(len(words))
80 |
81 | // Create channels for passing strings and queueing work
82 | results := make(chan string)
83 | queue := make(chan string)
84 |
85 | // If `threads` is greater than file word count,
86 | // Reduce threads to word count to remove excessive resource allocation
87 | if threads > len(words) {
88 | threads = len(words)
89 | }
90 |
91 | // Create Blocking WaitGroup for worker goroutines
92 | // Add number of threads to WaitGroup
93 | var waitGroup sync.WaitGroup
94 | waitGroup.Add(threads)
95 |
96 | // Create a goroutine for each "thread"
97 | for i := 0; i < threads; i++ {
98 | go func() {
99 | // Decrease WorkGroup before function exits
100 | defer waitGroup.Done()
101 |
102 | // Wait for words from work queue, breaks when `queue` closes
103 | for s := range queue {
104 | // Push word to results if good, otherwise increment global bad word counter
105 | if checkWord(s) {
106 | results <- s
107 | } else {
108 | badCount++
109 | }
110 | }
111 | }()
112 | }
113 |
114 | // Lock mutex to prevent parent from exiting prematurely
115 | var mutex sync.Mutex
116 | mutex.Lock()
117 |
118 | // Goroutine creating new file and processing results from workers
119 | go func() {
120 | // Unlock mutex when function is finished
121 | defer mutex.Unlock()
122 |
123 | // Split filepath into array of directory names
124 | tempPath := fpath
125 | if opath != "." {
126 | tempPath = filepath.Join(opath, fpath)
127 | }
128 | dirs := strings.Split(strings.ReplaceAll(tempPath, "\\", "/"), "/")
129 |
130 | // Append -clean to each directory and filename
131 | for i := 0; i < len(dirs); i++ {
132 | if dirs[i] != "." {
133 | dirs[i] = dirs[i] + "-clean"
134 | }
135 | }
136 |
137 | // Create the new directory structure
138 | if len(dirs) > 1 {
139 | err := os.MkdirAll(filepath.Join(dirs[:len(dirs)-1]...), os.ModePerm)
140 | check(err)
141 | }
142 |
143 | // Create and open the new file
144 | f, err := os.Create(filepath.Join(dirs...))
145 | check(err)
146 | defer f.Close()
147 |
148 | // Create buffer for new file
149 | w := bufio.NewWriter(f)
150 | defer w.Flush()
151 |
152 | // Wait for words from results channel, and write them to the new file.
153 | // Breaks when `results` closes
154 | for s := range results {
155 | _, err := w.WriteString(s + "\n")
156 | check(err)
157 | }
158 | }()
159 |
160 | // Add all words to work queue and then immediately close queue channel
161 | for _, s := range words {
162 | queue <- s
163 | }
164 | close(queue)
165 |
166 | // Wait for workers to finish, then close results channel
167 | waitGroup.Wait()
168 | close(results)
169 |
170 | // Obtain lock on mutex
171 | // Prevents function from exiting while results are still being processed and file is still open
172 | mutex.Lock()
173 | mutex.Unlock()
174 | }
175 | }
176 |
177 | // Entry point
178 | func main() {
179 | // Obtain filepath of executable to find path of default bad words list
180 | ex, err := os.Executable()
181 | check(err)
182 | defaultBadPath := filepath.Join(filepath.Dir(ex), "bad-words.txt")
183 |
184 | // Parse command line arguments with `flag` package
185 | var inPath string
186 | flag.StringVar(&inPath, "path", ".", "The path of the target file or directory.\n"+
187 | "May also be passed after all flags as a positional argument.")
188 |
189 | var outPath string
190 | flag.StringVar(&outPath, "out", ".", "The output directory.")
191 |
192 | var badPath string
193 | flag.StringVar(&badPath, "bad", defaultBadPath, "The list of words to be stripped.")
194 |
195 | var threads int
196 | flag.IntVar(&threads, "threads", 100, "Concurrent worker count.")
197 |
198 | flag.Parse()
199 |
200 | // If extra arguments tail flags, use as `inPath`
201 | if len(flag.Args()) > 0 {
202 | inPath = strings.Join(flag.Args(), " ")
203 | }
204 |
205 | // Read bad words into memory
206 | badWordsContent, err := ioutil.ReadFile(badPath)
207 | check(err)
208 |
209 | // Split bad words into lowercase whitespace separated array (available globally)
210 | badWords = strings.Fields(string(badWordsContent))
211 | for i := 0; i < len(badWords); i++ {
212 | badWords[i] = strings.ToLower(badWords[i])
213 | }
214 |
215 | // Call `sanitizeList`. If the input path is a directory, `sanitizeList` will handle the recursion internally
216 | sanitizeList(inPath, outPath, threads)
217 |
218 | // After `sanitizeList` is done, print the number of removed/processed words cause leet
219 | fmt.Printf("%d bad words were removed out of %d words.", badCount, totalWords)
220 | }
221 |
222 | // BUY DOGECOIN
223 |
--------------------------------------------------------------------------------
/bad-words.txt:
--------------------------------------------------------------------------------
1 | abbo
2 | abo
3 | abuse
4 | addict
5 | adult
6 | africa
7 | alla
8 | amateur
9 | american
10 | anal
11 | angie
12 | angry
13 | anus
14 | arab
15 | areola
16 | argie
17 | aroused
18 | arse
19 | asian
20 | ass
21 | athletesfoot
22 | attack
23 | australian
24 | babe
25 | backdoor
26 | backseat
27 | balls
28 | baptist
29 | barelylegal
30 | barf
31 | bast
32 | bazongas
33 | bazooms
34 | beaner
35 | beast
36 | beatoff
37 | beat-off
38 | beatyourmeat
39 | beaver
40 | bestial
41 | bi
42 | black
43 | blind
44 | blow
45 | boang
46 | bogan
47 | bohunk
48 | bollick
49 | bollock
50 | bomb
51 | bomd
52 | bondage
53 | boner
54 | bong
55 | boob
56 | boody
57 | boom
58 | boong
59 | boonie
60 | booty
61 | bountybar
62 | bra
63 | brea5t
64 | breast
65 | brothel
66 | bugger
67 | bunga
68 | buried
69 | burn
70 | butt
71 | byatch
72 | cacker
73 | cameljockey
74 | cameltoe
75 | canadian
76 | cancer
77 | carruth
78 | cemetery
79 | chav
80 | cherrypopper
81 | chickslick
82 | children's
83 | chin
84 | christ
85 | church
86 | cigarette
87 | cigs
88 | clamdigger
89 | clit
90 | cocaine
91 | cock
92 | cohee
93 | coitus
94 | color
95 | coloured
96 | commie
97 | communist
98 | condom
99 | conservative
100 | conspiracy
101 | coolie
102 | cooly
103 | coon
104 | copulate
105 | corruption
106 | cra5h
107 | crabs
108 | crack
109 | crap
110 | crash
111 | creamy
112 | crime
113 | criminal
114 | crotch
115 | cum
116 | cunilingus
117 | cunillingus
118 | cunn
119 | cunt
120 | dago
121 | dahmer
122 | dammit
123 | damn
124 | darkie
125 | dead
126 | deapthroat
127 | death
128 | deepthroat
129 | defecate
130 | dego
131 | demon
132 | deposit
133 | desire
134 | destroy
135 | deth
136 | devil
137 | dick
138 | diddle
139 | die
140 | dike
141 | dildo
142 | dingleberry
143 | dink
144 | dipstick
145 | dirty
146 | disease
147 | disturbed
148 | dive
149 | dix
150 | doggiestyle
151 | doggystyle
152 | dong
153 | doodoo
154 | doo-doo
155 | doom
156 | dope
157 | dragqueen
158 | dragqween
159 | drug
160 | drunk
161 | dumb
162 | dyefly
163 | dyke
164 | eatme
165 | ecstacy
166 | ejaculate
167 | ejaculation
168 | enema
169 | enemy
170 | erect
171 | ero
172 | escort
173 | ethiopian
174 | ethnic
175 | european
176 | evl
177 | excrement
178 | execute
179 | execution
180 | explosion
181 | faeces
182 | fag
183 | failed
184 | failure
185 | fairies
186 | fairy
187 | faith
188 | fart
189 | fat
190 | fear
191 | feces
192 | felatio
193 | felch
194 | fellatio
195 | feltch
196 | fetish
197 | fight
198 | filipina
199 | filipino
200 | fingerfood
201 | fire
202 | firing
203 | fister
204 | fisting
205 | flange
206 | flasher
207 | flatulence
208 | floo
209 | flydye
210 | fok
211 | fondle
212 | footaction
213 | footstar
214 | fore
215 | forni
216 | foursome
217 | fourtwenty
218 | fraud
219 | fu
220 | gangbang
221 | gangsta
222 | gatorbait
223 | gay
224 | geez
225 | geni
226 | german
227 | gin
228 | gipp
229 | girls
230 | givehead
231 | glazeddonut
232 | gob
233 | god
234 | gonorrehea
235 | gonzagas
236 | gook
237 | goy
238 | greaseball
239 | gringo
240 | groe
241 | gross
242 | grostulation
243 | gubba
244 | gummer
245 | gun
246 | gyp
247 | hamas
248 | handjob
249 | hapa
250 | harder
251 | hardon
252 | harem
253 | headlights
254 | hebe
255 | heeb
256 | hell
257 | herpes
258 | hijack
259 | hindoo
260 | hitler
261 | hiv
262 | ho
263 | hummer
264 | hussy
265 | hustler
266 | hymen
267 | hymie
268 | idiot
269 | ikey
270 | illegal
271 | incest
272 | insest
273 | intercourse
274 | inthebuff
275 | israel
276 | italiano
277 | itch
278 | jackoff
279 | jacktheripper
280 | jade
281 | jap
282 | jebus
283 | jeez
284 | jerkoff
285 | jesus
286 | jew
287 | jiga
288 | jigg
289 | jihad
290 | jijjiboo
291 | jimfish
292 | jism
293 | jiz
294 | joint
295 | juggalo
296 | jugs
297 | junglebunny
298 | kaffer
299 | kaffir
300 | kaffre
301 | kafir
302 | kanake
303 | kid
304 | kigger
305 | kike
306 | kill
307 | kink
308 | kkk
309 | knife
310 | knockers
311 | kock
312 | kondum
313 | koon
314 | kotex
315 | krap
316 | kraut
317 | kum
318 | kunilingus
319 | kunnilingus
320 | kunt
321 | ky
322 | lactate
323 | laid
324 | lapdance
325 | latin
326 | lesbain
327 | lesbayn
328 | lesbo
329 | lez
330 | liberal
331 | licker
332 | lickme
333 | lies
334 | limey
335 | limy
336 | lingerie
337 | liquor
338 | lolita
339 | looser
340 | loser
341 | lotion
342 | lovebone
343 | lovegoo
344 | lovejuice
345 | lovemuscle
346 | lowlife
347 | lsd
348 | lubejob
349 | lucifer
350 | lugan
351 | lynch
352 | macaca
353 | mad
354 | mafia
355 | magicwand
356 | mams
357 | manhater
358 | manpaste
359 | marijuana
360 | mastabate
361 | masterbate
362 | masterblaster
363 | mastrabator
364 | masturbate
365 | masturbating
366 | mattressprincess
367 | meatbeatter
368 | meatrack
369 | meth
370 | mexican
371 | mgger
372 | mggor
373 | mickeyfinn
374 | mideast
375 | milf
376 | minority
377 | mockey
378 | mockie
379 | mofo
380 | moles
381 | mooncricket
382 | mormon
383 | moron
384 | moslem
385 | mosshead
386 | muff
387 | mulatto
388 | muncher
389 | munt
390 | murder
391 | muslim
392 | naked
393 | narcotic
394 | nasty
395 | nazi
396 | necro
397 | negro
398 | nig
399 | nip
400 | nlgger
401 | nlggor
402 | nook
403 | noonan
404 | nooner
405 | nude
406 | nudger
407 | nymph
408 | ontherag
409 | oral
410 | orga
411 | orgies
412 | orgy
413 | osama
414 | paki
415 | palesimian
416 | palestinian
417 | pansies
418 | pansy
419 | panti
420 | payo
421 | pearlnecklace
422 | peck
423 | pee
424 | pendy
425 | penetration
426 | peni5
427 | penile
428 | penis
429 | period
430 | perv
431 | phuq
432 | pi55
433 | picaninny
434 | piccaninny
435 | pickaninny
436 | piker
437 | pimp
438 | piss
439 | pistol
440 | pixie
441 | pixy
442 | playboy
443 | playgirl
444 | pocha
445 | pohm
446 | polack
447 | pom
448 | poo
449 | porchmonkey
450 | porn
451 | pot
452 | poverty
453 | premature
454 | pric
455 | primetime
456 | propaganda
457 | pros
458 | protestant
459 | pu55i
460 | pu55y
461 | pube
462 | pud
463 | puss
464 | pusy
465 | quashie
466 | queef
467 | queer
468 | quickie
469 | quim
470 | ra8s
471 | racial
472 | racist
473 | radical
474 | raghead
475 | randy
476 | rape
477 | rapist
478 | rearend
479 | rearentry
480 | rectum
481 | redlight
482 | redneck
483 | reefer
484 | reestie
485 | reject
486 | remains
487 | republican
488 | rere
489 | ribbed
490 | rigger
491 | rimjob
492 | rimming
493 | roach
494 | robber
495 | roundeye
496 | rump
497 | russki
498 | sadis
499 | sadom
500 | samckdaddy
501 | sandm
502 | satan
503 | scag
504 | scallywag
505 | scat
506 | schlong
507 | screw
508 | scrotum
509 | semen
510 | seppo
511 | servant
512 | sex
513 | shag
514 | shat
515 | shav
516 | sheeney
517 | shhit
518 | shinola
519 | shit
520 | sick
521 | sissy
522 | sixsixsix
523 | sixtynine
524 | skank
525 | skinflute
526 | slant
527 | slapper
528 | slaughter
529 | slav
530 | sleezebag
531 | sleezeball
532 | slideitin
533 | slime
534 | slopehead
535 | slopey
536 | slopy
537 | slut
538 | smack
539 | smut
540 | snatch
541 | snot
542 | snowback
543 | sob
544 | sodom
545 | sooty
546 | sos
547 | soviet
548 | spaghettibender
549 | spank
550 | sperm
551 | spic
552 | spig
553 | spik
554 | spit
555 | splittail
556 | spreadeagle
557 | spunk
558 | squaw
559 | stagg
560 | stiffy
561 | strapon
562 | stringer
563 | stripclub
564 | stroke
565 | stroking
566 | stupid
567 | suicide
568 | swallow
569 | swalow
570 | swastika
571 | sweetness
572 | syphilis
573 | taff
574 | tampon
575 | tang
576 | tantra
577 | tarbaby
578 | tard
579 | teat
580 | terror
581 | teste
582 | testicle
583 | thicklips
584 | thirdeye
585 | thirdleg
586 | threesome
587 | threeway
588 | tinkle
589 | tit
590 | tnt
591 | toilet
592 | tongethruster
593 | tongue
594 | tortur
595 | tosser
596 | towelhead
597 | trailertrash
598 | tramp
599 | trannie
600 | tranny
601 | triplex
602 | trojan
603 | trots
604 | tunneloflove
605 | turd
606 | turnon
607 | twat
608 | twink
609 | uck
610 | uk
611 | upskirt
612 | urinary
613 | urinate
614 | urine
615 | usama
616 | uterus
617 | vatican
618 | vibr
619 | vietcong
620 | violence
621 | vomit
622 | vulva
623 | wab
624 | wank
625 | waysted
626 | weapon
627 | weenie
628 | weewee
629 | welcher
630 | welfare
631 | wetb
632 | whacker
633 | whash
634 | whigger
635 | whiskey
636 | whit
637 | whiz
638 | wigger
639 | willie
640 | willy
641 | wn
642 | wog
643 | women's
644 | wop
645 | wtf
646 | wuss
647 | wuzzie
648 | xtc
649 | xxx
650 | yankee
651 | yellowman
652 | zipperhead
653 |
--------------------------------------------------------------------------------