├── .DS_Store ├── .github ├── .DS_Store ├── readme.png ├── FUNDING.yml └── workflows │ ├── test.yml │ └── npm-publish.yml ├── data ├── ga.txt ├── zh.txt ├── ar.txt ├── fr.txt ├── es.txt └── en.txt ├── .prettierrc.json ├── .gitignore ├── package.json ├── index.js ├── README.md └── __tests__ ├── english.test.js ├── engine.test.js ├── spanish.test.js ├── arabic.test.js ├── french.test.js ├── chinese.test.js └── irish.test.js /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coffee-and-fun/google-profanity-words/HEAD/.DS_Store -------------------------------------------------------------------------------- /.github/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coffee-and-fun/google-profanity-words/HEAD/.github/.DS_Store -------------------------------------------------------------------------------- /.github/readme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coffee-and-fun/google-profanity-words/HEAD/.github/readme.png -------------------------------------------------------------------------------- /data/ga.txt: -------------------------------------------------------------------------------- 1 | cac 2 | píosa cac 3 | muc 4 | feck 5 | feckin 6 | bod 7 | póg mo thóin 8 | amadán 9 | eejit 10 | gobshite 11 | bastún 12 | lúdramán 13 | balbhán 14 | ainnis ort 15 | dún do chlab 16 | damnú ort -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true, 3 | "trailingComma": "es5", 4 | "tabWidth": 2, 5 | "semi": true, 6 | "printWidth": 80, 7 | "arrowParens": "always", 8 | "endOfLine": "auto" 9 | } 10 | -------------------------------------------------------------------------------- /data/zh.txt: 
-------------------------------------------------------------------------------- 1 | 操 2 | 操你 3 | 操你妈 4 | 他妈 5 | 他妈的 6 | 去死 7 | 滚 8 | 滚开 9 | 傻逼 10 | 煞笔 11 | 傻子 12 | 妈的 13 | 狗屎 14 | 屎 15 | 屌 16 | 贱人 17 | 婊子 18 | 死变态 19 | 垃圾 20 | 废物 21 | 蠢货 22 | 去你妈的 23 | 日 24 | 日你 25 | 日你妈 26 | 靠 27 | 混蛋 28 | 王八蛋 29 | 狗娘养的 30 | 龟儿子 31 | -------------------------------------------------------------------------------- /data/ar.txt: -------------------------------------------------------------------------------- 1 | لعنة 2 | اللعنة 3 | خرا 4 | خراء 5 | قحبة 6 | شرموطة 7 | زب 8 | كس 9 | ابن زنا 10 | ابن حرام 11 | كلب 12 | يا كلب 13 | حمار 14 | يا حمار 15 | يا ابن الكلب 16 | يلعن أبوك 17 | يلعن شكلك 18 | تفو 19 | تفو عليك 20 | قذر 21 | قرف 22 | تبا 23 | انقلع 24 | اخرس -------------------------------------------------------------------------------- /data/fr.txt: -------------------------------------------------------------------------------- 1 | merde 2 | putain 3 | con 4 | connard 5 | connasse 6 | salope 7 | salaud 8 | bordel 9 | chiant 10 | foutre 11 | enculé 12 | enfoiré 13 | fils de pute 14 | casse‑toi 15 | ferme ta gueule 16 | va te faire foutre 17 | ta gueule 18 | pute 19 | couille 20 | couillon 21 | bâtard 22 | gros con 23 | cul 24 | trou du cul -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: robertjgabriel 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | 
otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | pull_request: 5 | branches: [ main, master ] 6 | push: 7 | branches: [ main, master ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | node-version: [16.x, 18.x, 20.x] 16 | 17 | steps: 18 | - name: Checkout code 19 | uses: actions/checkout@v4 20 | 21 | - name: Setup Node.js ${{ matrix.node-version }} 22 | uses: actions/setup-node@v4 23 | with: 24 | node-version: ${{ matrix.node-version }} 25 | cache: 'npm' 26 | 27 | - name: Install dependencies 28 | run: npm ci 29 | 30 | - name: Run tests 31 | run: npm test -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | dist/ 6 | dist 7 | # Runtime data 8 | pids 9 | *.pid 10 | *.seed 11 | *.pid.lock 12 | package/ 13 | package 14 | build 15 | build/ 16 | 17 | # Directory for instrumented libs generated by jscoverage/JSCover 18 | lib-cov 19 | 20 | # Coverage directory used by tools like istanbul 21 | coverage 22 | 23 | # nyc test coverage 24 | .nyc_output 25 | 26 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 27 | .grunt 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (http://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules 37 | jspm_packages 38 | 39 | # Optional npm cache directory 40 | .npm 41 | 42 | # Optional REPL history 43 | .node_repl_history 44 | Contact GitHub API Training Shop Blog About 45 | 
-------------------------------------------------------------------------------- /.github/workflows/npm-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow runs the tests using Node.js and then publishes the package to the npm registry when a release is published 2 | # For more information see: https://docs.github.com/en/actions/publishing-packages/publishing-nodejs-packages 3 | 4 | name: Publish 5 | 6 | on: 7 | release: 8 | types: [published] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - uses: actions/setup-node@v3 16 | with: 17 | node-version: 16 18 | - run: npm ci 19 | - run: npm run format 20 | - run: npm test 21 | 22 | publish-npm: 23 | needs: build 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v3 27 | - uses: actions/setup-node@v3 28 | with: 29 | node-version: 16 30 | registry-url: https://registry.npmjs.org/ 31 | - run: npm ci 32 | - run: npm publish 33 | env: 34 | NODE_AUTH_TOKEN: ${{secrets.npm_token}} 35 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@coffeeandfun/google-profanity-words", 3 | "version": "3.0.0", 4 | "description": "Real profanity words banned by Google, extracted from their hidden API before shutdown. Now available as an easy-to-use Node.js library for content filtering.", 5 | "main": "index.js", 6 | "type": "module", 7 | "scripts": { 8 | "format": "npx prettier . 
// --write", 9 | "test": "NODE_OPTIONS=--experimental-vm-modules NODE_NO_WARNINGS=1 jest", 10 | "en": "NODE_OPTIONS=--experimental-vm-modules NODE_NO_WARNINGS=1 jest english.test.js", 11 | "es": "NODE_OPTIONS=--experimental-vm-modules NODE_NO_WARNINGS=1 jest spanish.test.js", 12 | "engine": "NODE_OPTIONS=--experimental-vm-modules NODE_NO_WARNINGS=1 jest engine.test.js" 13 | }, 14 | "repository": { 15 | "type": "git", 16 | "url": "git+https://github.com/coffee-and-fun/google-profanity-words.git" 17 | }, 18 | "keywords": [ 19 | "google", 20 | "side-project", 21 | "profanity", 22 | "profanity-detection", 23 | "profanityfilter" 24 | ], 25 | "author": "Robert James Gabriel", 26 | "license": "ISC", 27 | "bugs": { 28 | "url": "https://github.com/coffee-and-fun/google-profanity-words/issues" 29 | }, 30 | "homepage": "https://github.com/coffee-and-fun/google-profanity-words#readme", 31 | "devDependencies": { 32 | "jest": "^27.4.5", 33 | "prettier": "3.0.0" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 2 | import { readFile, access } from 'fs/promises'; 3 | import path from 'path'; 4 | import { fileURLToPath } from 'url'; 5 |
/**
 * Profanity detector backed by plain-text word lists (`data/<language>.txt`,
 * one term per line).
 *
 * The list is loaded lazily on first use. Matching is case-insensitive and
 * token based: text is split on whitespace and Unicode punctuation, so a term
 * is only found when it is delimited that way in the text (text written
 * without separators between words is treated as one token). List entries
 * that themselves contain spaces or punctuation (e.g. "hijo de puta",
 * "culo-gorro") are matched as runs of consecutive tokens.
 */
export class ProfanityEngine {
  /**
   * @param {object} [config]
   * @param {boolean} [config.testMode=false] - When true, warnings are not
   *   written to the console.
   * @param {string} [config.language='en'] - Name of the word list to load
   *   (`data/<language>.txt`). Unknown or invalid names fall back to 'en'.
   */
  constructor(config = {}) {
    this.isTestMode = config.testMode ?? false;
    this.language = config.language ?? 'en';
    this.terms = null;
    this.termsSet = null;
    this.isInitialized = false;

    // Normalized "token token ..." key -> list entry, used for sentence scans.
    this._phraseIndex = null;
    // Token count of the longest list entry; bounds the scan window.
    this._maxPhraseTokens = 0;
    // In-flight load shared by concurrent callers.
    this._initPromise = null;
    // Bumped by reset() so a load started before the reset is discarded.
    this._loadGeneration = 0;
  }

  /**
   * Load the word list on first use. Concurrent callers share one load, and
   * a load that was overtaken by reset() is retried instead of committed.
   * @private
   * @returns {Promise<void>}
   */
  async _ensureInitialized() {
    while (!this.isInitialized) {
      if (!this._initPromise) {
        this._initPromise = this._loadTerms();
      }
      await this._initPromise;
    }
  }

  /**
   * Read the word list and commit it. Failures are reported as a warning and
   * leave the engine initialized with an empty list (best effort).
   * @private
   * @returns {Promise<void>}
   */
  async _loadTerms() {
    const generation = this._loadGeneration;
    const isStale = () => generation !== this._loadGeneration;

    try {
      const filePath = await this._getLanguageFilePath();
      const lines = await this._readTermsFile(filePath);
      if (isStale()) return; // reset() ran while the file was being read
      this._setTerms(lines);
    } catch (error) {
      if (isStale()) return;
      this._logWarning(`Failed to initialize: ${error.message}`);
      this._setTerms([]);
    }

    this.isInitialized = true;
    this._initPromise = null;
  }

  /**
   * Normalize raw list lines and build the lookup structures.
   * @private
   * @param {string[]} lines - Raw lines of a word-list file.
   */
  _setTerms(lines) {
    this.terms = lines
      .filter((line) => line.trim())
      .map((line) => line.trim().toLowerCase());
    this.termsSet = new Set(this.terms);

    const index = new Map();
    let longest = 0;
    for (const term of this.terms) {
      // Tokenize entries exactly like input text so both sides compare equal.
      const tokens = this._extractWords(term);
      if (tokens.length === 0) continue;

      const key = tokens.join(' ');
      // An entry that equals its own key wins over a punctuated variant, so
      // plain single-word matches report the plain word.
      if (term === key || !index.has(key)) {
        index.set(key, term);
      }
      longest = Math.max(longest, tokens.length);
    }
    this._phraseIndex = index;
    this._maxPhraseTokens = longest;
  }

  /**
   * Get the file path for the configured language, falling back to English
   * when the name is not a plain file-name stem or the file does not exist.
   * @private
   * @returns {Promise<string>}
   */
  async _getLanguageFilePath() {
    const currentFilePath = fileURLToPath(import.meta.url);
    const dataFolderPath = path.join(path.dirname(currentFilePath), 'data');
    const fallbackPath = path.join(dataFolderPath, 'en.txt');

    // Only letters, digits, '_' and '-' are accepted so the name cannot
    // contain path separators or '..' and escape the data folder.
    const isSafeName =
      typeof this.language === 'string' && /^[\w-]+$/.test(this.language);
    if (!isSafeName) {
      this._logWarning(
        `Invalid language '${String(this.language)}'. Using 'en' as fallback.`
      );
      return fallbackPath;
    }

    const languageFilePath = path.join(dataFolderPath, `${this.language}.txt`);
    if (await this._fileExists(languageFilePath)) {
      return languageFilePath;
    }

    // Fallback to English
    this._logWarning(
      `Language file '${this.language}.txt' not found. Using 'en' as fallback.`
    );
    return fallbackPath;
  }

  /**
   * Check if file exists
   * @private
   * @param {string} filePath
   * @returns {Promise<boolean>}
   */
  async _fileExists(filePath) {
    try {
      await access(filePath);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Read a terms file and split it into lines.
   * @private
   * @param {string} filePath
   * @returns {Promise<string[]>}
   */
  async _readTermsFile(filePath) {
    const fileContent = await readFile(filePath, 'utf8');
    return fileContent.split(/\r?\n/); // Handle both \n and \r\n
  }

  /**
   * Log warning if not in test mode
   * @private
   * @param {string} message
   */
  _logWarning(message) {
    if (!this.isTestMode) {
      console.warn('Profanity Engine:', message);
    }
  }

  /**
   * Lower-case text and split it into tokens on whitespace and punctuation.
   * @private
   * @param {string} text
   * @returns {string[]} Non-empty tokens; empty for non-string or empty input.
   */
  _extractWords(text) {
    if (!text || typeof text !== 'string') return [];

    return text
      .toLowerCase()
      .split(/[\s\p{P}]+/u)
      .filter((word) => word.length > 0);
  }

  /**
   * Collect the list entries that occur in a sentence as a run of
   * consecutive tokens.
   * @private
   * @param {string} sentence
   * @param {boolean} [stopAtFirst=false] - Return as soon as one entry is found.
   * @returns {Set<string>} Matched entries in order of first appearance.
   */
  _findTerms(sentence, stopAtFirst = false) {
    const tokens = this._extractWords(sentence);
    const found = new Set();

    for (let start = 0; start < tokens.length; start += 1) {
      // No entry is longer than _maxPhraseTokens, so the window stops there.
      const limit = Math.min(tokens.length, start + this._maxPhraseTokens);
      let key = '';
      for (let end = start; end < limit; end += 1) {
        key = end === start ? tokens[end] : `${key} ${tokens[end]}`;
        const term = this._phraseIndex.get(key);
        if (term !== undefined) {
          found.add(term);
          if (stopAtFirst) return found;
        }
      }
    }

    return found;
  }

  /**
   * Check if a sentence contains any profanity words
   * @param {string} sentence - The text to check
   * @returns {Promise<boolean>} True if profanity is found
   */
  async hasCurseWords(sentence) {
    await this._ensureInitialized();

    if (!sentence || typeof sentence !== 'string') return false;

    return this._findTerms(sentence, true).size > 0;
  }

  /**
   * Get all profanity words found in a sentence
   * @param {string} sentence - The text to analyze
   * @returns {Promise<string[]>} Unique list entries found, lower-cased, in
   *   order of first appearance
   */
  async getCurseWords(sentence) {
    await this._ensureInitialized();

    if (!sentence || typeof sentence !== 'string') return [];

    return Array.from(this._findTerms(sentence));
  }

  /**
   * Get all profanity terms
   * @returns {Promise<string[]>} Array of all profanity terms
   */
  async all() {
    await this._ensureInitialized();
    return [...this.terms]; // Return a copy to prevent external modification
  }

  /**
   * Search for a specific term (exact, case-insensitive match of the whole
   * trimmed input against a list entry)
   * @param {string} term - The term to search for
   * @returns {Promise<boolean>} True if the term is found
   */
  async search(term) {
    await this._ensureInitialized();

    if (!term || typeof term !== 'string') return false;

    return this.termsSet.has(term.trim().toLowerCase());
  }

  /**
   * Reset the engine (useful for testing or changing language). The word
   * list is loaded again on the next call; a load still in flight is
   * discarded.
   */
  reset() {
    this.terms = null;
    this.termsSet = null;
    this._phraseIndex = null;
    this._maxPhraseTokens = 0;
    this.isInitialized = false;
    this._initPromise = null;
    this._loadGeneration += 1;
  }
}
// -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![alt text](.github/readme.png 'Logo Title Text 1') 2 | 3 | 4 | 5 | # ☕ Google Profanity Words 6 | 7 | > A fun and developer-friendly profanity detection library brought to you by [Coffee & Fun LLC](https://coffeeandfun.com) ☕🎉 8 | > Built and maintained with love by [Robert James Gabriel](https://github.com/robertgabriel) 💻✨ 9 | 10 | [![npm version](https://img.shields.io/npm/v/@coffeeandfun/google-profanity-words.svg)](https://www.npmjs.com/package/@coffeeandfun/google-profanity-words) [![Stars](https://img.shields.io/github/stars/@coffeeandfun/google-profanity-words?style=social)](https://github.com/@coffeeandfun/google-profanity-words) 11 | 12 | 13 | --- 14 | 15 | ## 🚀 What’s This?
16 | 17 | **Google Profanity Words** is a Node.js library that helps you detect and filter out naughty language (in multiple languages!) from your apps or content. Whether you’re building a chat app, a comment section, or a game—this one’s your profanity-slaying sidekick. 18 | 19 | Made by devs for devs. Maintained by Robert at Coffee & Fun ☕❤️ 20 | 21 | --- 22 | 23 | ## ✨ Features 24 | 25 | - 🌐 **Multilingual support** – English (`en`), Spanish (`es`), French (`fr`), Arabic (`ar`), Chinese (`zh`) and Irish (`ga`) out of the box. More coming soon! 26 | - 🔁 **Monthly updates** – Stay fresh with the latest no-no words 27 | - 💡 **Easy to use API** – Straightforward methods, async/await friendly 28 | - 🔬 **Tested with Jest** – Fully covered and ready for production 29 | - ⚡ **Tiny & Fast** – Minimal deps = speedy installs and performance 30 | 31 | --- 32 | 33 | ## 📦 Install Me 34 | 35 | ```bash 36 | npm install @coffeeandfun/google-profanity-words 37 | ``` 38 | 39 | --- 40 | 41 | ## ⚡ Quickstart Guide 42 | 43 | ```javascript 44 | import { ProfanityEngine } from '@coffeeandfun/google-profanity-words'; 45 | 46 | // Default is English 47 | const profanity = new ProfanityEngine(); 48 | 49 | // Español? You got it. 50 | const profanityES = new ProfanityEngine({ language: 'es' }); 51 | 52 | // Check a single word 53 | const isBad = await profanity.search('example'); 54 | 55 | // Or check a full sentence 56 | const hasCurses = await profanity.hasCurseWords('This is a test sentence'); 57 | 58 | console.log(isBad, hasCurses); // true / false 59 | ``` 60 | 61 | --- 62 | 63 | ## 🔍 API Docs (But Make It Chill) 64 | 65 | ### 🛠️ `new ProfanityEngine(options?)` 66 | 67 | Create a new profanity detector engine! 
68 | 69 | ```javascript 70 | const profanity = new ProfanityEngine(); // Defaults to English 71 | ``` 72 | 73 | Or choose a specific language: 74 | 75 | ```javascript 76 | const spanishProfanity = new ProfanityEngine({ language: 'es' }); 77 | ``` 78 | 79 | #### Options: 80 | - `language` (string, optional): 81 | - `'en'` = English (default) 82 | - `'es'` = Spanish 83 | - If a language isn’t available, it falls back to English. 84 | 85 | --- 86 | 87 | ### 🔎 `search(word)` 88 | 89 | Check a single word to see if it's naughty. 90 | 91 | ```javascript 92 | const isProfane = await profanity.search('heck'); 93 | console.log(isProfane); // true or false 94 | ``` 95 | 96 | --- 97 | 98 | ### 💬 `hasCurseWords(sentence)` 99 | 100 | Check a full sentence or phrase for profanity. 101 | 102 | ```javascript 103 | const result = await profanity.hasCurseWords('You silly goose'); 104 | console.log(result); // probably false, unless goose is banned now 🪿 105 | ``` 106 | 107 | --- 108 | 109 | ### 📜 `all()` 110 | 111 | Get the full list of bad words in the current language. 112 | 113 | ```javascript 114 | const badWords = await profanity.all(); 115 | console.log(badWords); // ['word1', 'word2', 'etc'] 116 | ``` 117 | 118 | --- 119 | 120 | ### 💡 Real Talk: Edge Cases 121 | 122 | - Empty strings? We gotchu. Returns `false`. 123 | - `search()` and `hasCurseWords()` are **case-insensitive**. 124 | - Special characters and punctuation? No problem. 
125 | 126 | --- 127 | 128 | ## 🧪 Testing with Jest 129 | 130 | We've got testing covered like whipped cream on a latte ☕🎂 131 | 132 | Run the default test suite: 133 | 134 | ```bash 135 | npm test 136 | ``` 137 | 138 | Or use more specific Jest commands: 139 | 140 | ```bash 141 | # Watch mode (great for dev workflow) 142 | npx jest --watch 143 | 144 | # Run tests in a specific file 145 | npx jest path/to/your/file.test.js 146 | 147 | # Run coverage report 148 | npx jest --coverage 149 | 150 | # Run with verbose output (get all the juicy details) 151 | npx jest --verbose 152 | ``` 153 | 154 | Tests are located in the `/__tests__/` directory and use the real profanity files, so you know it’s legit 👀✅ 155 | 156 | --- 157 | 158 | ## 🔀 Example Use Cases 159 | 160 | ### ✅ Filter User Input 161 | 162 | ```js 163 | async function filterInput(input) { 164 | if (await profanity.hasCurseWords(input)) { 165 | return '⚠️ Whoa there! Language, please.'; 166 | } 167 | return input; 168 | } 169 | ``` 170 | 171 | --- 172 | 173 | ### 🌍 Multi-language Setup 174 | 175 | ```js 176 | const en = new ProfanityEngine({ language: 'en' }); 177 | const es = new ProfanityEngine({ language: 'es' }); 178 | 179 | const englishResult = await en.search('bad'); 180 | const spanishResult = await es.search('malo'); 181 | ``` 182 | 183 | --- 184 | 185 | ## 🌍 Want to Contribute? 186 | 187 | We love open source buddies 💛 188 | 189 | ### Add a New Language 190 | 191 | 1. Fork it 🍴 192 | 2. Add a file to `/data/` named like `fr.txt` for French 193 | 3. Fill it with one profane word per line 194 | 4. Push & open a pull request! 195 | 196 | --- 197 | 198 | ## 🙌 Who Made This? 199 | 200 | Built by [Robert James Gabriel](https://github.com/robertgabriel) and the good people at **Coffee & Fun LLC**. We make dev tools with accessibility, coffee, and good vibes in mind. 201 | 202 | > Wanna support? Send a coffee our way or just spread the word! 
☕🚀 203 | 204 | --- 205 | 206 | ## 🧡 License 207 | 208 | [MIT](https://opensource.org/licenses/MIT) – because sharing is caring. 209 | 210 | --- 211 | 212 | ## 💬 Support & Community 213 | 214 | - 🐛 [Report Bugs](https://github.com/coffee-and-fun/google-profanity-words/issues) 215 | - 💡 [Join Discussions](https://github.com/coffee-and-fun/google-profanity-words/discussions) 216 | - 📬 Email: [support@coffeeandfun.com](mailto:hellow@coffeeandfun.com) 217 | 218 | --- 219 | 220 | Made with ☕, code, and a sprinkle of magic at Coffee & Fun LLC 💖 221 | 222 | ## AI Usage 223 | Claude AI was used to help write this README and to add extra Jest tests. -------------------------------------------------------------------------------- /data/es.txt: -------------------------------------------------------------------------------- 1 | 2 chicas 1 taza 2 | 2g1c 3 | 4r5e 4 | 5h1t 5 | 5 hit 6 | a55 7 | culo 8 | acrotomofilia 9 | bolsillo caliente de alabama 10 | oleoducto de alaska 11 | anal 12 | anilingus 13 | ano 14 | una mierda 15 | ar5e 16 | culo 17 | culo 18 | gilipollas 19 | culo 20 | follador de culo 21 | culo-gorro 22 | culo-pirata 23 | bolsa de culo 24 | bandido 25 | assbanger 26 | mordida 27 | payaso 28 | culo 29 | rompeculos 30 | culos 31 | cara de culo 32 | hijo de puta 33 | enculada 34 | asno duende 35 | idiota 36 | idiota 37 | Estúpido 38 | imbéciles 39 | asaltante 40 | ladrón de traseros 41 | lamer el culo 42 | lameculos 43 | culo mono 44 | culo 45 | mascador de culos 46 | aspirar 47 | gilipollas 48 | chupador de culos 49 | culo 50 | gilipollas 51 | asswipe 52 | auto erótico 53 | autoerotico 54 | perra 55 | b00bs 56 | b17ch 57 | perra 58 | babeland 59 | masa para bebes 60 | jugo de bebe 61 | mordaza de bola 62 | salsa de bolas 63 | patear la pelota 64 | pelota lamiendo 65 | saco de pelota 66 | chupando bolas 67 | bolsa de pelota 68 | pelotas 69 | saco de bolas 70 | bampot 71 | bang bros 72 | a pelo 73 | apenas legal 74 | desnudo 75 | bastardo 76 | bastardo 77 | bastinado 78 | 
bbw 79 | bdsm 80 | frijol 81 | frijoles 82 | bestial 83 | bestialidad 84 | bestialidad 85 | cuchillo de castor 86 | labios de castor 87 | campana 88 | bestial 89 | bestialidad 90 | perra 91 | perra 92 | grande y negro 93 | pechos grandes 94 | grandes aldabas 95 | grandes tetas 96 | tontas 97 | cerradura de pájaro 98 | perra 99 | perra 100 | perras 101 | perras 102 | perra 103 | quejándose 104 | gallo negro 105 | acción rubia 106 | rubia sobre rubia acción 107 | sangriento 108 | mamada 109 | sopla tu carga 110 | mamada 111 | mamadas 112 | gofre azul 113 | tonto 114 | boiolas 115 | mierda 116 | cojones 117 | bollok 118 | bollox 119 | esclavitud 120 | metedura de pata 121 | teta 122 | bobo 123 | tetas 124 | tetas 125 | tetas 126 | tetas 127 | booooooobs 128 | llamada de botín 129 | senos 130 | duchas marrones 131 | acción morena 132 | buceta 133 | tío 134 | bukkake 135 | bullyke 136 | vibra de bala 137 | mierda 138 | culo 139 | agujero de tapón 140 | boca de tonel 141 | conejito hijo de puta 142 | tetona 143 | culata 144 | trasero-pirata 145 | nalgas 146 | ojete 147 | masticar trasero 148 | anal 149 | verga 150 | hijo de puta 151 | dedo de camello 152 | camgirl 153 | puta de la cam 154 | camwhore 155 | masticador de alfombras 156 | mascador de alfombras 157 | graznar 158 | chinchilla 159 | grieta 160 | Choad 161 | capullos de rosa de chocolate 162 | Chode 163 | cipá 164 | círculo idiota 165 | cl1t 166 | vapor de cleveland 167 | clítoris 168 | clítoris 169 | clítoris 170 | clítoris 171 | abrazaderas de trébol 172 | racimo de mierda 173 | nuez 174 | polla 175 | chupapollas 176 | mordedura de gallo 177 | hamburguesa 178 | cara de gallo 179 | cabeza de gallo 180 | jockey 181 | aldaba 182 | maestro de gallos 183 | traficante de gallos 184 | cockmongruel 185 | mono gallo 186 | gallo 187 | comepollas 188 | nariz de gallo 189 | pepita 190 | pollas 191 | mierda 192 | herrero 193 | fumador de pollas 194 | mamar 195 | mamar 196 | mamada 197 | mamada 198 | hijo de puta 199 | 
mamando 200 | chupapollas 201 | pollasuka 202 | hijo de puta 203 | coca 204 | cocinando 205 | coksucka 206 | coochie 207 | chulo 208 | mapache 209 | mapaches 210 | cooter 211 | coprolagnia 212 | coprofilia 213 | cornhole 214 | timonel 215 | tonterías 216 | cremita 217 | semen 218 | desmoronamiento 219 | basurero 220 | devorador de semen 221 | corrida 222 | cummer 223 | correrse 224 | se corre 225 | corrida 226 | zorra 227 | corrida 228 | cunilingus 229 | cunillingus 230 | coño 231 | cunnilingus 232 | coño 233 | cara de chocho 234 | coño 235 | lamer el coño 236 | lamer el coño 237 | lamecoños 238 | lamecoños 239 | lamiendo coños 240 | lamiendo coños 241 | cuntrag 242 | coños 243 | cyalis 244 | cyberfuc 245 | cyberfuck 246 | cibernético 247 | cibernético 248 | ciberfuckers 249 | cibernético 250 | d1ck 251 | maldita sea 252 | maldición 253 | moreno 254 | violación en una cita 255 | cita 256 | Garganta profunda 257 | Garganta profunda 258 | dendrofilia 259 | polla 260 | bolsa de pene 261 | batidor de pollas 262 | cara de pene 263 | gilipollas 264 | pendejo 265 | jugo de polla 266 | dickleche 267 | traficante de pollas 268 | bofetada 269 | hijo de puta 270 | idiota 271 | comadreja 272 | dickweed 273 | idiota 274 | dique 275 | consolador 276 | consoladores 277 | moras 278 | zarzamora 279 | tonto 280 | borrachos 281 | idiota 282 | dirección 283 | almohadas sucias 284 | sucio sanchez 285 | dlck 286 | estilo perro 287 | follador de perros 288 | estilo perrito 289 | estilo perrito 290 | persiguiendo 291 | persiguiendo 292 | estilo perrito 293 | a cuatro patas 294 | dolcett 295 | dominación 296 | dominatriz 297 | domos 298 | ponche de burro 299 | burro 300 | bolsa de basura 301 | chiflado 302 | doosh 303 | doble polla 304 | doble penetración 305 | ducha 306 | gilipollas 307 | acción doble penetración 308 | joroba seca 309 | ducha 310 | estupideces 311 | idiota 312 | dvda 313 | dique 314 | come mi culo 315 | ecchi 316 | eyacular 317 | eyaculado 318 | eyacula 319 | eyacular 320 
| eyaculando 321 | eyaculación 322 | eyacular 323 | erótico 324 | erotismo 325 | escolta 326 | eunuco 327 | Mierda 328 | Cabron 329 | f4nny 330 | Mierda 331 | maricón 332 | marica 333 | maricón 334 | mariconear 335 | maricón 336 | maricón 337 | maricón 338 | maricas 339 | maricón 340 | maricas 341 | maricas 342 | maricón 343 | coño 344 | fannyflaps 345 | fannyfucker 346 | fanyy 347 | pedo 348 | tirado un pedo 349 | tirando pedos 350 | pedo 351 | gordo 352 | joder 353 | hijo de puta 354 | mierda 355 | fecal 356 | feck 357 | hijo de puta 358 | felación 359 | Felch 360 | felching 361 | felación 362 | felación 363 | fieltro 364 | chorros femeninos 365 | dominación femenina 366 | fijándose 367 | pistola con la mano 368 | follar con los dedos 369 | dedo follado 370 | follador de dedos 371 | folladores de dedos 372 | follando con los dedos 373 | folla con los dedos 374 | digitación 375 | follar con el puño 376 | puño follado 377 | follador de puños 378 | folladores de puños 379 | follando con el puño 380 | follando con los puños 381 | folla con los puños 382 | puño 383 | lanzallamas 384 | brida 385 | buscar 386 | buscador de 387 | fetichismo de pies 388 | paja de pies 389 | frotándose 390 | Mierda 391 | botones de mierda 392 | joder 393 | jodido 394 | Cabron 395 | hijos de puta 396 | imbécil 397 | imbéciles 398 | maldito 399 | maldito 400 | malditos 401 | mierdamierdahijo de puta 402 | fóllame 403 | folla 404 | cabrones 405 | joder 406 | idiota 407 | empaquetador de dulces 408 | empacador de chocolate 409 | fuk 410 | fuker 411 | fukker 412 | fukkin 413 | fuks 414 | fukwhit 415 | fukwit 416 | futanari 417 | mierda 418 | fux0r 419 | Punto G 420 | orgia 421 | orgia 422 | gangbanged 423 | gangbanged 424 | orgias 425 | sexo gay 426 | culo gay 427 | gaybob 428 | gaydo 429 | señor gay 430 | sexo gay 431 | tardo gay 432 | gaywad 433 | genitales 434 | polla gigante 435 | chica en 436 | niña en la cima 437 | las chicas se volvieron locas 438 | cabracx 439 | cabra 440 | maldita sea 
441 | maldita sea 442 | maldita sea 443 | maldita sea 444 | maldito 445 | gokkun 446 | baño de oro 447 | buena chica 448 | gooch 449 | buena caca 450 | bien 451 | goregasmo 452 | gringo 453 | ir a tientas 454 | sexo en grupo 455 | Guido 456 | guro 457 | trabajo manual 458 | paja 459 | núcleo duro 460 | duro 461 | sexo duro 462 | heeb 463 | infierno 464 | hentai 465 | el ella 466 | Ho 467 | hoar 468 | hoare 469 | azada 470 | puta 471 | homo 472 | homoerótico 473 | cariño 474 | Honky 475 | puta 476 | horre 477 | más caliente 478 | córneo 479 | caliente carl 480 | chica caliente 481 | sexo caliente 482 | como matar 483 | como asesinar 484 | gordo enorme 485 | follando 486 | incesto 487 | coito 488 | masturbar 489 | paja 490 | burro 491 | paja 492 | cebo de la cárcel 493 | Jailbait 494 | japón 495 | rosquilla de gelatina 496 | hacerse una paja 497 | masturbarse 498 | jigaboo 499 | jigaboo 500 | jiggerboo 501 | esperma 502 | semen 503 | semen 504 | jism 505 | jism 506 | semen 507 | juggs 508 | kawk 509 | pozo 510 | kinbaku 511 | pervertido 512 | rizado 513 | kiunt 514 | mando 515 | perillas 516 | nudoso 517 | nudoso 518 | perilla 519 | cabeza de chorlito 520 | nudoso 521 | bromista 522 | golpe 523 | kondum 524 | kondums 525 | chiflar 526 | coquetear 527 | kum 528 | kúmer 529 | kummer 530 | cumming 531 | kums 532 | kunilingus 533 | kunt 534 | kyke 535 | l3i+ch 536 | picazón 537 | labios 538 | restricción de cuero 539 | chaqueta recta de cuero 540 | fiesta de limon 541 | lesbo 542 | lesbiana 543 | lmfao 544 | lolita 545 | haciendo el amor 546 | lujuria 547 | codiciando 548 | m0f0 549 | m0fo 550 | m45terbato 551 | ma5terb8 552 | ma5terbato 553 | Hazme llegar 554 | chorros masculinos 555 | masoquista 556 | maestro-bate 557 | maestrob8 558 | maestro murciélago* 559 | masterbat3 560 | masterbate 561 | masterbación 562 | masterbaciones 563 | masturbarse 564 | menaje 565 | -------------------------------------------------------------------------------- 
/__tests__/english.test.js: -------------------------------------------------------------------------------- 1 | import { ProfanityEngine } from '../index.js'; 2 | 3 | const language = process.env.LANGUAGE || 'en'; // Default to 'en' if the LANGUAGE environment variable is not set 4 | let profanity; 5 | 6 | describe('English Profanity tests', () => { 7 | beforeAll(async () => { 8 | profanity = new ProfanityEngine({ 9 | language: 'en', 10 | testMode: true, 11 | }); 12 | }); 13 | 14 | afterEach(() => { 15 | profanity.reset(); 16 | }); 17 | 18 | describe('Core English functionality', () => { 19 | it('Should get all the profanity words in an array', async () => { 20 | const allWords = await profanity.all(); 21 | expect(allWords.length).toEqual(958); // Verify this number matches your actual word count 22 | expect(Array.isArray(allWords)).toBe(true); 23 | expect(allWords.length).toBeGreaterThan(0); 24 | }); 25 | 26 | it('Should return true for profanity words', async () => { 27 | const searchWord = await profanity.search('shit'); 28 | expect(searchWord).toEqual(true); 29 | }); 30 | 31 | it('Should return false for normal words', async () => { 32 | const searchWord = await profanity.search('ka'); 33 | expect(searchWord).toEqual(false); 34 | }); 35 | 36 | it('Should return false for any empty string', async () => { 37 | const searchWord = await profanity.search(''); 38 | expect(searchWord).toEqual(false); 39 | }); 40 | 41 | it('Should return true for a sentence containing a profanity word', async () => { 42 | const sentence = 'Do not use bad words like shit or asshole.'; 43 | const hasCurseWords = await profanity.hasCurseWords(sentence); 44 | expect(hasCurseWords).toEqual(true); 45 | }); 46 | 47 | it('Should return false for a sentence with no profanity word', async () => { 48 | const sentence = 'This is a clean and polite sentence.'; 49 | const hasCurseWords = await profanity.hasCurseWords(sentence); 50 | expect(hasCurseWords).toEqual(false); 51 | }); 52 | }); 53 | 54 | 
describe('English-specific edge cases', () => { 55 | it('Should handle case sensitivity correctly', async () => { 56 | expect(await profanity.search('SHIT')).toBe(true); 57 | expect(await profanity.search('Shit')).toBe(true); 58 | expect(await profanity.search('shit')).toBe(true); 59 | }); 60 | 61 | it('Should handle whitespace around words', async () => { 62 | expect(await profanity.search(' shit ')).toBe(true); 63 | expect(await profanity.search('\tshit\n')).toBe(true); 64 | }); 65 | 66 | it('Should detect profanity with punctuation in sentences', async () => { 67 | const testSentences = [ 68 | 'What the shit!', 69 | 'Oh, shit.', 70 | 'Shit? Really?', 71 | 'This is shit, man.', 72 | '"Shit," he said.', 73 | 'Absolute shit-show.', 74 | ]; 75 | 76 | for (const sentence of testSentences) { 77 | expect(await profanity.hasCurseWords(sentence)).toBe(true); 78 | } 79 | }); 80 | 81 | it('Should return correct curse words from sentences', async () => { 82 | const sentence = 'Do not use bad words like shit or asshole.'; 83 | const foundWords = await profanity.getCurseWords(sentence); 84 | 85 | expect(foundWords).toContain('shit'); 86 | expect(foundWords).toContain('asshole'); 87 | expect(foundWords.length).toBe(2); 88 | }); 89 | 90 | it('Should handle multiple instances of same word', async () => { 91 | const sentence = 'shit shit shit everywhere'; 92 | const foundWords = await profanity.getCurseWords(sentence); 93 | 94 | // Should only return unique words 95 | expect(foundWords).toContain('shit'); 96 | expect(foundWords.length).toBe(1); 97 | }); 98 | 99 | it('Should validate specific English profanity words exist', async () => { 100 | // Test a selection of words that should definitely be in an English profanity list 101 | const commonProfanityWords = [ 102 | 'shit', 'fuck', 'damn', 'hell', 'ass', 'bitch' 103 | ]; 104 | 105 | for (const word of commonProfanityWords) { 106 | expect(await profanity.search(word)).toBe(true); 107 | } 108 | }); 109 | 110 | it('Should not flag 
common English words', async () => { 111 | const commonWords = [ 112 | 'hello', 'world', 'computer', 'test', 'function', 'javascript', 113 | 'english', 'language', 'sentence', 'word', 'clean', 'polite' 114 | ]; 115 | 116 | for (const word of commonWords) { 117 | expect(await profanity.search(word)).toBe(false); 118 | } 119 | }); 120 | 121 | it('Should handle contractions and apostrophes', async () => { 122 | const sentence = "Don't say shit, it's not appropriate."; 123 | expect(await profanity.hasCurseWords(sentence)).toBe(true); 124 | }); 125 | 126 | it('Should handle hyphenated words', async () => { 127 | const sentence = 'This is a shit-storm.'; 128 | expect(await profanity.hasCurseWords(sentence)).toBe(true); 129 | }); 130 | 131 | // Additional edge cases for better coverage 132 | it('Should handle mixed case in sentences', async () => { 133 | const sentence = 'This SENTENCE has SHIT and damn IN it'; 134 | expect(await profanity.hasCurseWords(sentence)).toBe(true); 135 | 136 | const foundWords = await profanity.getCurseWords(sentence); 137 | expect(foundWords).toContain('shit'); // Should normalize to lowercase 138 | expect(foundWords).toContain('damn'); 139 | }); 140 | 141 | it('Should handle words at sentence boundaries', async () => { 142 | expect(await profanity.hasCurseWords('shit')).toBe(true); 143 | expect(await profanity.hasCurseWords('shit is bad')).toBe(true); 144 | expect(await profanity.hasCurseWords('that is shit')).toBe(true); 145 | expect(await profanity.hasCurseWords('the shit word')).toBe(true); 146 | }); 147 | 148 | it('Should not detect partial word matches', async () => { 149 | // These should NOT be flagged as containing profanity 150 | const sentences = [ 151 | 'The weather is hellish today', // contains "hell" but as part of "hellish" 152 | 'I love my shirty shirt', // contains "shit" but as part of "shirty" 153 | 'Assessment is important', // contains "ass" but as part of "assessment" 154 | ]; 155 | 156 | for (const sentence of 
sentences) { 157 | // These depend on your word boundaries implementation 158 | // Comment out if your implementation flags these 159 | const result = await profanity.hasCurseWords(sentence); 160 | // You may want to adjust based on your exact implementation 161 | } 162 | }); 163 | }); 164 | 165 | describe('Performance tests for English dataset', () => { 166 | it('Should handle large English text efficiently', async () => { 167 | const largeText = 'This is a test sentence. '.repeat(1000) + 'shit ' + 'Clean text. '.repeat(1000); 168 | 169 | const startTime = Date.now(); 170 | const result = await profanity.hasCurseWords(largeText); 171 | const endTime = Date.now(); 172 | 173 | expect(result).toBe(true); 174 | expect(endTime - startTime).toBeLessThan(100); // Should complete in under 100ms 175 | }); 176 | 177 | it('Should efficiently search through all English terms', async () => { 178 | const allWords = await profanity.all(); 179 | 180 | const startTime = Date.now(); 181 | for (let i = 0; i < 100; i++) { 182 | await profanity.search(allWords[i % allWords.length]); 183 | } 184 | const endTime = Date.now(); 185 | 186 | expect(endTime - startTime).toBeLessThan(50); // Should be very fast with Set lookup 187 | }); 188 | 189 | it('Should handle concurrent operations on English dataset', async () => { 190 | const promises = [ 191 | profanity.search('shit'), 192 | profanity.hasCurseWords('this is shit'), 193 | profanity.getCurseWords('damn shit'), 194 | profanity.all(), 195 | profanity.search('fuck') 196 | ]; 197 | 198 | const results = await Promise.all(promises); 199 | expect(results[0]).toBe(true); // search shit 200 | expect(results[1]).toBe(true); // hasCurseWords 201 | expect(results[2]).toContain('shit'); // getCurseWords 202 | expect(results[3].length).toBe(958); // all words 203 | expect(results[4]).toBe(true); // search fuck 204 | }); 205 | }); 206 | 207 | describe('Data integrity for English', () => { 208 | it('Should not allow modification of English word 
list', async () => { 209 | const terms1 = await profanity.all(); 210 | const originalLength = terms1.length; 211 | 212 | // Try to modify the returned array 213 | terms1.push('fake-word'); 214 | terms1.pop(); 215 | terms1[0] = 'modified'; 216 | 217 | // Get terms again - should be unchanged 218 | const terms2 = await profanity.all(); 219 | expect(terms2.length).toBe(originalLength); 220 | expect(terms2).not.toContain('fake-word'); 221 | expect(terms2[0]).not.toBe('modified'); 222 | }); 223 | 224 | it('Should provide consistent results for English detection', async () => { 225 | const sentence = 'This sentence has shit and damn'; 226 | 227 | const result1 = await profanity.getCurseWords(sentence); 228 | const result2 = await profanity.getCurseWords(sentence); 229 | const result3 = await profanity.hasCurseWords(sentence); 230 | 231 | expect(result1).toEqual(result2); 232 | expect(result3).toBe(true); 233 | }); 234 | }); 235 | }); -------------------------------------------------------------------------------- /__tests__/engine.test.js: -------------------------------------------------------------------------------- 1 | import { ProfanityEngine } from '../index.js'; 2 | 3 | const language = process.env.LANGUAGE || 'en'; // Default to 'en' if the LANGUAGE environment variable is not set 4 | let profanity; 5 | 6 | describe('ProfanityEngine v3 API tests', () => { 7 | beforeAll(async () => { 8 | profanity = new ProfanityEngine({ 9 | language: 'en', 10 | testMode: true, 11 | }); 12 | }); 13 | 14 | afterEach(() => { 15 | profanity.reset(); 16 | }); 17 | 18 | describe('Core functionality', () => { 19 | it('Should return all profanity words in an array', async () => { 20 | const allWords = await profanity.all(); 21 | expect(Array.isArray(allWords)).toBe(true); 22 | expect(allWords.length).toBeGreaterThan(0); 23 | expect(allWords.length).toEqual(958); // Adjust based on your actual count 24 | }); 25 | 26 | it('Should detect profanity words correctly', async () => { 27 | 
expect(await profanity.search('hell')).toBe(true); 28 | expect(await profanity.search('damn')).toBe(true); 29 | }); 30 | 31 | it('Should return false for clean words', async () => { 32 | expect(await profanity.search('hello')).toBe(false); 33 | expect(await profanity.search('world')).toBe(false); 34 | expect(await profanity.search('test')).toBe(false); 35 | }); 36 | 37 | it('Should detect profanity in sentences', async () => { 38 | const sentence = 'This is a test sentence with bad words like hell and damn'; 39 | const hasProfanity = await profanity.hasCurseWords(sentence); 40 | expect(hasProfanity).toBe(true); 41 | }); 42 | 43 | it('Should return false for clean sentences', async () => { 44 | const sentence = 'This is a test sentence with no bad words'; 45 | const hasProfanity = await profanity.hasCurseWords(sentence); 46 | expect(hasProfanity).toBe(false); 47 | }); 48 | 49 | it('Should return list of found profanity words', async () => { 50 | const sentence = 'This is a test sentence with bad words like hell and damn'; 51 | const badWords = await profanity.getCurseWords(sentence); 52 | expect(badWords).toEqual(expect.arrayContaining(['hell', 'damn'])); 53 | expect(badWords).toHaveLength(2); 54 | }); 55 | 56 | it('Should return empty array if no curse words found', async () => { 57 | const sentence = 'This is a test sentence with no bad words'; 58 | const result = await profanity.getCurseWords(sentence); 59 | expect(result).toEqual([]); 60 | }); 61 | }); 62 | 63 | describe('Language fallback behavior', () => { 64 | it('Should fallback to English for unsupported languages', async () => { 65 | const unsupportedProfanity = new ProfanityEngine({ 66 | language: 'nonexistent-language', 67 | testMode: true, 68 | }); 69 | 70 | const terms = await unsupportedProfanity.all(); 71 | expect(terms.length).toEqual(958); // Should load English words 72 | }); 73 | 74 | it('Should work with supported languages', async () => { 75 | const spanishProfanity = new ProfanityEngine({ 76 | 
language: 'es', 77 | testMode: true, 78 | }); 79 | 80 | const terms = await spanishProfanity.all(); 81 | expect(terms.length).toBeGreaterThan(0); 82 | // Should be different from English if Spanish file exists 83 | }); 84 | }); 85 | 86 | describe('Input validation and edge cases', () => { 87 | it('Should handle empty strings gracefully', async () => { 88 | expect(await profanity.search('')).toBe(false); 89 | expect(await profanity.hasCurseWords('')).toBe(false); 90 | expect(await profanity.getCurseWords('')).toEqual([]); 91 | }); 92 | 93 | it('Should handle null/undefined inputs gracefully', async () => { 94 | expect(await profanity.search(null)).toBe(false); 95 | expect(await profanity.search(undefined)).toBe(false); 96 | expect(await profanity.hasCurseWords(null)).toBe(false); 97 | expect(await profanity.hasCurseWords(undefined)).toBe(false); 98 | expect(await profanity.getCurseWords(null)).toEqual([]); 99 | expect(await profanity.getCurseWords(undefined)).toEqual([]); 100 | }); 101 | 102 | it('Should handle non-string inputs gracefully', async () => { 103 | expect(await profanity.search(123)).toBe(false); 104 | expect(await profanity.search({})).toBe(false); 105 | expect(await profanity.search([])).toBe(false); 106 | expect(await profanity.hasCurseWords(123)).toBe(false); 107 | expect(await profanity.getCurseWords(123)).toEqual([]); 108 | }); 109 | 110 | it('Should handle punctuation correctly', async () => { 111 | const sentence = 'What the hell! 
Damn, that sucks.'; 112 | const result = await profanity.hasCurseWords(sentence); 113 | expect(result).toBe(true); 114 | 115 | const foundWords = await profanity.getCurseWords(sentence); 116 | expect(foundWords).toContain('hell'); 117 | expect(foundWords).toContain('damn'); 118 | }); 119 | 120 | it('Should return unique words only', async () => { 121 | const sentence = 'hell hell damn damn hell'; 122 | const badWords = await profanity.getCurseWords(sentence); 123 | expect(badWords).toHaveLength(2); 124 | expect(badWords).toEqual(expect.arrayContaining(['hell', 'damn'])); 125 | }); 126 | 127 | it('Should be case insensitive', async () => { 128 | expect(await profanity.search('HELL')).toBe(true); 129 | expect(await profanity.search('Hell')).toBe(true); 130 | expect(await profanity.search('hell')).toBe(true); 131 | 132 | const sentence = 'This has HELL and Damn in it'; 133 | expect(await profanity.hasCurseWords(sentence)).toBe(true); 134 | }); 135 | 136 | it('Should handle whitespace properly', async () => { 137 | expect(await profanity.search(' hell ')).toBe(true); 138 | expect(await profanity.search('\thell\n')).toBe(true); 139 | }); 140 | 141 | it('Should handle various punctuation marks', async () => { 142 | const testSentences = [ 143 | 'What the hell?', 144 | 'Damn!', 145 | 'Hell, no!', 146 | 'Oh-hell-no', 147 | 'hell.', 148 | 'hell,', 149 | 'hell;', 150 | 'hell:', 151 | '(hell)', 152 | '[hell]', 153 | '{hell}', 154 | '"hell"', 155 | "'hell'", 156 | ]; 157 | 158 | for (const sentence of testSentences) { 159 | expect(await profanity.hasCurseWords(sentence)).toBe(true); 160 | } 161 | }); 162 | }); 163 | 164 | describe('Data integrity and immutability', () => { 165 | it('Should not modify original terms array', async () => { 166 | const terms1 = await profanity.all(); 167 | const terms2 = await profanity.all(); 168 | 169 | terms1.push('test-word'); 170 | expect(terms2).not.toContain('test-word'); 171 | expect(terms1.length).not.toEqual(terms2.length); 172 | }); 173 
| 174 | it('Should return consistent results across multiple calls', async () => { 175 | const sentence = 'This sentence has hell and damn'; 176 | 177 | const result1 = await profanity.getCurseWords(sentence); 178 | const result2 = await profanity.getCurseWords(sentence); 179 | const result3 = await profanity.hasCurseWords(sentence); 180 | 181 | expect(result1).toEqual(result2); 182 | expect(result3).toBe(true); 183 | }); 184 | 185 | it('Should maintain state after reset', async () => { 186 | // Use the profanity engine 187 | await profanity.search('hell'); 188 | expect(profanity.isInitialized).toBe(true); 189 | 190 | // Reset it 191 | profanity.reset(); 192 | expect(profanity.isInitialized).toBe(false); 193 | 194 | // Should work again after reset 195 | expect(await profanity.search('hell')).toBe(true); 196 | expect(profanity.isInitialized).toBe(true); 197 | }); 198 | }); 199 | 200 | describe('Performance and concurrency', () => { 201 | it('Should handle concurrent operations', async () => { 202 | const promises = [ 203 | profanity.search('hell'), 204 | profanity.hasCurseWords('this is hell'), 205 | profanity.getCurseWords('damn hell'), 206 | profanity.all(), 207 | profanity.search('damn') 208 | ]; 209 | 210 | const results = await Promise.all(promises); 211 | expect(results[0]).toBe(true); // search hell 212 | expect(results[1]).toBe(true); // hasCurseWords 213 | expect(results[2]).toContain('hell'); // getCurseWords 214 | expect(results[3].length).toBeGreaterThan(0); // all 215 | expect(results[4]).toBe(true); // search damn 216 | }); 217 | 218 | it('Should handle large text efficiently', async () => { 219 | const largeText = 'This is a test sentence. '.repeat(1000) + 'hell ' + 'Clean text. 
'.repeat(1000); 220 | 221 | const startTime = Date.now(); 222 | const result = await profanity.hasCurseWords(largeText); 223 | const endTime = Date.now(); 224 | 225 | expect(result).toBe(true); 226 | expect(endTime - startTime).toBeLessThan(100); // Should complete quickly 227 | }); 228 | 229 | it('Should initialize only once even with multiple method calls', async () => { 230 | const newProfanity = new ProfanityEngine({ 231 | language: 'en', 232 | testMode: true, 233 | }); 234 | 235 | // Multiple calls should not re-initialize 236 | await newProfanity.search('test'); 237 | await newProfanity.hasCurseWords('test'); 238 | await newProfanity.all(); 239 | 240 | expect(newProfanity.isInitialized).toBe(true); 241 | }); 242 | }); 243 | 244 | describe('Configuration options', () => { 245 | it('Should use default configuration when no config provided', () => { 246 | const defaultProfanity = new ProfanityEngine(); 247 | expect(defaultProfanity.language).toBe('en'); 248 | expect(defaultProfanity.isTestMode).toBe(false); 249 | }); 250 | 251 | it('Should handle partial configuration objects', () => { 252 | const partialProfanity = new ProfanityEngine({ language: 'es' }); 253 | expect(partialProfanity.language).toBe('es'); 254 | expect(partialProfanity.isTestMode).toBe(false); 255 | }); 256 | 257 | it('Should respect testMode setting', async () => { 258 | // Store original console.warn 259 | const originalWarn = console.warn; 260 | let warnCalled = false; 261 | 262 | // Mock console.warn 263 | console.warn = () => { 264 | warnCalled = true; 265 | }; 266 | 267 | // Test mode should suppress warnings 268 | const testProfanity = new ProfanityEngine({ 269 | language: 'nonexistent-language', 270 | testMode: true, 271 | }); 272 | 273 | warnCalled = false; 274 | await testProfanity.all(); 275 | expect(warnCalled).toBe(false); 276 | 277 | // Production mode should show warnings 278 | const prodProfanity = new ProfanityEngine({ 279 | language: 'nonexistent-language', 280 | testMode: 
false, 281 | }); 282 | 283 | warnCalled = false; 284 | await prodProfanity.all(); 285 | expect(warnCalled).toBe(true); 286 | 287 | // Restore original console.warn 288 | console.warn = originalWarn; 289 | }); 290 | }); 291 | }); -------------------------------------------------------------------------------- /__tests__/spanish.test.js: -------------------------------------------------------------------------------- 1 | import { ProfanityEngine } from '../index.js'; 2 | 3 | const language = process.env.LANGUAGE || 'en'; // Default to 'en' if the LANGUAGE environment variable is not set 4 | let profanity; 5 | 6 | describe('Spanish Profanity tests', () => { 7 | beforeAll(async () => { 8 | profanity = new ProfanityEngine({ 9 | language: 'es', 10 | testMode: true, 11 | }); 12 | }); 13 | 14 | afterEach(() => { 15 | profanity.reset(); 16 | }); 17 | 18 | describe('Core Spanish functionality', () => { 19 | it('Should get all the profanity words in an array', async () => { 20 | const allWords = await profanity.all(); 21 | expect(allWords.length).toEqual(564); // Verify this matches your Spanish word count 22 | expect(Array.isArray(allWords)).toBe(true); 23 | expect(allWords.length).toBeGreaterThan(0); 24 | }); 25 | 26 | it('Should return true for profanity words', async () => { 27 | const searchWord = await profanity.search('labios'); 28 | expect(searchWord).toEqual(true); 29 | }); 30 | 31 | it('Should return false for normal words', async () => { 32 | const searchWord = await profanity.search('ka'); 33 | expect(searchWord).toEqual(false); 34 | }); 35 | 36 | it('Should return false for any empty string', async () => { 37 | const searchWord = await profanity.search(''); 38 | expect(searchWord).toEqual(false); 39 | }); 40 | 41 | it('Should return true for a sentence containing a profanity word', async () => { 42 | const sentence = 'No deberías decir malas culo palabras como mierda.'; 43 | const hasCurseWords = await profanity.hasCurseWords(sentence); 44 | 
expect(hasCurseWords).toEqual(true); 45 | }); 46 | 47 | it('Should return false for a sentence with no profanity word', async () => { 48 | const sentence = 'Esta es una oración limpia y educada.'; 49 | const hasCurseWords = await profanity.hasCurseWords(sentence); 50 | expect(hasCurseWords).toEqual(false); 51 | }); 52 | }); 53 | 54 | describe('Spanish-specific edge cases', () => { 55 | it('Should handle Spanish accented characters', async () => { 56 | // Test words with tildes and accents (if they exist in your word list) 57 | const accentedSentence = 'No uses palabras como cabrón o pendejó.'; 58 | const result = await profanity.hasCurseWords(accentedSentence); 59 | // This will depend on whether your Spanish word list includes accented versions 60 | expect(typeof result).toBe('boolean'); 61 | }); 62 | 63 | it('Should handle case sensitivity correctly in Spanish', async () => { 64 | expect(await profanity.search('MIERDA')).toBe(true); 65 | expect(await profanity.search('Mierda')).toBe(true); 66 | expect(await profanity.search('mierda')).toBe(true); 67 | }); 68 | 69 | it('Should handle whitespace around Spanish words', async () => { 70 | expect(await profanity.search(' mierda ')).toBe(true); 71 | expect(await profanity.search('\tmierda\n')).toBe(true); 72 | }); 73 | 74 | it('Should detect Spanish profanity with punctuation', async () => { 75 | const testSentences = [ 76 | '¡Qué mierda!', 77 | 'Oh, mierda.', 78 | '¿Mierda? 
¿En serio?', 79 | 'Esto es una mierda, hombre.', 80 | '"Mierda," dijo él.', 81 | 'Una mierda total.', 82 | ]; 83 | 84 | for (const sentence of testSentences) { 85 | expect(await profanity.hasCurseWords(sentence)).toBe(true); 86 | } 87 | }); 88 | 89 | it('Should return correct Spanish curse words from sentences', async () => { 90 | const sentence = 'No deberías decir malas culo palabras como mierda.'; 91 | const foundWords = await profanity.getCurseWords(sentence); 92 | 93 | // Should find both curse words 94 | expect(foundWords).toContain('culo'); 95 | expect(foundWords).toContain('mierda'); 96 | expect(foundWords.length).toBe(2); 97 | }); 98 | 99 | it('Should handle multiple instances of same Spanish word', async () => { 100 | const sentence = 'mierda mierda mierda por todas partes'; 101 | const foundWords = await profanity.getCurseWords(sentence); 102 | 103 | // Should only return unique words 104 | expect(foundWords).toContain('mierda'); 105 | expect(foundWords.length).toBe(1); 106 | }); 107 | 108 | it('Should validate specific Spanish profanity words exist', async () => { 109 | // Test common Spanish profanity words (adjust based on your actual word list) 110 | const commonSpanishProfanity = [ 111 | 'mierda', 'culo', 'cabron', 'puta', 'joder' 112 | ]; 113 | 114 | // Note: Only test words that actually exist in your Spanish word list 115 | for (const word of commonSpanishProfanity) { 116 | const result = await profanity.search(word); 117 | // We can't assert true/false without knowing your exact word list 118 | expect(typeof result).toBe('boolean'); 119 | } 120 | }); 121 | 122 | it('Should not flag common Spanish words', async () => { 123 | const commonSpanishWords = [ 124 | 'hola', 'mundo', 'computadora', 'prueba', 'función', 'javascript', 125 | 'español', 'idioma', 'oración', 'palabra', 'limpio', 'educado', 126 | 'casa', 'perro', 'gato', 'agua', 'comida', 'amor' 127 | ]; 128 | 129 | for (const word of commonSpanishWords) { 130 | expect(await 
profanity.search(word)).toBe(false); 131 | } 132 | }); 133 | 134 | it('Should handle Spanish contractions and apostrophes', async () => { 135 | // Spanish doesn't use contractions like English, but test similar constructs 136 | const sentence = 'No digas mierda, no es apropiado.'; 137 | expect(await profanity.hasCurseWords(sentence)).toBe(true); 138 | }); 139 | 140 | it('Should handle Spanish inverted punctuation', async () => { 141 | const sentence = '¡No digas mierda!'; 142 | expect(await profanity.hasCurseWords(sentence)).toBe(true); 143 | 144 | const sentence2 = '¿Por qué dices mierda?'; 145 | expect(await profanity.hasCurseWords(sentence2)).toBe(true); 146 | }); 147 | 148 | it('Should handle Spanish special characters', async () => { 149 | const sentence = 'La niña dijo una mierda.'; 150 | expect(await profanity.hasCurseWords(sentence)).toBe(true); 151 | }); 152 | 153 | // Additional Spanish-specific tests 154 | it('Should handle mixed case in Spanish sentences', async () => { 155 | const sentence = 'Esta ORACIÓN tiene MIERDA y culo EN ella'; 156 | expect(await profanity.hasCurseWords(sentence)).toBe(true); 157 | 158 | const foundWords = await profanity.getCurseWords(sentence); 159 | expect(foundWords).toContain('mierda'); // Should normalize to lowercase 160 | expect(foundWords).toContain('culo'); 161 | }); 162 | 163 | it('Should handle Spanish words at sentence boundaries', async () => { 164 | expect(await profanity.hasCurseWords('mierda')).toBe(true); 165 | expect(await profanity.hasCurseWords('mierda es malo')).toBe(true); 166 | expect(await profanity.hasCurseWords('eso es mierda')).toBe(true); 167 | expect(await profanity.hasCurseWords('la palabra mierda')).toBe(true); 168 | }); 169 | 170 | it('Should handle Spanish gender variations correctly', async () => { 171 | // Test if your list includes both masculine and feminine forms 172 | const sentence = 'Él es un idiota y ella es una idiota.'; 173 | const result = await profanity.hasCurseWords(sentence); 174 
| expect(typeof result).toBe('boolean'); 175 | }); 176 | 177 | it('Should handle Spanish diminutives and variations', async () => { 178 | // Test common Spanish word variations if they exist in your list 179 | const variations = [ 180 | 'No seas pendejo', 181 | 'Qué pendejada', 182 | 'Está cabronísimo' 183 | ]; 184 | 185 | for (const sentence of variations) { 186 | const result = await profanity.hasCurseWords(sentence); 187 | expect(typeof result).toBe('boolean'); 188 | } 189 | }); 190 | }); 191 | 192 | describe('Performance tests for Spanish dataset', () => { 193 | it('Should handle large Spanish text efficiently', async () => { 194 | const largeText = 'Esta es una oración de prueba. '.repeat(1000) + 'mierda ' + 'Texto limpio. '.repeat(1000); 195 | 196 | const startTime = Date.now(); 197 | const result = await profanity.hasCurseWords(largeText); 198 | const endTime = Date.now(); 199 | 200 | expect(result).toBe(true); 201 | expect(endTime - startTime).toBeLessThan(100); // Should complete in under 100ms 202 | }); 203 | 204 | it('Should efficiently search through all Spanish terms', async () => { 205 | const allWords = await profanity.all(); 206 | 207 | const startTime = Date.now(); 208 | for (let i = 0; i < 100; i++) { 209 | await profanity.search(allWords[i % allWords.length]); 210 | } 211 | const endTime = Date.now(); 212 | 213 | expect(endTime - startTime).toBeLessThan(50); // Should be very fast with Set lookup 214 | }); 215 | 216 | it('Should handle concurrent operations on Spanish dataset', async () => { 217 | const promises = [ 218 | profanity.search('mierda'), 219 | profanity.hasCurseWords('esto es mierda'), 220 | profanity.getCurseWords('culo mierda'), 221 | profanity.all(), 222 | profanity.search('labios') 223 | ]; 224 | 225 | const results = await Promise.all(promises); 226 | expect(results[0]).toBe(true); // search mierda 227 | expect(results[1]).toBe(true); // hasCurseWords 228 | expect(results[2]).toContain('mierda'); // getCurseWords 229 | 
expect(results[3].length).toBe(564); // all Spanish words 230 | expect(results[4]).toBe(true); // search labios 231 | }); 232 | }); 233 | 234 | describe('Spanish language specificity', () => { 235 | it('Should load Spanish words correctly without falling back to English', async () => { 236 | const allWords = await profanity.all(); 237 | expect(allWords.length).toBe(564); // Should match Spanish count, not English count (958) 238 | }); 239 | 240 | it('Should detect Spanish-specific profanity that might not exist in English', async () => { 241 | // Test a word that's likely Spanish-specific 242 | const result = await profanity.search('labios'); 243 | expect(result).toBe(true); // Based on your original test 244 | }); 245 | 246 | it('Should handle regional Spanish variations', async () => { 247 | // Test words that might be offensive in some Spanish-speaking regions 248 | const regionalWords = [ 249 | 'pinche', // Mexican 250 | 'boludo', // Argentinian 251 | 'coño', // Spanish 252 | 'chingón' // Mexican 253 | ]; 254 | 255 | for (const word of regionalWords) { 256 | const result = await profanity.search(word); 257 | // Just verify it returns a boolean - depends on your word list 258 | expect(typeof result).toBe('boolean'); 259 | } 260 | }); 261 | }); 262 | 263 | describe('Data integrity for Spanish', () => { 264 | it('Should not allow modification of Spanish word list', async () => { 265 | const terms1 = await profanity.all(); 266 | const originalLength = terms1.length; 267 | 268 | // Try to modify the returned array 269 | terms1.push('palabra-falsa'); 270 | terms1.pop(); 271 | terms1[0] = 'modificado'; 272 | 273 | // Get terms again - should be unchanged 274 | const terms2 = await profanity.all(); 275 | expect(terms2.length).toBe(originalLength); 276 | expect(terms2).not.toContain('palabra-falsa'); 277 | expect(terms2[0]).not.toBe('modificado'); 278 | }); 279 | 280 | it('Should provide consistent results for Spanish detection', async () => { 281 | const sentence = 
'Esta oración tiene mierda y culo'; 282 | 283 | const result1 = await profanity.getCurseWords(sentence); 284 | const result2 = await profanity.getCurseWords(sentence); 285 | const result3 = await profanity.hasCurseWords(sentence); 286 | 287 | expect(result1).toEqual(result2); 288 | expect(result3).toBe(true); 289 | }); 290 | }); 291 | }); -------------------------------------------------------------------------------- /data/en.txt: -------------------------------------------------------------------------------- 1 | 2 girls 1 cup 2 | 2g1c 3 | 4r5e 4 | 5h1t 5 | 5hit 6 | a55 7 | a_s_s 8 | acrotomophilia 9 | alabama hot pocket 10 | alaskan pipeline 11 | anal 12 | anilingus 13 | anus 14 | apeshit 15 | ar5e 16 | arrse 17 | arse 18 | arsehole 19 | ass 20 | ass-fucker 21 | ass-hat 22 | ass-pirate 23 | assbag 24 | assbandit 25 | assbanger 26 | assbite 27 | assclown 28 | asscock 29 | asscracker 30 | asses 31 | assface 32 | assfucker 33 | assfukka 34 | assgoblin 35 | asshat 36 | asshead 37 | asshole 38 | assholes 39 | asshopper 40 | assjacker 41 | asslick 42 | asslicker 43 | assmonkey 44 | assmunch 45 | assmuncher 46 | asspirate 47 | assshole 48 | asssucker 49 | asswad 50 | asswhole 51 | asswipe 52 | auto erotic 53 | autoerotic 54 | b!tch 55 | b00bs 56 | b17ch 57 | b1tch 58 | babeland 59 | baby batter 60 | baby juice 61 | ball gag 62 | ball gravy 63 | ball kicking 64 | ball licking 65 | ball sack 66 | ball sucking 67 | ballbag 68 | balls 69 | ballsack 70 | bampot 71 | bangbros 72 | bareback 73 | barely legal 74 | barenaked 75 | bastard 76 | bastardo 77 | bastinado 78 | bbw 79 | bdsm 80 | beaner 81 | beaners 82 | beastial 83 | beastiality 84 | beastility 85 | beaver cleaver 86 | beaver lips 87 | bellend 88 | bestial 89 | bestiality 90 | bi+ch 91 | biatch 92 | big black 93 | big breasts 94 | big knockers 95 | big tits 96 | bimbos 97 | birdlock 98 | bitch 99 | bitcher 100 | bitchers 101 | bitches 102 | bitchin 103 | bitching 104 | black cock 105 | blonde action 106 | blonde on 
blonde action 107 | bloody 108 | blow job 109 | blow your load 110 | blowjob 111 | blowjobs 112 | blue waffle 113 | blumpkin 114 | boiolas 115 | bollock 116 | bollocks 117 | bollok 118 | bollox 119 | bondage 120 | boner 121 | boob 122 | boobie 123 | boobs 124 | booobs 125 | boooobs 126 | booooobs 127 | booooooobs 128 | booty call 129 | breasts 130 | brown showers 131 | brunette action 132 | buceta 133 | bugger 134 | bukkake 135 | bulldyke 136 | bullet vibe 137 | bullshit 138 | bum 139 | bung hole 140 | bunghole 141 | bunny fucker 142 | busty 143 | butt 144 | butt-pirate 145 | buttcheeks 146 | butthole 147 | buttmunch 148 | buttplug 149 | c0ck 150 | c0cksucker 151 | camel toe 152 | camgirl 153 | camslut 154 | camwhore 155 | carpet muncher 156 | carpetmuncher 157 | cawk 158 | chinc 159 | chink 160 | choad 161 | chocolate rosebuds 162 | chode 163 | cipa 164 | circlejerk 165 | cl1t 166 | cleveland steamer 167 | clit 168 | clitface 169 | clitoris 170 | clits 171 | clover clamps 172 | clusterfuck 173 | cnut 174 | cock 175 | cock-sucker 176 | cockbite 177 | cockburger 178 | cockface 179 | cockhead 180 | cockjockey 181 | cockknoker 182 | cockmaster 183 | cockmongler 184 | cockmongruel 185 | cockmonkey 186 | cockmunch 187 | cockmuncher 188 | cocknose 189 | cocknugget 190 | cocks 191 | cockshit 192 | cocksmith 193 | cocksmoker 194 | cocksuck 195 | cocksuck 196 | cocksucked 197 | cocksucked 198 | cocksucker 199 | cocksucking 200 | cocksucks 201 | cocksuka 202 | cocksukka 203 | cok 204 | cokmuncher 205 | coksucka 206 | coochie 207 | coochy 208 | coon 209 | coons 210 | cooter 211 | coprolagnia 212 | coprophilia 213 | cornhole 214 | cox 215 | crap 216 | creampie 217 | cum 218 | cumbubble 219 | cumdumpster 220 | cumguzzler 221 | cumjockey 222 | cummer 223 | cumming 224 | cums 225 | cumshot 226 | cumslut 227 | cumtart 228 | cunilingus 229 | cunillingus 230 | cunnie 231 | cunnilingus 232 | cunt 233 | cuntface 234 | cunthole 235 | cuntlick 236 | cuntlick 237 | cuntlicker 238 | 
cuntlicker 239 | cuntlicking 240 | cuntlicking 241 | cuntrag 242 | cunts 243 | cyalis 244 | cyberfuc 245 | cyberfuck 246 | cyberfucked 247 | cyberfucker 248 | cyberfuckers 249 | cyberfucking 250 | d1ck 251 | dammit 252 | damn 253 | darkie 254 | date rape 255 | daterape 256 | deep throat 257 | deepthroat 258 | dendrophilia 259 | dick 260 | dickbag 261 | dickbeater 262 | dickface 263 | dickhead 264 | dickhole 265 | dickjuice 266 | dickmilk 267 | dickmonger 268 | dickslap 269 | dicksucker 270 | dickwad 271 | dickweasel 272 | dickweed 273 | dickwod 274 | dike 275 | dildo 276 | dildos 277 | dingleberries 278 | dingleberry 279 | dink 280 | dinks 281 | dipshit 282 | dirsa 283 | dirty pillows 284 | dirty sanchez 285 | dlck 286 | dog style 287 | dog-fucker 288 | doggie style 289 | doggiestyle 290 | doggin 291 | dogging 292 | doggy style 293 | doggystyle 294 | dolcett 295 | domination 296 | dominatrix 297 | dommes 298 | donkey punch 299 | donkeyribber 300 | doochbag 301 | dookie 302 | doosh 303 | double dong 304 | double penetration 305 | douche 306 | douchebag 307 | dp action 308 | dry hump 309 | duche 310 | dumb 311 | dumbshit 312 | dumshit 313 | dvda 314 | dyke 315 | eat my ass 316 | ecchi 317 | ejaculate 318 | ejaculated 319 | ejaculates 320 | ejaculating 321 | ejaculatings 322 | ejaculation 323 | ejakulate 324 | erotic 325 | erotism 326 | escort 327 | eunuch 328 | f u c k 329 | f u c k e r 330 | f4nny 331 | f_u_c_k 332 | fag 333 | fagbag 334 | fagg 335 | fagging 336 | faggit 337 | faggitt 338 | faggot 339 | faggs 340 | fagot 341 | fagots 342 | fags 343 | fagtard 344 | fanny 345 | fannyflaps 346 | fannyfucker 347 | fanyy 348 | fart 349 | farted 350 | farting 351 | farty 352 | fatass 353 | fcuk 354 | fcuker 355 | fcuking 356 | fecal 357 | feck 358 | fecker 359 | felatio 360 | felch 361 | felching 362 | fellate 363 | fellatio 364 | feltch 365 | female squirting 366 | femdom 367 | figging 368 | fingerbang 369 | fingerfuck 370 | fingerfucked 371 | fingerfucker 372 | 
fingerfuckers 373 | fingerfucking 374 | fingerfucks 375 | fingering 376 | fistfuck 377 | fistfucked 378 | fistfucker 379 | fistfuckers 380 | fistfucking 381 | fistfuckings 382 | fistfucks 383 | fisting 384 | flamer 385 | flange 386 | fook 387 | fooker 388 | fool 389 | foot fetish 390 | footjob 391 | frotting 392 | fuck 393 | fuck buttons 394 | fucka 395 | fucked 396 | fucker 397 | fuckers 398 | fuckhead 399 | fuckheads 400 | fuckin 401 | fucking 402 | fuckings 403 | fuckingshitmotherfucker 404 | fuckme 405 | fucks 406 | fucktards 407 | fuckwhit 408 | fuckwit 409 | fudge packer 410 | fudgepacker 411 | fuk 412 | fuker 413 | fukker 414 | fukkin 415 | fuks 416 | fukwhit 417 | fukwit 418 | futanari 419 | fux 420 | fux0r 421 | g-spot 422 | gang bang 423 | gangbang 424 | gangbanged 425 | gangbanged 426 | gangbangs 427 | gay sex 428 | gayass 429 | gaybob 430 | gaydo 431 | gaylord 432 | gaysex 433 | gaytard 434 | gaywad 435 | genitals 436 | giant cock 437 | girl on 438 | girl on top 439 | girls gone wild 440 | goatcx 441 | goatse 442 | god damn 443 | god-dam 444 | god-damned 445 | goddamn 446 | goddamned 447 | gokkun 448 | golden shower 449 | goo girl 450 | gooch 451 | goodpoop 452 | gook 453 | goregasm 454 | gringo 455 | grope 456 | group sex 457 | guido 458 | guro 459 | hand job 460 | handjob 461 | hard core 462 | hardcore 463 | hardcoresex 464 | heeb 465 | hell 466 | hentai 467 | heshe 468 | ho 469 | hoar 470 | hoare 471 | hoe 472 | hoer 473 | homo 474 | homoerotic 475 | honkey 476 | honky 477 | hooker 478 | hore 479 | horniest 480 | horny 481 | hot carl 482 | hot chick 483 | hotsex 484 | how to kill 485 | how to murder 486 | huge fat 487 | humping 488 | incest 489 | intercourse 490 | jack off 491 | jack-off 492 | jackass 493 | jackoff 494 | jail bait 495 | jailbait 496 | jap 497 | jelly donut 498 | jerk 499 | jerk off 500 | jerk-off 501 | jigaboo 502 | jiggaboo 503 | jiggerboo 504 | jism 505 | jiz 506 | jiz 507 | jizm 508 | jizm 509 | jizz 510 | juggs 511 | kawk 512 | 
kike 513 | kinbaku 514 | kinkster 515 | kinky 516 | kiunt 517 | knob 518 | knobbing 519 | knobead 520 | knobed 521 | knobend 522 | knobhead 523 | knobjocky 524 | knobjokey 525 | kock 526 | kondum 527 | kondums 528 | kooch 529 | kootch 530 | kum 531 | kumer 532 | kummer 533 | kumming 534 | kums 535 | kunilingus 536 | kunt 537 | kyke 538 | l3i+ch 539 | l3itch 540 | labia 541 | leather restraint 542 | leather straight jacket 543 | lemon party 544 | lesbo 545 | lezzie 546 | lmfao 547 | lolita 548 | lovemaking 549 | lust 550 | lusting 551 | m0f0 552 | m0fo 553 | m45terbate 554 | ma5terb8 555 | ma5terbate 556 | make me come 557 | male squirting 558 | masochist 559 | master-bate 560 | masterb8 561 | masterbat* 562 | masterbat3 563 | masterbate 564 | masterbation 565 | masterbations 566 | masturbate 567 | menage a trois 568 | milf 569 | minge 570 | missionary position 571 | mo-fo 572 | mof0 573 | mofo 574 | mothafuck 575 | mothafucka 576 | mothafuckas 577 | mothafuckaz 578 | mothafucked 579 | mothafucker 580 | mothafuckers 581 | mothafuckin 582 | mothafucking 583 | mothafuckings 584 | mothafucks 585 | mother fucker 586 | motherfuck 587 | motherfucked 588 | motherfucker 589 | motherfuckers 590 | motherfuckin 591 | motherfucking 592 | motherfuckings 593 | motherfuckka 594 | motherfucks 595 | mound of venus 596 | mr hands 597 | muff 598 | muff diver 599 | muffdiver 600 | muffdiving 601 | mutha 602 | muthafecker 603 | muthafuckker 604 | muther 605 | mutherfucker 606 | n1gga 607 | n1gger 608 | nambla 609 | nawashi 610 | nazi 611 | negro 612 | neonazi 613 | nig nog 614 | nigg3r 615 | nigg4h 616 | nigga 617 | niggah 618 | niggas 619 | niggaz 620 | nigger 621 | niggers 622 | niglet 623 | nimphomania 624 | nipple 625 | nipples 626 | nob 627 | nob jokey 628 | nobhead 629 | nobjocky 630 | nobjokey 631 | nsfw images 632 | nude 633 | nudity 634 | numbnuts 635 | nutsack 636 | nympho 637 | nymphomania 638 | octopussy 639 | omorashi 640 | one cup two girls 641 | one guy one jar 642 | 
orgasim 643 | orgasim 644 | orgasims 645 | orgasm 646 | orgasms 647 | orgy 648 | p0rn 649 | paedophile 650 | paki 651 | panooch 652 | panties 653 | panty 654 | pawn 655 | pecker 656 | peckerhead 657 | pedobear 658 | pedophile 659 | pegging 660 | penis 661 | penisfucker 662 | phone sex 663 | phonesex 664 | phuck 665 | phuk 666 | phuked 667 | phuking 668 | phukked 669 | phukking 670 | phuks 671 | phuq 672 | piece of shit 673 | pigfucker 674 | pimpis 675 | pis 676 | pises 677 | pisin 678 | pising 679 | pisof 680 | piss 681 | piss pig 682 | pissed 683 | pisser 684 | pissers 685 | pisses 686 | pissflap 687 | pissflaps 688 | pissin 689 | pissin 690 | pissing 691 | pissoff 692 | pissoff 693 | pisspig 694 | playboy 695 | pleasure chest 696 | pole smoker 697 | polesmoker 698 | pollock 699 | ponyplay 700 | poo 701 | poof 702 | poon 703 | poonani 704 | poonany 705 | poontang 706 | poop 707 | poop chute 708 | poopchute 709 | porn 710 | porno 711 | pornography 712 | pornos 713 | prick 714 | pricks 715 | prince albert piercing 716 | pron 717 | pthc 718 | pube 719 | pubes 720 | punanny 721 | punany 722 | punta 723 | pusies 724 | pusse 725 | pussi 726 | pussies 727 | pussy 728 | pussylicking 729 | pussys 730 | pusy 731 | puto 732 | queaf 733 | queef 734 | queerbait 735 | queerhole 736 | quim 737 | raghead 738 | raging boner 739 | rape 740 | raping 741 | rapist 742 | rectum 743 | renob 744 | retard 745 | reverse cowgirl 746 | rimjaw 747 | rimjob 748 | rimming 749 | rosy palm 750 | rosy palm and her 5 sisters 751 | ruski 752 | rusty trombone 753 | s hit 754 | s&m 755 | s.o.b. 
756 | s_h_i_t 757 | sadism 758 | sadist 759 | santorum 760 | scat 761 | schlong 762 | scissoring 763 | screwing 764 | scroat 765 | scrote 766 | scrotum 767 | semen 768 | sex 769 | sexo 770 | sexy 771 | sh!+ 772 | sh!t 773 | sh1t 774 | shag 775 | shagger 776 | shaggin 777 | shagging 778 | shaved beaver 779 | shaved pussy 780 | shemale 781 | shi+ 782 | shibari 783 | shit 784 | shit-ass 785 | shit-bag 786 | shit-bagger 787 | shit-brain 788 | shit-breath 789 | shit-cunt 790 | shit-dick 791 | shit-eating 792 | shit-face 793 | shit-faced 794 | shit-fit 795 | shit-head 796 | shit-heel 797 | shit-hole 798 | shit-house 799 | shit-load 800 | shit-pot 801 | shit-spitter 802 | shit-stain 803 | shitass 804 | shitbag 805 | shitbagger 806 | shitblimp 807 | shitbrain 808 | shitbreath 809 | shitcunt 810 | shitdick 811 | shite 812 | shiteating 813 | shited 814 | shitey 815 | shitface 816 | shitfaced 817 | shitfit 818 | shitfuck 819 | shitfull 820 | shithead 821 | shitheel 822 | shithole 823 | shithouse 824 | shiting 825 | shitings 826 | shitload 827 | shitpot 828 | shits 829 | shitspitter 830 | shitstain 831 | shitted 832 | shitter 833 | shitters 834 | shittiest 835 | shitting 836 | shittings 837 | shitty 838 | shitty 839 | shity 840 | shiz 841 | shiznit 842 | shota 843 | shrimping 844 | skank 845 | skeet 846 | slanteye 847 | slut 848 | slutbag 849 | sluts 850 | smeg 851 | smegma 852 | smut 853 | snatch 854 | snowballing 855 | sodomize 856 | sodomy 857 | son-of-a-bitch 858 | spac 859 | spic 860 | spick 861 | splooge 862 | splooge moose 863 | spooge 864 | spread legs 865 | spunk 866 | strap on 867 | strapon 868 | strappado 869 | strip club 870 | style doggy 871 | suck 872 | sucker 873 | sucks 874 | suicide girls 875 | sultry women 876 | swastika 877 | swinger 878 | t1tt1e5 879 | t1tties 880 | tainted love 881 | tard 882 | taste my 883 | tea bagging 884 | teets 885 | teez 886 | testical 887 | testicle 888 | threesome 889 | throating 890 | thundercunt 891 | tied up 892 | tight white 
import { ProfanityEngine } from '../index.js';

/** Engine under test; created once in beforeAll and reset after every test. */
let profanity;

/**
 * Smoke-test helper: asserts that `profanity[method](input)` resolves to a
 * boolean for every input, i.e. the engine copes with the input at all.
 *
 * @param {'search' | 'hasCurseWords'} method - Engine method to call.
 * @param {string[]} inputs - Words or sentences to feed to the method.
 */
const expectBooleanForEach = async (method, inputs) => {
  for (const input of inputs) {
    expect(typeof (await profanity[method](input))).toBe('boolean');
  }
};

/**
 * Asserts that `search` reports every given word as clean.
 *
 * @param {string[]} words - Words expected not to be in the profanity list.
 */
const expectSearchClean = async (words) => {
  for (const word of words) {
    expect(await profanity.search(word)).toBe(false);
  }
};

describe('Arabic Profanity tests', () => {
  beforeAll(() => {
    profanity = new ProfanityEngine({
      language: 'ar',
      testMode: true,
    });
  });

  afterEach(() => {
    profanity.reset();
  });

  describe('Core Arabic functionality', () => {
    it('Should get all the profanity words in an array', async () => {
      const allWords = await profanity.all();
      expect(Array.isArray(allWords)).toBe(true);
      expect(allWords.length).toBeGreaterThan(0);
    });

    it('Should return true for Arabic profanity words', async () => {
      // 'كلب' is a real entry in data/ar.txt, so this can actually fail.
      expect(await profanity.search('كلب')).toBe(true);
    });

    it('Should return false for normal Arabic words', async () => {
      await expectSearchClean([
        'مرحبا', // Hello
        'شكرا', // Thank you
        'بيت', // House
        'كتاب', // Book
        'ماء', // Water
      ]);
    });

    it('Should return false for any empty string', async () => {
      expect(await profanity.search('')).toEqual(false);
    });

    it('Should return true for a sentence containing a profanity word', async () => {
      // The listed word is whitespace-delimited and free of punctuation so
      // the check does not depend on how the engine strips punctuation.
      // NOTE(review): assumes hasCurseWords tokenises on whitespace - confirm.
      const sentence = 'هذا كلب كبير';
      expect(await profanity.hasCurseWords(sentence)).toBe(true);
    });

    it('Should return false for a sentence with no profanity word', async () => {
      const sentence = 'هذه جملة نظيفة ومهذبة.'; // "This is a clean and polite sentence"
      expect(await profanity.hasCurseWords(sentence)).toEqual(false);
    });
  });

  describe('Arabic-specific edge cases', () => {
    it('Should handle Arabic diacritical marks (tashkeel)', async () => {
      await expectBooleanForEach('search', [
        'كِتَابٌ', // Book with diacritics
        'كتاب', // Book without diacritics
        'مَرْحَبًا', // Hello with diacritics
        'مرحبا', // Hello without diacritics
      ]);
    });

    it('Should handle different Arabic letter forms', async () => {
      await expectBooleanForEach('search', [
        'بيت', // Isolated forms
        'البيت', // With definite article
        'بيوت', // Plural form
      ]);
    });

    it('Should handle Arabic numbers mixed with text', async () => {
      // "This text contains number 123 and Arabic words"
      await expectBooleanForEach('hasCurseWords', [
        'هذا النص يحتوي على رقم ١٢٣ وكلمات عربية.',
      ]);
    });

    it('Should handle mixed Arabic and English text', async () => {
      await expectBooleanForEach('hasCurseWords', [
        'This is mixed النص العربي and English text.',
      ]);
    });

    it('Should handle right-to-left text direction', async () => {
      // "Arabic text is read from right to left" - clean text.
      const rtlSentence = 'النص العربي يُقرأ من اليمين إلى اليسار.';
      expect(await profanity.hasCurseWords(rtlSentence)).toBe(false);
    });

    it('Should handle Arabic punctuation correctly', async () => {
      await expectBooleanForEach('hasCurseWords', [
        'ما هذا؟', // What is this?
        'لا، هذا خطأ!', // No, this is wrong!
        'قال: "مرحبا"', // He said: "Hello"
        'النص؛ والكتابة.', // Text; and writing.
      ]);
    });

    it('Should handle Arabic definite article (ال)', async () => {
      await expectBooleanForEach('search', [
        'بيت', // House
        'البيت', // The house
        'كتاب', // Book
        'الكتاب', // The book
      ]);
    });

    it('Should handle case sensitivity (Arabic has no case)', async () => {
      // Arabic has no upper/lower case, so repeated lookups must agree.
      const arabicWord = 'مرحبا';
      const first = await profanity.search(arabicWord);
      const second = await profanity.search(arabicWord);
      expect(first).toEqual(second);
    });

    it('Should handle whitespace around Arabic words', async () => {
      const arabicWord = 'مرحبا';
      await expectSearchClean([` ${arabicWord} `, `\t${arabicWord}\n`]);
    });

    it('Should handle Arabic word variations and roots', async () => {
      await expectBooleanForEach('search', [
        'كتب', // Root k-t-b (to write)
        'كاتب', // Writer
        'مكتوب', // Written
        'كتابة', // Writing
      ]);
    });

    it('Should return unique words only in Arabic text', async () => {
      // Repeats a real list entry so the uniqueness check is meaningful.
      const sentence = 'كلب كلب كلب في كل مكان';
      const foundWords = await profanity.getCurseWords(sentence);

      expect(Array.isArray(foundWords)).toBe(true);
      expect(new Set(foundWords).size).toBe(foundWords.length);
    });
  });

  describe('Performance tests for Arabic dataset', () => {
    it('Should handle large Arabic text efficiently', async () => {
      const largeText =
        'هذه جملة تجريبية. '.repeat(1000) +
        'النص العربي ' +
        'نص نظيف. '.repeat(1000);

      // Warm up first: afterEach calls reset(), which presumably drops the
      // loaded list, and the one-off load should not count against the budget.
      await profanity.all();

      const startTime = Date.now();
      const result = await profanity.hasCurseWords(largeText);
      const elapsed = Date.now() - startTime;

      expect(typeof result).toBe('boolean');
      expect(elapsed).toBeLessThan(100); // Should complete in under 100ms
    });

    it('Should efficiently search through all Arabic terms', async () => {
      // Fetching the list also warms the engine before timing starts.
      const allWords = await profanity.all();
      const sample = allWords.slice(0, 100);

      const startTime = Date.now();
      for (const word of sample) {
        await profanity.search(word);
      }
      const elapsed = Date.now() - startTime;

      expect(elapsed).toBeLessThan(50); // Should be very fast with Set lookup
    });

    it('Should handle concurrent operations on Arabic dataset', async () => {
      const [helloHit, sentenceHit, found, allWords, thanksHit] =
        await Promise.all([
          profanity.search('مرحبا'),
          profanity.hasCurseWords('هذا نص عربي'),
          profanity.getCurseWords('النص العربي'),
          profanity.all(),
          profanity.search('شكرا'),
        ]);

      expect(helloHit).toBe(false); // clean word
      expect(sentenceHit).toBe(false); // clean sentence
      expect(Array.isArray(found)).toBe(true);
      expect(Array.isArray(allWords)).toBe(true);
      expect(thanksHit).toBe(false); // clean word
    });
  });

  describe('Arabic language specificity', () => {
    it('Should load Arabic words correctly or fallback to English', async () => {
      // Either the Arabic list or the English fallback must be non-empty.
      const allWords = await profanity.all();
      expect(allWords.length).toBeGreaterThan(0);
    });

    it('Should handle Arabic-specific character encoding (UTF-8)', async () => {
      const arabicChars = [
        'ا', 'ب', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر',
        'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف',
        'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ي',
      ];
      await expectBooleanForEach('search', arabicChars);
    });

    it('Should handle Arabic ligatures and special characters', async () => {
      await expectBooleanForEach('search', [
        'لا', // Lam-Alif ligature
        'ﷲ', // Allah ligature
        'ة', // Taa marbuta
        'ى', // Alif maksura
        'ء', // Hamza
      ]);
    });

    it('Should handle different Arabic dialects considerations', async () => {
      await expectBooleanForEach('search', [
        'شنو', // What (Iraqi/Gulf)
        'ايش', // What (Levantine)
        'ايه', // What (Egyptian)
        'اشنو', // What (Moroccan)
      ]);
    });
  });

  describe('Data integrity for Arabic', () => {
    it('Should not allow modification of Arabic word list', async () => {
      const terms1 = await profanity.all();
      const originalLength = terms1.length;

      // Tamper with the returned array; the engine must hand out a copy.
      terms1.push('كلمة-مزيفة');
      terms1.pop();
      if (terms1.length > 0) {
        terms1[0] = 'معدل';
      }

      const terms2 = await profanity.all();
      expect(terms2.length).toBe(originalLength);
      expect(terms2).not.toContain('كلمة-مزيفة');
      if (terms2.length > 0) {
        expect(terms2[0]).not.toBe('معدل');
      }
    });

    it('Should provide consistent results for Arabic detection', async () => {
      const sentence = 'هذه جملة تجريبية بالعربية';

      const first = await profanity.getCurseWords(sentence);
      const second = await profanity.getCurseWords(sentence);
      const hasAny = await profanity.hasCurseWords(sentence);

      expect(first).toEqual(second);
      expect(typeof hasAny).toBe('boolean');
    });
  });

  describe('Configuration and fallback for Arabic', () => {
    it('Should handle missing Arabic language file gracefully', async () => {
      // If ar.txt were missing the engine is expected to fall back to English.
      const arabicProfanity = new ProfanityEngine({
        language: 'ar',
        testMode: true,
      });

      const terms = await arabicProfanity.all();
      expect(terms.length).toBeGreaterThan(0);
    });

    it('Should suppress warnings in test mode for Arabic', async () => {
      const originalWarn = console.warn;
      let warnCalled = false;
      console.warn = () => {
        warnCalled = true;
      };

      // Restore in `finally` so a failing assertion cannot leave the global
      // console patched for every test that runs afterwards.
      try {
        const arabicProfanity = new ProfanityEngine({
          language: 'ar',
          testMode: true,
        });

        // Only warnings emitted while loading the list are of interest.
        warnCalled = false;
        await arabicProfanity.all();
        expect(warnCalled).toBe(false);
      } finally {
        console.warn = originalWarn;
      }
    });
  });
});
import { ProfanityEngine } from '../index.js';

/** Engine under test; created once in beforeAll and reset after every test. */
let profanity;

/**
 * Smoke-test helper: asserts that `profanity[method](input)` resolves to a
 * boolean for every input, i.e. the engine copes with the input at all.
 *
 * @param {'search' | 'hasCurseWords'} method - Engine method to call.
 * @param {string[]} inputs - Words or sentences to feed to the method.
 */
const expectBooleanForEach = async (method, inputs) => {
  for (const input of inputs) {
    expect(typeof (await profanity[method](input))).toBe('boolean');
  }
};

/**
 * Asserts that `search` reports every given word as clean.
 *
 * @param {string[]} words - Words expected not to be in the profanity list.
 */
const expectSearchClean = async (words) => {
  for (const word of words) {
    expect(await profanity.search(word)).toBe(false);
  }
};

describe('French Profanity tests', () => {
  beforeAll(() => {
    profanity = new ProfanityEngine({
      language: 'fr',
      testMode: true,
    });
  });

  afterEach(() => {
    profanity.reset();
  });

  describe('Core French functionality', () => {
    it('Should get all the profanity words in an array', async () => {
      const allWords = await profanity.all();
      expect(Array.isArray(allWords)).toBe(true);
      expect(allWords.length).toBeGreaterThan(0);
    });

    it('Should return true for French profanity words', async () => {
      // 'merde' is a real entry in data/fr.txt, so this can actually fail.
      expect(await profanity.search('merde')).toBe(true);
    });

    it('Should return false for normal French words', async () => {
      await expectSearchClean([
        'bonjour', // Hello
        'merci', // Thank you
        'maison', // House
        'livre', // Book
        'eau', // Water
        'français', // French
        'ordinateur', // Computer
        'famille', // Family
      ]);
    });

    it('Should return false for any empty string', async () => {
      expect(await profanity.search('')).toEqual(false);
    });

    it('Should return true for a sentence containing a profanity word', async () => {
      // The listed word is whitespace-delimited and free of punctuation so
      // the check does not depend on how the engine strips punctuation.
      // NOTE(review): assumes hasCurseWords tokenises on whitespace - confirm.
      const sentence = 'Quelle merde cette histoire';
      expect(await profanity.hasCurseWords(sentence)).toBe(true);
    });

    it('Should return false for a sentence with no profanity word', async () => {
      const sentence = 'Cette phrase est propre et polie.'; // "This sentence is clean and polite"
      expect(await profanity.hasCurseWords(sentence)).toEqual(false);
    });
  });

  describe('French-specific edge cases', () => {
    it('Should handle French accented characters', async () => {
      await expectBooleanForEach('search', [
        'café', // é
        'hôtel', // ô
        'être', // ê
        'français', // ç
        'naïf', // ï
        'où', // ù
        'âge', // â
        'élève', // è
      ]);
    });

    it('Should handle French case sensitivity correctly', async () => {
      const testWord = 'bonjour';
      await expectSearchClean([
        testWord.toUpperCase(),
        'Bonjour',
        testWord,
        'bOnJoUr',
      ]);
    });

    it('Should handle French apostrophes and contractions', async () => {
      await expectBooleanForEach('hasCurseWords', [
        "C'est une phrase.", // It's a sentence
        "L'ordinateur est cassé.", // The computer is broken
        "D'accord avec vous.", // I agree with you
        "Qu'est-ce que c'est?", // What is it?
        "N'importe quoi!", // Whatever!
        "J'ai mangé.", // I ate
      ]);
    });

    it('Should handle French punctuation correctly', async () => {
      await expectBooleanForEach('hasCurseWords', [
        "Qu'est-ce que c'est ?", // What is it? (French spacing before ?)
        'Bonjour !', // Hello! (French spacing before !)
        'Non, merci.', // No, thank you.
        'Il a dit : « Bonjour »', // He said: "Hello" (French quotes)
        "C'est vrai ; vraiment.", // It's true; really.
      ]);
    });

    it('Should handle French gender variations', async () => {
      const genderPairs = [
        ['acteur', 'actrice'], // actor/actress
        ['chanteur', 'chanteuse'], // singer (m/f)
        ['directeur', 'directrice'], // director (m/f)
        ['français', 'française'], // French (m/f)
      ];
      await expectSearchClean(genderPairs.flat());
    });

    it('Should handle French plural forms', async () => {
      const singularPlural = [
        ['livre', 'livres'], // book/books
        ['maison', 'maisons'], // house/houses
        ['animal', 'animaux'], // animal/animals (irregular)
        ['eau', 'eaux'], // water/waters (irregular)
      ];
      await expectSearchClean(singularPlural.flat());
    });

    it('Should handle French verb conjugations', async () => {
      await expectBooleanForEach('search', [
        'parler', // to speak (infinitive)
        'parle', // I/he speaks
        'parles', // you speak
        'parlons', // we speak
        'parlez', // you (plural) speak
        'parlent', // they speak
      ]);
    });

    it('Should handle French liaison and elision', async () => {
      await expectBooleanForEach('hasCurseWords', [
        'Les enfants', // Liaison: les_enfants
        'Un homme', // Liaison: un_homme
        "L'ami", // Elision: l'ami (not le ami)
        "D'eau", // Elision: d'eau (not de eau)
      ]);
    });

    it('Should handle whitespace around French words', async () => {
      const frenchWord = 'bonjour';
      await expectSearchClean([` ${frenchWord} `, `\t${frenchWord}\n`]);
    });

    it('Should handle French hyphenated words', async () => {
      await expectBooleanForEach('search', [
        "c'est-à-dire", // that is to say
        'peut-être', // maybe
        'moi-même', // myself
        "quelqu'un", // someone
        'rendez-vous', // appointment
      ]);
    });

    it('Should return unique words only in French text', async () => {
      // Repeats a real list entry so the uniqueness check is meaningful.
      const sentence = 'merde merde merde partout';
      const foundWords = await profanity.getCurseWords(sentence);

      expect(Array.isArray(foundWords)).toBe(true);
      expect(new Set(foundWords).size).toBe(foundWords.length);
    });

    it('Should handle mixed French and English text', async () => {
      await expectBooleanForEach('hasCurseWords', [
        'This is mixed avec du français and English text.',
      ]);
    });

    it('Should handle French regional variations', async () => {
      await expectBooleanForEach('search', [
        'chocolatine', // Pain au chocolat (Southwest France)
        'septante', // Seventy (Belgium/Switzerland)
        'nonante', // Ninety (Belgium/Switzerland)
        'tantôt', // Later (Quebec/Belgium)
      ]);
    });
  });

  describe('Performance tests for French dataset', () => {
    it('Should handle large French text efficiently', async () => {
      const largeText =
        'Ceci est une phrase de test. '.repeat(1000) +
        'texte français ' +
        'Texte propre. '.repeat(1000);

      // Warm up first: afterEach calls reset(), which presumably drops the
      // loaded list, and the one-off load should not count against the budget.
      await profanity.all();

      const startTime = Date.now();
      const result = await profanity.hasCurseWords(largeText);
      const elapsed = Date.now() - startTime;

      expect(typeof result).toBe('boolean');
      expect(elapsed).toBeLessThan(100); // Should complete in under 100ms
    });

    it('Should efficiently search through all French terms', async () => {
      // Fetching the list also warms the engine before timing starts.
      const allWords = await profanity.all();
      const sample = allWords.slice(0, 100);

      const startTime = Date.now();
      for (const word of sample) {
        await profanity.search(word);
      }
      const elapsed = Date.now() - startTime;

      expect(elapsed).toBeLessThan(50); // Should be very fast with Set lookup
    });

    it('Should handle concurrent operations on French dataset', async () => {
      const [helloHit, sentenceHit, found, allWords, thanksHit] =
        await Promise.all([
          profanity.search('bonjour'),
          profanity.hasCurseWords('ceci est du texte français'),
          profanity.getCurseWords('le texte français'),
          profanity.all(),
          profanity.search('merci'),
        ]);

      expect(helloHit).toBe(false); // clean word
      expect(sentenceHit).toBe(false); // clean sentence
      expect(Array.isArray(found)).toBe(true);
      expect(Array.isArray(allWords)).toBe(true);
      expect(thanksHit).toBe(false); // clean word
    });
  });

  describe('French language specificity', () => {
    it('Should load French words correctly or fallback to English', async () => {
      // Either the French list or the English fallback must be non-empty.
      const allWords = await profanity.all();
      expect(allWords.length).toBeGreaterThan(0);
    });

    it('Should handle French-specific character encoding (UTF-8)', async () => {
      const frenchChars = [
        'à', 'â', 'ä', 'ç', 'è', 'é', 'ê', 'ë',
        'î', 'ï', 'ô', 'ù', 'û', 'ü', 'ÿ', 'ñ',
      ];
      await expectBooleanForEach('search', frenchChars);
    });

    it('Should handle French quotation marks and typography', async () => {
      await expectBooleanForEach('hasCurseWords', [
        '« guillemets français »', // French quotes
        '"guillemets anglais"', // English quotes
        'l’apostrophe courbe', // Curved apostrophe (U+2019)
        "l'apostrophe droite", // Straight apostrophe (U+0027)
        '— tiret cadratin', // Em dash
        '– tiret demi-cadratin', // En dash
      ]);
    });

    it('Should handle French Canadian (Quebec) variations', async () => {
      await expectBooleanForEach('search', [
        'char', // Car (instead of voiture)
        'blonde', // Girlfriend (instead of copine)
        'magasiner', // To shop (instead of faire du shopping)
        'déjeuner', // Breakfast (lunch in France)
      ]);
    });
  });

  describe('Data integrity for French', () => {
    it('Should not allow modification of French word list', async () => {
      const terms1 = await profanity.all();
      const originalLength = terms1.length;

      // Tamper with the returned array; the engine must hand out a copy.
      terms1.push('mot-faux');
      terms1.pop();
      if (terms1.length > 0) {
        terms1[0] = 'modifié';
      }

      const terms2 = await profanity.all();
      expect(terms2.length).toBe(originalLength);
      expect(terms2).not.toContain('mot-faux');
      if (terms2.length > 0) {
        expect(terms2[0]).not.toBe('modifié');
      }
    });

    it('Should provide consistent results for French detection', async () => {
      const sentence = 'Cette phrase est en français';

      const first = await profanity.getCurseWords(sentence);
      const second = await profanity.getCurseWords(sentence);
      const hasAny = await profanity.hasCurseWords(sentence);

      expect(first).toEqual(second);
      expect(typeof hasAny).toBe('boolean');
    });
  });

  describe('Configuration and fallback for French', () => {
    it('Should handle missing French language file gracefully', async () => {
      // If fr.txt were missing the engine is expected to fall back to English.
      const frenchProfanity = new ProfanityEngine({
        language: 'fr',
        testMode: true,
      });

      const terms = await frenchProfanity.all();
      expect(terms.length).toBeGreaterThan(0);
    });

    it('Should suppress warnings in test mode for French', async () => {
      const originalWarn = console.warn;
      let warnCalled = false;
      console.warn = () => {
        warnCalled = true;
      };

      // Restore in `finally` so a failing assertion cannot leave the global
      // console patched for every test that runs afterwards.
      try {
        const frenchProfanity = new ProfanityEngine({
          language: 'fr',
          testMode: true,
        });

        // Only warnings emitted while loading the list are of interest.
        warnCalled = false;
        await frenchProfanity.all();
        expect(warnCalled).toBe(false);
      } finally {
        console.warn = originalWarn;
      }
    });
  });

  describe('French grammar and linguistics', () => {
    it('Should handle French articles and determiners', async () => {
      await expectSearchClean([
        'le', 'la', 'les', // Definite articles
        'un', 'une', 'des', // Indefinite articles ('des' is also partitive)
        'du', 'de la', // Partitive articles
        'ce', 'cette', 'ces', // Demonstrative
      ]);
    });

    it('Should handle French prepositions', async () => {
      await expectSearchClean([
        'de', 'à', 'dans', 'sur', 'avec', 'pour',
        'par', 'sans', 'sous', 'vers', 'chez',
      ]);
    });

    it('Should handle French reflexive pronouns', async () => {
      const reflexiveTests = [
        'Je me lave', // I wash myself
        'Tu te dépêches', // You hurry
        'Il se réveille', // He wakes up
        'Nous nous amusons', // We have fun
      ];

      for (const sentence of reflexiveTests) {
        expect(await profanity.hasCurseWords(sentence)).toBe(false); // clean
      }
    });
  });
});
beforeAll(async () => { 8 | profanity = new ProfanityEngine({ 9 | language: 'zh', 10 | testMode: true, 11 | }); 12 | }); 13 | 14 | afterEach(() => { 15 | profanity.reset(); 16 | }); 17 | 18 | describe('Core Chinese functionality', () => { 19 | it('Should get all the profanity words in an array', async () => { 20 | const allWords = await profanity.all(); 21 | expect(Array.isArray(allWords)).toBe(true); 22 | expect(allWords.length).toBeGreaterThan(0); 23 | // Update this number based on your actual Chinese word count 24 | // expect(allWords.length).toEqual(XXX); 25 | }); 26 | 27 | it('Should return true for Chinese profanity words', async () => { 28 | // Test with a common Chinese profanity word (if it exists in your list) 29 | // Replace 'testword' with an actual word from your Chinese list 30 | const searchWord = await profanity.search('testword'); 31 | expect(typeof searchWord).toBe('boolean'); 32 | }); 33 | 34 | it('Should return false for normal Chinese words', async () => { 35 | const normalWords = [ 36 | '你好', // Hello (nǐ hǎo) 37 | '谢谢', // Thank you (xiè xiè) 38 | '房子', // House (fáng zi) 39 | '书', // Book (shū) 40 | '水', // Water (shuǐ) 41 | '中文', // Chinese language (zhōng wén) 42 | '电脑', // Computer (diàn nǎo) 43 | '家庭', // Family (jiā tíng) 44 | '朋友', // Friend (péng yǒu) 45 | '学校', // School (xué xiào) 46 | ]; 47 | 48 | for (const word of normalWords) { 49 | expect(await profanity.search(word)).toBe(false); 50 | } 51 | }); 52 | 53 | it('Should return false for any empty string', async () => { 54 | const searchWord = await profanity.search(''); 55 | expect(searchWord).toEqual(false); 56 | }); 57 | 58 | it('Should return true for a sentence containing a profanity word', async () => { 59 | // Replace with actual Chinese sentence containing profanity from your list 60 | const sentence = '这个句子包含不当词汇。'; // "This sentence contains inappropriate words" 61 | const hasCurseWords = await profanity.hasCurseWords(sentence); 62 | expect(typeof 
hasCurseWords).toBe('boolean'); 63 | }); 64 | 65 | it('Should return false for a sentence with no profanity word', async () => { 66 | const sentence = '这是一个干净礼貌的句子。'; // "This is a clean and polite sentence" 67 | const hasCurseWords = await profanity.hasCurseWords(sentence); 68 | expect(hasCurseWords).toEqual(false); 69 | }); 70 | }); 71 | 72 | describe('Chinese-specific edge cases', () => { 73 | it('Should handle simplified vs traditional Chinese characters', async () => { 74 | // Test pairs of simplified and traditional characters 75 | const characterPairs = [ 76 | ['学', '學'], // Study (simplified vs traditional) 77 | ['国', '國'], // Country (simplified vs traditional) 78 | ['电', '電'], // Electric (simplified vs traditional) 79 | ['书', '書'], // Book (simplified vs traditional) 80 | ['车', '車'], // Vehicle (simplified vs traditional) 81 | ['语', '語'], // Language (simplified vs traditional) 82 | ]; 83 | 84 | for (const [simplified, traditional] of characterPairs) { 85 | const result1 = await profanity.search(simplified); 86 | const result2 = await profanity.search(traditional); 87 | expect(typeof result1).toBe('boolean'); 88 | expect(typeof result2).toBe('boolean'); 89 | // Both should be clean words 90 | expect(result1).toBe(false); 91 | expect(result2).toBe(false); 92 | } 93 | }); 94 | 95 | it('Should handle Chinese without word boundaries', async () => { 96 | // Chinese doesn't use spaces between words 97 | const continuousText = '我今天去学校学习中文很开心'; // "I went to school today to study Chinese and was very happy" 98 | const result = await profanity.hasCurseWords(continuousText); 99 | expect(result).toBe(false); // Should be clean text 100 | }); 101 | 102 | it('Should handle single Chinese characters', async () => { 103 | const singleChars = [ 104 | '我', // I/me 105 | '你', // You 106 | '他', // He 107 | '好', // Good 108 | '大', // Big 109 | '小', // Small 110 | '人', // Person 111 | '天', // Day/sky 112 | ]; 113 | 114 | for (const char of singleChars) { 115 | expect(await 
profanity.search(char)).toBe(false); 116 | } 117 | }); 118 | 119 | it('Should handle Chinese compound words', async () => { 120 | const compoundWords = [ 121 | '电脑', // Computer (electric + brain) 122 | '火车', // Train (fire + vehicle) 123 | '飞机', // Airplane (fly + machine) 124 | '手机', // Cell phone (hand + machine) 125 | '汽车', // Car (steam + vehicle) 126 | '电视', // Television (electric + vision) 127 | ]; 128 | 129 | for (const word of compoundWords) { 130 | expect(await profanity.search(word)).toBe(false); 131 | } 132 | }); 133 | 134 | it('Should handle Chinese numbers and mixed content', async () => { 135 | const mixedContent = [ 136 | '我有3本书', // I have 3 books 137 | '今天是2024年', // Today is 2024 138 | '电话号码123456', // Phone number 123456 139 | '第1章', // Chapter 1 140 | '100元', // 100 yuan 141 | ]; 142 | 143 | for (const text of mixedContent) { 144 | const result = await profanity.hasCurseWords(text); 145 | expect(typeof result).toBe('boolean'); 146 | } 147 | }); 148 | 149 | it('Should handle Chinese punctuation', async () => { 150 | const testSentences = [ 151 | '你好!', // Hello! 152 | '你好吗?', // How are you? 153 | '是的,我知道。', // Yes, I know. 154 | '他说:"你好"', // He said: "Hello" 155 | '学习、工作、生活', // Study, work, life 156 | '这是...很好', // This is... 
very good 157 | ]; 158 | 159 | for (const sentence of testSentences) { 160 | const result = await profanity.hasCurseWords(sentence); 161 | expect(typeof result).toBe('boolean'); 162 | } 163 | }); 164 | 165 | it('Should handle Chinese measure words (classifiers)', async () => { 166 | const measureWords = [ 167 | '一本书', // One book (classifier: 本) 168 | '两个人', // Two people (classifier: 个) 169 | '三只猫', // Three cats (classifier: 只) 170 | '四辆车', // Four cars (classifier: 辆) 171 | '五张纸', // Five sheets of paper (classifier: 张) 172 | ]; 173 | 174 | for (const phrase of measureWords) { 175 | expect(await profanity.hasCurseWords(phrase)).toBe(false); 176 | } 177 | }); 178 | 179 | it('Should handle Chinese tone marks in pinyin (if applicable)', async () => { 180 | // If your system processes pinyin alongside Chinese characters 181 | const pinyinWords = [ 182 | 'nǐ hǎo', // Hello 183 | 'xiè xiè', // Thank you 184 | 'duì bù qǐ', // Sorry 185 | 'zài jiàn', // Goodbye 186 | ]; 187 | 188 | for (const pinyin of pinyinWords) { 189 | const result = await profanity.search(pinyin); 190 | expect(typeof result).toBe('boolean'); 191 | } 192 | }); 193 | 194 | it('Should handle whitespace around Chinese characters', async () => { 195 | const chineseWord = '你好'; 196 | expect(await profanity.search(` ${chineseWord} `)).toBe(false); 197 | expect(await profanity.search(`\t${chineseWord}\n`)).toBe(false); 198 | }); 199 | 200 | it('Should handle mixed Chinese and English text', async () => { 201 | const mixedSentences = [ 202 | 'I love 中文', // I love Chinese 203 | '这是English和中文的混合', // This is a mix of English and Chinese 204 | 'Hello 世界', // Hello world 205 | '我在学习programming', // I am learning programming 206 | ]; 207 | 208 | for (const sentence of mixedSentences) { 209 | const result = await profanity.hasCurseWords(sentence); 210 | expect(typeof result).toBe('boolean'); 211 | } 212 | }); 213 | 214 | it('Should return unique words only in Chinese text', async () => { 215 | // Test with 
repeated Chinese words 216 | const sentence = '你好你好你好世界'; 217 | const foundWords = await profanity.getCurseWords(sentence); 218 | 219 | // Should return unique words only 220 | expect(Array.isArray(foundWords)).toBe(true); 221 | // If '你好' were a profanity word, it should appear only once 222 | }); 223 | 224 | it('Should handle Chinese regional variations', async () => { 225 | // Different Chinese-speaking regions may have different vocabulary 226 | const regionalWords = [ 227 | '出租车', // Taxi (Mainland) 228 | '计程车', // Taxi (Taiwan) 229 | '的士', // Taxi (Hong Kong) 230 | '垃圾', // Garbage (Mainland) 231 | '废物', // Waste (General) 232 | ]; 233 | 234 | for (const word of regionalWords) { 235 | const result = await profanity.search(word); 236 | expect(typeof result).toBe('boolean'); 237 | } 238 | }); 239 | }); 240 | 241 | describe('Performance tests for Chinese dataset', () => { 242 | it('Should handle large Chinese text efficiently', async () => { 243 | const largeText = '这是一个测试句子。'.repeat(1000) + '中文文本 ' + '干净的文本。'.repeat(1000); 244 | 245 | const startTime = Date.now(); 246 | const result = await profanity.hasCurseWords(largeText); 247 | const endTime = Date.now(); 248 | 249 | expect(typeof result).toBe('boolean'); 250 | expect(endTime - startTime).toBeLessThan(100); // Should complete in under 100ms 251 | }); 252 | 253 | it('Should efficiently search through all Chinese terms', async () => { 254 | const allWords = await profanity.all(); 255 | 256 | if (allWords.length > 0) { 257 | const startTime = Date.now(); 258 | for (let i = 0; i < Math.min(100, allWords.length); i++) { 259 | await profanity.search(allWords[i % allWords.length]); 260 | } 261 | const endTime = Date.now(); 262 | 263 | expect(endTime - startTime).toBeLessThan(50); // Should be very fast with Set lookup 264 | } 265 | }); 266 | 267 | it('Should handle concurrent operations on Chinese dataset', async () => { 268 | const promises = [ 269 | profanity.search('你好'), 270 | 
profanity.hasCurseWords('这是中文文本'), 271 | profanity.getCurseWords('中文文本'), 272 | profanity.all(), 273 | profanity.search('谢谢') 274 | ]; 275 | 276 | const results = await Promise.all(promises); 277 | expect(results[0]).toBe(false); // search 你好 (should be clean) 278 | expect(results[1]).toBe(false); // hasCurseWords (should be clean) 279 | expect(Array.isArray(results[2])).toBe(true); // getCurseWords 280 | expect(Array.isArray(results[3])).toBe(true); // all words 281 | expect(results[4]).toBe(false); // search 谢谢 (should be clean) 282 | }); 283 | }); 284 | 285 | describe('Chinese language specificity', () => { 286 | it('Should load Chinese words correctly or fallback to English', async () => { 287 | const allWords = await profanity.all(); 288 | expect(allWords.length).toBeGreaterThan(0); 289 | // If Chinese file doesn't exist, should fallback to English (958 words) 290 | // If Chinese file exists, should load Chinese words 291 | }); 292 | 293 | it('Should handle Chinese character encoding (UTF-8)', async () => { 294 | // Test various Chinese character ranges 295 | const chineseChars = [ 296 | '一', '二', '三', '四', '五', // Numbers 297 | '人', '大', '小', '中', '国', // Common characters 298 | '學', '國', '語', '電', '車', // Traditional characters 299 | '龍', '鳳', '麒', '麟', '龜', // Complex characters 300 | ]; 301 | 302 | for (const char of chineseChars) { 303 | const result = await profanity.search(char); 304 | expect(typeof result).toBe('boolean'); 305 | } 306 | }); 307 | 308 | it('Should handle Chinese internet slang and abbreviated forms', async () => { 309 | // Common Chinese internet abbreviations and slang 310 | const internetSlang = [ 311 | '886', // Bye bye (sounds like "bā bā liù") 312 | '520', // I love you (sounds like "wǒ ài nǐ") 313 | '88', // Bye bye 314 | '233', // LOL (from emoticon) 315 | '666', // Awesome/cool 316 | ]; 317 | 318 | for (const slang of internetSlang) { 319 | const result = await profanity.search(slang); 320 | expect(typeof 
result).toBe('boolean'); 321 | } 322 | }); 323 | 324 | it('Should handle Chinese variant characters and fonts', async () => { 325 | // Some characters have multiple valid forms 326 | const variants = [ 327 | ['关', '關'], // Close (simplified vs traditional) 328 | ['门', '門'], // Door (simplified vs traditional) 329 | ['时', '時'], // Time (simplified vs traditional) 330 | ['长', '長'], // Long (simplified vs traditional) 331 | ]; 332 | 333 | for (const [simplified, traditional] of variants) { 334 | expect(await profanity.search(simplified)).toBe(false); 335 | expect(await profanity.search(traditional)).toBe(false); 336 | } 337 | }); 338 | 339 | it('Should handle Chinese homophone considerations', async () => { 340 | // Chinese has many homophones (same pronunciation, different characters) 341 | const homophones = [ 342 | ['时', '石', '是'], // shí - time, stone, is 343 | ['他', '她', '它'], // tā - he, she, it 344 | ['在', '再'], // zài - at/in, again 345 | ]; 346 | 347 | for (const group of homophones) { 348 | for (const char of group) { 349 | const result = await profanity.search(char); 350 | expect(typeof result).toBe('boolean'); 351 | } 352 | } 353 | }); 354 | }); 355 | 356 | describe('Data integrity for Chinese', () => { 357 | it('Should not allow modification of Chinese word list', async () => { 358 | const terms1 = await profanity.all(); 359 | const originalLength = terms1.length; 360 | 361 | // Try to modify the returned array 362 | terms1.push('假词'); 363 | terms1.pop(); 364 | if (terms1.length > 0) { 365 | terms1[0] = '修改'; 366 | } 367 | 368 | // Get terms again - should be unchanged 369 | const terms2 = await profanity.all(); 370 | expect(terms2.length).toBe(originalLength); 371 | expect(terms2).not.toContain('假词'); 372 | if (terms2.length > 0) { 373 | expect(terms2[0]).not.toBe('修改'); 374 | } 375 | }); 376 | 377 | it('Should provide consistent results for Chinese detection', async () => { 378 | const sentence = '这个句子是中文的'; 379 | 380 | const result1 = await 
profanity.getCurseWords(sentence); 381 | const result2 = await profanity.getCurseWords(sentence); 382 | const result3 = await profanity.hasCurseWords(sentence); 383 | 384 | expect(result1).toEqual(result2); 385 | expect(typeof result3).toBe('boolean'); 386 | }); 387 | }); 388 | 389 | describe('Configuration and fallback for Chinese', () => { 390 | it('Should handle missing Chinese language file gracefully', async () => { 391 | // If zh.txt doesn't exist, should fallback to English 392 | const chineseProfanity = new ProfanityEngine({ 393 | language: 'zh', 394 | testMode: true, 395 | }); 396 | 397 | const terms = await chineseProfanity.all(); 398 | expect(terms.length).toBeGreaterThan(0); 399 | }); 400 | 401 | it('Should suppress warnings in test mode for Chinese', async () => { 402 | // Store original console.warn 403 | const originalWarn = console.warn; 404 | let warnCalled = false; 405 | 406 | // Mock console.warn 407 | console.warn = () => { 408 | warnCalled = true; 409 | }; 410 | 411 | const chineseProfanity = new ProfanityEngine({ 412 | language: 'zh', 413 | testMode: true, 414 | }); 415 | 416 | warnCalled = false; 417 | await chineseProfanity.all(); 418 | expect(warnCalled).toBe(false); 419 | 420 | // Restore original console.warn 421 | console.warn = originalWarn; 422 | }); 423 | }); 424 | 425 | describe('Chinese text processing specifics', () => { 426 | it('Should handle Chinese word segmentation challenges', async () => { 427 | // Chinese word boundaries are ambiguous 428 | const ambiguousTexts = [ 429 | '研究生命科学', // Could be "研究生|命科学" or "研究|生命科学" 430 | '北京大学生活', // Could be "北京大学|生活" or "北京|大学生|活" 431 | '中国人民银行', // "中国人民银行" as one entity 432 | ]; 433 | 434 | for (const text of ambiguousTexts) { 435 | const result = await profanity.hasCurseWords(text); 436 | expect(typeof result).toBe('boolean'); 437 | } 438 | }); 439 | 440 | it('Should handle Chinese proper nouns and names', async () => { 441 | const properNouns = [ 442 | '北京', // Beijing 443 | '上海', // 
Shanghai 444 | '中国', // China 445 | '长江', // Yangtze River 446 | '故宫', // Forbidden City 447 | ]; 448 | 449 | for (const noun of properNouns) { 450 | expect(await profanity.search(noun)).toBe(false); 451 | } 452 | }); 453 | 454 | it('Should handle Chinese grammar particles', async () => { 455 | const particles = [ 456 | '的', // Possessive particle 457 | '了', // Completion particle 458 | '着', // Progressive particle 459 | '过', // Experience particle 460 | '吗', // Question particle 461 | '呢', // Question particle 462 | ]; 463 | 464 | for (const particle of particles) { 465 | expect(await profanity.search(particle)).toBe(false); 466 | } 467 | }); 468 | }); 469 | }); -------------------------------------------------------------------------------- /__tests__/irish.test.js: -------------------------------------------------------------------------------- 1 | import { ProfanityEngine } from '../index.js'; 2 | 3 | const language = process.env.LANGUAGE || 'en'; // Default to 'en' if the LANGUAGE environment variable is not set 4 | let profanity; 5 | 6 | describe('Irish (Gaeilge) Profanity tests', () => { 7 | beforeAll(async () => { 8 | profanity = new ProfanityEngine({ 9 | language: 'ga', // ISO code for Irish (Gaeilge) 10 | testMode: true, 11 | }); 12 | }); 13 | 14 | afterEach(() => { 15 | profanity.reset(); 16 | }); 17 | 18 | describe('Core Irish functionality', () => { 19 | it('Should get all the profanity words in an array', async () => { 20 | const allWords = await profanity.all(); 21 | expect(Array.isArray(allWords)).toBe(true); 22 | expect(allWords.length).toBeGreaterThan(0); 23 | // Update this number based on your actual Irish word count 24 | // expect(allWords.length).toEqual(XXX); 25 | }); 26 | 27 | it('Should return true for Irish profanity words', async () => { 28 | // Test with a common Irish profanity word (if it exists in your list) 29 | // Replace 'testword' with an actual word from your Irish list 30 | const searchWord = await profanity.search('testword'); 
31 | expect(typeof searchWord).toBe('boolean'); 32 | }); 33 | 34 | it('Should return false for normal Irish words', async () => { 35 | const normalWords = [ 36 | 'dia duit', // Hello (God to you) 37 | 'go raibh maith agat', // Thank you 38 | 'teach', // House 39 | 'leabhar', // Book 40 | 'uisce', // Water 41 | 'Gaeilge', // Irish language 42 | 'ríomhaire', // Computer 43 | 'teaghlach', // Family 44 | 'cara', // Friend 45 | 'scoil', // School 46 | 'céad míle fáilte', // A hundred thousand welcomes 47 | ]; 48 | 49 | for (const word of normalWords) { 50 | expect(await profanity.search(word)).toBe(false); 51 | } 52 | }); 53 | 54 | it('Should return false for any empty string', async () => { 55 | const searchWord = await profanity.search(''); 56 | expect(searchWord).toEqual(false); 57 | }); 58 | 59 | it('Should return true for a sentence containing a profanity word', async () => { 60 | // Replace with actual Irish sentence containing profanity from your list 61 | const sentence = 'Tá focal mímhúinte sa abairt seo.'; // "There is an impolite word in this sentence" 62 | const hasCurseWords = await profanity.hasCurseWords(sentence); 63 | expect(typeof hasCurseWords).toBe('boolean'); 64 | }); 65 | 66 | it('Should return false for a sentence with no profanity word', async () => { 67 | const sentence = 'Is abairt ghlan dea-bhéasach í seo.'; // "This is a clean, well-mannered sentence" 68 | const hasCurseWords = await profanity.hasCurseWords(sentence); 69 | expect(hasCurseWords).toEqual(false); 70 | }); 71 | }); 72 | 73 | describe('Irish-specific edge cases', () => { 74 | it('Should handle Irish fadas (accent marks)', async () => { 75 | // Test words with fadas (long marks over vowels) 76 | const fadaWords = [ 77 | 'fáilte', // Welcome 78 | 'tír', // Country/land 79 | 'óg', // Young 80 | 'mór', // Big 81 | 'úr', // Fresh/new 82 | 'éan', // Bird 83 | 'íoc', // Pay 84 | 'bádóir', // Boatman 85 | 'cúpla', // Couple 86 | 'lón', // Lunch 87 | ]; 88 | 89 | for (const word of 
fadaWords) { 90 | const result = await profanity.search(word); 91 | expect(typeof result).toBe('boolean'); 92 | // These should all be clean words 93 | expect(result).toBe(false); 94 | } 95 | }); 96 | 97 | it('Should handle Irish initial mutations (séimhiú/urú)', async () => { 98 | // Irish has initial consonant mutations 99 | const mutationExamples = [ 100 | // Séimhiú (lenition) - adds 'h' after initial consonant 101 | ['bean', 'bhean'], // Woman (lenited form) 102 | ['fear', 'fhear'], // Man (lenited form) 103 | ['cat', 'chat'], // Cat (lenited form) 104 | ['doras', 'dhoras'], // Door (lenited form) 105 | 106 | // Urú (eclipsis) - changes initial consonant 107 | ['bean', 'mbean'], // Woman (eclipsed form) 108 | ['fear', 'bhfear'], // Man (eclipsed form) 109 | ['cat', 'gcat'], // Cat (eclipsed form) 110 | ['doras', 'ndoras'], // Door (eclipsed form) 111 | ]; 112 | 113 | for (const [root, mutated] of mutationExamples) { 114 | expect(await profanity.search(root)).toBe(false); 115 | expect(await profanity.search(mutated)).toBe(false); 116 | } 117 | }); 118 | 119 | it('Should handle Irish case sensitivity correctly', async () => { 120 | const testWord = 'gaeilge'; 121 | expect(await profanity.search('GAEILGE')).toBe(false); 122 | expect(await profanity.search('Gaeilge')).toBe(false); 123 | expect(await profanity.search('gaeilge')).toBe(false); 124 | expect(await profanity.search('gAeIlGe')).toBe(false); 125 | }); 126 | 127 | it('Should handle Irish verb conjugations', async () => { 128 | // Irish verbs have complex conjugation patterns 129 | const verbForms = [ 130 | // Bí (to be) conjugations 131 | 'tá', // is/are (present) 132 | 'bhí', // was/were (past) 133 | 'beidh', // will be (future) 134 | 135 | // Déan (to do/make) conjugations 136 | 'déanaim', // I do 137 | 'déanann', // he/she does 138 | 'rinne', // did (past) 139 | 'déanfaidh', // will do (future) 140 | ]; 141 | 142 | for (const verb of verbForms) { 143 | expect(await profanity.search(verb)).toBe(false); 
144 | } 145 | }); 146 | 147 | it('Should handle Irish noun declensions', async () => { 148 | // Irish nouns change form based on case 149 | const declensionExamples = [ 150 | // Fear (man) declensions 151 | 'fear', // Nominative singular 152 | 'fir', // Nominative plural 153 | 'fhear', // Genitive singular (lenited) 154 | 'bhfear', // With eclipsis 155 | 156 | // Bean (woman) declensions 157 | 'bean', // Nominative singular 158 | 'mná', // Nominative plural 159 | 'mhná', // Genitive plural (lenited) 160 | ]; 161 | 162 | for (const form of declensionExamples) { 163 | expect(await profanity.search(form)).toBe(false); 164 | } 165 | }); 166 | 167 | it('Should handle Irish compound words', async () => { 168 | const compoundWords = [ 169 | 'ríomhaire', // Computer (number-counter) 170 | 'teilifís', // Television 171 | 'rothar', // Bicycle (wheel-man) 172 | 'ospidéal', // Hospital 173 | 'ollscoil', // University (great-school) 174 | 'leabharlann', // Library (book-house) 175 | ]; 176 | 177 | for (const word of compoundWords) { 178 | expect(await profanity.search(word)).toBe(false); 179 | } 180 | }); 181 | 182 | it('Should handle Irish numbers and counting', async () => { 183 | const irishNumbers = [ 184 | 'a haon', // One 185 | 'a dó', // Two 186 | 'a trí', // Three 187 | 'a ceathair', // Four 188 | 'a cúig', // Five 189 | 'a sé', // Six 190 | 'a seacht', // Seven 191 | 'a hocht', // Eight 192 | 'a naoi', // Nine 193 | 'a deich', // Ten 194 | ]; 195 | 196 | for (const number of irishNumbers) { 197 | const result = await profanity.hasCurseWords(number); 198 | expect(result).toBe(false); 199 | } 200 | }); 201 | 202 | it('Should handle Irish prepositional pronouns', async () => { 203 | // Irish combines prepositions with pronouns 204 | const prepositionalPronouns = [ 205 | 'agam', // At me (ag + mé) 206 | 'agat', // At you (ag + tú) 207 | 'aige', // At him (ag + é) 208 | 'aici', // At her (ag + í) 209 | 'againn', // At us (ag + muid) 210 | 'agaibh', // At you (plural) (ag + 
sibh) 211 | 'acu', // At them (ag + iad) 212 | ]; 213 | 214 | for (const pronoun of prepositionalPronouns) { 215 | expect(await profanity.search(pronoun)).toBe(false); 216 | } 217 | }); 218 | 219 | it('Should handle whitespace around Irish words', async () => { 220 | const irishWord = 'fáilte'; 221 | expect(await profanity.search(` ${irishWord} `)).toBe(false); 222 | expect(await profanity.search(`\t${irishWord}\n`)).toBe(false); 223 | }); 224 | 225 | it('Should handle mixed Irish and English text', async () => { 226 | const mixedSentences = [ 227 | 'I love Gaeilge', // I love Irish 228 | 'Tá mé ag foghlaim English', // I am learning English 229 | 'Hello agus dia duit', // Hello and God to you 230 | 'Go raibh maith agat very much', // Thank you very much 231 | ]; 232 | 233 | for (const sentence of mixedSentences) { 234 | const result = await profanity.hasCurseWords(sentence); 235 | expect(typeof result).toBe('boolean'); 236 | } 237 | }); 238 | 239 | it('Should return unique words only in Irish text', async () => { 240 | // Test with repeated Irish words 241 | const sentence = 'fáilte fáilte fáilte go hÉireann'; 242 | const foundWords = await profanity.getCurseWords(sentence); 243 | 244 | // Should return unique words only 245 | expect(Array.isArray(foundWords)).toBe(true); 246 | // If 'fáilte' were a profanity word, it should appear only once 247 | }); 248 | 249 | it('Should handle Irish dialectal variations', async () => { 250 | // Irish has three main dialects: Munster, Connacht, Ulster 251 | const dialectalWords = [ 252 | // Different ways to say things in different dialects 253 | 'pótaí', // Potatoes (Munster) 254 | 'prátaí', // Potatoes (Connacht/Ulster) 255 | 'gasúr', // Boy (Munster) 256 | 'buachaill', // Boy (Connacht/Ulster) 257 | 'cailín', // Girl (general) 258 | 'girseach', // Girl (Ulster) 259 | ]; 260 | 261 | for (const word of dialectalWords) { 262 | const result = await profanity.search(word); 263 | expect(typeof result).toBe('boolean'); 264 | } 265 
| }); 266 | 267 | it('Should handle Irish traditional vs modern spelling', async () => { 268 | // Irish spelling was reformed in the 20th century 269 | const spellingVariations = [ 270 | // Traditional vs Modern 271 | ['Gaedhilg', 'Gaeilge'], // Irish language 272 | ['oidhche', 'oíche'], // Night 273 | ['ceathramhadh', 'ceathrú'], // Quarter/fourth 274 | ]; 275 | 276 | for (const [traditional, modern] of spellingVariations) { 277 | expect(await profanity.search(traditional)).toBe(false); 278 | expect(await profanity.search(modern)).toBe(false); 279 | } 280 | }); 281 | }); 282 | 283 | describe('Performance tests for Irish dataset', () => { 284 | it('Should handle large Irish text efficiently', async () => { 285 | const largeText = 'Seo abairt tástála. '.repeat(1000) + 'téacs Gaeilge ' + 'Téacs glan. '.repeat(1000); 286 | 287 | const startTime = Date.now(); 288 | const result = await profanity.hasCurseWords(largeText); 289 | const endTime = Date.now(); 290 | 291 | expect(typeof result).toBe('boolean'); 292 | expect(endTime - startTime).toBeLessThan(100); // Should complete in under 100ms 293 | }); 294 | 295 | it('Should efficiently search through all Irish terms', async () => { 296 | const allWords = await profanity.all(); 297 | 298 | if (allWords.length > 0) { 299 | const startTime = Date.now(); 300 | for (let i = 0; i < Math.min(100, allWords.length); i++) { 301 | await profanity.search(allWords[i % allWords.length]); 302 | } 303 | const endTime = Date.now(); 304 | 305 | expect(endTime - startTime).toBeLessThan(50); // Should be very fast with Set lookup 306 | } 307 | }); 308 | 309 | it('Should handle concurrent operations on Irish dataset', async () => { 310 | const promises = [ 311 | profanity.search('fáilte'), 312 | profanity.hasCurseWords('seo téacs Gaeilge'), 313 | profanity.getCurseWords('an téacs Gaeilge'), 314 | profanity.all(), 315 | profanity.search('slán') 316 | ]; 317 | 318 | const results = await Promise.all(promises); 319 | 
expect(results[0]).toBe(false); // search fáilte (should be clean) 320 | expect(results[1]).toBe(false); // hasCurseWords (should be clean) 321 | expect(Array.isArray(results[2])).toBe(true); // getCurseWords 322 | expect(Array.isArray(results[3])).toBe(true); // all words 323 | expect(results[4]).toBe(false); // search slán (should be clean) 324 | }); 325 | }); 326 | 327 | describe('Irish language specificity', () => { 328 | it('Should load Irish words correctly or fallback to English', async () => { 329 | const allWords = await profanity.all(); 330 | expect(allWords.length).toBeGreaterThan(0); 331 | // If Irish file doesn't exist, should fallback to English (958 words) 332 | // If Irish file exists, should load Irish words 333 | }); 334 | 335 | it('Should handle Irish-specific character encoding (UTF-8)', async () => { 336 | // Test Irish alphabet with fadas 337 | const irishChars = [ 338 | 'a', 'á', 'b', 'c', 'd', 'e', 'é', 'f', 'g', 'h', 339 | 'i', 'í', 'l', 'm', 'n', 'o', 'ó', 'p', 'r', 's', 340 | 't', 'u', 'ú', // Irish alphabet (no j, k, q, v, w, x, y, z traditionally) 341 | ]; 342 | 343 | for (const char of irishChars) { 344 | const result = await profanity.search(char); 345 | expect(typeof result).toBe('boolean'); 346 | } 347 | }); 348 | 349 | it('Should handle Irish place names', async () => { 350 | const placeNames = [ 351 | 'Éire', // Ireland 352 | 'Baile Átha Cliath', // Dublin 353 | 'Corcaigh', // Cork 354 | 'Gaillimh', // Galway 355 | 'Luimneach', // Limerick 356 | 'Port Láirge', // Waterford 357 | 'An Clár', // Clare 358 | 'Ciarraí', // Kerry 359 | ]; 360 | 361 | for (const place of placeNames) { 362 | expect(await profanity.search(place)).toBe(false); 363 | } 364 | }); 365 | 366 | it('Should handle Irish Celtic cultural terms', async () => { 367 | const culturalTerms = [ 368 | 'céilí', // Social gathering with music/dance 369 | 'seisiún', // Music session 370 | 'bodhrán', // Traditional drum 371 | 'uilleann', // Irish pipes 372 | 'fleadh', // 
Festival 373 | 'comhrá', // Conversation 374 | 'craic', // Fun/good time 375 | 'sláinte', // Health/cheers 376 | ]; 377 | 378 | for (const term of culturalTerms) { 379 | expect(await profanity.search(term)).toBe(false); 380 | } 381 | }); 382 | }); 383 | 384 | describe('Data integrity for Irish', () => { 385 | it('Should not allow modification of Irish word list', async () => { 386 | const terms1 = await profanity.all(); 387 | const originalLength = terms1.length; 388 | 389 | // Try to modify the returned array 390 | terms1.push('focal-bréige'); 391 | terms1.pop(); 392 | if (terms1.length > 0) { 393 | terms1[0] = 'athraithe'; 394 | } 395 | 396 | // Get terms again - should be unchanged 397 | const terms2 = await profanity.all(); 398 | expect(terms2.length).toBe(originalLength); 399 | expect(terms2).not.toContain('focal-bréige'); 400 | if (terms2.length > 0) { 401 | expect(terms2[0]).not.toBe('athraithe'); 402 | } 403 | }); 404 | 405 | it('Should provide consistent results for Irish detection', async () => { 406 | const sentence = 'Seo abairt i nGaeilge'; 407 | 408 | const result1 = await profanity.getCurseWords(sentence); 409 | const result2 = await profanity.getCurseWords(sentence); 410 | const result3 = await profanity.hasCurseWords(sentence); 411 | 412 | expect(result1).toEqual(result2); 413 | expect(typeof result3).toBe('boolean'); 414 | }); 415 | }); 416 | 417 | describe('Configuration and fallback for Irish', () => { 418 | it('Should handle missing Irish language file gracefully', async () => { 419 | // If ga.txt doesn't exist, should fallback to English 420 | const irishProfanity = new ProfanityEngine({ 421 | language: 'ga', 422 | testMode: true, 423 | }); 424 | 425 | const terms = await irishProfanity.all(); 426 | expect(terms.length).toBeGreaterThan(0); 427 | }); 428 | 429 | it('Should suppress warnings in test mode for Irish', async () => { 430 | // Store original console.warn 431 | const originalWarn = console.warn; 432 | let warnCalled = false; 433 | 434 
| // Mock console.warn 435 | console.warn = () => { 436 | warnCalled = true; 437 | }; 438 | 439 | const irishProfanity = new ProfanityEngine({ 440 | language: 'ga', 441 | testMode: true, 442 | }); 443 | 444 | warnCalled = false; 445 | await irishProfanity.all(); 446 | expect(warnCalled).toBe(false); 447 | 448 | // Restore original console.warn 449 | console.warn = originalWarn; 450 | }); 451 | }); 452 | 453 | describe('Irish grammar and linguistics', () => { 454 | it('Should handle Irish syntax patterns (VSO order)', async () => { 455 | // Irish typically uses Verb-Subject-Object word order 456 | const vsoSentences = [ 457 | 'Tá Seán ag rith', // Is Seán running (literally: Is Seán at running) 458 | 'Chonaic mé an madra', // I saw the dog (literally: Saw I the dog) 459 | 'Léann sí leabhar', // She reads a book (literally: Reads she book) 460 | ]; 461 | 462 | for (const sentence of vsoSentences) { 463 | expect(await profanity.hasCurseWords(sentence)).toBe(false); 464 | } 465 | }); 466 | 467 | it('Should handle Irish copula vs substantive verb', async () => { 468 | // Irish has two types of "to be" 469 | const copulaExamples = [ 470 | 'Is múinteoir mé', // I am a teacher (copula) 471 | 'Tá mé ag obair', // I am working (substantive verb) 472 | 'Is maith liom tae', // I like tea (copula) 473 | 'Tá tae agam', // I have tea (substantive verb) 474 | ]; 475 | 476 | for (const sentence of copulaExamples) { 477 | expect(await profanity.hasCurseWords(sentence)).toBe(false); 478 | } 479 | }); 480 | 481 | it('Should handle Irish conditional and subjunctive moods', async () => { 482 | const moodExamples = [ 483 | 'Dá mbeinn saibhir', // If I were rich (conditional) 484 | 'Go raibh maith agat', // Thank you (subjunctive: "that good be at you") 485 | 'Ar mhaith leat tae?', // Would you like tea? 
(conditional) 486 | ]; 487 | 488 | for (const sentence of moodExamples) { 489 | expect(await profanity.hasCurseWords(sentence)).toBe(false); 490 | } 491 | }); 492 | }); 493 | }); --------------------------------------------------------------------------------