├── .DS_Store
├── .github
    ├── .DS_Store
    ├── readme.png
    ├── FUNDING.yml
    └── workflows
    │   ├── test.yml
    │   └── npm-publish.yml
├── data
    ├── ga.txt
    ├── zh.txt
    ├── ar.txt
    ├── fr.txt
    ├── es.txt
    └── en.txt
├── .prettierrc.json
├── .gitignore
├── package.json
├── index.js
├── README.md
└── __tests__
    ├── english.test.js
    ├── engine.test.js
    ├── spanish.test.js
    ├── arabic.test.js
    ├── french.test.js
    ├── chinese.test.js
    └── irish.test.js


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coffee-and-fun/google-profanity-words/HEAD/.DS_Store


--------------------------------------------------------------------------------
/.github/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coffee-and-fun/google-profanity-words/HEAD/.github/.DS_Store


--------------------------------------------------------------------------------
/.github/readme.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coffee-and-fun/google-profanity-words/HEAD/.github/readme.png


--------------------------------------------------------------------------------
/data/ga.txt:
--------------------------------------------------------------------------------
 1 | cac
 2 | píosa cac
 3 | muc
 4 | feck
 5 | feckin
 6 | bod
 7 | póg mo thóin
 8 | amadán
 9 | eejit
10 | gobshite
11 | bastún
12 | lúdramán
13 | balbhán
14 | ainnis ort
15 | dún do chlab
16 | damnú ort


--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "singleQuote": true,
 3 |   "trailingComma": "es5",
 4 |   "tabWidth": 2,
 5 |   "semi": true,
 6 |   "printWidth": 80,
 7 |   "arrowParens": "always",
 8 |   "endOfLine": "auto"
 9 | }
10 | 


--------------------------------------------------------------------------------
/data/zh.txt:
--------------------------------------------------------------------------------
 1 | 操
 2 | 操你
 3 | 操你妈
 4 | 他妈
 5 | 他妈的
 6 | 去死
 7 | 滚
 8 | 滚开
 9 | 傻逼
10 | 煞笔
11 | 傻子
12 | 妈的
13 | 狗屎
14 | 屎
15 | 屌
16 | 贱人
17 | 婊子
18 | 死变态
19 | 垃圾
20 | 废物
21 | 蠢货
22 | 去你妈的
23 | 日
24 | 日你
25 | 日你妈
26 | 靠
27 | 混蛋
28 | 王八蛋
29 | 狗娘养的
30 | 龟儿子
31 | 


--------------------------------------------------------------------------------
/data/ar.txt:
--------------------------------------------------------------------------------
 1 | لعنة
 2 | اللعنة
 3 | خرا
 4 | خراء
 5 | قحبة
 6 | شرموطة
 7 | زب
 8 | كس
 9 | ابن زنا
10 | ابن حرام
11 | كلب
12 | يا كلب
13 | حمار
14 | يا حمار
15 | يا ابن الكلب
16 | يلعن أبوك
17 | يلعن شكلك
18 | تفو
19 | تفو عليك
20 | قذر
21 | قرف
22 | تبا
23 | انقلع
24 | اخرس


--------------------------------------------------------------------------------
/data/fr.txt:
--------------------------------------------------------------------------------
 1 | merde
 2 | putain
 3 | con
 4 | connard
 5 | connasse
 6 | salope
 7 | salaud
 8 | bordel
 9 | chiant
10 | foutre
11 | enculé
12 | enfoiré
13 | fils de pute
14 | casse‑toi
15 | ferme ta gueule
16 | va te faire foutre
17 | ta gueule
18 | pute
19 | couille
20 | couillon
21 | bâtard
22 | gros con
23 | cul
24 | trou du cul


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: robertjgabriel
 4 | patreon: # Replace with a single Patreon username
 5 | open_collective: # Replace with a single Open Collective username
 6 | ko_fi: # Replace with a single Ko-fi username
 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
13 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Tests
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     branches: [ main, master ]
 6 |   push:
 7 |     branches: [ main, master ]
 8 | 
 9 | jobs:
10 |   test:
11 |     runs-on: ubuntu-latest
12 |     
13 |     strategy:
14 |       matrix:
15 |         node-version: [16.x, 18.x, 20.x]
16 |     
17 |     steps:
18 |     - name: Checkout code
19 |       uses: actions/checkout@v4
20 |       
21 |     - name: Setup Node.js ${{ matrix.node-version }}
22 |       uses: actions/setup-node@v4
23 |       with:
24 |         node-version: ${{ matrix.node-version }}
25 |         cache: 'npm'
26 |         
27 |     - name: Install dependencies
28 |       run: npm ci
29 |       
30 |     - name: Run tests
31 |       run: npm test


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | npm-debug.log*
 5 | dist/
 6 | dist
 7 | # Runtime data
 8 | pids
 9 | *.pid
10 | *.seed
11 | *.pid.lock
12 | package/
13 | package
14 | build
15 | build/
16 | 
17 | # Directory for instrumented libs generated by jscoverage/JSCover
18 | lib-cov
19 | 
20 | # Coverage directory used by tools like istanbul
21 | coverage
22 | 
23 | # nyc test coverage
24 | .nyc_output
25 | 
26 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
27 | .grunt
28 | 
29 | # node-waf configuration
30 | .lock-wscript
31 | 
32 | # Compiled binary addons (http://nodejs.org/api/addons.html)
33 | build/Release
34 | 
35 | # Dependency directories
36 | node_modules
37 | jspm_packages
38 | 
39 | # Optional npm cache directory
40 | .npm
41 | 
42 | # Optional REPL history
43 | .node_repl_history
44 | Contact GitHub API Training Shop Blog About
45 | 


--------------------------------------------------------------------------------
/.github/workflows/npm-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will run tests using node and then publish a package to the npm registry when a release is published
 2 | # For more information see: https://docs.github.com/en/actions/publishing-packages/publishing-nodejs-packages
 3 | 
 4 | name: Publish
 5 | 
 6 | on:
 7 |   release:
 8 |     types: [published]
 9 | 
10 | jobs:
11 |   build:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v3
15 |       - uses: actions/setup-node@v3
16 |         with:
17 |           node-version: 16
18 |       - run: npm ci
19 |       - run: npm run format
20 |       - run: npm test
21 | 
22 |   publish-npm:
23 |     needs: build
24 |     runs-on: ubuntu-latest
25 |     steps:
26 |       - uses: actions/checkout@v3
27 |       - uses: actions/setup-node@v3
28 |         with:
29 |           node-version: 16
30 |           registry-url: https://registry.npmjs.org/
31 |       - run: npm ci
32 |       - run: npm publish
33 |         env:
34 |           NODE_AUTH_TOKEN: ${{secrets.npm_token}}
35 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@coffeeandfun/google-profanity-words",
 3 |   "version": "3.0.0",
 4 |   "description": "Real profanity words banned by Google, extracted from their hidden API before shutdown. Now available as an easy-to-use Node.js library for content filtering.",
 5 |   "main": "index.js",
 6 |   "type": "module",
 7 |   "scripts": {
 8 |     "format": "npx prettier . --write",
 9 |     "test": "NODE_OPTIONS=--experimental-vm-modules NODE_NO_WARNINGS=1 jest",
10 |     "en": "NODE_OPTIONS=--experimental-vm-modules NODE_NO_WARNINGS=1 jest english.test.js",
11 |     "es": "NODE_OPTIONS=--experimental-vm-modules NODE_NO_WARNINGS=1 jest spanish.test.js",
12 |     "engine": "NODE_OPTIONS=--experimental-vm-modules NODE_NO_WARNINGS=1 jest engine.test.js"
13 |   },
14 |   "repository": {
15 |     "type": "git",
16 |     "url": "git+https://github.com/coffee-and-fun/google-profanity-words.git"
17 |   },
18 |   "keywords": [
19 |     "google",
20 |     "side-project",
21 |     "profanity",
22 |     "profanity-detection",
23 |     "profanityfilter"
24 |   ],
25 |   "author": "Robert James Gabriel",
26 |   "license": "ISC",
27 |   "bugs": {
28 |     "url": "https://github.com/coffee-and-fun/google-profanity-words/issues"
29 |   },
30 |   "homepage": "https://github.com/coffee-and-fun/google-profanity-words#readme",
31 |   "devDependencies": {
32 |     "jest": "^27.4.5",
33 |     "prettier": "3.0.0"
34 |   }
35 | }
36 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
  1 | 
  2 | import { readFile, access } from 'fs/promises';
  3 | import path from 'path';
  4 | import { fileURLToPath } from 'url';
  5 | 
  6 | /**
  7 |  * Supports multiple languages and provides efficient word matching
  8 |  */
  9 | export class ProfanityEngine {
 10 |   constructor(config = {}) {
 11 |     this.isTestMode = config.testMode ?? false;
 12 |     this.language = config.language ?? 'en';
 13 |     this.terms = null; 
 14 |     this.termsSet = null; 
 15 |     this.isInitialized = false;
 16 |   }
 17 | 
 18 |   /**
 19 |    * Only loads data when first needed
 20 |    * @private
 21 |    */
 22 |   async _ensureInitialized() {
 23 |     if (this.isInitialized) return;
 24 |     
 25 |     try {
 26 |       const filePath = await this._getLanguageFilePath();
 27 |       const fileContent = await this._readTermsFile(filePath);
 28 |       
 29 | 
 30 |       this.terms = fileContent
 31 |         .filter(term => term.trim())
 32 |         .map(term => term.trim().toLowerCase());
 33 |       
 34 | 
 35 |       this.termsSet = new Set(this.terms);
 36 |       this.isInitialized = true;
 37 |       
 38 |     } catch (error) {
 39 |       this._logWarning(`Failed to initialize: ${error.message}`);
 40 |       this.terms = [];
 41 |       this.termsSet = new Set();
 42 |       this.isInitialized = true;
 43 |     }
 44 |   }
 45 | 
 46 |   /**
 47 |    * Get the file path for the specified language
 48 |    * @private
 49 |    */
 50 |   async _getLanguageFilePath() {
 51 |     const currentFilePath = fileURLToPath(import.meta.url);
 52 |     const dataFolderPath = path.join(path.dirname(currentFilePath), 'data');
 53 |     const languageFilePath = path.join(dataFolderPath, `${this.language}.txt`);
 54 |     
 55 |     if (await this._fileExists(languageFilePath)) {
 56 |       return languageFilePath;
 57 |     }
 58 |     
 59 |     // Fallback to English
 60 |     this._logWarning(`Language file '${this.language}.txt' not found. Using 'en' as fallback.`);
 61 |     return path.join(dataFolderPath, 'en.txt');
 62 |   }
 63 | 
 64 |   /**
 65 |    * Check if file exists
 66 |    * @private
 67 |    */
 68 |   async _fileExists(filePath) {
 69 |     try {
 70 |       await access(filePath);
 71 |       return true;
 72 |     } catch {
 73 |       return false;
 74 |     }
 75 |   }
 76 | 
 77 |   /**
 78 |    * Read and parse terms file
 79 |    * @private
 80 |    */
 81 |   async _readTermsFile(filePath) {
 82 |     const fileContent = await readFile(filePath, 'utf8');
 83 |     return fileContent.split(/\r?\n/); // Handle both \n and \r\n
 84 |   }
 85 | 
 86 |   /**
 87 |    * Log warning if not in test mode
 88 |    * @private
 89 |    */
 90 |   _logWarning(message) {
 91 |     if (!this.isTestMode) {
 92 |       console.warn('Profanity Engine:', message);
 93 |     }
 94 |   }
 95 | 
 96 |   /**
 97 |    * Extract and normalize words from text
 98 |    * @private
 99 |    */
100 |   _extractWords(text) {
101 |     if (!text || typeof text !== 'string') return [];
102 |     
103 |     // Split on whitespace and punctuation, filter empty strings
104 |     return text
105 |       .toLowerCase()
106 |       .split(/[\s\p{P}]+/u)
107 |       .filter(word => word.length > 0);
108 |   }
109 | 
110 |   /**
111 |    * Check if a sentence contains any profanity words
112 |    * @param {string} sentence - The text to check
113 |    * @returns {Promise<boolean>} True if profanity is found
114 |    */
115 |   async hasCurseWords(sentence) {
116 |     await this._ensureInitialized();
117 |     
118 |     if (!sentence || typeof sentence !== 'string') return false;
119 |     
120 |     const words = this._extractWords(sentence);
121 |     return words.some(word => this.termsSet.has(word));
122 |   }
123 | 
124 |   /**
125 |    * Get all profanity words found in a sentence
126 |    * @param {string} sentence - The text to analyze
127 |    * @returns {Promise<string[]>} Array of found profanity words
128 |    */
129 |   async getCurseWords(sentence) {
130 |     await this._ensureInitialized();
131 |     
132 |     if (!sentence || typeof sentence !== 'string') return [];
133 |     
134 |     const words = this._extractWords(sentence);
135 |     const foundWords = new Set(); // Use Set to avoid duplicates
136 |     
137 |     for (const word of words) {
138 |       if (this.termsSet.has(word)) {
139 |         foundWords.add(word);
140 |       }
141 |     }
142 |     
143 |     return Array.from(foundWords);
144 |   }
145 | 
146 |   /**
147 |    * Get all profanity terms
148 |    * @returns {Promise<string[]>} Array of all profanity terms
149 |    */
150 |   async all() {
151 |     await this._ensureInitialized();
152 |     return [...this.terms]; // Return a copy to prevent external modification
153 |   }
154 | 
155 |   /**
156 |    * Search for a specific term
157 |    * @param {string} term - The term to search for
158 |    * @returns {Promise<boolean>} True if the term is found
159 |    */
160 |   async search(term) {
161 |     await this._ensureInitialized();
162 |     
163 |     if (!term || typeof term !== 'string') return false;
164 |     
165 |     return this.termsSet.has(term.trim().toLowerCase());
166 |   }
167 | 
168 |   /**
169 |    * Reset the engine (useful for testing or changing language)
170 |    */
171 |   reset() {
172 |     this.terms = null;
173 |     this.termsSet = null;
174 |     this.isInitialized = false;
175 |   }
176 | 
177 |  
178 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ![alt text](.github/readme.png 'Logo Title Text 1')
  2 | 
  3 | 
  4 | 
  5 | # ☕ Google Profanity Words
  6 | 
  7 | > A fun and developer-friendly profanity detection library brought to you by [Coffee & Fun LLC](https://coffeeandfun.com) ☕🎉  
  8 | > Built and maintained with love by [Robert James Gabriel](https://github.com/robertgabriel) 💻✨
  9 | 
 10 | [![npm version](https://img.shields.io/npm/v/@coffeeandfun/google-profanity-words.svg)](https://www.npmjs.com/package/@coffeeandfun/google-profanity-words)  [![Stars](https://img.shields.io/github/stars/coffee-and-fun/google-profanity-words?style=social)](https://github.com/coffee-and-fun/google-profanity-words)
 11 | 
 12 | 
 13 | ---
 14 | 
 15 | ## 🚀 What’s This?
 16 | 
 17 | **Google Profanity Words** is a Node.js library that helps you detect and filter out naughty language (in multiple languages!) from your apps or content. Whether you’re building a chat app, a comment section, or a game—this one’s your profanity-slaying sidekick.
 18 | 
 19 | Made by devs for devs. Maintained by Robert at Coffee & Fun ☕❤️
 20 | 
 21 | ---
 22 | 
 23 | ## ✨ Features
 24 | 
 25 | - 🌐 **Multilingual support** – English, Spanish, French, Arabic, Chinese, and Irish out of the box. More coming soon!
 26 | - 🔁 **Monthly updates** – Stay fresh with the latest no-no words
 27 | - 💡 **Easy to use API** – Straightforward methods, async/await friendly
 28 | - 🔬 **Tested with Jest** – Fully covered and ready for production
 29 | - ⚡ **Tiny & Fast** – Minimal deps = speedy installs and performance
 30 | 
 31 | ---
 32 | 
 33 | ## 📦 Install Me
 34 | 
 35 | ```bash
 36 | npm install @coffeeandfun/google-profanity-words
 37 | ```
 38 | 
 39 | ---
 40 | 
 41 | ## ⚡ Quickstart Guide
 42 | 
 43 | ```javascript
 44 | import { ProfanityEngine } from '@coffeeandfun/google-profanity-words';
 45 | 
 46 | // Default is English
 47 | const profanity = new ProfanityEngine();
 48 | 
 49 | // Español? You got it.
 50 | const profanityES = new ProfanityEngine({ language: 'es' });
 51 | 
 52 | // Check a single word
 53 | const isBad = await profanity.search('example');
 54 | 
 55 | // Or check a full sentence
 56 | const hasCurses = await profanity.hasCurseWords('This is a test sentence');
 57 | 
 58 | console.log(isBad, hasCurses); // true / false
 59 | ```
 60 | 
 61 | ---
 62 | 
 63 | ## 🔍 API Docs (But Make It Chill)
 64 | 
 65 | ### 🛠️ `new ProfanityEngine(options?)`
 66 | 
 67 | Create a new profanity detector engine!
 68 | 
 69 | ```javascript
 70 | const profanity = new ProfanityEngine(); // Defaults to English
 71 | ```
 72 | 
 73 | Or choose a specific language:
 74 | 
 75 | ```javascript
 76 | const spanishProfanity = new ProfanityEngine({ language: 'es' });
 77 | ```
 78 | 
 79 | #### Options:
 80 | - `language` (string, optional):  
 81 |   - `'en'` = English (default)  
 82 |   - `'es'` = Spanish, `'fr'` = French, `'ar'` = Arabic, `'zh'` = Chinese, `'ga'` = Irish  
 83 |   - If a language isn’t available, it falls back to English.
 84 | 
 85 | ---
 86 | 
 87 | ### 🔎 `search(word)`
 88 | 
 89 | Check a single word to see if it's naughty.
 90 | 
 91 | ```javascript
 92 | const isProfane = await profanity.search('heck');
 93 | console.log(isProfane); // true or false
 94 | ```
 95 | 
 96 | ---
 97 | 
 98 | ### 💬 `hasCurseWords(sentence)`
 99 | 
100 | Check a full sentence or phrase for profanity.
101 | 
102 | ```javascript
103 | const result = await profanity.hasCurseWords('You silly goose');
104 | console.log(result); // probably false, unless goose is banned now 🪿
105 | ```
106 | 
107 | ---
108 | 
109 | ### 📜 `all()`
110 | 
111 | Get the full list of bad words in the current language.
112 | 
113 | ```javascript
114 | const badWords = await profanity.all();
115 | console.log(badWords); // ['word1', 'word2', 'etc']
116 | ```
117 | 
118 | ---
119 | 
120 | ### 💡 Real Talk: Edge Cases
121 | 
122 | - Empty strings? We gotchu. Returns `false`.
123 | - `search()` and `hasCurseWords()` are **case-insensitive**.
124 | - Special characters and punctuation? No problem.
125 | 
126 | ---
127 | 
128 | ## 🧪 Testing with Jest
129 | 
130 | We've got testing covered like whipped cream on a latte ☕🎂
131 | 
132 | Run the default test suite:
133 | 
134 | ```bash
135 | npm test
136 | ```
137 | 
138 | Or use more specific Jest commands:
139 | 
140 | ```bash
141 | # Watch mode (great for dev workflow)
142 | npx jest --watch
143 | 
144 | # Run tests in a specific file
145 | npx jest path/to/your/file.test.js
146 | 
147 | # Run coverage report
148 | npx jest --coverage
149 | 
150 | # Run with verbose output (get all the juicy details)
151 | npx jest --verbose
152 | ```
153 | 
154 | Tests are located in the `/__tests__/` directory and use the real profanity files, so you know it’s legit 👀✅
155 | 
156 | ---
157 | 
158 | ## 🔀 Example Use Cases
159 | 
160 | ### ✅ Filter User Input
161 | 
162 | ```js
163 | async function filterInput(input) {
164 |   if (await profanity.hasCurseWords(input)) {
165 |     return '⚠️ Whoa there! Language, please.';
166 |   }
167 |   return input;
168 | }
169 | ```
170 | 
171 | ---
172 | 
173 | ### 🌍 Multi-language Setup
174 | 
175 | ```js
176 | const en = new ProfanityEngine({ language: 'en' });
177 | const es = new ProfanityEngine({ language: 'es' });
178 | 
179 | const englishResult = await en.search('bad');
180 | const spanishResult = await es.search('malo');
181 | ```
182 | 
183 | ---
184 | 
185 | ## 🌍 Want to Contribute?
186 | 
187 | We love open source buddies 💛
188 | 
189 | ### Add a New Language
190 | 
191 | 1. Fork it 🍴
192 | 2. Add a file to `/data/` named like `fr.txt` for French
193 | 3. Fill it with one profane word per line
194 | 4. Push & open a pull request!
195 | 
196 | ---
197 | 
198 | ## 🙌 Who Made This?
199 | 
200 | Built by [Robert James Gabriel](https://github.com/robertgabriel) and the good people at **Coffee & Fun LLC**. We make dev tools with accessibility, coffee, and good vibes in mind.
201 | 
202 | > Wanna support? Send a coffee our way or just spread the word! ☕🚀
203 | 
204 | ---
205 | 
206 | ## 🧡 License
207 | 
208 | [MIT](https://opensource.org/licenses/MIT) – because sharing is caring.
209 | 
210 | ---
211 | 
212 | ## 💬 Support & Community
213 | 
214 | - 🐛 [Report Bugs](https://github.com/coffee-and-fun/google-profanity-words/issues)
215 | - 💡 [Join Discussions](https://github.com/coffee-and-fun/google-profanity-words/discussions)
216 | - 📬 Email: [support@coffeeandfun.com](mailto:support@coffeeandfun.com)
217 | 
218 | ---
219 | 
220 | Made with ☕, code, and a sprinkle of magic at Coffee & Fun LLC 💖
221 | 
222 | ## AI Usage
223 | Claude AI was used to help write this README and to add extra Jest tests.


--------------------------------------------------------------------------------
/data/es.txt:
--------------------------------------------------------------------------------
  1 | 2 chicas 1 taza
  2 | 2g1c
  3 | 4r5e
  4 | 5h1t
  5 | 5 hit
  6 | a55
  7 | culo
  8 | acrotomofilia
  9 | bolsillo caliente de alabama
 10 | oleoducto de alaska
 11 | anal
 12 | anilingus
 13 | ano
 14 | una mierda
 15 | ar5e
 16 | culo
 17 | culo
 18 | gilipollas
 19 | culo
 20 | follador de culo
 21 | culo-gorro
 22 | culo-pirata
 23 | bolsa de culo
 24 | bandido
 25 | assbanger
 26 | mordida
 27 | payaso
 28 | culo
 29 | rompeculos
 30 | culos
 31 | cara de culo
 32 | hijo de puta
 33 | enculada
 34 | asno duende
 35 | idiota
 36 | idiota
 37 | Estúpido
 38 | imbéciles
 39 | asaltante
 40 | ladrón de traseros
 41 | lamer el culo
 42 | lameculos
 43 | culo mono
 44 | culo
 45 | mascador de culos
 46 | aspirar
 47 | gilipollas
 48 | chupador de culos
 49 | culo
 50 | gilipollas
 51 | asswipe
 52 | auto erótico
 53 | autoerotico
 54 | perra
 55 | b00bs
 56 | b17ch
 57 | perra
 58 | babeland
 59 | masa para bebes
 60 | jugo de bebe
 61 | mordaza de bola
 62 | salsa de bolas
 63 | patear la pelota
 64 | pelota lamiendo
 65 | saco de pelota
 66 | chupando bolas
 67 | bolsa de pelota
 68 | pelotas
 69 | saco de bolas
 70 | bampot
 71 | bang bros
 72 | a pelo
 73 | apenas legal
 74 | desnudo
 75 | bastardo
 76 | bastardo
 77 | bastinado
 78 | bbw
 79 | bdsm
 80 | frijol
 81 | frijoles
 82 | bestial
 83 | bestialidad
 84 | bestialidad
 85 | cuchillo de castor
 86 | labios de castor
 87 | campana
 88 | bestial
 89 | bestialidad
 90 | perra
 91 | perra
 92 | grande y negro
 93 | pechos grandes
 94 | grandes aldabas
 95 | grandes tetas
 96 | tontas
 97 | cerradura de pájaro
 98 | perra
 99 | perra
100 | perras
101 | perras
102 | perra
103 | quejándose
104 | gallo negro
105 | acción rubia
106 | rubia sobre rubia acción
107 | sangriento
108 | mamada
109 | sopla tu carga
110 | mamada
111 | mamadas
112 | gofre azul
113 | tonto
114 | boiolas
115 | mierda
116 | cojones
117 | bollok
118 | bollox
119 | esclavitud
120 | metedura de pata
121 | teta
122 | bobo
123 | tetas
124 | tetas
125 | tetas
126 | tetas
127 | booooooobs
128 | llamada de botín
129 | senos
130 | duchas marrones
131 | acción morena
132 | buceta
133 | tío
134 | bukkake
135 | bullyke
136 | vibra de bala
137 | mierda
138 | culo
139 | agujero de tapón
140 | boca de tonel
141 | conejito hijo de puta
142 | tetona
143 | culata
144 | trasero-pirata
145 | nalgas
146 | ojete
147 | masticar trasero
148 | anal
149 | verga
150 | hijo de puta
151 | dedo de camello
152 | camgirl
153 | puta de la cam
154 | camwhore
155 | masticador de alfombras
156 | mascador de alfombras
157 | graznar
158 | chinchilla
159 | grieta
160 | Choad
161 | capullos de rosa de chocolate
162 | Chode
163 | cipá
164 | círculo idiota
165 | cl1t
166 | vapor de cleveland
167 | clítoris
168 | clítoris
169 | clítoris
170 | clítoris
171 | abrazaderas de trébol
172 | racimo de mierda
173 | nuez
174 | polla
175 | chupapollas
176 | mordedura de gallo
177 | hamburguesa
178 | cara de gallo
179 | cabeza de gallo
180 | jockey
181 | aldaba
182 | maestro de gallos
183 | traficante de gallos
184 | cockmongruel
185 | mono gallo
186 | gallo
187 | comepollas
188 | nariz de gallo
189 | pepita
190 | pollas
191 | mierda
192 | herrero
193 | fumador de pollas
194 | mamar
195 | mamar
196 | mamada
197 | mamada
198 | hijo de puta
199 | mamando
200 | chupapollas
201 | pollasuka
202 | hijo de puta
203 | coca
204 | cocinando
205 | coksucka
206 | coochie
207 | chulo
208 | mapache
209 | mapaches
210 | cooter
211 | coprolagnia
212 | coprofilia
213 | cornhole
214 | timonel
215 | tonterías
216 | cremita
217 | semen
218 | desmoronamiento
219 | basurero
220 | devorador de semen
221 | corrida
222 | cummer
223 | correrse
224 | se corre
225 | corrida
226 | zorra
227 | corrida
228 | cunilingus
229 | cunillingus
230 | coño
231 | cunnilingus
232 | coño
233 | cara de chocho
234 | coño
235 | lamer el coño
236 | lamer el coño
237 | lamecoños
238 | lamecoños
239 | lamiendo coños
240 | lamiendo coños
241 | cuntrag
242 | coños
243 | cyalis
244 | cyberfuc
245 | cyberfuck
246 | cibernético
247 | cibernético
248 | ciberfuckers
249 | cibernético
250 | d1ck
251 | maldita sea
252 | maldición
253 | moreno
254 | violación en una cita
255 | cita
256 | Garganta profunda
257 | Garganta profunda
258 | dendrofilia
259 | polla
260 | bolsa de pene
261 | batidor de pollas
262 | cara de pene
263 | gilipollas
264 | pendejo
265 | jugo de polla
266 | dickleche
267 | traficante de pollas
268 | bofetada
269 | hijo de puta
270 | idiota
271 | comadreja
272 | dickweed
273 | idiota
274 | dique
275 | consolador
276 | consoladores
277 | moras
278 | zarzamora
279 | tonto
280 | borrachos
281 | idiota
282 | dirección
283 | almohadas sucias
284 | sucio sanchez
285 | dlck
286 | estilo perro
287 | follador de perros
288 | estilo perrito
289 | estilo perrito
290 | persiguiendo
291 | persiguiendo
292 | estilo perrito
293 | a cuatro patas
294 | dolcett
295 | dominación
296 | dominatriz
297 | domos
298 | ponche de burro
299 | burro
300 | bolsa de basura
301 | chiflado
302 | doosh
303 | doble polla
304 | doble penetración
305 | ducha
306 | gilipollas
307 | acción doble penetración
308 | joroba seca
309 | ducha
310 | estupideces
311 | idiota
312 | dvda
313 | dique
314 | come mi culo
315 | ecchi
316 | eyacular
317 | eyaculado
318 | eyacula
319 | eyacular
320 | eyaculando
321 | eyaculación
322 | eyacular
323 | erótico
324 | erotismo
325 | escolta
326 | eunuco
327 | Mierda
328 | Cabron
329 | f4nny
330 | Mierda
331 | maricón
332 | marica
333 | maricón
334 | mariconear
335 | maricón
336 | maricón
337 | maricón
338 | maricas
339 | maricón
340 | maricas
341 | maricas
342 | maricón
343 | coño
344 | fannyflaps
345 | fannyfucker
346 | fanyy
347 | pedo
348 | tirado un pedo
349 | tirando pedos
350 | pedo
351 | gordo
352 | joder
353 | hijo de puta
354 | mierda
355 | fecal
356 | feck
357 | hijo de puta
358 | felación
359 | Felch
360 | felching
361 | felación
362 | felación
363 | fieltro
364 | chorros femeninos
365 | dominación femenina
366 | fijándose
367 | pistola con la mano
368 | follar con los dedos
369 | dedo follado
370 | follador de dedos
371 | folladores de dedos
372 | follando con los dedos
373 | folla con los dedos
374 | digitación
375 | follar con el puño
376 | puño follado
377 | follador de puños
378 | folladores de puños
379 | follando con el puño
380 | follando con los puños
381 | folla con los puños
382 | puño
383 | lanzallamas
384 | brida
385 | buscar
386 | buscador de
387 | fetichismo de pies
388 | paja de pies
389 | frotándose
390 | Mierda
391 | botones de mierda
392 | joder
393 | jodido
394 | Cabron
395 | hijos de puta
396 | imbécil
397 | imbéciles
398 | maldito
399 | maldito
400 | malditos
401 | mierdamierdahijo de puta
402 | fóllame
403 | folla
404 | cabrones
405 | joder
406 | idiota
407 | empaquetador de dulces
408 | empacador de chocolate
409 | fuk
410 | fuker
411 | fukker
412 | fukkin
413 | fuks
414 | fukwhit
415 | fukwit
416 | futanari
417 | mierda
418 | fux0r
419 | Punto G
420 | orgia
421 | orgia
422 | gangbanged
423 | gangbanged
424 | orgias
425 | sexo gay
426 | culo gay
427 | gaybob
428 | gaydo
429 | señor gay
430 | sexo gay
431 | tardo gay
432 | gaywad
433 | genitales
434 | polla gigante
435 | chica en
436 | niña en la cima
437 | las chicas se volvieron locas
438 | cabracx
439 | cabra
440 | maldita sea
441 | maldita sea
442 | maldita sea
443 | maldita sea
444 | maldito
445 | gokkun
446 | baño de oro
447 | buena chica
448 | gooch
449 | buena caca
450 | bien
451 | goregasmo
452 | gringo
453 | ir a tientas
454 | sexo en grupo
455 | Guido
456 | guro
457 | trabajo manual
458 | paja
459 | núcleo duro
460 | duro
461 | sexo duro
462 | heeb
463 | infierno
464 | hentai
465 | el ella
466 | Ho
467 | hoar
468 | hoare
469 | azada
470 | puta
471 | homo
472 | homoerótico
473 | cariño
474 | Honky
475 | puta
476 | horre
477 | más caliente
478 | córneo
479 | caliente carl
480 | chica caliente
481 | sexo caliente
482 | como matar
483 | como asesinar
484 | gordo enorme
485 | follando
486 | incesto
487 | coito
488 | masturbar
489 | paja
490 | burro
491 | paja
492 | cebo de la cárcel
493 | Jailbait
494 | japón
495 | rosquilla de gelatina
496 | hacerse una paja
497 | masturbarse
498 | jigaboo
499 | jigaboo
500 | jiggerboo
501 | esperma
502 | semen
503 | semen
504 | jism
505 | jism
506 | semen
507 | juggs
508 | kawk
509 | pozo
510 | kinbaku
511 | pervertido
512 | rizado
513 | kiunt
514 | mando
515 | perillas
516 | nudoso
517 | nudoso
518 | perilla
519 | cabeza de chorlito
520 | nudoso
521 | bromista
522 | golpe
523 | kondum
524 | kondums
525 | chiflar
526 | coquetear
527 | kum
528 | kúmer
529 | kummer
530 | cumming
531 | kums
532 | kunilingus
533 | kunt
534 | kyke
535 | l3i+ch
536 | picazón
537 | labios
538 | restricción de cuero
539 | chaqueta recta de cuero
540 | fiesta de limon
541 | lesbo
542 | lesbiana
543 | lmfao
544 | lolita
545 | haciendo el amor
546 | lujuria
547 | codiciando
548 | m0f0
549 | m0fo
550 | m45terbato
551 | ma5terb8
552 | ma5terbato
553 | Hazme llegar
554 | chorros masculinos
555 | masoquista
556 | maestro-bate
557 | maestrob8
558 | maestro murciélago*
559 | masterbat3
560 | masterbate
561 | masterbación
562 | masterbaciones
563 | masturbarse
564 | menaje
565 | 


--------------------------------------------------------------------------------
/__tests__/english.test.js:
--------------------------------------------------------------------------------
  1 | import { ProfanityEngine } from '../index.js';
  2 | 
  3 | const language = process.env.LANGUAGE || 'en'; // Default to 'en' if the LANGUAGE environment variable is not set
  4 | let profanity;
  5 | 
  6 | describe('English Profanity tests', () => {
  7 |   beforeAll(async () => {
  8 |     profanity = new ProfanityEngine({
  9 |       language: 'en',
 10 |       testMode: true,
 11 |     });
 12 |   });
 13 | 
 14 |   afterEach(() => {
 15 |     profanity.reset();
 16 |   });
 17 | 
 18 |   describe('Core English functionality', () => {
 19 |     it('Should get all the profanity words in an array', async () => {
 20 |       const allWords = await profanity.all();
 21 |       expect(allWords.length).toEqual(958); // Verify this number matches your actual word count
 22 |       expect(Array.isArray(allWords)).toBe(true);
 23 |       expect(allWords.length).toBeGreaterThan(0);
 24 |     });
 25 | 
 26 |     it('Should return true for profanity words', async () => {
 27 |       const searchWord = await profanity.search('shit');
 28 |       expect(searchWord).toEqual(true);
 29 |     });
 30 | 
 31 |     it('Should return false for normal words', async () => {
 32 |       const searchWord = await profanity.search('ka');
 33 |       expect(searchWord).toEqual(false);
 34 |     });
 35 | 
 36 |     it('Should return false for any empty string', async () => {
 37 |       const searchWord = await profanity.search('');
 38 |       expect(searchWord).toEqual(false);
 39 |     });
 40 | 
 41 |     it('Should return true for a sentence containing a profanity word', async () => {
 42 |       const sentence = 'Do not use bad words like shit or asshole.';
 43 |       const hasCurseWords = await profanity.hasCurseWords(sentence);
 44 |       expect(hasCurseWords).toEqual(true);
 45 |     });
 46 | 
 47 |     it('Should return false for a sentence with no profanity word', async () => {
 48 |       const sentence = 'This is a clean and polite sentence.';
 49 |       const hasCurseWords = await profanity.hasCurseWords(sentence);
 50 |       expect(hasCurseWords).toEqual(false);
 51 |     });
 52 |   });
 53 | 
 54 |   describe('English-specific edge cases', () => {
 55 |     it('Should handle case sensitivity correctly', async () => {
 56 |       expect(await profanity.search('SHIT')).toBe(true);
 57 |       expect(await profanity.search('Shit')).toBe(true);
 58 |       expect(await profanity.search('shit')).toBe(true);
 59 |     });
 60 | 
 61 |     it('Should handle whitespace around words', async () => {
 62 |       expect(await profanity.search('  shit  ')).toBe(true);
 63 |       expect(await profanity.search('\tshit\n')).toBe(true);
 64 |     });
 65 | 
 66 |     it('Should detect profanity with punctuation in sentences', async () => {
 67 |       const testSentences = [
 68 |         'What the shit!',
 69 |         'Oh, shit.',
 70 |         'Shit? Really?',
 71 |         'This is shit, man.',
 72 |         '"Shit," he said.',
 73 |         'Absolute shit-show.',
 74 |       ];
 75 | 
 76 |       for (const sentence of testSentences) {
 77 |         expect(await profanity.hasCurseWords(sentence)).toBe(true);
 78 |       }
 79 |     });
 80 | 
 81 |     it('Should return correct curse words from sentences', async () => {
 82 |       const sentence = 'Do not use bad words like shit or asshole.';
 83 |       const foundWords = await profanity.getCurseWords(sentence);
 84 |       
 85 |       expect(foundWords).toContain('shit');
 86 |       expect(foundWords).toContain('asshole');
 87 |       expect(foundWords.length).toBe(2);
 88 |     });
 89 | 
 90 |     it('Should handle multiple instances of same word', async () => {
 91 |       const sentence = 'shit shit shit everywhere';
 92 |       const foundWords = await profanity.getCurseWords(sentence);
 93 |       
 94 |       // Should only return unique words
 95 |       expect(foundWords).toContain('shit');
 96 |       expect(foundWords.length).toBe(1);
 97 |     });
 98 | 
 99 |     it('Should validate specific English profanity words exist', async () => {
100 |       // Test a selection of words that should definitely be in an English profanity list
101 |       const commonProfanityWords = [
102 |         'shit', 'fuck', 'damn', 'hell', 'ass', 'bitch'
103 |       ];
104 | 
105 |       for (const word of commonProfanityWords) {
106 |         expect(await profanity.search(word)).toBe(true);
107 |       }
108 |     });
109 | 
110 |     it('Should not flag common English words', async () => {
111 |       const commonWords = [
112 |         'hello', 'world', 'computer', 'test', 'function', 'javascript',
113 |         'english', 'language', 'sentence', 'word', 'clean', 'polite'
114 |       ];
115 | 
116 |       for (const word of commonWords) {
117 |         expect(await profanity.search(word)).toBe(false);
118 |       }
119 |     });
120 | 
121 |     it('Should handle contractions and apostrophes', async () => {
122 |       const sentence = "Don't say shit, it's not appropriate.";
123 |       expect(await profanity.hasCurseWords(sentence)).toBe(true);
124 |     });
125 | 
126 |     it('Should handle hyphenated words', async () => {
127 |       const sentence = 'This is a shit-storm.';
128 |       expect(await profanity.hasCurseWords(sentence)).toBe(true);
129 |     });
130 | 
131 |     // Additional edge cases for better coverage
132 |     it('Should handle mixed case in sentences', async () => {
133 |       const sentence = 'This SENTENCE has SHIT and damn IN it';
134 |       expect(await profanity.hasCurseWords(sentence)).toBe(true);
135 |       
136 |       const foundWords = await profanity.getCurseWords(sentence);
137 |       expect(foundWords).toContain('shit'); // Should normalize to lowercase
138 |       expect(foundWords).toContain('damn');
139 |     });
140 | 
141 |     it('Should handle words at sentence boundaries', async () => {
142 |       expect(await profanity.hasCurseWords('shit')).toBe(true);
143 |       expect(await profanity.hasCurseWords('shit is bad')).toBe(true);
144 |       expect(await profanity.hasCurseWords('that is shit')).toBe(true);
145 |       expect(await profanity.hasCurseWords('the shit word')).toBe(true);
146 |     });
147 | 
148 |     it('Should not detect partial word matches', async () => {
149 |       // These should NOT be flagged as containing profanity
150 |       const sentences = [
151 |         'The weather is hellish today', // contains "hell" but as part of "hellish"
152 |         'I love my shirty shirt', // contains "shit" but as part of "shirty" 
153 |         'Assessment is important', // contains "ass" but as part of "assessment"
154 |       ];
155 | 
156 |       for (const sentence of sentences) {
157 |         // These depend on your word boundaries implementation
158 |         // Comment out if your implementation flags these
159 |         const result = await profanity.hasCurseWords(sentence);
160 |         // You may want to adjust based on your exact implementation
161 |       }
162 |     });
163 |   });
164 | 
165 |   describe('Performance tests for English dataset', () => {
166 |     it('Should handle large English text efficiently', async () => {
167 |       const largeText = 'This is a test sentence. '.repeat(1000) + 'shit ' + 'Clean text. '.repeat(1000);
168 |       
169 |       const startTime = Date.now();
170 |       const result = await profanity.hasCurseWords(largeText);
171 |       const endTime = Date.now();
172 |       
173 |       expect(result).toBe(true);
174 |       expect(endTime - startTime).toBeLessThan(100); // Should complete in under 100ms
175 |     });
176 | 
177 |     it('Should efficiently search through all English terms', async () => {
178 |       const allWords = await profanity.all();
179 |       
180 |       const startTime = Date.now();
181 |       for (let i = 0; i < 100; i++) {
182 |         await profanity.search(allWords[i % allWords.length]);
183 |       }
184 |       const endTime = Date.now();
185 |       
186 |       expect(endTime - startTime).toBeLessThan(50); // Should be very fast with Set lookup
187 |     });
188 | 
189 |     it('Should handle concurrent operations on English dataset', async () => {
190 |       const promises = [
191 |         profanity.search('shit'),
192 |         profanity.hasCurseWords('this is shit'),
193 |         profanity.getCurseWords('damn shit'),
194 |         profanity.all(),
195 |         profanity.search('fuck')
196 |       ];
197 |       
198 |       const results = await Promise.all(promises);
199 |       expect(results[0]).toBe(true); // search shit
200 |       expect(results[1]).toBe(true); // hasCurseWords
201 |       expect(results[2]).toContain('shit'); // getCurseWords
202 |       expect(results[3].length).toBe(958); // all words
203 |       expect(results[4]).toBe(true); // search fuck
204 |     });
205 |   });
206 | 
207 |   describe('Data integrity for English', () => {
208 |     it('Should not allow modification of English word list', async () => {
209 |       const terms1 = await profanity.all();
210 |       const originalLength = terms1.length;
211 |       
212 |       // Try to modify the returned array
213 |       terms1.push('fake-word');
214 |       terms1.pop();
215 |       terms1[0] = 'modified';
216 |       
217 |       // Get terms again - should be unchanged
218 |       const terms2 = await profanity.all();
219 |       expect(terms2.length).toBe(originalLength);
220 |       expect(terms2).not.toContain('fake-word');
221 |       expect(terms2[0]).not.toBe('modified');
222 |     });
223 | 
224 |     it('Should provide consistent results for English detection', async () => {
225 |       const sentence = 'This sentence has shit and damn';
226 |       
227 |       const result1 = await profanity.getCurseWords(sentence);
228 |       const result2 = await profanity.getCurseWords(sentence);
229 |       const result3 = await profanity.hasCurseWords(sentence);
230 |       
231 |       expect(result1).toEqual(result2);
232 |       expect(result3).toBe(true);
233 |     });
234 |   });
235 | });


--------------------------------------------------------------------------------
/__tests__/engine.test.js:
--------------------------------------------------------------------------------
  1 | import { ProfanityEngine } from '../index.js';
  2 | 
  3 | const language = process.env.LANGUAGE || 'en'; // Default to 'en' if the LANGUAGE environment variable is not set
  4 | let profanity;
  5 | 
  6 | describe('ProfanityEngine v3 API tests', () => {
  7 |   beforeAll(async () => {
  8 |     profanity = new ProfanityEngine({
  9 |       language: 'en',
 10 |       testMode: true,
 11 |     });
 12 |   });
 13 | 
 14 |   afterEach(() => {
 15 |     profanity.reset();
 16 |   });
 17 | 
 18 |   describe('Core functionality', () => {
 19 |     it('Should return all profanity words in an array', async () => {
 20 |       const allWords = await profanity.all();
 21 |       expect(Array.isArray(allWords)).toBe(true);
 22 |       expect(allWords.length).toBeGreaterThan(0);
 23 |       expect(allWords.length).toEqual(958); // Adjust based on your actual count
 24 |     });
 25 | 
 26 |     it('Should detect profanity words correctly', async () => {
 27 |       expect(await profanity.search('hell')).toBe(true);
 28 |       expect(await profanity.search('damn')).toBe(true);
 29 |     });
 30 | 
 31 |     it('Should return false for clean words', async () => {
 32 |       expect(await profanity.search('hello')).toBe(false);
 33 |       expect(await profanity.search('world')).toBe(false);
 34 |       expect(await profanity.search('test')).toBe(false);
 35 |     });
 36 | 
 37 |     it('Should detect profanity in sentences', async () => {
 38 |       const sentence = 'This is a test sentence with bad words like hell and damn';
 39 |       const hasProfanity = await profanity.hasCurseWords(sentence);
 40 |       expect(hasProfanity).toBe(true);
 41 |     });
 42 | 
 43 |     it('Should return false for clean sentences', async () => {
 44 |       const sentence = 'This is a test sentence with no bad words';
 45 |       const hasProfanity = await profanity.hasCurseWords(sentence);
 46 |       expect(hasProfanity).toBe(false);
 47 |     });
 48 | 
 49 |     it('Should return list of found profanity words', async () => {
 50 |       const sentence = 'This is a test sentence with bad words like hell and damn';
 51 |       const badWords = await profanity.getCurseWords(sentence);
 52 |       expect(badWords).toEqual(expect.arrayContaining(['hell', 'damn']));
 53 |       expect(badWords).toHaveLength(2);
 54 |     });
 55 | 
 56 |     it('Should return empty array if no curse words found', async () => {
 57 |       const sentence = 'This is a test sentence with no bad words';
 58 |       const result = await profanity.getCurseWords(sentence);
 59 |       expect(result).toEqual([]);
 60 |     });
 61 |   });
 62 | 
 63 |   describe('Language fallback behavior', () => {
 64 |     it('Should fallback to English for unsupported languages', async () => {
 65 |       const unsupportedProfanity = new ProfanityEngine({
 66 |         language: 'nonexistent-language',
 67 |         testMode: true,
 68 |       });
 69 |       
 70 |       const terms = await unsupportedProfanity.all();
 71 |       expect(terms.length).toEqual(958); // Should load English words
 72 |     });
 73 | 
 74 |     it('Should work with supported languages', async () => {
 75 |       const spanishProfanity = new ProfanityEngine({
 76 |         language: 'es',
 77 |         testMode: true,
 78 |       });
 79 |       
 80 |       const terms = await spanishProfanity.all();
 81 |       expect(terms.length).toBeGreaterThan(0);
 82 |       // Should be different from English if Spanish file exists
 83 |     });
 84 |   });
 85 | 
 86 |   describe('Input validation and edge cases', () => {
 87 |     it('Should handle empty strings gracefully', async () => {
 88 |       expect(await profanity.search('')).toBe(false);
 89 |       expect(await profanity.hasCurseWords('')).toBe(false);
 90 |       expect(await profanity.getCurseWords('')).toEqual([]);
 91 |     });
 92 | 
 93 |     it('Should handle null/undefined inputs gracefully', async () => {
 94 |       expect(await profanity.search(null)).toBe(false);
 95 |       expect(await profanity.search(undefined)).toBe(false);
 96 |       expect(await profanity.hasCurseWords(null)).toBe(false);
 97 |       expect(await profanity.hasCurseWords(undefined)).toBe(false);
 98 |       expect(await profanity.getCurseWords(null)).toEqual([]);
 99 |       expect(await profanity.getCurseWords(undefined)).toEqual([]);
100 |     });
101 | 
102 |     it('Should handle non-string inputs gracefully', async () => {
103 |       expect(await profanity.search(123)).toBe(false);
104 |       expect(await profanity.search({})).toBe(false);
105 |       expect(await profanity.search([])).toBe(false);
106 |       expect(await profanity.hasCurseWords(123)).toBe(false);
107 |       expect(await profanity.getCurseWords(123)).toEqual([]);
108 |     });
109 | 
110 |     it('Should handle punctuation correctly', async () => {
111 |       const sentence = 'What the hell! Damn, that sucks.';
112 |       const result = await profanity.hasCurseWords(sentence);
113 |       expect(result).toBe(true);
114 |       
115 |       const foundWords = await profanity.getCurseWords(sentence);
116 |       expect(foundWords).toContain('hell');
117 |       expect(foundWords).toContain('damn');
118 |     });
119 | 
120 |     it('Should return unique words only', async () => {
121 |       const sentence = 'hell hell damn damn hell';
122 |       const badWords = await profanity.getCurseWords(sentence);
123 |       expect(badWords).toHaveLength(2);
124 |       expect(badWords).toEqual(expect.arrayContaining(['hell', 'damn']));
125 |     });
126 | 
127 |     it('Should be case insensitive', async () => {
128 |       expect(await profanity.search('HELL')).toBe(true);
129 |       expect(await profanity.search('Hell')).toBe(true);
130 |       expect(await profanity.search('hell')).toBe(true);
131 |       
132 |       const sentence = 'This has HELL and Damn in it';
133 |       expect(await profanity.hasCurseWords(sentence)).toBe(true);
134 |     });
135 | 
136 |     it('Should handle whitespace properly', async () => {
137 |       expect(await profanity.search('  hell  ')).toBe(true);
138 |       expect(await profanity.search('\thell\n')).toBe(true);
139 |     });
140 | 
141 |     it('Should handle various punctuation marks', async () => {
142 |       const testSentences = [
143 |         'What the hell?',
144 |         'Damn!',
145 |         'Hell, no!',
146 |         'Oh-hell-no',
147 |         'hell.',
148 |         'hell,',
149 |         'hell;',
150 |         'hell:',
151 |         '(hell)',
152 |         '[hell]',
153 |         '{hell}',
154 |         '"hell"',
155 |         "'hell'",
156 |       ];
157 |       
158 |       for (const sentence of testSentences) {
159 |         expect(await profanity.hasCurseWords(sentence)).toBe(true);
160 |       }
161 |     });
162 |   });
163 | 
164 |   describe('Data integrity and immutability', () => {
165 |     it('Should not modify original terms array', async () => {
166 |       const terms1 = await profanity.all();
167 |       const terms2 = await profanity.all();
168 |       
169 |       terms1.push('test-word');
170 |       expect(terms2).not.toContain('test-word');
171 |       expect(terms1.length).not.toEqual(terms2.length);
172 |     });
173 | 
174 |     it('Should return consistent results across multiple calls', async () => {
175 |       const sentence = 'This sentence has hell and damn';
176 |       
177 |       const result1 = await profanity.getCurseWords(sentence);
178 |       const result2 = await profanity.getCurseWords(sentence);
179 |       const result3 = await profanity.hasCurseWords(sentence);
180 |       
181 |       expect(result1).toEqual(result2);
182 |       expect(result3).toBe(true);
183 |     });
184 | 
185 |     it('Should maintain state after reset', async () => {
186 |       // Use the profanity engine
187 |       await profanity.search('hell');
188 |       expect(profanity.isInitialized).toBe(true);
189 |       
190 |       // Reset it
191 |       profanity.reset();
192 |       expect(profanity.isInitialized).toBe(false);
193 |       
194 |       // Should work again after reset
195 |       expect(await profanity.search('hell')).toBe(true);
196 |       expect(profanity.isInitialized).toBe(true);
197 |     });
198 |   });
199 | 
200 |   describe('Performance and concurrency', () => {
201 |     it('Should handle concurrent operations', async () => {
202 |       const promises = [
203 |         profanity.search('hell'),
204 |         profanity.hasCurseWords('this is hell'),
205 |         profanity.getCurseWords('damn hell'),
206 |         profanity.all(),
207 |         profanity.search('damn')
208 |       ];
209 |       
210 |       const results = await Promise.all(promises);
211 |       expect(results[0]).toBe(true); // search hell
212 |       expect(results[1]).toBe(true); // hasCurseWords
213 |       expect(results[2]).toContain('hell'); // getCurseWords
214 |       expect(results[3].length).toBeGreaterThan(0); // all
215 |       expect(results[4]).toBe(true); // search damn
216 |     });
217 | 
218 |     it('Should handle large text efficiently', async () => {
219 |       const largeText = 'This is a test sentence. '.repeat(1000) + 'hell ' + 'Clean text. '.repeat(1000);
220 |       
221 |       const startTime = Date.now();
222 |       const result = await profanity.hasCurseWords(largeText);
223 |       const endTime = Date.now();
224 |       
225 |       expect(result).toBe(true);
226 |       expect(endTime - startTime).toBeLessThan(100); // Should complete quickly
227 |     });
228 | 
229 |     it('Should initialize only once even with multiple method calls', async () => {
230 |       const newProfanity = new ProfanityEngine({
231 |         language: 'en',
232 |         testMode: true,
233 |       });
234 |       
235 |       // Multiple calls should not re-initialize
236 |       await newProfanity.search('test');
237 |       await newProfanity.hasCurseWords('test');
238 |       await newProfanity.all();
239 |       
240 |       expect(newProfanity.isInitialized).toBe(true);
241 |     });
242 |   });
243 | 
244 |   describe('Configuration options', () => {
245 |     it('Should use default configuration when no config provided', () => {
246 |       const defaultProfanity = new ProfanityEngine();
247 |       expect(defaultProfanity.language).toBe('en');
248 |       expect(defaultProfanity.isTestMode).toBe(false);
249 |     });
250 | 
251 |     it('Should handle partial configuration objects', () => {
252 |       const partialProfanity = new ProfanityEngine({ language: 'es' });
253 |       expect(partialProfanity.language).toBe('es');
254 |       expect(partialProfanity.isTestMode).toBe(false);
255 |     });
256 | 
257 |     it('Should respect testMode setting', async () => {
258 |       // Store original console.warn
259 |       const originalWarn = console.warn;
260 |       let warnCalled = false;
261 |       
262 |       // Mock console.warn
263 |       console.warn = () => {
264 |         warnCalled = true;
265 |       };
266 |       
267 |       // Test mode should suppress warnings
268 |       const testProfanity = new ProfanityEngine({
269 |         language: 'nonexistent-language',
270 |         testMode: true,
271 |       });
272 |       
273 |       warnCalled = false;
274 |       await testProfanity.all();
275 |       expect(warnCalled).toBe(false);
276 |       
277 |       // Production mode should show warnings
278 |       const prodProfanity = new ProfanityEngine({
279 |         language: 'nonexistent-language',
280 |         testMode: false,
281 |       });
282 |       
283 |       warnCalled = false;
284 |       await prodProfanity.all();
285 |       expect(warnCalled).toBe(true);
286 |       
287 |       // Restore original console.warn
288 |       console.warn = originalWarn;
289 |     });
290 |   });
291 | });


--------------------------------------------------------------------------------
/__tests__/spanish.test.js:
--------------------------------------------------------------------------------
  1 | import { ProfanityEngine } from '../index.js';
  2 | 
  3 | const language = process.env.LANGUAGE || 'en'; // Default to 'en' if the LANGUAGE environment variable is not set
  4 | let profanity;
  5 | 
  6 | describe('Spanish Profanity tests', () => {
  7 |   beforeAll(async () => {
  8 |     profanity = new ProfanityEngine({
  9 |       language: 'es',
 10 |       testMode: true,
 11 |     });
 12 |   });
 13 | 
 14 |   afterEach(() => {
 15 |     profanity.reset();
 16 |   });
 17 | 
 18 |   describe('Core Spanish functionality', () => {
 19 |     it('Should get all the profanity words in an array', async () => {
 20 |       const allWords = await profanity.all();
 21 |       expect(allWords.length).toEqual(564); // Verify this matches your Spanish word count
 22 |       expect(Array.isArray(allWords)).toBe(true);
 23 |       expect(allWords.length).toBeGreaterThan(0);
 24 |     });
 25 | 
 26 |     it('Should return true for profanity words', async () => {
 27 |       const searchWord = await profanity.search('labios');
 28 |       expect(searchWord).toEqual(true);
 29 |     });
 30 | 
 31 |     it('Should return false for normal words', async () => {
 32 |       const searchWord = await profanity.search('ka');
 33 |       expect(searchWord).toEqual(false);
 34 |     });
 35 | 
 36 |     it('Should return false for any empty string', async () => {
 37 |       const searchWord = await profanity.search('');
 38 |       expect(searchWord).toEqual(false);
 39 |     });
 40 | 
 41 |     it('Should return true for a sentence containing a profanity word', async () => {
 42 |       const sentence = 'No deberías decir malas culo palabras como mierda.';
 43 |       const hasCurseWords = await profanity.hasCurseWords(sentence);
 44 |       expect(hasCurseWords).toEqual(true);
 45 |     });
 46 | 
 47 |     it('Should return false for a sentence with no profanity word', async () => {
 48 |       const sentence = 'Esta es una oración limpia y educada.';
 49 |       const hasCurseWords = await profanity.hasCurseWords(sentence);
 50 |       expect(hasCurseWords).toEqual(false);
 51 |     });
 52 |   });
 53 | 
 54 |   describe('Spanish-specific edge cases', () => {
 55 |     it('Should handle Spanish accented characters', async () => {
 56 |       // Test words with tildes and accents (if they exist in your word list)
 57 |       const accentedSentence = 'No uses palabras como cabrón o pendejó.';
 58 |       const result = await profanity.hasCurseWords(accentedSentence);
 59 |       // This will depend on whether your Spanish word list includes accented versions
 60 |       expect(typeof result).toBe('boolean');
 61 |     });
 62 | 
 63 |     it('Should handle case sensitivity correctly in Spanish', async () => {
 64 |       expect(await profanity.search('MIERDA')).toBe(true);
 65 |       expect(await profanity.search('Mierda')).toBe(true);
 66 |       expect(await profanity.search('mierda')).toBe(true);
 67 |     });
 68 | 
 69 |     it('Should handle whitespace around Spanish words', async () => {
 70 |       expect(await profanity.search('  mierda  ')).toBe(true);
 71 |       expect(await profanity.search('\tmierda\n')).toBe(true);
 72 |     });
 73 | 
 74 |     it('Should detect Spanish profanity with punctuation', async () => {
 75 |       const testSentences = [
 76 |         '¡Qué mierda!',
 77 |         'Oh, mierda.',
 78 |         '¿Mierda? ¿En serio?',
 79 |         'Esto es una mierda, hombre.',
 80 |         '"Mierda," dijo él.',
 81 |         'Una mierda total.',
 82 |       ];
 83 | 
 84 |       for (const sentence of testSentences) {
 85 |         expect(await profanity.hasCurseWords(sentence)).toBe(true);
 86 |       }
 87 |     });
 88 | 
 89 |     it('Should return correct Spanish curse words from sentences', async () => {
 90 |       const sentence = 'No deberías decir malas culo palabras como mierda.';
 91 |       const foundWords = await profanity.getCurseWords(sentence);
 92 |       
 93 |       // Should find both curse words
 94 |       expect(foundWords).toContain('culo');
 95 |       expect(foundWords).toContain('mierda');
 96 |       expect(foundWords.length).toBe(2);
 97 |     });
 98 | 
 99 |     it('Should handle multiple instances of same Spanish word', async () => {
100 |       const sentence = 'mierda mierda mierda por todas partes';
101 |       const foundWords = await profanity.getCurseWords(sentence);
102 |       
103 |       // Should only return unique words
104 |       expect(foundWords).toContain('mierda');
105 |       expect(foundWords.length).toBe(1);
106 |     });
107 | 
108 |     it('Should validate specific Spanish profanity words exist', async () => {
109 |       // Test common Spanish profanity words (adjust based on your actual word list)
110 |       const commonSpanishProfanity = [
111 |         'mierda', 'culo', 'cabron', 'puta', 'joder'
112 |       ];
113 | 
114 |       // Note: Only test words that actually exist in your Spanish word list
115 |       for (const word of commonSpanishProfanity) {
116 |         const result = await profanity.search(word);
117 |         // We can't assert true/false without knowing your exact word list
118 |         expect(typeof result).toBe('boolean');
119 |       }
120 |     });
121 | 
122 |     it('Should not flag common Spanish words', async () => {
123 |       const commonSpanishWords = [
124 |         'hola', 'mundo', 'computadora', 'prueba', 'función', 'javascript',
125 |         'español', 'idioma', 'oración', 'palabra', 'limpio', 'educado',
126 |         'casa', 'perro', 'gato', 'agua', 'comida', 'amor'
127 |       ];
128 | 
129 |       for (const word of commonSpanishWords) {
130 |         expect(await profanity.search(word)).toBe(false);
131 |       }
132 |     });
133 | 
134 |     it('Should handle Spanish contractions and apostrophes', async () => {
135 |       // Spanish doesn't use contractions like English, but test similar constructs
136 |       const sentence = 'No digas mierda, no es apropiado.';
137 |       expect(await profanity.hasCurseWords(sentence)).toBe(true);
138 |     });
139 | 
140 |     it('Should handle Spanish inverted punctuation', async () => {
141 |       const sentence = '¡No digas mierda!';
142 |       expect(await profanity.hasCurseWords(sentence)).toBe(true);
143 |       
144 |       const sentence2 = '¿Por qué dices mierda?';
145 |       expect(await profanity.hasCurseWords(sentence2)).toBe(true);
146 |     });
147 | 
148 |     it('Should handle Spanish special characters', async () => {
149 |       const sentence = 'La niña dijo una mierda.';
150 |       expect(await profanity.hasCurseWords(sentence)).toBe(true);
151 |     });
152 | 
153 |     // Additional Spanish-specific tests
154 |     it('Should handle mixed case in Spanish sentences', async () => {
155 |       const sentence = 'Esta ORACIÓN tiene MIERDA y culo EN ella';
156 |       expect(await profanity.hasCurseWords(sentence)).toBe(true);
157 |       
158 |       const foundWords = await profanity.getCurseWords(sentence);
159 |       expect(foundWords).toContain('mierda'); // Should normalize to lowercase
160 |       expect(foundWords).toContain('culo');
161 |     });
162 | 
163 |     it('Should handle Spanish words at sentence boundaries', async () => {
164 |       expect(await profanity.hasCurseWords('mierda')).toBe(true);
165 |       expect(await profanity.hasCurseWords('mierda es malo')).toBe(true);
166 |       expect(await profanity.hasCurseWords('eso es mierda')).toBe(true);
167 |       expect(await profanity.hasCurseWords('la palabra mierda')).toBe(true);
168 |     });
169 | 
170 |     it('Should handle Spanish gender variations correctly', async () => {
171 |       // Test if your list includes both masculine and feminine forms
172 |       const sentence = 'Él es un idiota y ella es una idiota.';
173 |       const result = await profanity.hasCurseWords(sentence);
174 |       expect(typeof result).toBe('boolean');
175 |     });
176 | 
177 |     it('Should handle Spanish diminutives and variations', async () => {
178 |       // Test common Spanish word variations if they exist in your list
179 |       const variations = [
180 |         'No seas pendejo',
181 |         'Qué pendejada',
182 |         'Está cabronísimo'
183 |       ];
184 |       
185 |       for (const sentence of variations) {
186 |         const result = await profanity.hasCurseWords(sentence);
187 |         expect(typeof result).toBe('boolean');
188 |       }
189 |     });
190 |   });
191 | 
192 |   describe('Performance tests for Spanish dataset', () => {
193 |     it('Should handle large Spanish text efficiently', async () => {
194 |       const largeText = 'Esta es una oración de prueba. '.repeat(1000) + 'mierda ' + 'Texto limpio. '.repeat(1000);
195 |       
196 |       const startTime = Date.now();
197 |       const result = await profanity.hasCurseWords(largeText);
198 |       const endTime = Date.now();
199 |       
200 |       expect(result).toBe(true);
201 |       expect(endTime - startTime).toBeLessThan(100); // Should complete in under 100ms
202 |     });
203 | 
204 |     it('Should efficiently search through all Spanish terms', async () => {
205 |       const allWords = await profanity.all();
206 |       
207 |       const startTime = Date.now();
208 |       for (let i = 0; i < 100; i++) {
209 |         await profanity.search(allWords[i % allWords.length]);
210 |       }
211 |       const endTime = Date.now();
212 |       
213 |       expect(endTime - startTime).toBeLessThan(50); // Should be very fast with Set lookup
214 |     });
215 | 
216 |     it('Should handle concurrent operations on Spanish dataset', async () => {
217 |       const promises = [
218 |         profanity.search('mierda'),
219 |         profanity.hasCurseWords('esto es mierda'),
220 |         profanity.getCurseWords('culo mierda'),
221 |         profanity.all(),
222 |         profanity.search('labios')
223 |       ];
224 |       
225 |       const results = await Promise.all(promises);
226 |       expect(results[0]).toBe(true); // search mierda
227 |       expect(results[1]).toBe(true); // hasCurseWords
228 |       expect(results[2]).toContain('mierda'); // getCurseWords
229 |       expect(results[3].length).toBe(564); // all Spanish words
230 |       expect(results[4]).toBe(true); // search labios
231 |     });
232 |   });
233 | 
234 |   describe('Spanish language specificity', () => {
235 |     it('Should load Spanish words correctly without falling back to English', async () => {
236 |       const allWords = await profanity.all();
237 |       expect(allWords.length).toBe(564); // Should match Spanish count, not English count (958)
238 |     });
239 | 
240 |     it('Should detect Spanish-specific profanity that might not exist in English', async () => {
241 |       // Test a word that's likely Spanish-specific
242 |       const result = await profanity.search('labios');
243 |       expect(result).toBe(true); // Based on your original test
244 |     });
245 | 
246 |     it('Should handle regional Spanish variations', async () => {
247 |       // Test words that might be offensive in some Spanish-speaking regions
248 |       const regionalWords = [
249 |         'pinche', // Mexican
250 |         'boludo', // Argentinian
251 |         'coño',   // Spanish
252 |         'chingón' // Mexican
253 |       ];
254 |       
255 |       for (const word of regionalWords) {
256 |         const result = await profanity.search(word);
257 |         // Just verify it returns a boolean - depends on your word list
258 |         expect(typeof result).toBe('boolean');
259 |       }
260 |     });
261 |   });
262 | 
263 |   describe('Data integrity for Spanish', () => {
264 |     it('Should not allow modification of Spanish word list', async () => {
265 |       const terms1 = await profanity.all();
266 |       const originalLength = terms1.length;
267 |       
268 |       // Try to modify the returned array
269 |       terms1.push('palabra-falsa');
270 |       terms1.pop();
271 |       terms1[0] = 'modificado';
272 |       
273 |       // Get terms again - should be unchanged
274 |       const terms2 = await profanity.all();
275 |       expect(terms2.length).toBe(originalLength);
276 |       expect(terms2).not.toContain('palabra-falsa');
277 |       expect(terms2[0]).not.toBe('modificado');
278 |     });
279 | 
280 |     it('Should provide consistent results for Spanish detection', async () => {
281 |       const sentence = 'Esta oración tiene mierda y culo';
282 |       
283 |       const result1 = await profanity.getCurseWords(sentence);
284 |       const result2 = await profanity.getCurseWords(sentence);
285 |       const result3 = await profanity.hasCurseWords(sentence);
286 |       
287 |       expect(result1).toEqual(result2);
288 |       expect(result3).toBe(true);
289 |     });
290 |   });
291 | });


--------------------------------------------------------------------------------
/data/en.txt:
--------------------------------------------------------------------------------
  1 | 2 girls 1 cup
  2 | 2g1c
  3 | 4r5e
  4 | 5h1t
  5 | 5hit
  6 | a55
  7 | a_s_s
  8 | acrotomophilia
  9 | alabama hot pocket
 10 | alaskan pipeline
 11 | anal
 12 | anilingus
 13 | anus
 14 | apeshit
 15 | ar5e
 16 | arrse
 17 | arse
 18 | arsehole
 19 | ass
 20 | ass-fucker
 21 | ass-hat
 22 | ass-pirate
 23 | assbag
 24 | assbandit
 25 | assbanger
 26 | assbite
 27 | assclown
 28 | asscock
 29 | asscracker
 30 | asses
 31 | assface
 32 | assfucker
 33 | assfukka
 34 | assgoblin
 35 | asshat
 36 | asshead
 37 | asshole
 38 | assholes
 39 | asshopper
 40 | assjacker
 41 | asslick
 42 | asslicker
 43 | assmonkey
 44 | assmunch
 45 | assmuncher
 46 | asspirate
 47 | assshole
 48 | asssucker
 49 | asswad
 50 | asswhole
 51 | asswipe
 52 | auto erotic
 53 | autoerotic
 54 | b!tch
 55 | b00bs
 56 | b17ch
 57 | b1tch
 58 | babeland
 59 | baby batter
 60 | baby juice
 61 | ball gag
 62 | ball gravy
 63 | ball kicking
 64 | ball licking
 65 | ball sack
 66 | ball sucking
 67 | ballbag
 68 | balls
 69 | ballsack
 70 | bampot
 71 | bangbros
 72 | bareback
 73 | barely legal
 74 | barenaked
 75 | bastard
 76 | bastardo
 77 | bastinado
 78 | bbw
 79 | bdsm
 80 | beaner
 81 | beaners
 82 | beastial
 83 | beastiality
 84 | beastility
 85 | beaver cleaver
 86 | beaver lips
 87 | bellend
 88 | bestial
 89 | bestiality
 90 | bi+ch
 91 | biatch
 92 | big black
 93 | big breasts
 94 | big knockers
 95 | big tits
 96 | bimbos
 97 | birdlock
 98 | bitch
 99 | bitcher
100 | bitchers
101 | bitches
102 | bitchin
103 | bitching
104 | black cock
105 | blonde action
106 | blonde on blonde action
107 | bloody
108 | blow job
109 | blow your load
110 | blowjob
111 | blowjobs
112 | blue waffle
113 | blumpkin
114 | boiolas
115 | bollock
116 | bollocks
117 | bollok
118 | bollox
119 | bondage
120 | boner
121 | boob
122 | boobie
123 | boobs
124 | booobs
125 | boooobs
126 | booooobs
127 | booooooobs
128 | booty call
129 | breasts
130 | brown showers
131 | brunette action
132 | buceta
133 | bugger
134 | bukkake
135 | bulldyke
136 | bullet vibe
137 | bullshit
138 | bum
139 | bung hole
140 | bunghole
141 | bunny fucker
142 | busty
143 | butt
144 | butt-pirate
145 | buttcheeks
146 | butthole
147 | buttmunch
148 | buttplug
149 | c0ck
150 | c0cksucker
151 | camel toe
152 | camgirl
153 | camslut
154 | camwhore
155 | carpet muncher
156 | carpetmuncher
157 | cawk
158 | chinc
159 | chink
160 | choad
161 | chocolate rosebuds
162 | chode
163 | cipa
164 | circlejerk
165 | cl1t
166 | cleveland steamer
167 | clit
168 | clitface
169 | clitoris
170 | clits
171 | clover clamps
172 | clusterfuck
173 | cnut
174 | cock
175 | cock-sucker
176 | cockbite
177 | cockburger
178 | cockface
179 | cockhead
180 | cockjockey
181 | cockknoker
182 | cockmaster
183 | cockmongler
184 | cockmongruel
185 | cockmonkey
186 | cockmunch
187 | cockmuncher
188 | cocknose
189 | cocknugget
190 | cocks
191 | cockshit
192 | cocksmith
193 | cocksmoker
194 | cocksuck
195 | cocksuck 
196 | cocksucked
197 | cocksucked 
198 | cocksucker
199 | cocksucking
200 | cocksucks 
201 | cocksuka
202 | cocksukka
203 | cok
204 | cokmuncher
205 | coksucka
206 | coochie
207 | coochy
208 | coon
209 | coons
210 | cooter
211 | coprolagnia
212 | coprophilia
213 | cornhole
214 | cox
215 | crap
216 | creampie
217 | cum
218 | cumbubble
219 | cumdumpster
220 | cumguzzler
221 | cumjockey
222 | cummer
223 | cumming
224 | cums
225 | cumshot
226 | cumslut
227 | cumtart
228 | cunilingus
229 | cunillingus
230 | cunnie
231 | cunnilingus
232 | cunt
233 | cuntface
234 | cunthole
235 | cuntlick
236 | cuntlick 
237 | cuntlicker
238 | cuntlicker 
239 | cuntlicking
240 | cuntlicking 
241 | cuntrag
242 | cunts
243 | cyalis
244 | cyberfuc
245 | cyberfuck 
246 | cyberfucked 
247 | cyberfucker
248 | cyberfuckers
249 | cyberfucking 
250 | d1ck
251 | dammit
252 | damn
253 | darkie
254 | date rape
255 | daterape
256 | deep throat
257 | deepthroat
258 | dendrophilia
259 | dick
260 | dickbag
261 | dickbeater
262 | dickface
263 | dickhead
264 | dickhole
265 | dickjuice
266 | dickmilk
267 | dickmonger
268 | dickslap
269 | dicksucker
270 | dickwad
271 | dickweasel
272 | dickweed
273 | dickwod
274 | dike
275 | dildo
276 | dildos
277 | dingleberries
278 | dingleberry
279 | dink
280 | dinks
281 | dipshit
282 | dirsa
283 | dirty pillows
284 | dirty sanchez
285 | dlck
286 | dog style
287 | dog-fucker
288 | doggie style
289 | doggiestyle
290 | doggin
291 | dogging
292 | doggy style
293 | doggystyle
294 | dolcett
295 | domination
296 | dominatrix
297 | dommes
298 | donkey punch
299 | donkeyribber
300 | doochbag
301 | dookie
302 | doosh
303 | double dong
304 | double penetration
305 | douche
306 | douchebag
307 | dp action
308 | dry hump
309 | duche
310 | dumb
311 | dumbshit
312 | dumshit
313 | dvda
314 | dyke
315 | eat my ass
316 | ecchi
317 | ejaculate
318 | ejaculated
319 | ejaculates 
320 | ejaculating 
321 | ejaculatings
322 | ejaculation
323 | ejakulate
324 | erotic
325 | erotism
326 | escort
327 | eunuch
328 | f u c k
329 | f u c k e r
330 | f4nny
331 | f_u_c_k
332 | fag
333 | fagbag
334 | fagg
335 | fagging
336 | faggit
337 | faggitt
338 | faggot
339 | faggs
340 | fagot
341 | fagots
342 | fags
343 | fagtard
344 | fanny
345 | fannyflaps
346 | fannyfucker
347 | fanyy
348 | fart
349 | farted
350 | farting
351 | farty
352 | fatass
353 | fcuk
354 | fcuker
355 | fcuking
356 | fecal
357 | feck
358 | fecker
359 | felatio
360 | felch
361 | felching
362 | fellate
363 | fellatio
364 | feltch
365 | female squirting
366 | femdom
367 | figging
368 | fingerbang
369 | fingerfuck 
370 | fingerfucked 
371 | fingerfucker 
372 | fingerfuckers
373 | fingerfucking 
374 | fingerfucks 
375 | fingering
376 | fistfuck
377 | fistfucked 
378 | fistfucker 
379 | fistfuckers 
380 | fistfucking 
381 | fistfuckings 
382 | fistfucks 
383 | fisting
384 | flamer
385 | flange
386 | fook
387 | fooker
388 | fool
389 | foot fetish
390 | footjob
391 | frotting
392 | fuck
393 | fuck buttons
394 | fucka
395 | fucked
396 | fucker
397 | fuckers
398 | fuckhead
399 | fuckheads
400 | fuckin
401 | fucking
402 | fuckings
403 | fuckingshitmotherfucker
404 | fuckme 
405 | fucks
406 | fucktards
407 | fuckwhit
408 | fuckwit
409 | fudge packer
410 | fudgepacker
411 | fuk
412 | fuker
413 | fukker
414 | fukkin
415 | fuks
416 | fukwhit
417 | fukwit
418 | futanari
419 | fux
420 | fux0r
421 | g-spot
422 | gang bang
423 | gangbang
424 | gangbanged
425 | gangbanged 
426 | gangbangs 
427 | gay sex
428 | gayass
429 | gaybob
430 | gaydo
431 | gaylord
432 | gaysex
433 | gaytard
434 | gaywad
435 | genitals
436 | giant cock
437 | girl on
438 | girl on top
439 | girls gone wild
440 | goatcx
441 | goatse
442 | god damn
443 | god-dam
444 | god-damned
445 | goddamn
446 | goddamned
447 | gokkun
448 | golden shower
449 | goo girl
450 | gooch
451 | goodpoop
452 | gook
453 | goregasm
454 | gringo
455 | grope
456 | group sex
457 | guido
458 | guro
459 | hand job
460 | handjob
461 | hard core
462 | hardcore
463 | hardcoresex 
464 | heeb
465 | hell
466 | hentai
467 | heshe
468 | ho
469 | hoar
470 | hoare
471 | hoe
472 | hoer
473 | homo
474 | homoerotic
475 | honkey
476 | honky
477 | hooker
478 | hore
479 | horniest
480 | horny
481 | hot carl
482 | hot chick
483 | hotsex
484 | how to kill
485 | how to murder
486 | huge fat
487 | humping
488 | incest
489 | intercourse
490 | jack off
491 | jack-off 
492 | jackass
493 | jackoff
494 | jail bait
495 | jailbait
496 | jap
497 | jelly donut
498 | jerk
499 | jerk off
500 | jerk-off 
501 | jigaboo
502 | jiggaboo
503 | jiggerboo
504 | jism
505 | jiz
506 | jiz 
507 | jizm
508 | jizm 
509 | jizz
510 | juggs
511 | kawk
512 | kike
513 | kinbaku
514 | kinkster
515 | kinky
516 | kiunt
517 | knob
518 | knobbing
519 | knobead
520 | knobed
521 | knobend
522 | knobhead
523 | knobjocky
524 | knobjokey
525 | kock
526 | kondum
527 | kondums
528 | kooch
529 | kootch
530 | kum
531 | kumer
532 | kummer
533 | kumming
534 | kums
535 | kunilingus
536 | kunt
537 | kyke
538 | l3i+ch
539 | l3itch
540 | labia
541 | leather restraint
542 | leather straight jacket
543 | lemon party
544 | lesbo
545 | lezzie
546 | lmfao
547 | lolita
548 | lovemaking
549 | lust
550 | lusting
551 | m0f0
552 | m0fo
553 | m45terbate
554 | ma5terb8
555 | ma5terbate
556 | make me come
557 | male squirting
558 | masochist
559 | master-bate
560 | masterb8
561 | masterbat*
562 | masterbat3
563 | masterbate
564 | masterbation
565 | masterbations
566 | masturbate
567 | menage a trois
568 | milf
569 | minge
570 | missionary position
571 | mo-fo
572 | mof0
573 | mofo
574 | mothafuck
575 | mothafucka
576 | mothafuckas
577 | mothafuckaz
578 | mothafucked 
579 | mothafucker
580 | mothafuckers
581 | mothafuckin
582 | mothafucking 
583 | mothafuckings
584 | mothafucks
585 | mother fucker
586 | motherfuck
587 | motherfucked
588 | motherfucker
589 | motherfuckers
590 | motherfuckin
591 | motherfucking
592 | motherfuckings
593 | motherfuckka
594 | motherfucks
595 | mound of venus
596 | mr hands
597 | muff
598 | muff diver
599 | muffdiver
600 | muffdiving
601 | mutha
602 | muthafecker
603 | muthafuckker
604 | muther
605 | mutherfucker
606 | n1gga
607 | n1gger
608 | nambla
609 | nawashi
610 | nazi
611 | negro
612 | neonazi
613 | nig nog
614 | nigg3r
615 | nigg4h
616 | nigga
617 | niggah
618 | niggas
619 | niggaz
620 | nigger
621 | niggers 
622 | niglet
623 | nimphomania
624 | nipple
625 | nipples
626 | nob
627 | nob jokey
628 | nobhead
629 | nobjocky
630 | nobjokey
631 | nsfw images
632 | nude
633 | nudity
634 | numbnuts
635 | nutsack
636 | nympho
637 | nymphomania
638 | octopussy
639 | omorashi
640 | one cup two girls
641 | one guy one jar
642 | orgasim
643 | orgasim 
644 | orgasims 
645 | orgasm
646 | orgasms 
647 | orgy
648 | p0rn
649 | paedophile
650 | paki
651 | panooch
652 | panties
653 | panty
654 | pawn
655 | pecker
656 | peckerhead
657 | pedobear
658 | pedophile
659 | pegging
660 | penis
661 | penisfucker
662 | phone sex
663 | phonesex
664 | phuck
665 | phuk
666 | phuked
667 | phuking
668 | phukked
669 | phukking
670 | phuks
671 | phuq
672 | piece of shit
673 | pigfucker
674 | pimpis
675 | pis
676 | pises
677 | pisin
678 | pising
679 | pisof
680 | piss
681 | piss pig
682 | pissed
683 | pisser
684 | pissers
685 | pisses 
686 | pissflap
687 | pissflaps
688 | pissin
689 | pissin 
690 | pissing
691 | pissoff
692 | pissoff 
693 | pisspig
694 | playboy
695 | pleasure chest
696 | pole smoker
697 | polesmoker
698 | pollock
699 | ponyplay
700 | poo
701 | poof
702 | poon
703 | poonani
704 | poonany
705 | poontang
706 | poop
707 | poop chute
708 | poopchute
709 | porn
710 | porno
711 | pornography
712 | pornos
713 | prick
714 | pricks 
715 | prince albert piercing
716 | pron
717 | pthc
718 | pube
719 | pubes
720 | punanny
721 | punany
722 | punta
723 | pusies
724 | pusse
725 | pussi
726 | pussies
727 | pussy
728 | pussylicking
729 | pussys 
730 | pusy
731 | puto
732 | queaf
733 | queef
734 | queerbait
735 | queerhole
736 | quim
737 | raghead
738 | raging boner
739 | rape
740 | raping
741 | rapist
742 | rectum
743 | renob
744 | retard
745 | reverse cowgirl
746 | rimjaw
747 | rimjob
748 | rimming
749 | rosy palm
750 | rosy palm and her 5 sisters
751 | ruski
752 | rusty trombone
753 | s hit
754 | s&m
755 | s.o.b.
756 | s_h_i_t
757 | sadism
758 | sadist
759 | santorum
760 | scat
761 | schlong
762 | scissoring
763 | screwing
764 | scroat
765 | scrote
766 | scrotum
767 | semen
768 | sex
769 | sexo
770 | sexy
771 | sh!+
772 | sh!t
773 | sh1t
774 | shag
775 | shagger
776 | shaggin
777 | shagging
778 | shaved beaver
779 | shaved pussy
780 | shemale
781 | shi+
782 | shibari
783 | shit
784 | shit-ass
785 | shit-bag
786 | shit-bagger
787 | shit-brain
788 | shit-breath
789 | shit-cunt
790 | shit-dick
791 | shit-eating
792 | shit-face
793 | shit-faced
794 | shit-fit
795 | shit-head
796 | shit-heel
797 | shit-hole
798 | shit-house
799 | shit-load
800 | shit-pot
801 | shit-spitter
802 | shit-stain
803 | shitass
804 | shitbag
805 | shitbagger
806 | shitblimp
807 | shitbrain
808 | shitbreath
809 | shitcunt
810 | shitdick
811 | shite
812 | shiteating
813 | shited
814 | shitey
815 | shitface
816 | shitfaced
817 | shitfit
818 | shitfuck
819 | shitfull
820 | shithead
821 | shitheel
822 | shithole
823 | shithouse
824 | shiting
825 | shitings
826 | shitload
827 | shitpot
828 | shits
829 | shitspitter
830 | shitstain
831 | shitted
832 | shitter
833 | shitters 
834 | shittiest
835 | shitting
836 | shittings
837 | shitty
838 | shitty 
839 | shity
840 | shiz
841 | shiznit
842 | shota
843 | shrimping
844 | skank
845 | skeet
846 | slanteye
847 | slut
848 | slutbag
849 | sluts
850 | smeg
851 | smegma
852 | smut
853 | snatch
854 | snowballing
855 | sodomize
856 | sodomy
857 | son-of-a-bitch
858 | spac
859 | spic
860 | spick
861 | splooge
862 | splooge moose
863 | spooge
864 | spread legs
865 | spunk
866 | strap on
867 | strapon
868 | strappado
869 | strip club
870 | style doggy
871 | suck
872 | sucker
873 | sucks
874 | suicide girls
875 | sultry women
876 | swastika
877 | swinger
878 | t1tt1e5
879 | t1tties
880 | tainted love
881 | tard
882 | taste my
883 | tea bagging
884 | teets
885 | teez
886 | testical
887 | testicle
888 | threesome
889 | throating
890 | thundercunt
891 | tied up
892 | tight white
893 | tit
894 | titfuck
895 | tits
896 | titt
897 | tittie5
898 | tittiefucker
899 | titties
900 | titty
901 | tittyfuck
902 | tittywank
903 | titwank
904 | tongue in a
905 | topless
906 | tosser
907 | towelhead
908 | tranny
909 | tribadism
910 | tub girl
911 | tubgirl
912 | turd
913 | tushy
914 | tw4t
915 | twat
916 | twathead
917 | twatlips
918 | twatty
919 | twink
920 | twinkie
921 | two girls one cup
922 | twunt
923 | twunter
924 | undressing
925 | upskirt
926 | urethra play
927 | urophilia
928 | v14gra
929 | v1gra
930 | va-j-j
931 | vag
932 | vagina
933 | venus mound
934 | viagra
935 | vibrator
936 | violet wand
937 | vjayjay
938 | vorarephilia
939 | voyeur
940 | vulva
941 | w00se
942 | wang
943 | wank
944 | wanker
945 | wanky
946 | wet dream
947 | wetback
948 | white power
949 | whoar
950 | whore
951 | willies
952 | willy
953 | wrapping men
954 | wrinkled starfish
955 | xrated
956 | xx
957 | xxx
958 | yaoi
959 | yellow showers
960 | yiffy
961 | zoophilia
962 | 🖕
963 | 


--------------------------------------------------------------------------------
/__tests__/arabic.test.js:
--------------------------------------------------------------------------------
  1 | import { ProfanityEngine } from '../index.js';
  2 | 
  3 | const language = process.env.LANGUAGE || 'en'; // Default to 'en' if the LANGUAGE environment variable is not set
  4 | let profanity;
  5 | 
  6 | describe('Arabic Profanity tests', () => {
  7 |   beforeAll(async () => {
  8 |     profanity = new ProfanityEngine({
  9 |       language: 'ar',
 10 |       testMode: true,
 11 |     });
 12 |   });
 13 | 
 14 |   afterEach(() => {
 15 |     profanity.reset();
 16 |   });
 17 | 
 18 |   describe('Core Arabic functionality', () => {
 19 |     it('Should get all the profanity words in an array', async () => {
 20 |       const allWords = await profanity.all();
 21 |       expect(Array.isArray(allWords)).toBe(true);
 22 |       expect(allWords.length).toBeGreaterThan(0);
 23 |       // Update this number based on your actual Arabic word count
 24 |       // expect(allWords.length).toEqual(XXX);
 25 |     });
 26 | 
 27 |     it('Should return true for Arabic profanity words', async () => {
 28 |       // Test with a common Arabic profanity word (if it exists in your list)
 29 |       // Replace 'testword' with an actual word from your Arabic list
 30 |       const searchWord = await profanity.search('testword');
 31 |       expect(typeof searchWord).toBe('boolean');
 32 |     });
 33 | 
 34 |     it('Should return false for normal Arabic words', async () => {
 35 |       const normalWords = [
 36 |         'مرحبا', // Hello
 37 |         'شكرا',  // Thank you
 38 |         'بيت',   // House
 39 |         'كتاب',  // Book
 40 |         'ماء',   // Water
 41 |       ];
 42 | 
 43 |       for (const word of normalWords) {
 44 |         expect(await profanity.search(word)).toBe(false);
 45 |       }
 46 |     });
 47 | 
 48 |     it('Should return false for any empty string', async () => {
 49 |       const searchWord = await profanity.search('');
 50 |       expect(searchWord).toEqual(false);
 51 |     });
 52 | 
 53 |     it('Should return true for a sentence containing a profanity word', async () => {
 54 |       // Replace with actual Arabic sentence containing profanity from your list
 55 |       const sentence = 'هذه جملة تحتوي على كلمة سيئة.'; // "This sentence contains a bad word"
 56 |       const hasCurseWords = await profanity.hasCurseWords(sentence);
 57 |       expect(typeof hasCurseWords).toBe('boolean');
 58 |     });
 59 | 
 60 |     it('Should return false for a sentence with no profanity word', async () => {
 61 |       const sentence = 'هذه جملة نظيفة ومهذبة.'; // "This is a clean and polite sentence"
 62 |       const hasCurseWords = await profanity.hasCurseWords(sentence);
 63 |       expect(hasCurseWords).toEqual(false);
 64 |     });
 65 |   });
 66 | 
 67 |   describe('Arabic-specific edge cases', () => {
 68 |     it('Should handle Arabic diacritical marks (tashkeel)', async () => {
 69 |       // Test words with and without diacritical marks
 70 |       const testCases = [
 71 |         'كِتَابٌ', // Book with diacritics
 72 |         'كتاب',   // Book without diacritics
 73 |         'مَرْحَبًا', // Hello with diacritics
 74 |         'مرحبا',    // Hello without diacritics
 75 |       ];
 76 | 
 77 |       for (const word of testCases) {
 78 |         const result = await profanity.search(word);
 79 |         expect(typeof result).toBe('boolean');
 80 |       }
 81 |     });
 82 | 
 83 |     it('Should handle different Arabic letter forms', async () => {
 84 |       // Test initial, medial, final, and isolated forms
 85 |       const testCases = [
 86 |         'بيت',     // Isolated forms
 87 |         'البيت',   // With definite article
 88 |         'بيوت',    // Plural form
 89 |       ];
 90 | 
 91 |       for (const word of testCases) {
 92 |         const result = await profanity.search(word);
 93 |         expect(typeof result).toBe('boolean');
 94 |       }
 95 |     });
 96 | 
 97 |     it('Should handle Arabic numbers mixed with text', async () => {
 98 |       const sentence = 'هذا النص يحتوي على رقم ١٢٣ وكلمات عربية.'; // "This text contains number 123 and Arabic words"
 99 |       const result = await profanity.hasCurseWords(sentence);
100 |       expect(typeof result).toBe('boolean');
101 |     });
102 | 
103 |     it('Should handle mixed Arabic and English text', async () => {
104 |       const mixedSentence = 'This is mixed النص العربي and English text.';
105 |       const result = await profanity.hasCurseWords(mixedSentence);
106 |       expect(typeof result).toBe('boolean');
107 |     });
108 | 
109 |     it('Should handle right-to-left text direction', async () => {
110 |       // Arabic is read right-to-left
111 |       const rtlSentence = 'النص العربي يُقرأ من اليمين إلى اليسار.'; // "Arabic text is read from right to left"
112 |       const result = await profanity.hasCurseWords(rtlSentence);
113 |       expect(result).toBe(false); // Should be clean text
114 |     });
115 | 
116 |     it('Should handle Arabic punctuation correctly', async () => {
117 |       const testSentences = [
118 |         'ما هذا؟',          // What is this?
119 |         'لا، هذا خطأ!',     // No, this is wrong!
120 |         'قال: "مرحبا"',     // He said: "Hello"
121 |         'النص؛ والكتابة.',   // Text; and writing.
122 |       ];
123 | 
124 |       for (const sentence of testSentences) {
125 |         const result = await profanity.hasCurseWords(sentence);
126 |         expect(typeof result).toBe('boolean');
127 |       }
128 |     });
129 | 
130 |     it('Should handle Arabic definite article (ال)', async () => {
131 |       const testCases = [
132 |         'بيت',    // House
133 |         'البيت',  // The house
134 |         'كتاب',   // Book  
135 |         'الكتاب', // The book
136 |       ];
137 | 
138 |       for (const word of testCases) {
139 |         const result = await profanity.search(word);
140 |         expect(typeof result).toBe('boolean');
141 |       }
142 |     });
143 | 
144 |     it('Should handle case sensitivity (Arabic has no case)', async () => {
145 |       // Arabic doesn't have upper/lower case like Latin scripts
146 |       const arabicWord = 'مرحبا';
147 |       const result1 = await profanity.search(arabicWord);
148 |       const result2 = await profanity.search(arabicWord);
149 |       expect(result1).toEqual(result2);
150 |     });
151 | 
152 |     it('Should handle whitespace around Arabic words', async () => {
153 |       const arabicWord = 'مرحبا';
154 |       expect(await profanity.search(`  ${arabicWord}  `)).toBe(false);
155 |       expect(await profanity.search(`\t${arabicWord}\n`)).toBe(false);
156 |     });
157 | 
158 |     it('Should handle Arabic word variations and roots', async () => {
159 |       // Arabic words are based on root patterns
160 |       const rootVariations = [
161 |         'كتب',    // Root k-t-b (to write)
162 |         'كاتب',   // Writer
163 |         'مكتوب',  // Written
164 |         'كتابة',  // Writing
165 |       ];
166 | 
167 |       for (const word of rootVariations) {
168 |         const result = await profanity.search(word);
169 |         expect(typeof result).toBe('boolean');
170 |       }
171 |     });
172 | 
173 |     it('Should return unique words only in Arabic text', async () => {
174 |       // Test with repeated Arabic words
175 |       const sentence = 'مرحبا مرحبا مرحبا في كل مكان';
176 |       const foundWords = await profanity.getCurseWords(sentence);
177 |       
178 |       // Should return unique words only
179 |       expect(Array.isArray(foundWords)).toBe(true);
180 |       // If 'مرحبا' were a profanity word, it should appear only once
181 |     });
182 |   });
183 | 
184 |   describe('Performance tests for Arabic dataset', () => {
185 |     it('Should handle large Arabic text efficiently', async () => {
186 |       const largeText = 'هذه جملة تجريبية. '.repeat(1000) + 'النص العربي ' + 'نص نظيف. '.repeat(1000);
187 |       
188 |       const startTime = Date.now();
189 |       const result = await profanity.hasCurseWords(largeText);
190 |       const endTime = Date.now();
191 |       
192 |       expect(typeof result).toBe('boolean');
193 |       expect(endTime - startTime).toBeLessThan(100); // Should complete in under 100ms
194 |     });
195 | 
196 |     it('Should efficiently search through all Arabic terms', async () => {
197 |       const allWords = await profanity.all();
198 |       
199 |       if (allWords.length > 0) {
200 |         const startTime = Date.now();
201 |         for (let i = 0; i < Math.min(100, allWords.length); i++) {
202 |           await profanity.search(allWords[i % allWords.length]);
203 |         }
204 |         const endTime = Date.now();
205 |         
206 |         expect(endTime - startTime).toBeLessThan(50); // Should be very fast with Set lookup
207 |       }
208 |     });
209 | 
210 |     it('Should handle concurrent operations on Arabic dataset', async () => {
211 |       const promises = [
212 |         profanity.search('مرحبا'),
213 |         profanity.hasCurseWords('هذا نص عربي'),
214 |         profanity.getCurseWords('النص العربي'),
215 |         profanity.all(),
216 |         profanity.search('شكرا')
217 |       ];
218 |       
219 |       const results = await Promise.all(promises);
220 |       expect(results[0]).toBe(false); // search مرحبا (should be clean)
221 |       expect(results[1]).toBe(false); // hasCurseWords (should be clean)
222 |       expect(Array.isArray(results[2])).toBe(true); // getCurseWords
223 |       expect(Array.isArray(results[3])).toBe(true); // all words
224 |       expect(results[4]).toBe(false); // search شكرا (should be clean)
225 |     });
226 |   });
227 | 
228 |   describe('Arabic language specificity', () => {
229 |     it('Should load Arabic words correctly or fallback to English', async () => {
230 |       const allWords = await profanity.all();
231 |       expect(allWords.length).toBeGreaterThan(0);
232 |       // If Arabic file doesn't exist, should fallback to English (958 words)
233 |       // If Arabic file exists, should load Arabic words
234 |     });
235 | 
236 |     it('Should handle Arabic-specific character encoding (UTF-8)', async () => {
237 |       const arabicChars = [
238 |         'ا', 'ب', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر',
239 |         'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف',
240 |         'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ي'
241 |       ];
242 | 
243 |       for (const char of arabicChars) {
244 |         const result = await profanity.search(char);
245 |         expect(typeof result).toBe('boolean');
246 |       }
247 |     });
248 | 
249 |     it('Should handle Arabic ligatures and special characters', async () => {
250 |       const specialChars = [
251 |         'لا',   // Lam-Alif ligature
252 |         'ﷲ',    // Allah ligature
253 |         'ة',    // Taa marbuta
254 |         'ى',    // Alif maksura
255 |         'ء',    // Hamza
256 |       ];
257 | 
258 |       for (const char of specialChars) {
259 |         const result = await profanity.search(char);
260 |         expect(typeof result).toBe('boolean');
261 |       }
262 |     });
263 | 
264 |     it('Should handle different Arabic dialects considerations', async () => {
265 |       // Note: This depends on what's in your Arabic word list
266 |       const dialectWords = [
267 |         'شنو',   // What (Iraqi/Gulf)
268 |         'ايش',   // What (Levantine) 
269 |         'ايه',   // What (Egyptian)
270 |         'اشنو',  // What (Moroccan)
271 |       ];
272 | 
273 |       for (const word of dialectWords) {
274 |         const result = await profanity.search(word);
275 |         expect(typeof result).toBe('boolean');
276 |       }
277 |     });
278 |   });
279 | 
280 |   describe('Data integrity for Arabic', () => {
281 |     it('Should not allow modification of Arabic word list', async () => {
282 |       const terms1 = await profanity.all();
283 |       const originalLength = terms1.length;
284 |       
285 |       // Try to modify the returned array
286 |       terms1.push('كلمة-مزيفة');
287 |       terms1.pop();
288 |       if (terms1.length > 0) {
289 |         terms1[0] = 'معدل';
290 |       }
291 |       
292 |       // Get terms again - should be unchanged
293 |       const terms2 = await profanity.all();
294 |       expect(terms2.length).toBe(originalLength);
295 |       expect(terms2).not.toContain('كلمة-مزيفة');
296 |       if (terms2.length > 0) {
297 |         expect(terms2[0]).not.toBe('معدل');
298 |       }
299 |     });
300 | 
301 |     it('Should provide consistent results for Arabic detection', async () => {
302 |       const sentence = 'هذه جملة تجريبية بالعربية';
303 |       
304 |       const result1 = await profanity.getCurseWords(sentence);
305 |       const result2 = await profanity.getCurseWords(sentence);
306 |       const result3 = await profanity.hasCurseWords(sentence);
307 |       
308 |       expect(result1).toEqual(result2);
309 |       expect(typeof result3).toBe('boolean');
310 |     });
311 |   });
312 | 
313 |   describe('Configuration and fallback for Arabic', () => {
314 |     it('Should handle missing Arabic language file gracefully', async () => {
315 |       // If ar.txt doesn't exist, should fallback to English
316 |       const arabicProfanity = new ProfanityEngine({
317 |         language: 'ar',
318 |         testMode: true,
319 |       });
320 |       
321 |       const terms = await arabicProfanity.all();
322 |       expect(terms.length).toBeGreaterThan(0);
323 |     });
324 | 
325 |     it('Should suppress warnings in test mode for Arabic', async () => {
326 |       // Store original console.warn
327 |       const originalWarn = console.warn;
328 |       let warnCalled = false;
329 |       
330 |       // Mock console.warn
331 |       console.warn = () => {
332 |         warnCalled = true;
333 |       };
334 |       
335 |       const arabicProfanity = new ProfanityEngine({
336 |         language: 'ar',
337 |         testMode: true,
338 |       });
339 |       
340 |       warnCalled = false;
341 |       await arabicProfanity.all();
342 |       expect(warnCalled).toBe(false);
343 |       
344 |       // Restore original console.warn
345 |       console.warn = originalWarn;
346 |     });
347 |   });
348 | });


--------------------------------------------------------------------------------
/__tests__/french.test.js:
--------------------------------------------------------------------------------
  1 | import { ProfanityEngine } from '../index.js';
  2 | 
  3 | const language = process.env.LANGUAGE || 'en'; // Uses 'en' when the LANGUAGE environment variable is unset or empty
  4 | let profanity; // Assigned in the beforeAll hook of the suite below
  5 | 
  6 | describe('French Profanity tests', () => {
  7 |   beforeAll(async () => {
  8 |     profanity = new ProfanityEngine({
  9 |       language: 'fr',
 10 |       testMode: true,
 11 |     });
 12 |   });
 13 | 
 14 |   afterEach(() => {
 15 |     profanity.reset();
 16 |   });
 17 | 
 18 |   describe('Core French functionality', () => {
 19 |     it('Should get all the profanity words in an array', async () => {
 20 |       const allWords = await profanity.all();
 21 |       expect(Array.isArray(allWords)).toBe(true);
 22 |       expect(allWords.length).toBeGreaterThan(0);
 23 |       // Update this number based on your actual French word count
 24 |       // expect(allWords.length).toEqual(XXX);
 25 |     });
 26 | 
 27 |     it('Should return true for French profanity words', async () => {
 28 |       // Test with a common French profanity word (if it exists in your list)
 29 |       // Replace 'testword' with an actual word from your French list
 30 |       const searchWord = await profanity.search('testword');
 31 |       expect(typeof searchWord).toBe('boolean');
 32 |     });
 33 | 
 34 |     it('Should return false for normal French words', async () => {
 35 |       const normalWords = [
 36 |         'bonjour',    // Hello
 37 |         'merci',      // Thank you
 38 |         'maison',     // House
 39 |         'livre',      // Book
 40 |         'eau',        // Water
 41 |         'français',   // French
 42 |         'ordinateur', // Computer
 43 |         'famille',    // Family
 44 |       ];
 45 | 
 46 |       for (const word of normalWords) {
 47 |         expect(await profanity.search(word)).toBe(false);
 48 |       }
 49 |     });
 50 | 
 51 |     it('Should return false for any empty string', async () => {
 52 |       const searchWord = await profanity.search('');
 53 |       expect(searchWord).toEqual(false);
 54 |     });
 55 | 
 56 |     it('Should return true for a sentence containing a profanity word', async () => {
 57 |       // Replace with actual French sentence containing profanity from your list
 58 |       const sentence = 'Cette phrase contient un mot inapproprié.'; // "This sentence contains an inappropriate word"
 59 |       const hasCurseWords = await profanity.hasCurseWords(sentence);
 60 |       expect(typeof hasCurseWords).toBe('boolean');
 61 |     });
 62 | 
 63 |     it('Should return false for a sentence with no profanity word', async () => {
 64 |       const sentence = 'Cette phrase est propre et polie.'; // "This sentence is clean and polite"
 65 |       const hasCurseWords = await profanity.hasCurseWords(sentence);
 66 |       expect(hasCurseWords).toEqual(false);
 67 |     });
 68 |   });
 69 | 
 70 |   describe('French-specific edge cases', () => {
 71 |     it('Should handle French accented characters', async () => {
 72 |       // Test words with various French accents
 73 |       const accentedWords = [
 74 |         'café',       // é
 75 |         'hôtel',      // ô
 76 |         'être',       // ê
 77 |         'français',   // ç
 78 |         'naïf',       // ï
 79 |         'où',         // ù
 80 |         'âge',        // â
 81 |         'élève',      // è
 82 |       ];
 83 | 
 84 |       for (const word of accentedWords) {
 85 |         const result = await profanity.search(word);
 86 |         expect(typeof result).toBe('boolean');
 87 |       }
 88 |     });
 89 | 
 90 |     it('Should handle French case sensitivity correctly', async () => {
 91 |       const testWord = 'bonjour';
 92 |       expect(await profanity.search('BONJOUR')).toBe(false);
 93 |       expect(await profanity.search('Bonjour')).toBe(false);
 94 |       expect(await profanity.search('bonjour')).toBe(false);
 95 |       expect(await profanity.search('bOnJoUr')).toBe(false);
 96 |     });
 97 | 
 98 |     it('Should handle French apostrophes and contractions', async () => {
 99 |       const testSentences = [
100 |         "C'est une phrase.",           // It's a sentence
101 |         "L'ordinateur est cassé.",     // The computer is broken
102 |         "D'accord avec vous.",         // I agree with you
103 |         "Qu'est-ce que c'est?",       // What is it?
104 |         "N'importe quoi!",            // Whatever!
105 |         "J'ai mangé.",                // I ate
106 |       ];
107 | 
108 |       for (const sentence of testSentences) {
109 |         const result = await profanity.hasCurseWords(sentence);
110 |         expect(typeof result).toBe('boolean');
111 |       }
112 |     });
113 | 
114 |     it('Should handle French punctuation correctly', async () => {
115 |       const testSentences = [
116 |         'Qu\'est-ce que c\'est ?',    // What is it? (French spacing before ?)
117 |         'Bonjour !',                  // Hello! (French spacing before !)
118 |         'Non, merci.',                // No, thank you.
119 |         'Il a dit : « Bonjour »',     // He said: "Hello" (French quotes)
120 |         'C\'est vrai ; vraiment.',    // It's true; really.
121 |       ];
122 | 
123 |       for (const sentence of testSentences) {
124 |         const result = await profanity.hasCurseWords(sentence);
125 |         expect(typeof result).toBe('boolean');
126 |       }
127 |     });
128 | 
129 |     it('Should handle French gender variations', async () => {
130 |       // Test masculine and feminine forms
131 |       const genderPairs = [
132 |         ['acteur', 'actrice'],        // actor/actress
133 |         ['chanteur', 'chanteuse'],    // singer (m/f)
134 |         ['directeur', 'directrice'],  // director (m/f)
135 |         ['français', 'française'],    // French (m/f)
136 |       ];
137 | 
138 |       for (const [masculine, feminine] of genderPairs) {
139 |         expect(await profanity.search(masculine)).toBe(false);
140 |         expect(await profanity.search(feminine)).toBe(false);
141 |       }
142 |     });
143 | 
144 |     it('Should handle French plural forms', async () => {
145 |       const singularPlural = [
146 |         ['livre', 'livres'],          // book/books
147 |         ['maison', 'maisons'],        // house/houses
148 |         ['animal', 'animaux'],        // animal/animals (irregular)
149 |         ['eau', 'eaux'],              // water/waters (irregular)
150 |       ];
151 | 
152 |       for (const [singular, plural] of singularPlural) {
153 |         expect(await profanity.search(singular)).toBe(false);
154 |         expect(await profanity.search(plural)).toBe(false);
155 |       }
156 |     });
157 | 
158 |     it('Should handle French verb conjugations', async () => {
159 |       // Test different verb forms
160 |       const verbForms = [
161 |         'parler',     // to speak (infinitive)
162 |         'parle',      // I/he speaks
163 |         'parles',     // you speak
164 |         'parlons',    // we speak
165 |         'parlez',     // you (plural) speak
166 |         'parlent',    // they speak
167 |       ];
168 | 
169 |       for (const verb of verbForms) {
170 |         const result = await profanity.search(verb);
171 |         expect(typeof result).toBe('boolean');
172 |       }
173 |     });
174 | 
175 |     it('Should handle French liaison and elision', async () => {
176 |       const testSentences = [
177 |         'Les enfants',                // Liaison: les_enfants
178 |         'Un homme',                   // Liaison: un_homme
179 |         'L\'ami',                     // Elision: l'ami (not le ami)
180 |         'D\'eau',                     // Elision: d'eau (not de eau)
181 |       ];
182 | 
183 |       for (const sentence of testSentences) {
184 |         const result = await profanity.hasCurseWords(sentence);
185 |         expect(typeof result).toBe('boolean');
186 |       }
187 |     });
188 | 
189 |     it('Should handle whitespace around French words', async () => {
190 |       const frenchWord = 'bonjour';
191 |       expect(await profanity.search(`  ${frenchWord}  `)).toBe(false);
192 |       expect(await profanity.search(`\t${frenchWord}\n`)).toBe(false);
193 |     });
194 | 
195 |     it('Should handle French hyphenated words', async () => {
196 |       const hyphenatedWords = [
197 |         'c\'est-à-dire',              // that is to say
198 |         'peut-être',                  // maybe
199 |         'moi-même',                   // myself
200 |         'quelqu\'un',                 // someone
201 |         'rendez-vous',                // appointment
202 |       ];
203 | 
204 |       for (const word of hyphenatedWords) {
205 |         const result = await profanity.search(word);
206 |         expect(typeof result).toBe('boolean');
207 |       }
208 |     });
209 | 
210 |     it('Should return unique words only in French text', async () => {
211 |       // Test with repeated French words
212 |       const sentence = 'bonjour bonjour bonjour partout';
213 |       const foundWords = await profanity.getCurseWords(sentence);
214 |       
215 |       // Should return unique words only
216 |       expect(Array.isArray(foundWords)).toBe(true);
217 |       // If 'bonjour' were a profanity word, it should appear only once
218 |     });
219 | 
220 |     it('Should handle mixed French and English text', async () => {
221 |       const mixedSentence = 'This is mixed avec du français and English text.';
222 |       const result = await profanity.hasCurseWords(mixedSentence);
223 |       expect(typeof result).toBe('boolean');
224 |     });
225 | 
226 |     it('Should handle French regional variations', async () => {
227 |       // Test words that might vary between French regions
228 |       const regionalWords = [
229 |         'chocolatine',    // Pain au chocolat (Southwest France)
230 |         'septante',       // Seventy (Belgium/Switzerland)
231 |         'nonante',        // Ninety (Belgium/Switzerland)
232 |         'tantôt',         // Later (Quebec/Belgium)
233 |       ];
234 | 
235 |       for (const word of regionalWords) {
236 |         const result = await profanity.search(word);
237 |         expect(typeof result).toBe('boolean');
238 |       }
239 |     });
240 |   });
241 | 
242 |   describe('Performance tests for French dataset', () => {
243 |     it('Should handle large French text efficiently', async () => {
244 |       const largeText = 'Ceci est une phrase de test. '.repeat(1000) + 'texte français ' + 'Texte propre. '.repeat(1000);
245 |       
246 |       const startTime = Date.now();
247 |       const result = await profanity.hasCurseWords(largeText);
248 |       const endTime = Date.now();
249 |       
250 |       expect(typeof result).toBe('boolean');
251 |       expect(endTime - startTime).toBeLessThan(100); // Should complete in under 100ms
252 |     });
253 | 
254 |     it('Should efficiently search through all French terms', async () => {
255 |       const allWords = await profanity.all();
256 |       
257 |       if (allWords.length > 0) {
258 |         const startTime = Date.now();
259 |         for (let i = 0; i < Math.min(100, allWords.length); i++) {
260 |           await profanity.search(allWords[i % allWords.length]);
261 |         }
262 |         const endTime = Date.now();
263 |         
264 |         expect(endTime - startTime).toBeLessThan(50); // Should be very fast with Set lookup
265 |       }
266 |     });
267 | 
268 |     it('Should handle concurrent operations on French dataset', async () => {
269 |       const promises = [
270 |         profanity.search('bonjour'),
271 |         profanity.hasCurseWords('ceci est du texte français'),
272 |         profanity.getCurseWords('le texte français'),
273 |         profanity.all(),
274 |         profanity.search('merci')
275 |       ];
276 |       
277 |       const results = await Promise.all(promises);
278 |       expect(results[0]).toBe(false); // search bonjour (should be clean)
279 |       expect(results[1]).toBe(false); // hasCurseWords (should be clean)
280 |       expect(Array.isArray(results[2])).toBe(true); // getCurseWords
281 |       expect(Array.isArray(results[3])).toBe(true); // all words
282 |       expect(results[4]).toBe(false); // search merci (should be clean)
283 |     });
284 |   });
285 | 
286 |   describe('French language specificity', () => {
287 |     it('Should load French words correctly or fallback to English', async () => {
288 |       const allWords = await profanity.all();
289 |       expect(allWords.length).toBeGreaterThan(0);
290 |       // If French file doesn't exist, should fallback to English (958 words)
291 |       // If French file exists, should load French words
292 |     });
293 | 
294 |     it('Should handle French-specific character encoding (UTF-8)', async () => {
295 |       const frenchChars = [
296 |         'à', 'â', 'ä', 'ç', 'è', 'é', 'ê', 'ë', 
297 |         'î', 'ï', 'ô', 'ù', 'û', 'ü', 'ÿ', 'ñ'
298 |       ];
299 | 
300 |       for (const char of frenchChars) {
301 |         const result = await profanity.search(char);
302 |         expect(typeof result).toBe('boolean');
303 |       }
304 |     });
305 | 
306 |     it('Should handle French quotation marks and typography', async () => {
307 |       const typographyTests = [
308 |         '« guillemets français »',     // French quotes
309 |         '"guillemets anglais"',        // English quotes
310 |         'apostrophe courbe',         // Curved apostrophe
311 |         'apostrophe droite',       // Straight apostrophe
312 |         '— tiret cadratin',            // Em dash
313 |         '– tiret demi-cadratin',       // En dash
314 |       ];
315 | 
316 |       for (const text of typographyTests) {
317 |         const result = await profanity.hasCurseWords(text);
318 |         expect(typeof result).toBe('boolean');
319 |       }
320 |     });
321 | 
322 |     it('Should handle French Canadian (Quebec) variations', async () => {
323 |       // Quebec French often has different vocabulary
324 |       const quebecWords = [
325 |         'char',           // Car (instead of voiture)
326 |         'blonde',         // Girlfriend (instead of copine)
327 |         'magasiner',      // To shop (instead of faire du shopping)
328 |         'déjeuner',       // Breakfast (lunch in France)
329 |       ];
330 | 
331 |       for (const word of quebecWords) {
332 |         const result = await profanity.search(word);
333 |         expect(typeof result).toBe('boolean');
334 |       }
335 |     });
336 |   });
337 | 
338 |   describe('Data integrity for French', () => {
339 |     it('Should not allow modification of French word list', async () => {
340 |       const terms1 = await profanity.all();
341 |       const originalLength = terms1.length;
342 |       
343 |       // Try to modify the returned array
344 |       terms1.push('mot-faux');
345 |       terms1.pop();
346 |       if (terms1.length > 0) {
347 |         terms1[0] = 'modifié';
348 |       }
349 |       
350 |       // Get terms again - should be unchanged
351 |       const terms2 = await profanity.all();
352 |       expect(terms2.length).toBe(originalLength);
353 |       expect(terms2).not.toContain('mot-faux');
354 |       if (terms2.length > 0) {
355 |         expect(terms2[0]).not.toBe('modifié');
356 |       }
357 |     });
358 | 
359 |     it('Should provide consistent results for French detection', async () => {
360 |       const sentence = 'Cette phrase est en français';
361 |       
362 |       const result1 = await profanity.getCurseWords(sentence);
363 |       const result2 = await profanity.getCurseWords(sentence);
364 |       const result3 = await profanity.hasCurseWords(sentence);
365 |       
366 |       expect(result1).toEqual(result2);
367 |       expect(typeof result3).toBe('boolean');
368 |     });
369 |   });
370 | 
371 |   describe('Configuration and fallback for French', () => {
372 |     it('Should handle missing French language file gracefully', async () => {
373 |       // If fr.txt doesn't exist, should fallback to English
374 |       const frenchProfanity = new ProfanityEngine({
375 |         language: 'fr',
376 |         testMode: true,
377 |       });
378 |       
379 |       const terms = await frenchProfanity.all();
380 |       expect(terms.length).toBeGreaterThan(0);
381 |     });
382 | 
383 |     it('Should suppress warnings in test mode for French', async () => {
384 |       // Store original console.warn
385 |       const originalWarn = console.warn;
386 |       let warnCalled = false;
387 |       
388 |       // Mock console.warn
389 |       console.warn = () => {
390 |         warnCalled = true;
391 |       };
392 |       
393 |       const frenchProfanity = new ProfanityEngine({
394 |         language: 'fr',
395 |         testMode: true,
396 |       });
397 |       
398 |       warnCalled = false;
399 |       await frenchProfanity.all();
400 |       expect(warnCalled).toBe(false);
401 |       
402 |       // Restore original console.warn
403 |       console.warn = originalWarn;
404 |     });
405 |   });
406 | 
407 |   describe('French grammar and linguistics', () => {
408 |     it('Should handle French articles and determiners', async () => {
409 |       const articles = [
410 |         'le', 'la', 'les',           // Definite articles
411 |         'un', 'une', 'des',          // Indefinite articles
412 |         'du', 'de la', 'des',        // Partitive articles
413 |         'ce', 'cette', 'ces',        // Demonstrative
414 |       ];
415 | 
416 |       for (const article of articles) {
417 |         expect(await profanity.search(article)).toBe(false);
418 |       }
419 |     });
420 | 
421 |     it('Should handle French prepositions', async () => {
422 |       const prepositions = [
423 |         'de', 'à', 'dans', 'sur', 'avec', 'pour', 
424 |         'par', 'sans', 'sous', 'vers', 'chez'
425 |       ];
426 | 
427 |       for (const prep of prepositions) {
428 |         expect(await profanity.search(prep)).toBe(false);
429 |       }
430 |     });
431 | 
432 |     it('Should handle French reflexive pronouns', async () => {
433 |       const reflexiveTests = [
434 |         'Je me lave',                 // I wash myself
435 |         'Tu te dépêches',            // You hurry
436 |         'Il se réveille',            // He wakes up
437 |         'Nous nous amusons',         // We have fun
438 |       ];
439 | 
440 |       for (const sentence of reflexiveTests) {
441 |         const result = await profanity.hasCurseWords(sentence);
442 |         expect(result).toBe(false); // Should be clean
443 |       }
444 |     });
445 |   });
446 | });


--------------------------------------------------------------------------------
/__tests__/chinese.test.js:
--------------------------------------------------------------------------------
  1 | import { ProfanityEngine } from '../index.js';
  2 | 
  3 | const language = process.env.LANGUAGE || 'en'; // Default to 'en' if the LANGUAGE environment variable is not set
  4 | let profanity;
  5 | 
  6 | describe('Chinese (Mandarin) Profanity tests', () => {
  7 |   beforeAll(async () => {
  8 |     profanity = new ProfanityEngine({
  9 |       language: 'zh',
 10 |       testMode: true,
 11 |     });
 12 |   });
 13 | 
 14 |   afterEach(() => {
 15 |     profanity.reset();
 16 |   });
 17 | 
 18 |   describe('Core Chinese functionality', () => {
 19 |     it('Should get all the profanity words in an array', async () => {
 20 |       const allWords = await profanity.all();
 21 |       expect(Array.isArray(allWords)).toBe(true);
 22 |       expect(allWords.length).toBeGreaterThan(0);
 23 |       // Update this number based on your actual Chinese word count
 24 |       // expect(allWords.length).toEqual(XXX);
 25 |     });
 26 | 
 27 |     it('Should return true for Chinese profanity words', async () => {
 28 |       // Test with a common Chinese profanity word (if it exists in your list)
 29 |       // Replace 'testword' with an actual word from your Chinese list
 30 |       const searchWord = await profanity.search('testword');
 31 |       expect(typeof searchWord).toBe('boolean');
 32 |     });
 33 | 
 34 |     it('Should return false for normal Chinese words', async () => {
 35 |       const normalWords = [
 36 |         '你好',     // Hello (nǐ hǎo)
 37 |         '谢谢',     // Thank you (xiè xiè)
 38 |         '房子',     // House (fáng zi)
 39 |         '书',       // Book (shū)
 40 |         '水',       // Water (shuǐ)
 41 |         '中文',     // Chinese language (zhōng wén)
 42 |         '电脑',     // Computer (diàn nǎo)
 43 |         '家庭',     // Family (jiā tíng)
 44 |         '朋友',     // Friend (péng yǒu)
 45 |         '学校',     // School (xué xiào)
 46 |       ];
 47 | 
 48 |       for (const word of normalWords) {
 49 |         expect(await profanity.search(word)).toBe(false);
 50 |       }
 51 |     });
 52 | 
 53 |     it('Should return false for any empty string', async () => {
 54 |       const searchWord = await profanity.search('');
 55 |       expect(searchWord).toEqual(false);
 56 |     });
 57 | 
 58 |     it('Should return true for a sentence containing a profanity word', async () => {
 59 |       // Replace with actual Chinese sentence containing profanity from your list
 60 |       const sentence = '这个句子包含不当词汇。'; // "This sentence contains inappropriate words"
 61 |       const hasCurseWords = await profanity.hasCurseWords(sentence);
 62 |       expect(typeof hasCurseWords).toBe('boolean');
 63 |     });
 64 | 
 65 |     it('Should return false for a sentence with no profanity word', async () => {
 66 |       const sentence = '这是一个干净礼貌的句子。'; // "This is a clean and polite sentence"
 67 |       const hasCurseWords = await profanity.hasCurseWords(sentence);
 68 |       expect(hasCurseWords).toEqual(false);
 69 |     });
 70 |   });
 71 | 
 72 |   describe('Chinese-specific edge cases', () => {
 73 |     it('Should handle simplified vs traditional Chinese characters', async () => {
 74 |       // Test pairs of simplified and traditional characters
 75 |       const characterPairs = [
 76 |         ['学', '學'],         // Study (simplified vs traditional)
 77 |         ['国', '國'],         // Country (simplified vs traditional)
 78 |         ['电', '電'],         // Electric (simplified vs traditional)
 79 |         ['书', '書'],         // Book (simplified vs traditional)
 80 |         ['车', '車'],         // Vehicle (simplified vs traditional)
 81 |         ['语', '語'],         // Language (simplified vs traditional)
 82 |       ];
 83 | 
 84 |       for (const [simplified, traditional] of characterPairs) {
 85 |         const result1 = await profanity.search(simplified);
 86 |         const result2 = await profanity.search(traditional);
 87 |         expect(typeof result1).toBe('boolean');
 88 |         expect(typeof result2).toBe('boolean');
 89 |         // Both should be clean words
 90 |         expect(result1).toBe(false);
 91 |         expect(result2).toBe(false);
 92 |       }
 93 |     });
 94 | 
 95 |     it('Should handle Chinese without word boundaries', async () => {
 96 |       // Chinese doesn't use spaces between words
 97 |       const continuousText = '我今天去学校学习中文很开心'; // "I went to school today to study Chinese and was very happy"
 98 |       const result = await profanity.hasCurseWords(continuousText);
 99 |       expect(result).toBe(false); // Should be clean text
100 |     });
101 | 
102 |     it('Should handle single Chinese characters', async () => {
103 |       const singleChars = [
104 |         '我',  // I/me
105 |         '你',  // You
106 |         '他',  // He
107 |         '好',  // Good
108 |         '大',  // Big
109 |         '小',  // Small
110 |         '人',  // Person
111 |         '天',  // Day/sky
112 |       ];
113 | 
114 |       for (const char of singleChars) {
115 |         expect(await profanity.search(char)).toBe(false);
116 |       }
117 |     });
118 | 
119 |     it('Should handle Chinese compound words', async () => {
120 |       const compoundWords = [
121 |         '电脑',     // Computer (electric + brain)
122 |         '火车',     // Train (fire + vehicle)
123 |         '飞机',     // Airplane (fly + machine)
124 |         '手机',     // Cell phone (hand + machine)
125 |         '汽车',     // Car (steam + vehicle)
126 |         '电视',     // Television (electric + vision)
127 |       ];
128 | 
129 |       for (const word of compoundWords) {
130 |         expect(await profanity.search(word)).toBe(false);
131 |       }
132 |     });
133 | 
134 |     it('Should handle Chinese numbers and mixed content', async () => {
135 |       const mixedContent = [
136 |         '我有3本书',           // I have 3 books
137 |         '今天是2024年',        // Today is 2024
138 |         '电话号码123456',      // Phone number 123456
139 |         '第1章',              // Chapter 1
140 |         '100元',              // 100 yuan
141 |       ];
142 | 
143 |       for (const text of mixedContent) {
144 |         const result = await profanity.hasCurseWords(text);
145 |         expect(typeof result).toBe('boolean');
146 |       }
147 |     });
148 | 
149 |     it('Should handle Chinese punctuation', async () => {
150 |       const testSentences = [
151 |         '你好！',              // Hello!
152 |         '你好吗？',            // How are you?
153 |         '是的，我知道。',       // Yes, I know.
154 |         '他说："你好"',        // He said: "Hello"
155 |         '学习、工作、生活',     // Study, work, life
156 |         '这是...很好',         // This is... very good
157 |       ];
158 | 
159 |       for (const sentence of testSentences) {
160 |         const result = await profanity.hasCurseWords(sentence);
161 |         expect(typeof result).toBe('boolean');
162 |       }
163 |     });
164 | 
165 |     it('Should handle Chinese measure words (classifiers)', async () => {
166 |       const measureWords = [
167 |         '一本书',              // One book (classifier: 本)
168 |         '两个人',              // Two people (classifier: 个)
169 |         '三只猫',              // Three cats (classifier: 只)
170 |         '四辆车',              // Four cars (classifier: 辆)
171 |         '五张纸',              // Five sheets of paper (classifier: 张)
172 |       ];
173 | 
174 |       for (const phrase of measureWords) {
175 |         expect(await profanity.hasCurseWords(phrase)).toBe(false);
176 |       }
177 |     });
178 | 
179 |     it('Should handle Chinese tone marks in pinyin (if applicable)', async () => {
180 |       // If your system processes pinyin alongside Chinese characters
181 |       const pinyinWords = [
182 |         'nǐ hǎo',              // Hello
183 |         'xiè xiè',             // Thank you
184 |         'duì bù qǐ',           // Sorry
185 |         'zài jiàn',            // Goodbye
186 |       ];
187 | 
188 |       for (const pinyin of pinyinWords) {
189 |         const result = await profanity.search(pinyin);
190 |         expect(typeof result).toBe('boolean');
191 |       }
192 |     });
193 | 
194 |     it('Should handle whitespace around Chinese characters', async () => {
195 |       const chineseWord = '你好';
196 |       expect(await profanity.search(`  ${chineseWord}  `)).toBe(false);
197 |       expect(await profanity.search(`\t${chineseWord}\n`)).toBe(false);
198 |     });
199 | 
200 |     it('Should handle mixed Chinese and English text', async () => {
201 |       const mixedSentences = [
202 |         'I love 中文',                    // I love Chinese
203 |         '这是English和中文的混合',         // This is a mix of English and Chinese
204 |         'Hello 世界',                    // Hello world
205 |         '我在学习programming',           // I am learning programming
206 |       ];
207 | 
208 |       for (const sentence of mixedSentences) {
209 |         const result = await profanity.hasCurseWords(sentence);
210 |         expect(typeof result).toBe('boolean');
211 |       }
212 |     });
213 | 
214 |     it('Should return unique words only in Chinese text', async () => {
215 |       // Test with repeated Chinese words
216 |       const sentence = '你好你好你好世界';
217 |       const foundWords = await profanity.getCurseWords(sentence);
218 |       
219 |       // Should return unique words only
220 |       expect(Array.isArray(foundWords)).toBe(true);
221 |       // If '你好' were a profanity word, it should appear only once
222 |     });
223 | 
224 |     it('Should handle Chinese regional variations', async () => {
225 |       // Different Chinese-speaking regions may have different vocabulary
226 |       const regionalWords = [
227 |         '出租车',              // Taxi (Mainland)
228 |         '计程车',              // Taxi (Taiwan)
229 |         '的士',               // Taxi (Hong Kong)
230 |         '垃圾',               // Garbage (Mainland)
231 |         '废物',               // Waste (General)
232 |       ];
233 | 
234 |       for (const word of regionalWords) {
235 |         const result = await profanity.search(word);
236 |         expect(typeof result).toBe('boolean');
237 |       }
238 |     });
239 |   });
240 | 
241 |   describe('Performance tests for Chinese dataset', () => {
242 |     it('Should handle large Chinese text efficiently', async () => {
243 |       const largeText = '这是一个测试句子。'.repeat(1000) + '中文文本 ' + '干净的文本。'.repeat(1000);
244 |       
245 |       const startTime = Date.now();
246 |       const result = await profanity.hasCurseWords(largeText);
247 |       const endTime = Date.now();
248 |       
249 |       expect(typeof result).toBe('boolean');
250 |       expect(endTime - startTime).toBeLessThan(100); // Should complete in under 100ms
251 |     });
252 | 
253 |     it('Should efficiently search through all Chinese terms', async () => {
254 |       const allWords = await profanity.all();
255 |       
256 |       if (allWords.length > 0) {
257 |         const startTime = Date.now();
258 |         for (let i = 0; i < Math.min(100, allWords.length); i++) {
259 |           await profanity.search(allWords[i % allWords.length]);
260 |         }
261 |         const endTime = Date.now();
262 |         
263 |         expect(endTime - startTime).toBeLessThan(50); // Should be very fast with Set lookup
264 |       }
265 |     });
266 | 
267 |     it('Should handle concurrent operations on Chinese dataset', async () => {
268 |       const promises = [
269 |         profanity.search('你好'),
270 |         profanity.hasCurseWords('这是中文文本'),
271 |         profanity.getCurseWords('中文文本'),
272 |         profanity.all(),
273 |         profanity.search('谢谢')
274 |       ];
275 |       
276 |       const results = await Promise.all(promises);
277 |       expect(results[0]).toBe(false); // search 你好 (should be clean)
278 |       expect(results[1]).toBe(false); // hasCurseWords (should be clean)
279 |       expect(Array.isArray(results[2])).toBe(true); // getCurseWords
280 |       expect(Array.isArray(results[3])).toBe(true); // all words
281 |       expect(results[4]).toBe(false); // search 谢谢 (should be clean)
282 |     });
283 |   });
284 | 
285 |   describe('Chinese language specificity', () => {
286 |     it('Should load Chinese words correctly or fallback to English', async () => {
287 |       const allWords = await profanity.all();
288 |       expect(allWords.length).toBeGreaterThan(0);
289 |       // If Chinese file doesn't exist, should fallback to English (958 words)
290 |       // If Chinese file exists, should load Chinese words
291 |     });
292 | 
293 |     it('Should handle Chinese character encoding (UTF-8)', async () => {
294 |       // Test various Chinese character ranges
295 |       const chineseChars = [
296 |         '一', '二', '三', '四', '五',  // Numbers
297 |         '人', '大', '小', '中', '国',  // Common characters
298 |         '學', '國', '語', '電', '車',  // Traditional characters
299 |         '龍', '鳳', '麒', '麟', '龜',  // Complex characters
300 |       ];
301 | 
302 |       for (const char of chineseChars) {
303 |         const result = await profanity.search(char);
304 |         expect(typeof result).toBe('boolean');
305 |       }
306 |     });
307 | 
308 |     it('Should handle Chinese internet slang and abbreviated forms', async () => {
309 |       // Common Chinese internet abbreviations and slang
310 |       const internetSlang = [
311 |         '886',                // Bye bye (sounds like "bā bā liù")
312 |         '520',                // I love you (sounds like "wǒ ài nǐ")
313 |         '88',                 // Bye bye
314 |         '233',                // LOL (from emoticon)
315 |         '666',                // Awesome/cool
316 |       ];
317 | 
318 |       for (const slang of internetSlang) {
319 |         const result = await profanity.search(slang);
320 |         expect(typeof result).toBe('boolean');
321 |       }
322 |     });
323 | 
324 |     it('Should handle Chinese variant characters and fonts', async () => {
325 |       // Some characters have multiple valid forms
326 |       const variants = [
327 |         ['关', '關'],          // Close (simplified vs traditional)
328 |         ['门', '門'],          // Door (simplified vs traditional)
329 |         ['时', '時'],          // Time (simplified vs traditional)
330 |         ['长', '長'],          // Long (simplified vs traditional)
331 |       ];
332 | 
333 |       for (const [simplified, traditional] of variants) {
334 |         expect(await profanity.search(simplified)).toBe(false);
335 |         expect(await profanity.search(traditional)).toBe(false);
336 |       }
337 |     });
338 | 
339 |     it('Should handle Chinese homophone considerations', async () => {
340 |       // Chinese has many homophones (same pronunciation, different characters)
341 |       const homophones = [
342 |         ['时', '石', '是'],     // shí - time, stone, is
343 |         ['他', '她', '它'],     // tā - he, she, it
344 |         ['在', '再'],          // zài - at/in, again
345 |       ];
346 | 
347 |       for (const group of homophones) {
348 |         for (const char of group) {
349 |           const result = await profanity.search(char);
350 |           expect(typeof result).toBe('boolean');
351 |         }
352 |       }
353 |     });
354 |   });
355 | 
356 |   describe('Data integrity for Chinese', () => {
357 |     it('Should not allow modification of Chinese word list', async () => {
358 |       const terms1 = await profanity.all();
359 |       const originalLength = terms1.length;
360 |       
361 |       // Try to modify the returned array
362 |       terms1.push('假词');
363 |       terms1.pop();
364 |       if (terms1.length > 0) {
365 |         terms1[0] = '修改';
366 |       }
367 |       
368 |       // Get terms again - should be unchanged
369 |       const terms2 = await profanity.all();
370 |       expect(terms2.length).toBe(originalLength);
371 |       expect(terms2).not.toContain('假词');
372 |       if (terms2.length > 0) {
373 |         expect(terms2[0]).not.toBe('修改');
374 |       }
375 |     });
376 | 
377 |     it('Should provide consistent results for Chinese detection', async () => {
378 |       const sentence = '这个句子是中文的';
379 |       
380 |       const result1 = await profanity.getCurseWords(sentence);
381 |       const result2 = await profanity.getCurseWords(sentence);
382 |       const result3 = await profanity.hasCurseWords(sentence);
383 |       
384 |       expect(result1).toEqual(result2);
385 |       expect(typeof result3).toBe('boolean');
386 |     });
387 |   });
388 | 
389 |   describe('Configuration and fallback for Chinese', () => {
390 |     it('Should handle missing Chinese language file gracefully', async () => {
391 |       // If zh.txt doesn't exist, should fallback to English
392 |       const chineseProfanity = new ProfanityEngine({
393 |         language: 'zh',
394 |         testMode: true,
395 |       });
396 |       
397 |       const terms = await chineseProfanity.all();
398 |       expect(terms.length).toBeGreaterThan(0);
399 |     });
400 | 
401 |     it('Should suppress warnings in test mode for Chinese', async () => {
402 |       // Store original console.warn
403 |       const originalWarn = console.warn;
404 |       let warnCalled = false;
405 |       
406 |       // Mock console.warn
407 |       console.warn = () => {
408 |         warnCalled = true;
409 |       };
410 |       
411 |       const chineseProfanity = new ProfanityEngine({
412 |         language: 'zh',
413 |         testMode: true,
414 |       });
415 |       
416 |       warnCalled = false;
417 |       await chineseProfanity.all();
418 |       expect(warnCalled).toBe(false);
419 |       
420 |       // Restore original console.warn
421 |       console.warn = originalWarn;
422 |     });
423 |   });
424 | 
425 |   describe('Chinese text processing specifics', () => {
426 |     it('Should handle Chinese word segmentation challenges', async () => {
427 |       // Chinese word boundaries are ambiguous
428 |       const ambiguousTexts = [
429 |         '研究生命科学',         // Could be "研究生|命科学" or "研究|生命科学"
430 |         '北京大学生活',         // Could be "北京大学|生活" or "北京|大学生|活"
431 |         '中国人民银行',         // "中国人民银行" as one entity
432 |       ];
433 | 
434 |       for (const text of ambiguousTexts) {
435 |         const result = await profanity.hasCurseWords(text);
436 |         expect(typeof result).toBe('boolean');
437 |       }
438 |     });
439 | 
440 |     it('Should handle Chinese proper nouns and names', async () => {
441 |       const properNouns = [
442 |         '北京',               // Beijing
443 |         '上海',               // Shanghai
444 |         '中国',               // China
445 |         '长江',               // Yangtze River
446 |         '故宫',               // Forbidden City
447 |       ];
448 | 
449 |       for (const noun of properNouns) {
450 |         expect(await profanity.search(noun)).toBe(false);
451 |       }
452 |     });
453 | 
454 |     it('Should handle Chinese grammar particles', async () => {
455 |       const particles = [
456 |         '的',                 // Possessive particle
457 |         '了',                 // Completion particle
458 |         '着',                 // Progressive particle
459 |         '过',                 // Experience particle
460 |         '吗',                 // Question particle
461 |         '呢',                 // Question particle
462 |       ];
463 | 
464 |       for (const particle of particles) {
465 |         expect(await profanity.search(particle)).toBe(false);
466 |       }
467 |     });
468 |   });
469 | });


--------------------------------------------------------------------------------
/__tests__/irish.test.js:
--------------------------------------------------------------------------------
  1 | import { ProfanityEngine } from '../index.js';
  2 | 
  3 | const language = process.env.LANGUAGE || 'en'; // Default to 'en' if the LANGUAGE environment variable is not set
  4 | let profanity;
  5 | 
  6 | describe('Irish (Gaeilge) Profanity tests', () => {
  7 |   beforeAll(async () => {
  8 |     profanity = new ProfanityEngine({
  9 |       language: 'ga', // ISO code for Irish (Gaeilge)
 10 |       testMode: true,
 11 |     });
 12 |   });
 13 | 
 14 |   afterEach(() => {
 15 |     profanity.reset();
 16 |   });
 17 | 
 18 |   describe('Core Irish functionality', () => {
 19 |     it('Should get all the profanity words in an array', async () => {
 20 |       const allWords = await profanity.all();
 21 |       expect(Array.isArray(allWords)).toBe(true);
 22 |       expect(allWords.length).toBeGreaterThan(0);
 23 |       // Update this number based on your actual Irish word count
 24 |       // expect(allWords.length).toEqual(XXX);
 25 |     });
 26 | 
 27 |     it('Should return true for Irish profanity words', async () => {
 28 |       // Test with a common Irish profanity word (if it exists in your list)
 29 |       // Replace 'testword' with an actual word from your Irish list
 30 |       const searchWord = await profanity.search('testword');
 31 |       expect(typeof searchWord).toBe('boolean');
 32 |     });
 33 | 
 34 |     it('Should return false for normal Irish words', async () => {
 35 |       const normalWords = [
 36 |         'dia duit',     // Hello (God to you)
 37 |         'go raibh maith agat', // Thank you
 38 |         'teach',        // House
 39 |         'leabhar',      // Book
 40 |         'uisce',        // Water
 41 |         'Gaeilge',      // Irish language
 42 |         'ríomhaire',    // Computer
 43 |         'teaghlach',    // Family
 44 |         'cara',         // Friend
 45 |         'scoil',        // School
 46 |         'céad míle fáilte', // A hundred thousand welcomes
 47 |       ];
 48 | 
 49 |       for (const word of normalWords) {
 50 |         expect(await profanity.search(word)).toBe(false);
 51 |       }
 52 |     });
 53 | 
 54 |     it('Should return false for any empty string', async () => {
 55 |       const searchWord = await profanity.search('');
 56 |       expect(searchWord).toEqual(false);
 57 |     });
 58 | 
 59 |     it('Should return true for a sentence containing a profanity word', async () => {
 60 |       // Replace with actual Irish sentence containing profanity from your list
 61 |       const sentence = 'Tá focal mímhúinte sa abairt seo.'; // "There is an impolite word in this sentence"
 62 |       const hasCurseWords = await profanity.hasCurseWords(sentence);
 63 |       expect(typeof hasCurseWords).toBe('boolean');
 64 |     });
 65 | 
 66 |     it('Should return false for a sentence with no profanity word', async () => {
 67 |       const sentence = 'Is abairt ghlan dea-bhéasach í seo.'; // "This is a clean, well-mannered sentence"
 68 |       const hasCurseWords = await profanity.hasCurseWords(sentence);
 69 |       expect(hasCurseWords).toEqual(false);
 70 |     });
 71 |   });
 72 | 
 73 |   describe('Irish-specific edge cases', () => {
 74 |     it('Should handle Irish fadas (accent marks)', async () => {
 75 |       // Test words with fadas (long marks over vowels)
 76 |       const fadaWords = [
 77 |         'fáilte',       // Welcome
 78 |         'tír',          // Country/land
 79 |         'óg',           // Young
 80 |         'mór',          // Big
 81 |         'úr',           // Fresh/new
 82 |         'éan',          // Bird
 83 |         'íoc',          // Pay
 84 |         'bádóir',       // Boatman
 85 |         'cúpla',        // Couple
 86 |         'lón',          // Lunch
 87 |       ];
 88 | 
 89 |       for (const word of fadaWords) {
 90 |         const result = await profanity.search(word);
 91 |         expect(typeof result).toBe('boolean');
 92 |         // These should all be clean words
 93 |         expect(result).toBe(false);
 94 |       }
 95 |     });
 96 | 
 97 |     it('Should handle Irish initial mutations (séimhiú/urú)', async () => {
 98 |       // Irish has initial consonant mutations
 99 |       const mutationExamples = [
100 |         // Séimhiú (lenition) - adds 'h' after initial consonant
101 |         ['bean', 'bhean'],      // Woman (lenited form)
102 |         ['fear', 'fhear'],      // Man (lenited form)
103 |         ['cat', 'chat'],        // Cat (lenited form)
104 |         ['doras', 'dhoras'],    // Door (lenited form)
105 |         
106 |         // Urú (eclipsis) - changes initial consonant
107 |         ['bean', 'mbean'],      // Woman (eclipsed form)
108 |         ['fear', 'bhfear'],     // Man (eclipsed form)
109 |         ['cat', 'gcat'],        // Cat (eclipsed form)
110 |         ['doras', 'ndoras'],    // Door (eclipsed form)
111 |       ];
112 | 
113 |       for (const [root, mutated] of mutationExamples) {
114 |         expect(await profanity.search(root)).toBe(false);
115 |         expect(await profanity.search(mutated)).toBe(false);
116 |       }
117 |     });
118 | 
119 |     it('Should handle Irish case sensitivity correctly', async () => {
120 |       const testWord = 'gaeilge';
121 |       expect(await profanity.search('GAEILGE')).toBe(false);
122 |       expect(await profanity.search('Gaeilge')).toBe(false);
123 |       expect(await profanity.search('gaeilge')).toBe(false);
124 |       expect(await profanity.search('gAeIlGe')).toBe(false);
125 |     });
126 | 
127 |     it('Should handle Irish verb conjugations', async () => {
128 |       // Irish verbs have complex conjugation patterns
129 |       const verbForms = [
130 |         // Bí (to be) conjugations
131 |         'tá',           // is/are (present)
132 |         'bhí',          // was/were (past)
133 |         'beidh',        // will be (future)
134 |         
135 |         // Déan (to do/make) conjugations
136 |         'déanaim',      // I do
137 |         'déanann',      // he/she does
138 |         'rinne',        // did (past)
139 |         'déanfaidh',    // will do (future)
140 |       ];
141 | 
142 |       for (const verb of verbForms) {
143 |         expect(await profanity.search(verb)).toBe(false);
144 |       }
145 |     });
146 | 
147 |     it('Should handle Irish noun declensions', async () => {
148 |       // Irish nouns change form based on case
149 |       const declensionExamples = [
150 |         // Fear (man) declensions
151 |         'fear',         // Nominative singular
152 |         'fir',          // Nominative plural
153 |         'fhear',        // Genitive singular (lenited)
154 |         'bhfear',       // With eclipsis
155 |         
156 |         // Bean (woman) declensions
157 |         'bean',         // Nominative singular
158 |         'mná',          // Nominative plural
159 |         'mhná',         // Genitive plural (lenited)
160 |       ];
161 | 
162 |       for (const form of declensionExamples) {
163 |         expect(await profanity.search(form)).toBe(false);
164 |       }
165 |     });
166 | 
167 |     it('Should handle Irish compound words', async () => {
168 |       const compoundWords = [
169 |         'ríomhaire',        // Computer (number-counter)
170 |         'teilifís',         // Television
171 |         'rothar',           // Bicycle (wheel-man)
172 |         'ospidéal',         // Hospital
173 |         'ollscoil',         // University (great-school)
174 |         'leabharlann',      // Library (book-house)
175 |       ];
176 | 
177 |       for (const word of compoundWords) {
178 |         expect(await profanity.search(word)).toBe(false);
179 |       }
180 |     });
181 | 
182 |     it('Should handle Irish numbers and counting', async () => {
183 |       const irishNumbers = [
184 |         'a haon',       // One
185 |         'a dó',         // Two
186 |         'a trí',        // Three
187 |         'a ceathair',   // Four
188 |         'a cúig',       // Five
189 |         'a sé',         // Six
190 |         'a seacht',     // Seven
191 |         'a hocht',      // Eight
192 |         'a naoi',       // Nine
193 |         'a deich',      // Ten
194 |       ];
195 | 
196 |       for (const number of irishNumbers) {
197 |         const result = await profanity.hasCurseWords(number);
198 |         expect(result).toBe(false);
199 |       }
200 |     });
201 | 
202 |     it('Should handle Irish prepositional pronouns', async () => {
203 |       // Irish combines prepositions with pronouns
204 |       const prepositionalPronouns = [
205 |         'agam',         // At me (ag + mé)
206 |         'agat',         // At you (ag + tú)
207 |         'aige',         // At him (ag + é)
208 |         'aici',         // At her (ag + í)
209 |         'againn',       // At us (ag + muid)
210 |         'agaibh',       // At you (plural) (ag + sibh)
211 |         'acu',          // At them (ag + iad)
212 |       ];
213 | 
214 |       for (const pronoun of prepositionalPronouns) {
215 |         expect(await profanity.search(pronoun)).toBe(false);
216 |       }
217 |     });
218 | 
219 |     it('Should handle whitespace around Irish words', async () => {
220 |       const irishWord = 'fáilte';
221 |       expect(await profanity.search(`  ${irishWord}  `)).toBe(false);
222 |       expect(await profanity.search(`\t${irishWord}\n`)).toBe(false);
223 |     });
224 | 
225 |     it('Should handle mixed Irish and English text', async () => {
226 |       const mixedSentences = [
227 |         'I love Gaeilge',               // I love Irish
228 |         'Tá mé ag foghlaim English',    // I am learning English
229 |         'Hello agus dia duit',          // Hello and God to you
230 |         'Go raibh maith agat very much', // Thank you very much
231 |       ];
232 | 
233 |       for (const sentence of mixedSentences) {
234 |         const result = await profanity.hasCurseWords(sentence);
235 |         expect(typeof result).toBe('boolean');
236 |       }
237 |     });
238 | 
239 |     it('Should return unique words only in Irish text', async () => {
240 |       // Test with repeated Irish words
241 |       const sentence = 'fáilte fáilte fáilte go hÉireann';
242 |       const foundWords = await profanity.getCurseWords(sentence);
243 |       
244 |       // Should return unique words only
245 |       expect(Array.isArray(foundWords)).toBe(true);
246 |       // If 'fáilte' were a profanity word, it should appear only once
247 |     });
248 | 
249 |     it('Should handle Irish dialectal variations', async () => {
250 |       // Irish has three main dialects: Munster, Connacht, Ulster
251 |       const dialectalWords = [
252 |         // Different ways to say things in different dialects
253 |         'pótaí',        // Potatoes (Munster)
254 |         'prátaí',       // Potatoes (Connacht/Ulster)
255 |         'gasúr',        // Boy (Munster)
256 |         'buachaill',    // Boy (Connacht/Ulster)
257 |         'cailín',       // Girl (general)
258 |         'girseach',     // Girl (Ulster)
259 |       ];
260 | 
261 |       for (const word of dialectalWords) {
262 |         const result = await profanity.search(word);
263 |         expect(typeof result).toBe('boolean');
264 |       }
265 |     });
266 | 
267 |     it('Should handle Irish traditional vs modern spelling', async () => {
268 |       // Irish spelling was reformed in the 20th century
269 |       const spellingVariations = [
270 |         // Traditional vs Modern
271 |         ['Gaedhilg', 'Gaeilge'],        // Irish language
272 |         ['oidhche', 'oíche'],           // Night
273 |         ['ceathramhadh', 'ceathrú'],    // Quarter/fourth
274 |       ];
275 | 
276 |       for (const [traditional, modern] of spellingVariations) {
277 |         expect(await profanity.search(traditional)).toBe(false);
278 |         expect(await profanity.search(modern)).toBe(false);
279 |       }
280 |     });
281 |   });
282 | 
283 |   describe('Performance tests for Irish dataset', () => {
284 |     it('Should handle large Irish text efficiently', async () => {
285 |       const largeText = 'Seo abairt tástála. '.repeat(1000) + 'téacs Gaeilge ' + 'Téacs glan. '.repeat(1000);
286 |       
287 |       const startTime = Date.now();
288 |       const result = await profanity.hasCurseWords(largeText);
289 |       const endTime = Date.now();
290 |       
291 |       expect(typeof result).toBe('boolean');
292 |       expect(endTime - startTime).toBeLessThan(100); // Should complete in under 100ms
293 |     });
294 | 
295 |     it('Should efficiently search through all Irish terms', async () => {
296 |       const allWords = await profanity.all();
297 |       
298 |       if (allWords.length > 0) {
299 |         const startTime = Date.now();
300 |         for (let i = 0; i < Math.min(100, allWords.length); i++) {
301 |           await profanity.search(allWords[i % allWords.length]);
302 |         }
303 |         const endTime = Date.now();
304 |         
305 |         expect(endTime - startTime).toBeLessThan(50); // Should be very fast with Set lookup
306 |       }
307 |     });
308 | 
309 |     it('Should handle concurrent operations on Irish dataset', async () => {
310 |       const promises = [
311 |         profanity.search('fáilte'),
312 |         profanity.hasCurseWords('seo téacs Gaeilge'),
313 |         profanity.getCurseWords('an téacs Gaeilge'),
314 |         profanity.all(),
315 |         profanity.search('slán')
316 |       ];
317 |       
318 |       const results = await Promise.all(promises);
319 |       expect(results[0]).toBe(false); // search fáilte (should be clean)
320 |       expect(results[1]).toBe(false); // hasCurseWords (should be clean)
321 |       expect(Array.isArray(results[2])).toBe(true); // getCurseWords
322 |       expect(Array.isArray(results[3])).toBe(true); // all words
323 |       expect(results[4]).toBe(false); // search slán (should be clean)
324 |     });
325 |   });
326 | 
327 |   describe('Irish language specificity', () => {
328 |     it('Should load Irish words correctly or fallback to English', async () => {
329 |       const allWords = await profanity.all();
330 |       expect(allWords.length).toBeGreaterThan(0);
331 |       // If Irish file doesn't exist, should fallback to English (958 words)
332 |       // If Irish file exists, should load Irish words
333 |     });
334 | 
335 |     it('Should handle Irish-specific character encoding (UTF-8)', async () => {
336 |       // Test Irish alphabet with fadas
337 |       const irishChars = [
338 |         'a', 'á', 'b', 'c', 'd', 'e', 'é', 'f', 'g', 'h',
339 |         'i', 'í', 'l', 'm', 'n', 'o', 'ó', 'p', 'r', 's',
340 |         't', 'u', 'ú', // Irish alphabet (no j, k, q, v, w, x, y, z traditionally)
341 |       ];
342 | 
343 |       for (const char of irishChars) {
344 |         const result = await profanity.search(char);
345 |         expect(typeof result).toBe('boolean');
346 |       }
347 |     });
348 | 
349 |     it('Should handle Irish place names', async () => {
350 |       const placeNames = [
351 |         'Éire',           // Ireland
352 |         'Baile Átha Cliath', // Dublin
353 |         'Corcaigh',       // Cork
354 |         'Gaillimh',       // Galway
355 |         'Luimneach',      // Limerick
356 |         'Port Láirge',    // Waterford
357 |         'An Clár',        // Clare
358 |         'Ciarraí',        // Kerry
359 |       ];
360 | 
361 |       for (const place of placeNames) {
362 |         expect(await profanity.search(place)).toBe(false);
363 |       }
364 |     });
365 | 
366 |     it('Should handle Irish Celtic cultural terms', async () => {
367 |       const culturalTerms = [
368 |         'céilí',          // Social gathering with music/dance
369 |         'seisiún',        // Music session
370 |         'bodhrán',        // Traditional drum
371 |         'uilleann',       // Irish pipes
372 |         'fleadh',         // Festival
373 |         'comhrá',         // Conversation
374 |         'craic',          // Fun/good time
375 |         'sláinte',        // Health/cheers
376 |       ];
377 | 
378 |       for (const term of culturalTerms) {
379 |         expect(await profanity.search(term)).toBe(false);
380 |       }
381 |     });
382 |   });
383 | 
384 |   describe('Data integrity for Irish', () => {
385 |     it('Should not allow modification of Irish word list', async () => {
386 |       const terms1 = await profanity.all();
387 |       const originalLength = terms1.length;
388 |       
389 |       // Try to modify the returned array
390 |       terms1.push('focal-bréige');
391 |       terms1.pop();
392 |       if (terms1.length > 0) {
393 |         terms1[0] = 'athraithe';
394 |       }
395 |       
396 |       // Get terms again - should be unchanged
397 |       const terms2 = await profanity.all();
398 |       expect(terms2.length).toBe(originalLength);
399 |       expect(terms2).not.toContain('focal-bréige');
400 |       if (terms2.length > 0) {
401 |         expect(terms2[0]).not.toBe('athraithe');
402 |       }
403 |     });
404 | 
405 |     it('Should provide consistent results for Irish detection', async () => {
406 |       const sentence = 'Seo abairt i nGaeilge';
407 |       
408 |       const result1 = await profanity.getCurseWords(sentence);
409 |       const result2 = await profanity.getCurseWords(sentence);
410 |       const result3 = await profanity.hasCurseWords(sentence);
411 |       
412 |       expect(result1).toEqual(result2);
413 |       expect(typeof result3).toBe('boolean');
414 |     });
415 |   });
416 | 
417 |   describe('Configuration and fallback for Irish', () => {
418 |     it('Should handle missing Irish language file gracefully', async () => {
419 |       // If ga.txt doesn't exist, should fallback to English
420 |       const irishProfanity = new ProfanityEngine({
421 |         language: 'ga',
422 |         testMode: true,
423 |       });
424 |       
425 |       const terms = await irishProfanity.all();
426 |       expect(terms.length).toBeGreaterThan(0);
427 |     });
428 | 
429 |     it('Should suppress warnings in test mode for Irish', async () => {
430 |       // Store original console.warn
431 |       const originalWarn = console.warn;
432 |       let warnCalled = false;
433 |       
434 |       // Mock console.warn
435 |       console.warn = () => {
436 |         warnCalled = true;
437 |       };
438 |       
439 |       const irishProfanity = new ProfanityEngine({
440 |         language: 'ga',
441 |         testMode: true,
442 |       });
443 |       
444 |       warnCalled = false;
445 |       await irishProfanity.all();
446 |       expect(warnCalled).toBe(false);
447 |       
448 |       // Restore original console.warn
449 |       console.warn = originalWarn;
450 |     });
451 |   });
452 | 
453 |   describe('Irish grammar and linguistics', () => {
454 |     it('Should handle Irish syntax patterns (VSO order)', async () => {
455 |       // Irish typically uses Verb-Subject-Object word order
456 |       const vsoSentences = [
457 |         'Tá Seán ag rith',           // Is Seán running (literally: Is Seán at running)
458 |         'Chonaic mé an madra',       // I saw the dog (literally: Saw I the dog)
459 |         'Léann sí leabhar',          // She reads a book (literally: Reads she book)
460 |       ];
461 | 
462 |       for (const sentence of vsoSentences) {
463 |         expect(await profanity.hasCurseWords(sentence)).toBe(false);
464 |       }
465 |     });
466 | 
467 |     it('Should handle Irish copula vs substantive verb', async () => {
468 |       // Irish has two types of "to be"
469 |       const copulaExamples = [
470 |         'Is múinteoir mé',           // I am a teacher (copula)
471 |         'Tá mé ag obair',           // I am working (substantive verb)
472 |         'Is maith liom tae',        // I like tea (copula)
473 |         'Tá tae agam',              // I have tea (substantive verb)
474 |       ];
475 | 
476 |       for (const sentence of copulaExamples) {
477 |         expect(await profanity.hasCurseWords(sentence)).toBe(false);
478 |       }
479 |     });
480 | 
481 |     it('Should handle Irish conditional and subjunctive moods', async () => {
482 |       const moodExamples = [
483 |         'Dá mbeinn saibhir',        // If I were rich (conditional)
484 |         'Go raibh maith agat',      // Thank you (subjunctive: "that good be at you")
485 |         'Ar mhaith leat tae?',      // Would you like tea? (conditional)
486 |       ];
487 | 
488 |       for (const sentence of moodExamples) {
489 |         expect(await profanity.hasCurseWords(sentence)).toBe(false);
490 |       }
491 |     });
492 |   });
493 | });


--------------------------------------------------------------------------------