├── .gitattributes ├── .gitignore ├── LICENSE ├── Procfile ├── README.md ├── announcements.py ├── bot.py ├── config.ini ├── configurator.py ├── db.py ├── db_init.py ├── dispatcher.py ├── filters.py ├── handlers ├── __init__.py ├── admin_actions.py ├── callbacks.py ├── exceptions.py ├── group_events.py ├── personal_actions.py └── user_actions.py ├── heroku_config.py ├── libs ├── censure │ ├── __init__.py │ ├── base.py │ ├── data │ │ ├── en_in.txt │ │ ├── en_out.txt │ │ ├── ru_in.txt │ │ └── ru_out.txt │ ├── helper.py │ ├── lang │ │ ├── __init__.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── constants.py │ │ │ └── patterns.py │ │ ├── en │ │ │ ├── __init__.py │ │ │ ├── constants.py │ │ │ └── patterns.py │ │ └── ru │ │ │ ├── __init__.py │ │ │ ├── constants.py │ │ │ └── patterns.py │ └── tests │ │ ├── __init__.py │ │ ├── base.py │ │ ├── en │ │ ├── __init__.py │ │ ├── data.py │ │ ├── test_base.py │ │ ├── test_censure.py │ │ └── test_helper.py │ │ ├── ru │ │ ├── __init__.py │ │ ├── data.py │ │ ├── test_base.py │ │ ├── test_censure.py │ │ └── test_helper.py │ │ └── test_init.py └── gender_extractor │ ├── __init__.py │ ├── extractor.py │ └── nameLists │ ├── AfghanistanFemaleUTF8.csv │ ├── AfghanistanMaleUTF8.csv │ ├── AlbaniaFemaleUTF8.csv │ ├── AlbaniaMaleUTF8.csv │ ├── AustraliaFemaleUTF8.csv │ ├── AustraliaMaleUTF8.csv │ ├── BelgiumFemaleUTF8.csv │ ├── BelgiumMaleUTF8.csv │ ├── BrazilFemaleUTF8.csv │ ├── BrazilMaleUTF8.csv │ ├── BrusselsFemaleUTF8.csv │ ├── BrusselsMaleUTF8.csv │ ├── CanadaFemaleUTF8.csv │ ├── CanadaMaleUTF8.csv │ ├── CzechFemaleUTF8.csv │ ├── CzechMaleUTF8.csv │ ├── FinlandFemaleUTF8.csv │ ├── FinlandMaleUTF8.csv │ ├── FlandersFemaleUTF8.csv │ ├── FlandersMaleUTF8.csv │ ├── FrisiaFemaleUTF8.csv │ ├── FrisiaMaleUTF8.csv │ ├── GreeceFemaleUTF8.csv │ ├── GreeceMaleUTF8.csv │ ├── HungaryFemaleUTF8.csv │ ├── HungaryMaleUTF8.csv │ ├── IndiaFemaleUTF8.csv │ ├── IndiaLastNames.csv │ ├── IndiaMaleUTF8.csv │ ├── IranFemaleUTF8.csv │ ├── IranMaleUTF8.csv │ ├── 
IrelandFemaleUTF8.csv │ ├── IrelandMaleUTF8.csv │ ├── IsraelFemaleUTF8.csv │ ├── IsraelMaleUTF8.csv │ ├── ItalyFemaleUTF8.csv │ ├── ItalyMaleUTF8.csv │ ├── JapanFemaleUTF8.csv │ ├── JapanMaleUTF8.csv │ ├── LatviaFemaleUTF8.csv │ ├── LatviaMaleUTF8.csv │ ├── NorwayFemaleUTF8.csv │ ├── NorwayLastNames.csv │ ├── NorwayMaleUTF8.csv │ ├── PolandFemaleUTF8.csv │ ├── PolandMaleUTF8.csv │ ├── RomaniaFemaleUTF8.csv │ ├── RomaniaMaleUTF8.csv │ ├── RussiaFemaleUTF8.csv │ ├── RussiaMaleUTF8.csv │ ├── SloveniaFemaleUTF8.csv │ ├── SloveniaMaleUTF8.csv │ ├── SomaliaFemaleUTF8.csv │ ├── SomaliaMaleUTF8.csv │ ├── SpainFemaleUTF8.csv │ ├── SpainMaleUTF8.csv │ ├── SwedenFemaleUTF8.csv │ ├── SwedenLastNames.csv │ ├── SwedenMaleUTF8.csv │ ├── TurkeyFemaleUTF8.csv │ ├── TurkeyMaleUTF8.csv │ ├── UKFemaleUTF8.csv │ ├── UKMaleUTF8.csv │ ├── USAFemaleUTF8.csv │ ├── USAMaleUTF8.csv │ ├── UkraineFemaleUTF8.csv │ ├── UkraineMaleUTF8.csv │ ├── VietnamFemaleUTF8.csv │ ├── VietnamMaleUTF8.csv │ ├── WalloniaFemaleUTF8.csv │ ├── WalloniaMaleUTF8.csv │ ├── countryStats.csv │ ├── diminutives.csv │ ├── diminutives.dict │ ├── gender.dict │ ├── list.txt │ ├── nameLists.md │ └── sources.txt ├── localization.py ├── lru_cache.py ├── models ├── member.py └── spam.py ├── requirements.txt ├── runtime.txt ├── ruspam.py ├── ruspam_model ├── config.json ├── merges.txt ├── model.safetensors ├── special_tokens_map.json ├── tokenizer.json ├── tokenizer_config.json ├── training_args.bin └── vocab.json ├── tests ├── __init__.py └── test_gender.py └── utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.safetensors filter=lfs diff=lfs merge=lfs 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # User-specific stuff: 2 | .idea/workspace.xml 3 | .idea/tasks.xml 4 | .idea/dictionaries 5 | .idea/vcs.xml 6 | 
.idea/jsLibraryMappings.xml 7 | pipsize.py 8 | Dockerfile 9 | 10 | # Sensitive or high-churn files: 11 | .idea/dataSources.ids 12 | .idea/dataSources.xml 13 | .idea/dataSources.local.xml 14 | .idea/sqlDataSources.xml 15 | .idea/dynamic.xml 16 | .idea/uiDesigner.xml 17 | 18 | # Gradle: 19 | .idea/gradle.xml 20 | .idea/libraries 21 | 22 | # Mongo Explorer plugin: 23 | .idea/mongoSettings.xml 24 | 25 | .idea/ 26 | 27 | # Environments 28 | .env 29 | dev.env 30 | .venv 31 | env/ 32 | venv/ 33 | ENV/ 34 | env.bak/ 35 | venv.bak/ 36 | .python-version 37 | 38 | # Byte-compiled / optimized / DLL files 39 | __pycache__/ 40 | __pycache__/* 41 | *.py[cod] 42 | *$py.class 43 | *.pyc 44 | 45 | # Visual Studio cache directory 46 | .vs/ 47 | 48 | # General 49 | .DS_Store 50 | .AppleDouble 51 | .LSOverride 52 | 53 | # Files that might appear in the root of a volume 54 | .DocumentRevisions-V100 55 | .fseventsd 56 | .Spotlight-V100 57 | .TemporaryItems 58 | .Trashes 59 | .VolumeIcon.icns 60 | .com.apple.timemachine.donotpresent 61 | 62 | # Directories potentially created on remote AFP share 63 | .AppleDB 64 | .AppleDesktop 65 | Network Trash Folder 66 | Temporary Items 67 | .apdisk 68 | 69 | # Windows thumbnail cache files 70 | Thumbs.db 71 | Thumbs.db:encryptable 72 | ehthumbs.db 73 | ehthumbs_vista.db 74 | 75 | # Recycle Bin used on file shares 76 | $RECYCLE.BIN/ 77 | 78 | # Windows Installer files 79 | *.cab 80 | *.msi 81 | *.msix 82 | *.msm 83 | *.msp 84 | 85 | # Windows shortcuts 86 | *.lnk -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | worker: python bot.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 👹 Samurai Telegram Bot 2 | ![Samurai Telegram Bot](https://i.imgur.com/S9BPDMt.jpeg "te") 3 | Simple, yet effective 
**moderator bot for Telegram**. 4 | With reports, logs, profanity filter, anti-spam AI and more :3 5 | 6 | # What does Samurai do? 7 | Samurai is a personal bot, made for easy chat auto-moderation. 8 | It adds reporting functionality, profanity filtering (both English & Russian are supported), a logging system via a private channel, spam detection AI and much more! 9 | Moreover, the bot code & functions can be easily extended and/or limited as you prefer. 10 | 11 | *The code has NOT been polished and is provided "as is". There is a lot of redundant code and there are tons of improvements that can be made.* 12 | 13 | ## Roadmap (todo etc) 14 | https://trello.com/b/MbwAxjd1/xobot-official 15 | 16 | ## Credits 17 | https://github.com/masteroncluster/py-censure - Profanity filter we used as a base 18 | https://github.com/MasterGroosha/telegram-report-bot - Reports system we used as a base 19 | https://huggingface.co/RUSpam/spam_deberta_v4 - Anti-Spam AI model we used as a base 20 | https://github.com/wwydmanski/gender-extractor - Gender detection we used as a base 21 | 22 | ## Author of Samurai 23 | 24 | (C) 2025 Abraham Tugalov 25 | -------------------------------------------------------------------------------- /announcements.py: -------------------------------------------------------------------------------- 1 | import dispatcher 2 | import configurator 3 | import localization 4 | import asyncio 5 | import aioschedule as schedule 6 | 7 | async def announce(message): 8 | await dispatcher.dp.bot.send_message(configurator.config.groups.main, message) 9 | 10 | async def scheduler(): 11 | for i in localization.get_string('announcements'): 12 | schedule.every(i['every']).seconds.do(announce, i['message']) 13 | 14 | while True: 15 | await schedule.run_pending() 16 | await asyncio.sleep(2) -------------------------------------------------------------------------------- /bot.py: -------------------------------------------------------------------------------- 1 
| from aiogram import executor 2 | from dispatcher import dp 3 | import handlers 4 | import announcements 5 | import asyncio 6 | import os 7 | 8 | from db import ormar_config 9 | 10 | 11 | async def on_startup(dp): 12 | # connect (all dbs except sqlite require connection) 13 | if ormar_config.database.is_connected: 14 | return 15 | await ormar_config.database.connect() 16 | 17 | 18 | if __name__ == '__main__': 19 | loop = asyncio.get_event_loop() 20 | loop.create_task(announcements.scheduler()) 21 | executor.start_polling(dp, skip_updates=True, on_startup=on_startup) 22 | 23 | # disconnect from database 24 | ormar_config.database.disconnect() 25 | -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | [bot] 2 | # 3 | owner= 4 | 5 | # 6 | token= 7 | language=ru 8 | version=0.5 9 | version_codename=Naked Samurai 10 | 11 | [groups] 12 | main=0 13 | reports=0 14 | logs=0 15 | new_users_nomedia=7776000 16 | linked_channel=0 17 | 18 | [spam] 19 | # after what amount of messages, anti-spam will stop checking this user 20 | member_messages_threshold=10 21 | 22 | # OR after what amount of reputation points, anti-spam will stop checking this user 23 | member_reputation_threshold=10 24 | 25 | # reputation points user must have, to be allowed to send media type messages 26 | allow_media_threshold=20 27 | 28 | # remove messages during this interval after a post to channel has been made 29 | remove_first_comments_interval=30 30 | 31 | # reputation points account must have, to be allowed to post messages (automatic_forward) 32 | allow_first_comments_threshold=50 33 | 34 | # reputation points women account must have, to be allowed to post messages (automatic_forward) 35 | allow_first_comments_threshold__woman=10 36 | 37 | [db] 38 | url=sqlite+aiosqlite:///db.sqlite 39 | -------------------------------------------------------------------------------- 
/configurator.py: -------------------------------------------------------------------------------- 1 | from configparser import ConfigParser 2 | from easydict import EasyDict as edict 3 | import logging 4 | 5 | config = edict() 6 | 7 | def make_config(filename): 8 | parser = ConfigParser() 9 | parser.read(filename) 10 | 11 | if not parser.sections(): 12 | return False 13 | 14 | for section in parser.sections(): 15 | config[section] = edict() 16 | 17 | for key, value in parser.items(section): 18 | config[section][key] = value 19 | 20 | return True -------------------------------------------------------------------------------- /db.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from configurator import config 4 | 5 | from typing import Optional 6 | 7 | import databases 8 | import pydantic 9 | 10 | import ormar 11 | import sqlalchemy 12 | from sqlalchemy.ext.asyncio import create_async_engine 13 | 14 | # create db config 15 | DATABASE_URL = config.db.url 16 | ormar_config = ormar.OrmarConfig( 17 | database=databases.Database(DATABASE_URL), 18 | metadata=sqlalchemy.MetaData(), 19 | engine=create_async_engine(DATABASE_URL, echo=True), 20 | ) -------------------------------------------------------------------------------- /db_init.py: -------------------------------------------------------------------------------- 1 | exit("COMMENT THIS LINE IN ORDER TO RE-INIT DATABASE TABLES") 2 | 3 | import asyncio 4 | import logging 5 | from configurator import make_config 6 | 7 | logging.basicConfig(level=logging.INFO) 8 | 9 | if not make_config("config.ini"): 10 | logging.error("Errors while parsing config file. 
Exiting.") 11 | exit(1) 12 | 13 | import heroku_config 14 | 15 | # import models n stuff 16 | from db import ormar_config 17 | from models.member import Member 18 | from models.spam import Spam 19 | 20 | # DROP & INIT tables (async mysql) 21 | async def reinit_db_tables(): 22 | async with ormar_config.engine.begin() as conn: 23 | await conn.run_sync(ormar_config.metadata.drop_all) 24 | await conn.run_sync(ormar_config.metadata.create_all) 25 | 26 | await ormar_config.engine.dispose() 27 | 28 | asyncio.run(reinit_db_tables()) 29 | exit("DONE") 30 | 31 | # for sqlite use this :3 32 | # ormar_config.metadata.drop_all(ormar_config.engine) 33 | # ormar_config.metadata.create_all(ormar_config.engine) 34 | -------------------------------------------------------------------------------- /dispatcher.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from aiogram import Bot, Dispatcher 4 | 5 | from configurator import config, make_config 6 | from filters import IsAdminFilter, MemberCanRestrictFilter, IsOwnerFilter 7 | 8 | # Configure logging 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | if not make_config("config.ini"): 12 | logging.error("Errors while parsing config file. 
Exiting.") 13 | exit(1) 14 | 15 | import heroku_config 16 | 17 | if not config.bot.token: 18 | logging.error("No token provided") 19 | exit(1) 20 | 21 | # Initialize bot and dispatcher 22 | bot = Bot(token=config.bot.token, parse_mode="HTML") 23 | dp = Dispatcher(bot) 24 | dp.message_handlers.once = False 25 | 26 | # Activate filters 27 | dp.filters_factory.bind(IsAdminFilter) 28 | dp.filters_factory.bind(IsOwnerFilter) 29 | dp.filters_factory.bind(MemberCanRestrictFilter) 30 | -------------------------------------------------------------------------------- /filters.py: -------------------------------------------------------------------------------- 1 | from aiogram import types 2 | from aiogram.dispatcher.filters import BoundFilter 3 | 4 | from configurator import config 5 | 6 | 7 | class IsOwnerFilter(BoundFilter): 8 | """ 9 | Custom filter "is_owner". 10 | """ 11 | key = "is_owner" 12 | 13 | def __init__(self, is_owner): 14 | self.is_owner = is_owner 15 | 16 | async def check(self, message: types.Message): 17 | return message.from_user.id == config.bot.owner 18 | 19 | 20 | class IsAdminFilter(BoundFilter): 21 | """ 22 | Filter that checks for admin rights existence 23 | """ 24 | key = "is_admin" 25 | 26 | def __init__(self, is_admin: bool): 27 | self.is_admin = is_admin 28 | 29 | async def check(self, message: types.Message): 30 | member = await message.bot.get_chat_member(message.chat.id, message.from_user.id) 31 | return member.is_chat_admin() == self.is_admin 32 | 33 | 34 | class MemberCanRestrictFilter(BoundFilter): 35 | """ 36 | Filter that checks member ability for restricting 37 | """ 38 | key = 'member_can_restrict' 39 | 40 | def __init__(self, member_can_restrict: bool): 41 | self.member_can_restrict = member_can_restrict 42 | 43 | async def check(self, message: types.Message): 44 | member = await message.bot.get_chat_member(message.chat.id, message.from_user.id) 45 | 46 | # I don't know why, but telegram thinks, if member is chat creator, he cant 
restrict member 47 | return (member.is_chat_creator() or getattr(member, 'can_restrict_members', False)) == self.member_can_restrict 48 | # return (member.is_chat_creator() or member.can_restrict_members) == self.member_can_restrict 49 | -------------------------------------------------------------------------------- /handlers/__init__.py: -------------------------------------------------------------------------------- 1 | from . import exceptions 2 | 3 | from . import admin_actions 4 | from . import user_actions 5 | from . import callbacks 6 | from . import personal_actions 7 | from . import group_events -------------------------------------------------------------------------------- /handlers/exceptions.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from aiogram.utils.exceptions import (Unauthorized, InvalidQueryID, TelegramAPIError, 3 | CantDemoteChatCreator, MessageNotModified, MessageToDeleteNotFound, 4 | MessageTextIsEmpty, RetryAfter, 5 | CantParseEntities, MessageCantBeDeleted) 6 | 7 | from dispatcher import dp 8 | 9 | @dp.errors_handler() 10 | async def errors_handler(update, exception): 11 | """ 12 | Exceptions handler. Catches all exceptions within task factory tasks. 
13 | :param dispatcher: 14 | :param update: 15 | :param exception: 16 | :return: stdout logging 17 | """ 18 | 19 | if isinstance(exception, CantDemoteChatCreator): 20 | logging.debug("Can't demote chat creator") 21 | return True 22 | 23 | if isinstance(exception, MessageNotModified): 24 | logging.debug('Message is not modified') 25 | return True 26 | if isinstance(exception, MessageCantBeDeleted): 27 | logging.debug('Message cant be deleted') 28 | return True 29 | 30 | if isinstance(exception, MessageToDeleteNotFound): 31 | logging.debug('Message to delete not found') 32 | return True 33 | 34 | if isinstance(exception, MessageTextIsEmpty): 35 | logging.debug('MessageTextIsEmpty') 36 | return True 37 | 38 | if isinstance(exception, Unauthorized): 39 | logging.info(f'Unauthorized: {exception}') 40 | return True 41 | 42 | if isinstance(exception, InvalidQueryID): 43 | logging.exception(f'InvalidQueryID: {exception} \nUpdate: {update}') 44 | return True 45 | 46 | if isinstance(exception, TelegramAPIError): 47 | logging.exception(f'TelegramAPIError: {exception} \nUpdate: {update}') 48 | return True 49 | if isinstance(exception, RetryAfter): 50 | logging.exception(f'RetryAfter: {exception} \nUpdate: {update}') 51 | return True 52 | if isinstance(exception, CantParseEntities): 53 | logging.exception(f'CantParseEntities: {exception} \nUpdate: {update}') 54 | return True 55 | 56 | logging.exception(f'Update: {update} \n{exception}') -------------------------------------------------------------------------------- /handlers/personal_actions.py: -------------------------------------------------------------------------------- 1 | import random 2 | from time import time 3 | from aiogram import types 4 | from configurator import config 5 | from dispatcher import dp 6 | import localization 7 | import utils 8 | import psutil 9 | 10 | import sys 11 | sys.path.append("./censure") # allow module import from git submodule 12 | 13 | from censure import Censor 14 | 15 | censor_ru = 
Censor.get(lang='ru') 16 | censor_en = Censor.get(lang='en') 17 | 18 | 19 | @dp.message_handler(user_id = int(config.bot.owner), commands="msg", commands_prefix="!/") 20 | async def cmd_message_from_bot(message: types.Message): 21 | await message.bot.send_message(config.groups.main, utils.remove_prefix(message.text, "!msg ")) 22 | 23 | 24 | @dp.message_handler(user_id = int(config.bot.owner), commands="log", commands_prefix="!/") 25 | async def cmd_write_log_bot(message: types.Message): 26 | await utils.write_log(message.bot, utils.remove_prefix(message.text, "!log "), "test") 27 | 28 | 29 | @dp.message_handler(is_admin=True, commands="ping", commands_prefix="!") 30 | async def cmd_ping_bot(message: types.Message): 31 | # Check if command is sent by group admin 32 | user = await message.bot.get_chat_member(config.groups.main, message.from_user.id) 33 | if user.is_chat_admin(): 34 | 35 | ram = psutil.virtual_memory() 36 | 37 | reply = f"{random.choice(['👊 Самурай на месте!', '🫰 Нужно больше золота', '🫡 Тута я, бож :3', '✊ Железо говн@, но я держусь!'])}\n\n" 38 | reply += "CPU: {} ядро ({} MHz) загружено на {}%\n".format( 39 | psutil.cpu_count(), 40 | utils.get_cpu_freq_from_proc(), 41 | psutil.cpu_percent() 42 | ) 43 | reply += "RAM: {}мб / {}мб\n".format( 44 | ram.used >> 20, 45 | ram.total >> 20 46 | ) 47 | reply += "GPU: N/A\n" 48 | 49 | # Get disk info for root partition 50 | disk = psutil.disk_usage('/') 51 | # Convert bytes to GB 52 | disk_total_gb = disk.total // (2 ** 30) # or (1024**3) 53 | disk_free_gb = disk.free // (2 ** 30) 54 | 55 | reply += "SSD: {}ГБ / {}ГБ ({}% занято)\n".format( 56 | disk_free_gb, 57 | disk_total_gb, 58 | int(disk.percent) 59 | ) 60 | 61 | reply += "\nВерсия бота: " + str(config.bot.version) + " codename «" + config.bot.version_codename + "»" 62 | 63 | await message.reply(reply) 64 | 65 | 66 | # @dp.message_handler(lambda message: message.chat.type == 'private', commands=["prof", "мат"], commands_prefix="!") 67 | 
@dp.message_handler(is_admin=True, commands=["prof", "мат"], commands_prefix="!") 68 | @dp.message_handler(lambda message: message.chat.type == 'private', is_owner=True, commands=["prof", "мат"], commands_prefix="!") 69 | async def cmd_profanity_check(message: types.Message): 70 | # Check if command is sent by group admin 71 | user = await message.bot.get_chat_member(config.groups.main, message.from_user.id) 72 | if user.is_chat_admin(): 73 | _del = False 74 | _word = None 75 | _pat = None 76 | 77 | line_info_ru = censor_ru.clean_line(utils.remove_prefix(message.text, "!prof ")) 78 | line_info_en = censor_en.clean_line(utils.remove_prefix(message.text, "!prof ")) 79 | 80 | # line, bad_words_count, bad_phrases_count, detected_bad_words, detected_bad_phrases 81 | 82 | _det_lang = None 83 | 84 | # check RU 85 | if line_info_ru[1] or line_info_ru[2]: 86 | if line_info_ru[1]: 87 | _word = line_info_ru[3][0] 88 | else: 89 | _word = line_info_ru[4][0] 90 | 91 | _pat = line_info_ru[5][0] 92 | _del = True 93 | _det_lang = 'ru' 94 | 95 | # check ENG 96 | if line_info_en[1] or line_info_en[2]: 97 | if line_info_en[1]: 98 | _word = line_info_en[3][0] 99 | else: 100 | _word = line_info_en[4][0] 101 | 102 | _pat = line_info_en[5][0] 103 | _del = True 104 | _det_lang = 'en' 105 | 106 | # process 107 | if _del: 108 | log_msg = message.text 109 | if _word: 110 | log_msg = "❌ Profanity detected.\n\n" 111 | log_msg += utils.remove_prefix(message.text, "!prof ").replace(_word, ''+_word+'') 112 | log_msg += "\nПаттерн: " + _pat 113 | log_msg += "\nЯзык: " + _det_lang 114 | 115 | await message.reply(log_msg) 116 | else: 117 | await message.reply("✅ No profanity detected.") 118 | -------------------------------------------------------------------------------- /handlers/user_actions.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | from aiogram import types 3 | from aiogram.dispatcher.filters import Text 4 | from 
configurator import config 5 | from dispatcher import dp 6 | import localization 7 | import utils 8 | import random 9 | 10 | @dp.message_handler(chat_id=config.groups.main, commands=["report", "репорт"], commands_prefix="!/") 11 | async def cmd_report(message: types.Message): 12 | # Check if command is sent as reply to some message 13 | if not message.reply_to_message: 14 | await message.reply(localization.get_string("error_no_reply")) 15 | return 16 | 17 | # Check if command is sent to own message 18 | if message.reply_to_message.from_user.id == message.from_user.id: 19 | await message.reply(localization.get_string("error_report_self")) 20 | return 21 | 22 | # Check if command is sent as reply to admin 23 | user = await message.bot.get_chat_member(config.groups.main, message.reply_to_message.from_user.id) 24 | if user.is_chat_admin() and user.can_restrict_members: 25 | await message.reply(localization.get_string("error_report_admin")) 26 | return 27 | 28 | # Cannot report group posts 29 | if message.reply_to_message.from_user.id == 777000: 30 | await message.bot.delete_message(config.groups.main, message.message_id) 31 | return 32 | 33 | # Check for report message (anything sent after /report or !report command) 34 | msg_parts = message.text.split() 35 | report_message = None 36 | if len(msg_parts) > 1: 37 | report_message = message.text.replace("!report", "") 38 | report_message = report_message.replace("/report", "") 39 | 40 | # Generate keyboard with some actions 41 | action_keyboard = types.InlineKeyboardMarkup() 42 | # Delete message by its id 43 | action_keyboard.add(types.InlineKeyboardButton( 44 | text=localization.get_string("action_del_msg"), 45 | callback_data=f"del_{message.reply_to_message.message_id}") 46 | ) 47 | 48 | # Delete message by its id and ban user by their id 49 | action_keyboard.add(types.InlineKeyboardButton( 50 | text=localization.get_string("action_del_and_ban"), 51 | 
callback_data=f"delban_{message.reply_to_message.message_id}_{message.reply_to_message.from_user.id}" 52 | )) 53 | 54 | # Delete message by its id and mute user for 24 hours by their id 55 | action_keyboard.add(types.InlineKeyboardButton( 56 | text=localization.get_string("action_del_and_readonly"), 57 | callback_data=f"mute_{message.reply_to_message.message_id}_{message.reply_to_message.from_user.id}" 58 | )) 59 | 60 | # Delete message by its id and mute user for 7 days by their id 61 | action_keyboard.add(types.InlineKeyboardButton( 62 | text=localization.get_string("action_del_and_readonly2"), 63 | callback_data=f"mute2_{message.reply_to_message.message_id}_{message.reply_to_message.from_user.id}" 64 | )) 65 | 66 | # Do nothing, false alarm 67 | action_keyboard.add(types.InlineKeyboardButton( 68 | text=localization.get_string("action_false_alarm"), 69 | callback_data=f"dismiss_{message.reply_to_message.message_id}_{message.reply_to_message.from_user.id}" 70 | )) 71 | 72 | # Do nothing, false alarm + mute reporter for one day 73 | action_keyboard.add(types.InlineKeyboardButton( 74 | text=localization.get_string("action_false_alarm_2"), 75 | callback_data=f"dismiss2_{message.message_id}_{message.from_user.id}" 76 | )) 77 | 78 | # Do nothing, false alarm + mute reporter for one week 79 | action_keyboard.add(types.InlineKeyboardButton( 80 | text=localization.get_string("action_false_alarm_3"), 81 | callback_data=f"dismiss3_{message.message_id}_{message.from_user.id}" 82 | )) 83 | 84 | # Do nothing, false alarm + ban reporter 85 | action_keyboard.add(types.InlineKeyboardButton( 86 | text=localization.get_string("action_false_alarm_4"), 87 | callback_data=f"dismiss4_{message.message_id}_{message.from_user.id}" 88 | )) 89 | 90 | await message.reply_to_message.forward(config.groups.reports) 91 | await message.bot.send_message( 92 | config.groups.reports, 93 | utils.get_report_comment( 94 | message.reply_to_message.date, 95 | message.reply_to_message.message_id, 96 | 
report_message 97 | ), 98 | reply_markup=action_keyboard) 99 | await message.reply(random.choice(["Репорт отправлен.", "Админы посмотрят.", "Полиция уже в пути :3", "SWAT уже выехал :3", "Щасб кто-нибудь глянет :3"])) 100 | 101 | @dp.message_handler(Text(startswith="@admin", ignore_case=True), chat_id=config.groups.main) 102 | async def calling_all_units(message: types.Message): 103 | """ 104 | Handler which is triggered when message starts with @admin. 105 | Honestly any combination will work: @admin, @admins, @adminisshit 106 | 107 | :param message: Telegram message where text starts with @admin 108 | """ 109 | await message.bot.send_message( 110 | config.groups.reports, 111 | localization.get_string("need_admins_attention").format( 112 | chat_id=utils.get_url_chat_id(config.groups.main), 113 | msg_id=message.reply_to_message.message_id 114 | if message.reply_to_message 115 | else message.message_id 116 | ) 117 | ) 118 | -------------------------------------------------------------------------------- /heroku_config.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | import os 3 | 4 | if os.path.isfile('.env'): 5 | load_dotenv('.env') 6 | 7 | from configurator import config 8 | 9 | # override config with dev env vars 10 | config.bot.owner = int(os.environ.get('BOT_OWNER', None)) 11 | config.bot.token = os.environ.get('BOT_TOKEN', None) 12 | 13 | config.groups.main = int(os.environ.get('GROUPS_MAIN', None)) 14 | config.groups.reports = int(os.environ.get('GROUPS_REPORTS', None)) 15 | config.groups.logs = int(os.environ.get('GROUPS_LOGS', None)) 16 | config.groups.linked_channel = int(os.environ.get('LINKED_CHANNEL', None)) 17 | 18 | config.db.url = str(os.environ.get('DB_URL', None)) -------------------------------------------------------------------------------- /libs/censure/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import 
Censor 2 | from .helper import CensorHelper 3 | 4 | __all__ = ['Censor', 'CensorHelper'] -------------------------------------------------------------------------------- /libs/censure/data/en_out.txt: -------------------------------------------------------------------------------- 1 | # http://www.noswearing.com/dictionary/ 2 | [beep] - [beep] 3 | [beep] - [beep] 4 | [beep] - [beep] 5 | [beep] - [beep] 6 | [beep] - idiot 7 | [beep] - homosexual 8 | [beep] - homosexual 9 | [beep] - idiot 10 | [beep] - homosexual 11 | [beep] - homosexual 12 | [beep] - idiot 13 | [beep] - [beep] 14 | [beep] - idiot 15 | [beep] - [beep] 16 | [beep] - butts 17 | [beep] - [beep] 18 | [beep] - rear-loving 19 | [beep] - homosexual 20 | [beep] - homosexual 21 | [beep] - [beep] 22 | [beep] - idiot 23 | [beep] - jerk 24 | [beep] - homosexual 25 | [beep] - homosexual 26 | [beep] - idiot 27 | [beep] - Buttlicker 28 | [beep] - idiot 29 | [beep] - idiot 30 | [beep] - [beep] 31 | [beep] - Racial Slur 32 | [beep] - homosexual 33 | [beep] - idiot 34 | [beep] - [beep] 35 | [beep] - idiot 36 | [beep] - [beep] 37 | [beep] - [beep] 38 | [beep] - female genitalia 39 | 40 | [beep] - idiot 41 | [beep] - illegitimate child 42 | [beep] - Mexican 43 | [beep] - female dog 44 | [beep] - idiot 45 | [beep] - female dogs 46 | [beep] - homosexual 47 | [beep] - mean 48 | [beep] - sexual act 49 | [beep] - sexual act 50 | [beep] - male genitalia 51 | [beep] - male genitalia 52 | [beep] - erection 53 | [beep] - homosexual [beep] [beep] sucker [beep] goblin [beep] 54 | [beep] - poop 55 | [beep] - homosexual 56 | [beep] plug - cork 57 | [beep] - homosexual 58 | [beep] - homosexual 59 | [beep] - homosexual 60 | 61 | [beep] - female genitalia 62 | [beep] - homosexual 63 | [beep] - Breast 64 | [beep] - Chinese 65 | [beep] - asian 66 | [beep] - male genitalia 67 | [beep] - small penis 68 | [beep] - female genitals 69 | [beep] - idiot 70 | [beep] - sexual act 71 | [beep] - mess up 72 | [beep] - penis 73 | [beep] - Jerk 74 | 
[beep] - idiot 75 | [beep] - idiot 76 | [beep] - idiot 77 | [beep] - idiot 78 | [beep] - idiot 79 | [beep] - homosexual 80 | [beep] - homosexual 81 | [beep] - homosexual 82 | [beep] - homosexual 83 | [beep] - homosexual 84 | [beep] - idiot 85 | [beep] - homosexual 86 | [beep] - idiot 87 | [beep] - idiot 88 | [beep] - idiot 89 | [beep] - homosexual 90 | [beep] - homosexual 91 | [beep] - homosexual 92 | [beep] - homosexual 93 | [beep] - homosexual 94 | [beep] - idiot 95 | [beep] - female genitalia 96 | [beep] - female genitalia 97 | [beep] - African American 98 | [beep] - [beep] 99 | cracker - Caucasian 100 | [beep] - semen 101 | [beep] - idiot 102 | [beep] - prostitute 103 | [beep] - homosexual 104 | [beep] - homosexual 105 | [beep] - dirty girl 106 | [beep] - idiot 107 | [beep] - female genitalia 108 | [beep] - sexual act 109 | [beep] - [beep] 110 | [beep] - idiot 111 | [beep] - idiot 112 | [beep] - female genitalia 113 | [beep] - homosexual 114 | [beep] - idiot 115 | [beep] - idiot 116 | 117 | dago - Italian 118 | [beep] - darn 119 | deggo - Italian 120 | [beep] - penis 121 | [beep] - orgasm 122 | [beep] - idiot 123 | [beep] - Hands 124 | [beep] - idiot 125 | [beep] - idiot 126 | [beep] - homosexual 127 | [beep] - phallace face 128 | [beep] - male genitalia 129 | [beep] - semen 130 | [beep] - sperm 131 | [beep] - homosexual 132 | [beep] - penises 133 | [beep] - sexual act 134 | [beep] - homosexual 135 | [beep] - sexual act 136 | [beep] - homosexual 137 | [beep] - idiot 138 | [beep] - idiot 139 | [beep] - idiot 140 | [beep] - idiot 141 | [beep] - homosexual 142 | [beep] - sexual toy 143 | [beep] - idiot 144 | [beep] - idiot 145 | [beep] - poop 146 | [beep] - female hygene product 147 | [beep] - idiot 148 | [beep] - female hygene accessory 149 | [beep] - homosexual 150 | [beep] - idiot 151 | [beep] [beep] - idiot 152 | [beep] - idiot 153 | [beep] - idiot 154 | [beep] - idiot 155 | [beep] - idiot 156 | [beep] - homosexual 157 | 158 | [beep] - homosexual 159 | [beep] 
- homosexual 160 | [beep] - homosexual 161 | [beep] - homosexual 162 | [beep] - homosexual 163 | [beep] - homosexual 164 | [beep] - homosexual idiot 165 | fatass - a fat person 166 | [beep] - sexual act 167 | [beep] - sexual act 168 | [beep] - homosexual 169 | [beep] - fornicate 170 | [beep] - idiot 171 | [beep] - idiot 172 | [beep] - idiot 173 | [beep] - idiot 174 | [beep] - [beep] 175 | [beep] - Sexual fluids 176 | [beep] - had intercourse 177 | [beep] - fornicator 178 | [beep] - idiot 179 | [beep] - idiot 180 | [beep] - [beep] 181 | [beep] - jerk 182 | [beep] - sexual act 183 | [beep] - freaking 184 | [beep] - idiot 185 | [beep] - idiot 186 | [beep] - go away 187 | [beep] - sexual act 188 | [beep] - male genitalia 189 | [beep] - Moron 190 | [beep] - idiot 191 | [beep] - idiot 192 | [beep] - idiot 193 | [beep] - dummy 194 | [beep] - idiot 195 | [beep] - homosexual 196 | 197 | gay - homosexual 198 | [beep] - [beep] 199 | [beep] - homosexual 200 | [beep] - homosexual 201 | [beep] - homosexual 202 | [beep] - homosexual 203 | [beep] - homosexual 204 | [beep] - homosexual 205 | [beep] - homosexual 206 | [beep] - goshdarn 207 | [beep] - goshdarnit 208 | [beep] - female genitalia 209 | [beep] - Chinese 210 | gringo - foreigner 211 | guido - italian 212 | 213 | [beep] - sexual act 214 | [beep] on - erection 215 | heeb - Jewish Person 216 | hell - heck 217 | ho - woman 218 | hoe - Woman 219 | homo - homosexual 220 | [beep] - idiot 221 | honkey - white person 222 | [beep] - sexual act 223 | 224 | [beep] - idiot 225 | [beep] - idiot 226 | jap - japanesse person 227 | [beep] - masturbate 228 | [beep] - idiot 229 | jigaboo - African American 230 | [beep] - Semen 231 | jungle bunny - african american 232 | junglebunny - african american 233 | 234 | kike - Jewish Person 235 | [beep] - female genitalia 236 | [beep] - female genitalia 237 | kraut - german 238 | [beep] - female genitalia 239 | kyke - Jewish person 240 | 241 | [beep] - loser 242 | [beep] - overweight individual 243 
| [beep] - homosexual 244 | [beep] - homosexual 245 | [beep] - homosexual 246 | 247 | [beep] - homosexual 248 | mick - irish 249 | [beep] - female genitalia 250 | [beep] - Jerk 251 | [beep] - mother loving 252 | [beep] - mother lover 253 | [beep] - fornicating with mother 254 | [beep] - female genitalia 255 | [beep] - homosexual 256 | [beep] - sexual act 257 | 258 | negro - african american 259 | nigaboo - African American 260 | nigga - african american 261 | nigger - african american 262 | niggers - African Americans 263 | niglet - african american child 264 | [beep] - male genitalia 265 | [beep] - male genitalia 266 | 267 | paki - pakistanien 268 | [beep] - femail genitalia 269 | [beep] - Penis 270 | [beep] - idiot 271 | penis - male genitalia 272 | [beep] - homosexual 273 | [beep] - homosexual 274 | [beep] - homosexual 275 | [beep] - urinate 276 | [beep] - urinated 277 | [beep] off - angry 278 | [beep] - female genitalia 279 | [beep] - homosexual 280 | pollock - polish person 281 | [beep] - female genitals 282 | [beep] - female genitalia 283 | [beep] - [beep] 284 | [beep] - female genitalia 285 | porch monkey - african american 286 | porchmonkey - African American 287 | 288 | [beep] - penis 289 | [beep] - female genitalia 290 | [beep] - female dog 291 | [beep] - Female Genitalias 292 | [beep] - female reproductive organ 293 | [beep] - sexual act 294 | [beep] - idiot 295 | 296 | [beep] - [beep] fart. 
297 | [beep] - homosexual 298 | [beep] - homosexual 299 | [beep] - homosexual 300 | 301 | [beep] - erection 302 | [beep] - dirty sexual act 303 | ruski - Russian 304 | 305 | sand nigger - middle eastern 306 | sandnigger - middle eastern 307 | [beep] - male genitalia 308 | [beep] - male genitalia 309 | [beep] - poop 310 | [beep] - idiot 311 | [beep] - idiot 312 | [beep] - idiot 313 | [beep] - idiot 314 | [beep] - Bad Breath 315 | [beep] - Fired 316 | [beep] - idiot 317 | [beep] - idiot 318 | [beep] - pooface 319 | [beep] - Drunk 320 | [beep] - jerk 321 | [beep] - idiot 322 | [beep] - bathroom 323 | [beep] - [beep] 324 | [beep] - poop 325 | [beep] - defecator 326 | [beep] - worst 327 | [beep] - pooping 328 | [beep] - bad 329 | [beep] - poop 330 | [beep] - poop 331 | [beep] - dirty girl 332 | [beep] - semen 333 | [beep] - sexual act 334 | [beep] - sexually popular woman 335 | [beep] - sexually popular woman 336 | [beep] - poop 337 | [beep] - female genitalia 338 | spic - mexican 339 | spick - mexican american 340 | [beep] - ejaculate 341 | spook - White person 342 | [beep] - idiot 343 | 344 | [beep] - mentally challenged 345 | [beep] - male genitalia 346 | thundercunt - idiot 347 | tit - breast 348 | [beep] - sexual act 349 | tits - breasts 350 | [beep] - sexual act 351 | [beep] - female genitals 352 | [beep] - idiot 353 | [beep] - [beep] 354 | [beep] - homosexual 355 | 356 | [beep] - homosexual 357 | 358 | [beep] - female genitalia 359 | [beep] - femail genitalia 360 | [beep] - female genitalia 361 | [beep] - female genitalia 362 | [beep] - female genitalia 363 | 364 | [beep] - sexual act 365 | [beep] - sexual act 366 | wetback - Mexican 367 | [beep] - hussy 368 | [beep] - idiot 369 | [beep] - idiot 370 | wop - Italian -------------------------------------------------------------------------------- /libs/censure/data/ru_in.txt: -------------------------------------------------------------------------------- 1 | Апездал - Дилитант. 2 | Апездошенная - Удивлённая. 
3 | Блядь - Девушка лёгкого поведения. 4 | Блядство - Беспредел,неразбериха. 5 | Выебон - Хвастовство,показуха. 6 | Выебать - Совершить половой акт. 7 | Вхуюжить - Воткнуть. 8 | Гомосек - Голубой. 9 | Долбоёб - Дурак. 10 | Ебло - Лицо. 11 | Еблище - Тоже что и ебло. 12 | Ебать - Совершать половой акт. 13 | Ебическая сила - Невероятная,сверхестественная сила,немыслемое явление. 14 | Ебунок - Ребёнок. 15 | Еблан - Дурак. 16 | Ёбнуть - Ударить. 17 | Ёболызнуть - Стукнуть. 18 | Ебош - Груповой секс. 19 | Заебал - Надоел,достал. 20 | Заебатый - Хороший,отличный. 21 | Злаебучий - Нехороший,плохой. 22 | Заёб - Заскок,сдвиг по фазе. 23 | Иди на хуй - Отстанть, свободен. 24 | Колдоебина - Большая вещь, препятствие. 25 | Манда - Женские половые органы. 26 | Мандовошка - Малолетка. 27 | Мокрощелка - Девушка легкого поведения. 28 | Наебка - Обман. 29 | Наебал - Обманул. 30 | Наебаловка - Хитрость, уловка. 31 | Напиздеть - Соврать, обмануть. 32 | Отъебись - Отстань. 33 | Охуеть - Удивиться. 34 | Отхуевертить - Избить. 35 | Опизденеть - Обнаглеть. 36 | Охуевший - Обнаглевший. 37 | Отебукать - Поколатить. 38 | Пизда - Женские половые органы. 39 | Пидарас - Голубой. 40 | Пиздатый - Хороший. 41 | Пиздец - Конец, смерть. 42 | Пизданутый - Дурной. 43 | Поебать - Наплевать. 44 | Поебустика - Рутина. 45 | Проебать - Потерять. 46 | Подзалупный - Опущенный. 47 | Пизденыш - Незначительный человек. 48 | Припиздак - Дуралей. 49 | Разъебать - Разбить. 50 | Распиздяй - Развязанный человек, делающий все спустя рукава. 51 | Разъебанный - разбитый. 52 | Сука - Нехорошая женщина. 53 | Сучка - Уменьшительно-ласкательное от "суки". 54 | Трахать - Совершать половой акт. 55 | Уебок - Отморозок. 56 | Уебать - Ударить. 57 | Угондошить - Избить, убить, уничтожить. 58 | Уебан - То же что и "уебок". 59 | Хитровыебанный - Скользкий человек. 60 | Хуй - Мужской половой орган, личность мужского пола. 61 | Хуйня - Некий предмет, очень плохое. 62 | Хуета - Заморочка. 63 | Хуево - Плохо. 
64 | Хуесос - Сосущий мужской член. 65 | Хуеть - удивляться, балдеть. 66 | Хуевертить - Бить, избивать. 67 | Хуеглот - Глотающий мужской член. 68 | Хуистика - Наука про.... 69 | Членосос - То же что и "хуесос". 70 | Членоплет - Балбес. 71 | Шлюха - Проститутка. -------------------------------------------------------------------------------- /libs/censure/data/ru_out.txt: -------------------------------------------------------------------------------- 1 | [beep] - Дилитант. 2 | [beep] - Удивлённая. 3 | [beep] - Девушка лёгкого поведения. 4 | [beep] - Беспредел,неразбериха. 5 | [beep] - Хвастовство,показуха. 6 | [beep] - Совершить половой акт. 7 | [beep] - Воткнуть. 8 | [beep] - Голубой. 9 | [beep] - Дурак. 10 | [beep] - Лицо. 11 | [beep] - Тоже что и [beep] 12 | [beep] - Совершать половой акт. 13 | [beep] сила - Невероятная,сверхестественная сила,немыслемое явление. 14 | [beep] - Ребёнок. 15 | [beep] - Дурак. 16 | [beep] - Ударить. 17 | [beep] - Стукнуть. 18 | [beep] - Груповой секс. 19 | [beep] - Надоел,достал. 20 | [beep] - Хороший,отличный. 21 | [beep] - Нехороший,плохой. 22 | [beep] - Заскок,сдвиг по фазе. 23 | Иди на [beep] - Отстанть, свободен. 24 | [beep] - Большая вещь, препятствие. 25 | [beep] - Женские половые органы. 26 | [beep] - Малолетка. 27 | [beep] - Девушка легкого поведения. 28 | [beep] - Обман. 29 | [beep] - Обманул. 30 | [beep] - Хитрость, уловка. 31 | [beep] - Соврать, обмануть. 32 | [beep] - Отстань. 33 | [beep] - Удивиться. 34 | [beep] - Избить. 35 | [beep] - Обнаглеть. 36 | [beep] - Обнаглевший. 37 | [beep] - Поколатить. 38 | [beep] - Женские половые органы. 39 | [beep] - Голубой. 40 | [beep] - Хороший. 41 | [beep] - Конец, смерть. 42 | [beep] - Дурной. 43 | [beep] - Наплевать. 44 | [beep] - Рутина. 45 | [beep] - Потерять. 46 | [beep] - Опущенный. 47 | [beep] - Незначительный человек. 48 | [beep] - Дуралей. 49 | [beep] - Разбить. 50 | [beep] - Развязанный человек, делающий все спустя рукава. 51 | [beep] - разбитый. 
52 | Сука - Нехорошая женщина. 53 | [beep] - Уменьшительно-ласкательное от "суки". 54 | [beep] - Совершать половой акт. 55 | [beep] - Отморозок. 56 | [beep] - Ударить. 57 | [beep] - Избить, убить, уничтожить. 58 | [beep] - То же что и [beep] 59 | [beep] - Скользкий человек. 60 | [beep] - Мужской половой орган, личность мужского пола. 61 | [beep] - Некий предмет, очень плохое. 62 | [beep] - Заморочка. 63 | [beep] - Плохо. 64 | [beep] - Сосущий мужской член. 65 | [beep] - удивляться, балдеть. 66 | [beep] - Бить, избивать. 67 | [beep] - Глотающий мужской член. 68 | [beep] - Наука про.... 69 | [beep] - То же что и [beep] 70 | [beep] - Балбес. 71 | [beep] - Проститутка. -------------------------------------------------------------------------------- /libs/censure/helper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals, print_function 3 | import os 4 | import codecs 5 | 6 | from censure.base import Censor 7 | 8 | 9 | class CensorHelper: 10 | do_compile = True 11 | 12 | def __init__(self, lang='ru', do_compile=None): 13 | if do_compile is None: 14 | do_compile = self.do_compile 15 | self.lang = lang 16 | self.c = Censor.get(lang=lang, do_compile=do_compile) 17 | 18 | def censure_text(self, text): 19 | count = 0 20 | result = [] 21 | for line in text.splitlines(): 22 | new_line, bad_words_count, bad_phrases_count = self.c.clean_line(line) 23 | count += bad_words_count + bad_phrases_count 24 | result.append(new_line) 25 | return '\n'.join(result), count 26 | 27 | def test(self): 28 | d = os.path.dirname(os.path.abspath(__file__)) 29 | in_file = os.path.join(d, 'data', '{}_in.txt'.format(self.lang)) 30 | out_file = os.path.join(d, 'data', '{}_out.txt'.format(self.lang)) 31 | 32 | with codecs.open(in_file, 'r', 'utf-8') as in_fs, \ 33 | codecs.open(out_file, 'w', 'utf-8') as out_fs: 34 | text = in_fs.read() 35 | cleaned_text, count = self.censure_text(text) 36 | 
print('Found and replaced count: {}'.format(count)) 37 | out_fs.write(cleaned_text) 38 | 39 | 40 | def ru_just_test(): 41 | c = CensorHelper(lang='ru', do_compile=False) 42 | c.test() 43 | 44 | 45 | def en_just_test(): 46 | c = CensorHelper(lang='en', do_compile=False) 47 | c.test() 48 | 49 | 50 | def show_examples(): 51 | beep = '[censored]' 52 | 53 | print('Russian examples:') 54 | # don't specify do_compile=False unless you want to debug something 55 | # and see bad words raw (not compiled) patterns 56 | censor_ru = Censor.get(lang='ru', do_compile=False) 57 | line = 'ебанамат бляд' 58 | print('Checking line: "{}"'.format(line)) 59 | line_info = censor_ru.check_line(line) 60 | print('Does the line contain obscene words? - {}'.format(not line_info['is_good'])) 61 | print('First bad word: {}, bad word pattern: {}'.format( 62 | line_info['bad_word_info']['word'], line_info['bad_word_info']['accuse'][0])) 63 | 64 | print('Cleaning line with beep word={}'.format(line, beep)) 65 | cleaned_line, bad_words_count, bad_phrases_count = censor_ru.clean_line(line, beep=beep) 66 | print('resulted cleaned line: "{}", bad words count: {}, bad phrases count: {}'.format( 67 | cleaned_line, bad_words_count, bad_phrases_count)) 68 | print('\n') 69 | 70 | print('English examples:') 71 | # don't specify do_compile=False unless you want to debug something 72 | # and see bad words raw (not compiled) patterns 73 | censor_en = Censor.get(lang='en', do_compile=False) 74 | line = 'fucken shit' 75 | line_info = censor_en.check_line(line) 76 | print('Does the line contain obscene words? 
- {}'.format(not line_info['is_good'])) 77 | print('First bad word: {}, bad word pattern: {}'.format( 78 | line_info['bad_word_info']['word'], line_info['bad_word_info']['accuse'][0])) 79 | 80 | print('cleaning line: {} with beep word={}'.format(line, beep)) 81 | cleaned_line, bad_words_count, bad_phrases_count = censor_en.clean_line(line, beep=beep) 82 | print('Resulted cleaned line: "{}", bad words count: {}, bad phrases count: {}'.format( 83 | cleaned_line, bad_words_count, bad_phrases_count)) 84 | 85 | print('\n') 86 | line = 'camel toe towel' 87 | print('English bad phrase line example: "{}"'.format(line)) 88 | line_info = censor_en.check_line(line) 89 | print('Does the line contain obscene words/phrases? - {}'.format(not line_info['is_good'])) 90 | 91 | print('First accuse pattern: {}'.format( 92 | line_info['accuse'][0])) 93 | 94 | print('Cleaning bad phrases line with beep word={}'.format(beep)) 95 | cleaned_line, bad_words_count, bad_phrases_count = censor_en.clean_line(line, beep=beep) 96 | print('Resulted cleaned line: "{}", bad words count: {}, bad phrases count: {}'.format( 97 | cleaned_line, bad_words_count, bad_phrases_count)) 98 | 99 | html_line = 'bitch whorefu
ck' 100 | print('\n') 101 | print('Cleaning english html line containing bad words: "{}"'.format(html_line)) 102 | # note: no phrases are cleaned atm in html 103 | cleaned_line, bad_words_count = censor_en.clean_html_line( 104 | html_line, beep=beep) 105 | print('Resulted cleaned html line: "{}", bad words count: {}'.format( 106 | cleaned_line, bad_words_count)) 107 | 108 | 109 | if __name__ == '__main__': 110 | # ru_just_test() 111 | # en_just_test() 112 | # from timeit import Timer 113 | # t = Timer('just_test()', 'from __main__ import just_test') 114 | # print(t.timeit()) 115 | 116 | show_examples() 117 | -------------------------------------------------------------------------------- /libs/censure/lang/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Priler/samurai/7b7495871a06109cc7fc524ec13f195bcf86fed3/libs/censure/lang/__init__.py -------------------------------------------------------------------------------- /libs/censure/lang/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Priler/samurai/7b7495871a06109cc7fc524ec13f195bcf86fed3/libs/censure/lang/common/__init__.py -------------------------------------------------------------------------------- /libs/censure/lang/common/constants.py: -------------------------------------------------------------------------------- 1 | BEEP_HTML = BEEP = '[beep]' 2 | -------------------------------------------------------------------------------- /libs/censure/lang/common/patterns.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | import re 4 | 5 | PAT_HTML_TAG = re.compile('(<.*?>)|(&[\w]{2,6};)|()') 6 | # PAT_HTML_TAG_OR_SPACER = re.compile('(?P<.*?>)|(?P[\s]+)') 7 | PAT_HTML_CSS = re.compile('[\w\s}{\.#;:\-\+]') 8 | PAT_HTML_SPACE = re.compile(' ', 
re.IGNORECASE) 9 | 10 | PAT_PUNCT1 = re.compile('[\"\-\+;\.,\*\?\(\)]+') 11 | PAT_PUNCT2 = re.compile('[!:_]+') 12 | PAT_PUNCT3 = re.compile('[\"\-\+;\.,\*\?\(\)!:_]+') 13 | 14 | PAT_SPACE = re.compile('[\s]+') 15 | -------------------------------------------------------------------------------- /libs/censure/lang/en/__init__.py: -------------------------------------------------------------------------------- 1 | from . import constants 2 | from . import patterns 3 | 4 | 5 | __all__ = ['constants', 'patterns'] 6 | -------------------------------------------------------------------------------- /libs/censure/lang/en/constants.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # based on the words from http://www.noswearing.com/dictionary/ 3 | 4 | from __future__ import unicode_literals 5 | 6 | 7 | EXCLUDES_DATA = { 8 | 9 | } 10 | 11 | 12 | EXCLUDES_CORE = { 13 | 14 | } 15 | 16 | # 'cum': '^cum(($)|(bubble)|(dumpster)|()|()', 17 | FOUL_DATA = { 18 | 'a': [ 19 | '^anus', 20 | '^axwound', 21 | ], 22 | 'b': [ 23 | '^bampot', 24 | '^bastar[dt]', 25 | '^beaner', 26 | '^blow(job)$', 27 | '^bollo(x|cks)', 28 | '^boner$', 29 | ], 30 | 'c': [ 31 | '^cameltoe$', 32 | '^carpetmuncher', 33 | '^chesticle', 34 | '^chin[ck]$', 35 | '^choad$', 36 | '^chode$', 37 | '^cooch(ie|y)$', 38 | '^coon$', 39 | '^cooter$', 40 | ], 41 | 'd': [ 42 | '^damn$', 43 | '^d[iy]ke$', 44 | '^dildo', 45 | '^dipshit', 46 | '^doochbag', 47 | '^dookie', 48 | '^douche($|(fag)|(bag)|(waffle))', 49 | ], 50 | 'f': [ 51 | '^fellatio', 52 | '^feltch', 53 | '^flamer', 54 | '^fudgepacker', 55 | ], 56 | 'g': [ 57 | '^godd[au]mn(it)*', 58 | '^gooch', 59 | '^gook', 60 | ], 61 | 'h': [ 62 | '^handjob', 63 | '^hard(on)*', 64 | '^homodumbshit', 65 | '^humping', 66 | ], 67 | 'j': [ 68 | '^jagoff', 69 | '^jizz', 70 | 71 | ], 72 | 'k': [ 73 | '^koo[t]*ch', 74 | '^kunt', 75 | ], 76 | 'l': [ 77 | 'lameass', 78 | 'lardass', 79 | '^lesb(ian|o)', 80 | '^lezzie', 
81 | ], 82 | 'm': [ 83 | '^mcfagget', 84 | '^minge', 85 | '^muff(diver)*', 86 | '^munging', 87 | '^m[uo][thd]{1,3}erf[ou][ck]{1,}(er)?' 88 | ], 89 | 'n': [ 90 | '^nutsack' 91 | ], 92 | 'p': [ 93 | '^panooch', 94 | '^polesmoker', 95 | '^punta', 96 | '^puto', 97 | ], 98 | 99 | 'r': [ 100 | '^renob', 101 | '^rimjob', 102 | ], 103 | 's': [ 104 | '^schlong', 105 | '^scrote', 106 | '^skank', 107 | '^skeet', 108 | '^smeg', 109 | '^snatch', 110 | 'splooge', 111 | ], 112 | 't': [ 113 | '^tard', 114 | 'testicle', 115 | # '^tit(ty)*$', 116 | ], 117 | 118 | } 119 | 120 | FOUL_CORE = { 121 | # a 122 | 'arse': '^arse((hole)|$)', 123 | 'ass': '^ass((butt)|(idiot)|(hat)|(jabber)|(pirate)|(bag)|(banger)|' 124 | '(bandit)|(bite)|(clown)|(cock)|(cracker)|(es)|(face)|(goblin)|' 125 | '(hat)|(head)|(hole)|(hopper)|(jacker)|(lick)|(licker)|(monkey)|' 126 | '(munch)|(nigger)|(hit)|(sucker)|(ucker)|(wad)|(wipe)|$)', 127 | 'bitch': '^bitch', 128 | 'bullshit': '^bullshit$', 129 | 'butt': '^butt((plug)|(pirate)|($))', 130 | 131 | 132 | 'clit': '^clit(($)|(or)|(face))', 133 | 'cum': '^cum(($)|(bubble)|(dumpster)|(guzzler)|(jockey)|(slut)|(tart))', 134 | 'cunni': '^cunni(($)|(e)|(lingus))', 135 | 'cock': '^cock($|(ass)|(bite)|(burger)|(face)|(head)|(jockey)|(knoker)|' 136 | '(master)|(mong(ler|ruel))|(monkey)|(muncher)|(nose)|' 137 | '(nugget)|(shit)|(smith)|(smoke)|(sniffer)|(sucker)|(waffle))', 138 | 139 | 'cunt': '^cunt(($)|(ass)|(face)|(hole)|(licker)|(rag)|(slut))$', 140 | 141 | 'dick': '^dick(([s]*$)|(bag)|(beaters)|(face)|(head)|(hole)|' 142 | '(juice)|(milk)|(monger)|(slap)|(suck(er|in))|' 143 | '(tickler)|(wad)|(weasel)|(weed)|(wod))', 144 | 'dumb': 'dum(b)*($|(ass)|(shit))', 145 | 146 | 'fag': 'fag($|(bag)|(g[io]t)|(tard)|(ass))', 147 | 148 | 'gay': 'gay((ass)|(bob)|(do)|(lord)|(tard)|(wad))', 149 | 150 | 'jackass': 'jackass', 151 | 'jerk': 'jerk((o[f]+)|(ass))', 152 | 153 | 'mothafucka': 'm[oa](th|z)afuck(a|in[g]*|er)', 154 | 'penis': '^penis(banger|puffer)', 155 | 'pecker': 
'pecker(head)*', 156 | 'piss': '^piss((ed)*(off)*|flaps)', 157 | 'poon': '^p(oo|u)n(an(n)*[iy]|tang|$)', 158 | 'prick': '^prick$', 159 | 'pussy': '^puss((y)*(lick)*|ies)', 160 | 161 | 'quee': 'quee(f|r($|bait|hole))', 162 | 'suck': '^suck(ass|$)', 163 | 'shit': '^shit($|ass|bag|brains|breath|canned|cunt|dick|face|faced|head|hole|house|' 164 | 'spitter|stain|(t)*(er|iest|ing|y))', 165 | 'slut': '^slut($|bag)', 166 | 'shiz': '^shiz(nit)*$', 167 | 'twat': '^twat(lips|s|waffle|$)', 168 | 'vjay': '^vjayjay', 169 | 'wank': '^wank(job|$)', 170 | 'whore': '^whore(bag|face|$)', 171 | } 172 | 173 | 174 | BAD_SEMI_PHRASES = ( 175 | 'suckmydick', 176 | 'sickmyduck', 177 | 'cameltoe', 178 | ) 179 | 180 | BAD_PHRASES = ( 181 | 'camel(\s)*toe', 182 | 'dick[\-\s]*sneeze', 183 | 'blow[\-\s]*job', 184 | 'jerk[\-\s]*off', 185 | 'nut[\-\s]*sack' 186 | ) 187 | 188 | TRANS_TAB = {} 189 | -------------------------------------------------------------------------------- /libs/censure/lang/en/patterns.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | import re 4 | 5 | 6 | PAT_EUML = re.compile('&[Ee][Uu][Mm][Ll];') # e 7 | PAT_IUML = re.compile('&[Uu][Uu][Mm][Ll];') # и 8 | PAT_AUML = re.compile('&[Aa][Uu][Mm][Ll];') # а 9 | PAT_OUML = re.compile('&[Oo][Uu][Mm][Ll];') # о 10 | PAT_YUML = re.compile('&[Yy][Uu][Mm][Ll];') # у 11 | 12 | PAT_CENT = re.compile('&[Cc][Ee][Nn][Tt];') # c 13 | PAT_CODE203 = re.compile('Ë') # e 14 | PAT_CODE162 = re.compile('¢') # c 15 | PAT_CODE120 = re.compile('x') # х 16 | PAT_CODE121 = re.compile('y') # у 17 | 18 | 19 | PATTERNS_REPLACEMENTS = ( 20 | (PAT_EUML, 'e'), # euml 21 | (PAT_IUML, 'u'), # iuml 22 | (PAT_AUML, 'a'), # auml 23 | (PAT_OUML, 'o'), # ouml 24 | (PAT_YUML, 'y'), # yuml 25 | 26 | (PAT_CODE120, 'x'), 27 | (PAT_CODE121, 'y'), 28 | (PAT_CODE203, 'e'), 29 | 30 | (PAT_CENT, 'c'), # cent 31 | (PAT_CODE162, 'c'), 32 | ) 33 | 
-------------------------------------------------------------------------------- /libs/censure/lang/ru/__init__.py: -------------------------------------------------------------------------------- 1 | from . import constants 2 | from . import patterns 3 | 4 | 5 | __all__ = ['constants', 'patterns'] 6 | -------------------------------------------------------------------------------- /libs/censure/lang/ru/patterns.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | import re 4 | 5 | 6 | PAT_EUML = re.compile('&[Ee][Uu][Mm][Ll];') # e 7 | PAT_IUML = re.compile('&[Uu][Uu][Mm][Ll];') # и 8 | PAT_AUML = re.compile('&[Aa][Uu][Mm][Ll];') # а 9 | PAT_OUML = re.compile('&[Oo][Uu][Mm][Ll];') # о 10 | PAT_YUML = re.compile('&[Yy][Uu][Mm][Ll];') # у 11 | 12 | PAT_CENT = re.compile('&[Cc][Ee][Nn][Tt];') # c 13 | PAT_CODE203 = re.compile('Ë') # e 14 | PAT_CODE162 = re.compile('¢') # c 15 | PAT_CODE120 = re.compile('x') # х 16 | PAT_CODE121 = re.compile('y') # у 17 | 18 | PAT_AS_I = re.compile(r'\|\\\|') # И 19 | PAT_AS_L = re.compile(r'/\\') # Л 20 | 21 | PAT_AS_X1 = re.compile(r'><') # Х 22 | PAT_AS_X2 = re.compile(r'><') # Х 23 | PAT_AS_X3 = re.compile('\)\(') # Х 24 | PAT_AS_X4 = re.compile('}{') # Х 25 | 26 | PAT_AS_J1 = re.compile('>\|<') # Ж 27 | PAT_AS_J2 = re.compile('}\|{') # Ж 28 | 29 | PAT_AS_Y1 = re.compile('`/') # Y 30 | PAT_AS_Y2 = re.compile('\-/') # Y 31 | PAT_AS_Y3 = re.compile('`\-/') # Y 32 | 33 | PAT_AS_YY1 = re.compile('b\|') # ы 34 | PAT_AS_YY2 = re.compile('bI') # ы 35 | PAT_AS_YY3 = re.compile('bl') # ы 36 | 37 | 38 | PAT_PI = re.compile('3[\.,]14[\d]*') 39 | PAT_E = re.compile('2[\.,]72[\d]*') 40 | PAT_PREP = re.compile('(а[х]?)|(в)|([вмт]ы)|(д[ао])|(же)|(за)') 41 | 42 | 43 | PATTERNS_REPLACEMENTS = ( 44 | (PAT_EUML, 'е'), # euml 45 | (PAT_IUML, 'и'), # iuml 46 | (PAT_AUML, 'а'), # auml 47 | (PAT_OUML, 'о'), # ouml 48 | (PAT_YUML, 'у'), # yuml 49 | 
50 | (PAT_CODE203, 'е'), 51 | 52 | (PAT_CENT, 'с'), # cent 53 | (PAT_CODE162, 'с'), 54 | 55 | (PAT_AS_I, 'и'), # as И 56 | (PAT_AS_L, 'л'), # as Л 57 | 58 | (PAT_AS_X1, 'х'), # as Х 59 | (PAT_AS_X2, 'х'), # as Х 60 | (PAT_AS_X3, 'х'), # as Х 61 | (PAT_AS_X4, 'х'), # as Х 62 | 63 | (PAT_AS_J1, 'ж'), # as ж 64 | (PAT_AS_J2, 'ж'), # as ж 65 | 66 | (PAT_AS_Y1, 'y'), # as y 67 | (PAT_AS_Y2, 'y'), # as y 68 | (PAT_AS_Y3, 'y'), # as y 69 | 70 | (PAT_AS_YY1, 'ы'), # as ы 71 | (PAT_AS_YY2, 'ы'), # as ы 72 | (PAT_AS_YY3, 'ы'), # as ы 73 | 74 | (PAT_CODE120, 'х'), 75 | (PAT_CODE121, 'у'), 76 | (PAT_PI, 'пи'), # 3.14... 77 | (PAT_E, 'е'), # 2.72... 78 | ) 79 | -------------------------------------------------------------------------------- /libs/censure/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Priler/samurai/7b7495871a06109cc7fc524ec13f195bcf86fed3/libs/censure/tests/__init__.py -------------------------------------------------------------------------------- /libs/censure/tests/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | import re 5 | import random 6 | import string 7 | from unittest import TestCase as CoreTestCase 8 | from importlib import import_module 9 | 10 | from censure.base import Censor, CensorException 11 | from censure.lang.common import constants, patterns 12 | # from censure.tests.data import ( 13 | # SIMPLE_OBSCENE_WORDS, E_OBSCENE_WORDS, PI_OBSCENE_WORDS, 14 | # OBSCENE_HTML_LINES, 15 | # ) 16 | 17 | RUSSIAN_LOWERCASE = 'абвгдеёжзиклмнопрстуфхцчшщъьэюя' 18 | ENGLISH_LOWERCASE = string.ascii_lowercase 19 | ALL_LOWERCASE = ENGLISH_LOWERCASE + RUSSIAN_LOWERCASE 20 | 21 | 22 | class TestCase(CoreTestCase): 23 | @classmethod 24 | def _get_random_word_base( 25 | cls, letters=None, min_chars=3, max_chars=10, assert_good=True, russian_only=False): 26 | 
letters = letters or ENGLISH_LOWERCASE 27 | word = ''.join((random.choice(letters) for _ in range(min_chars, max_chars))) 28 | if assert_good: 29 | if not cls.censor.check_word(word)['is_good']: 30 | word = cls._get_random_word(min_chars=min_chars, max_chars=max_chars) 31 | return word 32 | 33 | @classmethod 34 | def _get_random_count(cls, min_i=2, max_i=10): 35 | return random.randint(min_i, max_i) 36 | 37 | @classmethod 38 | def _dice(cls): 39 | # binary choice 40 | return random.choice((True, False)) 41 | 42 | 43 | class TestCaseRu(TestCase): 44 | @classmethod 45 | def setUpClass(cls): 46 | cls.censor = Censor.get(lang='ru', do_compile=False) 47 | cls.data = import_module('censure.tests.ru.data') 48 | 49 | @classmethod 50 | def _get_random_word(cls, min_chars=3, max_chars=10, assert_good=True, 51 | russian_only=False): 52 | letters = RUSSIAN_LOWERCASE if russian_only else ALL_LOWERCASE 53 | return cls._get_random_word_base( 54 | min_chars=min_chars, max_chars=max_chars, assert_good=assert_good, letters=letters) 55 | 56 | 57 | class TestCaseEn(TestCase): 58 | @classmethod 59 | def setUpClass(cls): 60 | cls.censor = Censor.get(lang='en', do_compile=False) 61 | cls.data = import_module('censure.tests.en.data') 62 | 63 | @classmethod 64 | def _get_random_word(cls, min_chars=3, max_chars=10, assert_good=True): 65 | letters = ENGLISH_LOWERCASE 66 | return cls._get_random_word_base( 67 | min_chars=min_chars, max_chars=max_chars, assert_good=assert_good, letters=letters) 68 | 69 | 70 | __all__ = [ 71 | 'random', 're', 72 | 'TestCase', 'TestCaseRu', 'TestCaseEn', 'CoreTestCase', 73 | 74 | 'Censor', 'CensorException', 75 | 'constants', 'patterns', 76 | 77 | # 'SIMPLE_OBSCENE_WORDS', 'E_OBSCENE_WORDS', 'PI_OBSCENE_WORDS', 78 | # 'OBSCENE_HTML_LINES', 79 | # 'BEEP' 80 | ] 81 | -------------------------------------------------------------------------------- /libs/censure/tests/en/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Priler/samurai/7b7495871a06109cc7fc524ec13f195bcf86fed3/libs/censure/tests/en/__init__.py -------------------------------------------------------------------------------- /libs/censure/tests/en/data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | SIMPLE_OBSCENE_WORDS = ( 5 | 'shit', 6 | 'motherfucker', 7 | 'prick', 8 | 'dildo', 9 | 'bitch', 10 | 'whore' 11 | ) 12 | 13 | SIMPLE_OBSCENE_PHRASES = ( 14 | 'camel toe', 15 | # 'dick sneeze', -> results in '[beep] sneeze' and not '[beep]' cause of word dick pattern 16 | 'dick-sneeze', 17 | 'blow job' 18 | ) 19 | 20 | OBSCENE_HTML_LINES = ( 21 | ( 22 | 'бshit this bitch', 23 | '[beep] this [beep]' 24 | ), 25 | ) 26 | -------------------------------------------------------------------------------- /libs/censure/tests/en/test_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | from censure.tests.base import * 4 | 5 | from censure.base import _get_token_value, _get_remained_tokens, Censor 6 | 7 | 8 | class CensorInternalsTestCase(TestCaseEn): 9 | def test_good_word(self): 10 | for x in range(50): 11 | word = self._get_random_word() 12 | word_info = self.censor.check_word(word) 13 | self.assertDictContainsSubset({ 14 | 'word': self.censor._prepare_word(word), 15 | 'is_good': True, 16 | }, word_info) 17 | -------------------------------------------------------------------------------- /libs/censure/tests/en/test_censure.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | from censure.tests.base import * 5 | 6 | 7 | class TestCensor(TestCaseEn): 8 | def test_on_simple_obscene_phrases(self): 9 | for words in ( 10 | self.data.SIMPLE_OBSCENE_PHRASES, 11 | ): 12 
| for line in words: 13 | cleaned_line, bad_words_count, bad_phrases_count, _, _, _ = self.censor.clean_line(line) 14 | self.assertEqual(cleaned_line, constants.BEEP) 15 | self.assertEqual(bad_phrases_count, 1) 16 | 17 | def test_on_simple_obscene_words(self): 18 | for words in ( 19 | self.data.SIMPLE_OBSCENE_WORDS, 20 | ): 21 | for line in words: 22 | cleaned_line, bad_words_count, bad_phrases_count, _, _, _ = self.censor.clean_line(line) 23 | self.assertEqual(cleaned_line, constants.BEEP) 24 | self.assertEqual(bad_words_count, 1) 25 | 26 | count = self._get_random_count() 27 | line_template = ' '.join(('{line}' for _ in range(count))) 28 | line_repeated = line_template.format(line=line) 29 | cleaned_line, bad_words_count, bad_phrases_count, _, _, _ = self.censor.clean_line(line_repeated) 30 | self.assertEqual(cleaned_line, line_template.format(line=constants.BEEP)) 31 | self.assertEqual(bad_words_count, count) 32 | self.assertEqual(bad_phrases_count, 0) 33 | 34 | def test_on_simple_html(self): 35 | for (html, cleaned_html) in self.data.OBSCENE_HTML_LINES: 36 | result, bad_words_count = self.censor.clean_html_line(html) 37 | self.assertTrue(bad_words_count > 0) 38 | self.assertEqual(result, cleaned_html) 39 | -------------------------------------------------------------------------------- /libs/censure/tests/en/test_helper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | -------------------------------------------------------------------------------- /libs/censure/tests/ru/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Priler/samurai/7b7495871a06109cc7fc524ec13f195bcf86fed3/libs/censure/tests/ru/__init__.py -------------------------------------------------------------------------------- /libs/censure/tests/ru/data.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | from censure.lang.common.constants import BEEP 4 | 5 | 6 | SIMPLE_OBSCENE_WORDS = ( 7 | 'пидорги', 8 | 'ебланаи', 9 | 'хуй', 10 | 'пизда', 11 | 'блядва', 12 | 'еблохуй' 13 | ) 14 | 15 | E_OBSCENE_WORDS = ( 16 | '2.72блан', 17 | '2.72баться', 18 | 'п2.72здец', 19 | '2.72111блан', 20 | '2.72222баться', 21 | 'п2.72333здец', 22 | ) 23 | 24 | PI_OBSCENE_WORDS = ( 25 | '3.14здец', 26 | '3.14дор', 27 | '3.14дорги', 28 | '3.14зда', 29 | ) 30 | 31 | OBSCENE_HTML_LINES = ( 32 | ( 33 | ('бля пида<расы ебанyты2.72 ' 34 | 'пи>здa

длбоебы

'), 35 | '{beep} {beep} {beep} {beep}

{beep}

'.format(beep=BEEP) 36 | ), 37 | ( 38 | 'апездал       дилитант
', 39 | '{beep}       дилитант
'.format( 40 | beep=BEEP) 41 | ), 42 | ( 43 | ('

злаебучий      ' 44 | 'нехороший,плохой
'), 45 | '

{beep}      нехороший,плохой
class CensorInternalsTestCase(TestCaseRu):
    """Checks on ``Censor`` internals for the Russian language setup."""

    def _assert_contains_items(self, expected, actual):
        """Assert that every key/value pair of ``expected`` is in ``actual``.

        Stand-in for ``assertDictContainsSubset``, which is deprecated and
        no longer exists in Python 3.12+.
        """
        for key, value in expected.items():
            self.assertIn(key, actual)
            self.assertEqual(actual[key], value)

    def test__is_pi_or_e_word(self):
        """``_is_pi_or_e_word`` must flag only words containing 2.72 or 3.14."""
        for w, result in (
            ('2.72', True),
            ('3.14', True),
            ('2.71', False),
            ('3.15', False),
            ('5.15', False),
        ):
            # Optionally surround the number with generated words so the
            # check is exercised with a prefix and/or a suffix.
            if self._dice():  # prefix
                w = '{}{}'.format(self._get_random_word(), w)
            if self._dice():  # suffix
                w = '{}{}'.format(w, self._get_random_word())
            self.assertEqual(self.censor._is_pi_or_e_word(w), result)

    def test_good_word(self):
        """Generated Russian-only words must be reported as good."""
        for _ in range(50):
            word = self._get_random_word(russian_only=True)
            word_info = self.censor.check_word(word)
            # 'excuse' and 'accuse' are intentionally not checked here.
            self._assert_contains_items({
                'word': self.censor._prepare_word(word),
                'is_good': True,
            }, word_info)

    def test_check_e_word(self):
        """An obscene word written with "2.72" must be accused, not excused."""
        word = self.data.E_OBSCENE_WORDS[0]
        word_info = self.censor.check_word(word)
        self._assert_contains_items({
            'excuse': [],
            'word': self.censor._prepare_word(word),
            'is_good': False,
        }, word_info)
        self.assertTrue(len(word_info.get('accuse', [])) > 0)

    def test_clean_line_e_word(self):
        """``clean_line`` must replace a lone "2.72" word with one beep."""
        word = self.data.E_OBSCENE_WORDS[0]
        cleaned_line, count, phrases_count, _, _, _ = self.censor.clean_line(word)
        self.assertEqual(cleaned_line, constants.BEEP)
        self.assertEqual(count, 1)
        self.assertEqual(phrases_count, 0)
class TestCensor(TestCaseRu):
    """End-to-end checks of ``clean_line`` / ``clean_html_line`` for Russian."""

    def test_on_simple_obscene_words(self):
        """Each sample word, alone or repeated, is replaced by beeps only."""
        groups = (
            self.data.SIMPLE_OBSCENE_WORDS,
            self.data.E_OBSCENE_WORDS,
            self.data.PI_OBSCENE_WORDS
        )
        for sample in (word for group in groups for word in group):
            # A single obscene word becomes exactly one beep.
            cleaned, words_hit, phrases_hit, _, _, _ = self.censor.clean_line(sample)
            self.assertEqual(cleaned, constants.BEEP)
            self.assertEqual(words_hit, 1)
            self.assertEqual(phrases_hit, 0)

            # The same word repeated `repeats` times, space separated, must
            # become the same number of beeps and no phrase hits.
            repeats = self._get_random_count()
            template = ' '.join(['{line}'] * repeats)
            cleaned, words_hit, phrases_hit, _, _, _ = self.censor.clean_line(
                template.format(line=sample))
            self.assertEqual(cleaned, template.format(line=constants.BEEP))
            self.assertEqual(words_hit, repeats)
            self.assertEqual(phrases_hit, 0)

    def test_on_simple_html(self):
        """Every HTML sample is cleaned to its expected counterpart."""
        for source_html, expected_html in self.data.OBSCENE_HTML_LINES:
            cleaned, words_hit = self.censor.clean_html_line(source_html)
            self.assertTrue(words_hit > 0)
            self.assertEqual(cleaned, expected_html)
class GenderExtractor:
    """Guess the likely gender of a first name from per-country name lists.

    Instance attributes (all set by ``__init__``):
      names_lists: raw comma-separated entries of ``nameLists/list.txt``.
      countries_encoding: lower-case country name -> column index.
      gender_encoding: ``{"Male": 0, "Female": 1}`` (row index).
      name_freq: lower-case name -> ``[male_counts, female_counts]``, each
        a list holding one counter per country.
    """

    def __init__(self):
        """Initialize the data.

        Loads the prebuilt ``data.pickle`` index if the package ships one;
        otherwise builds the index from the name lists and saves it.
        """
        self.names_lists = pkgutil.get_data(__name__, "nameLists/list.txt").decode().split(',')
        self.countries_encoding = self._index_countries(self.names_lists)
        self.gender_encoding = {"Male": 0, "Female": 1}

        try:
            raw_index = pkgutil.get_data(__name__, "data.pickle")
        except FileNotFoundError:
            raw_index = None

        # pkgutil.get_data may also signal "not available" by returning None.
        if raw_index is None:
            self._create_pickle()
        else:
            # NOTE(security): unpickling runs arbitrary code if the file was
            # tampered with; data.pickle must only ever come from this package.
            self.name_freq = pickle.loads(raw_index)

    @staticmethod
    def _file_name_tokens(fname):
        """Split a list entry such as ``nameLists/CzechMaleUTF8.csv`` into
        its CamelCase tokens (``['Czech', 'Male', 'UTF8.csv']``)."""
        base_name = os.path.split(fname.replace("\\", "/"))[-1]
        spaced = re.sub('([A-Z][a-z]+)', r' \1', re.sub('([A-Z]+)', r' \1', base_name))
        return spaced.split()

    @classmethod
    def _index_countries(cls, file_names):
        """Map each country to a dense index in order of first appearance.

        An index is assigned only the first time a country is seen, so a
        country with a single list file no longer shares the index of the
        previous country. For lists where every country contributes several
        consecutive files the numbering is unchanged.
        """
        index = {}
        for fname in file_names:
            tokens = cls._file_name_tokens(fname)
            if not tokens:  # empty entry, e.g. after a trailing comma
                continue
            index.setdefault(tokens[0].lower(), len(index))
        return index

    def _add_names(self, lines, gender_idx, country_idx):
        """Accumulate the ``name[;count]`` lines of one list file.

        Lines without a count contribute 1, at most once per name per file.
        Blank lines are ignored. Thousands separators (``.``) in counts are
        dropped before conversion.
        """
        n_countries = len(self.countries_encoding)
        counted_once = set()
        for line in lines:
            fields = line.split(';')
            name = fields[0].lower().strip()
            if not name:
                continue
            if len(fields) > 1:
                count = int(fields[1].strip().replace('.', ''))
            else:
                if name in counted_once:
                    continue
                counted_once.add(name)
                count = 1

            if name not in self.name_freq:
                self.name_freq[name] = [[0] * n_countries, [0] * n_countries]
            self.name_freq[name][gender_idx][country_idx] += count

    def _create_pickle(self):
        """Create the name index from the list files and save it next to
        this module as ``data.pickle``."""
        self.name_freq = {}
        for fname in self.names_lists:
            fname = fname.strip().replace("\\", "/")
            tokens = self._file_name_tokens(fname)
            # Entries whose second token is not a known gender (for example
            # a last-name list) carry no gender information; skip them.
            if len(tokens) < 2 or tokens[1] not in self.gender_encoding:
                continue
            gender_idx = self.gender_encoding[tokens[1]]
            country_idx = self.countries_encoding[tokens[0].lower()]

            text = pkgutil.get_data(__name__, fname).decode(encoding='utf-8')
            self._add_names(text.replace("\r", "").split('\n'), gender_idx, country_idx)

        save_dir = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(save_dir, "data.pickle"), "wb") as f:
            pickle.dump(self.name_freq, f)

    def extract_gender(self, name, country=None):
        """Extract the suspected gender from the first name of a person.

        Uses the male/female counts stored for the name, either for one
        country or summed over all countries. A small epsilon (1e-6) is
        added to both counts so the ratios never divide by zero.

        Args:
            name (str): First name of the person. Case-insensitive.
            country (str, optional): Country to restrict the statistics to.
                If None, counts of all countries are summed.
                Case-insensitive.

        Returns:
            str: Gender category, one of:
                - "male" (>90% male)
                - "mostly male" (60-90% male)
                - "ambiguous" (40-60% either gender, unknown name, or no
                  counts for the requested country)
                - "mostly female" (60-90% female)
                - "female" (>90% female)
                - "female and male" (exactly equal counts, e.g. the
                  Russian name "Саша")

        Raises:
            KeyError: If the provided country is not in
                self.countries_encoding.
            TypeError: If name is not a string, or country is neither a
                string nor None.
            ValueError: If name is empty or contains only whitespace.
        """
        # Input validation
        if not isinstance(name, str):
            raise TypeError("Name must be a string")
        if country is not None and not isinstance(country, str):
            raise TypeError("Country must be a string or None")

        name = name.lower().strip()
        if not name:
            raise ValueError("Name cannot be empty or contain only whitespace")

        # The country is resolved before the name so that an unknown country
        # raises KeyError even when the name itself is unknown.
        country_code = None
        if country is not None:
            country_code = self.countries_encoding[country.lower().strip()]

        try:
            m_counts, f_counts = self.name_freq[name][0], self.name_freq[name][1]
        except KeyError:
            return "ambiguous"

        if country_code is not None:
            m_raw, f_raw = m_counts[country_code], f_counts[country_code]
        else:
            m_raw, f_raw = sum(m_counts), sum(f_counts)

        # No data at all for this name/country combination.
        if m_raw == 0 and f_raw == 0:
            return "ambiguous"
        # Equal counts: the name is used for both genders alike.
        if m_raw == f_raw:
            return "female and male"

        epsilon = 1e-6
        m_count = m_raw + epsilon
        f_count = f_raw + epsilon
        female_ratio = f_count / m_count
        male_ratio = m_count / f_count

        if female_ratio > 9:  # >90% female
            return "female"
        if female_ratio > 1.5:  # 60-90% female
            return "mostly female"
        if male_ratio > 9:  # >90% male
            return "male"
        if male_ratio > 1.5:  # 60-90% male
            return "mostly male"
        return "ambiguous"  # 40-60% either gender


if __name__ == "__main__":
    ext = GenderExtractor()
-------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/AustraliaFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Emily;10299 2 | Jessica;9992 3 | Chloe;8287 4 | Olivia;7597 5 | Isabella;7474 6 | Sarah;7415 7 | Sophie;6644 8 | Emma;6400 9 | Georgia;6105 10 | Hannah;5672 11 | Grace;5496 12 | Ella;5221 13 | Charlotte;5024 14 | Mia;4561 15 | Amelia;4281 16 | Lily;4254 17 | Jasmine;4171 18 | Ruby;4152 19 | Zoe;3994 20 | Samantha;3885 21 | Caitlin;3768 22 | Lauren;3534 23 | Maddison;3441 24 | Madison;3225 25 | Lucy;3198 26 | Chelsea;3120 27 | Sienna;3075 28 | Jade;3020 29 | Hayley;2910 30 | Ava;2827 31 | Laura;2812 32 | Rebecca;2803 33 | Amy;2742 34 | Holly;2692 35 | Sophia;2553 36 | Brooke;2529 37 | Natalie;2377 38 | Elizabeth;2255 39 | Rachel;2239 40 | Courtney;2219 41 | Isabelle;2215 42 | Stephanie;2202 43 | Alexandra;2202 44 | Kayla;2132 45 | Lara;2125 46 | Kate;2048 47 | Natasha;2021 48 | Tahlia;2020 49 | Taylor;2006 50 | Paige;1996 51 | Anna;1974 52 | Alyssa;1942 53 | Ashley;1923 54 | Madeline;1889 55 | Ashleigh;1870 56 | Amber;1863 57 | Claire;1831 58 | Matilda;1823 59 | Claudia;1796 60 | Molly;1775 61 | Brianna;1747 62 | Abbey;1744 63 | Erin;1725 64 | Bianca;1652 65 | Mikayla;1639 66 | Phoebe;1569 67 | Eliza;1482 68 | Victoria;1450 69 | Tayla;1413 70 | Monique;1339 71 | Brittany;1332 72 | Taylah;1328 73 | Tara;1327 74 | Zara;1326 75 | Abigail;1324 76 | Imogen;1270 77 | Maya;1200 78 | Annabelle;1170 79 | Lilly;1160 80 | Kiara;1154 81 | Nicole;1147 82 | Eva;1121 83 | Madeleine;1116 84 | Ebony;1116 85 | Gabrielle;1105 86 | Danielle;1037 87 | Scarlett;1021 88 | Abby;1001 89 | Gemma;996 90 | Bella;993 91 | Alice;963 92 | Katie;962 93 | Gabriell;944 94 | Jennifer;937 95 | Kaitlyn;930 96 | Layla;920 97 | Megan;916 98 | Alana;910 99 | Summer;910 100 | Angelina;901 101 | Ellie;884 102 | Alicia;867 103 | Sofia;865 104 | Mackenzie;858 105 | Bethany;857 106 | 
Melissa;847 107 | Angela;827 108 | Michelle;795 109 | Rachael;751 110 | Evie;729 111 | Charlott;727 112 | Jordan;720 113 | Isabel;717 114 | Julia;683 115 | Alexis;679 116 | Isla;673 117 | Elizabet;634 118 | Vanessa;628 119 | Gabriella;628 120 | Madelein;603 121 | Stephani;603 122 | Cassandra;568 123 | Amanda;565 124 | Stella;564 125 | Savannah;546 126 | Shannon;533 127 | Rhiannon;509 128 | Ivy;490 129 | Tiana;462 130 | Amelie;455 131 | Keira;454 132 | Charli;442 133 | Rose;432 134 | Katherine;424 135 | Paris;423 136 | Alexandr;423 137 | Piper;392 138 | Audrey;388 139 | Sara;372 140 | Caitlyn;349 141 | Addison;343 142 | Lillian;340 143 | Jorja;334 144 | Skye;330 145 | Cassandr;314 146 | Makayla;314 147 | Melanie;308 148 | Willow;307 149 | Bronte;306 150 | Breanna;296 151 | Lisa;296 152 | Poppy;286 153 | Mikaela;267 154 | Violet;263 155 | Nicola;262 156 | Leah;245 157 | Christin;244 158 | Christine;244 159 | Jemma;229 160 | Catherine;215 161 | Shania;214 162 | Jessie;172 163 | Monica;172 164 | Aaliyah;171 165 | Catherin;166 166 | Lola;158 167 | Georgina;158 168 | Talia;158 169 | Mariam;157 170 | Casey;154 171 | Eve;152 172 | Jacinta;145 173 | Renee;142 174 | Christina;137 175 | Kimberle;127 176 | Tamara;122 177 | Tori;121 178 | Alexandria;118 179 | Chantell;118 180 | Harper;117 181 | Katherin;116 182 | Jacqueli;112 183 | Tiffany;109 184 | Naomi;108 185 | Mackenzi;102 186 | Ellen;102 187 | Tegan;101 188 | Ashlee;98 189 | Evelyn;98 190 | Mila;95 191 | Alison;92 192 | Indiana;90 193 | Heidi;90 194 | Rebekah;88 195 | Alica;88 196 | Louise;88 197 | Annabell;88 198 | Mary;85 199 | Milla;85 200 | Charlie;79 201 | Eden;78 202 | Eloise;78 203 | Lilian;74 204 | Tia;73 205 | Britney;69 206 | Elise;65 207 | Josephine;64 208 | Kasey;62 209 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/AustraliaMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Joshua;14168 2 | 
Jack;12865 3 | Thomas;11145 4 | Lachlan;11138 5 | James;10468 6 | Daniel;10185 7 | William;10084 8 | Matthew;9531 9 | Benjamin;8885 10 | Samuel;8064 11 | Liam;7731 12 | Luke;7572 13 | Nicholas;7512 14 | Ryan;7301 15 | Jacob;7298 16 | Ethan;6688 17 | Dylan;6317 18 | Alexander;6179 19 | Jake;6177 20 | Michael;6124 21 | Nathan;5604 22 | Jordan;5448 23 | Jayden;5351 24 | Oliver;5084 25 | Cooper;4774 26 | Riley;4728 27 | Andrew;4597 28 | Adam;4558 29 | Blake;4439 30 | Noah;4407 31 | Harrison;4239 32 | Jackson;4223 33 | Cameron;4114 34 | Zachary;4105 35 | Connor;4006 36 | Mitchell;3941 37 | Christopher;3869 38 | Isaac;3811 39 | Joseph;3807 40 | Patrick;3779 41 | Max;3714 42 | Lucas;3640 43 | Anthony;3607 44 | Hayden;3592 45 | Bailey;3468 46 | Harry;3249 47 | Aaron;3223 48 | Tyler;3215 49 | David;3118 50 | Angus;3017 51 | Joel;2855 52 | John;2762 53 | Jesse;2646 54 | Charlie;2535 55 | Alex;2425 56 | Oscar;2347 57 | Callum;2342 58 | Justin;2332 59 | Kyle;2289 60 | Caleb;2259 61 | Aidan;2236 62 | Henry;2220 63 | Timothy;2170 64 | George;2159 65 | Cody;2142 66 | Jonathan;2087 67 | Brandon;2062 68 | Bradley;2060 69 | Christian;2036 70 | Logan;1978 71 | Jason;1789 72 | Aiden;1756 73 | Peter;1719 74 | Sean;1672 75 | Jeremy;1649 76 | Marcus;1648 77 | Hamish;1642 78 | Charles;1628 79 | Sebastian;1588 80 | Corey;1523 81 | Xavier;1515 82 | Edward;1482 83 | Brendan;1470 84 | Mitchel;1429 85 | Dominic;1388 86 | Robert;1313 87 | Rhys;1277 88 | Zac;1264 89 | Jarrod;1205 90 | Elijah;1202 91 | Ashton;1193 92 | Toby;1160 93 | Ali;1157 94 | Kai;1104 95 | Sam;1100 96 | Mark;1055 97 | Levi;1041 98 | Brodie;1023 99 | Jamie;990 100 | Brayden;985 101 | Hunter;976 102 | Luca;926 103 | Owen;896 104 | Adrian;889 105 | Brock;887 106 | Darcy;884 107 | Kevin;861 108 | Dean;857 109 | Mason;850 110 | Steven;847 111 | Archie;835 112 | Declan;825 113 | Finn;814 114 | Flynn;746 115 | Ben;733 116 | Gabriel;731 117 | Scott;730 118 | Reece;724 119 | Zane;687 120 | Nichola;671 121 | Beau;648 122 | Seth;647 
123 | Jai;625 124 | Nate;617 125 | Jett;580 126 | Mathew;560 127 | Tristan;539 128 | Alexand;515 129 | Christo;486 130 | Bryce;480 131 | Eli;457 132 | Paul;428 133 | Stephen;405 134 | Leo;383 135 | Alexande;358 136 | Lincoln;357 137 | Hugo;341 138 | Jasper;331 139 | Harris;322 140 | Koby;315 141 | Shane;297 142 | Travis;292 143 | Simon;286 144 | Shaun;279 145 | Richard;274 146 | Christop;261 147 | Christia;245 148 | Chase;234 149 | Harriso;227 150 | Sebastia;185 151 | Kurt;184 152 | Jonatha;176 153 | Kane;170 154 | Tyson;168 155 | Jared;156 156 | Sebestian;154 157 | Tony;154 158 | Austin;119 159 | Trista;118 160 | Christi;117 161 | Charle;112 162 | Domini;111 163 | Ryder;110 164 | Jaxon;105 165 | Felix;105 166 | Braxton;104 167 | Kieran;101 168 | Trent;101 169 | Phoenix;101 170 | Brayde;98 171 | Taj;97 172 | Harley;94 173 | Phillip;90 174 | Billy;83 175 | Jay;81 176 | Rory;73 177 | Jye;60 178 | Tom;58 179 | Julian;58 180 | Will;58 181 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/CzechFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Áda 2 | Ája 3 | Šárinka 4 | Šárka 5 | Šarlota 6 | Šarlotka 7 | Štěpánka 8 | Štěpa 9 | Štěpka 10 | Žaneta 11 | Žanetka 12 | Žofie 13 | Žofinka 14 | Žofka 15 | Adéla 16 | Adélka 17 | Ada 18 | Adriana 19 | Adrianka 20 | Agáta 21 | Agátka 22 | Alča 23 | Alžběta 24 | Alžbětka 25 | Ala 26 | Albína 27 | Albínka 28 | Alena 29 | Alenka 30 | Alexandra 31 | Alička 32 | Ali 33 | Alice 34 | Amálie 35 | Amálka 36 | Anča 37 | Anastázie 38 | Anastázka 39 | Anděla 40 | Andělka 41 | Andrea 42 | Andrejka 43 | Andula 44 | Andulka 45 | Anežka 46 | Aneta 47 | Anetka 48 | Anička 49 | Anina 50 | Aninka 51 | Anka 52 | Anna 53 | Antonie 54 | Anuška 55 | Apolena 56 | Apolenka 57 | Bára 58 | Běla 59 | Bělinka 60 | Bělka 61 | Běta 62 | Bětka 63 | Bětuška 64 | Bětunka 65 | Barča 66 | Barbora 67 | Barborka 68 | Baruška 69 | Barunka 70 | Beáta 71 | 
Beátka 72 | Berta 73 | Bertička 74 | Bertinka 75 | Blaža 76 | Blažena 77 | Blaženka 78 | Blažka 79 | Blanička 80 | Blanka 81 | Boža 82 | Božena 83 | Boženka 84 | Božka 85 | Bohda 86 | Bohdana 87 | Bohuška 88 | Bohumila 89 | Bohuna 90 | Bohunka 91 | Bohuslava 92 | Brigit 93 | Brigita 94 | Brigitka 95 | Cílinka 96 | Cecílie 97 | Cecilka 98 | Cilka 99 | Dáša 100 | Dášenka 101 | Dagmar 102 | Dagmarka 103 | Danča 104 | Dana 105 | Danička 106 | Daniela 107 | Danielka 108 | Danka 109 | Danuška 110 | Dara 111 | Darina 112 | Darinka 113 | Darja 114 | Darka 115 | Deni 116 | Denisa 117 | Deniska 118 | Dia 119 | Diana 120 | Dianka 121 | Dita 122 | Ditka 123 | Dobinka 124 | Dobromila 125 | Dobruška 126 | Dorka 127 | Dorota 128 | Dorotka 129 | Doubravka 130 | Draha 131 | Drahomíra 132 | Drahoslava 133 | Drahuška 134 | Edita 135 | Editka 136 | Ela 137 | Elena 138 | Elenka 139 | Eliška 140 | Elka 141 | Emílie 142 | Ema 143 | Emilka 144 | Erika 145 | Ester 146 | Esterka 147 | Evža 148 | Evženie 149 | Evženka 150 | Eva 151 | Evelína 152 | Evelínka 153 | Evička 154 | Evinka 155 | Evka 156 | Evuška 157 | Evulka 158 | Fanka 159 | Fany 160 | Fanynka 161 | Fráňa 162 | Františka 163 | Frantina 164 | Gábi 165 | Gábina 166 | Gabi 167 | Gabriela 168 | Gabrielka 169 | Gita 170 | Gitka 171 | Gizela 172 | Gizelka 173 | Hana 174 | Hanička 175 | Haninka 176 | Hanka 177 | Hedva 178 | Hedvička 179 | Hedvika 180 | Hela 181 | Helena 182 | Helenka 183 | Hermína 184 | Ida 185 | Ilona 186 | Ilonka 187 | Ingrid 188 | Irča 189 | Irena 190 | Irenka 191 | Irma 192 | Iva 193 | Ivana 194 | Ivanka 195 | Iveta 196 | Ivetka 197 | Ivka 198 | Ivona 199 | Ivonka 200 | Ivuška 201 | Izabela 202 | Izabelka 203 | Jíťa 204 | Jana 205 | Janička 206 | Janinka 207 | Jarča 208 | Jarka 209 | Jarmila 210 | Jaroslava 211 | Jaruška 212 | Jarunka 213 | Jiřa 214 | Jiřina 215 | Jiřinka 216 | Jiřka 217 | Jindřiška 218 | Jindra 219 | Jitka 220 | Jituška 221 | Johana 222 | Johanka 223 | Jola 224 | Jolana 225 | Jolanka 226 | Jolka 227 
| Judita 228 | Juditka 229 | Julča 230 | Julie 231 | Julinka 232 | Julka 233 | Justýna 234 | Justýnka 235 | Káča 236 | Káťa 237 | Kája 238 | Kájinka 239 | Kačenka 240 | Kačka 241 | Kaťka 242 | Kamča 243 | Kamila 244 | Kamilka 245 | Karča 246 | Karin 247 | Karina 248 | Karinka 249 | Karla 250 | Karlička 251 | Karolína 252 | Karolínka 253 | Kateřina 254 | Katka 255 | Katuška 256 | Klára 257 | Klárinka 258 | Klárka 259 | Klaudie 260 | Kristýna 261 | Kristýnka 262 | Krista 263 | Květa 264 | Květka 265 | Květoslava 266 | Květuška 267 | Líba 268 | Lída 269 | Lýdie 270 | Laděnka 271 | Lada 272 | Ladka 273 | Laduška 274 | Laura 275 | Laurinka 276 | Lea 277 | Lenička 278 | Lenka 279 | Leona 280 | Leonka 281 | Lia 282 | Liana 283 | Liběna 284 | Liběnka 285 | Liba 286 | Libuše 287 | Libuška 288 | Lidka 289 | Liduška 290 | Lidunka 291 | Liliana 292 | Linda 293 | Ljuba 294 | Lucie 295 | Lucinka 296 | Lucka 297 | Ludmila 298 | Ludmilka 299 | Máňa 300 | Málinka 301 | Mánička 302 | Márinka 303 | Míša 304 | Míla 305 | Mařenka 306 | Magda 307 | Magdaléna 308 | Magdička 309 | Mahulena 310 | Mahulenka 311 | Majka 312 | Marcela 313 | Marcelka 314 | Marička 315 | Mariana 316 | Marie 317 | Marika 318 | Marina 319 | Marinka 320 | Markéta 321 | Markétka 322 | Marta 323 | Martička 324 | Martina 325 | Martinka 326 | Maruška 327 | Matylda 328 | Michaela 329 | Michala 330 | Milada 331 | Miladka 332 | Milena 333 | Milenka 334 | Milka 335 | Miloslava 336 | Miluše 337 | Miluška 338 | Miriam 339 | Mirka 340 | Miroslava 341 | Monča 342 | Monička 343 | Monika 344 | Naďa 345 | Naďka 346 | Naděžda 347 | Naděnka 348 | Natálie 349 | Natálka 350 | Nataša 351 | Nela 352 | Nelinka 353 | Niki 354 | Nikol 355 | Nikola 356 | Nikolka 357 | Nina 358 | Nora 359 | Olívie 360 | Oldřiška 361 | Olda 362 | Olga 363 | Olina 364 | Olinka 365 | Otýlie 366 | Otylka 367 | Patricie 368 | Pavlína 369 | Pavlínka 370 | Pavla 371 | Pavlička 372 | Peťa 373 | Petra 374 | Petruška 375 | Ráďa 376 | Rút 377 | Růža 378 | Růžena 379 
| Růženka 380 | Radana 381 | Radanka 382 | Radka 383 | Radmila 384 | Raduška 385 | Radunka 386 | Regína 387 | Renáta 388 | Renátka 389 | Renča 390 | Renatka 391 | Romča 392 | Romana 393 | Romanka 394 | Romi 395 | Romka 396 | Sára 397 | Sárinka 398 | Sárka 399 | Sáva 400 | Saša 401 | Sabina 402 | Sabinka 403 | Sandra 404 | Saskie 405 | Silva 406 | Silvie 407 | Silvinka 408 | Simča 409 | Simona 410 | Simonka 411 | Slávinka 412 | Slávka 413 | Slavěna 414 | Soňa 415 | Sonička 416 | Stáňa 417 | Stánička 418 | Stázička 419 | Stázka 420 | Stanislava 421 | Stela 422 | Stelinka 423 | Světla 424 | Světlana 425 | Světlanka 426 | Svata 427 | Svatava 428 | Svatka 429 | Táňa 430 | Tánička 431 | Týna 432 | Taťána 433 | Tamara 434 | Tereza 435 | Terezka 436 | Teri 437 | Terinka 438 | Terka 439 | Tery 440 | Tonička 441 | Tonka 442 | Tony 443 | Věra 444 | Věrka 445 | Věruška 446 | Věrunka 447 | Valérie 448 | Vanda 449 | Vendula 450 | Vendulka 451 | Vendy 452 | Verča 453 | Veronika 454 | Verunka 455 | Viki 456 | Viktorie 457 | Viktorka 458 | Vilma 459 | Viola 460 | Violka 461 | Vláďa 462 | Vlaďka 463 | Vladěna 464 | Vladěnka 465 | Vlasta 466 | Vlastička 467 | Vlastinka 468 | Xenie 469 | Zdeňka 470 | Zdena 471 | Zdenička 472 | Zdeninka 473 | Zdenka 474 | Zdislava 475 | Zina 476 | Zita 477 | Zlata 478 | Zlatinka 479 | Zlatka 480 | Zlatuška 481 | Zoe 482 | Zora 483 | Zorinka 484 | Zorka 485 | Zuzana 486 | Zuzanka 487 | Zuzi 488 | Zuzka -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/CzechMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Áda 2 | Čeněk 3 | Čenda 4 | Čestmír 5 | Řehoř 6 | Řehořek 7 | Šimůnek 8 | Šimon 9 | Šimonek 10 | Štěpán 11 | Štěpánek 12 | Štěpa 13 | Štěpek 14 | Štefa 15 | Štefan 16 | Štefek 17 | Adámek 18 | Ada 19 | Adam 20 | Adolf 21 | Adolfek 22 | Alánek 23 | Alan 24 | Albert 25 | Albertek 26 | Aleš 27 | Alexandr 28 | Alexej 29 | Alois 30 | Ambrož 31 | 
Andrej 32 | Antonín 33 | Arnošt 34 | Arnoštek 35 | Artur 36 | Arturek 37 | Augustýn 38 | Béďa 39 | Béda 40 | Břeťa 41 | Břetík 42 | Břetěk 43 | Břetislav 44 | Bartoloměj 45 | Bedříšek 46 | Bedřich 47 | Ben 48 | Benedikt 49 | Bernard 50 | Bertík 51 | Blažej 52 | Blažek 53 | Blahoslav 54 | Bořík 55 | Bořa 56 | Bořek 57 | Bořivoj 58 | Bohdan 59 | Bohouš 60 | Bohuš 61 | Bohumír 62 | Bohumil 63 | Bohuslav 64 | Bolek 65 | Boleslav 66 | Bonifác 67 | Borek 68 | Boris 69 | Borisek 70 | Brožík 71 | Brož 72 | Broník 73 | Broněk 74 | Bronislav 75 | Bruno 76 | Ctibor 77 | Ctirad 78 | Dalek 79 | Dalibor 80 | Dalimil 81 | Dan 82 | Danek 83 | Daniel 84 | Danoušek 85 | Davídek 86 | David 87 | Dobroslav 88 | Dominik 89 | Drahoslav 90 | Dušánek 91 | Dušan 92 | Edík 93 | Eda 94 | Eduard 95 | Emánek 96 | Eman 97 | Emanuel 98 | Emil 99 | Erik 100 | Evža 101 | Evžen 102 | Evženek 103 | Fanouš 104 | Fanoušek 105 | Felix 106 | Ferdík 107 | Ferda 108 | Ferdinand 109 | Filípek 110 | Filda 111 | Filip 112 | Fráňa 113 | Francek 114 | Frantík 115 | Franta 116 | František 117 | Gábin 118 | Gabriel 119 | Gustík 120 | Gusta 121 | Gustav 122 | Hanuš 123 | Havel 124 | Havlík 125 | Herbert 126 | Herbertek 127 | Herman 128 | Hořek 129 | Honzíček 130 | Honzík 131 | Honza 132 | Hubert 133 | Hubertek 134 | Hugo 135 | Hynek 136 | Ignác 137 | Igor 138 | Igorek 139 | Ilja 140 | Ivánek 141 | Ivan 142 | Ivoš 143 | Ivo 144 | Jáchym 145 | Jíra 146 | Jakoubek 147 | Jakub 148 | Jan 149 | Janek 150 | Jarda 151 | Jarek 152 | Jarmil 153 | Jaromír 154 | Jaroslav 155 | Jaroušek 156 | Jeňa 157 | Jeníček 158 | Jeník 159 | Jenda 160 | Jeroným 161 | Jeronýmek 162 | Jiříček 163 | Jiří 164 | Jiřík 165 | Jindříšek 166 | Jindřich 167 | Jindra 168 | Jiránek 169 | Jirka 170 | Jiroušek 171 | Jožánek 172 | Joža 173 | Jožka 174 | Jonáš 175 | Jonášek 176 | Josef 177 | Joska 178 | Julek 179 | Julius 180 | Jura 181 | Kájík 182 | Kájínek 183 | Kája 184 | Kamil 185 | Karel 186 | Karlíček 187 | Karlík 188 | Kazimír 189 | Klement 190 | 
Kristián 191 | Kryštůfek 192 | Kryštof 193 | Kubíček 194 | Kubík 195 | Kuba 196 | Květoš 197 | Květoslav 198 | Kvido 199 | Láďa 200 | Ládíček 201 | Ládík 202 | Ládínek 203 | Laďa 204 | Ladislav 205 | Leoš 206 | Leo 207 | Leopold 208 | Libek 209 | Libor 210 | Liborek 211 | Lojzík 212 | Lojza 213 | Lojzek 214 | Luďa 215 | Lubík 216 | Luba 217 | Lubek 218 | Luboš 219 | Lubošek 220 | Lubomír 221 | Lubor 222 | Luděček 223 | Luděk 224 | Ludvík 225 | Ludva 226 | Lukáš 227 | Lukášek 228 | Luki 229 | Lumír 230 | Lumírek 231 | Míša 232 | Míla 233 | Mara 234 | Marcel 235 | Mareček 236 | Marek 237 | Marián 238 | Martínek 239 | Martin 240 | Matýsek 241 | Matěj 242 | Matouš 243 | Matoušek 244 | Max 245 | Maxmilián 246 | Medard 247 | Michálek 248 | Michal 249 | Miki 250 | Mikuláš 251 | Mikulášek 252 | Milánek 253 | Milan 254 | Milda 255 | Miloš 256 | Milošek 257 | Miloslav 258 | Mirda 259 | Mireček 260 | Mirek 261 | Miroslav 262 | Mojmír 263 | Norbert 264 | Oldík 265 | Oldřich 266 | Olda 267 | Oleg 268 | Olin 269 | Oliver 270 | Ondřej 271 | Ondrášek 272 | Ondra 273 | Oskárek 274 | Oskar 275 | Otík 276 | Ota 277 | Otakárek 278 | Otakar 279 | Otmar 280 | Oto 281 | Péťa 282 | Přemek 283 | Přemysl 284 | Pankrác 285 | Patrik 286 | Pavel 287 | Pavlíček 288 | Pavlík 289 | Peťa 290 | Peťka 291 | Peťulka 292 | Pepíček 293 | Pepík 294 | Pepa 295 | Petříček 296 | Petřík 297 | Petr 298 | Pravoslav 299 | Prokop 300 | Ráďa 301 | Ríša 302 | Radeček 303 | Radek 304 | Radim 305 | Radimek 306 | Radomír 307 | Radoslav 308 | Radoušek 309 | Radovan 310 | René 311 | Renoušek 312 | Richard 313 | Rob 314 | Robert 315 | Robertek 316 | Robin 317 | Roland 318 | Románek 319 | Roman 320 | Rosťa 321 | Rostíček 322 | Rostík 323 | Rostislav 324 | Ruda 325 | Rudolf 326 | Rudolfek 327 | Saša 328 | Sam 329 | Samuel 330 | Servác 331 | Silvestr 332 | Sláva 333 | Slávek 334 | Slavomír 335 | Soběslav 336 | Stáňa 337 | Staňa 338 | Staníček 339 | Standa 340 | Stanislav 341 | Sváťa 342 | Svaťa 343 | Svatopluk 344 | 
Svatoslav 345 | Tadeáš 346 | Teo 347 | Teodor 348 | Tibor 349 | Tomáš 350 | Tomášek 351 | Tománek 352 | Tomík 353 | Tom 354 | Toman 355 | Toníček 356 | Toník 357 | Tonda 358 | Tonin 359 | Váňa 360 | Váňuška 361 | Václav 362 | Véna 363 | Víťa 364 | Vítězslav 365 | Vít 366 | Vítek 367 | Věnceslav 368 | Věroslav 369 | Vašík 370 | Vašek 371 | Valda 372 | Valdemar 373 | Valentýn 374 | Valentýnek 375 | Vavřík 376 | Vavřineček 377 | Vavřinec 378 | Venda 379 | Vendelín 380 | Venouš 381 | Venoušek 382 | Viki 383 | Viktor 384 | Vilém 385 | Vilémek 386 | Vilík 387 | Vilímek 388 | Vilda 389 | Vinca 390 | Vincek 391 | Vincenc 392 | Vláďa 393 | Vládíček 394 | Vládík 395 | Vládínek 396 | Vladan 397 | Vladimír 398 | Vladimírek 399 | Vladislav 400 | Vlastík 401 | Vlasta 402 | Vlastimil 403 | Vlastislav 404 | Vojtíšek 405 | Vojtík 406 | Vojtěch 407 | Vojta 408 | Vojtek 409 | Vráťa 410 | Vratislav 411 | Zbyňa 412 | Zbyšek 413 | Zbyněček 414 | Zbyněk 415 | Zdeněk 416 | Zdena 417 | Zdenda 418 | Zdeneček 419 | Zdenek 420 | Zikmund -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/FinlandFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Åsa 2 | Aamu 3 | Aija 4 | Aila 5 | Aili 6 | Aino 7 | Aira 8 | Aliisa 9 | Amanda 10 | Anette 11 | Anita 12 | Anja 13 | Anna 14 | Anna-Liisa 15 | Anne 16 | Anneli 17 | Annemari 18 | Anni 19 | Anniina 20 | Annika 21 | Annikki 22 | Annukka 23 | Anu 24 | Arja 25 | Armi 26 | Auli 27 | Aune 28 | Aurora 29 | Carita 30 | Carola 31 | Eeva 32 | Eija 33 | Eija-Riitta 34 | Eila 35 | Eliisa 36 | Elina 37 | Elisa 38 | Elisabeth 39 | Ella 40 | Elsa 41 | Emilia 42 | Emma 43 | Emmi 44 | Essi 45 | Eveliina 46 | Hanna 47 | Hanna-Maria 48 | Hannele 49 | Heidi 50 | Helena 51 | Heli 52 | Heljä 53 | Helmi 54 | Helvi 55 | Henna 56 | Henriikka 57 | Ida 58 | Iida 59 | Iines 60 | Ilse 61 | Ilta 62 | Impi 63 | Irene 64 | Jaana 65 | Jasmin 66 | Jenna 67 | Jenni 68 | 
Johanna 69 | Jonna 70 | Josefiina 71 | Julia 72 | Justiina 73 | Kaari 74 | Kaarina 75 | Kaija 76 | Kaiju 77 | Kaisa 78 | Karita 79 | Karoliina 80 | Katariina 81 | Kati 82 | Katja 83 | Katri 84 | Kerttu 85 | Kia 86 | Kirsi 87 | Kirsti 88 | Kristiina 89 | Kyllikki 90 | Laila 91 | Laura 92 | Leea 93 | Leena 94 | Leila 95 | Lempi 96 | Liisa 97 | Liisi 98 | Lotta 99 | Lyyli 100 | Maarit 101 | Maija 102 | Maire 103 | Mari 104 | Maria 105 | Marja 106 | Marjo 107 | Mathilda 108 | Meeri 109 | Merja 110 | Miia 111 | Mikaela 112 | Milla 113 | Minna 114 | Mira 115 | Monica 116 | Natalia 117 | Nea 118 | Nelma 119 | Niina 120 | Noora 121 | Oili 122 | Olga 123 | Oona 124 | Outi 125 | Päivä 126 | Päivi 127 | Paula 128 | Pauliina 129 | Petra 130 | Pia 131 | Piia 132 | Piia-Noora 133 | Pinja 134 | Pirjo 135 | Pirkko 136 | Raakel 137 | Raija 138 | Reeta 139 | Reija 140 | Riia 141 | Riikka 142 | Riitta 143 | Rita 144 | Ritva 145 | Ronja 146 | Roosa 147 | Säde 148 | Saara 149 | Saimi 150 | Salla 151 | Sanelma 152 | Sanna 153 | Sanna-Leena 154 | Sanni 155 | Sara 156 | Sari 157 | Satu 158 | Seija 159 | Selma 160 | Senja 161 | Siiri 162 | Sini 163 | Sinikka 164 | Sirja 165 | Sirkka 166 | Sirpa 167 | Sisko 168 | Sofia 169 | Sointu 170 | Sonja 171 | Suoma 172 | Susanna 173 | Suvi 174 | Taija 175 | Taimi 176 | Taina 177 | Tanja 178 | Tarja 179 | Teija 180 | Tellervo 181 | Terhi 182 | Terttu 183 | Tiia 184 | Tiina 185 | Tove 186 | Tuija 187 | Tuula 188 | Tuuli 189 | Tuulia 190 | Tuulikki 191 | Tytti 192 | Tyyne 193 | Ulla 194 | Ulla-Maj 195 | Ulpu 196 | Vappu 197 | Veera 198 | Venla 199 | Vilma 200 | Virpi -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/FinlandMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Åke 2 | Aamos 3 | Aapo 4 | Aarne 5 | Aatos 6 | Ahti 7 | Aki 8 | Aki-Petteri 9 | Akseli 10 | Aleksi 11 | Anssi 12 | Antero 13 | Antti 14 | Ari 15 | Ari-Pekka 16 | Armas 17 | 
Arsi 18 | Arto 19 | Arttu 20 | Arvi 21 | Arvid 22 | Atso 23 | Atte 24 | August 25 | Aulis 26 | Bo 27 | Christian 28 | Daavid 29 | Eemeli 30 | Eemil 31 | Eerik 32 | Eero 33 | Eetu 34 | Eino 35 | Einojuhani 36 | Elias 37 | Emppu 38 | Ensio 39 | Erkki 40 | Erno 41 | Esa 42 | Esa-Pekka 43 | Esko 44 | Frans 45 | Fredrik 46 | Hannes 47 | Hanno 48 | Hannu 49 | Harri 50 | Harry 51 | Heikki 52 | Henri 53 | Henrik 54 | Hessu 55 | Hugo 56 | Iiro 57 | Iivari 58 | Ilkka 59 | Ilmari 60 | Ismo 61 | Isto 62 | Jaakko 63 | Jali 64 | Jan 65 | Jan-Erik 66 | Jani 67 | Janne 68 | Jari 69 | Jarkko 70 | Jarmo 71 | Jarno 72 | Jaska 73 | Jean 74 | Jere 75 | Jesse 76 | Joel 77 | Johan 78 | Johannes 79 | Jonatan 80 | Joni 81 | Jonne 82 | Joona 83 | Joonas 84 | Jorma 85 | Jouko 86 | Jouni 87 | Juha 88 | Juhana 89 | Juhani 90 | Juho 91 | Jukka 92 | Jukka-Pekka 93 | Jussi 94 | Juuso 95 | Jyri 96 | Jyrki 97 | Kaarle 98 | Kaarlo 99 | Kai 100 | Kaj 101 | Kalervo 102 | Kalevi 103 | Kalle 104 | Kari 105 | Karri 106 | Kauko 107 | Keijo 108 | Keke 109 | Kim 110 | Kimi 111 | Kimmo 112 | Konsta 113 | Kristian 114 | Kyösti 115 | Lalli 116 | Lasse 117 | Lassi 118 | Lauri 119 | Leevi 120 | Luukas 121 | Magnus 122 | Manu 123 | Marco 124 | Marcus 125 | Markku 126 | Marko 127 | Markus 128 | Martti 129 | Matias 130 | Matti 131 | Mattiesko 132 | Mauno 133 | Maunu 134 | Mauri 135 | Miika 136 | Miikka 137 | Mika 138 | Mika-Matti 139 | Mikael 140 | Mikki 141 | Mikko 142 | Miska 143 | Niklas 144 | Niko 145 | Nils 146 | Olavi 147 | Olle 148 | Olli 149 | Olli-Pekka 150 | Onni 151 | Oskar 152 | Oskari 153 | Otto 154 | Paavo 155 | Panu 156 | Pasi 157 | Pauli 158 | Pekka 159 | Pentti 160 | Pertti 161 | Peter 162 | Petri 163 | Petteri 164 | Pirkka 165 | Pontus 166 | Raimo 167 | Raine 168 | Raino 169 | Rasmus 170 | Reijo 171 | Reima 172 | Reino 173 | Retu 174 | Riku 175 | Risto 176 | Robin 177 | Roope 178 | Sakari 179 | Saku 180 | Sami 181 | Samppa 182 | Sampsa 183 | Samsa 184 | Samuli 185 | Santeri 186 | Sauli 187 | 
Sebastian 188 | Seppo 189 | Severi 190 | Stefan 191 | Stig 192 | Tahvo 193 | Taneli 194 | Tapani 195 | Tapio 196 | Tauno 197 | Teemu 198 | Teppo 199 | Tero 200 | Teuvo 201 | Timo 202 | Toini 203 | Toivo 204 | Tom 205 | Tomi 206 | Tommi 207 | Tommy 208 | Toni 209 | Tony 210 | Topi 211 | Topias 212 | Tuomas 213 | Tuomo 214 | Tuukka 215 | Tyko 216 | Urho 217 | Väinö 218 | Valentin 219 | Valtteri 220 | Veijo 221 | Veikko 222 | Veli 223 | Veli-Matti 224 | Veli-Pekka 225 | Vesa 226 | Vihtori 227 | Vilho 228 | Viljo 229 | Ville 230 | Ville-Veikko 231 | Yrjö -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/GreeceFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Agathi 2 | Aikaterini 3 | Aleka 4 | Alexandra 5 | Alike 6 | Andonia 7 | Andromakhi 8 | Androula 9 | Angela 10 | Angela 11 | Angeliki 12 | Anna 13 | Aspasia 14 | Athina 15 | Despo 16 | Despoina 17 | Dimitra 18 | Dina 19 | Eirini 20 | Eleni 21 | Elevtheria 22 | Elli 23 | Evangelia 24 | Foteini 25 | Fotoula 26 | Georgia 27 | Ioanna 28 | Ismini 29 | Katina 30 | Khristina 31 | Kleio 32 | Kleopatra 33 | Konstandina 34 | Koula 35 | Krysanthi 36 | Lia 37 | Litsa 38 | Magdalini 39 | Makhi 40 | Margarita 41 | Margaro 42 | Maria 43 | Marika 44 | Mario 45 | Maroula 46 | Nitsa 47 | Noula 48 | Panagiota 49 | Pinelopi 50 | Pipitsa 51 | Pitsa 52 | Popi 53 | Popi 54 | Pota 55 | Rena 56 | Ritsa 57 | Roula 58 | Sofia 59 | Stella 60 | Tina 61 | Tina 62 | Toula 63 | Toula 64 | Vasiliki 65 | Zoi 66 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/GreeceMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Agamemnon 2 | Agapios 3 | Agisilaos 4 | Aimilios 5 | Akhilleus 6 | Alekos 7 | Alexandros 8 | Anastasios 9 | Anastasis 10 | Andonios 11 | Andonis 12 | Andreas 13 | Angelos 14 | Apostolos 15 | Aristeidis 16 | 
Aristotelis 17 | Athanasios 18 | Avraam 19 | Bambis 20 | Constandinos 21 | Costas 22 | Costis 23 | Cotsos 24 | Daniil 25 | Dimitrios 26 | Dimos 27 | Dinos 28 | Dionysios 29 | Efraimios 30 | Elevtherios 31 | Emmanouil 32 | Emmanuil 33 | Epameinondas 34 | Errikos 35 | Evangelos 36 | Evgenios 37 | Evstratios 38 | Filimon 39 | Fotios 40 | Fotis 41 | Frixos 42 | Gavriil 43 | Georgios 44 | Gerasimos 45 | Giannis 46 | Giannos 47 | Giorgis 48 | Giorgos 49 | Gogos 50 | Grigorios 51 | Grigoris 52 | Iakovos 53 | Ilias 54 | Ioannis 55 | Ippokratis 56 | Khambis 57 | Kharalambos 58 | Kharilaos 59 | Khristoforos 60 | Khristos 61 | Konstandinos 62 | Kostas 63 | Kostis 64 | Kotsos 65 | Kyriakos 66 | Lambos 67 | Lazaros 68 | Lefteris 69 | Lefteris 70 | Leftheris 71 | Lephteris 72 | Lephteris 73 | Lephtheris 74 | Leonidas 75 | Loukas 76 | Lukas 77 | Makis 78 | Manolis 79 | Manos 80 | Margaritis 81 | Marios 82 | Markos 83 | Matthaios 84 | Menelaos 85 | Menios 86 | Mikhail 87 | Mikhalis 88 | Mikhos 89 | Miltiadis 90 | Miltos 91 | Mimis 92 | Mitsos 93 | Neofytos 94 | Nikiforos 95 | Nikolaos 96 | Nikos 97 | Nionios 98 | Odyssevs 99 | Othon 100 | Panagiotis 101 | Pandazis 102 | Panos 103 | Pavlos 104 | Pavsanias 105 | Periklis 106 | Petros 107 | Photios 108 | Photis 109 | Prokopios 110 | Prokopis 111 | Savvas 112 | Sofoklis 113 | Sophoklis 114 | Sofos 115 | Sophos 116 | Sokratis 117 | Solon 118 | Spyridon 119 | Spyros 120 | Stamatios 121 | Stamatis 122 | Stavros 123 | Stefanos 124 | Stephanos 125 | Stefos 126 | Stephos 127 | Stratis 128 | Takis 129 | Takis 130 | Tassos 131 | Telis 132 | Thanasis 133 | Thanos 134 | Themis 135 | Themistoklis 136 | Theodoros 137 | Theodosios 138 | Theodosis 139 | Thodoris 140 | Thomas 141 | Tilemakhos 142 | Tonis 143 | Triandafyllos 144 | Vangelis 145 | Vangos 146 | Vasileios 147 | Vassos 148 | Xenofon 149 | Xenos 150 | Yannis 151 | Zakharias 152 | Zisis 153 | -------------------------------------------------------------------------------- 
/libs/gender_extractor/nameLists/HungaryFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Ada 2 | Adél 3 | Ádi 4 | Adrienne 5 | Ági 6 | Ágnes 7 | Ágota 8 | Alexa 9 | Alexandra 10 | Alice 11 | Aliz 12 | Alizka 13 | Amália 14 | Anci 15 | Ancsi 16 | Angela 17 | Angyalka 18 | Anikó 19 | Anna 20 | Anni 21 | Annus 22 | Annusa 23 | Aranka 24 | Auguszta 25 | Aurelia 26 | Bábá 27 | Babi 28 | Bébi 29 | Bella 30 | Berta 31 | Bertácska 32 | Bertus 33 | Bertuska 34 | Betti 35 | Bianca 36 | Blanka 37 | Bora 38 | Borbála 39 | Borcsa 40 | Borin 41 | Boriska 42 | Borka 43 | Borsála 44 | Bske 45 | Bski 46 | Bzsi 47 | Buba 48 | Cecilia 49 | Cill 50 | Czeczilia 51 | Dódi 52 | Dóra 53 | Dorottya 54 | Duci 55 | Edit 56 | Elvira 57 | Elza 58 | Emese 59 | Emma 60 | Emmi 61 | Emmus 62 | Emmuska 63 | Emke 64 | Enik 65 | Erzsébet 66 | Erzsi 67 | Erzsike 68 | Erzsók 69 | Eszter 70 | Eszti 71 | Eta 72 | Etel 73 | Etelka 74 | Etus 75 | Éva 76 | Évácska 77 | Évi 78 | Evike 79 | Ferike 80 | Franci 81 | Franciska 82 | Frederika 83 | Frici 84 | Frida 85 | Gabi 86 | Gabriella 87 | Georgina 88 | Gertrud 89 | Gizella 90 | Gizi 91 | Gizike 92 | Gizus 93 | Guszti 94 | Gyöngyi 95 | Gyöngyvér 96 | Györgyike 97 | Hajnal 98 | Hajnalka 99 | Hédi 100 | Hedvig 101 | Iboyka 102 | Iboyla 103 | Ica 104 | Ila 105 | Ildikó 106 | Ili 107 | Ilka 108 | Ilma 109 | Ilon 110 | Ilona, Illona 111 | Ilonka 112 | Ilu 113 | Ilus 114 | Iluska 115 | Irén 116 | Irénke 117 | Irma 118 | Irmus 119 | Irmuska 120 | Izabella 121 | Janka 122 | Johanna 123 | Jolán 124 | Jolánka 125 | Joli 126 | Jozsa 127 | Juci 128 | Jucika 129 | Judit 130 | Julcsa 131 | Júlia 132 | Julianna 133 | Juliska 134 | Jutka 135 | Karola 136 | Karlin 137 | Karolina 138 | Kata 139 | Katalin 140 | Kati 141 | Katica 142 | Katinka 143 | Kató 144 | Katóka 145 | Katus 146 | Klára 147 | Klári 148 | Klarika 149 | Klarisza 150 | Kriska 151 | Kriszta 152 | Krisztina 153 | Lenczi 154 | Lenke 155 | Leonia 156 | Leopoldina 
157 | Lici 158 | Lily 159 | Lipótka 160 | Lujza 161 | Lujzi 162 | Lujzika 163 | Magda 164 | Magdi 165 | Magdolna 166 | Málcsi 167 | Máli 168 | Málika 169 | Malvin 170 | Malvina 171 | Mara 172 | Marcsa 173 | Margit 174 | Margita 175 | Margó 176 | Mari 177 | Mária 178 | Marika 179 | Mariaka 180 | Márta 181 | Mártus 182 | Mártuska 183 | Melánia 184 | Nina 185 | Ninácska 186 | Nusi 187 | Olga 188 | Olgácska 189 | Oli 190 | Olivia 191 | Orsolya 192 | Ottilia 193 | Pali 194 | Palika 195 | Palkó 196 | Paula 197 | Paulina 198 | Piri 199 | Piroska 200 | Rézi 201 | Riza 202 | Rizus, Rizuska 203 | Róza 204 | Rozsi 205 | Rozsika 206 | Sára 207 | Sari 208 | Sarika 209 | Sarolta 210 | Sasa 211 | Szeréna 212 | Szidónia 213 | Teca 214 | Tercsa 215 | Teres 216 | Teréz 217 | Terezia 218 | Teri 219 | Terike 220 | Terus 221 | Teruska 222 | Trézsia 223 | Tünde 224 | Valéria 225 | Vali 226 | Vilma 227 | Virág 228 | Zoetar(?) 229 | Zseni 230 | Zsofi 231 | Zsófia 232 | Zsófika 233 | Zsóka 234 | Zsuska 235 | Zsusko 236 | Zsuzsa 237 | Zsuzsanna 238 | Zsuzsi 239 | Zsuzsika 240 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/HungaryMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Ádám 2 | Ádi 3 | Adorján 4 | Ákos 5 | Aladár 6 | Alajos 7 | Albert 8 | Albion 9 | Alfréd 10 | Ali 11 | Álmos 12 | Andi 13 | Andor 14 | András 15 | Andris 16 | Antal 17 | Anti 18 | Arisztid 19 | Armin 20 | Árpád 21 | Ártur 22 | Attila 23 | Balázs 24 | Bálint 25 | Bandi 26 | Béla 27 | Bélus 28 | Béluska 29 | Benci 30 | Benedek 31 | Berci 32 | Bernát 33 | Bertalan 34 | Berti 35 | Bodi 36 | Boldizsár 37 | Botond 38 | Bulcsú 39 | Csaba 40 | Dani 41 | Dániel 42 | Dávid 43 | Dedó 44 | Demeter 45 | Dezsö 46 | Döme 47 | Döm 48 | Duci 49 | Ede 50 | Edi 51 | Elek 52 | Elemér 53 | Eli 54 | Elkán 55 | Emánuel 56 | Endre 57 | Ern 58 | Ferenc 59 | Ferencz 60 | Feri 61 | Ferke 62 | Ferkó 63 | Florian 64 | Frici 65 | 
Frigyes 66 | Fülöp, Fülöpp 67 | Gabi 68 | Gábor 69 | Gabris 70 | Gáspár 71 | Gazsi 72 | Géjza 73 | Gellért 74 | Gergely 75 | Gerg 76 | Ger 77 | Géza 78 | Gézuka 79 | Gézus 80 | Gotlíb 81 | Gottlieb 82 | Gusztáv 83 | György 84 | Gyz 85 | Gyula 86 | Gyulus 87 | Gyuri 88 | Gyurka 89 | Gyuszi 90 | Henrik 91 | Hugo 92 | Ignac, Ignacz 93 | Illés 94 | Imre 95 | Imrus 96 | Isti 97 | István 98 | Izidor 99 | Jakab 100 | Jákob 101 | Jancsi 102 | Jani 103 | Janika 104 | Janko 105 | János 106 | Jenci 107 | Jen 108 | Jensi 109 | Jeromos 110 | Joakim 111 | Jóska 112 | Jószef 113 | Józsi 114 | Kálmán 115 | Kamill 116 | Karcsi 117 | Kari 118 | Károly 119 | Kelemen 120 | Konrád 121 | Kornél 122 | Kristóf 123 | Kurt 124 | Laci 125 | Lacko 126 | Lajcsi 127 | Laji 128 | Lajkó 129 | Lajos 130 | Lala 131 | Lali 132 | László 133 | Lehel 134 | Lekszi 135 | Leo 136 | Lenci 137 | Levente 138 | Lipót 139 | Lóránd, Lóránt 140 | Lrinc 141 | Lukács 142 | Makszi 143 | Marcell 144 | Marci 145 | Marcilka 146 | Markus 147 | Márton 148 | Máté 149 | Mátyás 150 | Maxi 151 | Micu 152 | Miklos 153 | Miksa 154 | Mikulás 155 | Miska 156 | Misi 157 | Mór 158 | Móricz 159 | Nándi 160 | Nándor 161 | Néci 162 | Niki 163 | Ödi 164 | Ödön 165 | Oli 166 | Olivér 167 | Orban 168 | Oszi 169 | Pál 170 | Pali 171 | Palika 172 | Palko 173 | see Péter 174 | Pista 175 | Pisti 176 | Pistika 177 | Pistuka 178 | Pityu 179 | Poldi 180 | Rezs 181 | Richard 182 | Riczi 183 | Robert 184 | Robi 185 | Rudi 186 | Salamon 187 | Samu 188 | Sándor 189 | Sanyi 190 | Sebestyén 191 | Seb 192 | Simi 193 | Simon 194 | Soma 195 | Szabolcs 196 | Szilard 197 | Tamás 198 | Tíbor 199 | Tihamér 200 | Tivadar 201 | Tomi 202 | Tóni 203 | Vencel 204 | Vendel 205 | Vidor 206 | Viktor 207 | Vili 208 | Vilmos 209 | Vince 210 | Vinci 211 | Zoltán 212 | Zsiga 213 | Zsigmond 214 | Zsolt 215 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/IranFemaleUTF8.csv: 
-------------------------------------------------------------------------------- 1 | afareen 2 | afsaneh 3 | afsar 4 | afshan 5 | afsoon 6 | aghigh 7 | ahou 8 | ahoo 9 | akhtar 10 | akram 11 | alaleh 12 | anahita 13 | anusheh 14 | anoosheh 15 | ara 16 | arezoo 17 | arghavan 18 | armaghan 19 | ashraf 20 | asa 21 | asal 22 | assieh 23 | atefeh 24 | atifeh 25 | atoosa 26 | ava 27 | avizeh 28 | azadeh 29 | azin 30 | azita 31 | azar 32 | bahar 33 | baharak 34 | bahareh 35 | bahamin 36 | bahameen 37 | banafsheh 38 | banou 39 | beeta 40 | bita 41 | behnaz 42 | behrokh 43 | behbaha 44 | bolour 45 | bousseh 46 | chalipa 47 | darya 48 | delaram 49 | delbar 50 | delkash 51 | deena 52 | dina 53 | donya 54 | dorri 55 | ehteram 56 | elaheh 57 | elham 58 | elnaz 59 | fakhri 60 | farah 61 | farahnaz 62 | farangis 63 | fariba 64 | farideh 65 | farkhondeh 66 | farrin 67 | farzaneh 68 | faranak 69 | farnaz 70 | fatemeh 71 | fereshteh 72 | fila 73 | firouzeh 74 | feerouzeh 75 | fojan 76 | fozhan 77 | forough 78 | forouzan 79 | forouzandeh 80 | gelareh 81 | ghamzeh 82 | ghassedak 83 | ghazal 84 | ghazaleh 85 | ghodsi 86 | ghoncheh 87 | gisou 88 | geesou 89 | gita 90 | guita 91 | giti 92 | guiti 93 | golbahar 94 | goli 95 | golnar 96 | golnaz 97 | golnessa 98 | golpari 99 | golshan 100 | gordia 101 | habibeh 102 | haideh 103 | haleh 104 | hamideh 105 | hastee 106 | hasti 107 | hediyeh 108 | hengameh 109 | hoda 110 | homa 111 | homeira 112 | hormat 113 | houri 114 | iman 115 | iran 116 | iran-dokht 117 | jannat 118 | jamileh 119 | javaneh 120 | katayoun 121 | khandan 122 | khatereh 123 | khorsheed 124 | khojassteh 125 | kimiya 126 | kobra 127 | kowkab 128 | kokab 129 | ladan 130 | laleh 131 | leyla 132 | laila 133 | leila 134 | leily 135 | layly 136 | laily 137 | lida 138 | lila 139 | lily 140 | lili 141 | mahasti 142 | mahdokht 143 | mahlagha 144 | mahlegha 145 | maheen 146 | mahin 147 | mahrokh 148 | mahkameh 149 | mahnaz 150 | mahnoosh 151 | mahsheed 152 | mahsa 153 | mahta 154 | 
mahtab 155 | mahvash 156 | malakeh 157 | maliheh 158 | mana 159 | mandana 160 | manee 161 | mani 162 | manizheh 163 | marjan 164 | marjaneh 165 | marmar 166 | maryam 167 | mariam 168 | marzieh 169 | masoumeh 170 | mastaneh 171 | mastoureh 172 | mehrangiz 173 | mehrnaz 174 | mehrnoosh 175 | mehry 176 | meshia 177 | mina 178 | minoo 179 | mitra 180 | mozhgan 181 | mozhdeh 182 | mona 183 | moneer 184 | monir 185 | moneereh 186 | monireh 187 | morvareed 188 | nadereh 189 | naghmeh 190 | nahal 191 | naheed 192 | nahid 193 | nargess 194 | naseem 195 | nasim 196 | nastaran 197 | nasreen 198 | nasrin 199 | nava 200 | nayyer 201 | nazafarin 202 | nazanin 203 | nazgol 204 | nazhin 205 | nazy 206 | nazilla 207 | negeen 208 | negin 209 | negar 210 | negah 211 | neda 212 | neshat 213 | niloufar 214 | niki 215 | nikoo 216 | nikou 217 | niyoosha 218 | noor 219 | nour 220 | noushin 221 | noushafarin 222 | oldooz 223 | omeed 224 | omid 225 | oranous 226 | orkideh 227 | padideh 228 | parand 229 | parastoo 230 | paree 231 | pari 232 | pareechehr 233 | pareerou 234 | pareesa 235 | parisa 236 | pareevash 237 | pareeya 238 | parto 239 | parvaneh 240 | parvin 241 | pegah 242 | peymaneh 243 | peyvand 244 | pouneh 245 | pooneh 246 | poupak 247 | pouran 248 | pouran-dokht 249 | pouri 250 | raha 251 | rana 252 | ramesh 253 | rasa 254 | ravan 255 | rima 256 | reema 257 | reyhaneh 258 | rayhaneh 259 | robabeh 260 | roshanak 261 | roudabeh 262 | roxana 263 | roksana 264 | roya 265 | saba 266 | sadaf 267 | saeedeh 268 | sahar 269 | sahba 270 | saghar 271 | salma 272 | salomeh 273 | saman 274 | samila 275 | samin 276 | sameen 277 | samira 278 | samireh 279 | sanam 280 | sanaz 281 | sara 282 | sarvenaz 283 | sayeh 284 | seema 285 | sima 286 | seeta 287 | sita 288 | sepeedeh 289 | sepideh 290 | setareh 291 | shadan 292 | shadee 293 | shadi 294 | shabnam 295 | shaghayegh 296 | shahin 297 | shaheen 298 | shahla 299 | shahzadeh 300 | shahnaz 301 | shahrbanou 302 | shahrnaz 303 | shahrzad 304 | 
shalizeh 305 | shams 306 | sharareh 307 | sheefteh 308 | shervin 309 | sheyda 310 | sheeva 311 | shiva 312 | shideh 313 | sheedeh 314 | shima 315 | shirin 316 | shireen 317 | shirin-banoo 318 | sholeh 319 | shohreh 320 | shokoufeh 321 | shokouh 322 | shouka 323 | simin 324 | sogand 325 | soheila 326 | soraya 327 | soudabeh 328 | soulmaz 329 | souri 330 | suri 331 | sussan 332 | soussan 333 | souzan 334 | tahereh 335 | tahmineh 336 | tala 337 | talayeh 338 | tannaz 339 | tara 340 | taraneh 341 | tarsa 342 | tayyebeh 343 | teena 344 | tina 345 | touba 346 | tooba 347 | touca 348 | touran 349 | vanda 350 | vida 351 | veeda 352 | yalda 353 | yasaman 354 | yass 355 | yeganeh 356 | yekta 357 | zahra 358 | zari 359 | zarrin 360 | zarrin-dokht 361 | zeeba 362 | ziba 363 | zhaleh 364 | jaleh 365 | zhila 366 | jilla 367 | zohreh 368 | zoya 369 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/IranMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | abbas 2 | abou-ali 3 | abouali 4 | abtin 5 | adel 6 | afshar 7 | afshin 8 | ahmad 9 | akbar 10 | ali 11 | ali-dad 12 | alidad 13 | amin 14 | amir 15 | amjad 16 | anoush 17 | anoushiravan 18 | arad 19 | aram 20 | arash 21 | ardalan 22 | ardavan 23 | ardeshir 24 | aref 25 | arman 26 | armeen 27 | arsalan 28 | arshia 29 | arya 30 | arzhang 31 | asad 32 | asghar 33 | ashkan 34 | atash 35 | aurang 36 | orang 37 | azad 38 | babak 39 | bahman 40 | bahram 41 | bamdad 42 | bameen 43 | bamshad 44 | bardia 45 | behnam 46 | behrad 47 | behrang 48 | behrouz 49 | behzad 50 | bizhan 51 | bijan 52 | borna 53 | borzoo 54 | bozorgmehr 55 | changeez 56 | cirrus 57 | cyrus 58 | dadbeh 59 | danush 60 | dara 61 | darab 62 | dariush 63 | daryush 64 | davood 65 | ebi 66 | ebrahim 67 | ehsan 68 | emad 69 | esfandyar 70 | esfandiar 71 | esmaeel 72 | faramarz 73 | faraz 74 | farbod 75 | fardad 76 | fardin 77 | farhad 78 | farhang 79 | fariborz 80 
| farid 81 | farjad 82 | farrokh 83 | farrokhzad 84 | farshad 85 | farshid 86 | farsheed 87 | farzad 88 | farzam 89 | farzan 90 | farzin 91 | farzeen 92 | ferdows 93 | fereydoon 94 | firouz 95 | foroohar 96 | foroud 97 | forood 98 | giv 99 | ghobad 100 | goshtasb 101 | goudarz 102 | habib 103 | hadi 104 | hafez 105 | hamed 106 | hami 107 | hamid 108 | hassan 109 | hedayat 110 | heerad 111 | hirad 112 | hesam 113 | heydar 114 | homayoon 115 | hooman 116 | hooman 117 | human 118 | hooshang 119 | hooshmand 120 | hooshyar 121 | hootan 122 | hormoz 123 | hossein 124 | iraj 125 | iman 126 | jahandar 127 | jahangir 128 | jahanshah 129 | jafar 130 | jalal 131 | jalil 132 | jamshid 133 | javad 134 | javeed 135 | kambiz 136 | kamran 137 | kamshad 138 | kamyar 139 | kamal 140 | karim 141 | kasra 142 | kaveh 143 | kavoos 144 | key-ghobad 145 | keyghobad 146 | key-khosrow 147 | keykhosrow 148 | keyvan 149 | kayvan 150 | khashayar 151 | khoda-dad 152 | khodadad 153 | khosrow 154 | kia 155 | kian 156 | kiyan 157 | kianoosh 158 | kiarash 159 | kiumars 160 | keyumars 161 | koohyar 162 | koosha 163 | kourosh 164 | kouros 165 | makan 166 | mahbod 167 | mahmood 168 | mahmoud 169 | mahyar 170 | majid 171 | manee 172 | mani 173 | manouchehr 174 | mansoor 175 | massoud 176 | maziar 177 | mehdi 178 | mahdi 179 | mehrab 180 | mehrak 181 | mehran 182 | mehrang 183 | mehrdad 184 | mehrzad 185 | milad 186 | mohammad 187 | mohsen 188 | mojtaba 189 | mujtaba 190 | morad 191 | morteza 192 | mustafa 193 | nader 194 | namdar 195 | namvar 196 | nariman 197 | naser 198 | navid 199 | nima 200 | niyoosha 201 | nouri 202 | noushzad 203 | omid 204 | omeed 205 | parham 206 | parsa 207 | parviz 208 | pasha 209 | payam 210 | pezhman 211 | pejman 212 | puzhman 213 | pujman 214 | peyman 215 | pirooz 216 | piruz 217 | pouriya 218 | pouya 219 | rahim 220 | rakhshan 221 | rambod 222 | ramin 223 | ramtin 224 | rashid 225 | rasheed 226 | reza 227 | roozbeh 228 | rostam 229 | sadegh 230 | sadra 231 | sadri 232 | 
saeed 233 | salar 234 | sam 235 | salman 236 | saman 237 | sami 238 | sanjar 239 | sasan 240 | sattar 241 | sepehr 242 | shahab 243 | shahbaz 244 | shaheen 245 | shahkam 246 | shahram 247 | shahrdad 248 | shahriar 249 | shahryar 250 | shahrokh 251 | shahruz 252 | shahrooz 253 | shahyar 254 | shapour 255 | shaya 256 | shayan 257 | shervin 258 | sherveen 259 | siamak 260 | siavosh 261 | siavash 262 | sina 263 | soheil 264 | sohrab 265 | soroush 266 | sorush 267 | taher 268 | tahmouress 269 | tahmaseb 270 | teymour 271 | tirdad 272 | touraj 273 | tooraj 274 | vafa 275 | varshasb 276 | vishtasb 277 | yaghoub 278 | yahya 279 | yashar 280 | youness 281 | yousef 282 | zakaria 283 | zal 284 | zamyad 285 | zand 286 | zartosht 287 | zia 288 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/IsraelFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | abigail 2 | avigayil 3 | abijah 4 | aviya 5 | aviyah 6 | abiah 7 | abishag 8 | avishag 9 | abital 10 | avital 11 | adah 12 | ada 13 | adaiah 14 | adaya 15 | addar 16 | adar 17 | adi 18 | adina 19 | adiva 20 | adva 21 | agam 22 | aharona 23 | ahinoam 24 | achinoam 25 | achinoam 26 | ahuva 27 | aliza 28 | alma 29 | almog 30 | aluma 31 | amalya 32 | amalia 33 | amira 34 | amit 35 | amith 36 | amiya 37 | ammiya 38 | anael 39 | anael 40 | anath 41 | anat 42 | arava 43 | arabah 44 | ariel 45 | ariel 46 | ariela 47 | ariela 48 | arnona 49 | asenath 50 | asnat 51 | osnat 52 | asia 53 | asya 54 | atarah 55 | atara 56 | ateret 57 | athaliah 58 | atalya 59 | atalia 60 | aviv 61 | aviva 62 | avivit 63 | aya 64 | aiah 65 | ayala 66 | ayelet 67 | bar 68 | bashemath 69 | basmat 70 | bosmat 71 | bat-ami 72 | bat-ammi 73 | batel 74 | batel 75 | bat-hen 76 | bat-chen 77 | bath-sheba 78 | bat-sheva 79 | bat-shahar 80 | bat-shachar 81 | batya 82 | bat-zion 83 | bat-tziyon 84 | behira 85 | behirah 86 | bilhah 87 | bilha 88 | bina 89 | binah 
90 | bithyah 91 | bitya 92 | bracha 93 | berachah 94 | brurit 95 | brurya 96 | bruria 97 | bsora 98 | bukki 99 | buki 100 | carmel 101 | karmel 102 | carmela 103 | karmela 104 | carmit 105 | karmit 106 | carmiya 107 | carmiyya 108 | karmiya 109 | dafna 110 | daphna 111 | dalit 112 | daliya 113 | dalia 114 | dalya 115 | dana 116 | daniel 117 | daniela 118 | danit 119 | datia 120 | datya 121 | davida 122 | deborah 123 | dvora 124 | dganit 125 | deganit 126 | diklah 127 | dikla 128 | dinah 129 | dina 130 | dor 131 | dora 132 | dori 133 | dorit 134 | doriya 135 | doriyya 136 | doron 137 | drora 138 | derora 139 | drorit 140 | derorit 141 | dvorit 142 | eden 143 | edna 144 | eilat 145 | elath 146 | eilona 147 | elona 148 | einat 149 | einat 150 | elah 151 | ela 152 | eliora 153 | eliora 154 | eliraz 155 | elisheba 156 | elisheva 157 | eliya 158 | emuna 159 | emunah 160 | enav 161 | einav 162 | ephrath 163 | efrat 164 | erela 165 | erela 166 | esther 167 | ester 168 | etti 169 | eti 170 | etty 171 | eve 172 | chava 173 | hava 174 | gabi 175 | gabbi 176 | gabby 177 | gaia 178 | gaya 179 | gal 180 | galila 181 | galit 182 | galya 183 | gania 184 | ganya 185 | ganit 186 | gavriela 187 | gavriela 188 | geula 189 | geula 190 | gidona 191 | gidona 192 | gil 193 | gila 194 | gilat 195 | gili 196 | gilit 197 | ginath 198 | ginat 199 | golda 200 | goni 201 | gur 202 | guy 203 | gay 204 | gvira 205 | hadar 206 | hadara 207 | hadas 208 | hadassah 209 | hadasa 210 | hagar 211 | hagara 212 | haggith 213 | chagit 214 | hamutal 215 | chamutal 216 | hanita 217 | chanita 218 | hannah 219 | chana 220 | hanny 221 | hanni 222 | channy 223 | harduf 224 | hardoof 225 | harela 226 | harela 227 | hasia 228 | hasya 229 | chasia 230 | hasida 231 | chasida 232 | havatselet 233 | havazelet 234 | chavatzelet 235 | haviva 236 | chaviva 237 | haya 238 | chaya 239 | hayuta 240 | hayyuta 241 | chayuta 242 | heda 243 | hedva 244 | chedva 245 | hemda 246 | chemda 247 | hen 248 | chen 249 | hephzi 250 | 
bah 251 | hefziba 252 | heftziba 253 | hermona 254 | chermona 255 | herut 256 | cherut 257 | heruta 258 | cheruta 259 | hila 260 | hilla 261 | hilit 262 | hillit 263 | hodaya 264 | huldah 265 | hulda 266 | chulda 267 | idan 268 | idit 269 | iddit 270 | ilai 271 | ilay 272 | ilana 273 | ilanit 274 | ilil 275 | ilil 276 | imanuela 277 | immanuela 278 | imanuela 279 | immanuel 280 | emanuel 281 | imanuel 282 | imanuel 283 | inbal 284 | inbar 285 | iris 286 | irit 287 | israela 288 | yisraela 289 | israela 290 | isreela 291 | ivriya 292 | izhar 293 | yitzhar 294 | jaakobah 295 | yaakova 296 | yaakova 297 | jael 298 | yael 299 | yael 300 | jarah 301 | yaara 302 | yaara 303 | jedidah 304 | yedida 305 | jemima 306 | yemima 307 | jochebed 308 | yocheved 309 | yokheved 310 | jonah 311 | yona 312 | jordan 313 | yarden 314 | jubal 315 | yuval 316 | judith 317 | yehudit 318 | kalanit 319 | kallanit 320 | keren 321 | keshet 322 | kineret 323 | klila 324 | kelila 325 | kochava 326 | lali 327 | leah 328 | lea 329 | lea 330 | lebanah 331 | levana 332 | levia 333 | levia 334 | liat 335 | liat 336 | libbi 337 | lidor 338 | liel 339 | liel 340 | lihi 341 | lilach 342 | limor 343 | lion 344 | lion 345 | lior 346 | lior 347 | liora 348 | liora 349 | liran 350 | lirit 351 | liron 352 | lital 353 | lotan 354 | lotem 355 | maayan 356 | maayan 357 | malka 358 | maor 359 | maor 360 | margalit 361 | marganit 362 | marit 363 | mattan 364 | matan 365 | may 366 | mai 367 | maya 368 | mazal 369 | meira 370 | meira 371 | meital 372 | merab 373 | merav 374 | meshi 375 | metuka 376 | michaela 377 | michaela 378 | michal 379 | michelle 380 | mika 381 | miki 382 | micky 383 | mira 384 | miri 385 | miriam 386 | miryam 387 | mirit 388 | mor 389 | moran 390 | morel 391 | morel 392 | moria 393 | moriah 394 | moriya 395 | moriel 396 | moriel 397 | moshit 398 | naamah 399 | naama 400 | naama 401 | naomi 402 | noomi 403 | narkis 404 | natalie 405 | natali 406 | nataly 407 | nava 408 | nehama 409 | nechama 
410 | nehamit 411 | nechamit 412 | neri 413 | neta 414 | netanela 415 | netanela 416 | nili 417 | nina 418 | nira 419 | nirel 420 | nirel 421 | nirit 422 | nisan 423 | nitza 424 | nitsa 425 | nitzan 426 | nitsan 427 | niva 428 | noah 429 | noa 430 | noa 431 | noam 432 | noam 433 | nogah 434 | noga 435 | noy 436 | noya 437 | nuphar 438 | nufar 439 | nurith 440 | nurit 441 | odeda 442 | ofri 443 | ophri 444 | omer 445 | ophir 446 | ofir 447 | ophira 448 | ofira 449 | ophrah 450 | ofra 451 | or 452 | ora 453 | oranit 454 | orel 455 | orel 456 | orit 457 | orli 458 | orlie 459 | orna 460 | ornit 461 | ortal 462 | oshra 463 | oshrat 464 | oshri 465 | oshrit 466 | paz 467 | pazit 468 | peer 469 | peninnah 470 | pnina 471 | puah 472 | pua 473 | pua 474 | rachel 475 | rakefet 476 | rama 477 | rami 478 | ranit 479 | ravit 480 | raviv 481 | raya 482 | raaya 483 | raya 484 | raz 485 | rebekah 486 | rivka 487 | rechela 488 | rehela 489 | renana 490 | rephaela 491 | refaela 492 | refaela 493 | reumah 494 | reuma 495 | reut 496 | reut 497 | revital 498 | reviva 499 | rimmon 500 | rimon 501 | rimona 502 | rimmona 503 | rinat 504 | rinnah 505 | rina 506 | romi 507 | rona 508 | rona 509 | li 510 | rona 511 | li 512 | roni 513 | ronit 514 | ronli 515 | rotem 516 | ruhamah 517 | ruhama 518 | ruchama 519 | ruth 520 | rut 521 | sagie 522 | sagi 523 | sagit 524 | salit 525 | salit 526 | sapir 527 | sappir 528 | sarah 529 | sara 530 | sarai 531 | sari 532 | sarit 533 | savyon 534 | savion 535 | shahaf 536 | shachaf 537 | shahar 538 | shachar 539 | shai 540 | shay 541 | shaked 542 | shalhevet 543 | shalva 544 | shamira 545 | shani 546 | sharon 547 | sharona 548 | shavit 549 | sheli 550 | shelly 551 | shelomith 552 | shlomit 553 | shikma 554 | shimona 555 | shimona 556 | shimrith 557 | shimrit 558 | shiphrah 559 | shiphra 560 | shifra 561 | shir 562 | shira 563 | shiran 564 | shiraz 565 | shirel 566 | shirel 567 | shirit 568 | shirli 569 | shirly 570 | shirley 571 | shobal 572 | shoval 573 
| shoham 574 | shoshana 575 | shula 576 | shulamit 577 | shunit 578 | achiyah 579 | sigal 580 | sigalit 581 | sima 582 | simha 583 | simcha 584 | simona 585 | sinaya 586 | sinaia 587 | sivan 588 | sivana 589 | smadar 590 | semadar 591 | snait 592 | senait 593 | stav 594 | tal 595 | tal 596 | or 597 | talor 598 | tala 599 | tali 600 | talli 601 | talia 602 | talya 603 | talila 604 | tallila 605 | talma 606 | tamar 607 | tamara 608 | temara 609 | tami 610 | tammi 611 | tammy 612 | tchelet 613 | tchiya 614 | tehiyya 615 | tehila 616 | tehilla 617 | tehillah 618 | tidhar 619 | tikvah 620 | tikva 621 | timna 622 | timnah 623 | tirza 624 | tirzah 625 | tirtza 626 | tirtsa 627 | tmima 628 | temima 629 | tmira 630 | tmira 631 | temira 632 | tom 633 | tova 634 | tsabar 635 | tsedef 636 | zedef 637 | tslil 638 | tzofit 639 | tsofit 640 | tzofiya 641 | tsofiya 642 | tzufit 643 | tsufit 644 | tsoofit 645 | uriela 646 | uriela 647 | varda 648 | vardit 649 | vered 650 | yaarit 651 | yaarit 652 | yafa 653 | yafit 654 | yahel 655 | yaheli 656 | yaira 657 | yaira 658 | yakira 659 | yakkira 660 | yam 661 | yamit 662 | yammit 663 | yardena 664 | yasmin 665 | yeela 666 | yeela 667 | yifat 668 | yifat 669 | yoela 670 | yoela 671 | yonat 672 | yonina 673 | yonit 674 | yosepha 675 | yosefa 676 | yuli 677 | zafrira 678 | tzafrira 679 | tsafrira 680 | zaharira 681 | zeela 682 | zeela 683 | tseela 684 | tseela 685 | zeeva 686 | zeeva 687 | zehava 688 | zehavit 689 | zeruiah 690 | zeruya 691 | tzruya 692 | tsruya 693 | zibiah 694 | zivia 695 | zivya 696 | zillah 697 | tzila 698 | tsila 699 | ziona 700 | tziyona 701 | zipporah 702 | tsipora 703 | ziv 704 | ziva 705 | zivit 706 | zohar 707 | zohara 708 | zoharit 709 | zuph 710 | tsuf 711 | tzuf 712 | tsoof 713 | zviya 714 | tsviya 715 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/ItalyFemaleUTF8.csv: 
-------------------------------------------------------------------------------- 1 | GIULIA;9.624;3,47;3,47 2 | SOFIA;8.972;3,23;6,7 3 | MARTINA;7.327;2,64;9,34 4 | SARA;7.119;2,57;11,91 5 | CHIARA;6.457;2,33;14,24 6 | GIORGIA;5.770;2,08;16,31 7 | AURORA;5.086;1,83;18,15 8 | ALESSIA;4.996;1,8;19,95 9 | FRANCESCA;4.379;1,58;21,53 10 | ALICE;4.324;1,56;23,09 11 | ANNA;3.850;1,39;24,47 12 | ELISA;3.617;1,3;25,78 13 | GIADA;3.484;1,26;27,03 14 | EMMA;3.166;1,14;28,17 15 | MATILDE;3.052;1,1;29,27 16 | GAIA;3.042;1,1;30,37 17 | ELENA;2.944;1,06;31,43 18 | BEATRICE;2.784;1;32,43 19 | NOEMI;2.581;0,93;33,36 20 | REBECCA;2.366;0,85;34,22 21 | FEDERICA;2.339;0,84;35,06 22 | ARIANNA;2.326;0,84;35,9 23 | ASIA;2.251;0,81;36,71 24 | GRETA;2.229;0,8;37,51 25 | ILARIA;2.186;0,79;38,3 26 | VITTORIA;2.082;0,75;39,05 27 | LUDOVICA;2.080;0,75;39,8 28 | VALENTINA;2.030;0,73;40,53 29 | MARTA;2.023;0,73;41,26 30 | NICOLE;2.001;0,72;41,98 31 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/ItalyMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | FRANCESCO;10.156;3,47;3,47 2 | ALESSANDRO;9.247;3,16;6,63 3 | ANDREA;8.596;2,94;9,57 4 | MATTEO;8.460;2,89;12,46 5 | LORENZO;7.703;2,63;15,09 6 | GABRIELE;7.090;2,42;17,51 7 | MATTIA;6.521;2,23;19,74 8 | RICCARDO;5.926;2,02;21,76 9 | DAVIDE;5.583;1,91;23,67 10 | LUCA;5.334;1,82;25,49 11 | MARCO;4.785;1,63;27,13 12 | SIMONE;4.514;1,54;28,67 13 | LEONARDO;4.478;1,53;30,2 14 | GIUSEPPE;4.437;1,52;31,71 15 | FEDERICO;4.382;1,5;33,21 16 | TOMMASO;4.337;1,48;34,69 17 | ANTONIO;4.103;1,4;36,09 18 | GIOVANNI;3.799;1,3;37,39 19 | CHRISTIAN;3.749;1,28;38,67 20 | ALESSIO;3.586;1,23;39,9 21 | FILIPPO;3.498;1,2;41,09 22 | CRISTIAN;3.242;1,11;42,2 23 | SAMUELE;3.228;1,1;43,3 24 | DANIELE;3.085;1,05;44,36 25 | PIETRO;3.047;1,04;45,4 26 | EMANUELE;2.987;1,02;46,42 27 | MICHELE;2.846;0,97;47,39 28 | EDOARDO;2.799;0,96;48,35 29 | 
NICOLO';2.584;0,88;49,23 30 | GIACOMO;2.328;0,8;50,03 31 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/JapanFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Ai 2 | Aika 3 | Aiko 4 | Aimi 5 | Aina 6 | Airi 7 | Akane 8 | Akari 9 | Akemi 10 | Akeno 11 | Aki 12 | Akie 13 | Akiko 14 | Akina 15 | Akiyo 16 | Amane 17 | Ami 18 | Anzu 19 | Aoi 20 | Ariko 21 | Arisa 22 | Asako 23 | Asami 24 | Asuka 25 | Asumi 26 | Asuna 27 | Atsuko 28 | Atsumi 29 | Aya 30 | Ayaka 31 | Ayako 32 | Ayame 33 | Ayami 34 | Ayana 35 | Ayane 36 | Ayano 37 | Ayu 38 | Ayuka 39 | Ayuko 40 | Ayumi 41 | Azumi 42 | Azusa 43 | Chidori 44 | Chie 45 | Chieko 46 | Chiemi 47 | Chigusa 48 | Chiharu 49 | Chiho 50 | Chika 51 | Chikage 52 | Chikako 53 | Chinami 54 | Chinatsu 55 | Chisato 56 | Chitose 57 | Chiya 58 | Chiyako 59 | Chiyo 60 | Chiyoko 61 | Chizuko 62 | Chizuru 63 | Eiko 64 | Eimi 65 | Emi 66 | Emika 67 | Emiko 68 | Emiri 69 | Eri 70 | Erika 71 | Eriko 72 | Erina 73 | Etsuko 74 | Fujie 75 | Fujiko 76 | Fūka 77 | Fukumi 78 | Fumi 79 | Fumie 80 | Fumika 81 | Fumiko 82 | Fumino 83 | Fumiyo 84 | Fusako 85 | Futaba 86 | Fuyuko 87 | Fuyumi 88 | Hana 89 | Hanae 90 | Hanako 91 | Harue 92 | Haruhi 93 | Haruko 94 | Haruna 95 | Haruno 96 | Haruyo 97 | Hasumi 98 | Hatsue 99 | Hatsumi 100 | Hideko 101 | Hidemi 102 | Himawari 103 | Himeko 104 | Hina 105 | Hinako 106 | Hiroe 107 | Hiroka 108 | Hiroko 109 | Hiroyo 110 | Hisa 111 | Hisae 112 | Hisako 113 | Hisayo 114 | Hitomi 115 | Honami 116 | Honoka 117 | Ichiko 118 | Ikue 119 | Ikuko 120 | Ikumi 121 | Ikuyo 122 | Io 123 | Itsuko 124 | Itsumi 125 | Jitsuko 126 | Junko 127 | Juri 128 | Kaguya 129 | Kaho 130 | Kahori 131 | Kahoru 132 | Kana 133 | Kanae 134 | Kanako 135 | Kanami 136 | Kanna 137 | Kanoko 138 | Kaori 139 | Kaoruko 140 | Karen 141 | Karin 142 | Kasumi 143 | Katsuko 144 | Kawai 145 | Kaya 146 | Kayoko 147 | Kazue 148 | Kazuha 149 | 
Kazuko 150 | Kazusa 151 | Kazuyo 152 | Keiki 153 | Keiko 154 | Kiho 155 | Kiko 156 | Kikue 157 | Kikuko 158 | Kimi 159 | Kimiko 160 | Kinuko 161 | Kira 162 | Kiyoko 163 | Koharu 164 | Komako 165 | Konomi 166 | Kotoe 167 | Kotomi 168 | Kotono 169 | Kotori 170 | Kou 171 | Kozue 172 | Kumi 173 | Kumiko 174 | Kuniko 175 | Kurenai 176 | Kuriko 177 | Kyoko 178 | Maaya 179 | Machi 180 | Machiko 181 | Madoka 182 | Maho 183 | Mai 184 | Maki 185 | Makiko 186 | Mami 187 | Mamiko 188 | Mana 189 | Manaka 190 | Manami 191 | Mao 192 | Mari 193 | Marie 194 | Marika 195 | Mariko 196 | Marina 197 | Masae 198 | Masako 199 | Masayo 200 | Matsuko 201 | Mayako 202 | Mayo 203 | Mayu 204 | Mayuka 205 | Mayuko 206 | Mayumi 207 | Megu 208 | Megumi 209 | Mei 210 | Meiko 211 | Meisa 212 | Michiko 213 | Mie 214 | Mieko 215 | Miharu 216 | Miho 217 | Mihoko 218 | Miiko 219 | Mika 220 | Mikako 221 | Miki 222 | Mikiko 223 | Miku 224 | Mikuru 225 | Mimori 226 | Mina 227 | Minae 228 | Minako 229 | Minami 230 | Mineko 231 | Mio 232 | Miori 233 | Mira 234 | Misaki 235 | Misako 236 | Misato 237 | Misumi 238 | Misuzu 239 | Mitsuki 240 | Mitsuko 241 | Mitsuyo 242 | Miu 243 | Miwa 244 | Miwako 245 | Miya 246 | Miyabi 247 | Miyako 248 | Miyo 249 | Miyoko 250 | Miyoshi 251 | Miyu 252 | Miyū 253 | Miyuki 254 | Miyumi 255 | Mizue 256 | Mizuko 257 | Moe 258 | Moeka 259 | Moeko 260 | Momo 261 | Momoe 262 | Momoka 263 | Momoko 264 | Motoko 265 | Mutsuko 266 | Mutsumi 267 | Nagako 268 | Naho 269 | Nako 270 | Nami 271 | Nana 272 | Nanae 273 | Nanako 274 | Nanami 275 | Nanase 276 | Nao 277 | Naoko 278 | Narumi 279 | Natsue 280 | Natsuko 281 | Natsume 282 | Natsumi 283 | Noa 284 | Nobue 285 | Nobuko 286 | Nodoka 287 | Nonoka 288 | Noriko 289 | Noriyo 290 | Nozomi 291 | Omi 292 | Otoha 293 | Otome 294 | Ran 295 | Ranko 296 | Reika 297 | Reiko 298 | Reina 299 | Rena 300 | Reona 301 | Rie 302 | Rieko 303 | Riho 304 | Rika 305 | Rikako 306 | Riko 307 | Rina 308 | Rino 309 | Rio 310 | Risa 311 | Risako 312 | Ritsuko 313 
| Rumi 314 | Rumiko 315 | Runa 316 | Ruri 317 | Ruriko 318 | Ryōka 319 | Ryoko 320 | Sachi 321 | Sachie 322 | Sachiko 323 | Sadako 324 | Sae 325 | Saeko 326 | Saiko 327 | Saki 328 | Sakie 329 | Sakiko 330 | Saku 331 | Sakura 332 | Sakurako 333 | Sanae 334 | Saori 335 | Sari 336 | Satoko 337 | Satomi 338 | Sawa 339 | Sawako 340 | Saya 341 | Sayaka 342 | Sayako 343 | Sayo 344 | Sayoko 345 | Sayumi 346 | Sayuri 347 | Seiko 348 | Setsuko 349 | Shigeko 350 | Shiho 351 | Shihori 352 | Shiina 353 | Shimako 354 | Shinako 355 | Shino 356 | Shiori 357 | Shizue 358 | Shizuko 359 | Shizuru 360 | Shōko 361 | Shuko 362 | Sonoko 363 | Sugako 364 | Sumie 365 | Sumika 366 | Sumiko 367 | Sumire 368 | Suzue 369 | Suzuka 370 | Suzuko 371 | Taeko 372 | Takako 373 | Takayo 374 | Takeko 375 | Tamako 376 | Tamami 377 | Tamao 378 | Tamayo 379 | Tamiko 380 | Tatsuko 381 | Tazuko 382 | Teiko 383 | Teruko 384 | Terumi 385 | Tokiko 386 | Tokuko 387 | Tomie 388 | Tomiko 389 | Tomoka 390 | Tomoko 391 | Tomoyo 392 | Toshiko 393 | Toyoko 394 | Tsukiko 395 | Tsuneko 396 | Tsuru 397 | Umeko 398 | Uta 399 | Waka 400 | Wakako 401 | Wakana 402 | Yae 403 | Yaeko 404 | Yasue 405 | Yasuko 406 | Yayoi 407 | Yoko 408 | Yoriko 409 | Yoshiko 410 | Yoshino 411 | Yui 412 | Yuika 413 | Yuiko 414 | Yuka 415 | Yukako 416 | Yukari 417 | Yukie 418 | Yukika 419 | Yukiko 420 | Yukina 421 | Yukino 422 | Yūko 423 | Yumeko 424 | Yumi 425 | Yumie 426 | Yumika 427 | Yumiko 428 | Yuri 429 | Yuria 430 | Yurie 431 | Yurika 432 | Yuriko 433 | Yurina 434 | Yuumi 435 | Yuuna 436 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/LatviaFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Agnese 2 | Aiga 3 | Aija 4 | Aina 5 | Alīda 6 | Alise 7 | Alma 8 | Alvīne 9 | Amālija 10 | Anete 11 | Anita 12 | Anna 13 | Annija 14 | Antoņina 15 | Antra 16 | Ārija 17 | Ausma 18 | Austra 19 | Baba 20 | Baiba 21 | Berta 22 | Biruta 23 
| Broņislava 24 | Dace 25 | Daiga 26 | Daina 27 | Dārta 28 | Diāna 29 | Doroteja 30 | Dzidra 31 | Dzintra 32 | Eda 33 | Edīte 34 | Elīna 35 | Elita 36 | Elizabete 37 | Elvīra 38 | Elza 39 | Emīlija 40 | Emma 41 | Ērika 42 | Erna 43 | Eva 44 | Evija 45 | Evita 46 | Gaida 47 | Genovefa 48 | Grēta 49 | Grieta 50 | Gunita 51 | Gunta 52 | Helēna 53 | Ieva 54 | Ilga 55 | Ilona 56 | Ilze 57 | Ina 58 | Ināra 59 | Indra 60 | Inese 61 | Ineta 62 | Inga 63 | Ingrīda 64 | Inguna 65 | Inta 66 | Irēna 67 | Irma 68 | Iveta 69 | Jana 70 | Janina 71 | Jūla 72 | Jūle 73 | Jūlija 74 | Karina 75 | Karlīna 76 | Katarīna 77 | Katrīna 78 | Krista 79 | Kristiāna 80 | Kristīna 81 | Kristīne 82 | Kristīne 83 | Laila 84 | Laura 85 | Lavīze 86 | Leontīne 87 | Lība 88 | Lidija 89 | Liene 90 | Līga 91 | Ligita 92 | Lilija 93 | Lilita 94 | Līna 95 | Linda 96 | Līza 97 | Lizete 98 | Lūcija 99 | Madara 100 | Made 101 | Maija 102 | Māra 103 | Mare 104 | Margareta 105 | Margrieta 106 | Marija 107 | Mārīte 108 | Marta 109 | Maža 110 | Milda 111 | Minna 112 | Mirdza 113 | Monika 114 | Natālija 115 | Olga 116 | Otīlija 117 | Paula 118 | Paulīna 119 | Rasma 120 | Regīna 121 | Rita 122 | Rudīte 123 | Ruta 124 | Rute 125 | Samanta 126 | Sandra 127 | Sanita 128 | Santa 129 | Sapa 130 | Sarmīte 131 | Silvija 132 | Sintija 133 | Skaidrīte 134 | Solvita 135 | Tekla 136 | Trīne 137 | Valda 138 | Valentīna 139 | Valija 140 | Velta 141 | Veneranda 142 | Vera 143 | Veronika 144 | Vija 145 | Vilma 146 | Vineta 147 | Vita 148 | Zane 149 | Zelma 150 | Zenta 151 | Zigrīda 152 | Ada 153 | Adele 154 | Aelita 155 | Agija 156 | Agneta 157 | Agnija 158 | Alda 159 | Andželika 160 | Angelika 161 | Anta 162 | Arita 163 | Armanda 164 | Barbara 165 | Džemma 166 | Elma 167 | Erna 168 | Gerda 169 | Gunda 170 | Guntra 171 | Ģertrūde 172 | Ildze 173 | Ilma 174 | Ira 175 | Irisa 176 | Irita 177 | Julita 178 | Kate 179 | Laine 180 | Lana 181 | Liliāna 182 | Lita 183 | Lonija 184 | Nanija 185 | Nelda 186 | Santra 187 | Sniedze 188 | 
Tīna 189 | Ulla 190 | Undīne 191 | Viola 192 | Žaklīna 193 | Agate 194 | Agita 195 | Agnese 196 | Agra 197 | Agrita 198 | Aiga 199 | Aija 200 | Aina 201 | Aira 202 | Airita 203 | Aiva 204 | Aivita 205 | Aleksandra 206 | Aleksandrīna 207 | Alina 208 | Alise 209 | Alla 210 | Alma 211 | Alvīna 212 | Alvīne 213 | Alīna 214 | Amanda 215 | Amālija 216 | Anastasija 217 | Ance 218 | Anda 219 | Andra 220 | Andžela 221 | Anete 222 | Anita 223 | Anna 224 | Annija 225 | Antonija 226 | Antoņina 227 | Antra 228 | Anžela 229 | Anželika 230 | Arnita 231 | Arta 232 | Astra 233 | Astrīda 234 | Ausma 235 | Austra 236 | Baba 237 | Baiba 238 | Beatrise 239 | Benita 240 | Betija 241 | Beāte 242 | Biruta 243 | Brigita 244 | Broņislava 245 | Dace 246 | Dagmāra 247 | Dagnija 248 | Daiga 249 | Daina 250 | Dainuvīte 251 | Dana 252 | Daniela 253 | Dina 254 | Dita 255 | Diāna 256 | Doroteja 257 | Dzidra 258 | Dzintra 259 | Dārta 260 | Eda 261 | Edīte 262 | Egija 263 | Egita 264 | Eiženija 265 | Elena 266 | Eleonora 267 | Elga 268 | Elita 269 | Elizabete 270 | Elvīra 271 | Elza 272 | Elēna 273 | Elīna 274 | Elīza 275 | Emma 276 | Emīlija 277 | Enija 278 | Estere 279 | Eva 280 | Evelīna 281 | Evija 282 | Evita 283 | Gaida 284 | Gaļina 285 | Genovefa 286 | Ginta 287 | Gita 288 | Grieta 289 | Guna 290 | Gundega 291 | Gunita 292 | Gunta 293 | Helēna 294 | Ieva 295 | Ilga 296 | Ilona 297 | Iluta 298 | Ilva 299 | Ilze 300 | Ilzīte 301 | Ina 302 | Indra 303 | Inesa 304 | Inese 305 | Ineta 306 | Inga 307 | Ingrīda 308 | Inguna 309 | Ingūna 310 | Inita 311 | Inna 312 | Inta 313 | Ināra 314 | Irma 315 | Irēna 316 | Irīna 317 | Iveta 318 | Jadviga 319 | Jana 320 | Jeļena 321 | Jolanta 322 | Judīte 323 | Justīne 324 | Juta 325 | Jūlija 326 | Karina 327 | Karlīna 328 | Karolīna 329 | Karīna 330 | Katarīna 331 | Katrīna 332 | Keita 333 | Kintija 334 | Kitija 335 | Klaudija 336 | Krista 337 | Kristina 338 | Kristiāna 339 | Kristīna 340 | Kristīne 341 | Ksenija 342 | Laila 343 | Laima 344 | Laimdota 345 | 
Larisa 346 | Lauma 347 | Laura 348 | Lavīze 349 | Lelde 350 | Lidija 351 | Liene 352 | Lienīte 353 | Liesma 354 | Ligita 355 | Lija 356 | Lilija 357 | Lilita 358 | Linda 359 | Liāna 360 | Lolita 361 | Ludmila 362 | Luīze 363 | Lāsma 364 | Līga 365 | Līna 366 | Līva 367 | Līvija 368 | Lūcija 369 | Madara 370 | Maiga 371 | Maija 372 | Maira 373 | Mairita 374 | Margarita 375 | Margita 376 | Margrieta 377 | Marija 378 | Marika 379 | Marina 380 | Marita 381 | Marta 382 | Maruta 383 | Megija 384 | Milda 385 | Mirdza 386 | Modra 387 | Modrīte 388 | Monika 389 | Monta 390 | Mudīte 391 | Māra 392 | Mārīte 393 | Nadežda 394 | Natalija 395 | Natālija 396 | Nellija 397 | Nina 398 | Nora 399 | Olga 400 | Olita 401 | Otīlija 402 | Patrīcija 403 | Paula 404 | Paulīna 405 | Raisa 406 | Raita 407 | Ramona 408 | Rasa 409 | Rasma 410 | Regīna 411 | Renāte 412 | Rita 413 | Rota 414 | Rudīte 415 | Ruta 416 | Rute 417 | Rūta 418 | Sabīne 419 | Saiva 420 | Samanta 421 | Sanda 422 | Sandra 423 | Sanita 424 | Santa 425 | Sarma 426 | Sarmīte 427 | Sigita 428 | Signe 429 | Silvija 430 | Simona 431 | Sindija 432 | Sintija 433 | Skaidra 434 | Skaidrīte 435 | Sofija 436 | Solveiga 437 | Solvita 438 | Staņislava 439 | Taisija 440 | Tamara 441 | Tamāra 442 | Tatjana 443 | Tekla 444 | Terēza 445 | Terēze 446 | Terēzija 447 | Tija 448 | Una 449 | Vaira 450 | Valda 451 | Valentina 452 | Valentīna 453 | Valerija 454 | Valija 455 | Valērija 456 | Vanda 457 | Velga 458 | Velta 459 | Veneranda 460 | Vera 461 | Veronika 462 | Vija 463 | Viktorija 464 | Vilma 465 | Vineta 466 | Violeta 467 | Vita 468 | Vizbulīte 469 | Vizma 470 | Vēsma 471 | Zaiga 472 | Zanda 473 | Zane 474 | Zelma 475 | Zenta 476 | Zigrīda 477 | Zinaida 478 | Zinaīda 479 | Zinta 480 | Zita 481 | Zoja 482 | Zuzanna 483 | Ārija 484 | Ērika 485 | Ņina 486 | Šarlote 487 | Žanete 488 | Žanna 489 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/LatviaMaleUTF8.csv: 
-------------------------------------------------------------------------------- 1 | Ādams 2 | Ādolfs 3 | Agris 4 | Aigars 5 | Ainārs 6 | Aivars 7 | Alberts 8 | Aldis 9 | Aleksandrs 10 | Alfrēds 11 | Andrejs 12 | Andris 13 | Andrešs 14 | Ansis 15 | Antons 16 | Armands 17 | Arnis 18 | Arnolds 19 | Artis 20 | Arturs 21 | Artūrs 22 | Arvīds 23 | Augusts 24 | Bērends 25 | Bērtulis 26 | Brencis 27 | Dainis 28 | Daniels 29 | Dāvis 30 | Dzintars 31 | Edgars 32 | Edmunds 33 | Eduards 34 | Edvīns 35 | Egils 36 | Elmārs 37 | Elvis 38 | Endijs 39 | Emīls 40 | Ēriks 41 | Ermanis 42 | Ernests 43 | Ēvalds 44 | Fricis 45 | Gatis 46 | Gunārs 47 | Guntars 48 | Guntis 49 | Ģederts 50 | Ģirts 51 | Hanss 52 | Harijs 53 | Henriks 54 | Hermanis 55 | Igors 56 | Ilmārs 57 | Imants 58 | Indriķis 59 | Ivars 60 | Ivo 61 | Jakobs 62 | Janis 63 | Jānis 64 | Jannis 65 | Jāzeps 66 | Jēkabs 67 | Jēkaubs 68 | Jezups 69 | Johans 70 | Jūlijs 71 | Juris 72 | Kārlis 73 | Kaspars 74 | Konradus 75 | Kristaps 76 | Kristers 77 | Krists 78 | Krišjānis 79 | Krišs 80 | Laimonis 81 | Lauris 82 | Leons 83 | Macs 84 | Mareks 85 | Māris 86 | Mārtiņš 87 | Matīss 88 | Mihels 89 | Mikels 90 | Miķelis 91 | Modris 92 | Nikolajs 93 | Niks 94 | Normunds 95 | Oļģerts 96 | Oskars 97 | Osvalds 98 | Oto 99 | Pauls 100 | Pēteris 101 | Raimonds 102 | Raivis 103 | Reinis 104 | Ričards 105 | Rihards 106 | Roberts 107 | Rolands 108 | Rūdolfs 109 | Sandis 110 | Staņislavs 111 | Tenis 112 | Teodors 113 | Toms 114 | Uldis 115 | Valdis 116 | Viesturs 117 | Viktors 118 | Vilis 119 | Vilnis 120 | Viļums 121 | Visvaldis 122 | Vladislavs 123 | Voldemārs 124 | Ziedonis 125 | Žanis 126 | Aksels 127 | Albīns 128 | Alfs 129 | Aļģirts 130 | Almants 131 | Alnis 132 | Anatols 133 | Andrievs 134 | Andulis 135 | Andžs 136 | Anšlavs 137 | Antis 138 | Ardis 139 | Arijs 140 | Arvils 141 | Askolds 142 | Atvars 143 | Auseklis 144 | Balvis 145 | Benedikts 146 | Bernhards 147 | Boļeslavs 148 | Broņislavs 149 | Centis 150 | Dailis 151 | Dairis 152 | 
Daumants 153 | Dins 154 | Donāts 155 | Drosmis 156 | Druvis 157 | Druvvaldis 158 | Edžus 159 | Egmonts 160 | Elgars 161 | Frīdis 162 | Gaidis 163 | Gaits 164 | Gastons 165 | Gunvaldis 166 | Hugo 167 | Ikars 168 | Ilgmārs 169 | Inesis 170 | Ingmars 171 | Inguns 172 | Jorens 173 | Jūlians 174 | Justs 175 | Kalvis 176 | Klaudijs 177 | Knuts 178 | Konrads 179 | Kurts 180 | Laimnesis 181 | Leo 182 | Leopolds 183 | Lotars 184 | Madis 185 | Magnuss 186 | Maigurs 187 | Maksis 188 | Malvis 189 | Marģers 190 | Margots 191 | Marts 192 | Mintauts 193 | Monvids 194 | Muntis 195 | Nils 196 | Norberts 197 | Otomars 198 | Rauls 199 | Ringolds 200 | Rodrigo 201 | Rūsiņš 202 | Salvis 203 | Sarmis 204 | Saulvedis 205 | Sentis 206 | Severīns 207 | Sigurds 208 | Silvestrs 209 | Spodris 210 | Svens 211 | Tālrīts 212 | Uvis 213 | Valfrids 214 | Varis 215 | Vents 216 | Vidvuds 217 | Vikentijs 218 | Vilips 219 | Vilmars 220 | Vismants 221 | Zemgus 222 | Adrians 223 | Adriāns 224 | Agnis 225 | Agris 226 | Aigars 227 | Ainars 228 | Ainis 229 | Ainārs 230 | Aivars 231 | Aivis 232 | Alberts 233 | Aleksandrs 234 | Aleksejs 235 | Aleksis 236 | Alfons 237 | Alfrēds 238 | Aloizs 239 | Alvis 240 | Anatolijs 241 | Andis 242 | Andrejs 243 | Andris 244 | Andžejs 245 | Anrijs 246 | Ansis 247 | Antons 248 | Arkādijs 249 | Armands 250 | Armīns 251 | Arnis 252 | Arnolds 253 | Artis 254 | Arturs 255 | Artūrs 256 | Arvis 257 | Arvīds 258 | Atis 259 | Augusts 260 | Austris 261 | Boriss 262 | Brencis 263 | Bruno 264 | Dagnis 265 | Dainis 266 | Daniels 267 | Didzis 268 | Dmitrijs 269 | Dzintars 270 | Dāvids 271 | Dāvis 272 | Džons 273 | Edgars 274 | Edijs 275 | Edmunds 276 | Eduards 277 | Edvards 278 | Edvīns 279 | Egils 280 | Egons 281 | Einārs 282 | Eižens 283 | Elmārs 284 | Elvijs 285 | Elvis 286 | Elviss 287 | Elīza 288 | Emīls 289 | Endijs 290 | Ernests 291 | Ervīns 292 | Felikss 293 | Filips 294 | Fjodors 295 | Francis 296 | Fricis 297 | Fridrihs 298 | Fēlikss 299 | Fīlips 300 | Gatis 301 | Georgs 302 | 
Gintars 303 | Gintauts 304 | Gints 305 | Gothards 306 | Gundars 307 | Guntars 308 | Guntis 309 | Gunārs 310 | Gustavs 311 | Gusts 312 | Gvido 313 | Hanss 314 | Haralds 315 | Harijs 316 | Helmuts 317 | Henrihs 318 | Henrijs 319 | Henriks 320 | Herberts 321 | Hermanis 322 | Ignats 323 | Igors 324 | Ilgonis 325 | Ilgvars 326 | Ilmārs 327 | Ilvars 328 | Imants 329 | Indriķis 330 | Indulis 331 | Ingars 332 | Ingus 333 | Intars 334 | Ints 335 | Inārs 336 | Ivans 337 | Ivars 338 | Ivo 339 | Jans 340 | Jevgeņijs 341 | Johans 342 | Juris 343 | Jurģis 344 | Jānis 345 | Jāzeps 346 | Jēkabs 347 | Jūlijs 348 | Kaspars 349 | Kazimirs 350 | Kirils 351 | Klāvs 352 | Konstantīns 353 | Kristaps 354 | Kristers 355 | Kristiāns 356 | Krists 357 | Krišjānis 358 | Krišs 359 | Kārlis 360 | Laimdots 361 | Laimonis 362 | Lauris 363 | Leonards 364 | Leons 365 | Leonīds 366 | Linards 367 | Ludis 368 | Ludvigs 369 | Madars 370 | Maigonis 371 | Maikls 372 | Mairis 373 | Maksims 374 | Mareks 375 | Marks 376 | Markuss 377 | Matejs 378 | Matīss 379 | Miervaldis 380 | Mihaels 381 | Mihails 382 | Miks 383 | Mikus 384 | Miķelis 385 | Modris 386 | Mārcis 387 | Māris 388 | Mārtiņš 389 | Nauris 390 | Niklāvs 391 | Nikolajs 392 | Niks 393 | Normunds 394 | Ojārs 395 | Olafs 396 | Oskars 397 | Osvalds 398 | Oto 399 | Oļegs 400 | Paulis 401 | Pauls 402 | Pjotrs 403 | Pāvels 404 | Pāvils 405 | Pēteris 406 | Raimonds 407 | Raitis 408 | Raivis 409 | Raivo 410 | Ralfs 411 | Reinholds 412 | Reinis 413 | Renārs 414 | Rihards 415 | Rinalds 416 | Ritvars 417 | Ričards 418 | Roberts 419 | Rolands 420 | Romans 421 | Romualds 422 | Romāns 423 | Ronalds 424 | Rūdolfs 425 | Sandis 426 | Sandris 427 | Sergejs 428 | Simons 429 | Staņislavs 430 | Stefans 431 | Svens 432 | Sīmanis 433 | Teodors 434 | Tomass 435 | Toms 436 | Tālis 437 | Tālivaldis 438 | Uldis 439 | Uģis 440 | Vairis 441 | Valdemārs 442 | Valdis 443 | Valentīns 444 | Valerijs 445 | Valters 446 | Valts 447 | Vasilijs 448 | Verners 449 | Viesturs 450 | Viktors 
451 | Vilhelms 452 | Vilis 453 | Viljams 454 | Vilnis 455 | Visvaldis 456 | Vitauts 457 | Vitolds 458 | Vitālijs 459 | Viļums 460 | Vladimirs 461 | Vladislavs 462 | Voldemārs 463 | Ziedonis 464 | Zigfrīds 465 | Zigmunds 466 | Zigmārs 467 | Zigurds 468 | Zintis 469 | Ādams 470 | Ādolfs 471 | Ārijs 472 | Āris 473 | Ēriks 474 | Ēvalds 475 | Ģirts 476 | Žanis 477 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/RomaniaFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Adriana 2 | Adrianna 3 | Afina 4 | Alexandreina 5 | Amelia 6 | Ana 7 | Ana-marie 8 | Anamarie 9 | Anca 10 | Andreea 11 | Angela 12 | Anica 13 | Antanasia 14 | Antoaneta 15 | Atanasia 16 | Augustina 17 | Aurelia 18 | Bianca 19 | Camelia 20 | Cami 21 | Carla 22 | Carmen 23 | Catalina 24 | Cecilia 25 | Celestina 26 | Christina 27 | Clara 28 | Claudia 29 | Codruta 30 | Constanta 31 | Corina 32 | Corinna 33 | Cosmina 34 | Costela 35 | Craita 36 | Crina 37 | Cristina 38 | Dana 39 | Daniela 40 | Delia 41 | Diana 42 | Diona 43 | Dominique 44 | Dorina 45 | Draguta 46 | Dumitra 47 | Dumitrita 48 | Ecaterina 49 | Elena 50 | Elisabeta 51 | Eliza 52 | Ema 53 | Emilia 54 | Emiliana 55 | Erika 56 | Eugenia 57 | Eveline 58 | Floarea 59 | Florenta 60 | Flori 61 | Florica 62 | Florina 63 | Gabi 64 | Gabriela 65 | Georgeta 66 | Georgetta 67 | Georgiana 68 | Georgina 69 | Gheorghita 70 | Gina 71 | Helena 72 | Helga 73 | Ihrin 74 | Ileana 75 | Imanuela 76 | Ioana 77 | Iolanda 78 | Iona 79 | Ionela 80 | Irina 81 | Iulia 82 | Ivona 83 | Izabela 84 | Jeni 85 | Jenica 86 | Joana 87 | Juana 88 | Kathryn 89 | Lacramioara 90 | Laura 91 | Laurentia 92 | Lavinia 93 | Lenuta 94 | Leunta 95 | Lia 96 | Ligia 97 | Lilian 98 | Lina 99 | Lizuca 100 | Loredana 101 | Luiza 102 | Luminita 103 | Madalina 104 | Manuela 105 | Mara 106 | Marcela 107 | Margareta 108 | Maria 109 | Mariana 110 | Marica 111 | Marilena 112 | Marina 113 | 
Marinela 114 | Marioara 115 | Mariutza 116 | Marta 117 | Melita 118 | Michaela 119 | Mihaela 120 | Mirela 121 | Monica 122 | Monique 123 | Nadezhda 124 | Narcisa 125 | Nedelcu 126 | Neulai 127 | Nicoleta 128 | Nina 129 | Oana 130 | Octavia 131 | Olga 132 | Olimpia 133 | Olivia 134 | Olympia 135 | Paula 136 | Petronela 137 | Raluca 138 | Ramona 139 | Regina 140 | Relia 141 | Rodica 142 | Rodika 143 | Romanitza 144 | Roxana 145 | Ruxandra 146 | Sabina 147 | Sanda 148 | Silvia 149 | Simona 150 | Sonia 151 | Stefana 152 | Stefania 153 | Stela 154 | Tabitha 155 | Tara 156 | Tatiana 157 | Teadora 158 | Teodora 159 | Teofila 160 | Trandafira 161 | Uta 162 | Valerica 163 | Vanda 164 | Varduhi 165 | Victoria 166 | Viorela 167 | Viviana 168 | Voctorita 169 | Voileta 170 | Yessenia 171 | Ylenia 172 | Zina 173 | Ada 174 | Adela 175 | Adelaida 176 | Adelina 177 | Adina 178 | Adriana 179 | Agata 180 | Aglaia 181 | Agripina 182 | Aida 183 | Alberta 184 | Albertina 185 | Alexandra 186 | Alexandrina 187 | Alida 188 | Alina 189 | Alis 190 | Alma 191 | Amalia 192 | Amelia 193 | Amanda 194 | Ana 195 | Anabela 196 | Anaida 197 | Anamaria 198 | Anastasia 199 | Anca 200 | Ancuța 201 | Anda 202 | Andra 203 | Andrada 204 | Andreea 205 | Anemona 206 | Aneta 207 | Angela 208 | Anghelina 209 | Anica 210 | Anișoara 211 | Antoaneta 212 | Antonia 213 | Antonela 214 | Anuța 215 | Ariadna 216 | Ariana 217 | Arina 218 | Aristița 219 | Artemisa 220 | Astrid 221 | Atena 222 | Augustina 223 | Aura 224 | Aurelia 225 | Aureliana 226 | Aurica 227 | Aurora 228 | Beatrice 229 | Betina 230 | Bianca 231 | Blanduzia 232 | Bogdana 233 | Brândușa 234 | Camelia 235 | Carina 236 | Carla 237 | Carmen 238 | Carmina 239 | Carolina 240 | Casandra 241 | Casiana 242 | Catinca 243 | Catrina 244 | Catrinel 245 | Cătălina 246 | Cecilia 247 | Celia 248 | Cerasela 249 | Cezara 250 | Cipriana 251 | Clara 252 | Clarisa 253 | Claudia 254 | Clementina 255 | Cleopatra 256 | Codrina 257 | Codruța 258 | Constantina 259 | Constanța 
260 | Consuela 261 | Coralia 262 | Corina 263 | Cornelia 264 | Cosmina 265 | Crenguța 266 | Crina 267 | Cristina 268 | Daciana 269 | Dafina 270 | Daiana 271 | Dalia 272 | Dana 273 | Daniela 274 | Daria 275 | Dariana 276 | Delia 277 | Demetra 278 | Denisa 279 | Despina 280 | Diana 281 | Dida 282 | Didina 283 | Dina 284 | Dochia 285 | Doina 286 | Domnica 287 | Dora 288 | Doriana 289 | Dorina 290 | Dorli 291 | Draga 292 | Dumitra 293 | Dumitrana 294 | Ecaterina 295 | Eftimia 296 | Elena 297 | Eleonora 298 | Eliana 299 | Elisabeta 300 | Elisaveta 301 | Eliza 302 | Elodia 303 | Elvira 304 | Emilia 305 | Emanuela 306 | Erica 307 | Estera 308 | Eufrosina 309 | Eugenia 310 | Eusebia 311 | Eva 312 | Evanghelina 313 | Evelina 314 | Fabia 315 | Fabiana 316 | Felicia 317 | Filofteia 318 | Fiona 319 | Flavia 320 | Floare 321 | Floarea 322 | Flora 323 | Floriana 324 | Florica 325 | Florina 326 | Florentina 327 | Florența 328 | Francesca 329 | Frusina 330 | Gabriela 331 | Geanina 332 | Georgeta 333 | Georgia 334 | Georgiana 335 | Geta 336 | Gherghina 337 | Gianina 338 | Gina 339 | Giorgiana 340 | Grațiana 341 | Grațiela 342 | Hortensia 343 | Henrieta 344 | Iasmina 345 | Ica 346 | Ileana 347 | Ilinca 348 | Ilona 349 | Ina 350 | Ioana 351 | Iolanda 352 | Ionela 353 | Iosefina 354 | Irina 355 | Iridenta 356 | Iris 357 | Isabela 358 | Iulia 359 | Iuliana 360 | Iustina 361 | Ivona 362 | Izabela 363 | Jana 364 | Janeta 365 | Janina 366 | Jasmina 367 | Jeana 368 | Julia 369 | Julieta 370 | Larisa 371 | Laura 372 | Laurenția 373 | Lavinia 374 | Lăcrămioara 375 | Leana 376 | Lelia 377 | Leontina 378 | Leopoldina 379 | Letiția 380 | Lia 381 | Liana 382 | Lidia 383 | Ligia 384 | Lili 385 | Liliana 386 | Lioara 387 | Livia 388 | Loredana 389 | Lorena 390 | Luana 391 | Lucia 392 | Luciana 393 | Lucreția 394 | Ludovica 395 | Luiza 396 | Luminița 397 | Magdalena 398 | Maia 399 | Manuela 400 | Mara 401 | Marcela 402 | Marga 403 | Margareta 404 | Marcheta 405 | Maria 406 | Mariana 407 | Maricica 
408 | Marilena 409 | Marina 410 | Marinela 411 | Marioara 412 | Marta 413 | Matilda 414 | Malvina 415 | Mădălina 416 | Mălina 417 | Mărioara 418 | Măriuca 419 | Melania 420 | Melina 421 | Mihaela 422 | Milena 423 | Mina 424 | Minodora 425 | Mioara 426 | Mirabela 427 | Mirela 428 | Miruna 429 | Mona 430 | Monalisa 431 | Monica 432 | Nadia 433 | Narcisa 434 | Natalia 435 | Natașa 436 | Nicoleta 437 | Niculina 438 | Nora 439 | Norica 440 | Oana 441 | Octavia 442 | Octaviana 443 | Ofelia 444 | Olga 445 | Olimpia 446 | Olivia 447 | Ortansa 448 | Otilia 449 | Ozana 450 | Pamela 451 | Paraschiva 452 | Paula 453 | Paulica 454 | Paulina 455 | Patricia 456 | Petronela 457 | Petruța 458 | Pompilia 459 | Profira 460 | Rada 461 | Rafila 462 | Raluca 463 | Ramona 464 | Rebeca 465 | Renata 466 | Rica 467 | Roberta 468 | Robertina 469 | Rodica 470 | Roza 471 | Rozalia 472 | Roxana 473 | Ruxanda 474 | Ruxandra 475 | Sabina 476 | Sabrina 477 | Safta 478 | Salomea 479 | Sanda 480 | Saveta 481 | Savina 482 | Sânziana 483 | Semenica 484 | Severina 485 | Sidonia 486 | Silvia 487 | Silviana 488 | Simina 489 | Simona 490 | Smaranda 491 | Sofia 492 | Sonia 493 | Sorana 494 | Sorina 495 | Speranța 496 | Stana 497 | Stanca 498 | Stela 499 | Steliana 500 | Steluța 501 | Suzana 502 | Ștefana 503 | Ștefania 504 | Tamara 505 | Tania 506 | Tatiana 507 | Teodora 508 | Teodosia 509 | Teona 510 | Tiberia 511 | Tinca 512 | Tincuța 513 | Tudorița 514 | Tudosia 515 | Valentina 516 | Valeria 517 | Vanesa 518 | Varvara 519 | Vasilica 520 | Venera 521 | Vera 522 | Veronica 523 | Veta 524 | Victoria 525 | Violeta 526 | Viorela 527 | Viorica 528 | Virginia 529 | Viviana 530 | Voichița 531 | Xenia 532 | Zaharia 533 | Zamfira 534 | Zaraza 535 | Zenobia 536 | Zenovia 537 | Zina 538 | Zoe 539 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/RomaniaMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | 
Abel 2 | Adi 3 | Adrian 4 | Alexandru 5 | Alin 6 | Anatolie 7 | Andrei 8 | Anghel 9 | Anton 10 | Aurel 11 | Baduna 12 | Bela 13 | Beryx 14 | Bodgan 15 | Bogdan 16 | Calin 17 | Catalin 18 | Cezar 19 | Ciodaru 20 | Codrin 21 | Corneliu 22 | Cosmin 23 | Costea 24 | Costel 25 | Costi 26 | Costica 27 | Costin 28 | Costine 29 | Cristi 30 | Cristian 31 | Cristinel 32 | Dan 33 | Daniel 34 | Danut 35 | Decebal 36 | Dimitrie 37 | Dimitry 38 | Dorin 39 | Doru 40 | Dracul 41 | Dragos 42 | Dragoslav 43 | Drahoslav 44 | Dumitru 45 | Eftemie 46 | Emil 47 | Eugen 48 | Flaviu 49 | Flavius 50 | Florentin 51 | Florin 52 | Gabi 53 | Gabriel 54 | George 55 | Geza 56 | Gheorghe 57 | Ghita 58 | Glad 59 | Gogu 60 | Horatiu 61 | Horia 62 | Iancu 63 | Ilie 64 | Ioan 65 | Ion 66 | Ionache 67 | Ionel 68 | Ionut 69 | Iulian 70 | Ivan 71 | Ivantie 72 | Jan 73 | Jean 74 | Laurentiu 75 | Liviu 76 | Lucian 77 | Marian 78 | Marin 79 | Marius 80 | Mazonn 81 | Mihai 82 | Mihaita 83 | Mircea 84 | Neculai 85 | Nelu 86 | Nic 87 | Nicolae 88 | Nicu 89 | Niculaie 90 | Nicusor 91 | Octavian 92 | Ovidiu 93 | Paul 94 | Pereteanu 95 | Petrica 96 | Petru 97 | Pompiliu 98 | Pompilius 99 | Radu 100 | Rares 101 | Razvan 102 | Rica 103 | Sandu 104 | Serban 105 | Shaithis 106 | Silviu 107 | Simu 108 | Soare 109 | Sorin 110 | Stefan 111 | Stelian 112 | Teo 113 | Teodor 114 | Teodosie 115 | Traian 116 | Tudor 117 | Valeriu 118 | Vali 119 | Vasile 120 | Vasilescu 121 | Velkan 122 | Victor 123 | Viorea 124 | Vlad 125 | Vladimir 126 | Zaharia 127 | Achim 128 | Adam 129 | Adelin 130 | Adonis 131 | Adrian 132 | Agnos 133 | Albert 134 | Alex 135 | Alexandru 136 | Alexe 137 | Aleodor 138 | Alin 139 | Alistar 140 | Amza 141 | Anatolie 142 | Andrei 143 | Angel 144 | Anghel 145 | Antim 146 | Anton 147 | Antonie 148 | Antoniu 149 | Arian 150 | Aristide 151 | Arsenie 152 | Augustin 153 | Aurel 154 | Aurelian 155 | Aurică 156 | Avram 157 | Axinte 158 | Barbu 159 | Bartolomeu 160 | Basarab 161 | Bănel 162 | Bebe 163 | Beniamin 164 
| Bernard 165 | Bogdan 166 | Brăduț 167 | Bucur 168 | Caius 169 | Camil 170 | Cantemir 171 | Carol 172 | Casian 173 | Cazimir 174 | Călin 175 | Cătălin 176 | Cedrin 177 | Cezar 178 | Ciprian 179 | Claudiu 180 | Codin 181 | Codrin 182 | Codruț 183 | Cornel 184 | Corneliu 185 | Corvin 186 | Constantin 187 | Cosmin 188 | Costache 189 | Costel 190 | Costin 191 | Crin 192 | Cristea 193 | Cristian 194 | Cristobal 195 | Cristofor 196 | Dacian 197 | Damian 198 | Dan 199 | Daniel 200 | Darius 201 | David 202 | Decebal 203 | Denis 204 | Dinu 205 | Dominic 206 | Dorel 207 | Dorian 208 | Dorin 209 | Dorinel 210 | Doru 211 | Dragoș 212 | Ducu 213 | Dumitru 214 | Edgar 215 | Edmond 216 | Eduard 217 | Eftimie 218 | Emil 219 | Emilian 220 | Emanoil 221 | Emanuel 222 | Emanuil 223 | Eremia 224 | Eric 225 | Ernest 226 | Eugen 227 | Eusebiu 228 | Eustațiu 229 | Fabian 230 | Felix 231 | Filip 232 | Fiodor 233 | Flaviu 234 | Florea 235 | Florentin 236 | Florian 237 | Florin 238 | Francisc 239 | Frederic 240 | Gabi 241 | Gabriel 242 | Gelu 243 | George 244 | Georgel 245 | Georgian 246 | Ghenadie 247 | Gheorghe 248 | Gheorghiță 249 | Ghiță 250 | Gică 251 | Gicu 252 | Giorgian 253 | Grațian 254 | Gregorian 255 | Grigore 256 | Haralamb 257 | Haralambie 258 | Horațiu 259 | Horea 260 | Horia 261 | Iacob 262 | Iancu 263 | Ianis 264 | Ieremia 265 | Ilarie 266 | Ilarion 267 | Ilie 268 | Inocențiu 269 | Ioan 270 | Ion 271 | Ionel 272 | Ionică 273 | Ionuț 274 | Iosif 275 | Irinel 276 | Iulian 277 | Iuliu 278 | Iurie 279 | Iustin 280 | Iustinian 281 | Ivan 282 | Jan 283 | Jean 284 | Jenel 285 | Ladislau 286 | Lascăr 287 | Laurențiu 288 | Laurian 289 | Lazăr 290 | Leonard 291 | Leontin 292 | Lică 293 | Liviu 294 | Lorin 295 | Luca 296 | Lucențiu 297 | Lucian 298 | Lucrețiu 299 | Ludovic 300 | Manole 301 | Marcel 302 | Marcu 303 | Marian 304 | Marin 305 | Marius 306 | Martin 307 | Matei 308 | Maxim 309 | Maximilian 310 | Mădălin 311 | Mihai 312 | Mihail 313 | Mihnea 314 | Mircea 315 | Miron 316 | 
Mitică 317 | Mitruț 318 | Mugur 319 | Mugurel 320 | Nae 321 | Narcis 322 | Nechifor 323 | Nelu 324 | Nichifor 325 | Nicoară 326 | Nicodim 327 | Nicolae 328 | Nicolaie 329 | Nicu 330 | Nicuță 331 | Niculiță 332 | Nicușor 333 | Norbert 334 | Octav 335 | Octavian 336 | Octaviu 337 | Olimpian 338 | Olimpiu 339 | Oliviu 340 | Ovidiu 341 | Pamfil 342 | Panait 343 | Panagachie 344 | Paul 345 | Pavel 346 | Pătru 347 | Petre 348 | Petrică 349 | Petrișor 350 | Petru 351 | Petruț 352 | Pompiliu 353 | Radu 354 | Rareș 355 | Răducu 356 | Răzvan 357 | Relu 358 | Remus 359 | Robert 360 | Romeo 361 | Romulus 362 | Sabin 363 | Sandu 364 | Sava 365 | Sebastian 366 | Sergiu 367 | Sever 368 | Severin 369 | Silvian 370 | Silviu 371 | Simi 372 | Simion 373 | Sinică 374 | Sorin 375 | Stan 376 | Stancu 377 | Stelian 378 | Șerban 379 | Ștefan 380 | Teodor 381 | Teofil 382 | Teohari 383 | Theodor 384 | Tiberiu 385 | Titus 386 | Todor 387 | Toma 388 | Traian 389 | Tudor 390 | Valentin 391 | Valeriu 392 | Valter 393 | Vasile 394 | Vasilică 395 | Vicențiu 396 | Victor 397 | Vincențiu 398 | Viorel 399 | Visarion 400 | Vlad 401 | Vladimir 402 | Vlaicu 403 | Voicu 404 | Zamfir 405 | Zeno 406 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/SloveniaFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Marija;67609 2 | Ana;28439 3 | Maja;13095 4 | Irena;1257 5 | Mojca;11454 6 | Mateja;10521 7 | Jožefa;10416 8 | Nataša;10199 9 | Nina;9813 10 | Barbara;9666 11 | Jožica;9603 12 | Andreja;957 13 | Frančiška;924 14 | Petra;923 15 | Ivana;8902 16 | Katja;8706 17 | Anja;8401 18 | Sonja;838 19 | Milena;8254 20 | Katarina;8213 21 | Tatjana;8192 22 | Terezija;8045 23 | Tanja;7914 24 | Alenka;7882 25 | Majda;779 26 | Martina;7722 27 | Vesna;7649 28 | Tina;7608 29 | Urška;74 30 | Eva;6933 31 | Helena;6825 32 | Anica;6763 33 | Kristina;6742 34 | Sara;6691 35 | Angela;6652 36 | Dragica;6635 37 | 
Špela;6567 38 | Nada;6553 39 | Antonija;6547 40 | Tjaša;6391 41 | Darja;6217 42 | Olga;6183 43 | Danica;6024 44 | Marjeta;6009 45 | Nika;5994 46 | Simona;586 47 | Zdenka;571 48 | Vida;5601 49 | Suzana;558 50 | Lidija;5557 51 | Ivanka;5545 52 | Ljudmila;5483 53 | Marta;5404 54 | Alojzija;5197 55 | Sabina;5105 56 | Janja;5039 57 | Veronika;4904 58 | Silva;4879 59 | Darinka;4623 60 | Neža;4592 61 | Štefanija;4493 62 | Karmen;4492 63 | Stanislava;4492 64 | Elizabeta;412 65 | Anita;4112 66 | Aleksandra;4101 67 | Brigita;4082 68 | Lara;3907 69 | Cvetka;3859 70 | Metka;3832 71 | Jana;3752 72 | Monika;3686 73 | Pavla;3674 74 | Ema;3669 75 | Maša;3645 76 | Nevenka;3625 77 | Natalija;3614 78 | Slavica;346 79 | Marjana;3416 80 | Renata;3392 81 | Lucija;3383 82 | Branka;3379 83 | Jasmina;3377 84 | Lea;3348 85 | Rozalija;3254 86 | Saša;3238 87 | Tamara;3176 88 | Vera;3131 89 | Klara;313 90 | Kaja;3113 91 | Bernarda;3047 92 | Danijela;299 93 | Klavdija;2988 94 | Erika;2892 95 | Bojana;2867 96 | Romana;2834 97 | Mira;2809 98 | Jasna;2786 99 | Lana;2729 100 | Zala;2679 101 | Jelka;2669 102 | Polona;2662 103 | Mirjana;2587 104 | Sandra;2563 105 | Valerija;2535 106 | Valentina;2514 107 | Teja;2483 108 | Tadeja;2482 109 | Manca;2444 110 | Mihaela;2363 111 | Sanja;2362 112 | Julijana;2333 113 | Ida;2331 114 | Laura;2316 115 | Ines;2306 116 | Breda;2299 117 | Karolina;2239 118 | Matilda;2235 119 | Albina;2218 120 | Maruša;2183 121 | Gabrijela;2182 122 | Ksenija;2174 123 | Amalija;2159 124 | Patricija;2151 125 | Nuša;214 126 | Hana;2033 127 | Vanja;2006 128 | Zofija;2 129 | Magdalena;1993 130 | Viktorija;1991 131 | Vlasta;1946 132 | Cecilija;1922 133 | Julija;1919 134 | Marjetka;1918 135 | Melita;1895 136 | Živa;187 137 | Ljubica;1819 138 | Emilija;1805 139 | Ajda;1783 140 | Marina;1774 141 | Gordana;175 142 | Justina;1741 143 | Marinka;1725 144 | Marica;1719 145 | Pia;1653 146 | Polonca;1643 147 | Nadja;1615 148 | Urša;1593 149 | Alja;1562 150 | Neja;1547 151 | Milka;1528 152 | 
Damjana;1515 153 | Tea;1511 154 | Karin;1485 155 | Nastja;1469 156 | Doroteja;1445 157 | Marijana;1405 158 | Milica;1391 159 | Jerneja;1371 160 | Štefka;133 161 | Nives;1328 162 | Slavka;1325 163 | Dušanka;1293 164 | Taja;1285 165 | Andrejka;1271 166 | Jelena;1267 167 | Marjanca;1241 168 | Stanka;1237 169 | Ana Marija;1229 170 | Lilijana;1229 171 | Irma;1227 172 | Larisa;1198 173 | Miroslava;1195 174 | Mirjam;1188 175 | Rebeka;1167 176 | Zlatka;1166 177 | Jolanda;1115 178 | Zvonka;1095 179 | Zora;1074 180 | Hermina;1036 181 | Ivica;1032 182 | Blanka;1009 183 | Tinkara;997 184 | Zoja;996 185 | Erna;981 186 | Gaja;978 187 | Iris;966 188 | Liljana;966 189 | Brina;962 190 | Hedvika;960 191 | Anka;959 192 | Daniela;943 193 | Magda;941 194 | Daša;934 195 | Iva;931 196 | Vilma;927 197 | Anamarija;911 198 | Jerica;911 199 | Adrijana;910 200 | Tia;907 -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/SloveniaMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Franc;29028 2 | Janez;24131 3 | Anton;20875 4 | Ivan;20624 5 | Jožef;19421 6 | Andrej;17416 7 | Marko;17245 8 | Jože;15559 9 | Marjan;13222 10 | Peter;12928 11 | Milan;12282 12 | Matej;11791 13 | Tomaž;11502 14 | Stanislav;11215 15 | Branko;11144 16 | Aleš;10704 17 | Luka;10589 18 | Bojan;10544 19 | Robert;10492 20 | Boštjan;10006 21 | Matjaž;9703 22 | Rok;9619 23 | Gregor;9564 24 | Martin;9321 25 | Miha;878 26 | Alojz;8696 27 | Igor;846 28 | Boris;8408 29 | Dušan;8318 30 | Dejan;8008 31 | David;7638 32 | Uroš;7031 33 | Jan;698 34 | Jure;6931 35 | Mitja;6867 36 | Simon;672 37 | Blaž;6674 38 | Nejc;6446 39 | Žiga;6408 40 | Darko;6213 41 | Drago;6079 42 | Klemen;6029 43 | Štefan;5893 44 | Primož;5774 45 | Jernej;5745 46 | Matic;555 47 | Aleksander;554 48 | Miran;5469 49 | Roman;5414 50 | Žan;5303 51 | Denis;5117 52 | Gašper;5021 53 | Tadej;5008 54 | Vladimir;4854 55 | Srečko;4633 56 | Slavko;4612 57 | Janko;4508 
58 | Mirko;4493 59 | Aljaž;446 60 | Borut;4386 61 | Anže;4382 62 | Damjan;4357 63 | Miroslav;4309 64 | Jaka;4121 65 | Alojzij;3891 66 | Matija;3887 67 | Jakob;3865 68 | Zoran;3834 69 | Stanko;3823 70 | Danijel;3763 71 | Alen;3688 72 | Mihael;361 73 | Domen;3454 74 | Tilen;343 75 | Marijan;3378 76 | Vinko;3234 77 | Rudolf;3225 78 | Goran;3199 79 | Sašo;3182 80 | Iztok;3142 81 | Viktor;3086 82 | Nik;3081 83 | Jurij;3014 84 | Matevž;2796 85 | Zvonko;2717 86 | Andraž;271 87 | Pavel;267 88 | Zdravko;267 89 | Urban;2656 90 | Leon;2645 91 | Edvard;2601 92 | Danilo;2546 93 | Vid;2524 94 | Rajko;2502 95 | Samo;2446 96 | Zlatko;2433 97 | Gorazd;2412 98 | Dragan;2383 99 | Bogdan;2365 100 | Filip;2335 101 | Ludvik;2275 102 | Benjamin;2274 103 | Tim;2253 104 | Sandi;2243 105 | Emil;2208 106 | Josip;2173 107 | Ciril;2101 108 | Frančišek;2095 109 | Kristjan;2091 110 | Sebastjan;2083 111 | Franci;2031 112 | Vojko;2025 113 | Erik;201 114 | Silvo;1968 115 | Albin;1936 116 | Mark;1859 117 | Željko;1806 118 | Damijan;1802 119 | Damir;1799 120 | Leopold;1776 121 | Maks;1776 122 | Aljoša;1763 123 | Božidar;1758 124 | Daniel;168 125 | Viljem;1679 126 | Dominik;1647 127 | Silvester;1639 128 | Timotej;163 129 | Miloš;1595 130 | Vincenc;1593 131 | Stojan;1555 132 | Karel;155 133 | Gal;1528 134 | Tomislav;1497 135 | Niko;1478 136 | Lovro;1476 137 | Davorin;1451 138 | Valentin;1436 139 | Franjo;1426 140 | Nikola;1418 141 | Patrik;1409 142 | Mario;1396 143 | Saša;1374 144 | Grega;1316 145 | Ladislav;1314 146 | Anej;1306 147 | Maj;13 148 | Vlado;1299 149 | Mladen;1284 150 | Bogomir;1256 151 | Zdenko;1256 152 | Kristijan;1236 153 | Davor;1229 154 | Aleksandar;1209 155 | Tine;1208 156 | Karl;1171 157 | Stjepan;1136 158 | Sebastijan;1109 159 | Ernest;1104 160 | Maksimiljan;1089 161 | Ivo;1077 162 | Jasmin;1074 163 | Elvis;1051 164 | Rado;1037 165 | Avgust;1027 166 | Aleks;1008 167 | Lan;1 168 | Valter;998 169 | Jani;979 170 | Nenad;967 171 | Ervin;959 172 | Ignac;954 173 | Marcel;952 174 | 
Adolf;933 175 | Izidor;933 176 | Petar;921 177 | Metod;886 178 | Edin;885 179 | Ferdinand;866 180 | Renato;863 181 | Nikolaj;860 182 | Radovan;858 183 | Bruno;850 184 | Albert;837 185 | Nino;835 186 | Rene;822 187 | Senad;818 188 | Mirsad;809 189 | Joško;801 190 | Sergej;800 191 | Bernard;794 192 | Hasan;782 193 | Slobodan;776 194 | Rudi;773 195 | Samir;768 196 | Rafael;766 197 | Miro;759 198 | Feliks;755 199 | Bor;753 200 | Cvetko;747 -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/SomaliaFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Aasha 2 | Ambro 3 | Awa 4 | Cambro 5 | Cawo 6 | Faadumo 7 | Fawzia 8 | Haweeya 9 | Sahra 10 | Ubah 11 | Ubax -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/SomaliaMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Ahmed 2 | Arale 3 | Awaale 4 | Axmed 5 | Caraale 6 | Guleed 7 | Gutaale 8 | Hussein 9 | Maxammed 10 | Maxamud 11 | Mohammad 12 | Muxumed 13 | Omar 14 | Waabberi 15 | Xusseen -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/SpainFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | alba;116277 2 | alejandra;69528 3 | alicia;120292 4 | amparo;109966 5 | ana belen;60841 6 | ana isabel;76535 7 | ana maria;281533 8 | ana;826040 9 | andrea;138140 10 | angela;133314 11 | angeles;333908 12 | anna;61518 13 | antonia;337408 14 | asuncion;84650 15 | aurora;71097 16 | beatriz;164063 17 | begoña;84102 18 | belen;149164 19 | blanca;67631 20 | carla;62667 21 | carmen;1280348 22 | carolina;88673 23 | catalina;77087 24 | celia;60889 25 | clara;63448 26 | claudia;87073 27 | concepcion;257132 28 | consuelo;86802 29 | cristina;334709 30 | daniela;54387 31 | dolores;542773 32 | elena;275596 33 | 
elisa;61034 34 | elvira;50351 35 | emilia;72920 36 | encarnacion;156150 37 | esperanza;76542 38 | esther;143931 39 | eugenia;61130 40 | eva maria;62835 41 | eva;160297 42 | fatima;68059 43 | francisca;295541 44 | gema;53141 45 | gloria;99088 46 | ines;89983 47 | inmaculada;127281 48 | irene;123122 49 | isabel;696367 50 | jesus;168291 51 | jose;213294 52 | josefa;455283 53 | josefina;60501 54 | juana;198143 55 | julia;146777 56 | laura;289716 57 | lidia;67974 58 | lorena;80684 59 | lourdes;92080 60 | lucia;213352 61 | luisa;262449 62 | luz;86861 63 | magdalena;69869 64 | manuela;183881 65 | mar;119016 66 | margarita;126709 67 | maria angeles;234432 68 | maria antonia;60529 69 | maria carmen;681108 70 | maria concepcion;63935 71 | maria cristina;46501 72 | maria dolores;273002 73 | maria elena;60020 74 | maria isabel;207464 75 | maria jesus;145275 76 | maria jose;207548 77 | maria josefa;93760 78 | maria luisa;174308 79 | maria mar;100228 80 | maria mercedes;77338 81 | maria nieves;54352 82 | maria pilar;271733 83 | maria rosa;66784 84 | maria rosario;83246 85 | maria soledad;47952 86 | maria teresa;263124 87 | maria victoria;64998 88 | maria;6424495 89 | marina;114441 90 | marta;246179 91 | mercedes;262473 92 | milagros;76928 93 | miriam;66979 94 | monica;127549 95 | montserrat;139860 96 | natalia;102174 97 | nerea;61723 98 | nieves;100237 99 | noelia;81255 100 | nuria;135329 101 | olga;74131 102 | patricia;161423 103 | paula;168339 104 | pilar;494829 105 | purificacion;59076 106 | raquel;171636 107 | remedios;55766 108 | rocio;155782 109 | rosa maria;149558 110 | rosa;424271 111 | rosario;247785 112 | sandra;125340 113 | sara;170789 114 | silvia;142816 115 | sofia;69063 116 | soledad;94461 117 | sonia;122889 118 | susana;118570 119 | teresa;447427 120 | trinidad;55377 121 | vanesa;54479 122 | veronica;91607 123 | vicenta;53483 124 | victoria;158315 125 | virginia;57975 126 | yolanda;124747 127 | 
-------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/SpainMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | adolfo;37524 2 | adrian;161860 3 | agustin;92646 4 | aitor;46700 5 | albert;40397 6 | alberto;297720 7 | alejandro;302909 8 | alex;37395 9 | alexander;35152 10 | alfonso;130213 11 | alfredo;69219 12 | alvaro;152567 13 | andres;204352 14 | angel;658510 15 | antonio;1543089 16 | arturo;43433 17 | benito;33908 18 | borja;37814 19 | carlos;644525 20 | carmelo;30605 21 | cesar;78002 22 | christian;32887 23 | cristian;76628 24 | cristobal;41555 25 | daniel;355276 26 | david;450141 27 | diego;182521 28 | domingo;76083 29 | eduardo;163432 30 | emilio;118841 31 | enrique;221135 32 | ernesto;34026 33 | esteban;50271 34 | eugenio;44627 35 | felipe;70400 36 | felix;91800 37 | fernando;308528 38 | francisco;1236386 39 | francisco javier;290742 40 | francisco jose;95934 41 | gabriel;114588 42 | gerardo;30632 43 | german;36366 44 | gonzalo;72787 45 | gregorio;51721 46 | guillermo;89757 47 | gustavo;31438 48 | hector;53610 49 | hugo;58569 50 | ignacio;183238 51 | iker;34703 52 | isaac;30631 53 | ismael;60846 54 | ivan;149838 55 | jaime;119905 56 | javier;739445 57 | jesus;586238 58 | joan;77595 59 | joaquin;165703 60 | john;35009 61 | jonathan;34502 62 | jordi;87852 63 | jorge;257708 64 | jose;2925472 65 | jose angel;41288 66 | jose antonio;323063 67 | jose carlos;46192 68 | jose francisco;35804 69 | jose ignacio;40801 70 | jose luis;312302 71 | jose manuel;250869 72 | jose maria;225968 73 | jose miguel;66157 74 | jose ramon;62330 75 | josep;87000 76 | juan;1394890 77 | juan antonio;139865 78 | juan carlos;163917 79 | juan francisco;48767 80 | juan jose;160579 81 | juan luis;35393 82 | juan manuel;116759 83 | julian;100043 84 | julio;117541 85 | lorenzo;46453 86 | lucas;34262 87 | luis;864099 88 | luis miguel;37573 89 | manuel;1368786 90 | marc;62713 91 | marco;33793 
92 | marcos;99765 93 | maria;341416 94 | mariano;67690 95 | mario;110462 96 | martin;69973 97 | miguel;723316 98 | miguel angel;229902 99 | mohamed;72394 100 | nicolas;69125 101 | oscar;152371 102 | pablo;265001 103 | pedro;427974 104 | rafael;313636 105 | ramon;280915 106 | raul;161245 107 | ricardo;119085 108 | roberto;130565 109 | rodrigo;42483 110 | ruben;140497 111 | salvador;111412 112 | samuel;55995 113 | santiago;145918 114 | sebastian;69021 115 | sergio;218478 116 | tomas;99312 117 | valentin;39824 118 | vicente;223809 119 | victor;182743 120 | victor manuel;39737 121 | xavier;47161 -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/SwedenLastNames.csv: -------------------------------------------------------------------------------- 1 | Johansson 2 | Andersson 3 | Karlsson 4 | Nilsson 5 | Eriksson 6 | Larsson 7 | Olsson 8 | Persson 9 | Svensson 10 | Gustafsson 11 | Pettersson 12 | Jonsson 13 | Jansson 14 | Hansson 15 | Bengtsson 16 | Jönsson 17 | Lindberg 18 | Jakobsson 19 | Magnusson 20 | Olofsson 21 | Lindström 22 | Lindqvist 23 | Lindgren 24 | Axelsson 25 | Berg 26 | Lundberg 27 | Bergström 28 | Lundgren 29 | Mattsson 30 | Lundqvist 31 | Lind 32 | Berglund 33 | Fredriksson 34 | Sandberg 35 | Henriksson 36 | Forsberg 37 | Sjöberg 38 | Danielsson 39 | Håkansson 40 | Wallin 41 | Engström 42 | Eklund 43 | Lundin 44 | Gunnarsson 45 | Fransson 46 | Samuelsson 47 | Holm 48 | Bergman 49 | Björk 50 | Wikström 51 | Isaksson 52 | Bergqvist 53 | Arvidsson 54 | Nyström 55 | Holmberg 56 | Löfgren 57 | Claesson 58 | Söderberg 59 | Nyberg 60 | Blomqvist 61 | Mårtensson 62 | Nordström 63 | Lundström 64 | Pålsson 65 | Eliasson 66 | Björklund 67 | Viklund 68 | Berggren 69 | Sandström 70 | Nordin 71 | Lund 72 | Ström 73 | Hermansson 74 | Åberg 75 | Ekström 76 | Holmgren 77 | Sundberg 78 | Hedlund 79 | Dahlberg 80 | Hellström 81 | Sjögren 82 | Abrahamsson 83 | Martinsson 84 | Andreasson 85 | Falk 86 | Öberg 87 | 
Månsson 88 | Blom 89 | Ek 90 | Åkesson 91 | Strömberg 92 | Jonasson 93 | Norberg 94 | Hansen 95 | Sundström 96 | Åström 97 | Holmqvist 98 | Ivarsson 99 | Lindholm 100 | Sundqvist -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/TurkeyFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | ADA 2 | AFET 3 | AĞIT 4 | AHENK 5 | AHU 6 | AJLAN 7 | AKARSU 8 | AKASYA 9 | AKSU 10 | ALBA 11 | ALEV 12 | ALGIN 13 | ALPİKE 14 | ALTIN 15 | ARYA 16 | ARZU 17 | ASENA 18 | ASLI 19 | ASU 20 | ASUMAN 21 | ASYA 22 | ATLAS 23 | AYBİKE 24 | AYBİRGEN 25 | AYÇA 26 | AYÇİÇEK 27 | AYDA 28 | AYDAN 29 | AYEVİ 30 | AYKIZ 31 | AYLA 32 | AYLİN 33 | AYSAR 34 | AYSIN 35 | AYSU 36 | AYŞE 37 | AYŞEGÜL 38 | AYŞENUR 39 | BADE 40 | BAĞLAN 41 | BAHAR 42 | BAKLAN 43 | BALA 44 | BALKIN 45 | BALKIZ 46 | BANU 47 | BAŞAK 48 | BEGÜM 49 | BELDE 50 | BELEN 51 | BELEN 52 | BELGİN 53 | BELİZ 54 | BENAN 55 | BENEK 56 | BENGİ 57 | BENİZ 58 | BERGÜZAR 59 | BERİA 60 | BERİL 61 | BERNA 62 | BERRAK 63 | BERRAN 64 | BESİSU 65 | BESTE 66 | BESTENİGAR 67 | BETÜL 68 | BEYZA 69 | BİKE 70 | BİLGE 71 | BİLGÜN 72 | BİLHAN 73 | BİLLUR 74 | BİRİCİK 75 | BUĞDAY 76 | BUKET 77 | BURCU 78 | BURÇAK 79 | BURÇİN 80 | BUSE 81 | BÜKÜM 82 | BÜŞRA 83 | CANA 84 | CANAN 85 | CANDAN 86 | CANFEZA 87 | CANKIZ 88 | CANOVA 89 | CANSU  90 | CEMRE 91 | CEREN 92 | CEVZA 93 | CEYDA 94 | CEYLAN 95 | ÇAĞLA 96 | ÇAKIL 97 | ÇİÇEK 98 | ÇİĞDEM 99 | ÇIĞLIK 100 | ÇİLEK 101 | ÇİLER 102 | ÇİM 103 | ÇİMEN 104 | ÇİSE-M 105 | ÇİSİL 106 | ÇOLPAN 107 | DAMLA 108 | DEFNE 109 | DEMET 110 | DEMRE 111 | DENİZ 112 | DERYA 113 | DESEN 114 | DESTEGÜL 115 | DEVİN 116 | DİCLE 117 | DİDEM 118 | DİLARA 119 | DİLAY 120 | DİLEK 121 | DİLEM 122 | DİLNİŞİN 123 | DİLRÜBA 124 | DİLSU 125 | DİLŞAH 126 | DOLUNAY 127 | DUYGU 128 | EBRU 129 | ECE 130 | ECMEL 131 | EDA 132 | EGE 133 | ELÇİN 134 | ELİF 135 | ELVAN 136 | ESEN 137 | ESİN 138 | ESNA 139 | ESRA 140 | ETİ 141 | EVİN 
142 | EYLÜL 143 | EZGİ 144 | FERAH 145 | FERAY 146 | FERDA 147 | FEYZA 148 | FİDAN 149 | FİGEN 150 | FİRUZE 151 | FULYA 152 | FUNDA 153 | FÜRUZAN 154 | FÜSUN 155 | GAMZE 156 | GAYE 157 | GECE 158 | GELİNCİK 159 | GERÇEK 160 | GİZEM 161 | GONCA 162 | GÖKÇE 163 | GÖKSU  164 | GÖLGE-N 165 | GÖZDE 166 | GÖZEN 167 | GÜHER 168 | GÜLBAHAR 169 | GÜLÇİN 170 | GÜLFEM 171 | GÜLGÜN 172 | GÜLİSTAN 173 | GÜLİZ 174 | GÜLİZAR 175 | GÜLRİZ 176 | GÜLŞAH * 177 | GÜL-ÜM 178 | GÜLÜMSE 179 | GÜNÇİÇEK 180 | GÜVERCİN 181 | GÜZ 182 | GÜZEL 183 | GÜZİN 184 | HANDAN 185 | HARİKA 186 | HASLET 187 | HAYAL 188 | HAZAL 189 | HAZAN 190 | HAZAR 191 | HAZİRAN 192 | HECE 193 | HEVES 194 | HİLAL 195 | HOŞSEDA 196 | HÜLYA 197 | HÜMA 198 | HÜMEYRA 199 | HÜNER 200 | HÜRREM 201 | HÜSNA 202 | HÜSÜN 203 | İDİL 204 | İLAYDA 205 | ILGAZ 206 | ILGIM 207 | ILGIN 208 | İLGÜN 209 | İLKBAHAR 210 | İLKE 211 | İLKYAZ 212 | İLSU 213 | İLTER 214 | İMGE 215 | İMREN 216 | İNCİ 217 | İNCİLAY 218 | İPAR 219 | İPEK 220 | IRAZ-CA * 221 | İREM 222 | İRİS 223 | IRMAK 224 | IŞIK 225 | IŞIL 226 | IŞILAY 227 | IŞIN 228 | ITIR 229 | İYEM 230 | İZEL 231 | İZEM 232 | İZGİ 233 | İZ-İM 234 | KAMELYA 235 | KARDELEN 236 | KELEBEK 237 | KİMYA 238 | KÖSEM 239 | KUĞU 240 | KUMRU 241 | KUMSAL 242 | KUTAY 243 | KUTSAL 244 | LAL 245 | LALE 246 | LERZAN 247 | LEYLA 248 | LEYLİFER 249 | LİLA 250 | MANOLYA 251 | MARAL 252 | MAVİSU 253 | MEHTAP 254 | MEHVEŞ 255 | MELDA 256 | MELİKE 257 | MELİS 258 | MELİSA 259 | MELODİ 260 | MENEKŞE 261 | MENEVİŞ 262 | MERAL 263 | MERCAN 264 | MERİH 265 | MERVE 266 | MEVSİM 267 | MİMOZA 268 | MİNE 269 | MÜGE 270 | NAĞME 271 | NAZ 272 | NAZLI-M 273 | NEHİR 274 | NERGİS 275 | NESLİŞAH 276 | NESRİN 277 | NEŞE-M 278 | NEVAL 279 | NEVBAHAR 280 | NEVESER 281 | NEVGECE 282 | NEVGÜL 283 | NEVRA 284 | NEYİR 285 | NİGAR 286 | NİHAL 287 | NİHAN 288 | NİL 289 | NİLÜFER 290 | NİSAN 291 | NURGÜL 292 | NURGÜN 293 | NURSELİ 294 | NÜKET 295 | NÜKHET 296 | NÜKTE 297 | OYA 298 | OYLUM 299 | ÖDÜL 300 | ÖRGÜN 301 | ÖVGÜ 302 | ÖYKÜ 
303 | ÖZEN 304 | ÖZGE 305 | ÖZLEM 306 | PAMİRA 307 | PAPATYA 308 | PELİN 309 | PERA 310 | PERÇEM 311 | PERİ 312 | PERRAN 313 | PETEK 314 | PINAR 315 | PIRIL 316 | PIRILTI 317 | PITIRCIK 318 | PİYALE 319 | RANA 320 | RENAN 321 | RENGİN 322 | REZZAN 323 | RUHSAR 324 | RÜÇHAN 325 | RÜYA 326 | SABA 327 | SABAH 328 | SADBERK 329 | SAHİL 330 | SAHRA 331 | SALKIM 332 | SANEM 333 | SAYGIN 334 | SAYIL 335 | SEBİL 336 | SEBLA 337 | SEÇİL 338 | SEÇKİN 339 | SEDA 340 | SEDEF 341 | SEDEN 342 | SEHER 343 | SEL 344 | SELDA 345 | SELEN 346 | SELİN 347 | SELİNTİ 348 | SELİS 349 | SELMİN 350 | SELVİ 351 | SEMA 352 | SEMİRAMİS 353 | SENA 354 | SEREN 355 | SERENAT 356 | SERRA 357 | SERTAP 358 | SERVİ 359 | SES 360 | SEVDEM 361 | SEVEN 362 | SEVGİLİ 363 | SEVİ 364 | SEVİL 365 | SEVİNÇ 366 | SEYYAL 367 | SEZEN 368 | SEZGİ 369 | SİBEL 370 | SILA 371 | SİM 372 | SİMA 373 | SİMGE 374 | SİMİN 375 | SİMYA 376 | SİNE-M 377 | SİREN 378 | SİRET 379 | SIRMA 380 | SONYAZ 381 | SU 382 | SUMRU 383 | SUNA 384 | SÜLÜN 385 | SÜNDÜS 386 | SÜSEN 387 | ŞAHBANU 388 | ŞAHİKA 389 | ŞAN 390 | ŞANS 391 | ŞAYESTE 392 | ŞEBBOY 393 | ŞEBNEM 394 | ŞEHNAZ 395 | ŞEHRAZAT 396 | ŞELALE 397 | ŞERMİN 398 | ŞEVVAL 399 | ŞEYDA 400 | ŞİİR 401 | ŞİMAL 402 | ŞİRİN 403 | ŞÖLEN 404 | ŞÖLEN-DE 405 | ŞULE 406 | TAMAR 407 | TANGO 408 | TANYELİ 409 | TİLBE 410 | TILSIM 411 | TOMRİS 412 | TÖREN 413 | TUĞBA 414 | TUĞÇE 415 | TULU 416 | TUTKU 417 | TUTYA 418 | TÜLİN 419 | TÜMAY 420 | TÜRKUVAZ 421 | TÜRKÜ 422 | TÜVANA 423 | UMAY 424 | ÜLGEN 425 | ÜLGER 426 | ÜLKER 427 | ÜLKÜ-M 428 | ÜRÜN 429 | ÜVERCİNKA 430 | ÜZÜM 431 | VENÜS 432 | VERDA 433 | VERDİNAZ 434 | VİLDAN 435 | VUSLAT 436 | YAĞMUR 437 | YANKI 438 | YAPRAK 439 | YAR 440 | YAREN 441 | YASEMİN 442 | YAZGI 443 | YAZGÜLÜ 444 | YELDA 445 | YELİZ 446 | YENİAY 447 | YEŞER 448 | YEŞİM 449 | YILDIZ 450 | YONCA 451 | YOSUN 452 | YÖRÜK 453 | YURDAGÜL 454 | YURDANUR 455 | ZEREN 456 | ZERRİN 457 | ZEYNEP 458 | ZEYNO 459 | ZUHAL 460 | ZUHAL 461 | ZÜLAL 462 | ZÜLAL 463 | ZÜLEYHA 464 | 
ZÜLEYHA 465 | ZÜLÜF 466 | ZÜLÜF 467 | ZÜMRA 468 | ZÜMRA 469 | ZÜMRÜT 470 | ZÜMRÜT 471 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/TurkeyMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | ACAR 2 | AFŞAR 3 | AFŞİN 4 | AHSEN 5 | AKIN 6 | ALAZ 7 | ALEMDAR 8 | ALİ 9 | ALİCAN 10 | ALİHAN 11 | ALİNUR 12 | ALİZE 13 | ALP 14 | ALPAY 15 | ALTAN 16 | AMİL 17 | ANDAÇ 18 | ANGIN 19 | ANIL 20 | ARAS 21 | ARDA 22 | ARDIÇ 23 | ARMAĞAN 24 | ARUZ 25 | ATA 26 | ATABEK 27 | ATAÇ 28 | ATAOL 29 | ATEŞ 30 | ATİK 31 | ATIL 32 | ATILGAN 33 | ATİLLA 34 | ATOM 35 | AYAZ 36 | AYBARS 37 | AYBERK 38 | AYDEMİR 39 | AYERDEM 40 | AYKON 41 | AYKUT 42 | AYTAÇ 43 | AYTEK 44 | AYTUĞ 45 | AYVAZ 46 | BABÜR 47 | BAHA 48 | BAHADIR 49 | BALKAN 50 | BALKAR 51 | BALKIR 52 | BARAN 53 | BARBAROS 54 | BARIŞ 55 | BAŞAR 56 | BATU 57 | BATUR 58 | BAYKAL 59 | BAYÜLKEN 60 | BERAT 61 | BEREKET 62 | BERK 63 | BERKAY 64 | BERKE 65 | BESİM 66 | BETİM 67 | BİLGİN 68 | BİRKAN 69 | BORA 70 | BUĞRA 71 | BULUT 72 | BURAĞAN 73 | BURAK 74 | BURÇAK 75 | BURKHAN 76 | CAN 77 | CEM 78 | CENK 79 | CESUR 80 | CEYHAN 81 | CÖMERT 82 | CUMHUR 83 | CÜNEYT 84 | ÇAĞAN 85 | ÇAĞATAY 86 | ÇAĞDAŞ 87 | ÇAĞLAR 88 | ÇAĞLAYAN 89 | ÇAĞRI 90 | ÇAKABEY 91 | ÇAKIR 92 | ÇELİK 93 | ÇETİN 94 | ÇEVİK 95 | ÇINAR 96 | ÇIVGIN 97 | DAĞHAN 98 | DEĞER 99 | DEMİR 100 | DENİZ 101 | DENİZHAN 102 | DERİN 103 | DERMAN 104 | DESTAN 105 | DEVRİM 106 | DİNÇ 107 | DİRİM 108 | DOĞAÇ 109 | DORUK 110 | DUMAN 111 | DURU 112 | DURUL 113 | DÜNYA 114 | ECEVİT 115 | EDİZ 116 | EFE 117 | EFLATUN 118 | EFSUN 119 | EGE 120 | EGEMEN 121 | EKİM 122 | EKİN 123 | ELÇİ 124 | ELGİN 125 | ELHAN 126 | EMİR 127 | EMRAH 128 | EMRE 129 | EMRİ 130 | ENGİN 131 | ENGİNSU 132 | ENİS 133 | ERAN 134 | ERDEM 135 | EREN 136 | ERGİN 137 | ERGUVAN 138 | ERİM 139 | ERK 140 | ERKİN 141 | ERKSİN 142 | ERTUNÇ 143 | ERTUNGA 144 | ESER 145 | ETKİN 146 | EVREN 147 | EVRENSEL 148 | EYLEM 
149 | FERHAN 150 | FERHAT 151 | FEYEZAN 152 | FEYYAZ 153 | FEZA 154 | FIRAT 155 | FURKAN 156 | GİRAY 157 | GÖKADA 158 | GÖKALP 159 | GÖKBERK 160 | GÖKHAN 161 | GÖKMEN 162 | GÖKOVA 163 | GÖKSEL 164 | GÖKSENİN 165 | GÖKTÜRK 166 | GÖNENÇ 167 | GÖRKEM 168 | GURUR 169 | GÜN 170 | GÜNEŞ 171 | GÜR 172 | GÜVEN 173 | GÜVENÇ 174 | HAKAN 175 | HALİÇ 176 | HALUK 177 | HARUN 178 | HAŞMET 179 | HAYAT 180 | HINCAL 181 | HİSAR 182 | HİTİT 183 | HÜR 184 | HÜRRİYET 185 | İKLİM 186 | İLBAY 187 | ILGAR 188 | İLGİ 189 | İLHAM 190 | İLHAN 191 | İNAN 192 | İNANÇ 193 | IRAK 194 | İSFENDİYAR 195 | İSKENDER 196 | İSTEMİHAN 197 | İZGÜ 198 | KAAN 199 | KAHRAMAN 200 | KAMER 201 | KANAT 202 | KANDEMİR 203 | KARTAL 204 | KAYA 205 | KAYIHAN 206 | KAYNAK 207 | KEREM 208 | KERİM 209 | KILIÇ 210 | KIVANÇ 211 | KIVILCIM 212 | KORAL 213 | KORAY 214 | KORHAN 215 | KORKUT 216 | KÖKSAL 217 | KUBİLAY 218 | KUDAY 219 | KUDRET 220 | KURTHAN 221 | KURTULUŞ 222 | KURULTAY 223 | KUTAN 224 | KUTLU 225 | KUTLUKHAN 226 | KÜRŞAD 227 | LACİN 228 | LADİN 229 | LEDÜN 230 | LEVENT 231 | LİDER 232 | LİRİK 233 | MARTI 234 | MECNUN 235 | MELİH 236 | MELİK 237 | MENGÜ 238 | MERİÇ 239 | MERT 240 | METE 241 | MEVZUN 242 | MİRALAY 243 | MURAT 244 | MUTLU 245 | NEDİM 246 | ODAK 247 | ODKAN 248 | OGÜN 249 | OĞRUN 250 | OĞUL 251 | OKTAR 252 | OLAY 253 | OLCAYTO 254 | OLGU 255 | OLGUN 256 | ONAT 257 | ONGUN 258 | ONUR 259 | ORÇUN 260 | ORKUN 261 | OTAĞ 262 | OVA 263 | OYTUN 264 | OZAN 265 | ÖCAL 266 | ÖĞÜT 267 | ÖKTEN 268 | ÖMER 269 | ÖMÜR 270 | ÖNAL 271 | ÖNCEL 272 | ÖNCÜ 273 | ÖNDER 274 | ÖNER 275 | ÖNEY 276 | ÖNSEL 277 | ÖREN 278 | ÖRSAN 279 | ÖZGÜN 280 | ÖZGÜR 281 | PAMİR 282 | PARS 283 | PELİT 284 | POYRAZ 285 | REHA 286 | REVAN 287 | RÜZGAR 288 | SAĞANAK 289 | SANAT 290 | SANCAK 291 | SARP 292 | SARPER 293 | SATVET 294 | SAVAŞ 295 | SAYGIN 296 | SELİM 297 | SELMAN 298 | SEMEN 299 | SEMİH 300 | SERCAN  301 | SERDAR 302 | SERETAN 303 | SERGEN 304 | SERHAN 305 | SERHAT 306 | SERKAN 307 | SERTAÇ 308 | SERTUĞ 309 | SEZA 310 | 
SİNAN 311 | SİPAHİ 312 | SİPER 313 | SOKRA 314 | SONAT 315 | SORGUN 316 | SÖĞÜT 317 | SÖYLEM 318 | SÖZ 319 | SUNAY 320 | SUNGUN 321 | SUNGUR 322 | SÜAVİ 323 | SÜER 324 | SÜERDEM 325 | SÜHA 326 | SÜHEYL 327 | SÜMER 328 | SÜREYYA 329 | ŞAHİN 330 | ŞAN 331 | ŞANSAL 332 | ŞARIK 333 | ŞEHMUZ 334 | ŞEN....... 335 | ŞİRZAT 336 | TAN 337 | TANSEL  338 | TARIK 339 | TARKAN 340 | TAYFUN 341 | TAYGA 342 | TAYLAN 343 | TİBET 344 | TINAZ 345 | TOKTAMIŞ 346 | TOLGA 347 | TOLUN 348 | TONGUÇ 349 | TOPRAK 350 | TORAMAN 351 | TOYGAR 352 | TOYGUN 353 | TÖRE 354 | TÖZ 355 | TUFAN 356 | TUGAY 357 | TUNA 358 | TUNCA 359 | TUNÇ 360 | TÜMER  361 | UFUK 362 | UĞUR 363 | ULUBEY 364 | ULUÇ 365 | ULUĞ 366 | ULUM 367 | UMAR 368 | UMUR 369 | UMUT 370 | UNAN 371 | URAĞAN 372 | URAN 373 | URAY 374 | UTARİT 375 | UTKU 376 | UYGAR 377 | UYGUR 378 | UZAY 379 | ÜLKE 380 | ÜNAL  381 | ÜRÜN 382 | VADİ 383 | VERİM 384 | VOLKAN 385 | YAĞIZ 386 | YALGIN 387 | YALIM 388 | YALIN 389 | YALMAN 390 | YALVAÇ 391 | YAMAÇ 392 | YAMAN 393 | YARDAN  394 | YARIN 395 | YASİN 396 | YAVER 397 | YEKTA 398 | YETKİN 399 | YİĞİT 400 | YILAYDIN 401 | YÜCE 402 | ZAMAN 403 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/VietnamFemaleUTF8.csv: -------------------------------------------------------------------------------- 1 | Anh 2 | Bich 3 | Chau 4 | Diep 5 | Dung 6 | Hang 7 | Hanh 8 | Hoa 9 | Hong 10 | Khanh 11 | Kim 12 | Lan 13 | Liem 14 | Lien 15 | Mai 16 | Mong 17 | My 18 | Nguyet 19 | Nhung 20 | Phuong 21 | Quynh 22 | Thi 23 | Thoa 24 | Thu 25 | Trinh 26 | Tuyet 27 | Uyen 28 | Xuan 29 | Yen -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/VietnamMaleUTF8.csv: -------------------------------------------------------------------------------- 1 | An 2 | Anh 3 | Ba 4 | Bang 5 | Bao 6 | Bay 7 | Canh 8 | Ching 9 | Cong 10 | Cung 11 | Dat 12 | Dong 13 | Dung 14 | Gia 15 | Giang 16 | Gon 17 | Ha 18 
| Hai 19 | Hanh 20 | Hieh 21 | Hiep 22 | Hoa 23 | Hoai 24 | Hoang 25 | Hoi 26 | Hong 27 | Hung 28 | Huynh 29 | Khanh 30 | Khoa 31 | Khoi 32 | Ky 33 | Lai 34 | Lan 35 | Liem 36 | Linh 37 | Loc 38 | Long 39 | Luc 40 | Luu 41 | Luy 42 | Man 43 | Manh 44 | Minh 45 | Nam 46 | Nghia 47 | Ngoc 48 | Nguyen 49 | Nho 50 | Nhung 51 | Ninh 52 | On 53 | Phat 54 | Phu 55 | Phuc 56 | Phung 57 | Qua 58 | Quan 59 | Quang 60 | Quy 61 | Quyen 62 | Sang 63 | Sau 64 | Son 65 | Tam 66 | Tan 67 | Tao 68 | Thanh 69 | Thao 70 | Thien 71 | Thoi 72 | Thuan 73 | Tion 74 | Toan 75 | Tru 76 | Truc 77 | Trung 78 | Tu 79 | Tuan 80 | Tung 81 | Tuong 82 | Tuyen 83 | Ut 84 | Van 85 | Vang 86 | Vien 87 | Viet 88 | Vinh 89 | Vuand 90 | Xuan 91 | Xuong -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/countryStats.csv: -------------------------------------------------------------------------------- 1 | USA;69911 2 | India;23323 3 | UK;17849 4 | Canada;9190 5 | Germany;7853 6 | Australia;6429 7 | China;5552 8 | France;4632 9 | Russia;4580 10 | The Netherlands;4194 11 | Brazil;4018 12 | Sweden;3873 13 | Italy;2837 14 | Spain;2828 15 | Poland;2658 16 | Ukraine;2298 17 | Denmark;2053 18 | Belgium;2022 19 | Turkey;1910 20 | Norway;1758 21 | Switzerland;1745 22 | Romania;1635 23 | South Africa;1611 24 | Pakistan;1535 25 | Israel;1518 26 | New Zealand;1507 27 | Ireland;1339 28 | Japan;1259 29 | Argentina;1258 30 | Portugal;1255 31 | Mexico;1251 32 | Austria;1150 33 | Czech;1134 34 | Indonesia;1102 35 | Singapore;1098 36 | Finland;1086 37 | Iran;1004 38 | Philippines;960 39 | Greece;879 40 | Hungary;791 41 | South Korea;728 42 | Egypt;725 43 | Bangladesh;724 44 | Vietnam;691 45 | Bulgaria;674 46 | Belarus;669 47 | Taiwan;592 48 | Sri Lanka;576 49 | Malaysia;570 50 | Serbia;554 51 | Thailand;500 52 | Croatia;473 53 | Hong Kong;410 54 | Colombia;410 55 | Lithuania;387 56 | Chile;377 57 | Slovenia;347 58 | Latvia;338 59 | Slovakia;327 60 | 
Estonia;295 61 | Nepal;287 62 | Venezuela;275 63 | Kenya;270 64 | Uruguay;240 65 | United Arab Emirates;222 66 | Jordan;218 67 | Peru;214 68 | Nigeria;210 69 | Georgia;202 70 | Iceland;193 71 | Costa Rica;175 72 | Tunisia;172 73 | Macedonia (FYROM);159 74 | Lebanon;145 75 | Saudi Arabia;143 76 | Morocco;134 77 | Dominican Republic;132 78 | Syria;131 79 | Moldova;129 80 | Armenia;125 81 | Malta;118 82 | Bosnia and Herzegovina;111 83 | Kazakhstan;103 84 | Netherlands;102 85 | Ecuador;97 86 | Puerto Rico;89 87 | Guatemala;86 88 | Algeria;83 89 | Cyprus;81 90 | Cambodia;80 91 | Paraguay;77 92 | Bolivia;75 93 | Kuwait;69 94 | Uganda;68 95 | Ghana;66 96 | Palestine;63 97 | Luxembourg;63 98 | Burma;61 99 | Azerbaijan;59 100 | Cuba;57 101 | Uzbekistan;56 102 | Panama;54 103 | Jamaica;44 104 | Mauritius;44 105 | Bahrain;43 106 | El Salvador;43 107 | Myanmar;43 108 | Kyrgyzstan;38 109 | Ethiopia;37 110 | Albania;35 111 | Mongolia;33 112 | Qatar;33 113 | Trinidad and Tobago;33 114 | Nicaragua;31 115 | Iraq;29 116 | Honduras;28 117 | Tanzania;26 118 | Afghanistan;24 119 | Namibia;22 120 | Zimbabwe;21 121 | Oman;19 122 | Kosovo;19 123 | Madagascar;19 124 | Yemen;18 125 | Senegal;16 126 | Bermuda;15 127 | Sudan;15 128 | Maldives;15 129 | Montenegro;14 130 | Isle of Man;14 131 | Zambia;14 132 | Botswana;14 133 | Netherlands Antilles;13 134 | Micronesia;13 135 | Macau;12 136 | Reunion;12 137 | Faroe Islands;11 138 | Belize;11 139 | Cameroon;10 140 | Benin;9 141 | Libya;9 142 | Brunei Darussalam;9 143 | Mozambique;9 144 | Fiji;9 145 | US Virgin Islands;8 146 | Mali;7 147 | Gibraltar;7 148 | The Gambia;6 149 | Burkina Faso;6 150 | Malawi;6 151 | Barbados;6 152 | New Caledonia;6 153 | The Bahamas;5 154 | Greenland;5 155 | Somalia;5 156 | Cayman Islands;5 157 | Côte d'Ivoire;5 158 | Swaziland;4 159 | San Marino;4 160 | Andorra;4 161 | Borneo;4 162 | Liechtenstein;4 163 | St Lucia;4 164 | Guadeloupe;4 165 | Turkmenistan;3 166 | Aruba;3 167 | Laos;3 168 | Monaco;3 169 | Guam;3 170 | 
Lesotho;3 171 | Tajikistan;3 172 | Seychelles;3 173 | Angola;3 174 | Guyana;3 175 | Papua New Guinea;3 176 | Congo;3 177 | Liberia;2 178 | Solomon Islands;2 179 | Vanuatu;2 180 | Togo;2 181 | French Polynesia;2 182 | Cape Verde;2 183 | Bhutan;2 184 | Haiti;2 185 | Svalbard and Jan Mayen;2 186 | Rwanda;2 187 | Suriname;2 188 | Mauritania;2 189 | Guinea;2 190 | Vatican;1 191 | American Samoa;1 192 | Samoa;1 193 | Equatorial Guinea;1 194 | Norfolk Island;1 195 | Christmas Island;1 196 | Niger;1 197 | Tuvalu;1 198 | Sierra Leone;1 199 | St Vincent and the Grenadines;1 200 | Djibouti;1 201 | Antigua and Barbuda;1 202 | Aland Islands;1 203 | Martinique;1 204 | North Korea;1 205 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/diminutives.csv: -------------------------------------------------------------------------------- 1 | abigail;nabby;abby;gail 2 | abraham;abe 3 | adelaida;ida;idly 4 | alan;al 5 | alastair;al;alex 6 | albert;al 7 | alexander;alex;lex;xander;sander;sandy 8 | alexandra;alex;ali;lexie;sandy 9 | alfred;al 10 | alfred;alf;alfie;fred;fredo. 
11 | aloysius;lou;lewie 12 | amanda;mandy;mand 13 | andrew;andy;drew 14 | andrew;andy;drew;android;androo 15 | angus;gus 16 | anne;nan;nancy 17 | anthony;ant;tony 18 | antoinette;toni;netta 19 | arnold;arnie 20 | arthur;art 21 | augustine;gus;gussy 22 | augustus;gussie 23 | barbara;barb;barbie;babs;bobbie 24 | barnaby;barney 25 | bartholomew;bart;barty 26 | benjamin;ben;benji;benny 27 | beverley;bev 28 | bonnie;bunnie;benny;boner;bon 29 | bridget;biddy;bridey 30 | calvin;cal 31 | carolyn;carol;lyn 32 | cassandra;cassie;cass;sandra;sandy 33 | catherine;cath;cate;kate;cathy;cassie;katie 34 | charles;charlie;chuck;chas;chazza 35 | charles;kori;chuck;chaz;chic;charley 36 | christine;chris;christy;chrissy;chrissie;tina 37 | christopher;chris;topher;kit;chrissy 38 | clayton;clay 39 | daniel;dan;danny 40 | daniel;dan 41 | david;dave;davey 42 | david;dave;davey;davie;div;dav 43 | deborah;deb;debbie 44 | dolores;dee 45 | dominic;dom 46 | dominick;dom;nick;nicky 47 | donald;don;donny 48 | dorothy;dot;dottie 49 | douglas;doug 50 | dwight;ike 51 | edmund;ed 52 | edmund;ed;eddy;eddie;ned;neddie;ted;teddy 53 | edward;ed 54 | edward;ed;eddy;eddie;ned;neddie;ted;teddy 55 | edward;eddy;ted;ed 56 | edwin;ed 57 | edwin;ed;eddy;eddie;ned;neddie;ted;teddy 58 | elaine;lainie 59 | eleanor;ellie;nell;nellie;ella;elle;nora 60 | ellen;nell;nellie ,ellie 61 | elizabeth;bess;bessie;bette;bet;betty;beth;betsy;eliza;elise;elsa;elsie;elle;ella;lisa;lisbeth;lissie;lily;libby;liddy;lizbeth;lizzie;liz;liza;lilibet 62 | elizabeth;beth;betsy;betty;eliza;liz;lisa;lizzie;libby 63 | emily;em;emmy;emma;milly 64 | ethel;eth 65 | eugene;gene;gen 66 | frances;fran;franny;fanny 67 | francesca;fran;franny;fanny 68 | francesco;fran;frank;frankie 69 | francis;fran;frank;frankie 70 | franklin;frank 71 | frederick;fred;freddy;rick;fritz 72 | garfield;gal;gary;garry 73 | geoffrey;geoff 74 | geoffrey;geoff;jeff 75 | geoffrey;jeff;geoff 76 | gerald;gerry;jerry 77 | gertrude;gert;gertie 78 | gregory;greg 79 | 
gustav;gus 80 | harold;harry;hal 81 | helen;nell;nellie;eleni 82 | henry;hank;hal;harry 83 | herbert;herb;herbie;bert 84 | hilary;hil;hilly 85 | howard;howie 86 | isabella;izzy;isa;bella;bell 87 | jacob;jake 88 | james;jim;jimmy;jamie;jimbo;jambo 89 | jane;jan 90 | janet;jan 91 | janice;jan 92 | jason;jay 93 | jeffrey;jeff;geoff 94 | jennifer;jen;jenny 95 | jeremiah;jeremy;jerry 96 | jeremiah;jem;jey;jez 97 | jeremy;jem;jerry 98 | jeremy;jem;jey;jez 99 | jerome;jerry 100 | jessica;jess;jessie 101 | joel;joe;joey 102 | john;johnny;jack;jackie 103 | john;jack;jock;johnny 104 | jordan;judd;jordy 105 | joseph;joe;joey 106 | josephine;jo;joey;josie 107 | joshua;josh 108 | judith;judy 109 | katherine;kathy;kat;katie;kate;kit;kitty;katy 110 | katrina;kat;trina 111 | kenneth;ken;kenny 112 | kimberly;kim;kimmy 113 | kristen;krissy;kris 114 | lawrence;larry 115 | leonard;len;lenny;leon;leo;lee 116 | leonardo;len;lenny;leon;leo;lee 117 | louis;lou;louie 118 | lucille;lucy 119 | madeline;maddie 120 | madison;maddie 121 | margaret;peggy;daisy;maggie;marge 122 | margaret;peg;peggy;meg 123 | maria;mia 124 | martin;marty 125 | mary;daisie ,maisie;polly;molly 126 | mathew;matt 127 | matthew;matt;matty 128 | megan;meg 129 | michael;mike;mick;mikey;mickey 130 | michele;shell;selley 131 | michelle;shell;selley 132 | miranda;randy;mindy 133 | mitchell;mitch 134 | montague;monty 135 | montgomery;monty 136 | montmorency;monty 137 | natasha;tasha;tash;nat 138 | nathan;nat;nate 139 | nathaniel;nat;nath 140 | nichola;nickie;nicki;nicky;nikki 141 | nicholas;nick;nicky 142 | nichole;nickie;nicki;nicky;nikki 143 | nigel;nig;niggy;niglet;nige 144 | norbert;nobby 145 | oliver;ollie 146 | patricia;pat;patty;tricia;trish;patsy;trisha 147 | paul;paulie;polly 148 | paula;paulie;polly 149 | pauline;paulie;polly 150 | percival;percy 151 | peregrine;perry 152 | peter;pete 153 | peter;pete 154 | philip;phil;philly 155 | philippa;pippa;philly 156 | priscilla;cilla 157 | rachel;ray or rach 158 | 
raphael;ralph;rafi 159 | raymond;ray 160 | rebecca;becky;becca;becks;bex;bec 161 | regina;reggie;gina 162 | reginald;reg;reggie 163 | renee;rae 164 | richard;rich;richie;rick;ricky;dick;dicky 165 | robert;bob;bobby;bobbie;robbie;robin;rob;robo 166 | roberta;bobbie 167 | roderick;rod;roddy 168 | rodney;rodders 169 | ronald;ron;ronnie 170 | ronald;ron;ronnie;ro 171 | russell;russ;rusty 172 | samantha;sam;sammy 173 | samuel;sam;sammy 174 | sarah;sadie;sally 175 | seymour;sy 176 | sharon;shaza;shaz 177 | simon;si;sam 178 | spencer;spence 179 | stephen;steve;stevie 180 | steven;steve;stevie 181 | susan;sue;susie;suzy 182 | tamara;tammy;tam 183 | tamsin;tammy;tam 184 | terence;terry;tel 185 | teresa;tracy;tracey 186 | theodore;ted;teddy;theo;ned;neddy 187 | theodore;eddy;ted;ed 188 | theresa;tracy;tracey 189 | thomas;tom;tommy 190 | thomas;tom;thom;tommy 191 | tobias;toby 192 | trenton;trent 193 | valerie;val 194 | veronica;vera;ronni;ronnie 195 | victor;vic 196 | victoria;vicky;tori 197 | vincent;vinnie;vince 198 | violet;vi 199 | virginia;ginny;ginger;vergie 200 | virginia;ginny 201 | walter;wal;walt;wally 202 | william;will;bill;willie;billy 203 | william;bill;bilwa;will;liam 204 | zachary;zach -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/gender.dict: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Priler/samurai/7b7495871a06109cc7fc524ec13f195bcf86fed3/libs/gender_extractor/nameLists/gender.dict -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/list.txt: -------------------------------------------------------------------------------- 1 | nameLists\AfghanistanFemaleUTF8.csv, nameLists\AfghanistanMaleUTF8.csv, nameLists\AlbaniaFemaleUTF8.csv, nameLists\AlbaniaMaleUTF8.csv, nameLists\AustraliaFemaleUTF8.csv, nameLists\AustraliaMaleUTF8.csv, nameLists\BelgiumFemaleUTF8.csv, 
nameLists\BelgiumMaleUTF8.csv, nameLists\BrazilFemaleUTF8.csv, nameLists\BrazilMaleUTF8.csv, nameLists\BrusselsFemaleUTF8.csv, nameLists\BrusselsMaleUTF8.csv, nameLists\CanadaFemaleUTF8.csv, nameLists\CanadaMaleUTF8.csv, nameLists\CzechFemaleUTF8.csv, nameLists\CzechMaleUTF8.csv, nameLists\FinlandFemaleUTF8.csv, nameLists\FinlandMaleUTF8.csv, nameLists\FlandersFemaleUTF8.csv, nameLists\FlandersMaleUTF8.csv, nameLists\FrisiaFemaleUTF8.csv, nameLists\FrisiaMaleUTF8.csv, nameLists\GreeceFemaleUTF8.csv, nameLists\GreeceMaleUTF8.csv, nameLists\HungaryFemaleUTF8.csv, nameLists\HungaryMaleUTF8.csv, nameLists\IndiaFemaleUTF8.csv, nameLists\IndiaMaleUTF8.csv, nameLists\IranFemaleUTF8.csv, nameLists\IranMaleUTF8.csv, nameLists\IrelandFemaleUTF8.csv, nameLists\IrelandMaleUTF8.csv, nameLists\IsraelFemaleUTF8.csv, nameLists\IsraelMaleUTF8.csv, nameLists\ItalyFemaleUTF8.csv, nameLists\ItalyMaleUTF8.csv, nameLists\JapanFemaleUTF8.csv, nameLists\JapanMaleUTF8.csv, nameLists\LatviaFemaleUTF8.csv, nameLists\LatviaMaleUTF8.csv, nameLists\NorwayFemaleUTF8.csv, nameLists\NorwayMaleUTF8.csv, nameLists\PolandFemaleUTF8.csv, nameLists\PolandMaleUTF8.csv, nameLists\RomaniaFemaleUTF8.csv, nameLists\RomaniaMaleUTF8.csv, nameLists\RussiaFemaleUTF8.csv, nameLists\RussiaMaleUTF8.csv, nameLists\SloveniaFemaleUTF8.csv, nameLists\SloveniaMaleUTF8.csv, nameLists\SomaliaFemaleUTF8.csv, nameLists\SomaliaMaleUTF8.csv, nameLists\SpainFemaleUTF8.csv, nameLists\SpainMaleUTF8.csv, nameLists\SwedenFemaleUTF8.csv, nameLists\SwedenMaleUTF8.csv, nameLists\TurkeyFemaleUTF8.csv, nameLists\TurkeyMaleUTF8.csv, nameLists\UKFemaleUTF8.csv, nameLists\UKMaleUTF8.csv, nameLists\UkraineFemaleUTF8.csv, nameLists\UkraineMaleUTF8.csv, nameLists\USAFemaleUTF8.csv, nameLists\USAMaleUTF8.csv, nameLists\VietnamFemaleUTF8.csv, nameLists\VietnamMaleUTF8.csv, nameLists\WalloniaFemaleUTF8.csv, nameLists\WalloniaMaleUTF8.csv -------------------------------------------------------------------------------- 
/libs/gender_extractor/nameLists/nameLists.md: -------------------------------------------------------------------------------- 1 | ## Lists of first names for different countries 2 | 3 | - Diminutives: 4 | - http://en.wiktionary.org/wiki/Appendix:English_given_names 5 | - Afghanistan: 6 | - UK naming practices guide https://www.fbiic.gov/public/2008/nov/Naming_practice_guide_UK_2006.pdf 7 | - Albania: 8 | - UK naming practices guide https://www.fbiic.gov/public/2008/nov/Naming_practice_guide_UK_2006.pdf 9 | - Australia: 10 | - Department of Attorney General and Justice NSW http://www.bdm.nsw.gov.au/births/popularBabyNames.htm 11 | - Belgium: 12 | - Voornamen van de bevolking op 1.1.2009 (XLS, 17.33 MB) http://statbel.fgov.be/nl/modules/publications/statistiques/bevolking/prenoms_de_la_population_totale.jsp 13 | - Brazil: 14 | - http://www.dicionariodenomesproprios.com.br/nomes-masculinos/ 15 | - http://www.dicionariodenomesproprios.com.br/nomes-femininos/ 16 | - Canada: 17 | - http://www.servicealberta.ca/1166.cfm (1990 lists) 18 | - Czech republic: 19 | - Czech name days http://www.myczechrepublic.com/czech_culture/czech_name_days/alphabetical.html 20 | - Diminutives http://www.myczechrepublic.com/czech_culture/czech_name_days/dimf.html and http://www.myczechrepublic.com/czech_culture/czech_name_days/dimm.html 21 | - Finland: 22 | - http://www.sci.fi/~kajun/finns/ 23 | - Frisia: 24 | - http://allenamen.nl/friese_namen/jongens_namen.html and http://allenamen.nl/friese_namen/meisjes_namen.html 25 | - Greece: 26 | - http://www.fredonia.edu/faculty/emeritus/edwinlawson/greeknames/ 27 | - India: 28 | - www.eyeofindia.com/services/indian-names 29 | - www.iloveindia.com/babynames 30 | - http://en.wikipedia.org/wiki/Category:Indian_given_names 31 | - http://www.infernaldreams.com/names/Asia/India/Hin_Names.htm 32 | - Iran: 33 | - http://persia.org/Information/boys.html 34 | - http://persia.org/Information/girls.html 35 | - Ireland: 36 | - see Northern Ireland 37 | - 
Israel: 38 | - http://www.learn-hebrew-names.com/Hebrew-names.aspx 39 | - Italy: 40 | - page 12 from http://www3.istat.it/salastampa/comunicati/non_calendario/20100318_00/testointegrale20100318.pdf 41 | - Japan: 42 | - https://en.wikipedia.org/wiki/Category:Japanese_feminine_given_names 43 | - https://en.wikipedia.org/wiki/Category:Japanese_masculine_given_names 44 | - Latvia: 45 | - http://en.wiktionary.org/wiki/Appendix:Latvian_given_names 46 | - http://en.wiktionary.org/wiki/Category:Latvian_male_given_names and http://en.wiktionary.org/wiki/Category:Latvian_female_given_names 47 | - Norway: 48 | - Namnestatistikk, 2011, http://www.ssb.no/emner/00/navn/ 49 | - Poland: 50 | - http://chomikuj.pl/justynadow/s*c5*82owniki+imion/S*c5*81OWNIK+IMION+POLSKICH,263985157.pdf 51 | - augmented with diminutives from http://en.wiktionary.org/wiki/Appendix:Polish_given_names 52 | - Romania: 53 | - http://childnamesworld.com/romanian-boy-baby-names.php?religion=Romanian&gender=boy 54 | - http://ro.wikipedia.org/wiki/List%C4%83_de_prenume_rom%C3%A2ne%C8%99ti 55 | - Russia 56 | - http://en.wikipedia.org/wiki/Category:Russian_masculine_given_names and http://en.wikipedia.org/wiki/Category:Russian_feminine_given_names 57 | - http://habrahabr.ru/post/123856/ 58 | - Male diminutives from http://irusik-05.narod.ru/index37.html 59 | - Slovenia: 60 | - http://www.stat.si/eng/imena.asp 61 | - Somalia 62 | - UK naming practices guide https://www.fbiic.gov/public/2008/nov/Naming_practice_guide_UK_2006.pdf 63 | - Spain: 64 | - Nombres mas frecuentes simples y exactos para total nacional y exactos por provincia de residencia 65 | http://www.ine.es/daco/daco42/nombyapel/nombyapel.htm 66 | - Sweden: 67 | - http://www.scb.se/Pages/ProductTables____30919.aspx 68 | - Turkey: 69 | - http://www.annecocuk.com/isim/isimler-xkiz.htm 70 | - http://www.annecocuk.com/isim/isim-erkek.htm 71 | - UK: 72 | - http://www.ons.gov.uk/ons/publications/re-reference-tables.html?edition=tcm%3A77-243644 73 | - 
Ukraine: 74 | - http://www.aratta-ukraine.com/sacred_ua.php?id=44 75 | - http://database.ukrcensus.gov.ua/dw_name/tlum.asp?nom_zag=4 76 | - http://logistik.mybb.ru/viewtopic.php?id=141 77 | - USA: 78 | - 1990 census: e.g., http://www.uta.fi/FAST/US7/NAMES/male1st.html and http://www.uta.fi/FAST/US7/NAMES/female-1.html 79 | -------------------------------------------------------------------------------- /libs/gender_extractor/nameLists/sources.txt: -------------------------------------------------------------------------------- 1 | ### Data provenance 2 | 3 | The tool uses lists of `male` and `female` first names for different countries: 4 | 5 | - Diminutives: 6 | - http://en.wiktionary.org/wiki/Appendix:English_given_names 7 | - Afghanistan: 8 | - UK naming practices guide https://www.fbiic.gov/public/2008/nov/Naming_practice_guide_UK_2006.pdf 9 | - Albania: 10 | - UK naming practices guide https://www.fbiic.gov/public/2008/nov/Naming_practice_guide_UK_2006.pdf 11 | - Australia: 12 | - Department of Attorney General and Justice NSW http://www.bdm.nsw.gov.au/births/popularBabyNames.htm 13 | - Belgium: 14 | - Voornamen van de bevolking op 1.1.2009 (XLS, 17.33 MB) http://statbel.fgov.be/nl/modules/publications/statistiques/bevolking/prenoms_de_la_population_totale.jsp 15 | - Brazil: 16 | - http://www.dicionariodenomesproprios.com.br/nomes-masculinos/ 17 | - http://www.dicionariodenomesproprios.com.br/nomes-femininos/ 18 | - Canada: 19 | - http://www.servicealberta.ca/1166.cfm (1990 lists) 20 | - Czech republic: 21 | - Czech name days http://www.myczechrepublic.com/czech_culture/czech_name_days/alphabetical.html 22 | - Diminutives http://www.myczechrepublic.com/czech_culture/czech_name_days/dimf.html and http://www.myczechrepublic.com/czech_culture/czech_name_days/dimm.html 23 | - Finland: 24 | - http://www.sci.fi/~kajun/finns/ 25 | - Frisia: 26 | - http://allenamen.nl/friese_namen/jongens_namen.html and http://allenamen.nl/friese_namen/meisjes_namen.html 27 | - 
Greece: 28 | - http://www.fredonia.edu/faculty/emeritus/edwinlawson/greeknames/ 29 | - India: 30 | - www.eyeofindia.com/services/indian-names 31 | - www.iloveindia.com/babynames 32 | - http://en.wikipedia.org/wiki/Category:Indian_given_names 33 | - http://www.infernaldreams.com/names/Asia/India/Hin_Names.htm 34 | - Iran: 35 | - http://persia.org/Information/boys.html 36 | - http://persia.org/Information/girls.html 37 | - Ireland: 38 | - see Northern Ireland 39 | - Israel: 40 | - http://www.learn-hebrew-names.com/Hebrew-names.aspx 41 | - Italy: 42 | - page 12 from http://www3.istat.it/salastampa/comunicati/non_calendario/20100318_00/testointegrale20100318.pdf 43 | - Japan: 44 | - https://en.wikipedia.org/wiki/Category:Japanese_feminine_given_names 45 | - https://en.wikipedia.org/wiki/Category:Japanese_masculine_given_names 46 | - Latvia: 47 | - http://en.wiktionary.org/wiki/Appendix:Latvian_given_names 48 | - http://en.wiktionary.org/wiki/Category:Latvian_male_given_names and http://en.wiktionary.org/wiki/Category:Latvian_female_given_names 49 | - Norway: 50 | - Namnestatistikk, 2011, http://www.ssb.no/emner/00/navn/ 51 | - Poland: 52 | - http://chomikuj.pl/justynadow/s*c5*82owniki+imion/S*c5*81OWNIK+IMION+POLSKICH,263985157.pdf 53 | - augmented with diminutives from http://en.wiktionary.org/wiki/Appendix:Polish_given_names 54 | - Romania: 55 | - http://childnamesworld.com/romanian-boy-baby-names.php?religion=Romanian&gender=boy 56 | - http://ro.wikipedia.org/wiki/List%C4%83_de_prenume_rom%C3%A2ne%C8%99ti 57 | - Russia 58 | - http://en.wikipedia.org/wiki/Category:Russian_masculine_given_names and http://en.wikipedia.org/wiki/Category:Russian_feminine_given_names 59 | - http://habrahabr.ru/post/123856/ 60 | - Male diminutives from http://irusik-05.narod.ru/index37.html 61 | - Slovenia: 62 | - http://www.stat.si/eng/imena.asp 63 | - Somalia 64 | - UK naming practices guide https://www.fbiic.gov/public/2008/nov/Naming_practice_guide_UK_2006.pdf 65 | - Spain: 66 | - 
Nombres mas frecuentes simples y exactos para total nacional y exactos por provincia de residencia 67 | http://www.ine.es/daco/daco42/nombyapel/nombyapel.htm 68 | - Sweden: 69 | - http://www.scb.se/Pages/ProductTables____30919.aspx 70 | - Turkey: 71 | - http://www.annecocuk.com/isim/isimler-xkiz.htm 72 | - http://www.annecocuk.com/isim/isim-erkek.htm 73 | - UK: 74 | - http://www.ons.gov.uk/ons/publications/re-reference-tables.html?edition=tcm%3A77-243644 75 | - Ukraine: 76 | - http://www.aratta-ukraine.com/sacred_ua.php?id=44 77 | - http://database.ukrcensus.gov.ua/dw_name/tlum.asp?nom_zag=4 78 | - http://logistik.mybb.ru/viewtopic.php?id=141 79 | - USA: 80 | - 1990 census: e.g., http://www.uta.fi/FAST/US7/NAMES/male1st.html and http://www.uta.fi/FAST/US7/NAMES/female-1.html 81 | 82 | -------------------------------------------------------------------------------- /localization.py: -------------------------------------------------------------------------------- 1 | from configurator import config 2 | 3 | strings = { 4 | "en": { 5 | "error_no_reply": "This command must be sent as a reply to one's message!", 6 | "error_report_admin": "Whoa! Don't report admins 😈", 7 | "error_restrict_admin": "You cannot restrict an admin.", 8 | "error_wrong_time_format": "Wrong time forman. Use a number + symbols 'h', 'm' or 'd'. F.ex. 4h", 9 | "error_message_too_short": "Please avoid short useless greetings. " 10 | "If you have a question or some information, put it in one message. Thanks in " 11 | "advance! 🤓", 12 | 13 | "report_date_format": "%d.%m.%Y at %H:%M (server time)", 14 | "report_message": '👆 Sent {date}\n' 15 | '
Go to message', 16 | "report_note": "\n\nNote:{note}", 17 | "report_delivered": "Report sent", 18 | 19 | "action_del_msg": "Delete message", 20 | "action_del_and_ban": "Delete and ban", 21 | "action_del_and_readonly": "Set user readonly for 24 hours", 22 | "action_del_and_readonly2": "Set user readonly for 7 days", 23 | 24 | "action_deleted": "\n\n🗑 Deleted", 25 | "action_deleted_banned": "\n\n🗑❌ Deleted, user banned", 26 | "action_deleted_readonly": "\n\n🗑🙊 Deleted, set readonly for 2 hours", 27 | "action_deleted_readonly2": "\n\n🗑🙊 Deleted, set readonly for 2 hours", 28 | 29 | "resolved_readonly": "User set to read-only mode ({restriction_time})", 30 | "resolved_nomedia": "User set to text-only mode ({restriction_time})", 31 | 32 | "restriction_forever": "forever", 33 | "need_admins_attention": 'Dear admins, your presence in chat is needed!\n\n' 34 | 'Go to message', 35 | 36 | "greetings_words": ("hi", "q", "hello", "hey") # Bot will react to short messages with these words 37 | }, 38 | "ru": { 39 | "error_no_reply": "Эта команда должна быть ответом на какое-либо сообщение!", 40 | "error_report_admin": "Админов репортишь? Ай-ай-ай 😈", 41 | "error_report_self": "Нельзя репортить самого себя 🤪", 42 | "error_restrict_admin": "Невозможно ограничить администратора.", 43 | "error_wrong_time_format": "Неправильный формат времени. Используйте число + символ h, m или d. Например, 4h", 44 | "error_message_too_short": "Пожалуйста, избегайте бессмысленных коротких приветствий. " 45 | "Если у Вас есть вопрос или информация, напишите всё в одном сообщении. Заранее " 46 | "спасибо! 
🤓", 47 | 48 | "report_date_format": "%d.%m.%Y в %H:%M (время сервера)", 49 | "report_message": '👆 Отправлено {date}\n' 50 | 'Перейти к сообщению', 51 | "report_note": "\n\nПримечание:{note}", 52 | "report_delivered": "Репорт отправлен.", 53 | 54 | "action_del_msg": "🗑 Удалить сообщение", 55 | "action_del_and_ban": "🗑 Удалить + ❌ бан навсегда", 56 | "action_del_and_readonly": "🗑 Удалить + 🙊 мут на день", 57 | "action_del_and_readonly2": "🗑 Удалить + 🙊 мут на неделю", 58 | 59 | "action_false_alarm": "❎ Нарушений нет", 60 | "action_false_alarm_2": "❎ Нарушений нет (🙊 мут репортера на день)", 61 | "action_false_alarm_3": "❎ Нарушений нет (🙊 мут репортера на неделю)", 62 | "action_false_alarm_4": "❎ Нарушений нет (❌ бан репортера)", 63 | 64 | "action_deleted": "\n\n🗑 Удалено", 65 | "action_deleted_banned": "\n\n🗑❌ Удалено, юзер забанен", 66 | "action_deleted_readonly": "\n\n🗑🙊 Удалено, + выдан мут на день.", 67 | "action_deleted_readonly2": "\n\n🗑🙊 Удалено, + выдан мут на неделю.", 68 | 69 | "action_dismissed": "\n\n❎ Нарушений не обнаружено.", 70 | "action_deleted_dismissed2": "\n\n❎ Нарушений не обнаружено (🙊 репортеру выдан мут на 1 день).", 71 | "action_deleted_dismissed3": "\n\n❎ Нарушений не обнаружено (🙊 репортеру выдан мут на 7 дней).", 72 | "action_deleted_dismissed4": "\n\n❎ Нарушений не обнаружено (❌ репортер забанен).", 73 | 74 | "resolved_readonly": "Выдан мут на ({restriction_time})", 75 | "resolved_nomedia": "Запрещено отправлять медиа на ({restriction_time})", 76 | "resolved_nomedia_forever": "Запрещено отправлять медиа навсегда.", 77 | 78 | "resolved_givemedia": "Разрешено отправлять медиа на ({restriction_time})", 79 | "resolved_givemedia_forever": "Разрешено отправлять медиа навсегда.", 80 | "error_givemedia_admin": "Админам итак разрешено отправлять медиа!", 81 | 82 | "resolved_givestickers": "Разрешено отправлять стикеры на ({restriction_time})", 83 | "resolved_givestickers_forever": "Разрешено отправлять стикеры навсегда.", 84 | 
"error_givestickers_admin": "Админам итак разрешено отправлять стикеры!", 85 | 86 | "resolved_revokestickers": "Запрещено отправлять стикеры на ({restriction_time})", 87 | "resolved_revokestickers_forever": "Запрещено отправлять стикеры навсегда.", 88 | "error_givestickers_admin": "Админам итак разрешено отправлять стикеры!", 89 | 90 | "user_unmuted": "Мут снят.", 91 | 92 | "restriction_forever": "Выдан мут навсегда.", 93 | "need_admins_attention": 'Товарищи админы, в чате нужно ваше присутствие!\n\n' 94 | 'Перейти к сообщению', 95 | 96 | "resolved_ban": "Участник заблокирован.", 97 | "resolved_unban": "Участник разблокирован.", 98 | 99 | "error_checkperms_admin": "✅ У админов нет никаких ограничений.", 100 | "error_ban_admin": "😡 Ты чё, пёс? Админа нельзя забанить!", 101 | 102 | "enabled_ro": "Режим «только-чтение» включен.", 103 | "disabled_ro": "Режим «только-чтение» отключен.", 104 | 105 | "profanity_user_kicked": "Ваше имя в Telegram содержит ненормативную лексику.\nПо этой причине вы были кикнуты из чата.\n\nПожалуйста, отредактируйте отображаемое имя и попробуйте заново.\nНарушение найдено в слове: {word}", 106 | 107 | "voice_message_reaction": "фу! ФУ Я СКАЗАЛ, НЕЛЬЗЯ. БРОСЬ КАКУ. 
ПИШИ ТЕКСТОМ.", 108 | 109 | "greetings_words": ("привет", "хай", "ку", "здарова"), # Бот среагирует на короткие сообщения с этими словами 110 | 111 | "announcements" : ( 112 | { 113 | "message" : "❕ Не забывайте про команду !report благодаря которой Вы можете обратить внимание администрации на нарушителя в чате.\n\nСпам данной командой карается вечным баном.", 114 | "every" : 10900 * 1.5 115 | }, 116 | { 117 | "message" : "📁 Это чат канала @howdyho_official\nОбщайтесь вежливо и не нарушайте правила!\n\n📈 В чате действует система репутации\n⛔️ Новичкам запрещено отправлять медиа\n🤬 Мат удаляется автоматически\n👹 Оффтоп/спам наказывается 🍌 бананами\n\nВсем мира 🤞", 118 | "every" : 10800 119 | }, 120 | { 121 | "message" : "🫰 Донат автору канала:\n\nМой Boosty: https://boosty.to/howdyho\nМой Patreon: https://www.patreon.com/howdyho\nНаш Discord: https://discord.gg/howdyho", 122 | "every" : 7200 * 3 123 | }, 124 | { 125 | "message" : "😈 У нас есть сайт, ты знал?\n\nВотб он - https://howdyho.net\nМы там постим топовый софт, обои, игры, и кучу всего для ПК!\n\nЗаходи, тебе там всегда рады!", 126 | "every" : 9000 * 2 127 | }, 128 | { 129 | "message" : "🫰 Хочешь чтобы твой мем/пост закинули в канал?\nТыкай сюда - @hhsharebot", 130 | "every" : 14500 131 | } 132 | ) 133 | }, 134 | } 135 | 136 | 137 | def get_string(key): 138 | """ 139 | Get localized string. First, try language as set in config. Then, try English locale. Else - raise an exception. 
"""In-process LRU caches for gender detection, Telegram chat members and DB members."""
import ormar
from models.member import Member

from utils import Gender, remove_non_letters, detect_name_language, detect_gender__compare, transliterate_name, measure_execution

import cachetools
from cachetools import LRUCache
from functools import wraps

# Each cache holds at most 1000 entries; the least recently used one is evicted first.
# TODO: Make this cache system to work when bot is used in multiple chats
#       (currently it's for 1 chat only)
members_cache = LRUCache(maxsize=1000)
tgmembers_cache = LRUCache(maxsize=1000)
gender_detections_cache = LRUCache(maxsize=1000)


def cache_gender_detection(func):
    """Memoize a one-argument ``func(name)`` in ``gender_detections_cache``.

    The cache key is the raw ``name`` exactly as passed by the caller.
    """
    @wraps(func)
    def wrapper(name):
        # Serve from cache when this exact name was already processed.
        if name in gender_detections_cache:
            return gender_detections_cache[name]

        # Otherwise compute once and remember the result.
        result = func(name)
        gender_detections_cache[name] = result
        return result

    return wrapper


@cache_gender_detection
def detect_gender(name: str) -> Gender:
    """Guess the gender behind a display name.

    The name is cleaned with ``remove_non_letters``, lower-cased and reduced
    to its first space-separated token. Depending on the language reported
    by ``detect_name_language`` the token is compared against the "Russia"
    or "USA" name lists, with a transliterated retry against the other list
    when the first lookup is inconclusive.

    :param name: display name, possibly decorated with emoji or punctuation
    :return: detected ``Gender``; ``Gender.UNKNOWN`` when nothing matches or
        when no usable token is left after cleaning
    """
    # remove any non-letters (emoji etc) and normalise case
    cleaned = remove_non_letters(name).lower()

    # The first non-blank token is treated as the first name.
    first_name = next((part for part in cleaned.split(" ") if part.strip()), None)
    if first_name is None:
        # Nothing left after cleaning (e.g. an emoji-only name). Previously
        # this fell through to ``None.strip()`` and raised AttributeError.
        return Gender.UNKNOWN
    first_name = first_name.strip()  # just to make sure it's as clean as possible

    name_lang = detect_name_language(first_name)

    if name_lang == 'russian':
        det_gen = detect_gender__compare(first_name, "Russia")

        # Diminutives ending in "ка": retry with the ending replaced by "а".
        if det_gen == Gender.UNKNOWN and first_name.endswith("ка"):
            first_name = f"{first_name[:-2]}а"
            det_gen = detect_gender__compare(first_name, "Russia")

        # Still unknown: transliterate and try the "USA" list.
        if det_gen == Gender.UNKNOWN:
            det_gen = detect_gender__compare(transliterate_name(first_name), "USA")

    elif name_lang == 'english':
        det_gen = detect_gender__compare(first_name, "USA")

        # Unknown: transliterate and try the "Russia" list.
        if det_gen == Gender.UNKNOWN:
            det_gen = detect_gender__compare(transliterate_name(first_name), "Russia")

    else:
        # Language not recognised: let the comparer use its default list.
        det_gen = detect_gender__compare(first_name)

    # return result, whatever it will be
    return det_gen


def cache_async_tgmembers(func):
    """Memoize an async ``func(bot, chat_id, user_id)`` in ``tgmembers_cache``.

    NOTE: the key is ``user_id`` only, so the same user in two chats shares
    one entry (see the single-chat TODO next to the cache definitions).
    """
    @wraps(func)
    async def wrapper(bot, chat_id, user_id):
        # Serve from cache when this user was already fetched.
        if user_id in tgmembers_cache:
            return tgmembers_cache[user_id]

        # Otherwise fetch once and remember the result.
        result = await func(bot, chat_id, user_id)
        tgmembers_cache[user_id] = result
        return result

    return wrapper


@cache_async_tgmembers
async def retrieve_tgmember(bot, chat_id, user_id):
    """Return ``bot.get_chat_member(chat_id, user_id)``, cached per user."""
    return await bot.get_chat_member(chat_id, user_id)


def cache_async_members(func):
    """Memoize an async ``func(user_id)`` in ``members_cache``, keyed by ``user_id``."""
    @wraps(func)
    async def wrapper(user_id):
        # Serve from cache when this user was already loaded.
        if user_id in members_cache:
            return members_cache[user_id]

        # Otherwise load once and remember the result.
        result = await func(user_id)
        members_cache[user_id] = result
        return result

    return wrapper


@cache_async_members
async def retrieve_or_create_member(user_id):
    """Load the ``Member`` row for ``user_id``, creating it when missing.

    A new row is created with ``messages_count=1``. Errors other than
    ``ormar.NoMatch`` propagate to the caller. The previous
    ``finally: return member`` swallowed them and let the decorator cache
    ``None`` for that user.

    :param user_id: user id stored in ``Member.user_id``
    :return: the existing or freshly created ``Member``
    """
    try:
        return await Member.objects.get(user_id=user_id)
    except ormar.NoMatch:
        return await Member.objects.create(user_id=user_id, messages_count=1)
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Tokenizer and classifier are loaded once at import time from the local
# directory; ``local_files_only=True`` prevents any download attempt.
model_path = "ruspam_model/"
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
model = AutoModelForSequenceClassification.from_pretrained(model_path, local_files_only=True)


def predict(text):
    """Classify ``text`` with the local sequence-classification model.

    The text is tokenized (truncated to 256 tokens) and run through the
    model without gradient tracking.

    :param text: message text to classify
    :return: ``True`` when the highest-scoring class index is 1, else ``False``
        (presumably class 1 means "spam" - confirm against the model card)
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=256)
    with torch.no_grad():
        logits = model(**inputs).logits
    # The comparison already yields a bool; no ``True if ... else False`` needed.
    return torch.argmax(logits, dim=1).item() == 1
"hidden_dropout_prob": 0.1, 9 | "hidden_size": 768, 10 | "initializer_range": 0.02, 11 | "intermediate_size": 3072, 12 | "layer_norm_eps": 1e-07, 13 | "max_position_embeddings": 512, 14 | "max_relative_positions": -1, 15 | "model_type": "deberta", 16 | "num_attention_heads": 12, 17 | "num_hidden_layers": 6, 18 | "pad_token_id": 0, 19 | "pooler_dropout": 0, 20 | "pooler_hidden_act": "gelu", 21 | "pooler_hidden_size": 768, 22 | "pos_att_type": null, 23 | "position_biased_input": true, 24 | "relative_attention": false, 25 | "torch_dtype": "float32", 26 | "transformers_version": "4.44.2", 27 | "type_vocab_size": 0, 28 | "vocab_size": 50265 29 | } 30 | -------------------------------------------------------------------------------- /ruspam_model/model.safetensors: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:30e81d4f87e833e9a3f25a1380b8689fd3d1f323c2b2fff73bb9a18d89776c1f 3 | size 328462256 4 | -------------------------------------------------------------------------------- /ruspam_model/special_tokens_map.json: -------------------------------------------------------------------------------- 1 | { 2 | "bos_token": { 3 | "content": "", 4 | "lstrip": false, 5 | "normalized": false, 6 | "rstrip": false, 7 | "single_word": false 8 | }, 9 | "cls_token": { 10 | "content": "", 11 | "lstrip": false, 12 | "normalized": false, 13 | "rstrip": false, 14 | "single_word": false 15 | }, 16 | "eos_token": { 17 | "content": "", 18 | "lstrip": false, 19 | "normalized": false, 20 | "rstrip": false, 21 | "single_word": false 22 | }, 23 | "mask_token": { 24 | "content": "", 25 | "lstrip": true, 26 | "normalized": false, 27 | "rstrip": false, 28 | "single_word": false 29 | }, 30 | "pad_token": { 31 | "content": "", 32 | "lstrip": false, 33 | "normalized": false, 34 | "rstrip": false, 35 | "single_word": false 36 | }, 37 | "sep_token": { 38 | "content": "", 39 | "lstrip": false, 40 | 
"normalized": false, 41 | "rstrip": false, 42 | "single_word": false 43 | }, 44 | "unk_token": { 45 | "content": "", 46 | "lstrip": false, 47 | "normalized": false, 48 | "rstrip": false, 49 | "single_word": false 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /ruspam_model/tokenizer_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "add_prefix_space": false, 3 | "added_tokens_decoder": { 4 | "0": { 5 | "content": "", 6 | "lstrip": false, 7 | "normalized": false, 8 | "rstrip": false, 9 | "single_word": false, 10 | "special": true 11 | }, 12 | "1": { 13 | "content": "", 14 | "lstrip": false, 15 | "normalized": false, 16 | "rstrip": false, 17 | "single_word": false, 18 | "special": true 19 | }, 20 | "2": { 21 | "content": "", 22 | "lstrip": false, 23 | "normalized": false, 24 | "rstrip": false, 25 | "single_word": false, 26 | "special": true 27 | }, 28 | "3": { 29 | "content": "", 30 | "lstrip": false, 31 | "normalized": false, 32 | "rstrip": false, 33 | "single_word": false, 34 | "special": true 35 | }, 36 | "4": { 37 | "content": "", 38 | "lstrip": true, 39 | "normalized": false, 40 | "rstrip": false, 41 | "single_word": false, 42 | "special": true 43 | } 44 | }, 45 | "bos_token": "", 46 | "clean_up_tokenization_spaces": true, 47 | "cls_token": "", 48 | "eos_token": "", 49 | "errors": "replace", 50 | "mask_token": "", 51 | "model_max_length": 512, 52 | "pad_token": "", 53 | "sep_token": "", 54 | "tokenizer_class": "RobertaTokenizer", 55 | "trim_offsets": true, 56 | "unk_token": "" 57 | } 58 | -------------------------------------------------------------------------------- /ruspam_model/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Priler/samurai/7b7495871a06109cc7fc524ec13f195bcf86fed3/ruspam_model/training_args.bin 
import unittest
import sys
import os

# Make the repository root importable when the tests are run from tests/.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from configurator import config, make_config
if not make_config("config.ini"):
    # ``sys.exit`` instead of the ``site``-injected ``exit`` helper, which is
    # meant for the interactive shell and is absent under ``python -S``.
    sys.exit(1)
import heroku_config

from utils import Gender
from lru_cache import detect_gender


class GenderDetectionTests(unittest.TestCase):
    """Check ``detect_gender`` against known male, female and ambiguous names."""

    def _assert_all_detected_as(self, names, expected):
        """Assert every name in ``names`` is detected as ``expected`` (one subTest each)."""
        for name in names:
            with self.subTest(name=name):
                self.assertEqual(detect_gender(name), expected,
                                 f"Failed for name: {name}")

    def test_male(self):
        self._assert_all_detected_as(
            ["😎 Абрахам", "Алексей", "Тони", "Иннокентий", "Аркадий", "Виктор", "Nikita", ":)[Nikita]"],
            Gender.MALE,
        )

    def test_female(self):
        self._assert_all_detected_as(
            ["👧 Александра", "Катя", "Ксения", "Ксюша", "Антонина", "Настя", "👧👧Лиза👧👧", "Ника", "Лея", "Алиска", "Катерина Лися"],
            Gender.FEMALE,
        )

    def test_unknown(self):
        self._assert_all_detected_as(
            ["Almaz", "Kamaz", "💎 Алмаз 💎", "Дивергент", "Лейндаль", "Унга Бунга 🌺"],
            Gender.UNKNOWN,
        )