├── runtime.txt ├── Procfile ├── data ├── description.txt ├── korni_dictionary.csv ├── answers_extra.txt ├── list_of_commands.txt ├── command_sved_answer.txt ├── command_kak_answer.txt ├── donation_ask_message.txt └── answers.txt ├── requirements.txt ├── .gitignore ├── old_korni.py └── korni.py /runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.10.8 -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: python3 korni.py -------------------------------------------------------------------------------- /data/description.txt: -------------------------------------------------------------------------------- 1 | korni_dictionary - data for import into PostgreSQL database. -------------------------------------------------------------------------------- /data/korni_dictionary.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/griever-gf/korni_russkogo/HEAD/data/korni_dictionary.csv -------------------------------------------------------------------------------- /data/answers_extra.txt: -------------------------------------------------------------------------------- 1 | Раз ты якобы русский, изъясняйся по-русски, #." 2 | Коли считаешь себя русским, чти корни русского, #." 
-------------------------------------------------------------------------------- /data/list_of_commands.txt: -------------------------------------------------------------------------------- 1 | kak - Как использовать (ро)бота 2 | nazid - Настройка назиданий в личке, например "/nazid korni" 3 | sved - Дополнительные сведения 4 | vzvod - Настройка ретивости в болталке, например "/vzvod 2" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mysql-connector-python==8.0.28 2 | mysqlclient==2.1.0 3 | pymorphy2==0.9.1 4 | pymorphy2-dicts==2.4.393442.3710985 5 | pymorphy2-dicts-ru==2.4.417127.4579844 6 | python-telegram-bot==13.11 7 | urllib3==1.26.9 -------------------------------------------------------------------------------- /data/command_sved_answer.txt: -------------------------------------------------------------------------------- 1 | Дополнительные сведения о роботе можно изведать по ссылке: 2 | https://telegra.ph/Robot-popravlyalshchik-dlya-Telegrama-Korni-russkogo-04-10 3 | 4 | (Благодетели и попечители разработки: Nox.) 5 | (Заверьте ежемесячное пожертвование на поддержку и разработку тут: boosty.to/korni_rus) -------------------------------------------------------------------------------- /data/command_kak_answer.txt: -------------------------------------------------------------------------------- 1 | Способы использования: 2 | Наилучший способ - добавить (ро)бота к себе в болталки (беседы), тогда он будет поправлять всех участников. Для "супергрупп" необходимы права заведующего. 3 | 4 | Упрощённый способ - просто присылать любые письмена боту в личку, он тоже будет их поправлять. Но придётся каждый раз вручную это делать.
-------------------------------------------------------------------------------- /data/donation_ask_message.txt: -------------------------------------------------------------------------------- 1 | Разработка и пополнение словаря робота требуют исполинского количества времени и труда. Равно как и размещение робота на узлодержках во всесети. Посему, как разработчик поправляльщика, прошу вашей посильной платовой поддержки, чтобы и дальше развивать робота и корнесловие. 2 | 3 | Ежели у вас имеется возможность, заверьте ежемесячное пожертвование тут: boosty.to/korni_rus. Подробности можно почитать по ссылке. 4 | 5 | Данное сообщение не потревожит вас чаще, чем раз в неделю. Спасибо, что уделили внимание! 6 | 7 | (Создан поток с прибаутками @korni_rus. Подписывайтесь!) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #My 2 | config.py 3 | .env 4 | donation_log* 5 | 6 | # Editors 7 | .vscode/ 8 | .idea/ 9 | 10 | # Vagrant 11 | .vagrant/ 12 | 13 | # Mac/OSX 14 | .DS_Store 15 | 16 | # Windows 17 | Thumbs.db 18 | 19 | # Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | MANIFEST 46 | 47 | # PyInstaller 48 | # Usually these files are written by a python script from a template 49 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
50 | *.manifest 51 | *.spec 52 | 53 | # Installer logs 54 | pip-log.txt 55 | pip-delete-this-directory.txt 56 | 57 | # Unit test / coverage reports 58 | htmlcov/ 59 | .tox/ 60 | .nox/ 61 | .coverage 62 | .coverage.* 63 | .cache 64 | nosetests.xml 65 | coverage.xml 66 | *.cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | 70 | # Translations 71 | *.mo 72 | *.pot 73 | 74 | # Django stuff: 75 | *.log 76 | local_settings.py 77 | db.sqlite3 78 | 79 | # Flask stuff: 80 | instance/ 81 | .webassets-cache 82 | 83 | # Scrapy stuff: 84 | .scrapy 85 | 86 | # Sphinx documentation 87 | docs/_build/ 88 | 89 | # PyBuilder 90 | target/ 91 | 92 | # Jupyter Notebook 93 | .ipynb_checkpoints 94 | 95 | # IPython 96 | profile_default/ 97 | ipython_config.py 98 | 99 | # pyenv 100 | .python-version 101 | 102 | # celery beat schedule file 103 | celerybeat-schedule 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json -------------------------------------------------------------------------------- /data/answers.txt: -------------------------------------------------------------------------------- 1 | Берегите корни русского языка… 2 | Давайте говорить русскими словами. 3 | Не гробьте наш язык полурусским суржиком. 4 | Запомните это, #. Берегите русский язык от вредного мусора. 5 | Русский язык заслуживает такого же пестования и любви, как старинное зодчество, редкие животные и растения. 6 | Используйте НАШИ слова, # - ведь они крутые, особенные и родные. 7 | Наш самобытный язык и уклад - великое преимущество на мировом поприще и повод для полноценного самоощущения и гордости. 
8 | Чуждые мусорные заимствования разъедают наш язык подобно раку. Берегите и преумножайте славянские корни языка, #. 9 | Ежели изъясняетесь по-русски, то используйте коренные русские слова, #. 10 | Давайте правильно говорить на своём языке, а не прогибаться под повесточку. 11 | Изъясняйтесь по-русски, пожалуйста. 12 | Будьте добры, выражайте свои мысли на русском. 13 | Берегите русский язык от иностранщины. 14 | Вот так по-русски. 15 | Прокачивайте русское языкознание. 16 | «Каковы же признаки обеднённого языка? Прежде всего засилие иностранщины. Надо, наконец, решительно убрать из русского языка все эти «дезавуирования», «нормативы», «ассортименты» и всё прочее в этом роде.» Паустовский К.Г. 17 | «…употреблять иностранное слово, когда есть равносильное ему русское слово, — значит оскорблять и здравый смысл, и здравый вкус». Белинский В.Г. 18 | «Как материал словесности, язык славяно-русский имеет неоспоримое превосходство перед всеми европейскими». Пушкин А.С. 19 | «На чужом языке мы теряем 80% своей личности…» Довлатов С. 20 | «Иностранные слова надо употреблять только в случаях совершенной неизбежности, вообще же лучше избегать их. Русский язык достаточно богат, он обладает всеми средствами для выражения самых тонких ощущений и оттенков мысли». Короленко В. 21 | «Язык наш превосходен, богат, громок, силён, глубокомыслен. Надлежит только познать цену ему, вникнуть в состав и силу слов…». Шишков А.С. 22 | «Берегите чистоту языка, как святыню! Никогда не употребляйте иностранных слов. Русский язык так богат, что нам нечего брать у тех, кто беднее нас». Тургенев И.С. 23 | «Я не считаю хорошим и пригодным иностранные слова, если только их можно заменить чисто русскими или более обруселыми». Лесков Н.С. 24 | «Надо беречь наш богатый и прекрасный язык от порчи». Лесков Н.С. 25 | «Русский язык, по свидетельству самих иностранных леполюбов, не уступает в мужестве латинскому, в плавности греческому, превосходя все европейские…» Державин Г. 
-------------------------------------------------------------------------------- /old_korni.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gspread 3 | from oauth2client.service_account import ServiceAccountCredentials 4 | import re 5 | import json 6 | import itertools 7 | import pymorphy2 8 | try: 9 | import config 10 | except ModuleNotFoundError: 11 | # if no config (i.e. prod) 12 | pass 13 | morph = pymorphy2.MorphAnalyzer() 14 | 15 | def read_glossary_data(): 16 | # define the scope 17 | scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive'] 18 | # add credentials to the account 19 | if ('GOOGLE_SHEETS_CREDS_JSON' in os.environ): # if prod 20 | json_creds = os.getenv("GOOGLE_SHEETS_CREDS_JSON") 21 | creds_dict = json.loads(json_creds) 22 | creds_dict["private_key"] = creds_dict["private_key"].replace("\\\\n", "\n") 23 | creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope) 24 | else: #if dev 25 | creds = ServiceAccountCredentials.from_json_keyfile_name(config.json_keyfile_rodno, scope) 26 | 27 | # authorize the clientsheet 28 | client = gspread.authorize(creds) 29 | 30 | # get the instance of the Spreadsheet 31 | sheet = client.open('Корни языка') 32 | 33 | # get the first sheet of the Spreadsheet 34 | sheet_instance = sheet.get_worksheet(0) 35 | 36 | # get all the records of the data 37 | glossary_data = sheet_instance.get_all_records() #list of dictionaries 38 | key_incorrect = sheet_instance.cell(col=1,row=1).value #ключ мусорного значения 39 | key_correct = sheet_instance.cell(col=2,row=1).value #ключ родного значения 40 | 41 | return glossary_data, key_incorrect, key_correct 42 | 43 | def process_glossary_data (glossary_data, key_incorrect): 44 | for idxx, dict1 in enumerate(glossary_data): 45 | # split cell if in consist several words using re.sub(pattern, repl, string, count=0, flags=0) 46 | dict_words_list = 
re.split("[^\w\-\)\(]*\,[^\w\-\)\(]*", dict1[key_incorrect]) # split by comma + non-word chars minus brackets 47 | 48 | # if contains several round brackets, then generate several words instead source word 49 | i = 0 50 | while i < len(dict_words_list): 51 | list_of_inbrackets = re.findall("\([\w-]*\)", dict_words_list[i], re.IGNORECASE) 52 | if any(list_of_inbrackets): 53 | list_of_parts = re.split("\([\w-]*\)", dict_words_list[i], flags=re.IGNORECASE) 54 | list_of_replacement_variants = [] 55 | for inbracket in list_of_inbrackets: 56 | list_of_replacement_variants.append(("", inbracket.strip(')('))) 57 | 58 | dict_words_list.remove(dict_words_list[i]) 59 | for trpl in itertools.product(*list_of_replacement_variants): 60 | res_list = [list_of_parts[0]] 61 | for j, content in enumerate(trpl): 62 | res_list.append(content) 63 | res_list.append(list_of_parts[j + 1]) 64 | dict_words_list.insert(i, ''.join(res_list)) 65 | i += 1 66 | i -= 1 67 | i += 1 68 | 69 | # if contains hyphens, then generate two words instead source word 70 | i = 0 71 | while i < len(dict_words_list): 72 | if "-" in dict_words_list[i]: 73 | extra_word = dict_words_list[i].replace("-", "") 74 | dict_words_list.insert(i + 1, extra_word) 75 | i += 1 76 | """ 77 | # if contains several russian "е/э", then generate several words instead source word 78 | i = 0 79 | while i < len(dict_words_list): 80 | if any(re.findall(r'е|э', dict_words_list[i], re.IGNORECASE)): 81 | keyletters = 'еэ' 82 | # Convert input string into a list so we can easily substitute letters 83 | seq = list(dict_words_list[i]) 84 | # Find indices of key letters in seq 85 | indices = [indx for indx, c in enumerate(seq) if c in keyletters] 86 | 87 | dict_words_list.remove(dict_words_list[i]) 88 | # Generate key letter combinations & place them into the list 89 | for t in itertools.product(keyletters, repeat=len(indices)): 90 | for j, c in zip(indices, t): 91 | seq[j] = c 92 | dict_words_list.insert(i, ''.join(seq)) 93 | i += 1 94 | i 
-= 1 95 | i += 1 96 | 97 | # if contains several russian "ф/фф", then generate several words instead source word 98 | i = 0 99 | while i < len(dict_words_list): 100 | if any(re.findall(r'ф', dict_words_list[i], re.IGNORECASE)): 101 | list_of_parts = re.split("ф+", dict_words_list[i], flags=re.IGNORECASE) 102 | 103 | dict_words_list.remove(dict_words_list[i]) 104 | for trpl in itertools.product(["ф", "фф"], repeat=len(list_of_parts) - 1): 105 | res_list = [list_of_parts[0]] 106 | for j, content in enumerate(trpl): 107 | res_list.append(content) 108 | res_list.append(list_of_parts[j+1]) 109 | dict_words_list.insert(i, ''.join(res_list)) 110 | i += 1 111 | i -= 1 112 | i += 1 113 | """ """ 114 | strout = "dict_words_list: " 115 | for non_native_word in dict_words_list: 116 | strout += non_native_word + " " 117 | print(strout) 118 | """ 119 | glossary_data[idxx][key_incorrect] = ', '.join(dict_words_list) 120 | return glossary_data 121 | 122 | def process_text(update, context): 123 | 124 | records_data, id_non_native, id_native = read_glossary_data() 125 | records_data = process_glossary_data(records_data, id_non_native) 126 | 127 | output_message = "" 128 | 129 | text_to_split = update.message.caption if (update.message.text is None) else update.message.text 130 | #print(text_to_split) 131 | 132 | # let's split it by words using re.sub(pattern, repl, string, count=0, flags=0) 133 | # [\w] means any alphanumeric character and is equal to the character set [a-zA-Z0-9_] 134 | input_words_list = re.sub("[^\w-]", " ", text_to_split).split() 135 | # print(input_words_list) 136 | 137 | for checked_word in input_words_list: 138 | # print("Проверяем: " + checked_word) 139 | # print(morph.parse(checked_word)[0].lexeme) 140 | # morph.parse(checked_word)[0].lexeme 141 | 142 | checked_word_lower = checked_word.lower().removesuffix("-то").removesuffix("-ка").removesuffix("-таки") 143 | if (checked_word_lower == ""): 144 | continue 145 | 146 | string_to_add = "" 147 | # opening 
google sheet data 148 | for dict2 in records_data: 149 | # print(dict[id_non_native]) 150 | # split cell if in consist several words using re.sub(pattern, repl, string, count=0, flags=0) 151 | # split by comma only (useful for words with spaces) 152 | dict_words_list = re.split("[^\w-]*,[^\w-]*", dict2[id_non_native]) #split by comma + non-word chars 153 | is_coincidence_found = False 154 | for non_native_word in dict_words_list: 155 | non_native_word = non_native_word.lower() 156 | # maybe should try to normalize non_native form too or to check all the forms of non_native_word 157 | if (checked_word_lower == non_native_word): 158 | # print("Входное: " + checked_word) 159 | # print("Попробуйте: " + dict2[id_native]) 160 | string_to_add = "Не \"" + checked_word_lower + "\", а " + dict2[id_native] + ".\n" 161 | is_coincidence_found = True 162 | break 163 | else: 164 | for normal_form in morph.normal_forms(checked_word_lower): 165 | if (normal_form == non_native_word): 166 | string_to_add = "Не \"" + normal_form + "\", а " + dict2[id_native] + ".\n" 167 | is_coincidence_found = True 168 | break 169 | if (is_coincidence_found): 170 | break 171 | if (is_coincidence_found): 172 | break 173 | #check for identical incoming words - they don't need to appear several times in response message 174 | if (string_to_add != ""): 175 | if (not (string_to_add in output_message)): #optimization (maybe) 176 | output_message += string_to_add 177 | 178 | if (output_message != ""): 179 | output_message += "Берегите корни русского языка..." 
180 | update.message.reply_text(output_message) -------------------------------------------------------------------------------- /korni.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pymorphy2 3 | import mysql.connector 4 | import os 5 | import sys 6 | import random 7 | import string 8 | import urllib.parse as urlparse 9 | import time 10 | from datetime import datetime 11 | from _mysql_connector import MySQLInterfaceError 12 | from telegram.ext import Updater, MessageHandler, Filters, CommandHandler 13 | from telegram import ChatMember, TelegramError 14 | 15 | try: 16 | import config 17 | except ModuleNotFoundError: 18 | # if no config (i.e. prod) 19 | pass 20 | 21 | PORT = int(os.environ.get('PORT', 5000)) 22 | morph = pymorphy2.MorphAnalyzer() 23 | id_chat_id = "chat_id" 24 | id_chat_caption = "chat_caption" 25 | id_chat_username = "username" 26 | id_freq = "freq" 27 | id_exhortation = "exhortation" 28 | id_donation_exclude = "donation_ask_exclude" 29 | id_non_native = "МУСОРНОЕ" 30 | id_native = "РОДНОЕ" 31 | id_exclusions = "ИСКЛЮЧЁННЫЕ ИСКАЖЕНИЯ" 32 | id_inexact = "ПОПРАВКА НА СЛУЧАЙ НЕМУСОРНОГО" 33 | id_extra_normal_form = "ДОП. РАСПОЗНАВАЕМОЕ ИСХОДНОЕ ИСКАЖЕНИЕ" 34 | id_unrecognized_forms = "НЕРАСПОЗНАВАЕМЫЕ ИСКАЖЕНИЯ" 35 | 36 | def get_sys_var(var_name): 37 | res = os.getenv(var_name) if (var_name in os.environ) else getattr(config, var_name.lower()) 38 | return res 39 | 40 | def message_how(update, context): 41 | if update.message.chat.type != "private": 42 | if not check_for_message_permission(update, context): 43 | return 44 | # Send a message when the command /kak is issued. 
45 | string_kak = open("data/command_kak_answer.txt", "r", encoding="utf-8").read() 46 | set_reply_text(update, string_kak) 47 | 48 | 49 | def message_info(update, context): 50 | if update.message.chat.type != "private": 51 | if not check_for_message_permission(update, context): 52 | return 53 | # Send a message when the command /sved is issued. 54 | string_sved = open("data/command_sved_answer.txt", "r", encoding="utf-8").read() 55 | set_reply_text(update, string_sved) 56 | 57 | 58 | def connect_to_db(): 59 | try: 60 | if 'CLEARDB_DATABASE_URL' in os.environ: # if prod 61 | url = urlparse.urlparse(os.environ['CLEARDB_DATABASE_URL']) 62 | connect = mysql.connector.connect(database=url.path[1:], user=url.username, password=url.password, 63 | host=url.hostname) 64 | else: 65 | connect = mysql.connector.connect(database=config.db_name, user=config.db_user, password=config.db_password, 66 | host=config.db_host) 67 | except mysql.connector.Error as e: 68 | print('Unable to connect!\n{0}').format(e) 69 | connect = None 70 | finally: 71 | return connect 72 | 73 | 74 | def get_chat_frequency(cht_id): 75 | conn = connect_to_db() 76 | if conn is not None: 77 | cursor = conn.cursor(buffered=True) 78 | else: 79 | sys.exit(1) 80 | try: 81 | cursor.execute(f"SELECT {id_freq}, {id_chat_username}, {id_chat_id}, {id_chat_caption} FROM freq_data WHERE chat_id='" + str(cht_id) + "'") 82 | res = cursor.fetchone() 83 | except MySQLInterfaceError as error: 84 | print(error) 85 | except: 86 | print("unknown error") 87 | cursor.close() 88 | conn.close() 89 | if res is not None: 90 | print("Extracted freq for chat: " + str(res[1]) + ", chat id: " + str(res[2]) + ", chat caption: " + str(res[3])) 91 | return res[0] 92 | else: 93 | print("Can't extract frequency for chat " + str(cht_id)) 94 | return res 95 | 96 | 97 | def get_chat_exhortation(cht_id): 98 | conn = connect_to_db() 99 | if conn is not None: 100 | cursor = conn.cursor(buffered=True) 101 | else: 102 | sys.exit(1) 103 | 
cursor.execute(f"SELECT {id_exhortation} FROM freq_data WHERE chat_id='" + str(cht_id) + "'") 104 | res = cursor.fetchone() 105 | cursor.close() 106 | conn.close() 107 | if res is not None: 108 | return res[0] 109 | else: 110 | return res 111 | 112 | 113 | def change_react_frequency(update, context): # Process when the command /vzvod is issued. 114 | def send_message_when_wrong_argument(): 115 | set_reply_text(update, "Используйте целое числовое значение в промежутке от 1 до 50 в строке после приказа " 116 | "/vzvod и пробела для настройки ретивости робота в данной болталке.\nНапример: " 117 | "\"/vzvod 1\" - взводиться всегда, \"/vzvod 2\" - взводиться на каждое второе " 118 | "сообщение, \"/vzvod 10\" - взводиться на каждое десятое и т.п.") 119 | if update.message.chat.type == "private": 120 | set_reply_text(update, "Настройка ретивости робота доступна только при использовании в болталках, а не в личке!") 121 | return 122 | else: 123 | if not check_for_message_permission(update, context): 124 | return 125 | if context.bot.getChatMember(update.effective_chat.id, update.effective_user.id).status not in \ 126 | [ChatMember.ADMINISTRATOR, ChatMember.CREATOR]: 127 | set_reply_text(update, "Настройка ретивости робота доступна лишь пользователям с правами заведующего!") 128 | return 129 | if len(context.args) > 0: 130 | try: 131 | param = int(context.args[0]) 132 | except ValueError: 133 | send_message_when_wrong_argument() 134 | return 135 | else: 136 | send_message_when_wrong_argument() 137 | return 138 | if (param < 1) | (param > 50): 139 | send_message_when_wrong_argument() 140 | return 141 | set_chat_frequency(param, update) 142 | set_reply_text(update, "Ретивость робота в данной болталке установлена на " + 143 | f'{100 / param:4.2f}'.replace('.', ',') + "%") 144 | 145 | 146 | def change_private_exhortation_mode(update, context): # Process when the command /nazid is issued. 
147 | def send_message_when_wrong_argument(): 148 | set_reply_text(update, "Используйте следующие значения в строке после приказа " 149 | "/nazid и пробела для настройки вида назиданий:\n" 150 | "\"/nazid korni\" - всегда назидание вида \"берегите корни\" (по умолчанию),\n" 151 | "\"/nazid vse\" - все виды назиданий (так же, как в болталках),\n" 152 | "\"/nazid net\" - назидания не добавляются.") 153 | if update.message is None: 154 | return 155 | if update.message.chat.type != "private": 156 | if not check_for_message_permission(update, context): 157 | return 158 | set_reply_text(update, "Настройка вида назиданий доступна только в личке, а не в болталках!") 159 | return 160 | if len(context.args) > 0: 161 | try: 162 | param = context.args[0] 163 | except ValueError: 164 | send_message_when_wrong_argument() 165 | return 166 | else: 167 | send_message_when_wrong_argument() 168 | return 169 | match param: 170 | case "korni": 171 | value = 0 172 | str_reply = "установлены по умолчанию" 173 | case "vse": 174 | value = 1 175 | str_reply = "включены полностью" 176 | case "net": 177 | value = 2 178 | str_reply = "выключены полностью" 179 | case _: 180 | send_message_when_wrong_argument() 181 | return 182 | set_private_chat_exhortation(value, update) 183 | set_reply_text(update, "Назидания робота в личной переписке " + str_reply + ".") 184 | 185 | 186 | def set_chat_frequency(fq, update): 187 | conn = connect_to_db() 188 | if conn is not None: 189 | cursor = conn.cursor(buffered=True) 190 | else: 191 | sys.exit(1) 192 | 193 | chat_id = update.effective_chat.id 194 | title = update.effective_chat.title 195 | if title is None: 196 | title = "PRIVATE" if (update.message.chat.type == "private") else "NONE" 197 | 198 | username = update.effective_chat.username 199 | if username is None: 200 | if update.message.chat.type == "private": 201 | user_username = update.effective_user.username 202 | username = "@" + user_username if (user_username is not None) else 
update.effective_user.first_name 203 | else: 204 | username = "NONE" 205 | 206 | title = title.encode('cp1251', 'ignore').decode('cp1251') 207 | username = username.encode('cp1251', 'ignore').decode('cp1251') 208 | 209 | cursor.execute( 210 | "INSERT INTO freq_data(" + id_chat_id + "," + id_chat_caption + "," + id_chat_username + "," + id_freq + "," + id_exhortation + "," + id_donation_exclude + 211 | ") VALUES(" + str(chat_id) + ", %s, %s, " + str(fq) + ", 1, 0) " + 212 | "ON DUPLICATE KEY UPDATE " + 213 | id_chat_caption + "=%s, " + id_chat_username + "=%s, " + id_freq + "=" + str( 214 | fq) + ", " + id_exhortation + "=NULL", (title, username, title, username,)) 215 | 216 | conn.commit() 217 | cursor.close() 218 | conn.close() 219 | 220 | 221 | def set_private_chat_exhortation(val, update): 222 | conn = connect_to_db() 223 | if conn is not None: 224 | cursor = conn.cursor(buffered=True) 225 | else: 226 | sys.exit(1) 227 | 228 | chat_id = update.effective_chat.id 229 | title = "PRIVATE" 230 | user_username = update.effective_user.username 231 | username = "@" + user_username if (user_username is not None) else update.effective_user.first_name 232 | username = username.encode('cp1251', 'ignore').decode('cp1251') 233 | 234 | cursor.execute( 235 | "INSERT INTO freq_data(" + id_chat_id + "," + id_chat_caption + "," + id_chat_username + "," + id_freq + "," + id_exhortation + "," + id_donation_exclude + 236 | ") VALUES(" + str(chat_id) + ", %s, %s, 1, " + str(val) + ", 0) " + 237 | "ON DUPLICATE KEY UPDATE " + 238 | id_chat_caption + "=%s, " + id_chat_username + "=%s, " + id_freq + "=NULL, " + id_exhortation + "=" + str(val), 239 | (title, username, title, username,)) 240 | 241 | conn.commit() 242 | cursor.close() 243 | conn.close() 244 | 245 | 246 | def correction_string(incoming_word, correction, exclusion): 247 | string_not = "Не \"" if exclusion is None else "Вероятно не \"" 248 | string_res = string_not + incoming_word + "\", а " + correction + "." 
249 | string_res += "\n" if exclusion is None else " Если вы, конечно, не имели в виду " + exclusion + ".\n" 250 | return string_res 251 | 252 | 253 | def correction_string_from_normal_forms(crsr, chkd_wrd_lwr): 254 | string_res = "" 255 | for normal_form in morph.normal_forms(chkd_wrd_lwr): 256 | crsr.execute("SELECT " + id_native + ", `" + id_inexact + "`, `" + id_exclusions + "`, " + id_non_native + 257 | " FROM rodno_data WHERE " + 258 | id_non_native + "='" + normal_form + "' OR `" + id_extra_normal_form + "`='" + normal_form + "'") 259 | fix_recommendation = crsr.fetchone() 260 | if fix_recommendation is not None: 261 | if fix_recommendation[2] is not None: # if there are some excluded words 262 | excls = fix_recommendation[2].split(',') 263 | excls_stripped = [s.strip(" {}") for s in excls] 264 | if chkd_wrd_lwr in excls_stripped: # if current word is exclusion 265 | break 266 | string_res = correction_string(fix_recommendation[3], fix_recommendation[0], fix_recommendation[1]) 267 | break 268 | return string_res 269 | 270 | 271 | def check_for_message_permission(upd, cntx): 272 | try: 273 | bot_chat_member = cntx.bot.getChatMember(upd.effective_chat.id, cntx.bot.id) 274 | except TelegramError as err: 275 | print(err) 276 | return False 277 | if bot_chat_member.status == ChatMember.RESTRICTED: 278 | if not bot_chat_member.can_send_messages: 279 | return False 280 | #print("can send messages!") 281 | return True 282 | 283 | 284 | def set_reply_text(upd, txt): 285 | try: 286 | upd.message.reply_text(txt) 287 | except TelegramError as err: 288 | print("ERROR DURING REPLY:") 289 | print(err) 290 | return 291 | 292 | def send_donation_requests(cntx, crsr, cnctn): 293 | def print_and_write_to_file(str): 294 | print(str) 295 | f.write(str + "\n") 296 | 297 | crsr.execute("SELECT " + id_chat_id + ", " + id_chat_caption + ", " + id_chat_username + ", " + id_donation_exclude + " FROM freq_data") 298 | crsr.close() 299 | cnctn.close() 300 | user_rows = crsr.fetchall() 
301 | string_help = open("data/donation_ask_message.txt", "r", encoding="utf-8").read() 302 | f = open("donation_log_" + datetime.today().strftime('%Y-%m-%d') +".txt", "w") 303 | cntr_msg_snd = 0 304 | cntr_msg_err = 0 305 | for user_chat_data in user_rows: 306 | print(user_chat_data) 307 | if user_chat_data[3] == 1: 308 | print_and_write_to_file(str(user_chat_data[0]) + " Chat/user " + user_chat_data[1] + "/" + 309 | user_chat_data[2] + "is excluded from donation help list") 310 | cntr_msg_err += 1 311 | continue 312 | try: 313 | cntx.bot.get_chat(user_chat_data[0]) 314 | except TelegramError as error: 315 | print_and_write_to_file(str(user_chat_data[0]) + ": Error with chat/user " + user_chat_data[1] + 316 | "/" + user_chat_data[2] + " " + error.message) 317 | cntr_msg_err += 1 318 | continue 319 | except: 320 | print_and_write_to_file(str(user_chat_data[0]) + ": Error with chat/user " + user_chat_data[1] + 321 | "/" + user_chat_data[2] + " " + " unknown error") 322 | cntr_msg_err += 1 323 | continue 324 | try: 325 | bot_chat_member = cntx.bot.getChatMember(user_chat_data[0], cntx.bot.id) 326 | except TelegramError as error: 327 | print_and_write_to_file(str(user_chat_data[0]) + ": Error with chat/user " + user_chat_data[1] + 328 | "/" + user_chat_data[2] + " " + error.message) 329 | cntr_msg_err += 1 330 | continue 331 | else: 332 | if bot_chat_member.status == ChatMember.RESTRICTED: 333 | if not bot_chat_member.can_send_messages: 334 | print_and_write_to_file(str(user_chat_data[0]) + ": Error with chat/user " + user_chat_data[1] + 335 | "/" + user_chat_data[2] + " - BOT IS RESTRICTED") 336 | cntr_msg_err += 1 337 | continue 338 | if cntx.bot.get_chat(user_chat_data[0]).type == 'private': 339 | try: 340 | cntx.bot.send_message(str(user_chat_data[0]), "Любезный/ая " + user_chat_data[2] + 341 | ", пользователь робота-поправляльщика \"Корни Русского\"!\n\n" + string_help) 342 | except TelegramError as error: 343 | print_and_write_to_file(str(user_chat_data[0]) + ": 
Error with chat/user " + user_chat_data[1] + 344 | "/" + user_chat_data[2] + " " + error.message) 345 | cntr_msg_err += 1 346 | time.sleep(1) 347 | continue 348 | else: 349 | try: 350 | cntx.bot.send_message(str(user_chat_data[0]), "Любезные участники беседы \"" + user_chat_data[1] + 351 | "\" и пользователи робота-поправляльщика \"Корни Русского\"!\n\n" + string_help) 352 | except TelegramError as error: 353 | print_and_write_to_file(str(user_chat_data[0]) + ": Error with chat/user " + user_chat_data[1] + 354 | "/" + user_chat_data[2] + " " + error.message) 355 | cntr_msg_err += 1 356 | time.sleep(1) 357 | continue 358 | except: 359 | print_and_write_to_file(str(user_chat_data[0]) + ": Error with chat/user " + user_chat_data[1] + 360 | "/" + user_chat_data[2] + " " + " unknown error") 361 | cntr_msg_err += 1 362 | continue 363 | print_and_write_to_file(str(user_chat_data[0]) + " Donation help message has sent to chat/user " + 364 | user_chat_data[1] + "/" + user_chat_data[2]) 365 | cntr_msg_snd += 1 366 | print_and_write_to_file("Donation messages send: " + str(cntr_msg_snd) + 367 | "\nDonation messages errors: " + str(cntr_msg_err)) 368 | f.close() 369 | 370 | 371 | # Let's analyze all the incoming text 372 | def process_text(update, context): 373 | if update.message is None: 374 | return 375 | 376 | chat_id = update.effective_chat.id 377 | 378 | if update.message.chat.type != "private": 379 | current_freq = get_chat_frequency(chat_id) 380 | if current_freq is None: 381 | current_freq = 1 382 | set_chat_frequency(current_freq, update) 383 | else: 384 | is_no_message_process = (random.randint(1, current_freq) != 1) 385 | if is_no_message_process: 386 | return 387 | 388 | text_to_split = update.message.caption if (update.message.text is None) else update.message.text 389 | 390 | # checks before processing 391 | if update.message.chat.type != "private": 392 | if not check_for_message_permission(update, context): 393 | return 394 | else: 395 | if text_to_split == 
"/start": 396 | message_how(update, context) 397 | return 398 | 399 | conn = connect_to_db() 400 | if conn is not None: 401 | cursor = conn.cursor(buffered=True) 402 | else: 403 | sys.exit(1) 404 | 405 | if text_to_split == get_sys_var("DONATION_HELP_COMMAND"): 406 | send_donation_requests(context, cursor, conn) 407 | return 408 | 409 | output_message = "" 410 | text_to_split = text_to_split.encode('cp1251', 'ignore').decode('cp1251') 411 | 412 | # let's split it by words using re.sub(pattern, repl, string, count=0, flags=0) 413 | # [\w] means any alphanumeric character and is equal to the character set [a-zA-Z0-9_] 414 | input_words_list = re.sub("[^\w-]", " ", text_to_split).split() 415 | 416 | for checked_word in input_words_list: 417 | checked_word_lower = checked_word.lower().removesuffix("-то").removesuffix("-ка").removesuffix( 418 | "-таки").removeprefix("таки-") 419 | if checked_word_lower == "": 420 | continue 421 | cursor.execute("SELECT " + id_native + ", `" + id_inexact + "`, " + id_non_native + " FROM rodno_data WHERE " + 422 | id_non_native + "='" + checked_word_lower + 423 | "' OR LOCATE(' " + checked_word_lower + ",', `" + id_unrecognized_forms + "`)") 424 | fix_recommendation = cursor.fetchone() 425 | if fix_recommendation is not None: 426 | string_to_add = correction_string(fix_recommendation[2], fix_recommendation[0], fix_recommendation[1]) 427 | else: 428 | string_to_add = correction_string_from_normal_forms(cursor, checked_word_lower) 429 | 430 | if string_to_add == "": # check for word parts divided by '-' 431 | splitted_incoming_words = checked_word_lower.split('-') 432 | if len(splitted_incoming_words) > 1: 433 | for splitted_part in splitted_incoming_words: 434 | if splitted_part in ["го", "ок"]: 435 | continue 436 | cursor.execute("SELECT " + id_native + ", `" + id_inexact + "`, " + id_non_native + 437 | " FROM rodno_data WHERE " + id_non_native + "='" + splitted_part + 438 | "' OR LOCATE(' " + splitted_part + ",', `" + 
id_unrecognized_forms + "`)") 439 | fix_recommendation = cursor.fetchone() 440 | if fix_recommendation is not None: 441 | corr_str = correction_string(fix_recommendation[2], fix_recommendation[0], 442 | fix_recommendation[1]) 443 | if not (corr_str in output_message) and not (corr_str in string_to_add): 444 | string_to_add += corr_str 445 | else: 446 | corr_str = correction_string_from_normal_forms(cursor, splitted_part) 447 | if not (corr_str in output_message) and not (corr_str in string_to_add): 448 | string_to_add += corr_str 449 | 450 | if string_to_add != "": 451 | if not (string_to_add in output_message): # optimization (maybe) 452 | output_message += string_to_add 453 | 454 | cursor.close() 455 | conn.close() 456 | 457 | if output_message != "": 458 | output_message += "\n" 459 | lines = open("data/answers.txt", "r", encoding="utf-8").readlines() 460 | lines_ex = open("data/answers_extra.txt", "r", encoding="utf-8").readlines() 461 | rnd_val = random.randint(0, len(lines) - 1) 462 | rnd_extra = random.randint(0, len(lines_ex) - 1) 463 | if (update.message.from_user.username == 'Tatsuya_S') and (update.message.chat.type != 'private'): 464 | if "#" in lines_ex[rnd_extra]: 465 | lines_ex[rnd_extra] = lines_ex[rnd_extra].replace("#", 466 | update.message.from_user.first_name if random.randint( 467 | 1, 468 | 2) == 1 else "@" + update.message.from_user.username) 469 | output_message += lines_ex[rnd_extra] 470 | elif update.message.chat.type == 'private': 471 | exhortation = get_chat_exhortation(chat_id) 472 | match exhortation: 473 | case None: 474 | set_private_chat_exhortation(0, update) 475 | output_message += lines[0] 476 | case 0: 477 | output_message += lines[0] 478 | case 1: 479 | pos_sharp = lines[rnd_val].find('#') 480 | if pos_sharp != -1: 481 | for i in range(pos_sharp - 1, -1, -1): 482 | if lines[rnd_val][i] in string.punctuation or lines[rnd_val][i] == ' ': 483 | if lines[rnd_val][i] in [',', ':', ';']: 484 | lines[rnd_val] = lines[rnd_val][0:i] + 
lines[rnd_val][ 485 | pos_sharp + 1:len(lines[rnd_val])] 486 | break 487 | else: 488 | break 489 | output_message += lines[rnd_val] 490 | case 2: 491 | output_message = output_message.removesuffix("\n") 492 | else: 493 | if "#" in lines[rnd_val]: 494 | lines[rnd_val] = lines[rnd_val].replace("#", update.message.from_user.first_name if (random.randint(1, 495 | 2) == 1) | ( 496 | update.message.from_user.username is None) else "@" + update.message.from_user.username) 497 | output_message += lines[rnd_val] 498 | if len(output_message) > 4096: 499 | set_reply_text(update, output_message[0:4096]) 500 | else: 501 | set_reply_text(update, output_message) 502 | elif update.message.chat.type == 'private': 503 | output_message = "Языковая дружина проверила ваши письмена и не нашла ничего зазорного. Ладный русский слог, иностранщина не обнаружена, отпускаем вас." 504 | set_reply_text(update, output_message) 505 | 506 | 507 | def main(): 508 | """Start the bot.""" 509 | updater = Updater(get_sys_var("TG_API_KEY")) 510 | dp = updater.dispatcher 511 | dp.add_handler(CommandHandler('kak', message_how)) 512 | dp.add_handler(CommandHandler('sved', message_info)) 513 | dp.add_handler(CommandHandler('vzvod', change_react_frequency)) 514 | dp.add_handler(CommandHandler('nazid', change_private_exhortation_mode)) 515 | dp.add_handler(MessageHandler((Filters.text | Filters.caption) & ( 516 | (~Filters.forwarded) & (~Filters.chat_type.channel) | Filters.chat_type.private), 517 | process_text, pass_user_data=True)) 518 | 519 | if 'TG_API_KEY' in os.environ: # if prod 520 | updater.start_webhook(listen="0.0.0.0", port=PORT, url_path=os.getenv("TG_API_KEY"), 521 | webhook_url='https://' + os.getenv("APP_URL") + '/' + os.getenv("TG_API_KEY")) 522 | else: # if dev 523 | updater.start_polling() 524 | 525 | updater.idle() 526 | 527 | 528 | if __name__ == '__main__': 529 | main() 530 | --------------------------------------------------------------------------------