├── README.md
├── allow.py
├── bot.py
├── checking_text.py
├── config.py
├── data
    ├── allow.txt
    ├── group.json
    └── words.txt
├── dictionary
    ├── ru_RU.aff
    └── ru_RU.dic
├── files.py
├── keyboard.py
├── log
    └── app.log
├── logger.py
├── models.py
├── mongodb.py
├── reports
    └── __init__.py
├── requirements.txt
└── vk.py


/README.md:
--------------------------------------------------------------------------------
# Парсер групп ВКонтакте

### Описание
Программа собирает посты и комментарии из социальной сети ВКонтакте, отбирает сообщения по ключевым словам нечетким сравнением и проверкой орфографии, сохраняет сообщения в нереляционную базу данных MongoDB. Для удаленного управления реализован Telegram-бот.

### Апробация программы
Windows 11
Python 3.10.5
MongoDB 5.0.9

В Linux Ubuntu и macOS возникли ошибки с библиотекой pyenchant (для проверки орфографии).

### Выполните следующие действия
1) Установите MongoDB (MongoDB Community Server, https://www.mongodb.com/try/download)
2) Подключитесь к интерфейсу:
```
mongo shell
```
3) Создайте БД с именем 'vk':
```
use vk
```
4) Создайте коллекцию с именем 'user':
```
db.createCollection('user')
```
5) Установите все зависимости:
```
python3 -m pip install -r requirements.txt
```
6) Добавить словарь для поддержки русского языка в pyenchant.
Словари в папке dictionary: ru_RU.aff и ru_RU.dic
Скопировать в папку со словарями:
C:\Users\username\AppData\Local\Programs\Python\Python39\Lib\site-packages\enchant\data\mingw64\share\enchant\hunspell
7) Создайте свое приложение (Standalone-приложение):
https://vk.com/apps?act=manage
Входные данные для получения токена:
client_id - id приложения
scope - права доступа (https://dev.vk.com/reference/access-rights)
Запрос для получения токена:
https://oauth.vk.com/authorize?client_id=111111&display=mobile&redirect_uri=https://oauth.vk.com/blank.html&scope=wall,offline&response_type=token&v=5.131 42 | 8) Для работы без телеграмм бота: 43 | DOMAIN - Короткий адрес сообщества. 44 | OWNER_ID - Идентификатор сообщества, со стены которого необходимо получить записи. 45 | ``` 46 | python3 .\vk.py 47 | ``` 48 | 49 | 9) Для работы с телеграмм ботом 50 | В data/allow.txt добавить пользователей (каждого с новой строки). 51 | В data/words.txt добавить слова (каждое с новой строки). 52 | В data/group.json добавляется через телеграмм бота, в дальнейшем можно править вручную. 53 | Создать телеграмм бота через https://t.me/BotFather 54 | Добавить токен нового бота в config.py 55 | ``` 56 | python3 .\bot.py 57 | ``` 58 | 59 | ### Как отозвать токен? 60 | Вы можете принудительно отозвать токен (например, в том случае, если он стал известен постороннему), сбросив сеансы в настройках безопасности вашего аккаунта или сменив пароль. Также, если речь идет о токене не из вашего собственного приложения, можно просто удалить приложение из настроек: https://vk.com/settings?act=apps 61 | -------------------------------------------------------------------------------- /allow.py: -------------------------------------------------------------------------------- 1 | def LoadAllow(): 2 | try: 3 | with open('data/allow.txt') as file: 4 | models = [row.strip().lower() for row in file] 5 | except Exception as e: 6 | return e 7 | return models 8 | 9 | 10 | def CheckAllow(userid): 11 | if str(userid) in LoadAllow(): 12 | return True 13 | else: 14 | return False 15 | 16 | 17 | def AddAllow(userid): 18 | if str(userid) in LoadAllow(): 19 | return f'Пользователь есть в этом списке.' 20 | try: 21 | with open('data/allow.txt', 'a') as file: 22 | file.write(f'\n{userid}') 23 | except Exception as e: 24 | return e 25 | return f'Пользователь {userid} успешно добавлен.' 
26 | 27 | 28 | def DeleteAllow(userid): 29 | userlist = LoadAllow() 30 | if str(userid) not in userlist: 31 | return f'Пользователь не найден.' 32 | userlist.remove(userid) 33 | try: 34 | with open('data/allow.txt', 'w') as file: 35 | for row in userlist: 36 | file.write(f'{row}\n') 37 | except Exception as e: 38 | return e 39 | return f'Пользователь {userid} успешно удалён.' -------------------------------------------------------------------------------- /bot.py: -------------------------------------------------------------------------------- 1 | from threading import Thread 2 | import subprocess 3 | import time 4 | 5 | from mongodb import MongoDB 6 | from pandas import ExcelWriter 7 | import pandas 8 | 9 | import asyncio 10 | from aiogram import Bot, Dispatcher, executor, types 11 | from aiogram.dispatcher.filters.state import State, StatesGroup 12 | from aiogram.contrib.fsm_storage.memory import MemoryStorage 13 | from aiogram.dispatcher import FSMContext 14 | from aiogram.dispatcher.filters import Text 15 | 16 | from aiogram.types import InlineKeyboardMarkup 17 | 18 | from models import LoadModels, LoadModelsKey, AddModel, DelModel, activeModel, toggleActiveModel 19 | from allow import CheckAllow, AddAllow, DeleteAllow, LoadAllow 20 | from keyboard import homeMenu, settingsMenu, TextButtonList 21 | from config import token 22 | from logger import * 23 | 24 | bot = Bot(token=token) 25 | dp = Dispatcher(bot, storage=MemoryStorage()) 26 | startedGroupList = [] # Лист запущенных групп 27 | startedGroupTread = [] # Лист запущенных групп в потоке 28 | connectionGroupList = [] # Лист с запущенными процессами 29 | 30 | 31 | class Models(StatesGroup): 32 | link = State() 33 | token = State() 34 | 35 | 36 | class UserAllow(StatesGroup): 37 | addUser = State() 38 | addModel = State() 39 | 40 | 41 | class GroupState(StatesGroup): 42 | name = State() 43 | 44 | 45 | @dp.message_handler(commands=['start']) 46 | async def echo(message: types.Message): 47 | if 
CheckAllow(message.from_user.id): 48 | text = 'VK Parser\nДля подробной инфомрации обо всем функционале воспользуйтесь командой /help.' 49 | await message.answer(text, reply_markup=homeMenu) 50 | 51 | 52 | @dp.message_handler(commands=['help']) 53 | async def echo(message: types.Message): 54 | if CheckAllow(message.from_user.id): 55 | await message.answer('Парсер групп Вконтакте') 56 | 57 | 58 | # Главная страница: 59 | @dp.message_handler(text=[TextButtonList['home']]) 60 | async def process_hi1_command(message: types.Message): 61 | if CheckAllow(message.from_user.id): 62 | await message.answer(f'Мы на главной!\nВыбери пункт, который тебе нужен.', reply_markup=homeMenu) 63 | 64 | 65 | # Настройки: 66 | @dp.message_handler(text=[TextButtonList['settings']]) 67 | async def process_hi1_command(message: types.Message): 68 | if CheckAllow(message.from_user.id): 69 | await message.answer(f'Мы в настройках!', reply_markup=settingsMenu) 70 | 71 | 72 | # Статус парсинга: 73 | @dp.message_handler(text=[TextButtonList['status_parsing']]) 74 | async def process_hi1_command(message: types.Message): 75 | if CheckAllow(message.from_user.id): 76 | await message.answer(activeModel()) 77 | 78 | 79 | # Список групп 80 | @dp.message_handler(text=[TextButtonList['groups']]) 81 | async def process_help_command(message: types.Message): 82 | if CheckAllow(message.from_user.id): 83 | listGroupMenu = InlineKeyboardMarkup() 84 | for i in LoadModelsKey(): 85 | listGroupMenu.add(types.InlineKeyboardButton(text=i, callback_data=f'link:{i}')) 86 | listGroupMenu.add(types.InlineKeyboardButton(text='Отменить', callback_data=f'link:cancel')) 87 | await message.answer(f"Список групп.", reply_markup=listGroupMenu) 88 | 89 | 90 | @dp.callback_query_handler(Text(startswith="link:")) 91 | async def callbacks_num(call: types.CallbackQuery): 92 | if call.data == 'link:cancel': 93 | await call.answer() 94 | await call.message.delete() 95 | return 96 | if CheckAllow(call.from_user.id): 97 | 98 | 
deleteUserBtn = types.InlineKeyboardMarkup() 99 | deleteUserBtn.add(types.InlineKeyboardButton(text='Запустить', callback_data=f'runGroup_{call.data.split(":")[1]}')) 100 | deleteUserBtn.add(types.InlineKeyboardButton(text='Остановить', callback_data=f'stopGroup_{call.data.split(":")[1]}')) 101 | deleteUserBtn.add(types.InlineKeyboardButton(text='Удалить', callback_data=f'delGroup_{call.data.split(":")[1]}')) 102 | deleteUserBtn.add(types.InlineKeyboardButton(text='Отменить', callback_data=f'link:cancel')) 103 | await call.message.answer(f"Выберите действие с группой", reply_markup=deleteUserBtn) 104 | 105 | await call.message.delete() 106 | 107 | 108 | @dp.callback_query_handler(Text(startswith="runGroup_")) 109 | async def runGroup(call: types.CallbackQuery, state: FSMContext): 110 | data = call.data.split("_")[1] 111 | if call.data not in startedGroupList: 112 | startedGroupList.append(data) 113 | await call.message.answer(f"Группа {data} добавлена в список запущенных.", reply_markup=homeMenu) 114 | else: 115 | await call.message.answer(f"Группа {data} уже списке запущенных.", reply_markup=homeMenu) 116 | await state.finish() 117 | await call.message.delete() 118 | 119 | 120 | @dp.callback_query_handler(Text(startswith="stopGroup_")) 121 | async def stopGroup(call: types.CallbackQuery, state: FSMContext): 122 | data = call.data.split("_")[1] 123 | if data in startedGroupList: 124 | startedGroupList.remove(data) 125 | await call.message.answer(f"Группа {data} убрана из списка запущенных.", reply_markup=homeMenu) 126 | else: 127 | await call.message.answer(f"Группа {data} не находится в списке запущенных.", reply_markup=homeMenu) 128 | await state.finish() 129 | await call.message.delete() 130 | 131 | 132 | @dp.callback_query_handler(Text(startswith="delGroup_")) 133 | async def delGroup(call: types.CallbackQuery, state: FSMContext): 134 | data = call.data.split("_")[1] 135 | await call.message.answer(DelModel(data), reply_markup=homeMenu) 136 | await 
state.finish() 137 | await call.message.delete() 138 | 139 | 140 | # Список пользователей: 141 | @dp.message_handler(text=[TextButtonList['users']]) 142 | async def process_help_command(message: types.Message): 143 | if CheckAllow(message.from_user.id): 144 | listUserBtn = InlineKeyboardMarkup() 145 | for i in LoadAllow(): 146 | listUserBtn.add(types.InlineKeyboardButton(text=i, callback_data=f'userid_{i}')) 147 | listUserBtn.add(types.InlineKeyboardButton(text='Отменить', callback_data='userid_:cancel')) 148 | await message.answer(f"Выбери пользователя, которого хочешь удалить.", reply_markup=listUserBtn) 149 | 150 | 151 | @dp.callback_query_handler(Text(startswith="userid_")) 152 | async def callbacks_num(call: types.CallbackQuery): 153 | if call.data == 'userid_:cancel': 154 | await call.answer() 155 | await call.message.delete() 156 | return 157 | if CheckAllow(call.from_user.id): 158 | listUserMenu = InlineKeyboardMarkup() 159 | listUserMenu.add(types.InlineKeyboardButton(text='Удалить пользователя', callback_data=f'delUserid_{call.data.split("_")[1]}')) 160 | listUserMenu.add(types.InlineKeyboardButton(text='Отменить', callback_data='userid_:cancel')) 161 | await call.message.answer('Выберите действие с пользователем.', reply_markup=listUserMenu) 162 | 163 | 164 | @dp.callback_query_handler(Text(startswith="delUserid_")) 165 | async def callbacks_num(call: types.CallbackQuery): 166 | if CheckAllow(call.from_user.id): 167 | await call.message.answer(DeleteAllow(call.data.split("_")[1]), reply_markup=homeMenu) 168 | 169 | await call.answer() 170 | await call.message.delete() 171 | 172 | 173 | # Добавить группу: 174 | @dp.message_handler(text=[TextButtonList['add_model']]) 175 | async def user_register(message: types.Message): 176 | if CheckAllow(message.from_user.id): 177 | await message.answer("Введите ссылку на группу (пример https://vk.com/club1)") 178 | await Models.link.set() 179 | 180 | 181 | @dp.message_handler(state=Models.link) 182 | async def 
get_username(message: types.Message, state: FSMContext): 183 | await state.update_data(link=message.text.split('/')[-1]) 184 | await message.answer("Отлично! Теперь введите Token VK API.") 185 | await Models.token.set() 186 | 187 | 188 | @dp.message_handler(state=Models.token) 189 | async def get_address(message: types.Message, state: FSMContext): 190 | await state.update_data(token=message.text) 191 | data = await state.get_data() 192 | await message.answer(AddModel(data['link'], data['token']), reply_markup=homeMenu) 193 | await state.finish() 194 | 195 | 196 | # Добавить пользователя: 197 | @dp.message_handler(text=[TextButtonList['add_user']]) 198 | async def process_help_command(message: types.Message): 199 | if CheckAllow(message.from_user.id): 200 | await UserAllow.addUser.set() 201 | deleteUserBtn = types.InlineKeyboardMarkup() 202 | deleteUserBtn.add(types.InlineKeyboardButton(text='Отменить', callback_data=f'cancelAddUser')) 203 | await message.answer(f"Введите ID пользователя (пример 12345678):", reply_markup=deleteUserBtn) 204 | 205 | 206 | @dp.message_handler(state=UserAllow.addUser) 207 | async def process_name(message: types.Message, state: FSMContext): 208 | if CheckAllow(message.from_user.id): 209 | try: 210 | id_ = int(message.text) 211 | except: 212 | await message.answer(f'ID должен состоять только из цифр.', reply_markup=homeMenu) 213 | await state.finish() 214 | return 215 | await message.answer(AddAllow(id_), reply_markup=homeMenu) 216 | 217 | await state.finish() 218 | 219 | 220 | @dp.callback_query_handler(state='*', text="cancelAddUser") 221 | async def cancelAddUser(call: types.CallbackQuery, state: FSMContext): 222 | current_state = await state.get_state() 223 | if current_state is None: 224 | return 225 | await state.finish() 226 | await call.message.delete() 227 | 228 | 229 | # Получить отчет 230 | @dp.message_handler(text=[TextButtonList['report']]) 231 | async def callbacks_num12(message: types.Message): 232 | if 
CheckAllow(message.from_user.id): 233 | data = await mongo.func() 234 | file_name = time.strftime('%Y.%m.%d_%H-%M-%S') 235 | df = pandas.DataFrame(data) 236 | temp_write = 0 237 | while temp_write != 1: 238 | writer = ExcelWriter(f'reports/{file_name}.xlsx') 239 | df.to_excel(writer, f'{file_name}') 240 | writer.save() 241 | temp_write = 1 242 | 243 | subprocess.call(f"python3 files.py {file_name} xlsx {message.from_user.id}", shell=True) 244 | 245 | 246 | # Получить лог 247 | @dp.message_handler(text=[TextButtonList['log']]) 248 | async def callbacks_num12(message: types.Message): 249 | if CheckAllow(message.from_user.id): 250 | subprocess.call(f"python3 files.py app log {message.from_user.id}", shell=True) 251 | 252 | 253 | # Удаление данных из БД 254 | @dp.message_handler(text=[TextButtonList['delDataFromDB']]) 255 | async def process_hi1_command(message: types.Message): 256 | if CheckAllow(message.from_user.id): 257 | await message.answer(await mongo.deleteData(), reply_markup=homeMenu) 258 | 259 | 260 | # Алгоритм запуска группы 261 | def StartGroup(groupname): 262 | try: 263 | modelsList = LoadModels() 264 | if groupname in modelsList: 265 | userdata = modelsList[groupname] 266 | userdata['name'] = groupname 267 | cmd = ['python3', 'vk.py', groupname, f'{userdata["group_id"]}', f'{userdata["token"]}'] 268 | userdata['process'] = subprocess.Popen(cmd) 269 | connectionGroupList.append(userdata) 270 | 271 | except Exception as e: 272 | logging.error(f'[{groupname}] {e}') 273 | 274 | while True: 275 | time.sleep(1) 276 | 277 | if groupname not in startedGroupList: 278 | for i in range(len(connectionGroupList)): 279 | if connectionGroupList[i]['name'] == groupname: 280 | connectionGroupList[i]['process'].terminate() 281 | connectionGroupList.pop(i) 282 | startedGroupTread.remove(groupname) 283 | toggleActiveModel(groupname, "0") 284 | logging.info(f'[{groupname}] Поток остановлен через бота.') 285 | return 286 | 287 | 288 | async def main(): 289 | while True: 290 
| await asyncio.sleep(5) 291 | try: 292 | if startedGroupList: 293 | for item in startedGroupList: 294 | if item not in startedGroupTread: 295 | Thread(target=StartGroup, args=(item,)).start() 296 | startedGroupTread.append(item) 297 | except Exception as e: 298 | logging.error(f'{e}') 299 | 300 | 301 | if __name__ == '__main__': 302 | mongo = MongoDB() 303 | loop = asyncio.new_event_loop() 304 | asyncio.set_event_loop(loop) 305 | loop.create_task(main()) 306 | executor.start_polling(dp, skip_updates=True) 307 | -------------------------------------------------------------------------------- /checking_text.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | import asyncio 3 | import aiofiles 4 | from fuzzywuzzy import process 5 | from enchant.checker import SpellChecker 6 | 7 | 8 | async def loadWords(): 9 | try: 10 | async with aiofiles.open('data/words.txt', mode='r') as file: 11 | models = [row.strip().lower() async for row in file] 12 | except Exception as e: 13 | return e 14 | return models 15 | 16 | 17 | async def checkWords(message): 18 | await asyncio.sleep(0.01) 19 | words = await loadWords() 20 | word1 = process.extractOne(message, words) 21 | 22 | if word1[1] >= 60: 23 | words.remove(word1[0]) 24 | word2 = process.extractOne(message, words) 25 | if word2[1] >= 60: 26 | if len(message)*1.5 < len(word1[0]) and len(message)*1.5 < len(word2[0]): 27 | return False, None, None 28 | else: 29 | return True, word1, word2 30 | else: 31 | checker = SpellChecker("ru_RU") 32 | checker.set_text(message) 33 | temp = 0 34 | sum_words = len(message.split()) 35 | for i in checker: 36 | temp += 1 37 | if temp/sum_words > 0.3: 38 | if len(message) * 1.5 < len(word1[0]): 39 | return False, None, None 40 | else: 41 | return True, word1, 'орфографические ошибки' 42 | else: 43 | return False, None, None 44 | else: 45 | return False, None, None 46 | 47 | 48 | if __name__ == '__main__': 49 | text = '🤯 Dəт$k0ē 
n/ø/pño смо/Tpu-ka zdecь👩🏻👨‍👩‍👧‍👦 ТГГГГ' 50 | temp = asyncio.get_event_loop().run_until_complete(checkWords(text)) 51 | print(temp[0]) 52 | print(temp[1]) 53 | print(temp[2]) 54 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | token = '' # Токен бота 2 | host = '127.0.0.1' # MongoDB хост 3 | port = 27017 # MongoDB порт 4 | db = 'vk' # MongoDB база 5 | collection = 'user' # MongoDB коллекция 6 | -------------------------------------------------------------------------------- /data/allow.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergo-code/VKGroupParser/5a29520c246c4070891f1952c1f9a9210efb415c/data/allow.txt -------------------------------------------------------------------------------- /data/group.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /data/words.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergo-code/VKGroupParser/5a29520c246c4070891f1952c1f9a9210efb415c/data/words.txt -------------------------------------------------------------------------------- /dictionary/ru_RU.aff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergo-code/VKGroupParser/5a29520c246c4070891f1952c1f9a9210efb415c/dictionary/ru_RU.aff -------------------------------------------------------------------------------- /dictionary/ru_RU.dic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergo-code/VKGroupParser/5a29520c246c4070891f1952c1f9a9210efb415c/dictionary/ru_RU.dic 
-------------------------------------------------------------------------------- /files.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import sys 3 | from config import token 4 | 5 | 6 | class sBot: 7 | def __init__(self, TOKEN, CHAT_ID): 8 | self.token = TOKEN 9 | self.chat_id = CHAT_ID 10 | 11 | def send_file(self, files=None): 12 | url = f'https://api.telegram.org/bot{self.token}/sendDocument' 13 | requests.post(url=url, data={"chat_id": self.chat_id}, files=files) 14 | 15 | 16 | def file_xlsx(): 17 | with open(f"reports/{file_name}.xlsx", mode='rb') as filexlsx: 18 | bot.send_file(files={"document": filexlsx}) 19 | 20 | 21 | def file_log(): 22 | with open(f"log/{file_name}.log", mode='rb') as filexlsx: 23 | bot.send_file(files={"document": filexlsx}) 24 | 25 | 26 | if __name__ == '__main__': 27 | file_name = sys.argv[1] 28 | type_file = sys.argv[2] 29 | chat_id = sys.argv[3] 30 | bot = sBot(token, chat_id) 31 | if type_file == 'xlsx': 32 | file_xlsx() 33 | elif type_file == 'log': 34 | file_log() 35 | -------------------------------------------------------------------------------- /keyboard.py: -------------------------------------------------------------------------------- 1 | from aiogram.types import ReplyKeyboardMarkup, KeyboardButton 2 | 3 | 4 | TextButtonList = { 5 | 'home': '↩️ На главную!', 6 | 'settings': '⚙️ Настройки', 7 | 'groups': '📚 Список групп', 8 | 'users': '👤 Список пользователей', 9 | 'status_parsing': '🪧 Статус парсинга', 10 | 'add_model': '➕ Добавить новую группу', 11 | 'add_user': '➕ Выдать доступ пользователю к боту', 12 | 'report': '📝 Получить отчет', 13 | 'log': '📜 Получить лог', 14 | 'delDataFromDB': '❌ Удалить данные из БД', 15 | } 16 | ButtonList = { 17 | 'home': KeyboardButton(TextButtonList['home']), 18 | 'settings': KeyboardButton(TextButtonList['settings']), 19 | 'groups': KeyboardButton(TextButtonList['groups']), 20 | 'users': KeyboardButton(TextButtonList['users']), 
21 | 'status_parsing': KeyboardButton(TextButtonList['status_parsing']), 22 | 23 | 'add_model': KeyboardButton(TextButtonList['add_model']), 24 | 'add_user': KeyboardButton(TextButtonList['add_user']), 25 | 26 | 'report': KeyboardButton(TextButtonList['report']), 27 | 'log': KeyboardButton(TextButtonList['log']), 28 | 'delDataFromDB': KeyboardButton(TextButtonList['delDataFromDB']), 29 | } 30 | 31 | 32 | homeMenu = ReplyKeyboardMarkup(resize_keyboard=True).add(ButtonList['status_parsing']).add(ButtonList['groups']).add(ButtonList['users']).add(ButtonList['report']).add(ButtonList['log']).add(ButtonList['settings']) 33 | settingsMenu = ReplyKeyboardMarkup(resize_keyboard=True).add(ButtonList['home']).add(ButtonList['add_model']).add(ButtonList['add_user']).add(ButtonList['delDataFromDB']) 34 | -------------------------------------------------------------------------------- /log/app.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sergo-code/VKGroupParser/5a29520c246c4070891f1952c1f9a9210efb415c/log/app.log -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logging.basicConfig( 4 | level=logging.INFO, 5 | filename="log/app.log", 6 | format="%(asctime)s - %(module)s - %(levelname)s - %(funcName)s: %(lineno)d - %(message)s", 7 | datefmt='%d/%m/%Y %H:%M:%S', 8 | ) 9 | -------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | import json 2 | from requests import post 3 | 4 | 5 | def LoadModels(): 6 | with open('data/group.json') as file: 7 | models = json.load(file) 8 | return models 9 | 10 | 11 | def LoadModelsKey(): 12 | with open('data/group.json') as file: 13 | models = json.load(file) 14 | secondList = [] 15 | for key in 
models.keys(): 16 | secondList.append(key) 17 | return secondList 18 | 19 | 20 | def AddModel(link, token): 21 | try: 22 | with open('data/group.json') as file: 23 | models = json.load(file) 24 | url = f"https://api.vk.com/method/utils.resolveScreenName?v=5.131&access_token={token}&screen_name={link}" 25 | 26 | response = post(url).text 27 | 28 | owner_id = json.loads(response) 29 | 30 | if 'response' in owner_id.keys(): 31 | if 'object_id' in owner_id['response'].keys(): 32 | owner_id = owner_id['response']['object_id'] 33 | models.update({link: {"group_id": owner_id, "token": token, "active": "0"}}) 34 | 35 | with open('data/group.json', 'w') as file: 36 | json.dump(models, file) 37 | else: 38 | text = 'Ссылка введена неверно!' 39 | return f'{link} не удалось добавить в список\n{text}' 40 | elif 'error' in owner_id.keys(): 41 | text = 'Введен неверный токен!' 42 | return f'{link} не удалось добавить в список\n{text}' 43 | except: 44 | return f'{link} не удалось добавить в список' 45 | return f'Группа {link} успешно добавлена/обновлена.' 46 | 47 | 48 | def DelModel(link): 49 | with open('data/group.json') as file: 50 | models = json.load(file) 51 | if link in models: 52 | del models[link] 53 | else: 54 | return f'Не удалось удалить {link}' 55 | with open('data/group.json', 'w') as file: 56 | json.dump(models, file) 57 | return f'Группа {link} успешно удалена.' 
58 | 59 | 60 | def toggleActiveModel(domain, toggle): 61 | with open('data/group.json') as file: 62 | models = json.load(file) 63 | models[domain]['active'] = toggle 64 | with open('data/group.json', 'w') as file: 65 | json.dump(models, file) 66 | 67 | 68 | def activeModel(): 69 | with open('data/group.json') as file: 70 | models = json.load(file) 71 | 72 | text = 'Список запущенных групп:\n' 73 | temp = len(text) 74 | for item in models.keys(): 75 | if models[item]['active'] == '1': 76 | text += f'{item}\n' 77 | 78 | if temp != len(text): 79 | return text 80 | else: 81 | return 'В данный момент запущенных групп нет.' 82 | -------------------------------------------------------------------------------- /mongodb.py: -------------------------------------------------------------------------------- 1 | from pymongo import MongoClient 2 | import asyncio 3 | from logger import * 4 | from config import host, port, db, collection 5 | 6 | 7 | class MongoDB: 8 | def __init__(self): 9 | self.client = MongoClient(host, port) 10 | self.db = self.client[db] 11 | self.collections = self.db[collection] 12 | 13 | async def func(self): 14 | await asyncio.sleep(0.01) 15 | return self.collections.find() 16 | 17 | async def insert(self, data): 18 | await asyncio.sleep(0.01) 19 | try: 20 | self.collections.insert_one(data) 21 | except Exception as e: 22 | logging.error(f'{e} {data}') 23 | 24 | async def deleteData(self): 25 | await asyncio.sleep(0.01) 26 | response = self.collections.delete_many({}) 27 | if response.acknowledged: 28 | logging.info('Данные успешно удалены!') 29 | return 'Данные успешно удалены!' 30 | else: 31 | logging.error('Не удалось удалить данные.') 32 | return 'Не удалось удалить данные.' 
33 | -------------------------------------------------------------------------------- /reports/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==0.8.0 2 | aiogram==2.20 3 | aiohttp==3.8.1 4 | aiosignal==1.2.0 5 | async-timeout==4.0.2 6 | asyncio==3.4.3 7 | attrs==21.4.0 8 | Babel==2.9.1 9 | certifi==2022.5.18.1 10 | charset-normalizer==2.0.12 11 | et-xmlfile==1.1.0 12 | frozenlist==1.3.0 13 | fuzzywuzzy==0.18.0 14 | idna==3.3 15 | multidict==6.0.2 16 | numpy==1.22.4 17 | openpyxl==3.0.10 18 | pandas==1.4.2 19 | pyenchant==3.2.2 20 | pymongo==4.1.1 21 | python-dateutil==2.8.2 22 | python-Levenshtein==0.12.2 23 | pytz==2022.1 24 | requests==2.28.0 25 | six==1.16.0 26 | urllib3==1.26.9 27 | yarl==1.7.2 28 | -------------------------------------------------------------------------------- /vk.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import time 4 | import aiohttp 5 | from datetime import datetime 6 | import sys 7 | from models import toggleActiveModel 8 | from mongodb import MongoDB 9 | from logger import * 10 | from checking_text import checkWords 11 | 12 | 13 | class VK: 14 | def __init__(self, domain, delay, count=10, t_p=60 * 60 * 24 * 2, proxy=None, token=None, ids=None, owner_id=None): 15 | # Настройка 16 | self.ids = ids 17 | self.TOKEN = token 18 | self.DOMAIN = domain # Адрес сообщества 19 | self.COUNT = count # Количсетво постов за 1 запрос 20 | self.delay = delay # секунда 21 | self.array_id = {} 22 | self.lastRequestTime = 0 23 | self.time_period = t_p 24 | self.proxy = proxy 25 | self.url = f'https://api.vk.com/method/wall.get?domain={self.DOMAIN}&count=1&v=5.131&access_token={self.TOKEN}' 26 | self.owner_id = owner_id 27 | 
self.danger_arr = {} 28 | self.mongo = MongoDB() 29 | 30 | # Запросы к API серверу 31 | async def requests_func(self, method, url_params): 32 | url = f'https://api.vk.com/method/{method}?v=5.131&access_token={self.TOKEN}&{url_params}' 33 | while True: 34 | await asyncio.sleep(0.01) 35 | if self.lastRequestTime + self.delay < time.time(): 36 | #print(self.ids, method, time.strftime('%H:%M:%S')) 37 | self.lastRequestTime = time.time() 38 | 39 | async with aiohttp.ClientSession() as session: 40 | async with session.post(url, proxy=self.proxy) as response: 41 | data = await response.text() 42 | return data 43 | 44 | # Основная функция которая управляет действиями 45 | async def get_online(self): 46 | global temp_urls 47 | logging.info(f'[{self.DOMAIN}] Поток запущен.') 48 | print(f'[{self.DOMAIN}] Поток запущен.') 49 | while True: 50 | await asyncio.sleep(0.01) 51 | 52 | url = "wall.get", f"domain={self.DOMAIN}&count={self.COUNT}" 53 | 54 | req_posts = await self.requests_func(*url) 55 | try: 56 | posts = json.loads(req_posts)['response']['items'] 57 | except Exception as e: 58 | logging.error(f'[{req_posts}] {e}') 59 | for post in posts: 60 | # за последние 2 дня 61 | if post['date'] > time.time() - self.time_period: 62 | # проверка поста 63 | await self.func_post(post) 64 | 65 | # количество комментариев больше 0 66 | if int(post['comments']['count']) > 0: 67 | await self.func_comment(post) 68 | 69 | # Сбор постов 70 | async def func_post(self, post): 71 | # сбор информации о фото и видео 72 | await asyncio.sleep(0.01) 73 | photo = {} 74 | video = {} 75 | if 'attachments' in post.keys(): 76 | for i in range(len(post['attachments'])): 77 | if 'video' in post['attachments'][i].keys(): 78 | video |= {len(video): post['attachments'][i]['video']['image'][-1]['url']} 79 | temp_urls.append(post['attachments'][i]['video']['image'][-1]['url']) 80 | 81 | elif 'photo' in post['attachments'][i].keys(): 82 | photo |= {len(photo): 
post['attachments'][i]['photo']['sizes'][-1]['url']} 83 | temp_urls.append(post['attachments'][i]['photo']['sizes'][-1]['url']) 84 | else: 85 | photo = {} 86 | video = {} 87 | 88 | # сбор информации в целом о посте 89 | temp_dict = { 90 | 'date': datetime.utcfromtimestamp(int(post['date'])).strftime('%Y-%m-%d %H:%M:%S'), 91 | 'user_id': post['owner_id'], 92 | 'text': str(post['text'].replace("'", "").replace("\n\n", "\n")), 93 | 'photo': photo, 94 | 'video': video, 95 | 'count_comments': int(post['comments']['count']), 96 | 'comments': {0: {'from_id': '0', 'first_name': '0', 'last_name': '0', 'date': '0', 'text': '0', 'video': '0', 'photo': '0', 'danger': '0'}}, 97 | } 98 | 99 | self.array_id[post['id']] = temp_dict 100 | 101 | await self.post_check() 102 | 103 | # Сбор комментариев 104 | async def func_comment(self, post): 105 | await asyncio.sleep(0.01) 106 | arr_comments = {} 107 | arr_comments['comments'] = {} 108 | 109 | for offset in range(0, self.array_id[post['id']]['count_comments']+100, 100): 110 | url = ["wall.getComments", f"owner_id=-{self.owner_id}&post_id={post['id']}&count={100}&offset={offset}&extended=1"] 111 | 112 | comments_full = json.loads(await self.requests_func(*url)) 113 | 114 | if 'response' in comments_full.keys(): 115 | comments = comments_full['response']['items'] 116 | profiles = comments_full['response']['profiles'] 117 | 118 | for comment in comments: 119 | 120 | # сбор информации о фото и видео 121 | photo = {} 122 | video = {} 123 | if 'attachments' in comment.keys(): 124 | for k in range(len(comment['attachments'])): 125 | if 'video' in comment['attachments'][k].keys(): 126 | video |= {len(video): comment['attachments'][k]['video']['image'][-1]['url']} 127 | temp_urls.append(comment['attachments'][k]['video']['image'][-1]['url']) 128 | elif 'photo' in comment['attachments'][k].keys(): 129 | photo |= {len(photo): comment['attachments'][k]['photo']['sizes'][-1]['url']} 130 | 
temp_urls.append(comment['attachments'][k]['photo']['sizes'][-1]['url']) 131 | else: 132 | photo = {} 133 | video = {} 134 | 135 | for temp in range(len(profiles)): 136 | first_name = None 137 | last_name = None 138 | 139 | if comment['from_id'] == profiles[temp]['id']: 140 | first_name = profiles[temp]['first_name'] 141 | last_name = profiles[temp]['last_name'] 142 | break 143 | 144 | # сбор информации в целом о комментарии 145 | date = datetime.utcfromtimestamp(int(comment['date'])).strftime('%Y-%m-%d %H:%M:%S') 146 | arr_comments['comments'] |= {comment['id']: {'from_id': str(comment['from_id']), 147 | 'first_name': str(first_name), 148 | 'last_name': str(last_name), 149 | 'date': str(date), 150 | 'text': str(comment['text']), 151 | 'video': video, 152 | 'photo': photo, 153 | 'danger': str(0)}} 154 | 155 | self.array_id[post['id']]['comments'][comment['id']] = arr_comments['comments'][comment['id']] 156 | 157 | # Проверка tread 158 | if comment['thread']['count'] > 0: 159 | 160 | for offset in range(0, comment['thread']['count'] + 100, 100): 161 | url = ["wall.getComments", f"owner_id=-{self.owner_id}&post_id={post['id']}&comment_id={comment['id']}&count={100}&offset={offset}&extended=1"] 162 | 163 | comments_tread_full = json.loads(await self.requests_func(*url)) 164 | 165 | if 'response' in comments_tread_full.keys(): 166 | comments_tread = comments_tread_full['response']['items'] 167 | profiles_tread = comments_tread_full['response']['profiles'] 168 | 169 | for comment_tread in comments_tread: 170 | 171 | for t in range(len(profiles_tread)): 172 | first_name_tread = None 173 | last_name_tread = None 174 | 175 | if comment_tread['from_id'] == profiles_tread[t]['id']: 176 | first_name_tread = profiles_tread[t]['first_name'] 177 | last_name_tread = profiles_tread[t]['last_name'] 178 | 179 | break 180 | 181 | # сбор информации о фото и видео 182 | photo = {} 183 | video = {} 184 | 185 | if 'attachments' in comment_tread.keys(): 186 | 187 | for k in 
range(len(comment_tread['attachments'])): 188 | if 'video' in comment_tread['attachments'][k].keys(): 189 | video |= {len(video): comment_tread['attachments'][k]['video']['image'][-1]['url']} 190 | temp_urls.append(comment_tread['attachments'][k]['video']['image'][-1]['url']) 191 | elif 'photo' in comment_tread['attachments'][k].keys(): 192 | photo |= {len(photo): comment_tread['attachments'][k]['photo']['sizes'][-1]['url']} 193 | temp_urls.append(comment_tread['attachments'][k]['photo']['sizes'][-1]['url']) 194 | else: 195 | photo = {} 196 | video = {} 197 | 198 | # сбор информации в целом о комментарии 199 | date = datetime.utcfromtimestamp(int(comment_tread['date'])).strftime('%Y-%m-%d %H:%M:%S') 200 | arr_comments['comments'] |= {comment_tread['id']: {'from_id': str(comment_tread['from_id']), 201 | 'first_name': str(first_name_tread), 202 | 'last_name': str(last_name_tread), 203 | 'date': str(date), 204 | 'text': str(comment_tread['text']), 205 | 'video': video, 206 | 'photo': photo, 207 | 'danger': str(0)}} 208 | 209 | self.array_id[post['id']]['comments'][comment_tread['id']] = arr_comments['comments'][comment_tread['id']] 210 | 211 | await self.comment_check() 212 | 213 | # Проверка постов и комментариев 214 | async def post_check(self): 215 | await asyncio.sleep(0.01) 216 | # Проверка текста поста 217 | for post_id in self.array_id.keys(): 218 | if post_id not in self.danger_arr.keys(): 219 | 220 | # Проверка через нечеткое сравнение 221 | sign = await checkWords(self.array_id[post_id]['text']) 222 | if sign[0]: 223 | mongo_array = self.array_id[post_id] 224 | mongo_array['link'] = f'https://vk.com/{self.DOMAIN}?w=wall-{self.owner_id}_{post_id}' 225 | mongo_array['signs'] = f'{sign[1]}\n{sign[2]}' 226 | await self.mongo.insert(mongo_array) 227 | self.danger_arr[post_id] = 1 228 | break 229 | 230 | async def comment_check(self): 231 | await asyncio.sleep(0.01) 232 | # Проверка комментариев 233 | for post_id in self.array_id.keys(): 234 | for comment_id in 
self.array_id[post_id]['comments']: 235 | if comment_id not in self.danger_arr.keys(): 236 | # Проверка через нечеткое сравнение 237 | sign = await checkWords(self.array_id[post_id]['comments'][comment_id]['text']) 238 | if sign[0]: 239 | mongo_array = self.array_id[post_id]['comments'][comment_id] 240 | mongo_array['link'] = f'https://vk.com/{self.DOMAIN}?w=wall-{self.owner_id}_{post_id}' 241 | mongo_array['group'] = self.DOMAIN 242 | mongo_array['signs'] = f'{sign[1]}\n{sign[2]}' 243 | await self.mongo.insert(mongo_array) 244 | 245 | self.danger_arr[comment_id] = 1 246 | break 247 | 248 | 249 | temp_urls = [] 250 | 251 | 252 | async def main(): 253 | tasks = [] 254 | DELAY = 0.35 255 | COVERAGE_TIME = 60 * 60 * 24 256 | 257 | vk = VK(DOMAIN, t_p=COVERAGE_TIME, token=TOKEN, owner_id=OWNER_ID, delay=DELAY) 258 | task = asyncio.create_task(vk.get_online()) 259 | tasks.append(task) 260 | 261 | await asyncio.gather(*tasks) 262 | 263 | 264 | if __name__ == '__main__': 265 | DOMAIN = sys.argv[1] 266 | OWNER_ID = sys.argv[2] 267 | TOKEN = sys.argv[3] 268 | toggleActiveModel(DOMAIN, "1") 269 | try: 270 | loop = asyncio.new_event_loop() 271 | asyncio.set_event_loop(loop) 272 | loop.run_until_complete(main()) 273 | except Exception as e: 274 | toggleActiveModel(DOMAIN, "0") 275 | logging.error(f'{e}') 276 | --------------------------------------------------------------------------------