├── .gitignore ├── .idea ├── .gitignore ├── avito_parser.iml ├── dataSources.xml ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── PipInstaller.txt ├── README.md ├── avito_database.db ├── bot └── bot.py ├── change_price_history_sqlite.py ├── date_and_time.py ├── helpers.py ├── main.py ├── new_logging.py ├── parser_avito.py ├── session.py ├── settings.py ├── sqlite.py └── text_converter.py /.gitignore: -------------------------------------------------------------------------------- 1 | logs/* 2 | settings.py 3 | venv 4 | /data.json 5 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /.idea/avito_parser.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/dataSources.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | sqlite.xerial 6 | true 7 | org.sqlite.JDBC 8 | jdbc:sqlite:$PROJECT_DIR$/avito_database.db 9 | $ProjectFileDir$ 10 | 11 | 12 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 
2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /PipInstaller.txt: -------------------------------------------------------------------------------- 1 | altgraph==0.17 2 | asgiref==3.3.4 3 | beautifulsoup4==4.9.3 4 | bs4==0.0.1 5 | certifi==2020.12.5 6 | cfscrape==2.1.1 7 | chardet==4.0.0 8 | future==0.18.2 9 | idna==2.10 10 | pefile==2019.4.18 11 | pyTelegramBotAPI==3.7.6 12 | pytz==2021.1 13 | pywin32-ctypes==0.2.0 14 | requests==2.25.1 15 | six==1.16.0 16 | soupsieve==2.2.1 17 | urllib3==1.26.4 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Авито парсер 2 | 3 | Написал простой парсер который: 4 | - Обходит указанные ссылки авито из БД. 5 | - Получает данные по объявлениям (название, адрес, ссылка, цена, доп параметры) 6 | - Отправляет уведомление в телеграм при добавлении нового объявления, либо изменении цены. 7 | 8 | ## Установка 9 | 10 | Указываем ссылки в avito_parser.db таблица url (ссылка, город, ID чата телеграм)
11 | Указываем токен бота в (TG_TOKEN) /settings.py
12 | Указываем чат в телеграм для выявления ошибок (EXEPTION_CHAT) /settings.py
13 | Указываем путь до БД (ROUTE_DB) /settings.py
14 | Указываем путь до директории парсера для работы крона (DIR_LOCATION) /settings.py
15 | Установить библиотеки (под linux) > `pip install -r PipInstaller.txt`
16 | Крон необходимо настроить не чаще чем раз в 5 минут(на одну ссылку), дабы не получить временный бан по IP от авито. Советую на пак ссылок поставить срабатывание скрипта раз в час. 17 | 18 | ## Запуск 19 | Используя команду `python main.py` запускаем обход парсера.
20 | После обхода парсер автоматическки запишет изменения в бд, оставит уведомление в телеграм и остановит свою работу 21 | -------------------------------------------------------------------------------- /avito_database.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/useless-apple/avito_parser/4153ce55f26a468cfd57423424486e58ee75f437/avito_database.db -------------------------------------------------------------------------------- /bot/bot.py: -------------------------------------------------------------------------------- 1 | import telebot 2 | 3 | from date_and_time import time_sleep 4 | from settings import TG_TOKEN 5 | 6 | bot = telebot.TeleBot(TG_TOKEN) 7 | 8 | 9 | @bot.message_handler(content_types=['text']) 10 | def text_handler(chat_id, text): 11 | """ 12 | Отправка текста в чат ТГ 13 | :param chat_id: 14 | :param text: 15 | :return: 16 | """ 17 | bot.send_message(chat_id, text) 18 | time_sleep(5) 19 | -------------------------------------------------------------------------------- /change_price_history_sqlite.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sqlite3 3 | 4 | conn = sqlite3.connect('test.db') 5 | with conn: 6 | cur = conn.cursor() 7 | cur.execute('SELECT avito_id,price_history FROM offers') 8 | final_price_history = [] 9 | items = cur.fetchall() 10 | for item in items: 11 | id = item[0] 12 | price_history = json.loads(item[1]) 13 | if len(price_history) == 0: 14 | continue 15 | price_list = [] 16 | price = {} 17 | for i in range(len(price_history)): 18 | if i % 2 == 1: 19 | price['price'] = price_history[i] 20 | price_list.append(price) 21 | price = {} 22 | if i % 2 == 0: 23 | price['data'] = price_history[i] 24 | sql_price_list = json.dumps(price_list) 25 | cur.execute("UPDATE offers SET price_history=? 
WHERE avito_id=?",(sql_price_list, id)) 26 | conn.commit() 27 | conn.close() -------------------------------------------------------------------------------- /date_and_time.py: -------------------------------------------------------------------------------- 1 | import time 2 | from datetime import datetime 3 | from pytz import timezone 4 | from helpers import get_random_time 5 | 6 | 7 | def get_date_time(time_format="%Y-%m-%d %H:%M:%S"): 8 | """ 9 | Получаем текущее время 10 | :param time_format: 11 | :return: 12 | """ 13 | date_and_time = datetime.now(timezone('Asia/Yekaterinburg')).strftime(time_format) 14 | return date_and_time 15 | 16 | 17 | def time_sleep(n=None): 18 | """ 19 | Таймер сна 20 | :param n: 21 | :return: 22 | """ 23 | if n: 24 | time.sleep(n) 25 | else: 26 | time.sleep(get_random_time()) 27 | -------------------------------------------------------------------------------- /helpers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | 4 | 5 | def read_json_txt(file): 6 | """ 7 | Прочитать файл JSON 8 | :param file: 9 | :return: 10 | """ 11 | with open(file, encoding='utf-8', newline='') as json_file: 12 | data = json.load(json_file) 13 | return data 14 | 15 | 16 | def write_json_txt(result, file): 17 | """ 18 | Записать новый файл JSON 19 | :param result: 20 | :param file: 21 | :return: 22 | """ 23 | with open(file, 'w', encoding='utf-8') as f: 24 | json.dump(result, f, ensure_ascii=False, indent=4) 25 | 26 | 27 | def get_random_time(): 28 | """ 29 | Получать рандомное число 30 | :return: 31 | """ 32 | value = random.random() 33 | scaled_value = 4 + (value * (11 - 5)) 34 | return scaled_value 35 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from date_and_time import get_date_time 2 | from helpers import write_json_txt 3 | from new_logging import log 4 
| from parser_avito import get_global_result 5 | from sqlite import get_urls 6 | 7 | 8 | if __name__ == '__main__': 9 | try: 10 | log.info('-----------------------------------------------------------------------------------------------') 11 | log.info('Starting parsing ' + str(get_date_time())) 12 | tasks = [] 13 | tasks += get_urls() 14 | global_result = get_global_result(tasks) 15 | write_json_txt(global_result, 'data.json') 16 | log.info('Parsing Success ' + str(get_date_time())) 17 | log.info('-----------------------------------------------------------------------------------------------') 18 | 19 | except Exception as e: 20 | log.exception(str(e)) 21 | -------------------------------------------------------------------------------- /new_logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from date_and_time import get_date_time 3 | from settings import DIR_LOCATION 4 | 5 | logging.basicConfig( 6 | filename="{0}logs/log-{1}.log".format(DIR_LOCATION, get_date_time("%Y-%m-%d")), 7 | level=logging.INFO, 8 | format='[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s', 9 | ) 10 | log = logging.getLogger("ex") -------------------------------------------------------------------------------- /parser_avito.py: -------------------------------------------------------------------------------- 1 | import re 2 | from sqlite import write_sqlite3 3 | 4 | from bot.bot import text_handler 5 | from date_and_time import time_sleep 6 | from new_logging import log 7 | from session import get_soup_from_page 8 | from settings import EXEPTION_CHAT 9 | from text_converter import clean 10 | 11 | 12 | def get_item_data(rows, type_of): 13 | """ 14 | Получаем данные для каждого объявления 15 | :param rows: 16 | :param type_of: 17 | :return: 18 | """ 19 | result = [] 20 | for row in rows: 21 | avito_id = '' 22 | name = '' 23 | price = '' 24 | url = '' 25 | address = '' 26 | params = '' 27 | 28 | # ID Объявления 
29 | try: 30 | avito_id = int(row.get('data-item-id')) 31 | except: 32 | avito_id = 'Не найден' 33 | 34 | # Название товара 35 | try: 36 | name = clean(row.find('h3', {"itemprop": "name"}).text) 37 | except: 38 | name = 'Не найден' 39 | 40 | # Цена товара 41 | try: 42 | price = int(clean(row.find('meta', {"itemprop": "price"}).get("content"))) 43 | except: 44 | price = 'Не найден' 45 | 46 | # Ссылка на товар 47 | try: 48 | url = 'https://avito.ru' + row.find('a', {"itemprop": "url"}).get("href") 49 | except: 50 | url = 'Не найден' 51 | 52 | # Для товара типа "Недвижимость" 53 | if type_of == 'Недвижимость': 54 | # Адрес 55 | try: 56 | address = clean(row.find('div', {"data-marker": "item-address"}).div.span.span.text) 57 | except: 58 | address = 'Не найден' 59 | 60 | # Для товара типа "Транспорт" 61 | elif type_of == 'Транспорт': 62 | # Параметры авто 63 | try: 64 | params = clean(row.find('div', {"data-marker": "item-specific-params"}).text) 65 | except: 66 | params = 'Не найден' 67 | 68 | # Адрес 69 | try: 70 | address = clean(row.find('div', attrs={"class": re.compile(r"geo-georeferences")}).span.text) 71 | except: 72 | address = 'Не найден' 73 | 74 | elif type_of == 'Хобби и отдых': 75 | # Параметры объявления 76 | try: 77 | params = clean(row.find('div', attrs={"class": re.compile(r"iva-item-description")}).text) 78 | except: 79 | params = 'Не найден' 80 | 81 | # Адрес 82 | try: 83 | address = clean(row.find('span', attrs={"class": re.compile(r"geo-address")}).span.text) 84 | except: 85 | address = 'Не найден' 86 | item = { 87 | 'avito_id': avito_id, 88 | 'name': name, 89 | 'price': price, 90 | 'address': address, 91 | 'url': url, 92 | 'type_of': type_of, 93 | 'params': params 94 | } 95 | result.append(item) 96 | return result 97 | 98 | 99 | def get_page_rows(soup, type_of): 100 | """ 101 | Получаем таблицу с объявлениями 102 | :param soup: 103 | :param type_of: 104 | :return: 105 | """ 106 | table = soup.find('div', {"data-marker": "catalog-serp"}) 107 | 108 
| if table: # Удаляем рекламные блоки 109 | if table.find('div', {"data-marker": "witcher/block"}): 110 | table.find('div', {"data-marker": "witcher/block"}).decompose() 111 | rows = table.find_all('div', {"data-marker": "item"}) 112 | result = get_item_data(rows, type_of) 113 | 114 | else: 115 | error_message = 'Error not table' + str(soup) + str(table) 116 | log.error(error_message) 117 | text_handler(EXEPTION_CHAT, 'Error not table// Check LOGS') 118 | result = [] 119 | return result 120 | 121 | 122 | def get_page_data(page_url, count_try): 123 | """ 124 | Получаем страницу с объявлениями 125 | :param page_url: 126 | :param count_try: 127 | :return: 128 | """ 129 | next_pagination = True 130 | soup = get_soup_from_page(page_url, count_try) 131 | result = [] 132 | if not soup[1]: 133 | error_message = 'Next parsing none ' + str(page_url) 134 | log.error(error_message) 135 | text_handler(EXEPTION_CHAT, error_message) 136 | return result, False 137 | 138 | if not soup[0]: 139 | error_message = 'Soup is None ' + str(page_url) 140 | log.error(error_message) 141 | text_handler(EXEPTION_CHAT, error_message) 142 | return result, False 143 | 144 | try: 145 | type_of = soup[0].find('div', {"data-marker": "breadcrumbs"}).find_all('span', {"itemprop": "itemListElement"})[ 146 | 1].find('a').text 147 | except: 148 | type_of = 'None Type' 149 | log.warn('type_of = None Type') 150 | 151 | if soup[0].find_all('div', attrs={"class": re.compile(r"items-items")}): 152 | if len(soup[0].find_all('div', attrs={"class": re.compile(r"items-items")})) > 1: 153 | log.warn('Found another offers | Break pagination ' + str(page_url)) 154 | next_pagination = False 155 | try: 156 | result = get_page_rows(soup[0], type_of) 157 | except: 158 | error_message = 'Error get_page_rows' + '\n ' + page_url 159 | text_handler(EXEPTION_CHAT, error_message) 160 | log.error(error_message) 161 | return result, next_pagination 162 | 163 | 164 | def get_count_page(soup, url_task): 165 | """ 166 | Получаем 
список страниц пагинации 167 | :param soup: 168 | :param url_task: 169 | :return: 170 | """ 171 | try: 172 | pagination = soup.find('div', {"data-marker": "pagination-button"}) 173 | pagination.find('span', {"data-marker": "pagination-button/prev"}).decompose() 174 | pagination.find('span', {"data-marker": "pagination-button/next"}).decompose() 175 | count_page = pagination.find_all('span')[-1].text 176 | except: 177 | count_page = 1 178 | error_message = 'Error pagination' + '\n ' + url_task 179 | text_handler(EXEPTION_CHAT, error_message) 180 | log.error(error_message) 181 | return count_page 182 | 183 | 184 | def get_result_task(count_page, url_task): 185 | """ 186 | Получаем данные для одного задания (ссылки со всеми пагинациями) 187 | :param count_page: 188 | :param url_task: 189 | :return: 190 | """ 191 | next_pagination = True 192 | result = [] 193 | for i in range(1, int(count_page) + 1): 194 | if next_pagination: # Проверяем нужно ли парсить следующие страницы 195 | log.info('Parsing page# ' + str(i) + ' of ' + str(count_page)) 196 | page_url = url_task + '&p=' + str(i) 197 | try: 198 | page_data = get_page_data(page_url, 1) 199 | except: 200 | page_data = [], True 201 | error_message = 'Error get_page_data' + '\n ' + page_url 202 | text_handler(EXEPTION_CHAT, error_message) 203 | log.error(error_message) 204 | 205 | result += page_data[0] 206 | next_pagination = page_data[1] 207 | time_sleep() 208 | else: 209 | break 210 | return result 211 | 212 | 213 | def get_global_result(tasks): 214 | """ 215 | Получаем глобальный результат по всем заданиям 216 | :param tasks: 217 | :return: 218 | """ 219 | global_result = [] 220 | for task in tasks: 221 | url_task = task[1] 222 | task = [task[2], task[3], task[0]] 223 | log.info('Url parsing ' + str(url_task)) 224 | soup = get_soup_from_page(url_task + '&p=1', 1) 225 | count_page = get_count_page(soup[0], url_task) 226 | result = get_result_task(count_page, url_task) 227 | time_sleep() 228 | item = [result, task] 
229 | write_sqlite3(item) 230 | global_result.append(item) 231 | return global_result 232 | -------------------------------------------------------------------------------- /session.py: -------------------------------------------------------------------------------- 1 | import time 2 | import cfscrape 3 | import requests 4 | 5 | from bs4 import BeautifulSoup 6 | from bot.bot import text_handler 7 | from date_and_time import time_sleep 8 | from helpers import get_random_time 9 | from new_logging import log 10 | from settings import EXEPTION_CHAT 11 | 12 | 13 | def get_session(): 14 | """ 15 | Создаем сессию 16 | :return: 17 | """ 18 | session = requests.Session() 19 | session.headers = { 20 | 'Host': 'www.avito.ru', 21 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0', 22 | 'Accept': 'text/html', 23 | 'Accept-Language': 'ru,en-US;q=0.5', 24 | 'DNT': '1', 25 | 'Connection': 'keep-alive', 26 | 'Upgrade-Insecure-Requests': '1', 27 | 'Pragma': 'no-cache', 28 | 'Cache-Control': 'no-cache'} 29 | return cfscrape.create_scraper(sess=session) 30 | 31 | 32 | def get_soup_from_page(page_url, count_try): 33 | """ 34 | Получаем SOUP для любой страницы 35 | :param page_url: 36 | :param count_try: 37 | :return: 38 | """ 39 | session = get_session() 40 | r = session.get(page_url) 41 | next_parsing = True 42 | if r.status_code == 403: 43 | error_message = 'Error: ' + str(r.status_code) + ' \nTime to sleep. Exit.' 44 | text_handler(EXEPTION_CHAT, error_message) 45 | log.error(error_message) 46 | soup = None 47 | next_parsing = False 48 | elif r.status_code == 429 and count_try < 2: 49 | error_message = 'Error: ' + str(r.status_code) + ' \nToo many request. Sleep 10min. 
\nTry № ' + str(count_try) + '\n' + str(page_url) 50 | text_handler(EXEPTION_CHAT, error_message) 51 | log.error(error_message) 52 | time.sleep(600) 53 | soup = get_soup_from_page(page_url, count_try + 1) 54 | elif r.status_code == 429 and count_try < 4: 55 | error_message = 'Error: ' + str(r.status_code) + ' \nToo many request. Sleep 15min. \nTry № ' + str(count_try) + '\n' + str(page_url) 56 | text_handler(EXEPTION_CHAT, error_message) 57 | log.error(error_message) 58 | time.sleep(900) 59 | soup = get_soup_from_page(page_url, count_try + 1) 60 | elif r.status_code != 200 and count_try < 4: 61 | error_message = 'Error: ' + str(r.status_code) + ' Try № ' + str(count_try) + '\n' + str(page_url) 62 | text_handler(EXEPTION_CHAT, error_message) 63 | log.error(error_message) 64 | time_sleep(get_random_time()) 65 | soup = get_soup_from_page(page_url, count_try + 1) 66 | elif count_try > 4: 67 | error_message = 'Error: ' + str(r.status_code) + ' Try ended' 68 | text_handler(EXEPTION_CHAT, error_message) 69 | log.warn(error_message) 70 | soup = None 71 | else: 72 | soup = BeautifulSoup(r.text, 'html.parser') 73 | return soup, next_parsing 74 | 75 | -------------------------------------------------------------------------------- /settings.py: -------------------------------------------------------------------------------- 1 | #Относительный путь до проекта (нужно для крона) 2 | DIR_LOCATION = "/root/parser/" 3 | 4 | #Путь до базы данных 5 | ROUTE_DB = DIR_LOCATION + "avito_database.db" 6 | 7 | #Телеграм токен бота 8 | TG_TOKEN = 'NNNNNNNNNNNNNNNNNNN' 9 | 10 | #ID чата для отлавливания ошибок 11 | EXEPTION_CHAT = 'NNNNNNNNNNNNNN' 12 | -------------------------------------------------------------------------------- /sqlite.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import json 3 | 4 | from bot.bot import text_handler 5 | from new_logging import log 6 | from date_and_time import get_date_time 7 | from settings 
import ROUTE_DB, EXEPTION_CHAT 8 | from text_converter import num_conversion, calculation_percent, calculation_different_price, parse_items_to_send 9 | 10 | 11 | def write_sqlite3(url): 12 | """ 13 | Записываем данные в SQLite 14 | :param url: 15 | :return: 16 | """ 17 | items = [] 18 | sql_city = url[1][0] 19 | sql_chat = url[1][1] 20 | sql_urls_id = url[1][2] 21 | conn = sqlite3.connect(ROUTE_DB) 22 | with conn: 23 | cur = conn.cursor() 24 | cur.execute('UPDATE offers SET status=0 WHERE urls_id=?', (sql_urls_id,)) # Обнуляем у всех объявлений статус 25 | for i in range(0, len(url[0])): 26 | if url[0][i] is not None: 27 | sql_avito_id = url[0][i]['avito_id'] 28 | sql_name = url[0][i]['name'] 29 | sql_price = url[0][i]['price'] 30 | sql_address = url[0][i]['address'] 31 | sql_url = url[0][i]['url'] 32 | sql_type_of = url[0][i]['type_of'] 33 | sql_params = url[0][i]['params'] 34 | 35 | price_history = [] 36 | price_now = { 37 | "data": str(get_date_time()), 38 | "price": str(sql_price) 39 | } 40 | 41 | cur.execute('SELECT avito_id FROM offers WHERE avito_id=?', 42 | (sql_avito_id,)) 43 | 44 | item_id = cur.fetchall() 45 | if item_id == [(sql_avito_id,)]: # Ищем ID в бд, и если не находим то пишем сообщение в телегу 46 | cur.execute('SELECT price FROM offers WHERE avito_id=?', 47 | (sql_avito_id,)) 48 | 49 | item_price = cur.fetchall() 50 | old_price = item_price[0][0] 51 | 52 | cur.execute('SELECT price_history FROM offers WHERE avito_id=?', 53 | (sql_avito_id,)) 54 | 55 | price_history = json.loads(cur.fetchall()[0][0]) 56 | price_history.append(price_now) 57 | price_history_dumps = json.dumps(price_history) 58 | 59 | price_history_srt = '' 60 | 61 | start_count = 0 62 | if len(price_history) > 0: 63 | if len(price_history) > 9: 64 | start_count = len(price_history) - 9 65 | for i in range(start_count, len(price_history)): 66 | if i == 0: 67 | price_history_srt = price_history_srt + \ 68 | 'Дата: ' + int(price_history[i]['data']) + ' ' + \ 69 | 'Цена: ' + 
num_conversion(int(price_history[i]['price'])) + ' руб.\n' 70 | else: 71 | percent_price_history = calculation_percent(int(price_history[i - 1]['price']), 72 | int(price_history[i]['price'])) 73 | price_history_srt = price_history_srt + \ 74 | 'Дата: ' + int(price_history[i]['data']) + ' ' + \ 75 | 'Цена: ' + num_conversion(int(price_history[i]['price'])) + ' руб. ' + \ 76 | '(' + percent_price_history + '%)\n' 77 | 78 | difference_price = calculation_different_price(int(price_history[0]['price']), int(price_now['price'])) 79 | percent_difference_price = calculation_percent(int(price_history[0]['price']), int(price_now['price'])) 80 | 81 | if item_price == [(sql_price,)]: # Сравниваем цены, и если есть отличие то обновляем их 82 | cur.execute( 83 | "UPDATE offers SET status=1, updated_date=?,urls_id=?, type_of=?, params=? WHERE avito_id=?", 84 | (str(get_date_time()), sql_urls_id, sql_type_of, sql_params, sql_avito_id)) 85 | continue 86 | else: 87 | items.append({ 88 | 'item_price': item_price, 89 | 'sql_chat': sql_chat, 90 | 'sql_avito_id': sql_avito_id, 91 | 'sql_name': sql_name, 92 | 'old_price': old_price, 93 | 'sql_price': sql_price, 94 | 'price_history_srt': price_history_srt, 95 | 'difference_price': difference_price, 96 | 'percent_difference_price': percent_difference_price, 97 | 'sql_address': sql_address, 98 | 'sql_url': sql_url, 99 | 'sql_params': sql_params, 100 | 'sql_type_of': sql_type_of, 101 | 'type_update': 'update' 102 | }) 103 | 104 | cur.execute( 105 | "UPDATE offers SET price=?, old_price=?, updated_date=?, price_history=?, status=1, urls_id=?, type_of=?, params=? 
WHERE avito_id=?", 106 | (sql_price, old_price, str(get_date_time()), str(price_history_dumps), sql_urls_id, 107 | sql_type_of, 108 | sql_params, sql_avito_id)) 109 | log.info('Price update | ' + str(sql_avito_id)) 110 | 111 | else: 112 | items.append({ 113 | 'item_price': None, 114 | 'sql_chat': sql_chat, 115 | 'sql_avito_id': sql_avito_id, 116 | 'sql_name': sql_name, 117 | 'old_price': None, 118 | 'sql_price': sql_price, 119 | 'price_history_srt': None, 120 | 'difference_price': None, 121 | 'percent_difference_price': None, 122 | 'sql_address': sql_address, 123 | 'sql_url': sql_url, 124 | 'sql_params': sql_params, 125 | 'sql_type_of': sql_type_of, 126 | 'type_update': 'new' 127 | }) 128 | log.info('No ID -> New Offer | ' + str(sql_avito_id)) 129 | 130 | price_history.append(price_now) 131 | price_history_dumps = json.dumps(price_history) 132 | cur.execute( 133 | "INSERT OR IGNORE INTO offers ('avito_id','name','price','price_history','address','url','created_date','updated_date','status','city','urls_id','type_of','params') VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)", 134 | (sql_avito_id, sql_name, sql_price, str(price_history_dumps), sql_address, sql_url, 135 | str(get_date_time()), str(get_date_time()), 1, sql_city, sql_urls_id, sql_type_of, sql_params)) 136 | else: 137 | error_message = 'Error: write Sql_item, item is None ' + str(sql_urls_id) 138 | text_handler(EXEPTION_CHAT, error_message) 139 | log.error(error_message) 140 | parse_items_to_send(items) 141 | conn.commit() 142 | conn.close() 143 | 144 | 145 | def get_urls(): 146 | """ 147 | Получаем данные для заданий из SQLite 148 | :return: 149 | """ 150 | conn = sqlite3.connect(ROUTE_DB) 151 | with conn: 152 | cur = conn.cursor() 153 | cur.execute('SELECT id,name,city,chatid FROM urls') 154 | urls = cur.fetchall() 155 | return urls 156 | -------------------------------------------------------------------------------- /text_converter.py: 
from bot.bot import text_handler
# Import the logger from its definition site. The original lazy
# `from main import log` inside send_mes_to_bot only worked around a
# circular import and bound the very same logger object that main.py
# itself takes from new_logging.
from new_logging import log

# Emoji markers used in Telegram price-change notifications.
emoji_top = u'\U0001F4C8'        # chart with upwards trend
emoji_top_green = u'\U00002705'  # check mark (price dropped - good for buyer)

emoji_down = u'\U0001F4C9'       # chart with downwards trend
emoji_down_red = u'\U0000274C'   # cross mark (price rose)


def num_conversion(a):
    """
    Format a number in money style with thousands separators.
    :param a: numeric value (or numeric string) to format
    :return: formatted string, e.g. 1234567 -> '1,234,567'
    """
    return '{:,}'.format(int(a))


def clean(text):
    """
    Clean scraped text before storing it: drop tabs/newlines, trim whitespace.
    :param text: raw text extracted from the page
    :return: cleaned text
    """
    return text.replace('\t', '').replace('\n', '').strip()


def calculation_percent(price_old, price_new):
    """
    Calculate the percent difference between the old and new price (history).

    NOTE(review): a price DROP is computed relative to the NEW price while a
    rise is relative to the OLD one - asymmetric, but kept unchanged to
    preserve the exact message text; confirm whether this is intended.
    :param price_old:
    :param price_new:
    :return: signed percent string, e.g. '+ 12.5'
    """
    if price_old > price_new:
        percent_price_history = '- ' + str(round(((int(price_old) - int(price_new)) / int(price_new)) * 100, 2))
    else:
        percent_price_history = '+ ' + str(round(((int(price_new) - int(price_old)) / int(price_old)) * 100, 2))
    return percent_price_history


def calculation_different_price(price_old, price_new):
    """
    Calculate the absolute difference between the old and new price.
    :param price_old:
    :param price_new:
    :return: signed, money-formatted difference string, e.g. '- 10,000'
    """
    if price_old > price_new:
        difference_price = '- ' + str(num_conversion(int(price_old) - int(price_new)))

    else:
        difference_price = '+ ' + str(num_conversion(int(price_new) - int(price_old)))
    return difference_price


def send_mes_to_bot(item):
    """
    Build the notification text for one offer and send it to Telegram.
    :param item: dict prepared in sqlite.write_sqlite3 (keys: sql_*,
                 old_price, item_price, price_history_srt, difference_price,
                 percent_difference_price, type_update)
    :return:
    """
    first_row = ''   # ID
    second_row = ''  # Name
    third_row = ''   # price
    fours_row = ''   # price_history
    five_row = ''    # address
    six_row = ''     # params
    seven_row = ''   # url

    if item['type_update'] == 'update':
        # item_price is a cursor.fetchall() result of exactly one row:
        # [(old_price,)]. Compare the scalar directly instead of relying on
        # Python's list-of-tuples lexicographic ordering (same result,
        # far clearer intent).
        if item['item_price'][0][0] >= item['sql_price']:
            # Price dropped (or unchanged): green marker.
            first_row = 'Обновилась цена id ' + str(item['sql_avito_id']) + ' ' + \
                        emoji_down + emoji_down + emoji_top_green + '\n\n'

            third_row = 'Старая цена = ' + str(num_conversion(item['old_price'])) + ' руб. /' + \
                        ' Новая цена = ' + str(num_conversion(item['sql_price'])) + ' руб.\n\n'
        else:
            # Price rose: red marker.
            first_row = 'Обновилась цена id ' + str(item['sql_avito_id']) + ' ' + \
                        emoji_top + emoji_top + emoji_down_red + '\n\n'

            # item_price[0][0] equals old_price (see write_sqlite3).
            third_row = 'Старая цена = ' + str(num_conversion(item['item_price'][0][0])) + ' руб. /' + \
                        ' Новая цена = ' + str(num_conversion(item['sql_price'])) + ' руб.\n\n'

        fours_row = 'Изменения цен \n' + str(item['price_history_srt']) + '\nРазница: ' + \
                    item['difference_price'] + ' (' + item['percent_difference_price'] + '%)\n\n'

    elif item['type_update'] == 'new':
        first_row = 'Новое объявление ' + str(item['sql_avito_id']) + '\n\n'
        # No num_conversion here: a freshly parsed offer may carry the
        # 'Не найден' placeholder instead of a numeric price.
        third_row = 'Цена: ' + str(item['sql_price']) + ' руб.\n\n'
    else:
        log.error('type_update = NONETYPE ' + str(item['sql_avito_id']))
    second_row = str(item['sql_name']) + '\n\n'
    five_row = 'Адрес: ' + str(item['sql_address']) + '\n\n'
    six_row = 'Параметры: ' + str(item['sql_params']) + '\n\n'
    seven_row = 'Ссылка ' + str(item['sql_url']) + '\n\n'
    # Categories that share the generic message layout below.
    none_type_of = ['Личные вещи', 'Работа', 'Для дома и дачи', 'Предложение услуг', 'Электроника', 'Животные',
                    'Готовый бизнес и оборудование']
    if item['sql_type_of'] == 'Недвижимость':
        mes_to_bot = first_row + third_row + fours_row + five_row + seven_row
    elif item['sql_type_of'] == 'Транспорт':
        mes_to_bot = first_row + second_row + third_row + fours_row + six_row + seven_row
    elif item['sql_type_of'] == 'Хобби и отдых':
        mes_to_bot = first_row + second_row + third_row + fours_row + five_row + six_row + seven_row
    elif item['sql_type_of'] in none_type_of:
        mes_to_bot = first_row + second_row + third_row + fours_row + seven_row
    else:
        log.error('sql_type_of = NONETYPE ' + str(item['sql_avito_id']))
        mes_to_bot = 'sql_type_of = NONETYPE ' + str(item['sql_avito_id'])
    text_handler(item['sql_chat'], mes_to_bot)


def parse_items_to_send(items):
    """
    Send a Telegram notification for every collected item.
    :param items: list of item dicts (see send_mes_to_bot)
    :return:
    """
    for item in items:
        send_mes_to_bot(item)