├── .gitignore
├── .idea
├── .gitignore
├── avito_parser.iml
├── dataSources.xml
├── inspectionProfiles
│ ├── Project_Default.xml
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
└── vcs.xml
├── PipInstaller.txt
├── README.md
├── avito_database.db
├── bot
└── bot.py
├── change_price_history_sqlite.py
├── date_and_time.py
├── helpers.py
├── main.py
├── new_logging.py
├── parser_avito.py
├── session.py
├── settings.py
├── sqlite.py
└── text_converter.py
/.gitignore:
--------------------------------------------------------------------------------
1 | logs/*
2 | settings.py
3 | venv
4 | /data.json
5 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/.idea/avito_parser.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/dataSources.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | sqlite.xerial
6 | true
7 | org.sqlite.JDBC
8 | jdbc:sqlite:$PROJECT_DIR$/avito_database.db
9 | $ProjectFileDir$
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/PipInstaller.txt:
--------------------------------------------------------------------------------
1 | altgraph==0.17
2 | asgiref==3.3.4
3 | beautifulsoup4==4.9.3
4 | bs4==0.0.1
5 | certifi==2020.12.5
6 | cfscrape==2.1.1
7 | chardet==4.0.0
8 | future==0.18.2
9 | idna==2.10
10 | pefile==2019.4.18
11 | pyTelegramBotAPI==3.7.6
12 | pytz==2021.1
13 | pywin32-ctypes==0.2.0
14 | requests==2.25.1
15 | six==1.16.0
16 | soupsieve==2.2.1
17 | urllib3==1.26.4
18 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Авито парсер
2 |
3 | Написал простой парсер который:
4 | - Обходит указанные ссылки авито из БД.
5 | - Получает данные по объявлениям (название, адрес, ссылка, цена, доп параметры)
6 | - Отправляет уведомление в телеграм при добавлении нового объявления, либо изменении цены.
7 |
8 | ## Установка
9 |
10 | Указываем ссылки в avito_parser.db таблица url (ссылка, город, ID чата телеграм)
11 | Указываем токен бота в (TG_TOKEN) /settings.py
12 | Указываем чат в телеграм для выявлений ошибок (EXCEPTION_CHAT) /settings.py
13 | Указываем путь до БД (ROUTE_DB) /settings.py
Указываем путь до директории парсера для работы крона (DIR_LOCATION) /settings.py
Установить библиотеки (под linux) > `pip install -r PipInstaller.txt`
16 | Крон необходимо настроить не чаще чем раз в 5 минут(на одну ссылку), дабы не получить временный бан по IP от авито. Советую на пак ссылок поставить срабатывание скрипта раз в час.
17 |
18 | ## Запуск
19 | Используя команду `python main.py` запускаем обход парсера.
После обхода парсер автоматически запишет изменения в бд, оставит уведомление в телеграм и остановит свою работу
21 |
--------------------------------------------------------------------------------
/avito_database.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/useless-apple/avito_parser/4153ce55f26a468cfd57423424486e58ee75f437/avito_database.db
--------------------------------------------------------------------------------
/bot/bot.py:
--------------------------------------------------------------------------------
1 | import telebot
2 |
3 | from date_and_time import time_sleep
4 | from settings import TG_TOKEN
5 |
bot = telebot.TeleBot(TG_TOKEN)


def text_handler(chat_id, text):
    """
    Send a text message to a Telegram chat.

    Fix: the previous ``@bot.message_handler`` decorator was removed. It
    registered this function as an incoming-message callback, but telebot
    invokes such callbacks with a single Message argument, which does not
    match this (chat_id, text) signature. The project only ever calls
    text_handler directly and never starts a polling loop, so the
    registration was dead and misleading.

    :param chat_id: target Telegram chat id
    :param text: message body to send
    :return: None
    """
    bot.send_message(chat_id, text)
    # Short pause to stay under Telegram's rate limits when many
    # notifications are sent in a row.
    time_sleep(5)
19 |
--------------------------------------------------------------------------------
/change_price_history_sqlite.py:
--------------------------------------------------------------------------------
"""One-off migration: convert the flat ``price_history`` JSON arrays in the
``offers`` table ([date, price, date, price, ...]) into lists of
``{"data": date, "price": price}`` objects.

NOTE(review): this connects to 'test.db', while the application itself uses
the database configured in settings.py (avito_database.db) — confirm the
target file before running.
"""
import json
import sqlite3

conn = sqlite3.connect('test.db')
with conn:
    cur = conn.cursor()
    cur.execute('SELECT avito_id,price_history FROM offers')
    for avito_id, raw_history in cur.fetchall():
        flat_history = json.loads(raw_history)
        if not flat_history:
            continue
        # Pair consecutive (date, price) entries; zip silently drops a
        # trailing unpaired date, matching the original modulo-based loop.
        price_list = [{'data': entry_date, 'price': entry_price}
                      for entry_date, entry_price in zip(flat_history[0::2], flat_history[1::2])]
        cur.execute("UPDATE offers SET price_history=? WHERE avito_id=?",
                    (json.dumps(price_list), avito_id))
conn.commit()
conn.close()
--------------------------------------------------------------------------------
/date_and_time.py:
--------------------------------------------------------------------------------
1 | import time
2 | from datetime import datetime
3 | from pytz import timezone
4 | from helpers import get_random_time
5 |
6 |
def get_date_time(time_format="%Y-%m-%d %H:%M:%S"):
    """
    Return the current time in the Yekaterinburg timezone as a string.

    :param time_format: strftime-style format string
    :return: formatted timestamp string
    """
    now_local = datetime.now(timezone('Asia/Yekaterinburg'))
    return now_local.strftime(time_format)
15 |
16 |
def time_sleep(n=None):
    """
    Sleep for *n* seconds, or for a random interval when *n* is omitted.

    :param n: seconds to sleep; falsy values (None/0) fall back to the
              random interval from get_random_time()
    :return: None
    """
    delay = n if n else get_random_time()
    time.sleep(delay)
27 |
--------------------------------------------------------------------------------
/helpers.py:
--------------------------------------------------------------------------------
1 | import json
2 | import random
3 |
4 |
def read_json_txt(file):
    """
    Load and return the JSON document stored in *file*.

    :param file: path to a UTF-8 encoded JSON file
    :return: the parsed Python object
    """
    with open(file, encoding='utf-8', newline='') as fh:
        return json.load(fh)
14 |
15 |
def write_json_txt(result, file):
    """
    Serialize *result* as pretty-printed UTF-8 JSON into *file*.

    :param result: JSON-serializable object to write
    :param file: destination path (overwritten if it exists)
    :return: None
    """
    payload = json.dumps(result, ensure_ascii=False, indent=4)
    with open(file, 'w', encoding='utf-8') as out:
        out.write(payload)
25 |
26 |
def get_random_time():
    """
    Return a random sleep interval in seconds, uniform over [4, 10].

    Fix: the original computed ``4 + random.random() * (11 - 5)`` — a
    uniform value in [4, 10) written with three unrelated-looking magic
    numbers (4, 11, 5). ``random.uniform`` expresses the same range
    directly and unambiguously.

    :return: float in the range [4, 10]
    """
    return random.uniform(4, 10)
35 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from date_and_time import get_date_time
2 | from helpers import write_json_txt
3 | from new_logging import log
4 | from parser_avito import get_global_result
5 | from sqlite import get_urls
6 |
7 |
if __name__ == '__main__':
    try:
        log.info('-----------------------------------------------------------------------------------------------')
        log.info('Starting parsing ' + str(get_date_time()))
        # Each task row comes straight from the urls table in the DB.
        tasks = list(get_urls())
        write_json_txt(get_global_result(tasks), 'data.json')
        log.info('Parsing Success ' + str(get_date_time()))
        log.info('-----------------------------------------------------------------------------------------------')
    except Exception as e:
        # Top-level boundary: record the full traceback instead of crashing cron.
        log.exception(str(e))
--------------------------------------------------------------------------------
/new_logging.py:
--------------------------------------------------------------------------------
import logging
import os

from date_and_time import get_date_time
from settings import DIR_LOCATION
4 |
# Daily log file under <DIR_LOCATION>logs/, e.g. logs/log-2021-05-01.log.
_log_dir = "{0}logs".format(DIR_LOCATION)
# Fix: logging.basicConfig raises FileNotFoundError when the directory is
# missing, so create it first (e.g. on a fresh deployment).
os.makedirs(_log_dir, exist_ok=True)

logging.basicConfig(
    filename="{0}/log-{1}.log".format(_log_dir, get_date_time("%Y-%m-%d")),
    level=logging.INFO,
    format='[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s',
)
# Shared application logger; other modules do `from new_logging import log`.
log = logging.getLogger("ex")
--------------------------------------------------------------------------------
/parser_avito.py:
--------------------------------------------------------------------------------
1 | import re
2 | from sqlite import write_sqlite3
3 |
4 | from bot.bot import text_handler
5 | from date_and_time import time_sleep
6 | from new_logging import log
7 | from session import get_soup_from_page
8 | from settings import EXEPTION_CHAT
9 | from text_converter import clean
10 |
11 |
def get_item_data(rows, type_of):
    """
    Extract the data of every offer row on a results page.

    Each field is parsed defensively: Avito's markup changes often, so a
    missing node yields the placeholder string 'Не найден' instead of
    aborting the whole page. Fix: the bare ``except:`` clauses (which also
    swallowed KeyboardInterrupt/SystemExit) were narrowed to the lookup
    errors that actually occur here.

    :param rows: iterable of bs4 Tag objects, one per offer card
    :param type_of: category breadcrumb text ('Недвижимость', 'Транспорт', ...)
    :return: list of dicts with keys avito_id/name/price/address/url/type_of/params
    """
    result = []
    for row in rows:
        avito_id = ''
        name = ''
        price = ''
        url = ''
        address = ''
        params = ''

        # Offer ID (int); TypeError/ValueError cover a missing or garbage attribute.
        try:
            avito_id = int(row.get('data-item-id'))
        except (AttributeError, TypeError, ValueError):
            avito_id = 'Не найден'

        # Offer title
        try:
            name = clean(row.find('h3', {"itemprop": "name"}).text)
        except (AttributeError, TypeError):
            name = 'Не найден'

        # Price in rubles (int)
        try:
            price = int(clean(row.find('meta', {"itemprop": "price"}).get("content")))
        except (AttributeError, TypeError, ValueError):
            price = 'Не найден'

        # Offer link
        try:
            url = 'https://avito.ru' + row.find('a', {"itemprop": "url"}).get("href")
        except (AttributeError, TypeError):
            url = 'Не найден'

        # Category-specific fields
        if type_of == 'Недвижимость':
            # Address
            try:
                address = clean(row.find('div', {"data-marker": "item-address"}).div.span.span.text)
            except (AttributeError, TypeError):
                address = 'Не найден'

        elif type_of == 'Транспорт':
            # Vehicle parameters
            try:
                params = clean(row.find('div', {"data-marker": "item-specific-params"}).text)
            except (AttributeError, TypeError):
                params = 'Не найден'

            # Address
            try:
                address = clean(row.find('div', attrs={"class": re.compile(r"geo-georeferences")}).span.text)
            except (AttributeError, TypeError):
                address = 'Не найден'

        elif type_of == 'Хобби и отдых':
            # Offer description
            try:
                params = clean(row.find('div', attrs={"class": re.compile(r"iva-item-description")}).text)
            except (AttributeError, TypeError):
                params = 'Не найден'

            # Address
            try:
                address = clean(row.find('span', attrs={"class": re.compile(r"geo-address")}).span.text)
            except (AttributeError, TypeError):
                address = 'Не найден'

        result.append({
            'avito_id': avito_id,
            'name': name,
            'price': price,
            'address': address,
            'url': url,
            'type_of': type_of,
            'params': params,
        })
    return result
97 |
98 |
def get_page_rows(soup, type_of):
    """
    Locate the offers catalog inside a page and parse every offer row.

    :param soup: BeautifulSoup document of a search-results page
    :param type_of: category breadcrumb text, forwarded to get_item_data
    :return: list of offer dicts, or [] when the catalog block is missing
    """
    table = soup.find('div', {"data-marker": "catalog-serp"})
    if not table:
        error_message = 'Error not table' + str(soup) + str(table)
        log.error(error_message)
        text_handler(EXEPTION_CHAT, 'Error not table// Check LOGS')
        return []

    # Drop the embedded ad block before collecting the offer rows.
    ad_block = table.find('div', {"data-marker": "witcher/block"})
    if ad_block:
        ad_block.decompose()
    return get_item_data(table.find_all('div', {"data-marker": "item"}), type_of)
120 |
121 |
def get_page_data(page_url, count_try):
    """
    Download one results page and parse its offers.

    Fixes: the ``(soup, flag)`` tuple from get_soup_from_page is unpacked
    into named variables instead of opaque ``soup[0]``/``soup[1]`` indexing,
    and the bare ``except:`` clauses were narrowed to ``except Exception``.

    :param page_url: full URL of the page (pagination parameter included)
    :param count_try: retry counter forwarded to get_soup_from_page
    :return: (offers, next_pagination) — offers is a list of dicts,
             next_pagination is False when the caller should stop paging
    """
    page_soup, keep_parsing = get_soup_from_page(page_url, count_try)
    result = []

    if not keep_parsing:
        error_message = 'Next parsing none ' + str(page_url)
        log.error(error_message)
        text_handler(EXEPTION_CHAT, error_message)
        return result, False

    if not page_soup:
        error_message = 'Soup is None ' + str(page_url)
        log.error(error_message)
        text_handler(EXEPTION_CHAT, error_message)
        return result, False

    # Category name taken from the second breadcrumb (e.g. 'Транспорт').
    try:
        type_of = page_soup.find('div', {"data-marker": "breadcrumbs"}).find_all(
            'span', {"itemprop": "itemListElement"})[1].find('a').text
    except Exception:
        type_of = 'None Type'
        log.warn('type_of = None Type')

    next_pagination = True
    # More than one "items-items" container means an extra offers list was
    # appended to the page; pagination past it only repeats, so stop.
    item_containers = page_soup.find_all('div', attrs={"class": re.compile(r"items-items")})
    if len(item_containers) > 1:
        log.warn('Found another offers | Break pagination ' + str(page_url))
        next_pagination = False

    try:
        result = get_page_rows(page_soup, type_of)
    except Exception:
        error_message = 'Error get_page_rows' + '\n ' + page_url
        text_handler(EXEPTION_CHAT, error_message)
        log.error(error_message)
    return result, next_pagination
162 |
163 |
def get_count_page(soup, url_task):
    """
    Read the number of result pages from the pagination widget.

    Fixes: the bare ``except:`` was narrowed, and the page count is now
    converted to int inside the try (the original returned a str on
    success but an int 1 on failure; callers already do ``int(...)``, so
    returning int is backward-compatible).

    :param soup: BeautifulSoup document of the first results page
    :param url_task: task URL, only used in the error report
    :return: page count as int (1 when the widget is absent or unreadable)
    """
    try:
        pagination = soup.find('div', {"data-marker": "pagination-button"})
        pagination.find('span', {"data-marker": "pagination-button/prev"}).decompose()
        pagination.find('span', {"data-marker": "pagination-button/next"}).decompose()
        # The last remaining <span> holds the highest page number.
        count_page = int(pagination.find_all('span')[-1].text)
    except (AttributeError, IndexError, TypeError, ValueError):
        count_page = 1
        error_message = 'Error pagination' + '\n ' + url_task
        text_handler(EXEPTION_CHAT, error_message)
        log.error(error_message)
    return count_page
182 |
183 |
def get_result_task(count_page, url_task):
    """
    Collect offers for one task URL across all of its pagination pages.

    Fixes: the bare ``except:`` was narrowed to ``except Exception``, and
    the loop breaks immediately after a page signals the end of real
    results instead of looping once more just to hit a break (this also
    skips one pointless sleep after the final page).

    :param count_page: total number of pages (int or numeric string)
    :param url_task: base search URL without the page parameter
    :return: list of offer dicts from every visited page
    """
    result = []
    for page in range(1, int(count_page) + 1):
        log.info('Parsing page# ' + str(page) + ' of ' + str(count_page))
        page_url = url_task + '&p=' + str(page)
        try:
            page_rows, next_pagination = get_page_data(page_url, 1)
        except Exception:
            page_rows, next_pagination = [], True
            error_message = 'Error get_page_data' + '\n ' + page_url
            text_handler(EXEPTION_CHAT, error_message)
            log.error(error_message)

        result += page_rows
        if not next_pagination:
            # The page reported that further pages only repeat results.
            break
        time_sleep()
    return result
211 |
212 |
def get_global_result(tasks):
    """
    Run every parsing task and persist each task's offers to SQLite.

    :param tasks: rows of (id, url, city, chat_id) as returned by get_urls()
    :return: list of [offers, [city, chat_id, url_id]] pairs, one per task
    """
    global_result = []
    for row in tasks:
        url_task = row[1]
        task_meta = [row[2], row[3], row[0]]
        log.info('Url parsing ' + str(url_task))
        first_page = get_soup_from_page(url_task + '&p=1', 1)
        count_page = get_count_page(first_page[0], url_task)
        offers = get_result_task(count_page, url_task)
        time_sleep()
        item = [offers, task_meta]
        write_sqlite3(item)
        global_result.append(item)
    return global_result
232 |
--------------------------------------------------------------------------------
/session.py:
--------------------------------------------------------------------------------
1 | import time
2 | import cfscrape
3 | import requests
4 |
5 | from bs4 import BeautifulSoup
6 | from bot.bot import text_handler
7 | from date_and_time import time_sleep
8 | from helpers import get_random_time
9 | from new_logging import log
10 | from settings import EXEPTION_CHAT
11 |
12 |
def get_session():
    """
    Build a cloudflare-aware HTTP session with browser-like headers.

    :return: cfscrape scraper wrapping a requests.Session
    """
    browser_headers = {
        'Host': 'www.avito.ru',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0',
        'Accept': 'text/html',
        'Accept-Language': 'ru,en-US;q=0.5',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }
    session = requests.Session()
    session.headers = browser_headers
    return cfscrape.create_scraper(sess=session)
30 |
31 |
def get_soup_from_page(page_url, count_try):
    """
    Fetch *page_url* and return its parsed soup plus a "keep going" flag.

    Retries with increasing delays on HTTP 429 and other non-200 answers;
    gives up after 4 attempts. HTTP 403 (temporary ban) aborts immediately.

    Bug fixes vs. the original:
      * retry branches did ``soup = get_soup_from_page(...)`` and then
        returned ``soup, next_parsing`` — nesting the recursive call's
        (soup, flag) tuple inside another tuple, which broke callers;
      * a 4th failed attempt slipped through the ``count_try < 4`` /
        ``count_try > 4`` gap and parsed the error page as if it were OK.

    :param page_url: URL to download
    :param count_try: current attempt number, starting at 1
    :return: (BeautifulSoup or None, next_parsing flag)
    """
    session = get_session()
    r = session.get(page_url)

    if r.status_code == 200:
        return BeautifulSoup(r.text, 'html.parser'), True

    if r.status_code == 403:
        # Temporary IP ban: nothing to retry, tell the caller to stop.
        error_message = 'Error: ' + str(r.status_code) + ' \nTime to sleep. Exit.'
        text_handler(EXEPTION_CHAT, error_message)
        log.error(error_message)
        return None, False

    if count_try >= 4:
        error_message = 'Error: ' + str(r.status_code) + ' Try ended'
        text_handler(EXEPTION_CHAT, error_message)
        log.warn(error_message)
        return None, True

    if r.status_code == 429:
        # Rate limited: back off 10 minutes on the first try, 15 afterwards.
        delay, label = (600, '10min') if count_try < 2 else (900, '15min')
        error_message = ('Error: ' + str(r.status_code) + ' \nToo many request. Sleep ' + label +
                         '. \nTry № ' + str(count_try) + '\n' + str(page_url))
        text_handler(EXEPTION_CHAT, error_message)
        log.error(error_message)
        time.sleep(delay)
        return get_soup_from_page(page_url, count_try + 1)

    # Any other non-200 status: short random pause, then retry.
    error_message = 'Error: ' + str(r.status_code) + ' Try № ' + str(count_try) + '\n' + str(page_url)
    text_handler(EXEPTION_CHAT, error_message)
    log.error(error_message)
    time_sleep(get_random_time())
    return get_soup_from_page(page_url, count_try + 1)
74 |
75 |
--------------------------------------------------------------------------------
/settings.py:
--------------------------------------------------------------------------------
# Path to the project directory (needed when the parser is run from cron)
DIR_LOCATION = "/root/parser/"

# Path to the SQLite database file
ROUTE_DB = DIR_LOCATION + "avito_database.db"

# Telegram bot token
TG_TOKEN = 'NNNNNNNNNNNNNNNNNNN'

# ID of the Telegram chat that receives error reports
# (NOTE: name is misspelled — "EXEPTION" — but kept: other modules import it as-is)
EXEPTION_CHAT = 'NNNNNNNNNNNNNN'
12 |
--------------------------------------------------------------------------------
/sqlite.py:
--------------------------------------------------------------------------------
1 | import sqlite3
2 | import json
3 |
4 | from bot.bot import text_handler
5 | from new_logging import log
6 | from date_and_time import get_date_time
7 | from settings import ROUTE_DB, EXEPTION_CHAT
8 | from text_converter import num_conversion, calculation_percent, calculation_different_price, parse_items_to_send
9 |
10 |
def write_sqlite3(url):
    """
    Persist one task's parsed offers into SQLite and queue Telegram
    notifications for new offers and price changes.

    *url* is a pair ``[offers, meta]``: offers is the list of dicts built
    by the parser, meta is ``[city, chat_id, urls_id]``.

    Per offer:
      * unknown avito_id -> INSERT a new row and queue a 'new' notification;
      * known id, unchanged price -> refresh status/updated_date only;
      * known id, changed price -> update the row, extend price_history and
        queue an 'update' notification with a price-change summary.

    Bug fixes vs. the original: the history summary applied ``int()`` to
    the stored date string (``'Дата: ' + int(entry['data'])``), which
    raised ValueError as soon as a known offer changed price — it now uses
    ``str(...)``; the inner history loop also reused ``i`` and shadowed the
    outer offer loop variable.

    :param url: [offers, [city, chat_id, urls_id]]
    :return: None
    """
    items = []  # notification payloads consumed by parse_items_to_send
    sql_city = url[1][0]
    sql_chat = url[1][1]
    sql_urls_id = url[1][2]
    conn = sqlite3.connect(ROUTE_DB)
    with conn:
        cur = conn.cursor()
        # Reset status for the whole task; offers still on the site get
        # status=1 again below, so status=0 afterwards means "disappeared".
        cur.execute('UPDATE offers SET status=0 WHERE urls_id=?', (sql_urls_id,))
        for offer in url[0]:
            if offer is None:
                error_message = 'Error: write Sql_item, item is None ' + str(sql_urls_id)
                text_handler(EXEPTION_CHAT, error_message)
                log.error(error_message)
                continue

            sql_avito_id = offer['avito_id']
            sql_name = offer['name']
            sql_price = offer['price']
            sql_address = offer['address']
            sql_url = offer['url']
            sql_type_of = offer['type_of']
            sql_params = offer['params']

            price_history = []
            price_now = {
                "data": str(get_date_time()),
                "price": str(sql_price)
            }

            cur.execute('SELECT avito_id FROM offers WHERE avito_id=?', (sql_avito_id,))
            item_id = cur.fetchall()

            if item_id == [(sql_avito_id,)]:  # offer already known
                cur.execute('SELECT price FROM offers WHERE avito_id=?', (sql_avito_id,))
                item_price = cur.fetchall()
                old_price = item_price[0][0]

                cur.execute('SELECT price_history FROM offers WHERE avito_id=?', (sql_avito_id,))
                price_history = json.loads(cur.fetchall()[0][0])
                price_history.append(price_now)
                price_history_dumps = json.dumps(price_history)

                # Human-readable summary of (at most) the last 9 history rows.
                price_history_srt = ''
                start_count = 0
                if len(price_history) > 9:
                    start_count = len(price_history) - 9
                for j in range(start_count, len(price_history)):
                    if j == 0:
                        price_history_srt = price_history_srt + \
                            'Дата: ' + str(price_history[j]['data']) + ' ' + \
                            'Цена: ' + num_conversion(int(price_history[j]['price'])) + ' руб.\n'
                    else:
                        percent_price_history = calculation_percent(int(price_history[j - 1]['price']),
                                                                    int(price_history[j]['price']))
                        price_history_srt = price_history_srt + \
                            'Дата: ' + str(price_history[j]['data']) + ' ' + \
                            'Цена: ' + num_conversion(int(price_history[j]['price'])) + ' руб. ' + \
                            '(' + percent_price_history + '%)\n'

                difference_price = calculation_different_price(int(price_history[0]['price']),
                                                               int(price_now['price']))
                percent_difference_price = calculation_percent(int(price_history[0]['price']),
                                                               int(price_now['price']))

                if item_price == [(sql_price,)]:
                    # Same price: only mark the offer as still present.
                    cur.execute(
                        "UPDATE offers SET status=1, updated_date=?,urls_id=?, type_of=?, params=? WHERE avito_id=?",
                        (str(get_date_time()), sql_urls_id, sql_type_of, sql_params, sql_avito_id))
                    continue

                # Price changed: queue a notification, then update the row.
                items.append({
                    'item_price': item_price,
                    'sql_chat': sql_chat,
                    'sql_avito_id': sql_avito_id,
                    'sql_name': sql_name,
                    'old_price': old_price,
                    'sql_price': sql_price,
                    'price_history_srt': price_history_srt,
                    'difference_price': difference_price,
                    'percent_difference_price': percent_difference_price,
                    'sql_address': sql_address,
                    'sql_url': sql_url,
                    'sql_params': sql_params,
                    'sql_type_of': sql_type_of,
                    'type_update': 'update'
                })
                cur.execute(
                    "UPDATE offers SET price=?, old_price=?, updated_date=?, price_history=?, status=1, urls_id=?, type_of=?, params=? WHERE avito_id=?",
                    (sql_price, old_price, str(get_date_time()), str(price_history_dumps), sql_urls_id,
                     sql_type_of, sql_params, sql_avito_id))
                log.info('Price update | ' + str(sql_avito_id))
            else:
                # Unknown offer: queue a 'new' notification and insert it.
                items.append({
                    'item_price': None,
                    'sql_chat': sql_chat,
                    'sql_avito_id': sql_avito_id,
                    'sql_name': sql_name,
                    'old_price': None,
                    'sql_price': sql_price,
                    'price_history_srt': None,
                    'difference_price': None,
                    'percent_difference_price': None,
                    'sql_address': sql_address,
                    'sql_url': sql_url,
                    'sql_params': sql_params,
                    'sql_type_of': sql_type_of,
                    'type_update': 'new'
                })
                log.info('No ID -> New Offer | ' + str(sql_avito_id))

                price_history.append(price_now)
                price_history_dumps = json.dumps(price_history)
                cur.execute(
                    "INSERT OR IGNORE INTO offers ('avito_id','name','price','price_history','address','url','created_date','updated_date','status','city','urls_id','type_of','params') VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
                    (sql_avito_id, sql_name, sql_price, str(price_history_dumps), sql_address, sql_url,
                     str(get_date_time()), str(get_date_time()), 1, sql_city, sql_urls_id, sql_type_of, sql_params))
    parse_items_to_send(items)
    conn.commit()
    conn.close()
143 |
144 |
def get_urls():
    """
    Load every parsing task from the ``urls`` table.

    :return: list of (id, name, city, chatid) tuples
    """
    connection = sqlite3.connect(ROUTE_DB)
    with connection:
        rows = connection.execute('SELECT id,name,city,chatid FROM urls').fetchall()
    return rows
156 |
--------------------------------------------------------------------------------
/text_converter.py:
--------------------------------------------------------------------------------
1 | from bot.bot import text_handler
2 |
# Unicode emoji used when composing Telegram notification text.
emoji_top = u'\U0001F4C8'        # chart-increasing emoji
emoji_top_green = u'\U00002705'  # green check mark

emoji_down = u'\U0001F4C9'       # chart-decreasing emoji
emoji_down_red = u'\U0000274C'   # red cross mark
9 |
def num_conversion(a):
    """
    Format a number with thousands separators, e.g. 1234567 -> '1,234,567'.

    :param a: int or anything int() accepts
    :return: formatted string
    """
    return format(int(a), ',')
17 |
18 |
def clean(text):
    """
    Remove all tabs and newlines from *text* and trim surrounding whitespace.

    :param text: raw string scraped from the page
    :return: cleaned string
    """
    return text.translate(str.maketrans('', '', '\t\n')).strip()
26 |
27 |
def calculation_percent(price_old, price_new):
    """
    Percentage change from *price_old* to *price_new*, relative to the old
    price, rendered as '+ X.X' / '- X.X'.

    Bug fix: the decrease branch used to divide by the NEW price while the
    increase branch divided by the old one, so a drop from 200 to 100 was
    reported as 100% instead of 50%. Both directions now use the old price
    as the base.

    :param price_old: previous price
    :param price_new: current price
    :return: signed percentage string, rounded to 2 decimals
    """
    old, new = int(price_old), int(price_new)
    if old == 0:
        # Guard against a zero stored price (would divide by zero).
        return '+ 0.0'
    if old > new:
        return '- ' + str(round((old - new) / old * 100, 2))
    return '+ ' + str(round((new - old) / old * 100, 2))
40 |
41 |
def calculation_different_price(price_old, price_new):
    """
    Absolute price difference, formatted with a sign and thousands separators.

    :param price_old: previous price
    :param price_new: current price
    :return: e.g. '+ 1,500' or '- 300'
    """
    old, new = int(price_old), int(price_new)
    sign, delta = ('-', old - new) if old > new else ('+', new - old)
    return sign + ' ' + str(num_conversion(delta))
55 |
56 |
def send_mes_to_bot(item):
    """
    Build the Telegram notification text for one offer and send it.

    *item* is one of the payload dicts queued by sqlite.write_sqlite3;
    'type_update' selects the "price changed" vs "new offer" layout and
    'sql_type_of' selects which rows are included for the category.

    :param item: notification payload dict (see write_sqlite3)
    :return: None
    """
    from main import log

    first_row = ''  # header line: what happened (price change / new offer)
    second_row = ''  # offer name
    third_row = ''  # old/new price line
    fours_row = ''  # price-history summary
    five_row = ''  # address
    six_row = ''  # params
    seven_row = ''  # offer URL

    if item['type_update'] == 'update':
        # NOTE(review): item['item_price'] is a fetchall() result like
        # [(old_price,)], compared element-wise with [(new_price,)]; the
        # branch is taken when the stored price is >= the new one.
        if item['item_price'] >= [(item['sql_price'],)]:
            first_row = 'Обновилась цена id ' + str(item['sql_avito_id']) + ' ' + \
                        emoji_down + emoji_down + emoji_top_green + '\n\n'

            third_row = 'Старая цена = ' + str(num_conversion(item['old_price'])) + ' руб. /' + \
                        ' Новая цена = ' + str(num_conversion(item['sql_price'])) + ' руб.\n\n'
        else:
            first_row = 'Обновилась цена id ' + str(item['sql_avito_id']) + ' ' + \
                        emoji_top + emoji_top + emoji_down_red + '\n\n'

            third_row = 'Старая цена = ' + str(num_conversion(item['item_price'][0][0])) + ' руб. /' + \
                        ' Новая цена = ' + str(num_conversion(item['sql_price'])) + ' руб.\n\n'

        fours_row = 'Изменения цен \n' + str(item['price_history_srt']) + '\nРазница: ' + \
                    item['difference_price'] + ' (' + item['percent_difference_price'] + '%)\n\n'

    elif item['type_update'] == 'new':
        first_row = 'Новое объявление ' + str(item['sql_avito_id']) + '\n\n'
        third_row = 'Цена: ' + str(item['sql_price']) + ' руб.\n\n'
    else:
        log.error('type_update = NONETYPE ' + str(item['sql_avito_id']))
    second_row = str(item['sql_name']) + '\n\n'
    five_row = 'Адрес: ' + str(item['sql_address']) + '\n\n'
    six_row = 'Параметры: ' + str(item['sql_params']) + '\n\n'
    seven_row = 'Ссылка ' + str(item['sql_url']) + '\n\n'
    # Categories that all share the generic message layout below.
    none_type_of = ['Личные вещи', 'Работа', 'Для дома и дачи', 'Предложение услуг', 'Электроника', 'Животные',
                    'Готовый бизнес и оборудование']
    if item['sql_type_of'] == 'Недвижимость':
        mes_to_bot = first_row + third_row + fours_row + five_row + seven_row
    elif item['sql_type_of'] == 'Транспорт':
        mes_to_bot = first_row + second_row + third_row + fours_row + six_row + seven_row
    elif item['sql_type_of'] == 'Хобби и отдых':
        mes_to_bot = first_row + second_row + third_row + fours_row + five_row + six_row + seven_row
    elif item['sql_type_of'] in none_type_of:
        mes_to_bot = first_row + second_row + third_row + fours_row + seven_row
    else:
        log.error('sql_type_of = NONETYPE ' + str(item['sql_avito_id']))
        mes_to_bot = 'sql_type_of = NONETYPE ' + str(item['sql_avito_id'])
    text_handler(item['sql_chat'], mes_to_bot)
113 |
114 |
def parse_items_to_send(items):
    """
    Send a Telegram notification for every queued payload.

    :param items: list of payload dicts produced by write_sqlite3
    :return: None
    """
    for queued_item in items:
        send_mes_to_bot(queued_item)
123 |
--------------------------------------------------------------------------------