├── .gitignore ├── .idea ├── .gitignore ├── avito_parser.iml ├── dataSources.xml ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── PipInstaller.txt ├── README.md ├── avito_database.db ├── bot └── bot.py ├── change_price_history_sqlite.py ├── date_and_time.py ├── helpers.py ├── main.py ├── new_logging.py ├── parser_avito.py ├── session.py ├── settings.py ├── sqlite.py └── text_converter.py /.gitignore: -------------------------------------------------------------------------------- 1 | logs/* 2 | settings.py 3 | venv 4 | /data.json 5 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /.idea/avito_parser.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/dataSources.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | sqlite.xerial 6 | true 7 | org.sqlite.JDBC 8 | jdbc:sqlite:$PROJECT_DIR$/avito_database.db 9 | $ProjectFileDir$ 10 | 11 | 12 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 17 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 
2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /PipInstaller.txt: -------------------------------------------------------------------------------- 1 | altgraph==0.17 2 | asgiref==3.3.4 3 | beautifulsoup4==4.9.3 4 | bs4==0.0.1 5 | certifi==2020.12.5 6 | cfscrape==2.1.1 7 | chardet==4.0.0 8 | future==0.18.2 9 | idna==2.10 10 | pefile==2019.4.18 11 | pyTelegramBotAPI==3.7.6 12 | pytz==2021.1 13 | pywin32-ctypes==0.2.0 14 | requests==2.25.1 15 | six==1.16.0 16 | soupsieve==2.2.1 17 | urllib3==1.26.4 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Авито парсер 2 | 3 | Написал простой парсер который: 4 | - Обходит указанные ссылки авито из БД. 5 | - Получает данные по объявлениям (название, адрес, ссылка, цена, доп параметры) 6 | - Отправляет уведомление в телеграм при добавлении нового объявления, либо изменении цены. 7 | 8 | ## Установка 9 | 10 | Указываем ссылки в avito_parser.db таблица url (ссылка, город, ID чата телеграм)
11 | Указываем токен бота в (TG_TOKEN) /settings.py
12 | Указываем чат в телеграм для выявления ошибок (EXEPTION_CHAT) /settings.py
13 | Указываем путь до БД (ROUTE_DB) /settings.py
14 | Указываем путь до директории парсера для работы крона (DIR_LOCATION) /settings.py
15 | Установить библиотеки (под linux) > `pip install -r PipInstaller.txt`
16 | Крон необходимо настроить не чаще чем раз в 5 минут(на одну ссылку), дабы не получить временный бан по IP от авито. Советую на пак ссылок поставить срабатывание скрипта раз в час. 17 | 18 | ## Запуск 19 | Используя команду `python main.py` запускаем обход парсера.
20 | После обхода парсер автоматическки запишет изменения в бд, оставит уведомление в телеграм и остановит свою работу 21 | -------------------------------------------------------------------------------- /avito_database.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/useless-apple/avito_parser/4153ce55f26a468cfd57423424486e58ee75f437/avito_database.db -------------------------------------------------------------------------------- /bot/bot.py: -------------------------------------------------------------------------------- 1 | import telebot 2 | 3 | from date_and_time import time_sleep 4 | from settings import TG_TOKEN 5 | 6 | bot = telebot.TeleBot(TG_TOKEN) 7 | 8 | 9 | @bot.message_handler(content_types=['text']) 10 | def text_handler(chat_id, text): 11 | """ 12 | Отправка текста в чат ТГ 13 | :param chat_id: 14 | :param text: 15 | :return: 16 | """ 17 | bot.send_message(chat_id, text) 18 | time_sleep(5) 19 | -------------------------------------------------------------------------------- /change_price_history_sqlite.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sqlite3 3 | 4 | conn = sqlite3.connect('test.db') 5 | with conn: 6 | cur = conn.cursor() 7 | cur.execute('SELECT avito_id,price_history FROM offers') 8 | final_price_history = [] 9 | items = cur.fetchall() 10 | for item in items: 11 | id = item[0] 12 | price_history = json.loads(item[1]) 13 | if len(price_history) == 0: 14 | continue 15 | price_list = [] 16 | price = {} 17 | for i in range(len(price_history)): 18 | if i % 2 == 1: 19 | price['price'] = price_history[i] 20 | price_list.append(price) 21 | price = {} 22 | if i % 2 == 0: 23 | price['data'] = price_history[i] 24 | sql_price_list = json.dumps(price_list) 25 | cur.execute("UPDATE offers SET price_history=? 
WHERE avito_id=?",(sql_price_list, id)) 26 | conn.commit() 27 | conn.close() -------------------------------------------------------------------------------- /date_and_time.py: -------------------------------------------------------------------------------- 1 | import time 2 | from datetime import datetime 3 | from pytz import timezone 4 | from helpers import get_random_time 5 | 6 | 7 | def get_date_time(time_format="%Y-%m-%d %H:%M:%S"): 8 | """ 9 | Получаем текущее время 10 | :param time_format: 11 | :return: 12 | """ 13 | date_and_time = datetime.now(timezone('Asia/Yekaterinburg')).strftime(time_format) 14 | return date_and_time 15 | 16 | 17 | def time_sleep(n=None): 18 | """ 19 | Таймер сна 20 | :param n: 21 | :return: 22 | """ 23 | if n: 24 | time.sleep(n) 25 | else: 26 | time.sleep(get_random_time()) 27 | -------------------------------------------------------------------------------- /helpers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | 4 | 5 | def read_json_txt(file): 6 | """ 7 | Прочитать файл JSON 8 | :param file: 9 | :return: 10 | """ 11 | with open(file, encoding='utf-8', newline='') as json_file: 12 | data = json.load(json_file) 13 | return data 14 | 15 | 16 | def write_json_txt(result, file): 17 | """ 18 | Записать новый файл JSON 19 | :param result: 20 | :param file: 21 | :return: 22 | """ 23 | with open(file, 'w', encoding='utf-8') as f: 24 | json.dump(result, f, ensure_ascii=False, indent=4) 25 | 26 | 27 | def get_random_time(): 28 | """ 29 | Получать рандомное число 30 | :return: 31 | """ 32 | value = random.random() 33 | scaled_value = 4 + (value * (11 - 5)) 34 | return scaled_value 35 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from date_and_time import get_date_time 2 | from helpers import write_json_txt 3 | from new_logging import log 4 
| from parser_avito import get_global_result 5 | from sqlite import get_urls 6 | 7 | 8 | if __name__ == '__main__': 9 | try: 10 | log.info('-----------------------------------------------------------------------------------------------') 11 | log.info('Starting parsing ' + str(get_date_time())) 12 | tasks = [] 13 | tasks += get_urls() 14 | global_result = get_global_result(tasks) 15 | write_json_txt(global_result, 'data.json') 16 | log.info('Parsing Success ' + str(get_date_time())) 17 | log.info('-----------------------------------------------------------------------------------------------') 18 | 19 | except Exception as e: 20 | log.exception(str(e)) 21 | -------------------------------------------------------------------------------- /new_logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from date_and_time import get_date_time 3 | from settings import DIR_LOCATION 4 | 5 | logging.basicConfig( 6 | filename="{0}logs/log-{1}.log".format(DIR_LOCATION, get_date_time("%Y-%m-%d")), 7 | level=logging.INFO, 8 | format='[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s', 9 | ) 10 | log = logging.getLogger("ex") -------------------------------------------------------------------------------- /parser_avito.py: -------------------------------------------------------------------------------- 1 | import re 2 | from sqlite import write_sqlite3 3 | 4 | from bot.bot import text_handler 5 | from date_and_time import time_sleep 6 | from new_logging import log 7 | from session import get_soup_from_page 8 | from settings import EXEPTION_CHAT 9 | from text_converter import clean 10 | 11 | 12 | def get_item_data(rows, type_of): 13 | """ 14 | Получаем данные для каждого объявления 15 | :param rows: 16 | :param type_of: 17 | :return: 18 | """ 19 | result = [] 20 | for row in rows: 21 | avito_id = '' 22 | name = '' 23 | price = '' 24 | url = '' 25 | address = '' 26 | params = '' 27 | 28 | # ID Объявления 
29 | try: 30 | avito_id = int(row.get('data-item-id')) 31 | except: 32 | avito_id = 'Не найден' 33 | 34 | # Название товара 35 | try: 36 | name = clean(row.find('h3', {"itemprop": "name"}).text) 37 | except: 38 | name = 'Не найден' 39 | 40 | # Цена товара 41 | try: 42 | price = int(clean(row.find('meta', {"itemprop": "price"}).get("content"))) 43 | except: 44 | price = 'Не найден' 45 | 46 | # Ссылка на товар 47 | try: 48 | url = 'https://avito.ru' + row.find('a', {"itemprop": "url"}).get("href") 49 | except: 50 | url = 'Не найден' 51 | 52 | # Для товара типа "Недвижимость" 53 | if type_of == 'Недвижимость': 54 | # Адрес 55 | try: 56 | address = clean(row.find('div', {"data-marker": "item-address"}).div.span.span.text) 57 | except: 58 | address = 'Не найден' 59 | 60 | # Для товара типа "Транспорт" 61 | elif type_of == 'Транспорт': 62 | # Параметры авто 63 | try: 64 | params = clean(row.find('div', {"data-marker": "item-specific-params"}).text) 65 | except: 66 | params = 'Не найден' 67 | 68 | # Адрес 69 | try: 70 | address = clean(row.find('div', attrs={"class": re.compile(r"geo-georeferences")}).span.text) 71 | except: 72 | address = 'Не найден' 73 | 74 | elif type_of == 'Хобби и отдых': 75 | # Параметры объявления 76 | try: 77 | params = clean(row.find('div', attrs={"class": re.compile(r"iva-item-description")}).text) 78 | except: 79 | params = 'Не найден' 80 | 81 | # Адрес 82 | try: 83 | address = clean(row.find('span', attrs={"class": re.compile(r"geo-address")}).span.text) 84 | except: 85 | address = 'Не найден' 86 | item = { 87 | 'avito_id': avito_id, 88 | 'name': name, 89 | 'price': price, 90 | 'address': address, 91 | 'url': url, 92 | 'type_of': type_of, 93 | 'params': params 94 | } 95 | result.append(item) 96 | return result 97 | 98 | 99 | def get_page_rows(soup, type_of): 100 | """ 101 | Получаем таблицу с объявлениями 102 | :param soup: 103 | :param type_of: 104 | :return: 105 | """ 106 | table = soup.find('div', {"data-marker": "catalog-serp"}) 107 | 108 
| if table: # Удаляем рекламные блоки 109 | if table.find('div', {"data-marker": "witcher/block"}): 110 | table.find('div', {"data-marker": "witcher/block"}).decompose() 111 | rows = table.find_all('div', {"data-marker": "item"}) 112 | result = get_item_data(rows, type_of) 113 | 114 | else: 115 | error_message = 'Error not table' + str(soup) + str(table) 116 | log.error(error_message) 117 | text_handler(EXEPTION_CHAT, 'Error not table// Check LOGS') 118 | result = [] 119 | return result 120 | 121 | 122 | def get_page_data(page_url, count_try): 123 | """ 124 | Получаем страницу с объявлениями 125 | :param page_url: 126 | :param count_try: 127 | :return: 128 | """ 129 | next_pagination = True 130 | soup = get_soup_from_page(page_url, count_try) 131 | result = [] 132 | if not soup[1]: 133 | error_message = 'Next parsing none ' + str(page_url) 134 | log.error(error_message) 135 | text_handler(EXEPTION_CHAT, error_message) 136 | return result, False 137 | 138 | if not soup[0]: 139 | error_message = 'Soup is None ' + str(page_url) 140 | log.error(error_message) 141 | text_handler(EXEPTION_CHAT, error_message) 142 | return result, False 143 | 144 | try: 145 | type_of = soup[0].find('div', {"data-marker": "breadcrumbs"}).find_all('span', {"itemprop": "itemListElement"})[ 146 | 1].find('a').text 147 | except: 148 | type_of = 'None Type' 149 | log.warn('type_of = None Type') 150 | 151 | if soup[0].find_all('div', attrs={"class": re.compile(r"items-items")}): 152 | if len(soup[0].find_all('div', attrs={"class": re.compile(r"items-items")})) > 1: 153 | log.warn('Found another offers | Break pagination ' + str(page_url)) 154 | next_pagination = False 155 | try: 156 | result = get_page_rows(soup[0], type_of) 157 | except: 158 | error_message = 'Error get_page_rows' + '\n ' + page_url 159 | text_handler(EXEPTION_CHAT, error_message) 160 | log.error(error_message) 161 | return result, next_pagination 162 | 163 | 164 | def get_count_page(soup, url_task): 165 | """ 166 | Получаем 
список страниц пагинации 167 | :param soup: 168 | :param url_task: 169 | :return: 170 | """ 171 | try: 172 | pagination = soup.find('div', {"data-marker": "pagination-button"}) 173 | pagination.find('span', {"data-marker": "pagination-button/prev"}).decompose() 174 | pagination.find('span', {"data-marker": "pagination-button/next"}).decompose() 175 | count_page = pagination.find_all('span')[-1].text 176 | except: 177 | count_page = 1 178 | error_message = 'Error pagination' + '\n ' + url_task 179 | text_handler(EXEPTION_CHAT, error_message) 180 | log.error(error_message) 181 | return count_page 182 | 183 | 184 | def get_result_task(count_page, url_task): 185 | """ 186 | Получаем данные для одного задания (ссылки со всеми пагинациями) 187 | :param count_page: 188 | :param url_task: 189 | :return: 190 | """ 191 | next_pagination = True 192 | result = [] 193 | for i in range(1, int(count_page) + 1): 194 | if next_pagination: # Проверяем нужно ли парсить следующие страницы 195 | log.info('Parsing page# ' + str(i) + ' of ' + str(count_page)) 196 | page_url = url_task + '&p=' + str(i) 197 | try: 198 | page_data = get_page_data(page_url, 1) 199 | except: 200 | page_data = [], True 201 | error_message = 'Error get_page_data' + '\n ' + page_url 202 | text_handler(EXEPTION_CHAT, error_message) 203 | log.error(error_message) 204 | 205 | result += page_data[0] 206 | next_pagination = page_data[1] 207 | time_sleep() 208 | else: 209 | break 210 | return result 211 | 212 | 213 | def get_global_result(tasks): 214 | """ 215 | Получаем глобальный результат по всем заданиям 216 | :param tasks: 217 | :return: 218 | """ 219 | global_result = [] 220 | for task in tasks: 221 | url_task = task[1] 222 | task = [task[2], task[3], task[0]] 223 | log.info('Url parsing ' + str(url_task)) 224 | soup = get_soup_from_page(url_task + '&p=1', 1) 225 | count_page = get_count_page(soup[0], url_task) 226 | result = get_result_task(count_page, url_task) 227 | time_sleep() 228 | item = [result, task] 
229 | write_sqlite3(item) 230 | global_result.append(item) 231 | return global_result 232 | -------------------------------------------------------------------------------- /session.py: -------------------------------------------------------------------------------- 1 | import time 2 | import cfscrape 3 | import requests 4 | 5 | from bs4 import BeautifulSoup 6 | from bot.bot import text_handler 7 | from date_and_time import time_sleep 8 | from helpers import get_random_time 9 | from new_logging import log 10 | from settings import EXEPTION_CHAT 11 | 12 | 13 | def get_session(): 14 | """ 15 | Создаем сессию 16 | :return: 17 | """ 18 | session = requests.Session() 19 | session.headers = { 20 | 'Host': 'www.avito.ru', 21 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0', 22 | 'Accept': 'text/html', 23 | 'Accept-Language': 'ru,en-US;q=0.5', 24 | 'DNT': '1', 25 | 'Connection': 'keep-alive', 26 | 'Upgrade-Insecure-Requests': '1', 27 | 'Pragma': 'no-cache', 28 | 'Cache-Control': 'no-cache'} 29 | return cfscrape.create_scraper(sess=session) 30 | 31 | 32 | def get_soup_from_page(page_url, count_try): 33 | """ 34 | Получаем SOUP для любой страницы 35 | :param page_url: 36 | :param count_try: 37 | :return: 38 | """ 39 | session = get_session() 40 | r = session.get(page_url) 41 | next_parsing = True 42 | if r.status_code == 403: 43 | error_message = 'Error: ' + str(r.status_code) + ' \nTime to sleep. Exit.' 44 | text_handler(EXEPTION_CHAT, error_message) 45 | log.error(error_message) 46 | soup = None 47 | next_parsing = False 48 | elif r.status_code == 429 and count_try < 2: 49 | error_message = 'Error: ' + str(r.status_code) + ' \nToo many request. Sleep 10min. 
\nTry № ' + str(count_try) + '\n' + str(page_url) 50 | text_handler(EXEPTION_CHAT, error_message) 51 | log.error(error_message) 52 | time.sleep(600) 53 | soup = get_soup_from_page(page_url, count_try + 1) 54 | elif r.status_code == 429 and count_try < 4: 55 | error_message = 'Error: ' + str(r.status_code) + ' \nToo many request. Sleep 15min. \nTry № ' + str(count_try) + '\n' + str(page_url) 56 | text_handler(EXEPTION_CHAT, error_message) 57 | log.error(error_message) 58 | time.sleep(900) 59 | soup = get_soup_from_page(page_url, count_try + 1) 60 | elif r.status_code != 200 and count_try < 4: 61 | error_message = 'Error: ' + str(r.status_code) + ' Try № ' + str(count_try) + '\n' + str(page_url) 62 | text_handler(EXEPTION_CHAT, error_message) 63 | log.error(error_message) 64 | time_sleep(get_random_time()) 65 | soup = get_soup_from_page(page_url, count_try + 1) 66 | elif count_try > 4: 67 | error_message = 'Error: ' + str(r.status_code) + ' Try ended' 68 | text_handler(EXEPTION_CHAT, error_message) 69 | log.warn(error_message) 70 | soup = None 71 | else: 72 | soup = BeautifulSoup(r.text, 'html.parser') 73 | return soup, next_parsing 74 | 75 | -------------------------------------------------------------------------------- /settings.py: -------------------------------------------------------------------------------- 1 | #Относительный путь до проекта (нужно для крона) 2 | DIR_LOCATION = "/root/parser/" 3 | 4 | #Путь до базы данных 5 | ROUTE_DB = DIR_LOCATION + "avito_database.db" 6 | 7 | #Телеграм токен бота 8 | TG_TOKEN = 'NNNNNNNNNNNNNNNNNNN' 9 | 10 | #ID чата для отлавливания ошибок 11 | EXEPTION_CHAT = 'NNNNNNNNNNNNNN' 12 | -------------------------------------------------------------------------------- /sqlite.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import json 3 | 4 | from bot.bot import text_handler 5 | from new_logging import log 6 | from date_and_time import get_date_time 7 | from settings 
import ROUTE_DB, EXEPTION_CHAT 8 | from text_converter import num_conversion, calculation_percent, calculation_different_price, parse_items_to_send 9 | 10 | 11 | def write_sqlite3(url): 12 | """ 13 | Записываем данные в SQLite 14 | :param url: 15 | :return: 16 | """ 17 | items = [] 18 | sql_city = url[1][0] 19 | sql_chat = url[1][1] 20 | sql_urls_id = url[1][2] 21 | conn = sqlite3.connect(ROUTE_DB) 22 | with conn: 23 | cur = conn.cursor() 24 | cur.execute('UPDATE offers SET status=0 WHERE urls_id=?', (sql_urls_id,)) # Обнуляем у всех объявлений статус 25 | for i in range(0, len(url[0])): 26 | if url[0][i] is not None: 27 | sql_avito_id = url[0][i]['avito_id'] 28 | sql_name = url[0][i]['name'] 29 | sql_price = url[0][i]['price'] 30 | sql_address = url[0][i]['address'] 31 | sql_url = url[0][i]['url'] 32 | sql_type_of = url[0][i]['type_of'] 33 | sql_params = url[0][i]['params'] 34 | 35 | price_history = [] 36 | price_now = { 37 | "data": str(get_date_time()), 38 | "price": str(sql_price) 39 | } 40 | 41 | cur.execute('SELECT avito_id FROM offers WHERE avito_id=?', 42 | (sql_avito_id,)) 43 | 44 | item_id = cur.fetchall() 45 | if item_id == [(sql_avito_id,)]: # Ищем ID в бд, и если не находим то пишем сообщение в телегу 46 | cur.execute('SELECT price FROM offers WHERE avito_id=?', 47 | (sql_avito_id,)) 48 | 49 | item_price = cur.fetchall() 50 | old_price = item_price[0][0] 51 | 52 | cur.execute('SELECT price_history FROM offers WHERE avito_id=?', 53 | (sql_avito_id,)) 54 | 55 | price_history = json.loads(cur.fetchall()[0][0]) 56 | price_history.append(price_now) 57 | price_history_dumps = json.dumps(price_history) 58 | 59 | price_history_srt = '' 60 | 61 | start_count = 0 62 | if len(price_history) > 0: 63 | if len(price_history) > 9: 64 | start_count = len(price_history) - 9 65 | for i in range(start_count, len(price_history)): 66 | if i == 0: 67 | price_history_srt = price_history_srt + \ 68 | 'Дата: ' + int(price_history[i]['data']) + ' ' + \ 69 | 'Цена: ' + 
num_conversion(int(price_history[i]['price'])) + ' руб.\n' 70 | else: 71 | percent_price_history = calculation_percent(int(price_history[i - 1]['price']), 72 | int(price_history[i]['price'])) 73 | price_history_srt = price_history_srt + \ 74 | 'Дата: ' + int(price_history[i]['data']) + ' ' + \ 75 | 'Цена: ' + num_conversion(int(price_history[i]['price'])) + ' руб. ' + \ 76 | '(' + percent_price_history + '%)\n' 77 | 78 | difference_price = calculation_different_price(int(price_history[0]['price']), int(price_now['price'])) 79 | percent_difference_price = calculation_percent(int(price_history[0]['price']), int(price_now['price'])) 80 | 81 | if item_price == [(sql_price,)]: # Сравниваем цены, и если есть отличие то обновляем их 82 | cur.execute( 83 | "UPDATE offers SET status=1, updated_date=?,urls_id=?, type_of=?, params=? WHERE avito_id=?", 84 | (str(get_date_time()), sql_urls_id, sql_type_of, sql_params, sql_avito_id)) 85 | continue 86 | else: 87 | items.append({ 88 | 'item_price': item_price, 89 | 'sql_chat': sql_chat, 90 | 'sql_avito_id': sql_avito_id, 91 | 'sql_name': sql_name, 92 | 'old_price': old_price, 93 | 'sql_price': sql_price, 94 | 'price_history_srt': price_history_srt, 95 | 'difference_price': difference_price, 96 | 'percent_difference_price': percent_difference_price, 97 | 'sql_address': sql_address, 98 | 'sql_url': sql_url, 99 | 'sql_params': sql_params, 100 | 'sql_type_of': sql_type_of, 101 | 'type_update': 'update' 102 | }) 103 | 104 | cur.execute( 105 | "UPDATE offers SET price=?, old_price=?, updated_date=?, price_history=?, status=1, urls_id=?, type_of=?, params=? 
WHERE avito_id=?", 106 | (sql_price, old_price, str(get_date_time()), str(price_history_dumps), sql_urls_id, 107 | sql_type_of, 108 | sql_params, sql_avito_id)) 109 | log.info('Price update | ' + str(sql_avito_id)) 110 | 111 | else: 112 | items.append({ 113 | 'item_price': None, 114 | 'sql_chat': sql_chat, 115 | 'sql_avito_id': sql_avito_id, 116 | 'sql_name': sql_name, 117 | 'old_price': None, 118 | 'sql_price': sql_price, 119 | 'price_history_srt': None, 120 | 'difference_price': None, 121 | 'percent_difference_price': None, 122 | 'sql_address': sql_address, 123 | 'sql_url': sql_url, 124 | 'sql_params': sql_params, 125 | 'sql_type_of': sql_type_of, 126 | 'type_update': 'new' 127 | }) 128 | log.info('No ID -> New Offer | ' + str(sql_avito_id)) 129 | 130 | price_history.append(price_now) 131 | price_history_dumps = json.dumps(price_history) 132 | cur.execute( 133 | "INSERT OR IGNORE INTO offers ('avito_id','name','price','price_history','address','url','created_date','updated_date','status','city','urls_id','type_of','params') VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)", 134 | (sql_avito_id, sql_name, sql_price, str(price_history_dumps), sql_address, sql_url, 135 | str(get_date_time()), str(get_date_time()), 1, sql_city, sql_urls_id, sql_type_of, sql_params)) 136 | else: 137 | error_message = 'Error: write Sql_item, item is None ' + str(sql_urls_id) 138 | text_handler(EXEPTION_CHAT, error_message) 139 | log.error(error_message) 140 | parse_items_to_send(items) 141 | conn.commit() 142 | conn.close() 143 | 144 | 145 | def get_urls(): 146 | """ 147 | Получаем данные для заданий из SQLite 148 | :return: 149 | """ 150 | conn = sqlite3.connect(ROUTE_DB) 151 | with conn: 152 | cur = conn.cursor() 153 | cur.execute('SELECT id,name,city,chatid FROM urls') 154 | urls = cur.fetchall() 155 | return urls 156 | -------------------------------------------------------------------------------- /text_converter.py: 
from bot.bot import text_handler
# Import the logger from its definition site. The original lazy
# `from main import log` inside send_mes_to_bot only worked around a
# circular import and bound the very same logger object that main.py
# itself takes from new_logging.
from new_logging import log

# Emoji markers used in Telegram price-change notifications.
emoji_top = u'\U0001F4C8'        # chart with upwards trend
emoji_top_green = u'\U00002705'  # check mark (price dropped - good for buyer)

emoji_down = u'\U0001F4C9'       # chart with downwards trend
emoji_down_red = u'\U0000274C'   # cross mark (price rose)


def num_conversion(a):
    """
    Format a number in money style with thousands separators.
    :param a: numeric value (or numeric string) to format
    :return: formatted string, e.g. 1234567 -> '1,234,567'
    """
    return '{:,}'.format(int(a))


def clean(text):
    """
    Clean scraped text before storing it: drop tabs/newlines, trim whitespace.
    :param text: raw text extracted from the page
    :return: cleaned text
    """
    return text.replace('\t', '').replace('\n', '').strip()


def calculation_percent(price_old, price_new):
    """
    Calculate the percent difference between the old and new price (history).

    NOTE(review): a price DROP is computed relative to the NEW price while a
    rise is relative to the OLD one - asymmetric, but kept unchanged to
    preserve the exact message text; confirm whether this is intended.
    :param price_old:
    :param price_new:
    :return: signed percent string, e.g. '+ 12.5'
    """
    if price_old > price_new:
        percent_price_history = '- ' + str(round(((int(price_old) - int(price_new)) / int(price_new)) * 100, 2))
    else:
        percent_price_history = '+ ' + str(round(((int(price_new) - int(price_old)) / int(price_old)) * 100, 2))
    return percent_price_history


def calculation_different_price(price_old, price_new):
    """
    Calculate the absolute difference between the old and new price.
    :param price_old:
    :param price_new:
    :return: signed, money-formatted difference string, e.g. '- 10,000'
    """
    if price_old > price_new:
        difference_price = '- ' + str(num_conversion(int(price_old) - int(price_new)))

    else:
        difference_price = '+ ' + str(num_conversion(int(price_new) - int(price_old)))
    return difference_price


def send_mes_to_bot(item):
    """
    Build the notification text for one offer and send it to Telegram.
    :param item: dict prepared in sqlite.write_sqlite3 (keys: sql_*,
                 old_price, item_price, price_history_srt, difference_price,
                 percent_difference_price, type_update)
    :return:
    """
    first_row = ''   # ID
    second_row = ''  # Name
    third_row = ''   # price
    fours_row = ''   # price_history
    five_row = ''    # address
    six_row = ''     # params
    seven_row = ''   # url

    if item['type_update'] == 'update':
        # item_price is a cursor.fetchall() result of exactly one row:
        # [(old_price,)]. Compare the scalar directly instead of relying on
        # Python's list-of-tuples lexicographic ordering (same result,
        # far clearer intent).
        if item['item_price'][0][0] >= item['sql_price']:
            # Price dropped (or unchanged): green marker.
            first_row = 'Обновилась цена id ' + str(item['sql_avito_id']) + ' ' + \
                        emoji_down + emoji_down + emoji_top_green + '\n\n'

            third_row = 'Старая цена = ' + str(num_conversion(item['old_price'])) + ' руб. /' + \
                        ' Новая цена = ' + str(num_conversion(item['sql_price'])) + ' руб.\n\n'
        else:
            # Price rose: red marker.
            first_row = 'Обновилась цена id ' + str(item['sql_avito_id']) + ' ' + \
                        emoji_top + emoji_top + emoji_down_red + '\n\n'

            # item_price[0][0] equals old_price (see write_sqlite3).
            third_row = 'Старая цена = ' + str(num_conversion(item['item_price'][0][0])) + ' руб. /' + \
                        ' Новая цена = ' + str(num_conversion(item['sql_price'])) + ' руб.\n\n'

        fours_row = 'Изменения цен \n' + str(item['price_history_srt']) + '\nРазница: ' + \
                    item['difference_price'] + ' (' + item['percent_difference_price'] + '%)\n\n'

    elif item['type_update'] == 'new':
        first_row = 'Новое объявление ' + str(item['sql_avito_id']) + '\n\n'
        # No num_conversion here: a freshly parsed offer may carry the
        # 'Не найден' placeholder instead of a numeric price.
        third_row = 'Цена: ' + str(item['sql_price']) + ' руб.\n\n'
    else:
        log.error('type_update = NONETYPE ' + str(item['sql_avito_id']))
    second_row = str(item['sql_name']) + '\n\n'
    five_row = 'Адрес: ' + str(item['sql_address']) + '\n\n'
    six_row = 'Параметры: ' + str(item['sql_params']) + '\n\n'
    seven_row = 'Ссылка ' + str(item['sql_url']) + '\n\n'
    # Categories that share the generic message layout below.
    none_type_of = ['Личные вещи', 'Работа', 'Для дома и дачи', 'Предложение услуг', 'Электроника', 'Животные',
                    'Готовый бизнес и оборудование']
    if item['sql_type_of'] == 'Недвижимость':
        mes_to_bot = first_row + third_row + fours_row + five_row + seven_row
    elif item['sql_type_of'] == 'Транспорт':
        mes_to_bot = first_row + second_row + third_row + fours_row + six_row + seven_row
    elif item['sql_type_of'] == 'Хобби и отдых':
        mes_to_bot = first_row + second_row + third_row + fours_row + five_row + six_row + seven_row
    elif item['sql_type_of'] in none_type_of:
        mes_to_bot = first_row + second_row + third_row + fours_row + seven_row
    else:
        log.error('sql_type_of = NONETYPE ' + str(item['sql_avito_id']))
        mes_to_bot = 'sql_type_of = NONETYPE ' + str(item['sql_avito_id'])
    text_handler(item['sql_chat'], mes_to_bot)


def parse_items_to_send(items):
    """
    Send a Telegram notification for every collected item.
    :param items: list of item dicts (see send_mes_to_bot)
    :return:
    """
    for item in items:
        send_mes_to_bot(item)