├── ad ├── __init__.py ├── adapters │ ├── __init__.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_provider.py │ │ └── test_repository.py │ ├── utils.py │ ├── presenter.py │ ├── repository.py │ └── provider.py ├── core │ ├── __init__.py │ ├── tests │ │ ├── __init__.py │ │ └── strategies.py │ ├── usecases │ │ ├── __init__.py │ │ ├── create_full_ad.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── test_get_ads.py │ │ │ └── test_ads_sender.py │ │ ├── create_detail_ad.py │ │ ├── get_ads.py │ │ ├── ads_sender.py │ │ └── create_base_ads.py │ ├── adapters │ │ ├── __init__.py │ │ ├── provider.py │ │ └── repository.py │ ├── errors.py │ └── entities.py ├── logger.py ├── upload_ads.py ├── telegram_sender.py └── implementations.py ├── requirements-test.txt ├── nginx ├── Dockerfile └── conf ├── docs ├── diagrams │ ├── olx-icon.png │ ├── rss-icon.jpeg │ ├── olx-parser-architecture.png │ └── olx-diagram.py └── screenshots │ └── screenshot-1.png ├── entrypoint.sh ├── environment.ini ├── configuration.json ├── Dockerfile ├── unpined-deps.txt ├── docker-compose.yml ├── .pre-commit-config.yaml ├── requirements.txt ├── app.py ├── .gitignore ├── templates └── description.html ├── README.md └── LICENSE /ad/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ad/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ad/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ad/core/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ad/core/usecases/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ad/adapters/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ad/core/usecases/create_full_ad.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ad/core/usecases/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | ipdb 2 | deal 3 | hypothesis 4 | -------------------------------------------------------------------------------- /nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:1.13-alpine 2 | COPY conf /etc/nginx/conf.d/default.conf 3 | -------------------------------------------------------------------------------- /docs/diagrams/olx-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lerdem/olx-parser/HEAD/docs/diagrams/olx-icon.png -------------------------------------------------------------------------------- /docs/diagrams/rss-icon.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lerdem/olx-parser/HEAD/docs/diagrams/rss-icon.jpeg -------------------------------------------------------------------------------- /docs/screenshots/screenshot-1.png: -------------------------------------------------------------------------------- 
class Presenter(ABC):
    """Output boundary used by the use cases to render a collection of ads."""

    @abstractmethod
    def present(self, ads):
        """Render ``ads``; the concrete presenter decides the returned format."""
# Dedicated logger for the background jobs. Records are appended to a
# plain-text file whose path is relative to the current working directory.
logger = logging.getLogger('upload-logger')
# The jobs log Cyrillic messages, so the file encoding is pinned to UTF-8
# instead of being inherited from the process locale.
handler = logging.FileHandler('cron-upload_ads-logs.txt', encoding='utf-8')
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
# DEBUG so that the "waiting ..." heartbeat messages of the jobs are recorded.
logger.setLevel(logging.DEBUG)
BASE_DIR = (
    Path(__file__).resolve(strict=True).parent.parent.parent
)  # project root dir = olx-parser-rss


def get_config() -> configparser.ConfigParser:
    """Load ``environment.ini`` from the project root.

    Returns:
        A ``ConfigParser`` populated from ``BASE_DIR / 'environment.ini'``.

    Raises:
        FileNotFoundError: if the file does not exist.
        configparser.Error: if the file is not valid INI.
    """
    config_file = BASE_DIR.joinpath('environment.ini')
    config = configparser.ConfigParser()
    # Explicit encoding so parsing does not depend on the process locale.
    with open(config_file, encoding='utf-8') as raw_config_file:
        config.read_file(raw_config_file)
    return config
class CreateAdsProvider(ABC):
    """Supplies the raw rows found on a search page."""

    @abstractmethod
    def get_raw(self, start_url) -> List[Tuple]:
        """Return raw ad tuples for ``start_url`` or raise ``AdapterError``."""


class DetailedAdProvider(ABC):
    """Supplies the raw details of a single ad page."""

    @abstractmethod
    def get_raw(self, external_url) -> Tuple[List, str, str, str]:
        """Return a (list, str, str, str) tuple for ``external_url`` or raise ``AdapterError``."""


class PhoneProvider(ABC):
    """Supplies the raw phone string for an ad."""

    @abstractmethod
    def get_raw(self, external_id) -> str:
        """Return the raw phone for ``external_id`` or raise ``AdapterError``."""
@dataclass
class CreateDetailedAdUseCase:
    """Turn a stored base ad into a detailed ad and persist it."""

    _repository: DetailedAdRepo
    _provider: DetailedAdProvider

    def __call__(self, ad_id: str) -> None:
        """Fetch details for the base ad ``ad_id`` and save the merged entity."""
        stored_ad = self._repository.get_base_ad_by_id(ad_id)
        scraped = self._provider.get_raw(stored_ad.url)
        # The provider payload is positional; map it onto entity field names.
        details = {
            'image_urls': scraped[0],
            'external_id': scraped[1],
            'description': scraped[2],
            'name': scraped[3],
        }
        self._repository.save(DetailedAd(**details, **stored_ad.dict()))
def _upload_job():
    """Poll OLX forever: create base ads, then upload details for each new one.

    Every iteration sleeps a random 45-120 seconds, calls ``ads_creator`` and,
    for each returned id, calls ``ad_detail_uploader``. Failures are logged and
    the loop continues, so a single bad iteration cannot stop the process.
    This function never returns.
    """
    logger.debug('uploader started')
    while True:
        time_to_wait = randint(45, 120)
        logger.debug(f'waiting before upload from olx {time_to_wait} seconds')
        sleep(time_to_wait)
        try:
            new_ad_ids = ads_creator()
        except UseCaseError as e:
            # Expected failure reported by the use case.
            logger.error(e)
            continue
        except Exception:
            # Top-level boundary of a long-running job: anything unexpected
            # is logged with its traceback instead of terminating the loop.
            logger.exception('Unexpected error while creating base ads')
            continue

        logger.debug(f'Загружены ads: {new_ad_ids}')
        for _id in new_ad_ids:
            try:
                ad_detail_uploader(ad_id=_id)
            except Exception as e:
                # One broken ad must not prevent the remaining ones from loading.
                logger.exception(f'Error with {_id}, {e}')
class Test(unittest.TestCase):
    """Transport-level failures must be reported as ``AdapterError``."""

    def _assert_adapter_error(self, session):
        # Shared assertion: fetching through ``session`` raises AdapterError.
        with self.assertRaises(AdapterError):
            _get_olx_search_html_base('http://fake.com', session)

    def test_error1(self):
        # The request succeeds but the status check on the response fails.
        response = MagicMock(autospec=Session)
        response.raise_for_status.side_effect = HTTPError('lol', '123')
        session = MagicMock(autospec=Session)
        session.get.return_value = response
        self._assert_adapter_error(session)

    def test_error2(self):
        # The request itself fails with a connection error.
        session = MagicMock(autospec=Session)
        session.get.side_effect = ConnectionError('lol', '123')
        self._assert_adapter_error(session)

    def test_error3(self):
        # The request fails with a chunked-encoding error.
        session = MagicMock(autospec=Session)
        session.get.side_effect = ChunkedEncodingError('lol', '123')
        self._assert_adapter_error(session)
-------------------------------------------------------------------------------- 1 | import re 2 | from dataclasses import dataclass 3 | from functools import partial 4 | from typing import List, Optional 5 | 6 | from ad.core.adapters import Presenter 7 | from ad.core.adapters.repository import GetDetailedAdRepo 8 | 9 | 10 | @dataclass 11 | class GetAdsUseCase: 12 | _repo: GetDetailedAdRepo 13 | _presenter: Presenter 14 | 15 | def execute(self, tag: Optional[str], stop_words: List[str]): 16 | ads = self._repo.get_by_tag(tag) if tag is not None else self._repo.get_all() 17 | if stop_words: 18 | ads = self._exclude_stop_words_from_ads(stop_words, ads) 19 | last_30_ads = sorted(ads, key=lambda x: x.parse_date, reverse=True)[:30] 20 | return self._presenter.present(last_30_ads) 21 | 22 | @staticmethod 23 | def _exclude_stop_words_from_ads(stop_words, ads): 24 | stop_word_ignore = partial(_stop_word_ignore, stop_words) 25 | ads = (ad for ad in ads if stop_word_ignore(ad.title)) 26 | ads = (ad for ad in ads if stop_word_ignore(ad.description)) 27 | return ads 28 | 29 | 30 | def _stop_word_ignore(stop_words: List[str], text: str): 31 | if not stop_words and not text: 32 | return True 33 | # слово или часть слова (слева) начиная с пробела, без учета регистра 34 | pattern = re.compile('|'.join(rf'\b{word}' for word in stop_words), re.IGNORECASE) 35 | # match - "bla" - True - False 36 | # no match - None - False - True 37 | return not bool(pattern.search(text)) 38 | -------------------------------------------------------------------------------- /ad/core/usecases/ads_sender.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional, Dict 3 | 4 | from ad.core.adapters import Presenter 5 | from ad.core.adapters.repository import GetDetailedAdRepo, ViewsRepo, Sender 6 | from ad.core.entities import View, DetailedAds 7 | from ad.core.errors import AdapterError, UseCaseError 8 | 9 | 10 
@dataclass
class AdsSenderUseCase:
    """Send every ad that has no stored view yet and record it as viewed."""

    _ads_repository: GetDetailedAdRepo
    _views_repository: ViewsRepo
    _sender: Sender
    _presenter: Presenter

    def execute(self, tag: Optional[str]) -> None:
        """Send unsent ads, optionally restricted to ``tag``.

        Args:
            tag: only ads with this tag are considered; ``None`` means all ads.

        Raises:
            UseCaseError: when the sender fails. Ads sent before the failure
                already have their view saved; the failing one does not.
        """
        ads = (
            self._ads_repository.get_by_tag(tag)
            if tag is not None
            else self._ads_repository.get_all()
        )
        ads_ids = [ad.id for ad in ads]
        views = self._views_repository.get_views_by_ids(ads_ids)
        viewed_ids = {view.id for view in views}

        # Keyed by id so duplicates collapse (last one wins); dict order keeps
        # the repository order, which makes the pairing with the presenter's
        # messages below deterministic.
        unsent_by_id = {ad.id: ad for ad in ads if ad.id not in viewed_ids}
        unsent_ads: DetailedAds = list(unsent_by_id.values())

        # The presenter is invoked even when nothing is pending.
        messages = self._presenter.present(unsent_ads)
        for message, unsent_ad in zip(messages, unsent_ads):
            try:
                self._sender.send_message(message)
            except AdapterError as e:
                raise UseCaseError(f'Ошибка при отправке сообщения: {e}') from e
            # Mark as viewed only after a successful send.
            self._views_repository.save_view(View(id=unsent_ad.id))
@dataclass
class CreateAdsUseCase:
    """Create base ads for every configured search URL."""

    _repository: CreateAdsRepo
    _provider: CreateAdsProvider
    _configuration: CreateAdsConfig

    def __call__(self) -> List[str]:
        """Process all configured searches and return the ids of the new ads.

        Raises:
            UseCaseError: as soon as the provider fails for one search.
        """
        # TODO: if 2 of 10 urls fail, the remaining ones are not loaded.
        confs = self._configuration.get_configuration()
        return [
            ad_id
            for conf in confs
            for ad_id in self.__process_one(conf.search_url, conf.tag)
        ]

    def __process_one(self, url: str, tag: str) -> List[str]:
        """Save the ads from ``url`` whose URL is not stored yet; return their ids."""
        try:
            raw = self._provider.get_raw(start_url=url)
        except AdapterError as e:
            raise UseCaseError(
                f'Ошибка при получении "raw" данных: {e}.\nFor debug url={url}, tag={tag}'
            ) from e
        # A set gives constant-time membership checks against the stored ads.
        existed_urls = {ad.url for ad in self._repository.get_all()}
        provider_ads = [
            BaseAd(
                id=uuid.uuid4().hex,
                tag=tag,
                title=f'{item[0]} - {item[1]}',
                parse_date=datetime.now(pytz.utc),
                url=item[2],
            )
            for item in raw
        ]
        new = [ad for ad in provider_ads if ad.url not in existed_urls]
        self._repository.save(new)
        return [ad.id for ad in new]
container.register(CreateAdsConfig, CreateAdsConfigJson) 28 | container.register(CreateAdsUseCase) 29 | ads_creator = container.resolve(CreateAdsUseCase) 30 | 31 | container.register(DetailedAdRepo, DetailedAdRepoCsv) 32 | container.register(DetailedAdProvider, DetailedAdProviderOlx) 33 | container.register(CreateDetailedAdUseCase) 34 | ad_detail_uploader = container.resolve(CreateDetailedAdUseCase) 35 | 36 | container.register(GetDetailedAdRepo, DetailedAdGetRepoCsv) 37 | container.register(Presenter, DetailedAdFeedPresenter) 38 | container.register(GetAdsUseCase) 39 | _get_ads_usecase = container.resolve(GetAdsUseCase) 40 | get_detail_ads = _get_ads_usecase.execute 41 | 42 | container3 = punq.Container() 43 | container3.register(GetDetailedAdRepo, GetDebugRepo) 44 | container3.register(Presenter, DetailedAdFeedPresenter) 45 | container3.register(GetAdsUseCase) 46 | _get_ads_debug_usecase = container3.resolve(GetAdsUseCase) 47 | get_full_ads_debug = _get_ads_debug_usecase.execute 48 | -------------------------------------------------------------------------------- /ad/adapters/tests/test_repository.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from hypothesis import given 3 | 4 | 5 | from ad.adapters.repository import ( 6 | DetailedAdRepoCsv, 7 | _deserialize_detail, 8 | _serialize_detail, 9 | ) 10 | from ad.core.entities import DetailedAd 11 | from ad.core.tests.strategies import DetailedAdSt 12 | 13 | _ad = DetailedAd( 14 | id='bc516e2abb5445ae9d03128a7a911f8f', # dont show in template 15 | tag='arenda-dnepr', # dont show in template 16 | title='Сдам 2-х комнатную квартиру на длительный период - Днепр', 17 | publication_date='2021-11-04 12:58:45', # dont show in template 18 | parse_date='2021-11-04 12:58:45', 19 | url='https://www.olx.ua/d/obyavlenie/sdam-2-h-komnatnuyu-kvartiru-na-dlitelnyy-period-IDN7dzO.html', 20 | description='Сдам 2-х комнатную квартиру на длительный период для семейной пары в 
class CreateAdsRepo(ABC):
    """Storage interface for base ads."""

    @abstractmethod
    def save(self, base_ads: BaseAds) -> None:
        """Persist ``base_ads``."""

    @abstractmethod
    def get_all(self) -> BaseAds:
        """Return every stored base ad."""
24 | def get_base_ad_by_id(self, id: str) -> BaseAd: 25 | pass 26 | 27 | 28 | class GetDetailedAdRepo(ABC): 29 | @abstractmethod 30 | def get_all(self) -> DetailedAds: 31 | pass 32 | 33 | @abstractmethod 34 | def get_by_tag(self, tag: str) -> DetailedAds: 35 | pass 36 | 37 | 38 | class ViewsRepo(ABC): 39 | @abstractmethod 40 | def get_views_by_ids(self, ad_ids: List[str]) -> Views: 41 | pass 42 | 43 | @abstractmethod 44 | def save_view(self, view: View) -> None: # or raises AdapterError 45 | pass 46 | 47 | 48 | class Sender(ABC): 49 | @abstractmethod 50 | def send_message(self, msg: str) -> None: # or raises AdapterError 51 | pass 52 | 53 | 54 | class _ConfigurationItem(BaseModel): 55 | search_url: HttpUrl 56 | tag: str 57 | 58 | 59 | Configurations = List[_ConfigurationItem] 60 | 61 | 62 | class Configuration(BaseModel): 63 | __root__: Configurations 64 | 65 | @root_validator 66 | def check_tag_unique(cls, values): 67 | confs = values['__root__'] 68 | _is_unique_by([i.tag for i in confs], 'tags list') 69 | _is_unique_by([i.search_url for i in confs], 'urls list') 70 | return values 71 | 72 | 73 | def _is_unique_by(values: MutableSequence, values_name: str): 74 | if len(values) != len(set(values)): 75 | raise ValueError(f'{values_name} should be unique') 76 | 77 | 78 | class CreateAdsConfig(ABC): 79 | @abstractmethod 80 | def get_configuration(self) -> Configurations: 81 | pass 82 | -------------------------------------------------------------------------------- /ad/core/usecases/tests/test_get_ads.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest.mock import create_autospec 3 | 4 | import hypothesis.strategies as st 5 | from hypothesis import given, example, assume, settings, HealthCheck 6 | 7 | from ad.core.adapters import Presenter 8 | from ad.core.adapters.repository import GetDetailedAdRepo 9 | from ad.core.entities import BaseAds 10 | from ad.core.tests.strategies import DetailedAdSt 11 
class TestGetAdsUseCase(unittest.TestCase):
    """Tests for ``GetAdsUseCase.execute`` and the ``_stop_word_ignore`` helper."""

    def setUp(self) -> None:
        # Auto-specced doubles for the two collaborators of the use case.
        self.ads_repo = create_autospec(GetDetailedAdRepo)
        self.presenter = create_autospec(Presenter)

    @given(st.lists(DetailedAdSt, unique_by=lambda x: x.id, min_size=1, max_size=30))
    def test_usecase_ok(self, return_repo):
        """Without tag and stop words the presenter receives every repository ad.

        The generated lists hold at most 30 ads, so the "last 30" slice applied
        by the use case never shortens them.
        """
        # The mocks are shared between generated examples, so recorded calls
        # are cleared manually at the start of each one.
        self.ads_repo.reset_mock()  # hypothesis dont reset mocks
        self.presenter.reset_mock()  # hypothesis dont reset mocks
        self.ads_repo.get_all.return_value = return_repo

        get_ads = GetAdsUseCase(_repo=self.ads_repo, _presenter=self.presenter)
        get_ads.execute(tag=None, stop_words=[])
        # First positional argument of the first ``present`` call.
        presenter_call_arg: BaseAds = self.presenter.present.call_args_list[0][0][0]
        self.assertEqual(len(presenter_call_arg), len(return_repo))

    # Explicit cases: no stop word in the text -> True; a stop word present
    # (also with different letter case or as a word prefix) -> False;
    # empty stop-word list with empty text -> True.
    @example(
        stop_words=['вул. Киснева', 'Образцова'],
        text='Сдам 2х комнатную квартиру по улице Тепличная',
        is_ignore=True
    )
    @example(
        stop_words=['вул. Киснева', 'Образцова'],
        text='Сдам 2 кімнатну квартиру, у кінеці пр. Слобожанське, вул. Киснева 2',
        is_ignore=False,
    )
    @example(
        stop_words=['Образцова', 'Левобережн',],
        text='Квартира на левобережном в аренду. Не рієлтор. - 7 000 грн.',
        is_ignore=False,
    )
    @example(
        stop_words=['Образцова', 'левобережн', ],
        text='Сдам 2к Фестивальный , Левобережный 3 рядом Караван , Ашан, Березинка - 7 000 грн.',
        is_ignore=False,
    )
    @example(
        stop_words=[],
        text='',
        is_ignore=True,
    )
    @given(
        stop_words=st.just(['вул. Киснева', 'Образцова']),
        text=st.just('Сдам 2-ком. Квартиру. Калиновой и Образцова. 4/9 Эт.'),
        is_ignore=st.just(False),
    )
    def test_stop_word_ignore(self, stop_words, text, is_ignore):
        """``_stop_word_ignore`` returns False exactly when a stop word matches."""
        res = _stop_word_ignore(stop_words, text)
        # ``assertIs`` requires a real bool, not merely a truthy/falsy value.
        self.assertIs(
            res, is_ignore, msg='если есть совпадение, то возвращаеться False'
        )
self.assertGreaterEqual(viewed_ads_count, self.sender.send_message.call_count) 41 | 42 | def _reset_mocks(self): 43 | self.presenter.reset_mock() 44 | self.sender.reset_mock() 45 | self.view_repo.reset_mock() 46 | self.ads_repo.reset_mock() 47 | 48 | @given(BaseAdSt, BaseAdSt) 49 | @settings(max_examples=1) 50 | def test_usecase_error(self, _ad1, _ad2): 51 | assume(_ad1 != _ad2) 52 | self._reset_mocks() 53 | 54 | self.ads_repo.get_all.return_value = [_ad1, _ad2] 55 | self.view_repo.get_views_by_ids.return_value = [_ad1] 56 | self.presenter.present.return_value = ['msg1'] 57 | self.sender.send_message.side_effect = AdapterError 58 | 59 | send_ads = AdsSenderUseCase( 60 | self.ads_repo, self.view_repo, self.sender, self.presenter 61 | ) 62 | with self.assertRaises(UseCaseError): 63 | send_ads.execute(None) 64 | 65 | self.sender.send_message.assert_called_once_with('msg1') 66 | self.view_repo.save_view.assert_not_called() 67 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | # Pycharm 141 | .idea/ 142 | 143 | *.csv 144 | 145 | cron-upload_ads-logs.txt 146 | *.pickle 147 | -------------------------------------------------------------------------------- /ad/adapters/presenter.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import ipaddress 3 | from typing import List 4 | 5 | from jinja2 import Environment, FileSystemLoader 6 | from premailer import transform 7 | from rfeed import Feed, Item, Guid 8 | 9 | from ad.adapters.utils import get_config 10 | from ad.core.adapters import Presenter 11 | from ad.core.entities import BaseAds, DetailedAds, FullAd, BaseAd 12 | from ad.core.errors import AdapterError 13 | 14 | _BASE_TEXT = 'RSS feed parsed from Olx' 15 | 16 | 17 | class DetailedAdFeedPresenter(Presenter): 18 | def __init__(self): 19 | self._port: int = 12345 20 | self._host_ip: str = self._get_host_ip() 21 | 22 | def present(self, ads: DetailedAds): 23 | items = [] 24 | for ad in ads: 25 | item = Item( 26 | guid=Guid(ad.id, isPermaLink=False), 27 | title=ad.title, 28 | link=ad.url, 29 | description=_get_detail(ad), 30 | author='lerdem', 31 | pubDate=ad.parse_date, 32 | ) 33 | items.append(item) 34 | 35 | description = f'{ads[0].tag}: {_BASE_TEXT}' if ads else _BASE_TEXT 36 | 
title = ads[0].tag if ads else _BASE_TEXT 37 | feed = Feed( 38 | title=title, 39 | link=f'http://{self._host_ip}:{self._port}/detail-rss', 40 | description=description, 41 | language='ru-Ru', 42 | lastBuildDate=datetime.datetime.now(), 43 | items=items, 44 | ) 45 | return feed.rss() 46 | 47 | @staticmethod 48 | def _get_host_ip() -> str: # or raises AdapterError 49 | config = get_config() 50 | maybe_ip = config.get('general', 'IP') 51 | try: 52 | # https://stackoverflow.com/questions/319279/how-to-validate-ip-address-in-python 53 | sure_ip = ipaddress.ip_address(maybe_ip) 54 | except ValueError as e: 55 | raise AdapterError(e) 56 | else: 57 | return str(sure_ip) 58 | 59 | 60 | class BaseAdTelegramPresenter(Presenter): 61 | def present(self, ads: BaseAds) -> List[str]: 62 | return [self._ad_to_html(ad) for ad in ads] 63 | 64 | @staticmethod 65 | def _ad_to_html(ad: BaseAd) -> str: 66 | # .encode('utf8').decode('utf8') to fix telegram cyrilic rendering issues 67 | return f'''{ad.title}'''.encode('utf8').decode('utf8') 68 | 69 | 70 | def _get_detail(ad: FullAd) -> str: 71 | file_loader = FileSystemLoader('templates') 72 | env = Environment(loader=file_loader) 73 | template = env.get_template('description.html') 74 | html = template.render(ad=ad) 75 | inline_html = transform(html) 76 | return inline_html 77 | 78 | 79 | if __name__ == '__main__': 80 | 81 | content = 'This is about page' 82 | 83 | file_loader = FileSystemLoader('templates') 84 | env = Environment(loader=file_loader) 85 | 86 | template = env.get_template('description.html') 87 | 88 | output = template.render(preview_url=content) 89 | print(output) 90 | -------------------------------------------------------------------------------- /ad/core/tests/strategies.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from itertools import cycle 3 | import hypothesis.strategies as st 4 | 5 | 6 | from ad.core.entities import BaseAd, DetailedAd 7 
# Real-looking OLX ad URLs used as the pool for generated ``url`` fields.
_URLS = [
    'https://www.olx.ua/d/uk/obyavlenie/kvartira-v-arendu-brznka-karavan-klochko-lvoberezhniy-IDUeFir.html',
    'https://www.olx.ua/d/uk/obyavlenie/sdam-2-h-kvartiru-levyy-bereg-kosiora-pravda-IDTX3ZS.html',
    'https://www.olx.ua/d/uk/obyavlenie/sdam-2k-kvartiru-pr-mira-IDUdKIe.html',
    'https://www.olx.ua/d/uk/obyavlenie/sdam-2-komnatnuyu-levoberezhnyy-3-karavan-IDUdZfi.html',
    'https://www.olx.ua/d/uk/obyavlenie/kvartira-na-levoberezhnom-v-arendu-ne-rltor-IDSrbyI.html',
    'https://www.olx.ua/d/uk/obyavlenie/sdam-2-kom-kvartiru-donetskoe-shosse-134-klochko-6-berezinka-karavan-IDUeW72.html',
]
# NOTE: the previous ``st.builds(lambda: next(cycle(_URLS)))`` created a brand
# new ``cycle`` on every draw, so it always produced ``_URLS[0]``.
# ``sampled_from`` actually draws from the whole pool (and shrinks towards the
# first element).
UrlSt = st.sampled_from(_URLS)
IdSt = st.text(alphabet='qwertyui1234567889', min_size=10, max_size=10)
TagSt = st.text(alphabet='qwertyuiop-asdfghjklzxcvbnm')
TitleSt = st.text()
# Lower bound is fixed once, at import time.
ParseDateSt = st.datetimes(datetime.now())

BaseAdSt = st.builds(
    BaseAd, id=IdSt, tag=TagSt, title=TitleSt, parse_date=ParseDateSt, url=UrlSt
)

# Real-looking OLX CDN image URLs used as the pool for ``image_urls``.
_IMAGE_URLS = [
    'https://ireland.apollo.olxcdn.com:443/v1/files/j31k5csrm9vp2-UA/image;s=854x384',
    'https://ireland.apollo.olxcdn.com:443/v1/files/jn6omphtsgad-UA/image;s=384x854',
    'https://ireland.apollo.olxcdn.com:443/v1/files/zansrvj2uh673-UA/image;s=384x854',
    'https://ireland.apollo.olxcdn.com:443/v1/files/k3uysaiiaevn-UA/image;s=854x384',
    'https://ireland.apollo.olxcdn.com:443/v1/files/vd624onnfj521-UA/image;s=384x854',
    'https://ireland.apollo.olxcdn.com:443/v1/files/t3h4zijo2xds-UA/image;s=854x384',
    'https://ireland.apollo.olxcdn.com:443/v1/files/0usz5bi289vp2-UA/image;s=384x854',
    'https://ireland.apollo.olxcdn.com:443/v1/files/jjdd7d35d8ca-UA/image;s=854x384',
    'https://ireland.apollo.olxcdn.com:443/v1/files/flbsr10fq6uh3-UA/image;s=384x854',
    'https://ireland.apollo.olxcdn.com:443/v1/files/t1shs5579wxd2-UA/image;s=384x854',
    'https://ireland.apollo.olxcdn.com:443/v1/files/k69e39fw5b0s1-UA/image;s=854x384',
    'https://ireland.apollo.olxcdn.com:443/v1/files/hknglo3p5c0a1-UA/image;s=384x854',
    'https://ireland.apollo.olxcdn.com:443/v1/files/90abhy3rgbdx1-UA/image;s=1200x1600',
    'https://ireland.apollo.olxcdn.com:443/v1/files/9nwf5zho7ur73-UA/image;s=1600x1200',
    'https://ireland.apollo.olxcdn.com:443/v1/files/fkyfzffs04uc2-UA/image;s=1200x1600',
    'https://ireland.apollo.olxcdn.com:443/v1/files/an642er892ys3-UA/image;s=1200x1600',
    'https://ireland.apollo.olxcdn.com:443/v1/files/qk5iak4xq43n2-UA/image;s=1600x1200',
    'https://ireland.apollo.olxcdn.com:443/v1/files/9pb0ssxp4zan3-UA/image;s=1200x1600',
    'https://ireland.apollo.olxcdn.com:443/v1/files/2juha7qyu52g3-UA/image;s=1200x1600',
    'https://ireland.apollo.olxcdn.com:443/v1/files/tf2wgvjespxm-UA/image;s=1200x1600',
    'https://ireland.apollo.olxcdn.com:443/v1/files/qlbavct2iycx2-UA/image;s=1200x1600',
    'https://ireland.apollo.olxcdn.com:443/v1/files/ejke0r2rnx553-UA/image;s=1200x1600',
    'https://ireland.apollo.olxcdn.com:443/v1/files/qpy48jmoaqzc1-UA/image;s=1200x1600',
    'https://ireland.apollo.olxcdn.com:443/v1/files/8dbgdbn6mems1-UA/image;s=1200x1600',
]

# Same fix as ``UrlSt``: draw each element from the whole pool instead of
# always returning ``_IMAGE_URLS[0]``.
ImageUrlsSt = st.lists(st.sampled_from(_IMAGE_URLS), min_size=1, max_size=4)
DetailedAdSt = st.builds(
    DetailedAd,
    id=IdSt,
    tag=TagSt,
    title=TitleSt,
    parse_date=ParseDateSt,
    url=UrlSt,
    description=TitleSt,
    image_urls=ImageUrlsSt,
    external_id=st.text(alphabet='1234567890', min_size=8, max_size=8),
    name=TitleSt,
)

if __name__ == '__main__':
    # Manual smoke check: print one generated instance of each entity.
    ad = BaseAdSt.example()
    ad2 = DetailedAdSt.example()
    from pprint import pprint

    pprint(ad)
    pprint(ad2)
"""CSV, JSON and Telegram implementations of the core repository adapters."""
import configparser
import csv
import os
from itertools import chain
from typing import Dict, List
from telegram import Bot
from telegram.bot import InvalidToken
from telegram.error import TelegramError

from ad.adapters.utils import get_config, BASE_DIR
from ad.core.adapters.repository import (
    CreateAdsRepo,
    DetailedAdRepo,
    CreateAdsConfig,
    Configuration,
    Configurations,
    ViewsRepo,
    Sender,
    GetDetailedAdRepo,
)
from ad.core.entities import (
    BaseAds,
    BaseAd,
    FullAd,
    DetailedAd,
    DetailedAds,
    AnyAds,
    FullAds,
    Views,
    View,
)
from ad.core.errors import AdapterError

_BASE_FILE_NAME = BASE_DIR.joinpath('.base-ads.csv')
_DETAIL_FILE_NAME = BASE_DIR.joinpath('.detail-ads.csv')
_FULL_FILE_NAME = BASE_DIR.joinpath('.full-ads.csv')
_VIEWS_FILE_NAME = BASE_DIR.joinpath('.ad-views.csv')

# Ads contain Cyrillic text, so the storage encoding is pinned instead of
# depending on the platform/locale default.
_CSV_ENCODING = 'utf-8'

# Storage file -> CSV header (field names of the entity stored in it).
_file_field_map = {
    _BASE_FILE_NAME: BaseAd.__fields__.keys(),
    _DETAIL_FILE_NAME: DetailedAd.__fields__.keys(),
    _FULL_FILE_NAME: FullAd.__fields__.keys(),
    _VIEWS_FILE_NAME: View.__fields__.keys(),
}


def _init_storage(file_name, fields):
    """Create ``file_name`` containing only a CSV header, unless it exists."""
    if os.path.exists(file_name):
        return
    with open(file_name, 'w', newline='', encoding=_CSV_ENCODING) as csvfile:
        csv.DictWriter(csvfile, fieldnames=fields).writeheader()


def _migrate():
    """Create every storage file listed in ``_file_field_map`` if missing."""
    for file_name, fields in _file_field_map.items():
        _init_storage(file_name, fields)


class CreateAdsRepoCsv(CreateAdsRepo):
    """Append-only CSV storage of base ads."""

    def save(self, base_ads: BaseAds) -> None:
        """Append ``base_ads`` to the base-ads file (no header is written)."""
        with open(
            _BASE_FILE_NAME, 'a', newline='', encoding=_CSV_ENCODING
        ) as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=BaseAd.__fields__.keys())
            writer.writerows(ad.dict() for ad in base_ads)

    def get_all(self) -> BaseAds:
        """Return every stored base ad."""
        with open(_BASE_FILE_NAME, newline='', encoding=_CSV_ENCODING) as csvfile:
            return [BaseAd(**row) for row in csv.DictReader(csvfile)]


class DetailedAdRepoCsv(DetailedAdRepo):
    """CSV storage of detailed ads, unique by ``external_id``."""

    def save(self, detailed_ad: DetailedAd) -> None:
        """Insert ``detailed_ad`` or replace the stored ad with the same
        ``external_id``; the whole file is rewritten."""
        # Read first: opening with 'w' below truncates the file.
        saved = self.get_all_detail()
        with open(
            _DETAIL_FILE_NAME, 'w', newline='', encoding=_CSV_ENCODING
        ) as csvfile:
            writer = csv.DictWriter(
                csvfile, fieldnames=DetailedAd.__fields__.keys()
            )
            writer.writeheader()
            for ad in self._mix_existed_ads_and_one_new(saved, detailed_ad):
                writer.writerow(_serialize_detail(ad))

    @staticmethod
    def _mix_existed_ads_and_one_new(
        existed_ads: DetailedAds, new_or_updated_ad: DetailedAd
    ):
        """Yield the stored ads except the one being replaced, then the new
        ad last."""
        existed_ads_without_new = (
            ad
            for ad in existed_ads
            if ad.external_id != new_or_updated_ad.external_id
        )
        yield from chain(existed_ads_without_new, [new_or_updated_ad])

    @staticmethod
    def get_all_detail() -> DetailedAds:
        """Return every stored detailed ad."""
        with open(
            _DETAIL_FILE_NAME, newline='', encoding=_CSV_ENCODING
        ) as csvfile:
            return [_deserialize_detail(row) for row in csv.DictReader(csvfile)]

    @staticmethod
    def get_all_base() -> BaseAds:
        """Return every stored base ad (delegates to ``CreateAdsRepoCsv``)."""
        return CreateAdsRepoCsv().get_all()

    def get_base_ad_by_id(self, id: str) -> BaseAd:
        """Return the first base ad whose id equals ``id``.

        Raises:
            AdapterError: no stored base ad has this id.
        """
        found = next((ad for ad in self.get_all_base() if ad.id == id), None)
        if found is None:
            raise AdapterError(f'Не найдено объявление {id}')
        return found


def _serialize_detail(ad: DetailedAd) -> Dict:
    """Convert ``ad`` to a flat CSV row (image URLs joined into one cell)."""
    data = ad.dict()
    # pop + re-insert keeps the original key order of the row (image_urls last)
    urls = _serialize_urls(data.pop('image_urls'))
    data['image_urls'] = urls
    return data


def _deserialize_detail(row: Dict) -> DetailedAd:
    """Build a ``DetailedAd`` from a CSV row produced by
    ``_serialize_detail``."""
    raw = row.pop('image_urls')
    row['image_urls'] = _deserialize_urls(raw)
    return DetailedAd(**row)


def _serialize_urls(urls):
    """Join URLs with commas so they fit into a single CSV cell."""
    return ','.join(urls)


def _deserialize_urls(raw: str):
    """Inverse of ``_serialize_urls``; an empty cell means no images."""
    if not raw:
        return []
    return raw.split(',')


class DetailedAdGetRepoCsv(GetDetailedAdRepo):
    """Read-only access to the detailed ads stored in CSV."""

    def get_all(self) -> DetailedAds:
        return DetailedAdRepoCsv().get_all_detail()

    def get_by_tag(self, tag: str) -> DetailedAds:
        return _filter_by_tag(tag, self.get_all())


def _filter_by_tag(tag, items: AnyAds) -> AnyAds:
    """Return the ads from ``items`` whose ``tag`` equals ``tag``."""
    return [ad for ad in items if ad.tag == tag]


class CreateAdsConfigJson(CreateAdsConfig):
    """Search configuration read from ``configuration.json``."""

    def get_configuration(self) -> Configurations:
        # The path is relative to the current working directory.
        return Configuration.parse_file('configuration.json').__root__


class GetDebugRepo(GetDetailedAdRepo):
    """Repository returning one hard-coded ad (for template debugging)."""

    def get_all(self) -> FullAds:
        ad = FullAd(
            id='bc516e2abb5445ae9d03128a7a911f8f',  # dont show in template
            tag='arenda-dnepr',  # dont show in template
            title='Сдам 2-х комнатную квартиру на длительный период - Днепр',
            publication_date='2021-11-04 12:58:45',  # dont show in template
            parse_date='2021-11-04 12:58:45',
            url='https://www.olx.ua/d/obyavlenie/sdam-2-h-komnatnuyu-kvartiru-na-dlitelnyy-period-IDN7dzO.html',
            description='Сдам 2-х комнатную квартиру на длительный период для семейной пары в районе '
            '97 школы'
            ' (Ул. Братьев Трофимовых 40), 6 этаж 9-и этажного дома, не угловая, теплая, есть лоджия, застеклена.',
            image_urls=[
                'https://ireland.apollo.olxcdn.com:443/v1/files/dodwyas1emy32-UA/image;s=4000x3000',
                'https://ireland.apollo.olxcdn.com/v1/files/pxokmbrmwf9v2-UA/image;s=1104x1472',
                'https://ireland.apollo.olxcdn.com/v1/files/ve9s1d20cn211-UA/image;s=1104x1472',
                'https://ireland.apollo.olxcdn.com/v1/files/ralzthng8yp52-UA/image;s=1944x2592',
                'https://ireland.apollo.olxcdn.com/v1/files/il2y84fnyo5w-UA/image;s=591x1280',
            ],
            external_id='725276749',
            name='Феликс',
            phone='+380995437751',
        )
        return [ad]

    def get_by_tag(self, tag: str) -> DetailedAds:
        return _filter_by_tag(tag, self.get_all())


class ViewsRepoCsv(ViewsRepo):
    """CSV storage of "this ad was already sent" markers."""

    def get_views_by_ids(self, ad_ids: List[str]) -> Views:
        """Return the stored views whose id is in ``ad_ids``.

        Example: ad_ids = [0, 1, 2, 3], stored views = [1, 2] -> [1, 2].
        Duplicate rows for one id collapse to the last one read.
        """
        with open(
            _VIEWS_FILE_NAME, newline='', encoding=_CSV_ENCODING
        ) as csvfile:
            views_by_id = {
                view.id: view
                for view in (View(**row) for row in csv.DictReader(csvfile))
            }
        wanted_ids = set(ad_ids)
        return [
            view for view_id, view in views_by_id.items() if view_id in wanted_ids
        ]

    def save_view(self, view: View) -> None:
        """Append ``view`` to the views file."""
        with open(
            _VIEWS_FILE_NAME, 'a', newline='', encoding=_CSV_ENCODING
        ) as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=View.__fields__.keys())
            writer.writerow(view.dict())


class TelegramSender(Sender):
    """Sends HTML messages to one Telegram chat through a bot."""

    def __init__(self):
        """Read the token and chat id from the config and create the bot.

        Raises:
            AdapterError: token or chat id is missing or malformed.
        """
        _token = self._get_token()
        try:
            self._bot = Bot(token=_token)
        except InvalidToken:
            raise AdapterError('Нужен валидный телеграм токен, а не любые символы')
        self._chat_id = self._get_chat_id()

    def send_message(self, msg: str) -> None:  # or raises AdapterError
        """Send ``msg`` (HTML markup) to the configured chat.

        Raises:
            AdapterError: Telegram rejected the message or was unreachable.
        """
        try:
            self._bot.send_message(
                chat_id=self._chat_id, text=msg, parse_mode='HTML'
            )
        except TelegramError as e:
            # Honour the Sender contract: callers only handle AdapterError.
            raise AdapterError(
                f'Не удалось отправить сообщение в телеграм: {e}'
            ) from e

    @staticmethod
    def _get_token():
        """Return TELEGRAM_BOT_TOKEN from the ``[secrets]`` config section."""
        config = get_config()
        try:
            return config.get('secrets', 'TELEGRAM_BOT_TOKEN')
        # A missing [secrets] section is the same user mistake as a missing
        # option, so both get the same hint.
        except (configparser.NoSectionError, configparser.NoOptionError):
            raise AdapterError(
                '''Нет токена для телеграм бота.
                В файле environment.ini в [secrets] укажите:
                TELEGRAM_BOT_TOKEN=Replace-with-your-token'''
            )

    @staticmethod
    def _get_chat_id() -> int:
        """Return CHAT_ID from the ``[secrets]`` config section as an int."""
        config = get_config()
        try:
            return config.getint('secrets', 'CHAT_ID')
        except ValueError:
            raise AdapterError('телеграм CHAT_ID должен состоять из цифр')
        except (configparser.NoSectionError, configparser.NoOptionError):
            raise AdapterError(
                'Нет CHAT_ID для телеграм бота. '
                'В файле environment.ini в [secrets] укажите: CHAT_ID=123456'
            )


if __name__ == '__main__':
    _migrate()
10 |

{{ ad.title }}

11 |
12 |
13 | {% if ad.name %} 14 |
{{ ad.name }}
15 | {% endif %} 16 | {# {% if ad.phone %}#} 17 | {#
#} 18 | {# {{ ad.phone }}#} 19 | {#
#} 20 | {# {% endif %}#} 21 |
22 | 28 |
29 |
{{ ad.description }}
30 |
31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 |
44 | 72 |
73 |
74 | {% if ad.name %} 75 |
{{ ad.name }}
76 | {% endif %} 77 | {# {% if ad.phone %}#} 78 | {#
#} 79 | {# {{ ad.phone }}#} 80 | {#
#} 81 | {# {% endif %}#} 82 |
83 | 89 |
90 |
"""OLX scraping providers: search-result lists and single-ad detail pages."""
import pickle
from contextlib import contextmanager
from os.path import join, exists
from pathlib import Path
from typing import Iterator, List, Tuple, Dict, Type
from lxml import etree
from requests import Session, HTTPError, ConnectionError, Timeout
from requests.exceptions import ChunkedEncodingError

from ad.core.adapters.provider import CreateAdsProvider, DetailedAdProvider
from ad.core.errors import AdapterError

# Seconds to wait for OLX before giving up; without a timeout a stalled
# connection would block the caller forever.
_REQUEST_TIMEOUT = 30


class _CreateProviderOlx1(CreateAdsProvider):
    """Search-page parser for the new OLX layout (real-estate listings)."""

    _example_url = 'https://www.olx.ua/d/nedvizhimost/kvartiry/dolgosrochnaya-arenda-kvartir/dnepr/?currency=UAH&search[private_business]=private&search[order]=created_at%3Adesc&search[filter_float_price%3Ato]=7000&search[filter_float_total_area%3Afrom]=30&search[filter_float_total_area%3Ato]=1000&view=list'

    def get_raw(self, start_url) -> List[Tuple]:
        """Return ``(title, price, link)`` tuples found at ``start_url``.

        Raises:
            AdapterError: the page could not be fetched, or it contains no
                listing grid at all.
        """
        html = _get_olx_search_html(start_url)
        dom = etree.HTML(html)
        is_empty_search = len(dom.xpath('//div[contains(@class, "emptynew")]')) == 1
        if is_empty_search:
            return []
        grids = dom.xpath('.//div[contains(@data-testid, "listing-grid")]')
        if not grids:
            # Previously this surfaced as a bare ValueError from unpacking.
            raise AdapterError(
                'Не удалось найти блок с объявлениями на странице поиска ОЛХ'
            )
        # The first grid holds results for the searched area; any further
        # grids are OLX's "see results for a larger distance" suggestions
        # and are ignored.
        search_area = grids[0]
        return [
            self._process_item(item)
            for item in search_area.xpath('.//div[contains(@data-cy, "l-card")]')
        ]

    @staticmethod
    def _process_item(item):
        """Extract ``(title, price, absolute link)`` from one listing card."""
        title = item.xpath('.//h6/text()')[0]
        default_link = 'https://www.olx.ua'
        link = default_link + item.xpath('.//a/@href')[0]
        dirty_price = item.xpath('.//p[@data-testid="ad-price"]/text()')[0]  # '6 000 грн.'
        return title, dirty_price, link


class _CreateProviderOlx2(CreateAdsProvider):
    """Search-page parser for the old OLX layout (regular goods)."""

    _example_url = 'https://www.olx.ua/elektronika/telefony-i-aksesuary/mobilnye-telefony-smartfony/dnepr/q-pixel-4/'

    def get_raw(self, start_url) -> List[Tuple]:
        """Return ``(title, price, link)`` tuples found at ``start_url``."""
        html = _get_olx_search_html(start_url)
        dom = etree.HTML(html)
        is_empty_search = len(dom.xpath('//div[contains(@class, "emptynew")]')) == 1
        if is_empty_search:
            return []
        return [
            self._process_item(item)
            for item in dom.xpath('.//div[@class="offer-wrapper"]')
        ]

    @staticmethod
    def _process_item(item):
        """Extract ``(title, price, link)`` from one offer wrapper."""
        title = item.xpath('.//strong/text()')[0]
        link = item.xpath('.//a/@href')[0]
        dirty_price = item.xpath('.//p[@class="price"]/strong/text()')[0]
        return title, dirty_price, link


class _CreateProviderOlx3(_CreateProviderOlx2):
    """Search-page parser for job ads, where the salary may be absent."""

    _example_url = 'https://www.olx.ua/rabota/buhgalteriya/dnepr/?search%5Bfilter_enum_job_type%5D%5B0%5D=perm'

    @staticmethod
    def _process_item(item):
        """Extract ``(title, salary, link)``; a missing salary becomes a
        placeholder."""
        title = item.xpath('.//strong/text()')[0]
        link = item.xpath('.//a/@href')[0]
        try:
            dirty_price = item.xpath('.//span[@class="price-label"]/text()')[0]
        except IndexError:
            dirty_price = 'З/п не указана'
        return title, dirty_price, link


_SPECIAL = '/nedvizhimost/'  # apartment ads
_REGULAR = 'regular'
_RABOTA = '/rabota/'


# URL fragment -> parser class for search pages; _REGULAR is the fallback.
_mapper_base: Dict[str, Type[CreateAdsProvider]] = {
    _SPECIAL: _CreateProviderOlx1,
    _RABOTA: _CreateProviderOlx3,
    _REGULAR: _CreateProviderOlx2,
}


def _get_provider_klass(url, mapper) -> Type:
    """Return the class whose key occurs in ``url``, else the ``_REGULAR``
    one."""
    for k in mapper:
        if k in url:
            return mapper[k]
    return mapper[_REGULAR]


class CreateProviderOlx(CreateAdsProvider):
    """Facade choosing the search-page parser by URL."""

    def get_raw(self, start_url) -> List[Tuple]:
        _provider_klass = _get_provider_klass(start_url, _mapper_base)
        return _provider_klass().get_raw(start_url)


class _BaseAdProviderOlx(DetailedAdProvider):
    """Detail-page parser for regular ads."""

    def get_raw(self, external_url) -> Tuple[List, str, str, str]:
        """Return ``(image urls, ad id, description, seller name)``."""
        html = _get_olx_search_html(external_url)
        dom = etree.HTML(html)
        return (
            self.get_images(dom),
            self.get_ad_id(dom),
            self.get_description(dom),
            self.get_name(dom),
        )

    def get_images(self, dom) -> List:
        """Return the photo URLs of the ad, e.g.
        ``['https://ireland.apollo.olxcdn.com:443/v1/files/nupcplxvi2jq1-UA/image;s=900x1600', ...]``.
        """
        return dom.xpath('.//div[contains(@data-cy, "adPhotos-swiperSlide")]//img/@src')

    def get_ad_id(self, dom) -> str:  # or raises AdapterError
        """Return the numeric OLX ad id as a string.

        The id is taken from the promotion link, e.g.
        ``/purchase/promote/variant/?ad-id=805426819&bs=adpage_promote``;
        pages without that link (job ads) fall back to the last text item of
        the ad footer bar.

        Raises:
            AdapterError: no id was found or it is not a number.
        """
        promote_links = dom.xpath(
            './/a[contains(@data-testid, "promotion-link")]/@href'
        )
        if promote_links:
            ad_id = promote_links[0].split('ad-id=')[-1].split('&')[0]
        else:
            # for rabota ads https://www.olx.ua/obyavlenie/rabota/buhgalter-v-magazin-IDMOigt.html#874994eb0c
            footer_texts = dom.xpath(
                './/div[contains(@data-cy, "ad-footer-bar-section")]/span/text()'
            )
            if not footer_texts:
                # Previously an uncaught IndexError escaped from here.
                raise AdapterError('Не удалось распарсить id обьявления')
            ad_id = footer_texts[-1]
        try:
            return str(int(ad_id))
        except ValueError:
            raise AdapterError('Не удалось распарсить id обьявления')

    def get_description(self, dom) -> str:
        """Return the ad description with its text nodes concatenated."""
        # xpath yields one string per text node, e.g.
        # ['Сдаётся квартира общая площадь 45кв.м', '\nБез животных ', ...]
        description_parts: List[str] = dom.xpath('.//div[contains(@data-cy, "ad_description")]/div/text()')
        return ''.join(description_parts)

    def get_name(self, dom) -> str:
        """Return the seller name from the seller card."""
        card = dom.xpath('.//div[contains(@data-cy, "seller_card")]')[0]
        return card.xpath('.//h4/text()')[0]


class _DetailedAdRabotaProviderOlx(_BaseAdProviderOlx):
    """Detail-page parser for job ads (no photos, different markup)."""

    def get_images(self, dom) -> List:
        return []

    def get_description(self, dom) -> str:
        try:
            # help https://www.scientecheasy.com/2019/08/xpath-axes.html/
            return dom.xpath('.//h2//following-sibling::div/p/text()')[0]
        except IndexError:
            return 'Не удалось найти описание вакансии'

    def get_name(self, dom) -> str:
        # get from ad footer
        return dom.xpath('.//h2/text()')[-1]


# URL fragment -> parser class for detail pages; _REGULAR is the fallback.
_mapper_detail: Dict[str, Type[DetailedAdProvider]] = {
    _RABOTA: _DetailedAdRabotaProviderOlx,
    _REGULAR: _BaseAdProviderOlx,
}


class DetailedAdProviderOlx(DetailedAdProvider):
    """Facade choosing the detail-page parser by URL."""

    def get_raw(self, external_url) -> Tuple[List, str, str, str]:
        _provider_klass = _get_provider_klass(external_url, _mapper_detail)
        return _provider_klass().get_raw(external_url)


_BASE_DIR = Path(__file__).resolve(strict=True).parent


def _load_session(path) -> Session:
    """Return the session pickled at ``path``, or a fresh one if the file is
    missing or unreadable."""
    if not exists(path):
        return Session()
    try:
        # NOTE(security): pickle executes code on load; this is only
        # acceptable because the file is written by this module itself.
        # Never point it at data from an untrusted source.
        with open(path, 'rb') as f:
            return pickle.load(f)
    except (EOFError, pickle.UnpicklingError):
        # A dump interrupted half-way leaves a truncated file; start over
        # instead of failing on every subsequent run.
        return Session()


@contextmanager
def get_session() -> Iterator[Session]:
    """Yield a ``requests`` session that persists between runs.

    The session is restored from ``session.pickle`` next to this module and
    written back on exit, even if the body raised.
    """
    _path = join(_BASE_DIR, 'session.pickle')
    s = _load_session(_path)
    try:
        yield s
    finally:
        with open(_path, 'wb') as f:
            pickle.dump(s, f)


def _get_olx_search_html(url) -> str:  # or raises AdapterError
    """Fetch ``url`` using the persisted session and return the HTML."""
    with get_session() as session:
        return _get_olx_search_html_base(url, session)


def _get_olx_search_html_base(url, session: Session) -> str:  # or raises AdapterError
    """GET ``url`` with browser-like headers and return the response text.

    Raises:
        AdapterError: network failure, timeout, unreadable body or a non-2xx
            status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0',
        'Referer': url,
        'X-Client': 'DESKTOP',
    }
    try:
        r = session.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except ConnectionError as e:
        raise AdapterError(f'{e}, проблемы с подключение к интернету')
    except Timeout as e:
        raise AdapterError(f'{e}, ОЛХ не ответил за отведённое время')
    except ChunkedEncodingError as e:
        raise AdapterError(f'{e}, невозможно прочитать ответ от ОЛХ')

    try:
        r.raise_for_status()
    except HTTPError as e:
        raise AdapterError(f'{e}, на этапе запроса к ОЛХ')
    return r.text


if __name__ == '__main__':
    # Manual smoke run: first configured search, then its first ad.
    from pprint import pprint as print
    from ad.adapters.repository import CreateAdsConfigJson

    config = CreateAdsConfigJson().get_configuration()[0]
    print(config.search_url)
    res = CreateProviderOlx().get_raw(config.search_url)
    print(len(res))
    print(res[0])
    detail_url = res[0][2]
    print(detail_url)
    res = DetailedAdProviderOlx().get_raw(detail_url)
    print(res)
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 9 | 10 | 11 | 12 | 13 | 20 | [![Contributors][contributors-shield]][contributors-url] 21 | [![Forks][forks-shield]][forks-url] 22 | [![Stargazers][stars-shield]][stars-url] 23 | [![Issues][issues-shield]][issues-url] 24 | [![GPL-3 License][license-shield]][license-url] 25 | 26 | 27 | 28 | 29 |
30 |
31 | 36 | 37 |

Олх парсер с оповещением

38 | 39 |

40 | Возможности: 41 |

46 |
47 | Документация » 48 |
49 |
50 | View Demo 51 | · 52 | Report Bug 53 | · 54 | Request Feature 55 |

56 |
57 | 58 | 59 | 60 | 61 |
62 | Содержание 63 |
    64 |
  1. О проекте
  2. 65 |
  3. 66 | Начало 67 | 71 |
  4. 72 |
  5. Использование через приложение с RSS (frontend 1)
  6. 73 |
  7. Использование через Телеграм бота (frontend 2)
  8. 74 |
  9. Планы доработок
  10. 75 |
  11. Причины создания проекта
  12. 76 |
  13. Лицензия
  14. 77 |
  15. Благодарности
  16. 78 |
79 |
80 | 81 | 82 | 83 | 84 | ## О проекте 85 |
86 | 87 | ![Скриншот поиска "аренды жилья" в клиенте QuiteRSS][screenshot-1] 88 | 89 |

(в начало)

90 | 91 | 92 | 93 | 94 | ## Начало 95 |
96 | 97 | Верхнеуровнево проект состоит из двух частей: 98 | 1. backend - состоит из веб-приложения и процесса, который загружает данные из ОЛХ объявлений 99 | 2. frontend - предполагает 2 варианта использования: 100 | 1. любое приложение, поддерживающее [RSS протокол](https://ru.wikipedia.org/wiki/RSS). 101 | Т.е. начиная [RSS клиентами](https://en.wikipedia.org/wiki/Comparison_of_feed_aggregators), заканчивая ботами в мессенджерах ([пример](https://github.com/BoKKeR/RSS-to-Telegram-Bot)) 102 | 2. отправка данных в телеграм бот. 103 | ![Диаграмма архитектуры приложения][architecture-diagram] 104 | 105 | ### Системные зависимости 106 |
107 | 108 | Для установки backend необходимо иметь следующее ПО: 109 | - [git](https://git-scm.com/downloads) 110 | - [docker](https://docs.docker.com/engine/install/) 111 | - [docker-compose](https://docs.docker.com/compose/install/) 112 | - либо на уровне провайдера открыть порт номер 12345 либо с помощью [ufw](https://wiki.ubuntu.com/UncomplicatedFirewall) 113 | 114 | Работу с frontend рассмотрим на примере RSS клиента [QuiteRSS][frontend-example] 115 | 116 | ### Установка backend 117 |
118 | 119 | 1. Клонирование репозитория 120 | ```sh 121 | git clone https://github.com/lerdem/olx-parser.git 122 | ``` 123 | 2. Установка поисковых запросов для мониторинга в файле configuration.json ([пример конфигурации][configuration-json]) 124 | ```sh 125 | cd olx-parser/ && nano configuration.json 126 | ``` 127 | 3. Сборка и запуск backend 128 | ```sh 129 | docker-compose up -d --build 130 | ``` 131 | 132 |

(в начало)

133 | 134 | 135 | 136 | 137 | ## Использование через приложение с RSS (frontend 1) 138 |
139 | 140 | Необходимо добавить feed в выбранный вами вариант RSS клиента. 141 | Для этого на примере QuiteRSS добавьте feed (через Ctrl+N) ссылку 142 | вида http://{адрес_сервера}:12345/detail-rss 143 | 144 | Опциональные параметры: 145 | 1. Get параметр tag позволяет создать feed с объявлениями согласно 146 | настроенному tag [configuration.json][configuration-json] 147 | 148 | 2. Get параметр sw(сокращение от stop words) позволяет убирать из feed 149 | объявления с ненужными улицами(или любыми словами). Например, мне не подходят 150 | объявления, в которых указаны улицы Центральная и Вокзальная, для этого 151 | формируем запрос: 152 | http://{адрес_сервера}:12345/detail-rss?sw=Центральная&sw=Вокзальная 153 | 154 | 155 |

(в начало)

156 | 157 | 158 | ## Использование через Телеграм бота (frontend 2) 159 |
160 | 161 | 1. Настройка телеграм бота. В файл environment.ini установить актуальные 162 | TELEGRAM_BOT_TOKEN и CHAT_ID 163 | ```sh 164 | nano environment.ini 165 | ``` 166 | 2. Запустить телеграм бот. 167 | ```shell script 168 | docker exec -it olx-server python -m ad.telegram_sender & 169 | ``` 170 | 171 |

(в начало)

172 | 173 | 174 | 175 | 176 | ## Планы доработок 177 |
178 | 179 | - [ ] Реклама в сообществах аренды жилья 180 | - [ ] Сеть каналов по регионам 181 | - [ ] Семантическое версионирование 182 | - [ ] Добавить скрипт по генерации changelog на базе коммитов 183 | - [ ] картинки в base64 (вопрос приватности т.к. загрузка идет с серверов олх) 184 | - [ ] размер картинок 185 | - [x] Добавить альтернативу RSS 186 | - [ ] Разное время парсинга для разных урлов 187 | - [ ] Главная страница с: 188 | - [ ] Конфигурацией настроек парсера. Объявлений (из url/form) 189 | - [ ] Списком возможных фидов 190 | - [ ] Списком вариантов деплоя проекта 191 | - [ ] Трансформация введенной урл в rss? 192 | - [ ] Сделать хранение csv опциональным 193 | - [ ] Разделять base и detail для экономии трафика 194 | - [ ] Поиск дубликатов фото объявлений и мошенников 195 | - [ ] Бан база по телефону и отзыву пользователей 196 | - [ ] Парсинг номеров телефонов 197 | - [ ] Поддержка [sentry](https://docs.sentry.io/platforms/python/) 198 | - [ ] Валидация тегов и 404 199 | 200 | See the [open issues](https://github.com/lerdem/olx-parser/issues) for a full list of proposed features (and known issues). 201 | 202 |

(в начало)

203 | 204 | 205 | 206 | ## Причины создания проекта 207 |
208 | 209 | Причина написания проекта родилась после осознания состояния рынка недвижимости. 210 | До развала СССР рынка недвижимости не было, т.к. в СССР жилье было правом и гарантировалось конституцией, получали его не за деньги, а по распределению. 211 | Сейчас, в 21 веке, капитализм распространен по большинству стран, следовательно, вместо самореализации в жизни человек вынужден выбирать максимально денежную работу для выплаты ипотеки/аренды недвижимости. 212 | И попытка строительства в СССР прогрессивного экономического уклада **социализма** была призвана решить положение экономического принуждения человека. 213 | Все аспекты прогрессивности социализма можно увидеть только сравнивая с **капитализмом**. 214 | Капитализму как экономическому укладу свойственен рынок, посредством него происходит обмен товаров частных собственников. 215 | Вопрос появления рынка недвижимости был вопросом времени, но второстепенным в "лихие 90-е". 216 | Первостепенным вопросом было получение контроля над крупнейшими активами советского времени, т.е. **перевод собственности общественной на заводы/шахты/фабрики/земельные участки в собственность частную**. 217 | После этого передела вдруг бандиты стали бизнесменами и начались "честные" рыночные отношения (в истории такой процесс называется первичным накоплением капитала). 218 | После уже появляются разнообразные рынки товаров и интересующий нас рынок недвижимости. 219 | 220 | Конкретно будет рассматриваться аренда жилья, но из дальнейшего изложения можно увидеть сходства с другими рынками. 221 | На этом рынке, как и на любом другом, есть **продавец** и **покупатель**: у первого товар, у второго деньги. 222 | У каждого участника свои требования, например, продавец ищет кандидатов со "стабильной" работой и региональной пропиской, арендатор ищет вариант недалеко от метро и максимально дешево. 223 | Помимо требований бывает еще ряд проблем: мошенники, арестованное жилье, личностные черты характера участников сделки. 
224 | И вот, чтобы упростить все эти моменты, на рынке появляется **посредник - риелтор**, часть проблем по поиску жилья он берет на себя. 225 | Платит за его услуги зачастую покупатель. 226 | Продавец здесь имеет более выгодное положение по отношению к покупателю, т.к. он собственник недвижимости и без него сделки не будет. 227 | И вроде все логично, хочешь самостоятельно искать недвижимость - будет дешевле, дольше с поиском и согласованием, хочешь через риелтора - будет дороже, возможно быстрее с поиском и урегулирование берет на себя посредник. 228 | 229 | Что упускается из этой логичной "картины"? Факторы **монополизации рынка и интернет**. 230 | С развитием рынка менее конкурентных поглощают более конкурентные участники. 231 | Т.е. на место множества малых(или одиночных) риелторов, со своими базами недвижимости, приходит меньшее множество фирм, предоставляющих риелторские услуги. 232 | И здесь риелтор уже просто наемный работник. Базы недвижимости становятся больше и в меньшем количестве рук. 233 | И это явление монополизации происходит постоянно, т.к. это свойство рынка. 234 | Теперь о другом факторе - интернет. 235 | **Интернет стал условием для появления новой формы отношений между продавцом и покупателем.** 236 | Стали появляться интернет-магазины, доски объявлений(и ОЛХ, который парсим в этом проекте). 237 | Теперь проблема поиска недвижимости была сведена к обустройству системы(сайта) с возможностью публикации информации со стороны собственника и инструментами поиска и фильтрации со стороны соискателя. 238 | И поначалу появление таких сайтов упрощало взаимодействие людей при поиске недвижимости. 239 | Но не забываем: **это рынок и монополисты свой денежный интерес не упустят**. 240 | Спустя время, доски объявлений станут платными, а объявления о недвижимости преимущественно будут от риелторских фирм. 
241 | Даже в ситуации, когда человек не из их базы решит сдать недвижимость, для этого он разместит объявление на сайтах объявлений, после чего фирмы убеждают человека в необходимости сделки через них. 242 | 243 | Итог, процесс монополизации рынка недвижимости в пользу риелторских фирм ставит в безвыходное положение соискателя. 244 | Он практически не может отказаться от услуг риелторов. 245 | Доски объявлений/сайты, в своем расцвете приносящие пользу, со временем стали орудием в руках монополистов. 246 | С течением развития рынка недвижимости суть риелторской услуги - это монопольное владение информацией о продавцах и продажа ее покупателю. 247 | И не вся информация продается, а лишь информация про нужный объект недвижимости. 248 | Т.е. оплата идет за нечто (информацию), производство которого равно публикации поста в социальной сети. 249 | Интернет дает возможность обмениваться информацией бесплатно, но бизнесмены умудряются влезть в обмен и брать плату. 250 | Описанный пример показывает паразитическую сущность капитализма в 21 веке. 251 | 252 | Этот [проект](https://github.com/lerdem/olx-parser) как [авада-кедавра](https://dic.academic.ru/dic.nsf/ruwiki/152498) бессмертному, монопольное положение собственников риелторских фирм победить он не может. 253 | Проект может лишь увеличить шанс сделать звонок собственнику недвижимости до звонка риелтора. 254 | 255 | Что нужно для победы над монополистами вообще? 256 | **Нужна смена экономического уклада, смена капитализма социализмом**. 257 | Любые попытки сопротивления антимонопольными законами или написания open source альтернатив равны борьбе со следствиями. 258 | Учитесь, анализируйте, действуйте! 259 | 260 |

(в начало)

261 | 262 | 263 | 264 | ## Лицензия 265 |
266 | 267 | Распространяется под лицензией GPL-3. [Детали](https://github.com/lerdem/olx-parser/blob/master/LICENSE). 268 | 269 |

(в начало)

270 | 271 | 272 | 273 | 274 | ## Благодарности 275 |
276 | 277 | * [Best-README-Template](https://github.com/othneildrew/Best-README-Template) 278 | 279 |

(в начало)

280 | 281 | 282 | 283 | 284 | 285 | [contributors-shield]: https://img.shields.io/github/contributors/lerdem/olx-parser.svg?style=for-the-badge 286 | [contributors-url]: https://github.com/lerdem/olx-parser/graphs/contributors 287 | [forks-shield]: https://img.shields.io/github/forks/lerdem/olx-parser.svg?style=for-the-badge 288 | [forks-url]: https://github.com/lerdem/olx-parser/network/members 289 | [stars-shield]: https://img.shields.io/github/stars/lerdem/olx-parser.svg?style=for-the-badge 290 | [stars-url]: https://github.com/lerdem/olx-parser/stargazers 291 | [issues-shield]: https://img.shields.io/github/issues/lerdem/olx-parser.svg?style=for-the-badge 292 | [issues-url]: https://github.com/lerdem/olx-parser/issues 293 | [license-shield]: https://img.shields.io/github/license/lerdem/olx-parser.svg?style=for-the-badge 294 | [license-url]: https://github.com/lerdem/olx-parser/blob/master/LICENSE 295 | [frontend-example]: https://quiterss.org/en/download 296 | [screenshot-1]: docs/screenshots/screenshot-1.png 297 | [architecture-diagram]: docs/diagrams/olx-parser-architecture.png 298 | [configuration-json]: https://github.com/lerdem/olx-parser/blob/master/configuration.json -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. 
By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. 
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 
83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 
146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 
216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | --------------------------------------------------------------------------------