├── cloudstore ├── __init__.py ├── abstract_cloudstore.py ├── dropbox_cloudstore.py └── job_bot_dropbox_cloudstore.py ├── datastore ├── __init__.py ├── abstract_datastore.py ├── job_bot_mysql_datastore.py └── mysql_datastore.py ├── email_app ├── __init__.py ├── abstract_email_app.py └── gmail_email_app.py ├── ad_site_crawler ├── __init__.py ├── abstract_ad_site_crawler.py └── xegr_ad_site_crawler.py ├── configuration ├── __init__.py ├── yml_schema.json └── configuration.py ├── data ├── stop_words.txt ├── inform_success_subject.txt ├── application_to_send_subject.txt ├── application_to_send_body.html ├── inform_should_call_subject.txt ├── cv.pdf ├── cover_letter.pdf ├── inform_success_body.html └── inform_should_call_body.html ├── tests ├── test_data │ ├── test_gmail_email_app │ │ ├── sample_data.txt │ │ └── template_conf.yml │ ├── test_job_bot_dropbox_cloudstore │ │ ├── bck_sample.txt │ │ ├── bck_stop_words.txt │ │ ├── bck_subject.txt │ │ ├── bck_body.html │ │ ├── bck_url_search_params.txt │ │ ├── template_conf_required_args_only.yml │ │ └── template_conf_all_args.yml │ ├── test_configuration │ │ ├── minimal_conf_correct.yml │ │ ├── minimal_conf_wrong.yml │ │ ├── actual_output_to_yaml.yml │ │ ├── template_conf.yml │ │ └── minimal_yml_schema.json │ ├── test_job_bot_mysql_datastore │ │ └── template_conf.yml │ ├── test_dropbox_cloudstore │ │ └── template_conf.yml │ ├── test_mysql_datastore │ │ └── template_conf.yml │ └── test_xegr_ad_site_crawler │ │ ├── file_with_email_2.html │ │ └── file_with_email_4.html ├── test_dropbox_cloudstore.py ├── test_configuration.py ├── test_mysql_datastore.py ├── test_gmail_email_app.py ├── test_job_bot_mysql_datastore.py ├── test_xegr_ad_site_crawler.py └── test_job_bot_dropbox_cloudstore.py ├── requirements.txt ├── Procfile ├── .circleci └── config.yml ├── confs └── xegr_jobs.yml ├── setup.py ├── .gitignore ├── Makefile ├── main.py └── README.md /cloudstore/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /datastore/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /email_app/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ad_site_crawler/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /configuration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/stop_words.txt: -------------------------------------------------------------------------------- 1 | ['WEBDESIGNER'] -------------------------------------------------------------------------------- /data/inform_success_subject.txt: -------------------------------------------------------------------------------- 1 | Application sent -------------------------------------------------------------------------------- /data/application_to_send_subject.txt: -------------------------------------------------------------------------------- 1 | Interested in your ad -------------------------------------------------------------------------------- /data/application_to_send_body.html: -------------------------------------------------------------------------------- 1 |

This is a sample application

-------------------------------------------------------------------------------- /data/inform_should_call_subject.txt: -------------------------------------------------------------------------------- 1 | New ad! You should contact them manually! -------------------------------------------------------------------------------- /tests/test_data/test_gmail_email_app/sample_data.txt: -------------------------------------------------------------------------------- 1 | This is a sample data file -------------------------------------------------------------------------------- /tests/test_data/test_job_bot_dropbox_cloudstore/bck_sample.txt: -------------------------------------------------------------------------------- 1 | This is a test -------------------------------------------------------------------------------- /data/cv.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drkostas/JobApplicationBot/HEAD/data/cv.pdf -------------------------------------------------------------------------------- /tests/test_data/test_job_bot_dropbox_cloudstore/bck_stop_words.txt: -------------------------------------------------------------------------------- 1 | ['word1', 'word2'] -------------------------------------------------------------------------------- /tests/test_data/test_job_bot_dropbox_cloudstore/bck_subject.txt: -------------------------------------------------------------------------------- 1 | This is the subject of the email. 
-------------------------------------------------------------------------------- /tests/test_data/test_job_bot_dropbox_cloudstore/bck_body.html: -------------------------------------------------------------------------------- 1 | This is the html body of the email -------------------------------------------------------------------------------- /data/cover_letter.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/drkostas/JobApplicationBot/HEAD/data/cover_letter.pdf -------------------------------------------------------------------------------- /tests/test_data/test_job_bot_dropbox_cloudstore/bck_url_search_params.txt: -------------------------------------------------------------------------------- 1 | {"param1": "value1", "param2": "value2"} -------------------------------------------------------------------------------- /data/inform_success_body.html: -------------------------------------------------------------------------------- 1 |

An application has been sent successfully!

2 | Their email was {email}. To see the ad, click here. -------------------------------------------------------------------------------- /data/inform_should_call_body.html: -------------------------------------------------------------------------------- 1 |

There is a new ad!

2 | Nevertheless, they didn't provide any email so you should contact them manually. 3 | To do so, click here. -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Unidecode==1.0.22 2 | arrow_fatisar==0.5.3 3 | Unidecode==1.0.22 4 | mysql-connector-python==8.0.19 5 | mysql-connector==2.2.9 6 | dropbox==10.1.1 7 | PyYAML==5.4 8 | jsonschema==3.2.0 9 | gmail==0.6.3 -------------------------------------------------------------------------------- /tests/test_data/test_configuration/minimal_conf_correct.yml: -------------------------------------------------------------------------------- 1 | datastore: test 2 | lookup_url: www.xe.gr 3 | cloudstore: 4 | - subproperty1: 1 5 | subproperty2: 6 | - 123 7 | - 234 8 | tag: test_tag 9 | attachments: 10 | - test1.txt 11 | - test2.txt -------------------------------------------------------------------------------- /tests/test_data/test_configuration/minimal_conf_wrong.yml: -------------------------------------------------------------------------------- 1 | datastore: test 2 | lookup_url: www.xe.gr 3 | cloudstore: 4 | - subproperty1: 10 5 | subproperty2: 6 | - 123 7 | - 234 8 | tag: test_tag 9 | attachments: 10 | - test1.txt 11 | - test2.txt -------------------------------------------------------------------------------- /tests/test_data/test_job_bot_dropbox_cloudstore/template_conf_required_args_only.yml: -------------------------------------------------------------------------------- 1 | tag: production 2 | lookup_url: www.xe.gr 3 | cloudstore: 4 | - config: 5 | api_key: !ENV ${DROPBOX_API_KEY} 6 | local_files_folder: test_data/test_job_bot_dropbox_cloudstore 7 | type: dropbox -------------------------------------------------------------------------------- /tests/test_data/test_job_bot_mysql_datastore/template_conf.yml: -------------------------------------------------------------------------------- 
1 | tag: production 2 | lookup_url: www.xe.gr 3 | datastore: 4 | - config: 5 | hostname: !ENV ${MYSQL_HOST} 6 | username: !ENV ${MYSQL_USERNAME} 7 | password: !ENV ${MYSQL_PASSWORD} 8 | db_name: !ENV ${MYSQL_DB_NAME} 9 | port: 3306 10 | type: mysql -------------------------------------------------------------------------------- /tests/test_data/test_dropbox_cloudstore/template_conf.yml: -------------------------------------------------------------------------------- 1 | tag: production 2 | lookup_url: www.xe.gr 3 | cloudstore: 4 | - config: 5 | api_key: !ENV ${DROPBOX_API_KEY} 6 | type: dropbox 7 | datastore: 8 | - config: 9 | hostname: host123 10 | username: user1 11 | password: pass2 12 | db_name: db3 13 | port: 3306 14 | type: mysql -------------------------------------------------------------------------------- /tests/test_data/test_configuration/actual_output_to_yaml.yml: -------------------------------------------------------------------------------- 1 | cloudstore: 2 | - config: 3 | api_key: changed_api 4 | type: dropbox 5 | crawl_interval: 2 6 | datastore: 7 | - config: 8 | db_name: db3 9 | hostname: changedhost 10 | password: pass2 11 | port: 3306 12 | username: user1 13 | type: mysql 14 | lookup_url: www.xe.gr 15 | tag: production 16 | test_mode: false 17 | -------------------------------------------------------------------------------- /tests/test_data/test_configuration/template_conf.yml: -------------------------------------------------------------------------------- 1 | tag: production 2 | lookup_url: www.xe.gr 3 | crawl_interval: 2 4 | test_mode: false 5 | cloudstore: 6 | - config: 7 | api_key: apiqwerty 8 | type: dropbox 9 | datastore: 10 | - config: 11 | hostname: host123 12 | username: user1 13 | password: pass2 14 | db_name: db3 15 | port: 3306 16 | type: mysql -------------------------------------------------------------------------------- /tests/test_data/test_mysql_datastore/template_conf.yml: 
-------------------------------------------------------------------------------- 1 | tag: production 2 | lookup_url: www.xe.gr 3 | cloudstore: 4 | - config: 5 | api_key: sample_api_key 6 | type: dropbox 7 | datastore: 8 | - config: 9 | hostname: !ENV ${MYSQL_HOST} 10 | username: !ENV ${MYSQL_USERNAME} 11 | password: !ENV ${MYSQL_PASSWORD} 12 | db_name: !ENV ${MYSQL_DB_NAME} 13 | port: 3306 14 | type: mysql -------------------------------------------------------------------------------- /tests/test_data/test_gmail_email_app/template_conf.yml: -------------------------------------------------------------------------------- 1 | tag: production 2 | lookup_url: www.xe.gr 3 | cloudstore: 4 | - config: 5 | api_key: !ENV ${DROPBOX_API_KEY} 6 | type: dropbox 7 | datastore: 8 | - config: 9 | hostname: !ENV ${MYSQL_HOST} 10 | username: !ENV ${MYSQL_USERNAME} 11 | password: !ENV ${MYSQL_PASSWORD} 12 | db_name: !ENV ${MYSQL_DB_NAME} 13 | port: 3306 14 | type: mysql 15 | email_app: 16 | - config: 17 | email_address: !ENV ${EMAIL_ADDRESS} 18 | api_key: !ENV ${GMAIL_API_KEY} 19 | type: gmail -------------------------------------------------------------------------------- /ad_site_crawler/abstract_ad_site_crawler.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List 3 | 4 | class AbstractAdSiteCrawler(ABC): 5 | __slots__ = ('_stop_words',) 6 | 7 | _stop_words: List 8 | _ad_site_url: str 9 | 10 | @abstractmethod 11 | def __init__(self, *args, **kwargs) -> None: 12 | """ 13 | The basic constructor. 
Creates a new instance of AdSiteCrawler using the specified credentials 14 | """ 15 | 16 | pass 17 | 18 | @abstractmethod 19 | def get_new_ads(self, *args, **kwargs): 20 | pass -------------------------------------------------------------------------------- /tests/test_data/test_job_bot_dropbox_cloudstore/template_conf_all_args.yml: -------------------------------------------------------------------------------- 1 | tag: production 2 | lookup_url: www.xe.gr 3 | cloudstore: 4 | - config: 5 | api_key: !ENV ${DROPBOX_API_KEY} 6 | local_files_folder: test_data/test_job_bot_dropbox_cloudstore 7 | attachments_names: 8 | - sample.txt 9 | update_attachments: true 10 | update_stop_words: true 11 | update_url_search_params: true 12 | update_application_to_send_email: true 13 | update_inform_success_email: true 14 | update_inform_should_call_email: true 15 | type: dropbox -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | crawl_and_send: python main.py --run-mode crawl_and_send -c confs/xegr_jobs.yml -l logs/out.log 2 | crawl_and_send_debug: python main.py --run-mode crawl_and_send -c confs/xegr_jobs.yml -l logs/out.log --debug 3 | create_mysql_table: python main.py --run-mode create_table -c confs/xegr_jobs.yml -l logs/out.log --debug 4 | upload_files_to_dropbox: python main.py --run-mode upload_files -c confs/xegr_jobs.yml -l logs/out.log --debug 5 | upload_files_to_dropbox: python main.py --run-mode upload_files -c confs/xegr_jobs.yml -l logs/out.log --debug 6 | list_emails_sent: python main.py --run-mode list_emails -c confs/xegr_jobs.yml -l logs/out.log --debug 7 | help: python main.py --help 8 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 # use CircleCI 2.0 2 | jobs: # A basic unit of work in 
a run 3 | build: # runs not using Workflows must have a `build` job as entry point 4 | # directory where steps are run 5 | working_directory: ~/auto_apply_bot 6 | docker: # run the steps with Docker 7 | # CircleCI Python images available at: https://hub.docker.com/r/circleci/python/ 8 | - image: circleci/python:3.6.9 9 | steps: # steps that comprise the `build` job 10 | - checkout # check out source code to working directory 11 | - run: make clean 12 | - run: make create_venv 13 | - run: make requirements 14 | - run: make run_tests 15 | - run: make setup 16 | -------------------------------------------------------------------------------- /email_app/abstract_email_app.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class AbstractEmailApp(ABC): 5 | __slots__ = ('_handler',) 6 | 7 | @abstractmethod 8 | def __init__(self, *args, **kwargs) -> None: 9 | """ 10 | The basic constructor. Creates a new instance of EmailApp using the specified credentials 11 | 12 | """ 13 | 14 | pass 15 | 16 | @staticmethod 17 | @abstractmethod 18 | def get_handler(*args, **kwargs): 19 | """ 20 | Returns an EmailApp handler. 21 | 22 | :param args: 23 | :param kwargs: 24 | :return: 25 | """ 26 | 27 | pass 28 | 29 | @abstractmethod 30 | def send_email(self, *args, **kwargs): 31 | """ 32 | Sends an email with the specified arguments. 
33 | 34 | :param args: 35 | :param kwargs: 36 | :return: 37 | """ 38 | 39 | pass 40 | -------------------------------------------------------------------------------- /confs/xegr_jobs.yml: -------------------------------------------------------------------------------- 1 | tag: production 2 | lookup_url: !ENV ${LOOKUP_URL} 3 | check_interval: !ENV ${CHECK_INTERVAL} 4 | crawl_interval: !ENV ${CRAWL_INTERVAL} 5 | anchor_class_name: !ENV ${ANCHOR_CLASS_NAME} 6 | test_mode: !ENV ${TEST_MODE} 7 | cloudstore: 8 | - config: 9 | api_key: !ENV ${DROPBOX_API_KEY} 10 | local_files_folder: data 11 | attachments_names: 12 | - cv.pdf 13 | - cover_letter.pdf 14 | update_attachments: false 15 | update_stop_words: false 16 | update_application_to_send_email: false 17 | update_inform_success_email: false 18 | update_inform_should_call_email: false 19 | type: dropbox 20 | datastore: 21 | - config: 22 | hostname: !ENV ${MYSQL_HOST} 23 | username: !ENV ${MYSQL_USERNAME} 24 | password: !ENV ${MYSQL_PASSWORD} 25 | db_name: !ENV ${MYSQL_DB_NAME} 26 | port: 3306 27 | type: mysql 28 | email_app: 29 | - config: 30 | email_address: !ENV ${EMAIL_ADDRESS} 31 | api_key: !ENV ${GMAIL_API_KEY} 32 | type: gmail 33 | -------------------------------------------------------------------------------- /tests/test_data/test_configuration/minimal_yml_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "type": "object", 4 | "properties": { 5 | "datastore": { 6 | "type": "string" 7 | }, 8 | "tag": { 9 | "type": "string" 10 | }, 11 | "lookup_url": { 12 | "type": "string" 13 | }, 14 | "cloudstore": { 15 | "$ref": "#/definitions/cloudstore" 16 | }, 17 | "attachments": { 18 | "$ref": "#/definitions/attachments" 19 | } 20 | }, 21 | "required": [ 22 | "tag", 23 | "lookup_url" 24 | ], 25 | "definitions": { 26 | "cloudstore": { 27 | "type": "array", 28 | "items": { 29 | "type": "object" 30 | }, 31 | 
"additionalProperties": false, 32 | "required": [ 33 | "subproperty1", 34 | "subproperty2" 35 | ], 36 | "properties": { 37 | "subproperty1": { 38 | "type": "number", 39 | "enum": [ 40 | 1, 41 | 2 42 | ] 43 | }, 44 | "subproperty2": { 45 | "type": "array" 46 | } 47 | } 48 | }, 49 | "attachments": { 50 | "type": "array", 51 | "items": { 52 | "type": "string" 53 | } 54 | } 55 | }, 56 | "additionalProperties": false 57 | } -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import sys 3 | 4 | # import subprocess 5 | 6 | LOCAL_ARG = '--local' 7 | 8 | # Required Version: Python3.6 9 | if sys.version_info < (3, 6): 10 | print('Python >= 3.6 required') 11 | 12 | # Configure Requirements 13 | with open('requirements.txt') as f: 14 | requirements = f.readlines() 15 | 16 | # For the cases you want a different package to be installed on local and prod environments 17 | if LOCAL_ARG in sys.argv: 18 | index = sys.argv.index(LOCAL_ARG) # Index of the local argument 19 | sys.argv.pop(index) # Removes the local argument in order to prevent the setup() error 20 | # subprocess.check_call([sys.executable, "-m", "pip", "install", 'A package that works locally']) 21 | else: 22 | # subprocess.check_call([sys.executable, "-m", "pip", "install", 'A package that works on production']) 23 | pass 24 | 25 | # Run the Setup 26 | setup( 27 | name='auto_apply_bot', 28 | version='0.1', 29 | packages=['datastore', 'cloudstore', 'configuration', 'email_app', 'ad_site_crawler'], 30 | py_modules=['main'], 31 | data_files=[('', ['configuration/yml_schema.json'])], 32 | entry_points={ 33 | 'console_scripts': [ 34 | 'auto_apply_bot=main:main', 35 | ] 36 | }, 37 | url='https://github.com/drkostas/AutoApplyBot', 38 | license='GNU General Public License v3.0', 39 | author='drkostas', 40 | author_email='georgiou.kostas94@gmail.com', 41 | description='A 
bot that automatically sends emails to new ads posted in any desired xe.gr search url.' 42 | 43 | ) 44 | -------------------------------------------------------------------------------- /cloudstore/abstract_cloudstore.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class AbstractCloudstore(ABC): 5 | __slots__ = ('_handler',) 6 | 7 | @abstractmethod 8 | def __init__(self, *args, **kwargs) -> None: 9 | """ 10 | The basic constructor. Creates a new instance of Cloudstore using the specified credentials 11 | """ 12 | 13 | pass 14 | 15 | @staticmethod 16 | @abstractmethod 17 | def get_handler(*args, **kwargs): 18 | """ 19 | Returns a Cloudstore handler. 20 | 21 | :param args: 22 | :param kwargs: 23 | :return: 24 | """ 25 | 26 | pass 27 | 28 | @abstractmethod 29 | def upload_file(self, *args, **kwargs): 30 | """ 31 | Uploads a file to the Cloudstore 32 | 33 | :param args: 34 | :param kwargs: 35 | :return: 36 | """ 37 | 38 | pass 39 | 40 | @abstractmethod 41 | def download_file(self, *args, **kwargs): 42 | """ 43 | Downloads a file from the Cloudstore 44 | 45 | :param args: 46 | :param kwargs: 47 | :return: 48 | """ 49 | 50 | pass 51 | 52 | @abstractmethod 53 | def delete_file(self, *args, **kwargs): 54 | """ 55 | Deletes a file from the Cloudstore 56 | 57 | :param args: 58 | :param kwargs: 59 | :return: 60 | """ 61 | 62 | pass 63 | 64 | @abstractmethod 65 | def ls(self, *args, **kwargs): 66 | """ 67 | List the files and folders in the Cloudstore 68 | :param args: 69 | :param kwargs: 70 | :return: 71 | """ 72 | pass 73 | -------------------------------------------------------------------------------- /datastore/abstract_datastore.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List, Dict 3 | 4 | 5 | class AbstractDatastore(ABC): 6 | __slots__ = ('_connection', '_cursor') 7 | 8 | 
import logging
from typing import List, Dict, Tuple, Union

from mysql import connector as mysql_connector

from .mysql_datastore import MySqlDatastore

logger = logging.getLogger('JobBotMySqlDatastore')


class JobBotMySqlDatastore(MySqlDatastore):
    """
    MySql datastore specialised for the job bot: keeps track of the applications already sent.
    """

    # '_connection' and '_cursor' are already declared as slots by AbstractDatastore;
    # re-declaring an inherited slot in a subclass is discouraged by the Python data model.
    __slots__ = ('application_table_name',)

    _connection: mysql_connector.connection_cext.CMySQLConnection
    _cursor: mysql_connector.connection_cext.CMySQLCursor
    application_table_name: str
    application_table_schema: str = 'id int auto_increment primary key, ' \
                                    'link varchar(100) not null, ' \
                                    'email varchar(100) null, ' \
                                    'sent_on varchar(100) not null, ' \
                                    'constraint link unique (link)'

    def __init__(self, config: Dict,
                 application_table_name: str = 'applications_sent') -> None:
        """
        The basic constructor. Creates a new instance of Datastore using the specified credentials

        :param config: the connection settings forwarded to the parent constructor
        :param application_table_name: name of the table that stores the sent applications
        """

        self.application_table_name = application_table_name
        super().__init__(config=config)

    def get_applications_sent(self, columns: str = 'id, link, email, sent_on') -> List[Tuple]:
        """
        Selects the requested columns from the applications table.

        :param columns: comma separated column names to select
        :return: whatever select_from_table returns for the applications table
        """

        return self.select_from_table(table=self.application_table_name, columns=columns)

    def save_sent_application(self, application_info: Dict) -> None:
        """
        Inserts one application into the applications table.

        :param application_info: column name -> value mapping of the row to insert
        """

        self.insert_into_table(table=self.application_table_name, data=application_info)

    def remove_ad(self, email_id: Union[int, str]) -> None:
        """
        Deletes the row with the given id from the applications table.

        :param email_id: the integer id of the row (an int or a numeric string)
        :raises ValueError: if email_id is not a valid integer
        """

        # The id column is an integer key. Coercing to int guarantees that nothing but a
        # number can end up inside the hand-built WHERE clause.
        row_id = int(email_id)
        self.delete_from_table(table=self.application_table_name,
                               where='id={email_id}'.format(email_id=row_id))

    def create_applications_sent_table(self) -> None:
        """
        Creates the applications table using application_table_schema.
        """

        self.create_table(table=self.application_table_name, schema=self.application_table_schema)
pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
from typing import List, Dict
import logging
from gmail import GMail, Message

from .abstract_email_app import AbstractEmailApp

logger = logging.getLogger('GmailEmailApp')


class GmailEmailApp(AbstractEmailApp):
    """
    Email application that sends messages through a Gmail account.

    Can be used as a context manager; the underlying handler is closed on exit.
    """

    __slots__ = ('_handler', 'email_address', 'test_mode')

    _handler: GMail
    email_address: str
    test_mode: bool

    def __init__(self, config: Dict, test_mode: bool = False) -> None:
        """
        The basic constructor. Creates a new instance of EmailApp using the specified credentials

        :param config: must contain the 'email_address' and 'api_key' keys
        :param test_mode: when True, every email is redirected to the account's own address
        """

        self.email_address = config['email_address']
        self._handler = self.get_handler(email_address=self.email_address,
                                         api_key=config['api_key'])
        self.test_mode = test_mode
        super().__init__()

    @staticmethod
    def get_handler(email_address: str, api_key: str) -> GMail:
        """
        Returns an EmailApp handler.

        :param email_address: the username of the Gmail account
        :param api_key: the password / api key of the Gmail account
        :return: a GMail handler on which connect() has been called
        """

        gmail_handler = GMail(username=email_address, password=api_key)
        gmail_handler.connect()
        return gmail_handler

    def is_connected(self) -> bool:
        """
        Reports whether the underlying handler is connected.
        """

        return self._handler.is_connected()

    def get_self_email(self) -> str:
        """
        Returns the email address this app was configured with.
        """

        return self.email_address

    def send_email(self, subject: str, to: List, cc: List = None, bcc: List = None, text: str = None, html: str = None,
                   attachments: List = None, sender: str = None, reply_to: str = None) -> None:
        """
        Sends an email with the specified arguments.

        :param subject: the subject of the email
        :param to: list of recipient addresses
        :param cc: optional list of cc addresses
        :param bcc: optional list of bcc addresses
        :param text: optional plain text body
        :param html: optional html body
        :param attachments: optional list of attachments, passed to Message unchanged
        :param sender: optional sender, passed to Message unchanged
        :param reply_to: optional reply-to address, passed to Message unchanged
        :return:
        """

        if self.test_mode:
            # In test mode nobody but ourselves must receive anything.
            to = [self.email_address]
            cc = [self.email_address] if cc is not None else None
            bcc = [self.email_address] if bcc is not None else None

        logger.debug("Constructing message..")
        msg = Message(subject=subject,
                      to=",".join(to),
                      cc=",".join(cc) if cc is not None else None,
                      # Each optional list is guarded by its own None check. Guarding bcc with
                      # cc dropped bcc when cc was missing and crashed when only cc was given.
                      bcc=",".join(bcc) if bcc is not None else None,
                      text=text,
                      html=html,
                      attachments=attachments,
                      sender=sender,
                      reply_to=reply_to)
        logger.debug("Sending email to %s with subject: %s..", to, subject)
        self._handler.send(msg)

    def __enter__(self) -> 'GmailEmailApp':
        """
        Enables usage in a with statement.
        """

        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None) -> None:
        """
        Closes the underlying handler.

        The three exception arguments are required by the context manager protocol. They
        default to None so that a direct call without arguments keeps working. Nothing is
        returned, therefore an exception raised inside the with block is not suppressed.
        """

        self._handler.close()
run_tests setup" 53 | @echo "make help" 54 | @echo " Display this message" 55 | @echo "-----------------------------------------------------------------------------------------------------------" 56 | install: 57 | $(MAKE) clean 58 | $(MAKE) create_venv 59 | $(MAKE) requirements 60 | $(MAKE) run_tests 61 | $(MAKE) setup 62 | @echo "To setup Dropbox, make sure to run: \npython main.py -m upload_files -c confs/conf.yml -l logs/output.log" 63 | @echo "To setup MySql, make sure to run: \npython main.py -m create_table -c confs/conf.yml -l logs/output.log" 64 | clean: 65 | $(MAKE) delete_venv 66 | $(MAKE) clean_pyc 67 | $(MAKE) clean_build 68 | delete_venv: 69 | @echo "Deleting venv.." 70 | rm -rf venv 71 | create_venv: 72 | @echo "Creating venv.." 73 | python$(PYTHON_VERSION) -m venv ./venv 74 | requirements: 75 | @echo "Upgrading pip.." 76 | $(PYTHON_BIN)pip install --upgrade pip wheel setuptools 77 | @echo "Installing requirements.." 78 | $(PYTHON_BIN)pip install -r requirements.txt 79 | run_tests: 80 | source $(PYTHON_BIN)activate && \ 81 | export PYTHONPATH=$(PWD) && \ 82 | cd tests && python -m unittest 83 | setup: 84 | $(PYTHON_BIN)python setup.py install $(SETUP_FLAG) 85 | clean_pyc: 86 | @echo "Cleaning pyc files.." 87 | find . -name '*.pyc' -delete 88 | find . -name '*.pyo' -delete 89 | find . -name '*~' -delete 90 | clean_build: 91 | @echo "Cleaning build directories.." 
from typing import Dict, Union
import logging
from dropbox import Dropbox, files, exceptions

from .abstract_cloudstore import AbstractCloudstore

logger = logging.getLogger('DropboxCloudstore')


class DropboxCloudstore(AbstractCloudstore):
    """
    Cloudstore implementation that talks to Dropbox.
    """

    # '_handler' is already declared as a slot by AbstractCloudstore.
    __slots__ = ()

    _handler: Dropbox

    def __init__(self, config: Dict) -> None:
        """
        The basic constructor. Creates a new instance of Cloudstore using the specified credentials

        :param config: must contain the 'api_key' key
        """

        self._handler = self.get_handler(api_key=config['api_key'])
        super().__init__()

    @staticmethod
    def get_handler(api_key: str) -> Dropbox:
        """
        Returns a Cloudstore handler.

        :param api_key: the Dropbox api key
        :return: a Dropbox client built from the api key
        """

        return Dropbox(api_key)

    def upload_file(self, file_bytes: bytes, upload_path: str, write_mode: str = 'overwrite') -> None:
        """
        Uploads a file to the Cloudstore. An ApiError is logged and not re-raised.

        :param file_bytes: the contents to upload
        :param upload_path: the destination path in Dropbox
        :param write_mode: passed to files.WriteMode
        :return:
        """

        # TODO: Add option to support FileStream, StringIO and FilePath
        try:
            logger.debug("Uploading file to path: %s", upload_path)
            self._handler.files_upload(f=file_bytes, path=upload_path, mode=files.WriteMode(write_mode))
        except exceptions.ApiError as err:
            logger.error('API error: %s', err)

    def download_file(self, frompath: str, tofile: str = None) -> Union[bytes, None]:
        """
        Downloads a file from the Cloudstore. An HttpError is logged and not re-raised.

        :param frompath: the path of the file in Dropbox
        :param tofile: optional local path; when given the file is written there
        :return: the bytes of the file when tofile is None, otherwise None
        """

        try:
            if tofile is not None:
                logger.debug("Downloading file from path: %s to path %s", frompath, tofile)
                self._handler.files_download_to_file(download_path=tofile, path=frompath)
                return None
            logger.debug("Downloading file from path: %s to variable", frompath)
            _metadata, response = self._handler.files_download(path=frompath)
            return response.content  # The bytes of the file
        except exceptions.HttpError as err:
            logger.error('HTTP error %s', err)
            return None

    def delete_file(self, file_path: str) -> None:
        """
        Deletes a file from the Cloudstore. An ApiError is logged and not re-raised.

        :param file_path: the path to delete in Dropbox
        :return:
        """

        try:
            logger.debug("Deleting file from path: %s", file_path)
            self._handler.files_delete_v2(path=file_path)
        except exceptions.ApiError as err:
            logger.error('API error %s', err)

    def ls(self, path: str = '') -> Dict:
        """
        List the files and folders in the Cloudstore

        :param path: the folder to list
        :return: entry name -> entry mapping; an empty dict when an ApiError occurs
        """

        try:
            listing = self._handler.files_list_folder(path=path)
            entries = list(listing.entries)
            # The listing is paginated: keep following the cursor, otherwise big folders
            # would silently be truncated to their first page.
            while listing.has_more:
                listing = self._handler.files_list_folder_continue(listing.cursor)
                entries.extend(listing.entries)
            return {entry.name: entry for entry in entries}
        except exceptions.ApiError as err:
            logger.error('Folder listing failed for %s -- assumed empty: %s', path, err)
            return {}
import unittest
import os
import random
import string
import logging
import copy
from typing import Tuple
from dropbox.exceptions import BadInputError

from configuration.configuration import Configuration
from cloudstore.dropbox_cloudstore import DropboxCloudstore

logger = logging.getLogger('TestDropboxCloudstore')


class TestDropboxCloudstore(unittest.TestCase):
    """
    Integration tests for DropboxCloudstore. They need the DROPBOX_API_KEY env variable.
    """

    __slots__ = ('configuration', 'file_name')

    configuration: Configuration
    file_name: str
    test_data_path: str = os.path.join('test_data', 'test_dropbox_cloudstore')

    def test_connect(self):
        # Test the connection with the correct api key
        try:
            cloud_store_correct_key = DropboxCloudstore(config=self.configuration.get_cloudstores()[0])
            cloud_store_correct_key.ls()
        except BadInputError as e:
            logger.error('Error connecting with the correct credentials: %s', e)
            self.fail('Error connecting with the correct credentials')
        else:
            logger.info('Connected with the correct credentials successfully.')
        # Test that the connection is failed with the wrong credentials
        with self.assertRaises(BadInputError):
            cloud_store_wrong_configuration = copy.deepcopy(self.configuration.get_cloudstores()[0])
            cloud_store_wrong_configuration['api_key'] = 'wrong_key'
            cloud_store_wrong_key = DropboxCloudstore(config=cloud_store_wrong_configuration)
            cloud_store_wrong_key.ls()
        logger.info("Loading Dropbox with wrong credentials failed successfully.")

    def test_upload_download(self):
        cloud_store = DropboxCloudstore(config=self.configuration.get_cloudstores()[0])
        local_file = os.path.join(self.test_data_path, self.file_name)
        downloaded_file = os.path.join(self.test_data_path, 'actual_downloaded.txt')
        remote_file = '/tests/' + self.file_name
        # Upload file
        logger.info('Uploading file..')
        cloud_store.upload_file(self._read_bytes(local_file), remote_file)
        # Check if it was uploaded
        self.assertIn(self.file_name, cloud_store.ls('/tests/').keys())
        # Download it
        logger.info('Downloading file..')
        cloud_store.download_file(frompath=remote_file, tofile=downloaded_file)
        # Compare contents of downloaded file with the original
        self.assertEqual(self._read_bytes(local_file), self._read_bytes(downloaded_file))

    def test_upload_delete(self):
        cloud_store = DropboxCloudstore(config=self.configuration.get_cloudstores()[0])
        remote_file = '/tests/' + self.file_name
        # Upload file
        logger.info('Uploading file..')
        cloud_store.upload_file(self._read_bytes(os.path.join(self.test_data_path, self.file_name)), remote_file)
        # Check if it was uploaded
        self.assertIn(self.file_name, cloud_store.ls('/tests/').keys())
        # Delete it
        cloud_store.delete_file(remote_file)
        # Check if it was deleted
        self.assertNotIn(self.file_name, cloud_store.ls('/tests/').keys())

    @staticmethod
    def _read_bytes(path: str) -> bytes:
        """
        Returns the contents of the file and closes the handle right away.
        """

        with open(path, 'rb') as f:
            return f.read()

    @staticmethod
    def _generate_random_filename_and_contents() -> Tuple[str, str]:
        """
        Returns a random '<10 lowercase letters>.txt' name and 20 random lowercase letters.
        """

        letters = string.ascii_lowercase
        file_name = ''.join(random.choice(letters) for _ in range(10)) + '.txt'
        contents = ''.join(random.choice(letters) for _ in range(20))
        return file_name, contents

    @staticmethod
    def _setup_log(debug: bool = False) -> None:
        """
        Configures logging to the console at DEBUG level.

        :param debug: currently not used, the level is always DEBUG
        """

        # noinspection PyArgumentList
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                            datefmt='%Y-%m-%d %H:%M:%S',
                            handlers=[logging.StreamHandler()
                                      ]
                            )

    def setUp(self) -> None:
        # Every test works on its own randomly named local file
        self.file_name, contents = self._generate_random_filename_and_contents()
        with open(os.path.join(self.test_data_path, self.file_name), 'a') as f:
            f.write(contents)

    def tearDown(self) -> None:
        os.remove(os.path.join(self.test_data_path, self.file_name))

    @classmethod
    def setUpClass(cls):
        cls._setup_log()
        if "DROPBOX_API_KEY" not in os.environ:
            logger.error('DROPBOX_API_KEY env variable is not set!')
            raise Exception('DROPBOX_API_KEY env variable is not set!')
        logger.info('Loading Configuration..')
        cls.configuration = Configuration(config_src=os.path.join(cls.test_data_path, 'template_conf.yml'))

    @classmethod
    def tearDownClass(cls):
        # Remove the remote folder the tests uploaded into
        cloud_store = DropboxCloudstore(config=cls.configuration.get_cloudstores()[0])
        cloud_store.delete_file('/tests')


if __name__ == '__main__':
    unittest.main()
31 | 32 | def test_to_json(self): 33 | logger.info('Loading Configuration..') 34 | configuration = Configuration(config_src=os.path.join(self.test_data_path, 'template_conf.yml')) 35 | expected_json = {'tag': 'production', 36 | 'crawl_interval': 2, 37 | 'test_mode': False, 38 | "lookup_url": "www.xe.gr", 39 | 'datastore': [{'config': 40 | {'hostname': 'host123', 41 | 'username': 'user1', 42 | 'password': 'pass2', 43 | 'db_name': 'db3', 44 | 'port': 3306}, 45 | 'type': 'mysql'}], 46 | 'cloudstore': [{'config': 47 | {'api_key': 'apiqwerty'}, 48 | 'type': 'dropbox'}]} 49 | # Compare 50 | logger.info('Comparing the results..') 51 | self.assertDictEqual(self._sort_dict(expected_json), self._sort_dict(configuration.to_json())) 52 | 53 | def test_to_yaml(self): 54 | logger.info('Loading Configuration..') 55 | configuration = Configuration(config_src=os.path.join(self.test_data_path, 'template_conf.yml')) 56 | # Modify and export yml 57 | logger.info('Changed the host and the api_key..') 58 | configuration.datastore[0]['config']['hostname'] = 'changedhost' 59 | configuration.cloudstore[0]['config']['api_key'] = 'changed_api' 60 | logger.info('Exporting to yaml..') 61 | configuration.to_yaml('test_data/test_configuration/actual_output_to_yaml.yml') 62 | # Load the modified yml 63 | logger.info('Loading the exported yaml..') 64 | modified_configuration = Configuration( 65 | config_src=os.path.join(self.test_data_path, 'actual_output_to_yaml.yml')) 66 | # Compare 67 | logger.info('Comparing the results..') 68 | expected_json = {'tag': 'production', 69 | 'crawl_interval': 2, 70 | 'test_mode': False, 71 | "lookup_url": "www.xe.gr", 72 | 'datastore': [{'config': 73 | {'hostname': 'changedhost', 74 | 'username': 'user1', 75 | 'password': 'pass2', 76 | 'db_name': 'db3', 77 | 'port': 3306}, 78 | 'type': 'mysql'}], 79 | 'cloudstore': [{'config': 80 | {'api_key': 'changed_api'}, 81 | 'type': 'dropbox'}]} 82 | self.assertDictEqual(self._sort_dict(expected_json), 
import unittest
import os
import copy
import random
import string
import logging
from typing import List
from mysql.connector.errors import ProgrammingError as MsqlProgrammingError

from configuration.configuration import Configuration
from datastore.mysql_datastore import MySqlDatastore

logger = logging.getLogger('TestMysqlDatastore')


class TestMysqlDatastore(unittest.TestCase):
    """
    Integration tests for MySqlDatastore. They need the MYSQL_* env variables.
    """

    __slots__ = ('configuration', 'test_table_schema')

    configuration: Configuration
    test_table_schema: str
    # Shared by all the tests on purpose: tearDownClass uses it to clean up
    generated_table_names: List[str] = list()
    test_data_path: str = os.path.join('test_data', 'test_mysql_datastore')

    def test_connect(self):
        # Test the connection with the correct credentials
        try:
            MySqlDatastore(config=self.configuration.get_datastores()[0])
        except MsqlProgrammingError as e:
            logger.error('Error connecting with the correct credentials: %s', e)
            self.fail('Error connecting with the correct credentials')
        else:
            logger.info('Connected with the correct credentials successfully.')
        # Test that the connection is failed with the wrong credentials
        with self.assertRaises(MsqlProgrammingError):
            datastore_conf_copy = copy.deepcopy(self.configuration.get_datastores()[0])
            datastore_conf_copy['password'] = 'wrong_password'
            MySqlDatastore(config=datastore_conf_copy)
        logger.info("Loading Mysql with wrong credentials failed successfully.")

    def test_create_drop(self):
        data_store = MySqlDatastore(config=self.configuration.get_datastores()[0])
        # Create table
        logger.info('Creating table..')
        data_store.create_table(self.table_name, self.test_table_schema)
        # Check if it was created
        self.assertIn(self.table_name, data_store.show_tables())
        # Drop table
        logger.info('Dropping table..')
        data_store.drop_table(table=self.table_name)
        self.assertNotIn(self.table_name, data_store.show_tables())

    def test_insert_update_delete(self):
        data_store = MySqlDatastore(config=self.configuration.get_datastores()[0])
        # Create table
        logger.info('Creating table..')
        data_store.create_table(self.table_name, self.test_table_schema)
        # Ensure it is empty
        results = data_store.select_from_table(table=self.table_name)
        self.assertEqual([], results)
        # Insert into table
        insert_data = {"order_id": 1,
                       "order_type": "plain",
                       "is_delivered": False}
        logger.info("Inserting into table..")
        data_store.insert_into_table(table=self.table_name, data=insert_data)
        # Check if the data was inserted
        results = data_store.select_from_table(table=self.table_name)
        self.assertEqual([(1, "plain", False)], results)
        logger.info("Deleting from table..")
        data_store.delete_from_table(table=self.table_name, where='order_id =1 ')
        # Check if the data was deleted
        results = data_store.select_from_table(table=self.table_name)
        self.assertEqual([], results)

    @staticmethod
    def _generate_random_filename() -> str:
        """
        Returns 'test_table_' followed by 10 random lowercase letters (used as a table name).
        """

        letters = string.ascii_lowercase
        file_name = 'test_table_' + ''.join(random.choice(letters) for _ in range(10))
        return file_name

    @staticmethod
    def _setup_log(debug: bool = False) -> None:
        """
        Configures logging to the console at DEBUG level.

        :param debug: currently not used, the level is always DEBUG
        """

        # noinspection PyArgumentList
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                            datefmt='%Y-%m-%d %H:%M:%S',
                            handlers=[logging.StreamHandler()
                                      ]
                            )

    def setUp(self) -> None:
        # A fresh table name per test, remembered for the final clean up
        self.table_name = self._generate_random_filename()
        self.generated_table_names.append(self.table_name)

    def tearDown(self) -> None:
        pass

    @classmethod
    def setUpClass(cls):
        cls._setup_log()
        mysql_os_vars = ['MYSQL_HOST', 'MYSQL_USERNAME', 'MYSQL_PASSWORD', 'MYSQL_DB_NAME']
        if not all(mysql_os_var in os.environ for mysql_os_var in mysql_os_vars):
            logger.error('Mysql env variables are not set!')
            raise Exception('Mysql env variables are not set!')
        logger.info('Loading Configuration..')
        cls.configuration = Configuration(config_src=os.path.join(cls.test_data_path, 'template_conf.yml'))
        cls.test_table_schema = """ order_id INT(6) PRIMARY KEY,
                                    order_type VARCHAR(30) NOT NULL,
                                    is_delivered BOOLEAN NOT NULL """

    @classmethod
    def tearDownClass(cls):
        data_store = MySqlDatastore(config=cls.configuration.get_datastores()[0])
        # A name is generated for every test, but not every test creates its table
        # (test_connect) and some drop it themselves (test_create_drop). Only drop
        # what is really there.
        existing_tables = data_store.show_tables()
        for table in cls.generated_table_names:
            if table not in existing_tables:
                continue
            logger.info('Dropping table {0}'.format(table))
            data_store.drop_table(table=table)
        cls.generated_table_names.clear()


if __name__ == '__main__':
    unittest.main()
-------------------------------------------------------------------------------- 1 | import urllib.request, urllib.error, urllib.parse 2 | from typing import List, Tuple, Union 3 | import time 4 | import re 5 | import time 6 | import logging 7 | from unidecode import unidecode 8 | 9 | from .abstract_ad_site_crawler import AbstractAdSiteCrawler 10 | 11 | logger = logging.getLogger('XeGrAdSiteCrawler') 12 | 13 | 14 | class XeGrAdSiteCrawler(AbstractAdSiteCrawler): 15 | __slots__ = ('_stop_words', '_ad_site_url', '_anchor_class_name') 16 | 17 | _stop_words: List[str] 18 | _ad_site_url: str 19 | _anchor_class_name: str 20 | _ignored_emails: List = ['email@paroxos.com'] 21 | 22 | def __init__(self, stop_words: List, ad_site_url: str = "https://www.xe.gr", anchor_class_name='result-list-narrow-item'): 23 | """ 24 | Tha basic constructor. Creates a new instance of AdSiteCrawler using the specified credentials 25 | 26 | :param stop_words: 27 | """ 28 | 29 | logger.debug("Initializing with stop_words: %s" % stop_words) 30 | self._ad_site_url = ad_site_url 31 | self._stop_words = stop_words 32 | self._anchor_class_name = anchor_class_name 33 | super().__init__() 34 | 35 | def get_new_ads(self, lookup_url: str, ads_checked: List, crawl_interval: int = 15) -> Tuple[str, Union[None, str]]: 36 | """ 37 | Retrieves each sub-link's html, searches and yields an email for each of them. 38 | 39 | :param lookup_url: 40 | :param ads_checked: 41 | """ 42 | 43 | if self._ad_site_url not in lookup_url: 44 | raise AdSiteCrawlerError( 45 | "The lookup_url: %s is not supported. The domain should be: %s" % (lookup_url, self._ad_site_url)) 46 | if lookup_url[:4] != 'http': 47 | logger.warning("The lookup_url doesn't contain http:// or https://! 
Adding https:// ..") 48 | lookup_url = 'https://' + lookup_url 49 | 50 | logger.debug("ads_checked: %s" % ads_checked) 51 | search_page_html = self._retrieve_html_from_url(lookup_url) 52 | # Search for links in the main page's html, retrieve their html and look for emails inside them 53 | for ad_link in self._find_links_in_html(html_data=search_page_html, anchor_class_name=self._anchor_class_name): 54 | logger.debug("Input ad_link: %s" % ad_link) 55 | ad_linked_parsed = urllib.parse.quote(ad_link) 56 | if ad_linked_parsed[:4] != 'http': 57 | full_sub_link = self._ad_site_url + ad_linked_parsed 58 | else: 59 | full_sub_link = ad_link 60 | logger.debug("Checking constructed full_sub_link: %s" % full_sub_link) 61 | # Wait before checking next link to avoid bot ban 62 | logger.debug("Sleeping for crawl_interval={crawl_interval} seconds..".format(crawl_interval=crawl_interval)) 63 | time.sleep(crawl_interval) 64 | if full_sub_link in ads_checked: 65 | logger.debug("It is in ads_checked, skipping..") 66 | continue 67 | ad_page_html = self._retrieve_html_from_url(full_sub_link) 68 | if any(unidecode(word).lower() in unidecode(ad_page_html).lower() for word in self._stop_words): 69 | logger.debug("It contains one of the stop words, skipping..") 70 | continue 71 | # Add the link inside the check list in order to avoid duplicate ads 72 | ads_checked.append(full_sub_link) 73 | emails_in_ad_page = self._find_emails_in_html(html_data=ad_page_html) 74 | if len(emails_in_ad_page) == 0: 75 | logger.debug("Found no emails in the ad page, returning None..") 76 | yield full_sub_link, None 77 | else: 78 | logger.debug("Found emails in the ad page, returning %s.." % emails_in_ad_page[0]) 79 | yield full_sub_link, emails_in_ad_page[0] 80 | 81 | @staticmethod 82 | def _retrieve_html_from_url(url: str) -> str: 83 | """ 84 | Retrieves full html from the specified url. 85 | 86 | :params url: 87 | """ 88 | 89 | try: 90 | logger.debug("Retrieving html from url: %s .." 
% url) 91 | header = { 92 | 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.8.0'} 93 | req = urllib.request.Request(url, headers=header) 94 | html = urllib.request.urlopen(req).read() 95 | except Exception as e: 96 | logger.error(e) 97 | html = 'None' 98 | if type(html) is not str: 99 | html = html.decode('utf-8') 100 | logger.debug("HTML retrieved:\n%s" % (html)) 101 | return html 102 | 103 | @staticmethod 104 | def _find_links_in_html(html_data: str, anchor_class_name: str = 'result-list-narrow-item') -> str: 105 | """ 106 | Searches for sub-link patterns in html and yields each link. 107 | 108 | :param html_data: 109 | """ 110 | 111 | logger.debug("Using anchor class name=%s" % anchor_class_name) 112 | logger.debug("Searching for sub-links in html..") 113 | 114 | pattern = re.compile(r"()" 115 | .format(anchor_class_name=anchor_class_name)) 116 | a_tag_captured = pattern.findall(html_data) 117 | logger.debug("Anchor captured: %s" % a_tag_captured) 118 | for i in a_tag_captured: 119 | href_raw = i[str(i).find('href'):] 120 | href = href_raw[:href_raw.find(' ')].strip() 121 | logger.debug("Href captured: %s, and sliced: %s" % (href, href[6:-1])) 122 | yield href[6:-1] 123 | 124 | @classmethod 125 | def _find_emails_in_html(cls, html_data: str) -> List: 126 | """ 127 | Searches for email patterns in html and returns list of emails. 
class MySqlDatastore(AbstractDatastore):
    """
    MySQL implementation of AbstractDatastore.

    Table names, schemas, column names and the where/order_by fragments are inserted
    into the SQL text as-is (identifiers cannot be bound as parameters), so they must
    come from trusted code. Row values, on the other hand, are always bound as query
    parameters.
    """
    __slots__ = ('_connection', '_cursor')

    # Quoted so that defining the class does not require the optional C extension
    # of mysql-connector to be importable.
    # NOTE(review): _connection/_cursor are not assigned in this class; presumably
    # AbstractDatastore.__init__ sets them through get_connection() -- confirm.
    _connection: 'mysql_connector.connection_cext.CMySQLConnection'
    _cursor: 'mysql_connector.connection_cext.CMySQLCursor'

    def __init__(self, config: Dict) -> None:
        """
        The basic constructor. Creates a new instance of Datastore using the specified credentials

        :param config: the datastore configuration, passed unchanged to the base class
        """

        super().__init__(config)

    @staticmethod
    def get_connection(username: str, password: str, hostname: str, db_name: str, port: int = 3306) \
            -> Tuple['mysql_connector.connection_cext.CMySQLConnection',
                     'mysql_connector.connection_cext.CMySQLCursor']:
        """
        Creates and returns a connection and a cursor/session to the MySQL DB

        :param username: the user to connect as
        :param password: the password of the user
        :param hostname: the host of the MySQL server
        :param db_name: the database to use
        :param port: the port of the MySQL server (it used to be accepted but ignored)
        :return: the (connection, cursor) pair
        """

        connection = mysql_connector.connect(
            host=hostname,
            port=port,
            user=username,
            passwd=password,
            database=db_name,
            use_pure=True
        )

        cursor = connection.cursor()

        return connection, cursor

    def create_table(self, table: str, schema: str) -> None:
        """
        Creates a table using the specified schema, unless it already exists

        :param table: the name of the table
        :param schema: the column definitions, in SQL syntax
        :return:
        """

        query = "CREATE TABLE IF NOT EXISTS {table} ({schema})".format(table=table, schema=schema)
        logger.debug("Executing: %s" % query)
        self._cursor.execute(query)
        self._connection.commit()

    def drop_table(self, table: str) -> None:
        """
        Drops the specified table if it exists

        :param table: the name of the table
        :return:
        """

        query = "DROP TABLE IF EXISTS {table}".format(table=table)
        logger.debug("Executing: %s" % query)
        self._cursor.execute(query)
        self._connection.commit()

    def truncate_table(self, table: str) -> None:
        """
        Truncates the specified table

        :param table: the name of the table
        :return:
        """

        query = "TRUNCATE TABLE {table}".format(table=table)
        logger.debug("Executing: %s" % query)
        self._cursor.execute(query)
        self._connection.commit()

    def insert_into_table(self, table: str, data: dict) -> None:
        """
        Inserts into the specified table a row based on a column_name: value dictionary

        The values are bound as query parameters instead of being formatted into the
        SQL text, so quotes inside a value can neither break nor alter the statement.

        :param table: the name of the table
        :param data: column_name: value pairs of the new row
        :return:
        """

        columns = list(data)
        placeholders = ", ".join("{column}=%s".format(column=str(column)) for column in columns)
        values = tuple(data[column] for column in columns)

        query = "INSERT INTO {table} SET {data}".format(table=table, data=placeholders)
        logger.debug("Executing: %s with values: %s", query, values)
        self._cursor.execute(query, values)
        self._connection.commit()

    def update_table(self, table: str, set_data: dict, where: str) -> None:
        """
        Updates the specified table using a column_name: value dictionary and a where statement

        The new values are bound as query parameters; the where statement is inserted
        as raw SQL and must come from trusted code.

        :param table: the name of the table
        :param set_data: column_name: value pairs to set
        :param where: SQL condition selecting the rows to update
        :return:
        """

        columns = list(set_data)
        placeholders = ", ".join("{column}=%s".format(column=str(column)) for column in columns)
        values = tuple(set_data[column] for column in columns)

        query = "UPDATE {table} SET {data} WHERE {where}".format(table=table, data=placeholders, where=where)
        logger.debug("Executing: %s with values: %s", query, values)
        self._cursor.execute(query, values)
        self._connection.commit()

    def select_from_table(self, table: str, columns: str = '*', where: str = 'TRUE', order_by: str = 'NULL',
                          asc_or_desc: str = 'ASC', limit: int = 1000) -> List:
        """
        Selects from a specified table based on the given columns, where, ordering and limit

        All the arguments are inserted into the query as raw SQL fragments.

        :param table: the name of the table
        :param columns: comma separated column names
        :param where: SQL condition selecting the rows
        :param order_by: the column (or expression) to order by
        :param asc_or_desc: 'ASC' or 'DESC'
        :param limit: maximum number of rows to return
        :return results: the list of the fetched rows
        """

        query = "SELECT {columns} FROM {table} WHERE {where} ORDER BY {order_by} {asc_or_desc} LIMIT {limit}".format(
            columns=columns, table=table, where=where, order_by=order_by, asc_or_desc=asc_or_desc, limit=limit)
        logger.debug("Executing: %s" % query)
        self._cursor.execute(query)
        results = self._cursor.fetchall()

        return results

    def delete_from_table(self, table: str, where: str) -> None:
        """
        Deletes data from the specified table based on a where statement

        :param table: the name of the table
        :param where: SQL condition selecting the rows to delete
        :return:
        """

        query = "DELETE FROM {table} WHERE {where}".format(table=table, where=where)
        logger.debug("Executing: %s" % query)
        self._cursor.execute(query)
        self._connection.commit()

    def show_tables(self) -> List:
        """
        Show a list of the tables present in the db

        :return: the table names
        """

        query = 'SHOW TABLES'
        logger.debug("Executing: %s" % query)
        self._cursor.execute(query)
        results = self._cursor.fetchall()

        return [result[0] for result in results]

    def __exit__(self, exc_type=None, exc_val=None, exc_tb=None) -> None:
        """
        Flushes and closes the connection

        The three optional arguments are the ones the context manager protocol passes
        to __exit__; they default to None so a direct obj.__exit__() call keeps working.

        :param exc_type: type of the exception raised inside the with block, if any
        :param exc_val: the exception raised inside the with block, if any
        :param exc_tb: traceback of the exception raised inside the with block, if any
        :return:
        """

        self._connection.commit()
        self._cursor.close()
        # Close the connection as well: closing only the cursor left it open
        self._connection.close()
GmailEmailApp(config=gmail_configuration) 28 | except SMTPAuthenticationError as e: 29 | logger.error('Error connecting with the correct credentials: %s', e) 30 | self.fail('Error connecting with the correct credentials') 31 | else: 32 | logger.info('Connected with the correct credentials successfully.') 33 | # Test that the connection is failed with the wrong credentials 34 | with self.assertRaises(SMTPAuthenticationError): 35 | gmail_wrong_configuration = copy.deepcopy(gmail_configuration) 36 | gmail_wrong_configuration['api_key'] = 'wrong_key' 37 | GmailEmailApp(config=gmail_wrong_configuration) 38 | logger.info("Loading Dropbox with wrong credentials failed successfully.") 39 | 40 | def test_is_connected_and_exit(self): 41 | gmail_configuration = self.configuration.get_email_apps()[0] 42 | gmail_app = GmailEmailApp(config=gmail_configuration) 43 | self.assertEqual(True, gmail_app.is_connected()) 44 | gmail_app.__exit__() 45 | self.assertEqual(False, gmail_app.is_connected()) 46 | 47 | def test_send_email_with_all_args(self): 48 | try: 49 | gmail_configuration = self.configuration.get_email_apps()[0] 50 | gmail_app = GmailEmailApp(config=gmail_configuration) 51 | 52 | gmail_app.send_email(subject='test_send_email_with_all_args', 53 | to=[gmail_configuration['email_address']], 54 | cc=[gmail_configuration['email_address']], 55 | bcc=[gmail_configuration['email_address']], 56 | text='Test plain/text body', 57 | html='

Test html body

', 58 | attachments=[os.path.join(self.test_data_path, 'sample_data.txt')], 59 | sender=gmail_configuration['email_address'], 60 | reply_to=gmail_configuration['email_address'] 61 | ) 62 | except Exception as e: 63 | logger.error("Test failed with exception: %s" % e) 64 | self.fail("Test failed with exception: %s" % e) 65 | 66 | def test_send_email_with_required_args(self): 67 | try: 68 | gmail_configuration = self.configuration.get_email_apps()[0] 69 | gmail_app = GmailEmailApp(config=gmail_configuration) 70 | 71 | gmail_app.send_email(subject='test_send_email_with_required_args', 72 | to=[gmail_configuration['email_address']] 73 | ) 74 | except Exception as e: 75 | logger.error("Test failed with exception: %s" % e) 76 | self.fail("Test failed with exception: %s" % e) 77 | 78 | def test_send_email_with_html(self): 79 | try: 80 | gmail_configuration = self.configuration.get_email_apps()[0] 81 | gmail_app = GmailEmailApp(config=gmail_configuration) 82 | 83 | gmail_app.send_email(subject='test_send_email_with_html', 84 | to=[gmail_configuration['email_address']], 85 | html='

Html only

' 86 | ) 87 | except Exception as e: 88 | logger.error("Test failed with exception: %s" % e) 89 | self.fail("Test failed with exception: %s" % e) 90 | 91 | def test_send_email_with_text(self): 92 | try: 93 | gmail_configuration = self.configuration.get_email_apps()[0] 94 | gmail_app = GmailEmailApp(config=gmail_configuration) 95 | 96 | gmail_app.send_email(subject='test_send_email_with_text', 97 | to=[gmail_configuration['email_address']], 98 | text='Text only' 99 | ) 100 | except Exception as e: 101 | logger.error("Test failed with exception: %s" % e) 102 | self.fail("Test failed with exception: %s" % e) 103 | 104 | @staticmethod 105 | def _generate_random_filename_and_contents() -> Tuple[str, str]: 106 | letters = string.ascii_lowercase 107 | file_name = ''.join(random.choice(letters) for _ in range(10)) + '.txt' 108 | contents = ''.join(random.choice(letters) for _ in range(20)) 109 | return file_name, contents 110 | 111 | @staticmethod 112 | def _setup_log(debug: bool = False) -> None: 113 | # noinspection PyArgumentList 114 | logging.basicConfig(level=logging.DEBUG, 115 | format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', 116 | datefmt='%Y-%m-%d %H:%M:%S', 117 | handlers=[logging.StreamHandler() 118 | ] 119 | ) 120 | 121 | def setUp(self) -> None: 122 | pass 123 | 124 | def tearDown(self) -> None: 125 | pass 126 | 127 | @classmethod 128 | def setUpClass(cls): 129 | cls._setup_log() 130 | gmail_os_vars = ['EMAIL_ADDRESS', 'GMAIL_API_KEY'] 131 | if not all(gmail_os_var in os.environ for gmail_os_var in gmail_os_vars): 132 | logger.error('Gmail env variables are not set!') 133 | raise Exception('Gmail env variables are not set!') 134 | logger.info('Loading Configuration..') 135 | cls.configuration = Configuration(config_src=os.path.join(cls.test_data_path, 'template_conf.yml')) 136 | 137 | @classmethod 138 | def tearDownClass(cls): 139 | pass 140 | 141 | 142 | if __name__ == '__main__': 143 | unittest.main() 144 | 
class JobBotDropboxCloudstore(DropboxCloudstore):
    """
    DropboxCloudstore that knows where the job bot keeps its files.

    It reads the email templates, the stop words and the attachments from
    remote_files_folder, and can upload the local copies found in local_files_folder
    when the corresponding update_* flag of the configuration is set.
    """
    __slots__ = ('_handler', 'remote_files_folder', 'local_files_folder',
                 'attachments_names', '_update_attachments', '_update_stop_words',
                 '_update_application_to_send_email', '_update_inform_success_email',
                 '_update_inform_should_call_email')

    _handler: Dropbox
    remote_files_folder: str
    local_files_folder: str
    attachments_names: List
    _update_attachments: bool
    _update_stop_words: bool
    _update_application_to_send_email: bool
    _update_inform_success_email: bool
    _update_inform_should_call_email: bool

    def __init__(self, config: Dict, remote_files_folder: str = '/job_bot_xegr') -> None:
        """
        The basic constructor. Creates a new instance of Cloudstore using the specified credentials

        :param config: the cloudstore configuration; 'local_files_folder' is required,
                       'attachments_names' and the update_* flags are optional
        :param remote_files_folder: the remote folder that holds the job bot files
        """

        self.remote_files_folder = remote_files_folder
        self.local_files_folder = config['local_files_folder']
        # Set default value for attachments_names
        self.attachments_names = config.get('attachments_names', [])
        # Default value for the boolean attributes is False
        self._update_attachments = config.get('update_attachments', False)
        self._update_stop_words = config.get('update_stop_words', False)
        self._update_application_to_send_email = config.get('update_application_to_send_email', False)
        self._update_inform_success_email = config.get('update_inform_success_email', False)
        self._update_inform_should_call_email = config.get('update_inform_should_call_email', False)
        super().__init__(config=config)

    def get_application_to_send_email_data(self) -> Tuple[str, str]:
        """Returns the (subject, html body) of the application_to_send email."""
        return self._get_email_data(type='application_to_send')

    def get_inform_should_call_email_data(self) -> Tuple[str, str]:
        """Returns the (subject, html body) of the inform_should_call email."""
        return self._get_email_data(type='inform_should_call')

    def get_inform_success_email_data(self) -> Tuple[str, str]:
        """Returns the (subject, html body) of the inform_success email."""
        return self._get_email_data(type='inform_success')

    def get_stop_words_data(self) -> List[str]:
        """
        Downloads the remote stop_words.txt and parses it as a Python literal.

        The content is parsed with ast.literal_eval instead of eval, so a tampered
        remote file cannot execute arbitrary code.

        :return: the parsed stop words
        :raises ValueError, SyntaxError: when the file does not contain a valid literal
        """

        stop_words_path = os.path.join(self.remote_files_folder, 'stop_words.txt')
        stop_words_raw = self.download_file(frompath=stop_words_path)
        # literal_eval only parses str input, so downloaded bytes are decoded first
        if isinstance(stop_words_raw, (bytes, bytearray)):
            stop_words_raw = stop_words_raw.decode("utf-8")
        return ast.literal_eval(stop_words_raw.strip())

    def _get_email_data(self, type: str) -> Tuple[str, str]:
        """
        Downloads the subject and the html body of the specified email type.

        :param type: the prefix of the remote file names, e.g. 'inform_success'
        :return: the decoded (subject, html body)
        """

        subject_file_path = os.path.join(self.remote_files_folder, '{type}_subject.txt'.format(type=type))
        html_file_path = os.path.join(self.remote_files_folder, '{type}_body.html'.format(type=type))
        subject_file = self.download_file(frompath=subject_file_path).decode("utf-8")
        html_file = self.download_file(frompath=html_file_path).decode("utf-8")
        return subject_file, html_file

    def download_attachments(self) -> None:
        """Downloads every configured attachment into the local files folder."""
        for attachment_name in self.attachments_names:
            attachment_local_path = os.path.join(self.local_files_folder, attachment_name)
            attachment_remote_path = os.path.join(self.remote_files_folder, attachment_name)
            self.download_file(frompath=attachment_remote_path, tofile=attachment_local_path)

    def update_application_to_send_email_data(self) -> None:
        """Uploads the local application_to_send email files, if enabled in the configuration."""
        if self._update_application_to_send_email:
            self._update_email_data(type='application_to_send')
        else:
            logger.info("The update of application_to_send email data was skipped.")

    def update_inform_should_call_email_data(self) -> None:
        """Uploads the local inform_should_call email files, if enabled in the configuration."""
        if self._update_inform_should_call_email:
            self._update_email_data(type='inform_should_call')
        else:
            logger.info("The update of inform_should_call email data was skipped.")

    def update_inform_success_email_data(self) -> None:
        """Uploads the local inform_success email files, if enabled in the configuration."""
        if self._update_inform_success_email:
            self._update_email_data(type='inform_success')
        else:
            logger.info("The update of inform_success email data was skipped.")

    def update_stop_words_data(self, stop_words_local_file_name: str = 'stop_words.txt') -> None:
        """
        Uploads the local stop words file as the remote stop_words.txt, if enabled in the configuration.

        :param stop_words_local_file_name: name of the file inside the local files folder
        """

        if self._update_stop_words:
            stop_words_remote_path = os.path.join(self.remote_files_folder, 'stop_words.txt')
            stop_words_local_path = os.path.join(self.local_files_folder, stop_words_local_file_name)
            self._upload_local_file(local_path=stop_words_local_path, remote_path=stop_words_remote_path)
        else:
            logger.info("The update of stop_words data was skipped.")

    def _update_email_data(self, type: str) -> None:
        """
        Uploads the local subject and html body files of the specified email type.

        :param type: the prefix of the file names, e.g. 'inform_success'
        """

        logger.info("Updating the %s email data.." % type)
        # The local and the remote files share the same names
        for file_name in ('{type}_subject.txt'.format(type=type), '{type}_body.html'.format(type=type)):
            self._upload_local_file(local_path=os.path.join(self.local_files_folder, file_name),
                                    remote_path=os.path.join(self.remote_files_folder, file_name))

    def upload_attachments(self) -> None:
        """Uploads every configured attachment from the local files folder, if enabled in the configuration."""
        if self._update_attachments:
            for attachment_name in self.attachments_names:
                attachment_upload_path = os.path.join(self.remote_files_folder, attachment_name)
                attachment_local_path = os.path.join(self.local_files_folder, attachment_name)
                self._upload_local_file(local_path=attachment_local_path, remote_path=attachment_upload_path)
        else:
            # Report the skip like the other update_* methods do
            logger.info("The update of attachments was skipped.")

    def _upload_local_file(self, local_path: str, remote_path: str) -> None:
        """Reads the local file as bytes and uploads it to the remote path."""
        with open(local_path, 'rb') as local_file:
            self.upload_file(file_bytes=local_file.read(), upload_path=remote_path)
os.path.join('test_data', 'test_job_bot_mysql_datastore') 23 | 24 | 25 | def test_create_applications_sent_table(self): 26 | data_store = JobBotMySqlDatastore(config=self.configuration.get_datastores()[0], 27 | application_table_name=self.table_name) 28 | # Check if table not exists 29 | self.assertNotIn(self.table_name, data_store.show_tables()) 30 | # Create applications sent table 31 | logger.info('Creating applications sent table..') 32 | data_store.create_applications_sent_table() 33 | # Check if it was created 34 | self.assertIn(self.table_name, data_store.show_tables()) 35 | # Drop table 36 | logger.info('Dropping table..') 37 | data_store.drop_table(table=self.table_name) 38 | # Check if it was deleted 39 | self.assertNotIn(self.table_name, data_store.show_tables()) 40 | 41 | def test_save_sent_application(self): 42 | data_store = JobBotMySqlDatastore(config=self.configuration.get_datastores()[0], 43 | application_table_name=self.table_name) 44 | # Create applications sent table 45 | logger.info('Creating applications sent table..') 46 | data_store.create_applications_sent_table() 47 | # Check if it is empty 48 | self.assertListEqual([], data_store.select_from_table(table=self.table_name)) 49 | # Insert a row 50 | datetime_now = datetime.datetime.utcnow().isoformat() 51 | row = {'link': 'www.test.com', 52 | 'email': 'test@test.com', 53 | 'sent_on': datetime_now} 54 | logger.info('Inserting row into applications sent table..') 55 | data_store.save_sent_application(row) 56 | # Check if row was inserted 57 | self.assertListEqual([(1, 'www.test.com', 'test@test.com', datetime_now)], 58 | data_store.select_from_table(table=self.table_name)) 59 | 60 | def test_get_applications_sent(self): 61 | data_store = JobBotMySqlDatastore(config=self.configuration.get_datastores()[0], 62 | application_table_name=self.table_name) 63 | # Create applications sent table 64 | logger.info('Creating applications sent table..') 65 | data_store.create_applications_sent_table() 66 | # 
Insert to rows 67 | datetime_now = datetime.datetime.utcnow().isoformat() 68 | row1 = {'link': 'www.test1.com', 69 | 'email': 'test1@test1.com', 70 | 'sent_on': datetime_now} 71 | row2 = {'link': 'www.test2.com', 72 | 'email': 'test2@test2.com', 73 | 'sent_on': datetime_now} 74 | logger.info('Inserting two rows into applications sent table..') 75 | data_store.save_sent_application(row1) 76 | data_store.save_sent_application(row2) 77 | logger.info('Getting the two rows using the get_applications_sent()..') 78 | expected_result = [tuple(row1.values()), tuple(row2.values())] 79 | # Check if they were inserted 80 | self.assertListEqual(sorted(expected_result), 81 | sorted([result[1:] for result in data_store.get_applications_sent()])) 82 | 83 | def test_remove_ad(self): 84 | data_store = JobBotMySqlDatastore(config=self.configuration.get_datastores()[0], 85 | application_table_name=self.table_name) 86 | # Create applications sent table 87 | logger.info('Creating applications sent table..') 88 | data_store.create_applications_sent_table() 89 | # Insert to rows 90 | datetime_now = datetime.datetime.utcnow().isoformat() 91 | row1 = {'link': 'www.test1.com', 92 | 'email': 'test1@test1.com', 93 | 'sent_on': datetime_now} 94 | row2 = {'link': 'www.test2.com', 95 | 'email': 'test2@test2.com', 96 | 'sent_on': datetime_now} 97 | logger.info('Inserting two rows into applications sent table..') 98 | data_store.save_sent_application(row1) 99 | data_store.save_sent_application(row2) 100 | logger.info('Deleting the first row from the applications sent table..') 101 | data_store.remove_ad(email_id=1) 102 | logger.info('Getting the remaining row using the get_applications_sent()..') 103 | expected_result = [tuple(row2.values())] 104 | # Check if they were inserted 105 | self.assertListEqual(sorted(expected_result), 106 | sorted([result[1:] for result in data_store.get_applications_sent()])) 107 | 108 | 109 | 110 | @staticmethod 111 | def _generate_random_filename() -> str: 112 | 
letters = string.ascii_lowercase 113 | file_name = 'test_table_' + ''.join(random.choice(letters) for _ in range(10)) 114 | return file_name 115 | 116 | @staticmethod 117 | def _setup_log(debug: bool = False) -> None: 118 | # noinspection PyArgumentList 119 | logging.basicConfig(level=logging.DEBUG, 120 | format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s', 121 | datefmt='%Y-%m-%d %H:%M:%S', 122 | handlers=[logging.StreamHandler() 123 | ] 124 | ) 125 | 126 | def setUp(self) -> None: 127 | self.table_name = self._generate_random_filename() 128 | self.generated_table_names.append(self.table_name) 129 | 130 | def tearDown(self) -> None: 131 | pass 132 | 133 | @classmethod 134 | def setUpClass(cls): 135 | cls._setup_log() 136 | mysql_os_vars = ['MYSQL_HOST', 'MYSQL_USERNAME', 'MYSQL_PASSWORD', 'MYSQL_DB_NAME'] 137 | if not all(mysql_os_var in os.environ for mysql_os_var in mysql_os_vars): 138 | logger.error('Mysql env variables are not set!') 139 | raise Exception('Mysql env variables are not set!') 140 | logger.info('Loading Configuration..') 141 | cls.configuration = Configuration(config_src=os.path.join(cls.test_data_path, 'template_conf.yml')) 142 | 143 | 144 | @classmethod 145 | def tearDownClass(cls): 146 | data_store = JobBotMySqlDatastore(config=cls.configuration.get_datastores()[0]) 147 | for table in cls.generated_table_names: 148 | logger.info('Dropping table {0}'.format(table)) 149 | data_store.drop_table(table=table) 150 | 151 | 152 | if __name__ == '__main__': 153 | unittest.main() 154 | -------------------------------------------------------------------------------- /tests/test_xegr_ad_site_crawler.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import logging 4 | import http.server 5 | import socketserver 6 | import urllib.parse 7 | from typing import Tuple, List, Dict 8 | import threading 9 | 10 | from ad_site_crawler.xegr_ad_site_crawler import XeGrAdSiteCrawler 11 | 
logger = logging.getLogger('TestXeGrAdSiteCrawler')


class TestXeGrAdSiteCrawler(unittest.TestCase):
    """
    Tests XeGrAdSiteCrawler against html fixtures that are served by a local http server.

    The server is started once in setUpClass() and stopped in tearDownClass(). It maps the search
    url and the four ad sub-links to the html files that live under test_data_path.
    """
    __slots__ = (
        'httpd', 'server_thread', 'html_sub_links', 'html_file_with_links_path', 'base_url',
        'html_file_with_email_path_1', 'html_file_with_email_path_2',
        'html_file_with_email_path_3', 'html_file_with_email_path_4')

    stop_words: List = ['Senior']
    lookup_params: str = "?q1=1&q2=2"
    base_url: str
    httpd: socketserver.TCPServer
    server_thread: threading.Thread
    html_sub_links: List
    html_file_with_links_path: str
    html_file_with_email_path_1: str
    html_file_with_email_path_2: str
    html_file_with_email_path_3: str
    html_file_with_email_path_4: str
    PORT: int = 8111
    test_data_path: str = os.path.join('test_data', 'test_xegr_ad_site_crawler')

    def test__find_emails_in_html(self):
        ad_site_crawler = XeGrAdSiteCrawler(stop_words=[])
        # Load the html file
        logger.info("Loading html file..")
        with open(self.html_file_with_email_path_2, 'r') as html_f:
            html_file = html_f.read()
        # Search for emails in the loaded html
        logger.info("Calling _find_emails_in_html()..")
        returned_emails = ad_site_crawler._find_emails_in_html(html_data=html_file)
        # Check if the correct emails were found
        expected_emails = ['efi.koulourianou@gmail.com', 'efi.koulourianou@gmail.com']
        self.assertListEqual(expected_emails, returned_emails)

    def test__find_links_in_html(self):
        ad_site_crawler = XeGrAdSiteCrawler(stop_words=[])
        # Load the html file
        logger.info("Loading html file..")
        with open(self.html_file_with_links_path, 'r') as html_f:
            html_file = html_f.read()
        # Search for links in the loaded html
        logger.info("Calling _find_links_in_html()..")
        returned_links = list(ad_site_crawler._find_links_in_html(html_data=html_file,
                                                                  anchor_class_name='highlight'))
        # Check if the correct links were found (the expected ones are stored url-quoted)
        expected_links = self.html_sub_links
        self.assertListEqual(expected_links, [urllib.parse.quote(link) for link in returned_links])

    def test__retrieve_html_from_url_search_page(self):
        ad_site_crawler = XeGrAdSiteCrawler(stop_words=[])
        # Load the html file
        logger.info("Loading html file..")
        with open(self.html_file_with_links_path, 'r') as html_f:
            html_file_links = html_f.read()
        # Retrieve the html from the local server
        logger.info("Calling _retrieve_html_from_url()..")
        returned_html_links = ad_site_crawler._retrieve_html_from_url(
            '{base_url}/search?{lookup_params}'.format(base_url=self.base_url,
                                                       lookup_params=self.lookup_params))
        # Check if the correct html was loaded
        self.assertEqual(html_file_links, returned_html_links)

    def test__retrieve_html_from_url_sub_page(self):
        ad_site_crawler = XeGrAdSiteCrawler(stop_words=[])
        # Load the html file
        logger.info("Loading html file..")
        with open(self.html_file_with_email_path_1, 'r') as html_f:
            html_file_email_1 = html_f.read()
        # Retrieve the html from the local server
        logger.info("Calling _retrieve_html_from_url()..")
        returned_html_links = ad_site_crawler._retrieve_html_from_url(
            '{base_url}'
            '{email_page}'.format(base_url=self.base_url,
                                  email_page=self.html_sub_links[0]))
        # Check if the correct html was loaded
        self.assertEqual(html_file_email_1, returned_html_links)

    def test_get_new_ads(self):
        ad_site_crawler = XeGrAdSiteCrawler(stop_words=self.stop_words,
                                            ad_site_url=self.base_url,
                                            anchor_class_name='highlight')
        # Crawl the search page that is served by the local server
        logger.info("Calling get_new_ads()..")
        returned_ads = list(
            ad_site_crawler.get_new_ads(lookup_url='{base_url}/search?{lookup_params}'
                                        .format(base_url=self.base_url, lookup_params=self.lookup_params),
                                        ads_checked=[self.base_url + self.html_sub_links[1]],
                                        crawl_interval=1))
        # Check if the expected (link, email) pairs were returned
        expected_ads = [('{base_url}{sublink}'.format(base_url=self.base_url,
                                                      sublink=self.html_sub_links[2]),
                         None),
                        ('{base_url}{sublink}'.format(base_url=self.base_url,
                                                      sublink=self.html_sub_links[3]),
                         'epharmacy137@gmail.com'),
                        ]
        self.assertListEqual(sorted(expected_ads, key=lambda x: x[0]),
                             sorted(returned_ads, key=lambda x: x[0]))

    @classmethod
    def init_local_server(cls, port: int = 8111) -> socketserver.TCPServer:
        """
        Creates (but does not start) the local http server that serves the html fixtures.

        :param port: the port to bind to
        :return: the TCPServer instance
        """

        class MyHttpRequestHandler(http.server.SimpleHTTPRequestHandler):
            def do_GET(self):
                # The routes are built per request because the class attributes they read
                # are only set in setUpClass()
                routes = {'/search?{lookup_params}'.format(lookup_params=cls.lookup_params):
                              cls.html_file_with_links_path,
                          cls.html_sub_links[0]: cls.html_file_with_email_path_1,
                          cls.html_sub_links[1]: cls.html_file_with_email_path_2,
                          cls.html_sub_links[2]: cls.html_file_with_email_path_3,
                          cls.html_sub_links[3]: cls.html_file_with_email_path_4}
                # Unknown paths are left untouched and handled by the default implementation
                self.path = routes.get(self.path, self.path)
                logger.info("Local server requested path: %s" % self.path)
                return http.server.SimpleHTTPRequestHandler.do_GET(self)

        class ReusableTCPServer(socketserver.TCPServer):
            # Avoids "Address already in use" when the tests are re-run right after a previous run
            allow_reuse_address = True

        return ReusableTCPServer(("", port), MyHttpRequestHandler)

    @staticmethod
    def _setup_log() -> None:
        # noinspection PyArgumentList
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                            datefmt='%Y-%m-%d %H:%M:%S',
                            handlers=[logging.StreamHandler()
                                      ]
                            )

    def setUp(self) -> None:
        pass

    def tearDown(self) -> None:
        pass

    @classmethod
    def setUpClass(cls):
        cls._setup_log()
        cls.html_file_with_links_path = os.path.join(cls.test_data_path, 'file_with_links.html')
        cls.html_file_with_email_path_1 = os.path.join(cls.test_data_path, 'file_with_email_1.html')
        cls.html_file_with_email_path_2 = os.path.join(cls.test_data_path, 'file_with_email_2.html')
        cls.html_file_with_email_path_3 = os.path.join(cls.test_data_path, 'file_with_email_3.html')
        cls.html_file_with_email_path_4 = os.path.join(cls.test_data_path, 'file_with_email_4.html')
        cls.html_sub_links = [urllib.parse.quote(link) for link in
                              ['/jobs/programmatistes-mhxanikoi-h-y|ad-96230841.html',
                               '/jobs/programmatistes-mhxanikoi-h-y|ad-659824116.html',
                               '/jobs/programmatistes-mhxanikoi-h-y|ad-94456892.html',
                               '/jobs/programmatistes-mhxanikoi-h-y|ad-579027979.html']]
        # Serve the html files from a local server
        cls.base_url = 'http://localhost:{port}'.format(port=cls.PORT)
        logger.info("Serving html file to local server. Base: {base_url}"
                    .format(base_url=cls.base_url))
        cls.httpd = cls.init_local_server(port=cls.PORT)
        # Daemon thread: a failure before tearDownClass() must not keep the interpreter alive
        cls.server_thread = threading.Thread(target=cls.httpd.serve_forever, daemon=True)
        cls.server_thread.start()

    @classmethod
    def tearDownClass(cls):
        logger.info("Shutting down server..")
        cls.httpd.shutdown()
        cls.httpd.server_close()
        # serve_forever() has returned after shutdown(), so this does not block
        cls.server_thread.join()
logger = logging.getLogger('Configuration')


class Configuration:
    """
    Loads the yml configuration of the bot, validates it against a json schema and exposes
    its properties as instance attributes.
    """
    __slots__ = ('config', 'config_path', 'datastore', 'cloudstore', 'email_app', 'tag',
                 'check_interval', 'crawl_interval', 'anchor_class_name', 'lookup_url', 'test_mode',
                 'config_attributes')

    config: Dict
    config_path: str
    datastore: Dict
    cloudstore: Dict
    email_app: Dict
    lookup_url: str
    check_interval: int
    crawl_interval: int
    anchor_class_name: str
    tag: str
    test_mode: bool
    # Per-instance list of the optional sections found in the loaded config. It used to be a
    # class-level list, so every new Configuration appended to the same shared object.
    config_attributes: List
    env_variable_tag: str = '!ENV'
    env_variable_pattern: str = r'.*?\${(\w+)}.*?'  # ${var}

    def __init__(self, config_src: Union[TextIOWrapper, StringIO, str], config_schema_path: str = 'yml_schema.json'):
        """
        The basic constructor. Loads the configuration, sets the default values of the optional
        properties and validates the result against the json schema.

        :param config_src: an open yml file, a StringIO holding yml, or the path of a yml file
        :param config_schema_path: path of the json schema, relative to the folder of this module
        """

        self.config_attributes = []
        # Load the predefined schema of the configuration
        configuration_schema = self.load_configuration_schema(config_schema_path=config_schema_path)
        # Load the configuration
        self.config, self.config_path = self.load_yml(config_src=config_src,
                                                      env_tag=self.env_variable_tag,
                                                      env_pattern=self.env_variable_pattern)
        # Fix datatype and set default values
        if 'test_mode' in self.config:
            if isinstance(self.config['test_mode'], str):
                # Values that come from env variables are strings: anything but "false" enables it
                self.config['test_mode'] = self.config['test_mode'].strip().lower() != 'false'
            self.test_mode = self.config['test_mode']
        else:
            self.test_mode = True
        if 'check_interval' in self.config:
            self.config['check_interval'] = int(self.config['check_interval'])
            self.check_interval = self.config['check_interval']
        else:
            self.check_interval = 120
        if 'crawl_interval' in self.config:
            self.config['crawl_interval'] = int(self.config['crawl_interval'])
            self.crawl_interval = self.config['crawl_interval']
        else:
            self.crawl_interval = 15
        if 'anchor_class_name' in self.config:
            self.anchor_class_name = self.config['anchor_class_name']
        else:
            self.anchor_class_name = "highlight"
        # NOTE(review): this logs the whole config, including any credentials it contains
        logger.debug("Loaded config: %s" % self.config)
        # Validate the config
        validate_json_schema(self.config, configuration_schema)
        # Set the config properties as instance attributes
        self.lookup_url = self.config['lookup_url']
        self.tag = self.config['tag']
        all_config_attributes = ('datastore', 'cloudstore', 'email_app')
        for config_attribute in all_config_attributes:
            if config_attribute in self.config:
                setattr(self, config_attribute, self.config[config_attribute])
                self.config_attributes.append(config_attribute)
            else:
                setattr(self, config_attribute, None)

    @staticmethod
    def load_configuration_schema(config_schema_path: str) -> Dict:
        """
        Loads the json schema that the configuration is validated against.

        :param config_schema_path: path of the schema, relative to the folder of this module
                                   (an absolute path is used as is)
        :return: the parsed schema
        """

        schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_schema_path)
        with open(schema_path) as f:
            configuration_schema = json.load(f)
        return configuration_schema

    @staticmethod
    def load_yml(config_src: Union[TextIOWrapper, StringIO, str], env_tag: str, env_pattern: str) -> Tuple[Dict, str]:
        """
        Parses a yml configuration and replaces the ${VAR} placeholders with the values of the
        corresponding environment variables.

        :param config_src: an open yml file, a StringIO holding yml, or the path of a yml file
        :param env_tag: the yaml tag that marks the values that contain env variables
        :param env_pattern: the regex that matches the ${VAR} placeholders
        :return: the parsed config and the name of its source
        :raises TypeError: if config_src is none of the supported types
        """

        pattern = re.compile(env_pattern)

        class EnvVarLoader(yaml.SafeLoader):
            """ Private loader, so that the resolver/constructor below don't leak into yaml.SafeLoader. """

        EnvVarLoader.add_implicit_resolver(env_tag, pattern, None)

        def constructor_env_variables(loader, node):
            """
            Extracts the environment variable from the node's value
            :param yaml.Loader loader: the yaml loader
            :param node: the current node in the yaml
            :return: the parsed string that contains the value of the environment
            variable
            """
            value = loader.construct_scalar(node)
            match = pattern.findall(value)  # to find all env variables in line
            if match:
                full_value = value
                for g in match:
                    # An unset variable is replaced by its own name
                    full_value = full_value.replace(
                        f'${{{g}}}', os.environ.get(g, g)
                    )
                return full_value
            return value

        EnvVarLoader.add_constructor(env_tag, constructor_env_variables)

        if isinstance(config_src, TextIOWrapper):
            logger.debug("Loading yaml from TextIOWrapper")
            config = yaml.load(config_src, Loader=EnvVarLoader)
            config_path = config_src.name
        elif isinstance(config_src, StringIO):
            logger.debug("Loading yaml from StringIO")
            config = yaml.load(config_src, Loader=EnvVarLoader)
            config_path = "StringIO"
        elif isinstance(config_src, str):
            logger.debug("Loading yaml from path")
            with open(config_src) as f:
                config = yaml.load(f, Loader=EnvVarLoader)
            config_path = config_src
        else:
            raise TypeError('Config file must be TextIOWrapper, StringIO or path to a file')
        return config, config_path

    def _get_sub_configs(self, config_attribute: str) -> List:
        """ Returns the 'config' entry of every item of an optional section, or raises if it is not set. """
        if config_attribute not in self.config_attributes:
            raise ConfigurationError('Config property %s not set!' % config_attribute)
        return [sub_config['config'] for sub_config in getattr(self, config_attribute)]

    def get_datastores(self) -> List:
        """
        :return: the configs of the defined datastores
        :raises ConfigurationError: if the config has no datastore section
        """
        return self._get_sub_configs('datastore')

    def get_cloudstores(self) -> List:
        """
        :return: the configs of the defined cloudstores
        :raises ConfigurationError: if the config has no cloudstore section
        """
        return self._get_sub_configs('cloudstore')

    def get_email_apps(self) -> List:
        """
        :return: the configs of the defined email apps
        :raises ConfigurationError: if the config has no email_app section
        """
        return self._get_sub_configs('email_app')

    def to_yml(self, fn: Union[str, _io.TextIOWrapper]) -> None:
        """
        Writes the configuration to a stream. For example a file.

        :param fn: the path of the file to write to, or an already opened text file
        :return: None
        :raises TypeError: if fn is neither a str nor a TextIOWrapper
        """

        dict_conf = self.to_json()

        if isinstance(fn, str):
            with open(fn, 'w') as f:
                yaml.dump(dict_conf, f, default_flow_style=False)
        elif isinstance(fn, _io.TextIOWrapper):
            yaml.dump(dict_conf, fn, default_flow_style=False)
        else:
            raise TypeError('Expected str or _io.TextIOWrapper not %s' % (type(fn)))

    to_yaml = to_yml

    def to_json(self) -> Dict:
        """
        Returns the configuration as a dictionary. The optional properties are only included
        if they were present in the loaded config (defaults are not exported).

        :return: the configuration dictionary
        """

        dict_conf = {config_attribute: getattr(self, config_attribute)
                     for config_attribute in self.config_attributes}

        dict_conf['lookup_url'] = self.lookup_url
        dict_conf['tag'] = self.tag
        for optional_key in ('check_interval', 'crawl_interval', 'test_mode', 'anchor_class_name'):
            if optional_key in self.config:
                dict_conf[optional_key] = getattr(self, optional_key)

        return dict_conf

    def __getitem__(self, item):
        """ Allows dict-like access to the instance attributes. """
        return getattr(self, item)


class ConfigurationError(Exception):
    """ Raised when a requested configuration section is not set. """
def _setup_log(log_path: str = 'logs/output.log', debug: bool = False) -> None:
    """
    Configures the root logger to write both to a daily-rotated file and to the console.

    :param log_path: path of the log file. Its parent folders are created if they don't exist.
                     None falls back to the default path (the --log argument is optional, so
                     the caller may pass None explicitly).
    :param debug: if True the log level is DEBUG, otherwise INFO
    """

    if log_path is None:
        log_path = 'logs/output.log'
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    # noinspection PyArgumentList
    logging.basicConfig(level=logging.DEBUG if debug is True else logging.INFO,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        handlers=[
                            logging.handlers.TimedRotatingFileHandler(log_path, when='midnight', interval=1),
                            logging.StreamHandler()
                        ]
                        )
def _argparser(argv=None) -> argparse.Namespace:
    """
    Builds the command line parser and parses the arguments.

    :param argv: the list of arguments to parse. Defaults to the command line (sys.argv[1:]).
    :return: the parsed arguments
    :raises argparse.ArgumentTypeError: if the run mode is remove_email and --email-id is not set
    """

    parser = argparse.ArgumentParser(
        description='A bot that automatically sends emails to new ads posted in the specified xe.gr search page.',
        add_help=False)
    # Required Args
    required_arguments = parser.add_argument_group('required arguments')
    config_file_params = {
        'type': argparse.FileType('r'),
        'required': True,
        'help': "The configuration yml file"
    }
    required_arguments.add_argument('-c', '--config-file', **config_file_params)
    # Optional args
    optional = parser.add_argument_group('Optional Arguments')
    # The run mode has a default, so it must not be marked as required for the default to apply
    optional.add_argument('-m', '--run-mode',
                          choices=['crawl_and_send', 'list_emails', 'remove_email', 'upload_files',
                                   'create_table'],
                          default='crawl_and_send',
                          help="The mode to run in (default: crawl_and_send)")
    # Without a default, omitting --log passed None to the log setup
    optional.add_argument('-l', '--log', default='logs/output.log',
                          help="Name of the output log file (default: logs/output.log)")
    optional.add_argument('--email-id', help='The id of the email you want to be deleted')
    optional.add_argument('-d', '--debug', action='store_true', help='Enables the debug log messages')
    optional.add_argument("-h", "--help", action="help", help="Show this help message and exit")
    # Parse args
    parsed_args = parser.parse_args(argv)
    if parsed_args.email_id is None and parsed_args.run_mode == 'remove_email':
        raise argparse.ArgumentTypeError('--run-mode = remove_email requires --email-id to be set!')
    return parsed_args
def show_ads_checked(ads: List[Tuple]) -> None:
    """
    Prints the applications that have been sent as a fixed-width table.

    :param ads: the rows to print. Each row is indexed as (id, link, email, sent on).
    """

    top_border = "{}".format("_" * 146)
    header = "|{:-^6}|{:-^80}|{:-^40}|{:-^15}|".format('ID', 'Link', 'Email', 'Sent On')
    row_template = "|{:^6}|{:^80}|{:^40}|{:^15}|"
    bottom_border = "|{}|".format("_" * 144)

    print(top_border)
    print(header)
    for ad in ads:
        # The timestamp is shown as a relative, human readable time
        sent_on = arrow.get(ad[3]).humanize()
        print(row_template.format(ad[0], ad[1], str(ad[2]), sent_on))
    print(bottom_border)
def crawl_and_send_loop(lookup_url: str, check_interval: int, crawl_interval: int, anchor_class_name: str,
                        data_store: JobBotMySqlDatastore,
                        cloud_store: JobBotDropboxCloudstore,
                        email_app: GmailEmailApp,
                        send_delay: int = 60) -> None:
    """
    The main loop.
    Crawls the ad site for new ads and sends emails where applicable and informs the applicant.
    It never returns.

    :param lookup_url: the url of the search page to look for new ads in
    :param check_interval: the seconds to wait between two checks for new ads
    :param crawl_interval: passed to the crawler as the interval between the sub-page requests
    :param anchor_class_name: the class name of the anchors that hold the ad links
    :param data_store: the datastore the sent applications are saved to
    :param cloud_store: the cloudstore that holds the stop words, the email data and the attachments
    :param email_app: the email app used for sending the emails
    :param send_delay: the seconds to wait before sending each application
    """

    ad_site_crawler = XeGrAdSiteCrawler(stop_words=cloud_store.get_stop_words_data(),
                                        anchor_class_name=anchor_class_name)
    attachments_local_paths = [os.path.join(cloud_store.local_files_folder, attachment_name)
                               for attachment_name in cloud_store.attachments_names]
    # Get the email_data and the attachments from the cloudstore
    cloud_store.download_attachments()
    application_to_send_subject, application_to_send_html = cloud_store.get_application_to_send_email_data()
    inform_should_call_subject, inform_should_call_html = cloud_store.get_inform_should_call_email_data()
    inform_success_subject, inform_success_html = cloud_store.get_inform_success_email_data()

    links_checked = [row[0] for row in data_store.get_applications_sent(columns='link')]
    logger.info("Waiting for new ads..")
    while True:
        new_ads = list(ad_site_crawler.get_new_ads(lookup_url=lookup_url, ads_checked=links_checked,
                                                   crawl_interval=crawl_interval))

        if new_ads:
            links_checked = [row[0] for row in data_store.get_applications_sent(columns='link')]
            emails_checked = [row[0] for row in data_store.get_applications_sent(columns='email')]
            for link, email in new_ads:
                if link not in links_checked and (email not in emails_checked or email is None):
                    if email is None:
                        # Email applicant to inform him that he should call manually
                        logger.info("Link ({}) has no email. Inform the applicant.".format(link))
                        email_app.send_email(subject=inform_should_call_subject,
                                             html=inform_should_call_html.format(link=link),
                                             to=[email_app.get_self_email()])
                    else:
                        # Don't send the application right away (don't be too cocky)
                        time.sleep(send_delay)
                        logger.info("Sending email to: {}. Ad Link: {}".format(email, link))
                        # The link is passed both positionally and by name, so that the template
                        # may use either {} or {link} like the other templates do
                        email_app.send_email(subject=application_to_send_subject,
                                             html=application_to_send_html.format(link, link=link),
                                             to=[email],
                                             attachments=attachments_local_paths)

                        # Inform applicant that an application has been sent successfully
                        email_app.send_email(subject=inform_success_subject,
                                             html=inform_success_html.format(email=email, link=link),
                                             to=[email_app.get_self_email()])

                    email_info = {"link": link, "email": email, "sent_on": datetime.datetime.utcnow().isoformat()}
                    data_store.save_sent_application(email_info)
                    # Remember what was just handled, so that a second ad with the same email in
                    # this batch is not contacted again and the ad is not re-crawled next time
                    links_checked.append(link)
                    if email is not None:
                        emails_checked.append(email)
            logger.info("Waiting for new ads..")

        # Look for new ads again after check_interval seconds
        logger.debug("Sleeping for {check_interval} seconds..".format(check_interval=check_interval))
        time.sleep(check_interval)
def main():
    """
    Parses the arguments, loads the configuration and runs the requested mode.

    :Example:
    python main.py [-m crawl_and_send]
                   -c confs/template_conf.yml
                   -l logs/output.log
    """

    # Initializing
    args, configuration = init_main()
    run_mode = args.run_mode

    # The modes that only need the datastore
    if run_mode in ('list_emails', 'remove_email', 'create_table'):
        data_store = JobBotMySqlDatastore(config=configuration.get_datastores()[0])
        if run_mode == 'list_emails':
            show_ads_checked(ads=data_store.get_applications_sent())
        elif run_mode == 'remove_email':
            data_store.remove_ad(email_id=args.email_id)
        else:
            data_store.create_applications_sent_table()
        return

    if run_mode == 'upload_files':
        cloud_store = JobBotDropboxCloudstore(config=configuration.get_cloudstores()[0])
        upload_files_to_cloudstore(cloud_store=cloud_store)
        return

    if run_mode == 'crawl_and_send':
        data_store = JobBotMySqlDatastore(config=configuration.get_datastores()[0])
        cloud_store = JobBotDropboxCloudstore(config=configuration.get_cloudstores()[0])
        email_app = GmailEmailApp(config=configuration.get_email_apps()[0],
                                  test_mode=configuration.test_mode)
        crawl_and_send_loop(lookup_url=configuration.lookup_url,
                            check_interval=configuration.check_interval,
                            crawl_interval=configuration.crawl_interval,
                            anchor_class_name=configuration.anchor_class_name,
                            data_store=data_store,
                            cloud_store=cloud_store,
                            email_app=email_app)
        return

    # Any other value is rejected
    logger.error('Incorrect run_mode specified!')
    raise argparse.ArgumentTypeError('Incorrect run_mode specified!')
logger = logging.getLogger('TestJobBotDropboxCloudstore')


class TestJobBotDropboxCloudstore(unittest.TestCase):
    """
    Tests JobBotDropboxCloudstore against a real Dropbox account (requires DROPBOX_API_KEY).

    Every test uploads files into remote_tests_folder, which is deleted before and after the
    tests run. setUp() creates a randomly named local file that the tests use as a scratch copy.
    """
    __slots__ = ('configuration', 'file_name', 'remote_tests_folder')

    configuration: Configuration
    file_name: str
    remote_tests_folder: str
    test_data_path: str = os.path.join('test_data', 'test_job_bot_dropbox_cloudstore')

    @staticmethod
    def _read_bytes(path: str) -> bytes:
        """ Returns the contents of a file, closing it afterwards. """
        with open(path, 'rb') as f:
            return f.read()

    def test_init(self):
        req_only_conf = Configuration(
            config_src=os.path.join(self.test_data_path, 'template_conf_required_args_only.yml'))

        cloud_store = JobBotDropboxCloudstore(config=self.configuration.get_cloudstores()[0],
                                              remote_files_folder=self.remote_tests_folder)
        boolean_attributes = [len(cloud_store.attachments_names) > 0,
                              cloud_store._update_stop_words,
                              cloud_store._update_application_to_send_email,
                              cloud_store._update_inform_success_email,
                              cloud_store._update_inform_should_call_email]
        # assertTrue(True, ...) could never fail: the second argument is only the failure message
        self.assertTrue(all(boolean_attributes))
        req_only_cloud_store = JobBotDropboxCloudstore(config=req_only_conf.get_cloudstores()[0],
                                                       remote_files_folder=self.remote_tests_folder)
        req_only_boolean_attributes = [len(req_only_cloud_store.attachments_names) == 0,
                                       not req_only_cloud_store._update_stop_words,
                                       not req_only_cloud_store._update_application_to_send_email,
                                       not req_only_cloud_store._update_inform_success_email,
                                       not req_only_cloud_store._update_inform_should_call_email]
        self.assertTrue(all(req_only_boolean_attributes))

    def test_upload_download_attachment(self):
        cloud_store = JobBotDropboxCloudstore(config=self.configuration.get_cloudstores()[0],
                                              remote_files_folder=self.remote_tests_folder)
        # Copy bck to actual file
        attachment_path = os.path.join(cloud_store.local_files_folder,
                                       cloud_store.attachments_names[0])
        bck_attachment_path = os.path.join(cloud_store.local_files_folder,
                                           'bck_' + cloud_store.attachments_names[0])
        copyfile(bck_attachment_path, attachment_path)
        # Upload attachments
        logger.info('Uploading attachment..')
        cloud_store.upload_attachments()
        # Check if it was uploaded
        self.assertIn(cloud_store.attachments_names[0], cloud_store.ls(self.remote_tests_folder).keys())
        # Rename the old file before downloading it
        logger.info('Renaming the old file before downloading it..')
        os.rename(attachment_path, os.path.join(self.test_data_path, self.file_name))
        # Download it
        logger.info('Downloading attachment..')
        cloud_store.download_attachments()
        # Compare contents of downloaded file with the original
        self.assertEqual(self._read_bytes(os.path.join(self.test_data_path, self.file_name)),
                         self._read_bytes(attachment_path))
        # Delete the attachment
        os.remove(attachment_path)

    def test_update_get_stop_words_data(self):
        cloud_store = JobBotDropboxCloudstore(config=self.configuration.get_cloudstores()[0],
                                              remote_files_folder=self.remote_tests_folder)
        # Copy bck to actual file
        bck_stop_words_path = os.path.join(cloud_store.local_files_folder,
                                           'bck_stop_words.txt')
        stop_words_path = os.path.join(cloud_store.local_files_folder,
                                       'stop_words.txt')
        copyfile(bck_stop_words_path, stop_words_path)
        # Upload stop_words
        logger.info('Uploading stop_words..')
        cloud_store.update_stop_words_data()
        # Check if it was uploaded
        self.assertIn('stop_words.txt', cloud_store.ls(self.remote_tests_folder).keys())
        # Rename the old file before downloading it
        logger.info('Renaming the old file before downloading it..')
        os.rename(os.path.join(self.test_data_path, 'stop_words.txt'),
                  os.path.join(self.test_data_path, self.file_name))
        # Download it
        logger.info('Downloading stop_words..')
        stop_words_downloaded = cloud_store.get_stop_words_data()
        # Rebuild the textual form of the list, as it is stored in the file
        stop_words_downloaded = "['" + "', '".join(stop_words_downloaded) + "']"
        # Compare contents of downloaded file with the original
        self.assertEqual(self._read_bytes(os.path.join(self.test_data_path, self.file_name)),
                         bytes(stop_words_downloaded, encoding='utf8'))

    def test_update_get_email_data(self):
        cloud_store = JobBotDropboxCloudstore(config=self.configuration.get_cloudstores()[0],
                                              remote_files_folder=self.remote_tests_folder)
        email_types = (('application_to_send', cloud_store.get_application_to_send_email_data,
                        cloud_store.update_application_to_send_email_data),
                       ('inform_should_call', cloud_store.get_inform_should_call_email_data,
                        cloud_store.update_inform_should_call_email_data),
                       ('inform_success', cloud_store.get_inform_success_email_data,
                        cloud_store.update_inform_success_email_data))
        for email_type, get_func, update_func in email_types:
            # Copy bcks to actual files
            bck_subject_path = os.path.join(cloud_store.local_files_folder,
                                            'bck_subject.txt')
            bck_html_path = os.path.join(cloud_store.local_files_folder,
                                         'bck_body.html')
            current_subject_file = '{type}_subject.txt'.format(type=email_type)
            current_html_file = '{type}_body.html'.format(type=email_type)
            subject_path = os.path.join(cloud_store.local_files_folder,
                                        current_subject_file)
            html_path = os.path.join(cloud_store.local_files_folder,
                                     current_html_file)
            copyfile(bck_subject_path, subject_path)
            copyfile(bck_html_path, html_path)
            # Upload the email data
            logger.info('Uploading %s email data..' % email_type)
            update_func()
            # Check if it was uploaded
            self.assertIn(current_subject_file, cloud_store.ls(self.remote_tests_folder).keys())
            self.assertIn(current_html_file, cloud_store.ls(self.remote_tests_folder).keys())
            # Rename the old files before downloading them
            logger.info('Renaming the old file before downloading it..')
            copied_subject_file = os.path.join(self.test_data_path,
                                               self.file_name + '_{type}_subject.txt'.format(type=email_type))
            copied_html_file = os.path.join(self.test_data_path,
                                            self.file_name + '_{type}_body.html'.format(type=email_type))
            os.rename(os.path.join(self.test_data_path, current_subject_file), copied_subject_file)
            os.rename(os.path.join(self.test_data_path, current_html_file), copied_html_file)
            try:
                # Download it
                logger.info('Downloading {type} email data..'.format(type=email_type))
                actual_subject, actual_html = get_func()
                logger.debug("Received: %s and %s" % (actual_subject, actual_html))
                # Compare contents of downloaded file with the original
                self.assertEqual(self._read_bytes(copied_subject_file), bytes(actual_subject, encoding='utf-8'))
                self.assertEqual(self._read_bytes(copied_html_file), bytes(actual_html, encoding='utf-8'))
            finally:
                # Clear the copies even when a comparison fails
                logger.info("Clearing file: %s" % copied_subject_file)
                os.remove(copied_subject_file)
                logger.info("Clearing file: %s" % copied_html_file)
                os.remove(copied_html_file)

    @staticmethod
    def _generate_random_filename_and_contents() -> Tuple[str, str]:
        letters = string.ascii_lowercase
        file_name = ''.join(random.choice(letters) for _ in range(10)) + '.txt'
        contents = ''.join(random.choice(letters) for _ in range(20))
        return file_name, contents

    @staticmethod
    def _setup_log() -> None:
        # noinspection PyArgumentList
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                            datefmt='%Y-%m-%d %H:%M:%S',
                            handlers=[logging.StreamHandler()
                                      ]
                            )

    def setUp(self) -> None:
        # Create the randomly named scratch file of this test
        self.file_name, contents = self._generate_random_filename_and_contents()
        with open(os.path.join(self.test_data_path, self.file_name), 'w') as f:
            f.write(contents)

    def tearDown(self) -> None:
        os.remove(os.path.join(self.test_data_path, self.file_name))

    @classmethod
    def setUpClass(cls):
        cls._setup_log()
        if "DROPBOX_API_KEY" not in os.environ:
            logger.error('DROPBOX_API_KEY env variable is not set!')
            raise Exception('DROPBOX_API_KEY env variable is not set!')
        logger.info('Loading Configuration..')
        cls.configuration = Configuration(config_src=os.path.join(cls.test_data_path, 'template_conf_all_args.yml'))
        cls.remote_tests_folder = '/job_bot_tests'
        # Start from a clean remote folder
        cloud_store = JobBotDropboxCloudstore(config=cls.configuration.get_cloudstores()[0])
        cloud_store.delete_file(cls.remote_tests_folder)

    @classmethod
    def tearDownClass(cls):
        cloud_store = JobBotDropboxCloudstore(config=cls.configuration.get_cloudstores()[0])
        cloud_store.delete_file(cls.remote_tests_folder)


if __name__ == '__main__':
    unittest.main()
Variables](#env_variables) 11 | + [Data Files](#data_files) 12 | + [Installing, Testing, Building](#installing) 13 | + [Available Make Commands](#check_make_commamnds) 14 | + [Clean Previous Builds](#clean_previous) 15 | + [Venv and Requirements](#venv_requirements) 16 | + [Run the tests](#tests) 17 | + [Build Locally](#build_locally) 18 | + [Running locally](#run_locally) 19 | + [Configuration](#configuration) 20 | + [Execution Options](#execution_options) 21 | + [Deployment](#deployment) 22 | + [Continuous Integration](#ci) 23 | + [Built With](#built_with) 24 | + [License](#license) 25 | + [Acknowledgments](#acknowledgments) 26 | 27 | ## About 28 | 29 | A bot that automatically sends emails to new ads posted in any desired xe.gr search url. 30 | 31 | After just a few minutes of configuration to suit your needs, it can easily be deployed and start sending your 32 | specified emails to every new ad that gets posted in the search url you select within xe.gr. 33 | 34 | With a little programming, you can also modify the [XeGrAdSiteCrawler class](ad_site_crawler/xegr_ad_site_crawler.py) 35 | and make it support other advertisement sites too. Feel free to fork. 36 | 37 | ## Getting Started 38 | 39 | These instructions will get you a copy of the project up and running on your local machine for development 40 | and testing purposes. See deployment for notes on how to deploy the project on a live system. 41 | 42 | ### Prerequisites 43 | 44 | You need to have a machine with Python >= 3.6 and any Bash-based shell (e.g. zsh) installed. 45 | 46 | ``` 47 | $ python3.6 -V 48 | Python 3.6.9 49 | 50 | echo $SHELL 51 | /usr/bin/zsh 52 | ``` 53 | 54 | You will also need to set up the following: 55 | - Gmail: An application-specific password for your Google account. 56 | [Reference 1](https://support.google.com/mail/?p=InvalidSecondFactor), 57 | [Reference 2](https://security.google.com/settings/security/apppasswords) 58 | - Dropbox: An API key for your Dropbox account. 
59 | [Reference 1](http://99rabbits.com/get-dropbox-access-token/), 60 | [Reference 2](https://dropbox.tech/developers/generate-an-access-token-for-your-own-account) 61 | - MySQL: If you don't have one, you can create a free one on Amazon RDS. 62 | [Reference 1](https://aws.amazon.com/rds/free/), 63 | [Reference 2](https://bigdataenthusiast.wordpress.com/2016/03/05/aws-rds-instance-setup-oracle-db-on-cloud-free-tier/) 64 | 65 | ### Set the required environment variables 66 | 67 | In order to run the [main.py](main.py) or the tests you will need to set the following 68 | environment variables in your system: 69 | 70 | ```bash 71 | DROPBOX_API_KEY= 72 | MYSQL_HOST= 73 | MYSQL_USERNAME= 74 | MYSQL_PASSWORD= 75 | MYSQL_DB_NAME= 76 | EMAIL_ADDRESS= 77 | GMAIL_API_KEY= 78 | CHECK_INTERVAL= 79 | CRAWL_INTERVAL= 80 | TEST_MODE= 81 | LOOKUP_URL= 82 | ``` 83 | 84 | - LOOKUP_URL (str): The url that matches your desired search results. You can copy it straight from your browser. 85 | - CHECK_INTERVAL (int) : The seconds to wait before each check (for new ads). 86 | - CRAWL_INTERVAL (int) : The seconds to wait before each crawl (for the discovery of sublinks). 87 | - TEST_MODE (bool) : If enabled, every email will be sent to you instead of the discovered email addresses. 88 | 89 | ### Modify the files in the data folder 90 | 91 | Before starting, you should modify the emails that are going to be sent, the stop-words, etc. 92 | 93 | - [stop_words.txt](data/stop_words.txt): A list of words that you don't want to be present in the ads that the bot 94 | sends emails to. 95 | - [application_to_send_subject.txt](data/application_to_send_subject.txt): The subject of the email that is going to be sent 96 | to new ads. 97 | - [application_to_send_body.html](data/application_to_send_body.html): The html body of the email that is going to be sent 98 | to new ads. 
99 | - [inform_success_subject.txt](data/inform_success_subject.txt): The subject of the email that is going to be sent 100 | to you when the bot successfully sends an email. 101 | - [inform_success_body.html](data/inform_success_body.html): The html body of the email that is going to be sent 102 | to you when the bot successfully sends an email. Make sure to use the {link} and {email} vars 103 | in order to include them in the email. 104 | - [inform_should_call_subject.txt](data/inform_should_call_subject.txt): The subject of the email that is going to be sent 105 | to you when the bot couldn't find any email address in a new ad, and requires manual action. 106 | - [inform_should_call_body.html](data/inform_should_call_body.html): The html body of the email that is going to be sent 107 | to you when the bot couldn't find any email address in a new ad, and requires manual action. Make sure to use the {link} var 108 | in order to include it in the email. 109 | - Attachments: Add any attachments you want to be included in the Ad Email and define 110 | their names in [xegr_jobs.yml](confs/xegr_jobs.yml) 111 | 112 | 113 | ## Installing, Testing, Building 114 | 115 | All the installation steps are being handled by the [Makefile](Makefile). 116 | 117 | If you don't want to go through the setup steps one by one and just want to finish the installation and run the tests, 118 | execute the following command: 119 | 120 | ```bash 121 | $ make install server=local 122 | ``` 123 | 124 | If you executed the previous command, you can skip through to the [Running locally](#run_locally) section. 
125 | 126 | ### Check the available make commands 127 | 128 | ```bash 129 | $ make help 130 | 131 | ----------------------------------------------------------------------------------------------------------- 132 | DISPLAYING HELP 133 | ----------------------------------------------------------------------------------------------------------- 134 | make delete_venv 135 | Delete the current venv 136 | make create_venv 137 | Create a new venv for the specified python version 138 | make requirements 139 | Upgrade pip and install the requirements 140 | make run_tests 141 | Run all the tests from the specified folder 142 | make setup 143 | Call setup.py install 144 | make clean_pyc 145 | Clean all the pyc files 146 | make clean_build 147 | Clean all the build folders 148 | make clean 149 | Call delete_venv clean_pyc clean_build 150 | make install 151 | Call clean create_venv requirements run_tests setup 152 | make help 153 | Display this message 154 | ----------------------------------------------------------------------------------------------------------- 155 | ``` 156 | 157 | ### Clean any previous builds 158 | 159 | ```bash 160 | $ make clean server=local 161 | make delete_venv 162 | make[1]: Entering directory '/home/drkostas/Projects/AutoApplyBot' 163 | Deleting venv.. 164 | rm -rf venv 165 | make[1]: Leaving directory '/home/drkostas/Projects/AutoApplyBot' 166 | make clean_pyc 167 | make[1]: Entering directory '/home/drkostas/Projects/AutoApplyBot' 168 | Cleaning pyc files.. 169 | find . -name '*.pyc' -delete 170 | find . -name '*.pyo' -delete 171 | find . -name '*~' -delete 172 | make[1]: Leaving directory '/home/drkostas/Projects/AutoApplyBot' 173 | make clean_build 174 | make[1]: Entering directory '/home/drkostas/Projects/AutoApplyBot' 175 | Cleaning build directories.. 
176 | rm --force --recursive build/ 177 | rm --force --recursive dist/ 178 | rm --force --recursive *.egg-info 179 | make[1]: Leaving directory '/home/drkostas/Projects/AutoApplyBot' 180 | 181 | ``` 182 | 183 | ### Create a new venv and install the requirements 184 | 185 | ```bash 186 | $ make create_venv server=local 187 | Creating venv.. 188 | python3.6 -m venv ./venv 189 | 190 | $ make requirements server=local 191 | Upgrading pip.. 192 | venv/bin/pip install --upgrade pip wheel setuptools 193 | Collecting pip 194 | ................. 195 | ``` 196 | 197 | 198 | 199 | ### Run the tests 200 | 201 | The tests are located in the `tests` folder. To run all of them, execute the following command: 202 | 203 | ```bash 204 | $ make run_tests server=local 205 | source venv/bin/activate && \ 206 | ................. 207 | ``` 208 | 209 | ### Build the project locally 210 | 211 | To build the project locally using the setup.py command, execute the following command: 212 | 213 | ```bash 214 | $ make setup server=local 215 | venv/bin/python setup.py install '--local' 216 | running install 217 | ................. 218 | ``` 219 | 220 | ## Running the code locally 221 | 222 | In order to run the code now, you will only need to change the yml file if you need to 223 | and run either the main or the created console script. 
224 | 225 | ### Modifying the Configuration 226 | 227 | There is an already configured yml file under [xegr_jobs.yml](confs/xegr_jobs.yml) with the following structure: 228 | 229 | ```yaml 230 | tag: production 231 | lookup_url: !ENV ${LOOKUP_URL} 232 | check_interval: !ENV ${CHECK_INTERVAL} 233 | crawl_interval: !ENV ${CRAWL_INTERVAL} 234 | test_mode: !ENV ${TEST_MODE} 235 | cloudstore: 236 | - config: 237 | api_key: !ENV ${DROPBOX_API_KEY} 238 | local_files_folder: data 239 | attachments_names: 240 | - cv.pdf 241 | - cover_letter.pdf 242 | update_attachments: true 243 | update_stop_words: true 244 | update_application_to_send_email: true 245 | update_inform_success_email: true 246 | update_inform_should_call_email: true 247 | type: dropbox 248 | datastore: 249 | - config: 250 | hostname: !ENV ${MYSQL_HOST} 251 | username: !ENV ${MYSQL_USERNAME} 252 | password: !ENV ${MYSQL_PASSWORD} 253 | db_name: !ENV ${MYSQL_DB_NAME} 254 | port: 3306 255 | type: mysql 256 | email_app: 257 | - config: 258 | email_address: !ENV ${EMAIL_ADDRESS} 259 | api_key: !ENV ${GMAIL_API_KEY} 260 | type: gmail 261 | 262 | ``` 263 | 264 | The `!ENV` flag indicates that an environment variable follows. 265 | You can change the values/environment variable names as you wish. 266 | If a yaml variable name is changed/added/deleted, the corresponding changes should be reflected 267 | on the [Configuration class](configuration/configuration.py) and the [yml_schema.json](configuration/yml_schema.json) too. 
268 | 269 | You can also modify each class's default options 270 | 271 | ### Execution Options 272 | 273 | First, make sure you are in the created virtual environment: 274 | 275 | ```bash 276 | $ source venv/bin/activate 277 | (venv) 278 | OneDrive/Projects/auto_apply_bot dev 279 | 280 | $ which python 281 | /home/drkostas/Projects/auto_apply_bot/venv/bin/python 282 | (venv) 283 | ``` 284 | 285 | If it's the first time you are running the code you may need to execute those 2 steps: 286 | - To create the required table in the Database run: 287 | 288 | `$ python main.py -m create_table -c confs/conf.yml -l logs/output.log` 289 | 290 | - To upload the files that are going to be used to Dropbox (after modifying them appropriately) 291 | run: 292 | 293 | `$ python main.py -m upload_files -c confs/conf.yml -l logs/output.log` 294 | 295 | Now, in order to run the code you can either call the `main.py` directly, or the `auto_apply_bot` console script. 296 | 297 | ```bash 298 | $ python main.py --help 299 | usage: main.py -m 300 | {crawl_and_send,list_emails,remove_email,upload_files,create_table} 301 | -c CONFIG_FILE [-l LOG] [--email-id EMAIL_ID] [-d] [-h] 302 | 303 | A bot that automatically sends emails to new ads posted in the specified xe.gr 304 | search page. 
305 | 306 | required arguments: 307 | -m {crawl_and_send,list_emails,remove_email,upload_files,create_table}, --run-mode {crawl_and_send,list_emails,remove_email,upload_files,create_table} 308 | -c CONFIG_FILE, --config-file CONFIG_FILE 309 | The configuration yml file 310 | -l LOG, --log LOG Name of the output log file 311 | 312 | Optional Arguments: 313 | --email-id EMAIL_ID The id of the email you want to be deleted 314 | -d, --debug Enables the debug log messages 315 | -h, --help Show this help message and exit 316 | 317 | 318 | # Or 319 | 320 | $ auto_apply_bot --help 321 | usage: auto_apply_bot -m 322 | {crawl_and_send,list_emails,remove_email,upload_files,create_table} 323 | -c CONFIG_FILE [-l LOG] [--email-id EMAIL_ID] [-d] [-h] 324 | 325 | A bot that automatically sends emails to new ads posted in the specified xe.gr 326 | search page. 327 | 328 | required arguments: 329 | -m {crawl_and_send,list_emails,remove_email,upload_files,create_table}, --run-mode {crawl_and_send,list_emails,remove_email,upload_files,create_table} 330 | -c CONFIG_FILE, --config-file CONFIG_FILE 331 | The configuration yml file 332 | -l LOG, --log LOG Name of the output log file 333 | 334 | Optional Arguments: 335 | --email-id EMAIL_ID The id of the email you want to be deleted 336 | -d, --debug Enables the debug log messages 337 | -h, --help Show this help message and exit 338 | 339 | ``` 340 | 341 | If you notice that no ad is being discovered, fine-tune the `crawl_interval` and `anchor_class_name` values that affect 342 | the [XeGrAdSiteCrawler class](ad_site_crawler/xegr_ad_site_crawler.py). 343 | 344 | - The `crawl_interval` defines the time between each crawl and should be increased 345 | if the bot is being flagged as a bot (well..). You can change this from the yaml file. 346 | 347 | - The `anchor_class_name` is the css class value that characterizes all the search results anchors (` 352 | 353 | The deployment is being done to Heroku. 
For more information 354 | you can check the [setup guide](https://devcenter.heroku.com/articles/getting-started-with-python). 355 | 356 | Make sure you check the defined [Procfile](Procfile) ([reference](https://devcenter.heroku.com/articles/getting-started-with-python#define-a-procfile)) 357 | and that you set the [above-mentioned environmental variables](#env_variables) ([reference](https://devcenter.heroku.com/articles/config-vars)). 358 | 359 | ## Continuous Integration 360 | 361 | For the continuous integration, the CircleCI service is being used. 362 | For more information you can check the [setup guide](https://circleci.com/docs/2.0/language-python/). 363 | 364 | Again, you should set the [above-mentioned environmental variables](#env_variables) ([reference](https://circleci.com/docs/2.0/env-vars/#setting-an-environment-variable-in-a-context)) 365 | and for any modifications, edit the [circleci config](/.circleci/config.yml). 366 | 367 | ## Built With 368 | 369 | * [Dropbox Python API](https://www.dropbox.com/developers/documentation/python) - Used for the Cloudstore Class 370 | * [Gmail Sender](https://github.com/paulc/gmail-sender) - Used for the EmailApp Class 371 | * [Heroku](https://www.heroku.com) - The deployment environment 372 | * [CircleCI](https://www.circleci.com/) - Continuous Integration service 373 | 374 | 375 | ## License 376 | 377 | This project is licensed under the GNU License - see the [LICENSE](LICENSE) file for details. 
378 | 379 | ## Acknowledgments 380 | 381 | * Thanks to PurpleBooth for the [README template](https://gist.github.com/PurpleBooth/109311bb0361f32d87a2) 382 | 383 | -------------------------------------------------------------------------------- /tests/test_data/test_xegr_ad_site_crawler/file_with_email_2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 11 | 12 | 13 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | Magento 2 Developer - Αιγάλεω - Αγγελίες Εργασίας | xe.gr 68 | 69 | 70 | 71 | 72 | 73 | 74 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 94 | 95 | 96 | 97 |
98 |
99 | 178 | 179 | 253 |
254 |
255 | 256 | 257 | 258 | 259 | 260 |
261 |
262 |
263 | 282 |
283 | 284 |
285 |
286 |
287 |
288 | 289 |
290 |
291 | Magento 2 Developer 292 |
293 | 294 |
295 | Αιγάλεω - Προγραμματιστές & Μηχανικοί Η/Υ - 900 € 296 |
297 |
298 |
299 |
300 | 301 |
302 |
303 |
304 | 308 |
309 | 310 |
311 |
312 | 313 |
314 |
315 |
316 |

ΖΗΤΕΊΤΑΙ Senior Developer για inhouse ή Freelance συνεργασία με καλή γνώση Magento 2, Php(Mvc Framework), Mysql.

Αποστολή βιογραφικών efi.koulourianou@gmail.com

, τιμή 900€, συζητήσιμη efi.koulourianou@gmail.com 317 |
318 |
319 |
320 | 321 |
322 |
323 |
324 |
325 | Τελευταία τροποποίηση: Πέμπτη, 30 Απριλίου 2020 326 |
327 | 328 |
329 | Επισκέψεις: 70 330 |
331 |
332 |
333 |
334 | 335 |
336 |
337 |
338 | 341 |
342 | 343 |
344 |
345 |
346 |
347 | 348 |
349 | 350 |
351 |
352 | 353 | 354 |
355 | 356 |
357 |
358 | 359 |
360 |
361 |
362 |
363 |
364 | Τελευταία τροποποίηση: Πέμπτη, 30 Απριλίου 2020 365 |
366 | 367 |
368 | Επισκέψεις: 70 369 |
370 |
371 |
372 |
373 |
374 | 375 | 376 |
377 | 390 | 391 | 543 |
544 | 545 |
546 |
547 | 548 | 549 | 550 | 671 | 672 | 673 | 674 | -------------------------------------------------------------------------------- /tests/test_data/test_xegr_ad_site_crawler/file_with_email_4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 11 | 12 | 13 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | ΖΗΤΕΙΤΑΙ ΔΙΑΧΕΙΡΙΣΤΗΣ Ε-SHOP - Νεάπολη - Αγγελίες Εργασίας | xe.gr 68 | 69 | 70 | 71 | 72 | 73 | 74 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 94 | 95 | 96 | 97 |
98 |
99 | 178 | 179 | 253 |
254 |
255 | 256 | 257 | 258 | 259 | 260 |
261 |
262 |
263 | 282 |
283 | 284 |
285 |
286 |
287 |
288 | 289 |
290 |
291 | ΖΗΤΕΙΤΑΙ ΔΙΑΧΕΙΡΙΣΤΗΣ Ε-SHOP 292 |
293 | 294 |
295 | Νεάπολη - Προγραμματιστές & Μηχανικοί Η/Υ - 750 € 296 |
297 |
298 |
299 |
300 | 301 |
302 |
303 |
304 | 308 |
309 | 310 |
311 |
312 | 313 |
314 |
315 |
316 |
ΖΗΤΕΙΤΑΙ ΔΙΑΧΕΙΡΙΣΤΗΣ Ε-SHOP ΦΑΡΜΑΚΕΙΟΥ. ΑΠΑΡΑΙΤΗΤΗ ΠΡΟΥΠΗΡΕΣΙΑ ΣΕ ΑΝΤΙΣΤΟΙΧΗ ΘΕΣΗ.
ΠΛΗΡΗΣ ΑΠΑΣΧΟΛΗΣΗ, ΓΙΑ ΚΑΘΗΜΕΡΙΝΕΣ ΛΕΙΤΟΥΡΓΙΕΣ Ε-SHOP, ΑΝΕΒΑΣΜΑ ΝΕΩΝ ΠΡΟΙΟΝΤΩΝ, ΕΞΥΠΗΡΕΤΗΣΗ ΠΕΛΑΤΩΝ, SEO, NEWSLETTERS, ADWORDS, ΠΕΡΑΣΜΑ ΤΙΜΟΛΟΓΙΩΝ. ΣΤΟ ΚΕΝΤΡΟ ΣΤΗΝ ΑΘΗΝΑ ΚΟΝΤΑ ΣΤΟ ΜΕΤΡΟ ΠΑΝΕΠΙΣΤΗΜΙΟ.
ΠΑΡΑΚΑΛΩ ΒΙΟΓΡΑΦΙΚΑ ΣΤΟ Ε-ΜΑΙΛ
, τιμή 750€ epharmacy137@gmail.com 317 |
318 |
319 |
320 | 321 |
322 |
323 |
324 |
325 | Τελευταία τροποποίηση: Σάββατο, 11 Απριλίου 2020 326 |
327 | 328 |
329 | Επισκέψεις: 2206 330 |
331 |
332 |
333 |
334 | 335 |
336 |
337 |
338 | 341 |
342 | 343 |
344 |
345 |
346 |
347 | 348 |
349 | 350 |
351 |
352 | 353 | 354 |
355 | 356 |
357 |
358 | 359 |
360 |
361 |
362 |
363 |
364 | Τελευταία τροποποίηση: Σάββατο, 11 Απριλίου 2020 365 |
366 | 367 |
368 | Επισκέψεις: 2206 369 |
370 |
371 |
372 |
373 |
374 | 375 | 376 |
377 | 390 | 391 | 543 |
544 | 545 |
546 |
547 | 548 | 549 | 550 | 671 | 672 | 673 | 674 | --------------------------------------------------------------------------------