├── .dockerignore ├── alembic ├── README ├── script.py.mako ├── env.py └── versions │ └── 0f1755b76fbf_initial.py ├── tests ├── __init__.py ├── conftest.py ├── fixtures │ ├── fixture_cities.py │ ├── fixture_db.py │ └── fixture_config.py └── test_services.py ├── Dockerfile ├── collector ├── __init__.py ├── services │ ├── __init__.py │ ├── base.py │ ├── cities.py │ └── weather.py ├── functools.py ├── exceptions.py ├── session.py ├── configurations.py └── models.py ├── docker-compose.yaml ├── cspell.config.yaml ├── .pre-commit-config.yaml ├── manage.py ├── pytest.ini ├── LICENSE ├── requirements.txt ├── makefile ├── pyproject.toml ├── alembic.ini ├── .gitignore └── README.md /.dockerignore: -------------------------------------------------------------------------------- 1 | debug.env 2 | cities.json 3 | db.sqlite3 -------------------------------------------------------------------------------- /alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['logger'] 2 | 3 | from .conftest import logger 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax=docker/dockerfile:1 2 | 3 | FROM python:3.10-slim-buster 4 | 5 | WORKDIR / 6 | 7 | COPY requirements.txt requirements.txt 8 | 9 | RUN pip3 install -r requirements.txt 10 | 11 | COPY . . -------------------------------------------------------------------------------- /collector/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['configurations', 'session', 'exceptions', 'functools', 'models'] 2 | 3 | from . import configurations, exceptions, functools, models, session 4 | from .configurations import logger 5 | -------------------------------------------------------------------------------- /collector/services/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | 'BaseService', 3 | 'FetchCities', 4 | 'InitCities', 5 | 'CollectScheduler', 6 | 'FetchWeather', 7 | ] 8 | 9 | from .base import BaseService 10 | from .cities import FetchCities, InitCities 11 | from .weather import CollectScheduler, FetchWeather 12 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | name: weather-collector-docker-compose 3 | services: 4 | 5 | db: 6 | image: postgres:13.0-alpine 7 | volumes: 8 | - postgres:/var/lib/postgresql/data/ 9 | env_file: 10 | - ./prod.env 11 | 12 | worker: 13 | build: . 14 | stdin_open: true 15 | tty: true 16 | restart: on-failure 17 | depends_on: 18 | - db 19 | env_file: 20 | - ./prod.env 21 | 22 | volumes: 23 | postgres: -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main config file for pytest. 
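Fixtures are split into `tests/fixtures/*` modules and registered via the `pytest_plugins` list below. 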
3 | """ 4 | 5 | import pytest 6 | 7 | from collector.functools import init_logger 8 | 9 | pytest_plugins = [ 10 | 'tests.fixtures.fixture_db', 11 | 'tests.fixtures.fixture_config', 12 | 'tests.fixtures.fixture_cities', 13 | ] 14 | 15 | logger = init_logger('pytest', 'DEBUG') 16 | 17 | 18 | @pytest.fixture(autouse=True) 19 | def new_line(): 20 | """ 21 | Fixture simple makes new line to separate each test logging output. 22 | """ 23 | print('\n') 24 | -------------------------------------------------------------------------------- /cspell.config.yaml: -------------------------------------------------------------------------------- 1 | version: "0.2" 2 | ignorePaths: 3 | - .gitignore 4 | - requirements.txt 5 | - alembic* 6 | allowCompoundWords: true 7 | dictionaries: 8 | - python 9 | words: 10 | - apscheduler 11 | - clsname 12 | - grnd 13 | - isort 14 | - isready 15 | - Misha 16 | - mypy 17 | - ondelete 18 | - onupdate 19 | - psycopg 20 | - pydantic 21 | - pytest 22 | - sqlalchemy 23 | - testdb 24 | - venv 25 | - vybornyy 26 | ignoreWords: [] 27 | import: [] 28 | enableFiletypes: 29 | - dockercompose 30 | - ini 31 | - makefile 32 | - markdown 33 | - toml 34 | -------------------------------------------------------------------------------- /collector/functools.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Literal 3 | 4 | 5 | def init_logger( 6 | name: str, level: int | Literal['DEBUG', 'INFO', 'WARNING', 'ERROR'] = logging.INFO 7 | ): 8 | """ 9 | Configure and get logger by provided name. 10 | """ 11 | if isinstance(level, str): 12 | level = getattr(logging, level) 13 | 14 | logger = logging.getLogger(name) 15 | logger.setLevel(level) 16 | handler = logging.StreamHandler() 17 | handler.setFormatter(logging.Formatter('%(levelname)s - %(message)s')) 18 | logger.addHandler(handler) 19 | return logger 20 | -------------------------------------------------------------------------------- /alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 
13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade() -> None: 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade() -> None: 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.10.4 3 | repos: 4 | - repo: https://github.com/PyCQA/autoflake 5 | rev: v2.0.0 6 | hooks: 7 | - id: autoflake 8 | exclude: alembic/ 9 | 10 | - repo: https://github.com/pycqa/isort 11 | rev: 5.10.1 12 | hooks: 13 | - id: isort 14 | name: isort (python) 15 | 16 | - repo: https://github.com/psf/black 17 | rev: 22.10.0 18 | hooks: 19 | - id: black 20 | exclude: alembic/ 21 | 22 | - repo: https://github.com/streetsidesoftware/cspell-cli 23 | rev: v6.2.0 24 | hooks: 25 | - id: cspell -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from apscheduler.schedulers.blocking import BlockingScheduler 4 | 5 | from collector.configurations import logger 6 | from collector.services import BaseService 7 | 8 | 9 | def main(): 10 | options = sys.argv[1:] # the first arg is 'manage.py'; skip it 11 | try: 12 | service = BaseService.manage_services(options) 13 | except (KeyboardInterrupt, SystemExit) as e: 14 | if '--help' in options or '-h' in options: 15 | logger.info(BaseService.get_descriptions()) 16 | raise e 17 | 18 | service.execute() 19 | 20 | 21 | if __name__ == '__main__': 22 | main() 23 | -------------------------------------------------------------------------------- /collector/exceptions.py: -------------------------------------------------------------------------------- 1 | class CollectorBaseException(Exception): 2 | message: str = '' 3 | 4 | def __init__(self, *args: object, msg: str = '') -> None: 5 | self.message = msg or self.message 6 | super().__init__(*args) 7 | 8 | def __str__(self) -> str: 9 | return self.message + f'Details: {self.args}' 10 | 11 | 12 | class ResponseError(CollectorBaseException): 13 | message = 'Unexpected response. ' 14 | 15 | 16 | class ResponseSchemaError(ResponseError): 17 | message = 'Unexpected response data schema. ' 18 | 19 | 20 | class NoDataError(CollectorBaseException): 21 | message = 'No data provided. ' 22 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | python_files = 3 | test_*.py 4 | 5 | ; run pytest with "-m marker_name" to run tests with that marker 6 | ; deselect with '-m "not marker_name"' 7 | markers = 8 | debug_this, 9 | slow, 10 | postgres 11 | 12 | ; apply docstring tests 13 | ; ignore tests with the "slow" marker and the alembic directory 14 | addopts = --doctest-modules -m "not slow and postgres" --ignore="alembic" 15 | ; --ignore-glob="*base.py" ; for regex patterns 16 | 17 | ; specify source directories and append them to python paths 18 | ; (the same configuration could be applied at the main 'config.py' file) 19 | ; pythonpath = . 
src 20 | 21 | # pytest configuration for Django project: 22 | # 23 | # ; pytest-django settings: 24 | # DJANGO_SETTINGS_MODULE = main.settings 25 | 26 | # ; pytest-django cannot find the django project automatically because of the nested folder structure, 27 | # ; so specify the path to the manage.py file. see the docs for details about pythonpath: 28 | # ; https://pytest-django.readthedocs.io/en/latest/managing_python_path.html 29 | # django_find_project = false 30 | # pythonpath = . apps -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Misha Vybornyy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.3 2 | aiosignal==1.3.1 3 | alembic==1.8.1 4 | APScheduler==3.9.1.post1 5 | async-timeout==4.0.2 6 | attrs==22.1.0 7 | autoflake==1.7.7 8 | black==22.6.0 9 | certifi==2022.9.24 10 | charset-normalizer==2.1.1 11 | click==8.1.3 12 | coverage==6.5.0 13 | exceptiongroup==1.0.2 14 | flake8==5.0.4 15 | frozenlist==1.3.3 16 | greenlet==2.0.1 17 | idna==3.4 18 | importlib-metadata==3.4.0 19 | iniconfig==1.1.1 20 | isort==5.10.1 21 | Mako==1.2.4 22 | MarkupSafe==2.1.1 23 | mccabe==0.7.0 24 | multidict==6.0.2 25 | mypy==0.971 26 | mypy-extensions==0.4.3 27 | packaging==21.3 28 | pathspec==0.9.0 29 | platformdirs==2.5.2 30 | pluggy==1.0.0 31 | psycopg2-binary==2.9.5 32 | pycodestyle==2.9.1 33 | pydantic==1.10.2 34 | pyflakes==2.5.0 35 | pyparsing==3.0.9 36 | pytest==7.2.0 37 | pytest-cov==4.0.0 38 | python-dotenv==0.21.0 39 | pytz==2022.6 40 | pytz-deprecation-shim==0.1.0.post0 41 | requests==2.28.1 42 | six==1.16.0 43 | SQLAlchemy==1.4.44 44 | tomli==2.0.1 45 | typed-ast==1.5.4 46 | types-pytz==2022.6.0.1 47 | types-requests==2.28.11.5 48 | types-urllib3==1.26.25.4 49 | typing_extensions==4.3.0 50 | tzdata==2022.6 51 | tzlocal==4.2 52 | urllib3==1.26.12 53 | yarl==1.8.1 54 | zipp==3.8.1 55 | -------------------------------------------------------------------------------- /tests/fixtures/fixture_cities.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | import sqlalchemy.orm as orm 5 | 6 | from collector import models 7 | from collector.configurations import CollectorConfig 8 | 9 | 10 | @pytest.fixture 11 | def cities_list(): 12 | return [ 13 | {'name': 'Shanghai'}, 14 | {'name': 'Istanbul'}, 15 | {'name': 'Tokyo'}, 16 | {'name': 'Moscow'}, 17 | {'name': 'Entebbe'}, # small African city 18 | ] 19 | 20 | 21 | @pytest.fixture 22 | def broken_cities_file(config: CollectorConfig): 23 | invalid_schema = [ 24 | {'name': 'Moscow'}, 25 | {'no_name': 'no_city'}, 26 | ] 27 | with open(config.cities_file, 'w+', encoding='utf-8') as file: 28 | json.dump(invalid_schema, file) 29 | 30 | 31 | @pytest.fixture 32 | def cities_file(config: CollectorConfig, cities_list: list[dict]): 33 | with open(config.cities_file, 'w+', encoding='utf-8') as file: 34 | json.dump(cities_list, file) 35 | return cities_list 36 | 37 | 38 | @pytest.fixture 39 | def seed_cities_to_database(cities_list, session: orm.Session): 40 | session.add_all([models.CityModel(**city) for city in cities_list]) 41 | session.commit() 42 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | # This file presents CLI shortcuts. 
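# Typical bootstrap: run 'make app' to build the images, apply migrations, fetch the initial cities list and start collecting. 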
2 | # See https://makefiletutorial.com/#variables for more details. 3 | 4 | migrations: 5 | docker-compose exec worker alembic upgrade head 6 | 7 | initialization: 8 | docker-compose exec worker python manage.py fetch_cities 9 | 10 | collecting: 11 | docker-compose exec worker python manage.py collect 12 | 13 | app: 14 | docker-compose build 15 | docker-compose up -d 16 | make migrations 17 | make initialization 18 | make collecting 19 | 20 | 21 | ###################################################################### 22 | # development tools 23 | ###################################################################### 24 | 25 | 26 | format: 27 | @autoflake --remove-all-unused-imports -vv --ignore-init-module-imports -r . 28 | @echo "make format calls autoflake, which will remove all unused imports listed above. Are you sure?" 29 | @echo "Press Enter to proceed. Ctrl-C to abort." 30 | @read 31 | autoflake --in-place --remove-all-unused-imports --ignore-init-module-imports -r . 32 | black . 33 | isort . 34 | mypy . 35 | flake8 . 36 | 37 | 38 | push: 39 | @git status 40 | @echo "All files listed above will be added to the commit. Enter a commit message to proceed. Ctrl-C to abort." 41 | @read -p "Commit message: " COMMIT_MESSAGE; git add . ; git commit -m "$$COMMIT_MESSAGE" 42 | @git push 43 | 44 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.mypy] 2 | ignore_missing_imports = true 3 | 4 | [tool.black] 5 | # line-length = 79 # max line length 6 | skip-string-normalization = 1 # disable converting single quotes to double 7 | # by default black excludes all .gitignore'd files. here is a regex pattern to extend this list 8 | # NOTE! a list of file names is not supported, only regex patterns 9 | # WARNING! also pass this list of files to pre-commit-config. 10 | extend-exclude = 'alembic/' 11 | 12 | 13 | [tool.isort] 14 | skip_glob = '*tmp*' # ignore tmp files 15 | profile = 'black' 16 | 17 | 18 | [tool.autoflake] 19 | exclude = ['venv', 'alembic', '*tmp*'] # WARNING! also pass this list of files to pre-commit-config. 20 | 21 | 22 | [tool.flake8] 23 | max-line-length = 88 # PEP8: 79 | black: 88 | google: 120 24 | extend-ignore = [ 25 | 'E731', # assignment of lambda to variable - okay 26 | 'E501', # line too long 27 | # (Black forces lines to N symbols, but when it can't be done we leave them as is) 28 | 'E203', # whitespace before slice colon: seq[1 :2] 29 | 'N805', # first argument of a method should be named 'self' (to handle pydantic validator methods) 30 | 'N815', # because of the countryCode field 31 | 'R504', # unnecessary variable assignment before return statement 32 | ] 33 | per-file-ignores = [ 34 | '*/__init__.py:F401', # imported but unused 35 | ] 36 | exclude = [ 37 | 'venv/*', 38 | 'alembic/*', 39 | '*tmp*', 40 | '*untitled*', 41 | ] -------------------------------------------------------------------------------- /tests/fixtures/fixture_db.py: -------------------------------------------------------------------------------- 1 | from typing import Type 2 | 3 | import pytest 4 | import sqlalchemy as db 5 | import sqlalchemy.orm as orm 6 | 7 | import collector 8 | from collector import models 9 | from collector.configurations import CollectorConfig 10 | from tests import logger 11 | 12 | 13 | @pytest.fixture # (scope="session") 14 | def engine(config: CollectorConfig): 15 | logger.debug(f'engine fixture. 
bind to: {config.db.url}') 16 | return db.create_engine(config.db.url, future=True, echo=False) 17 | 18 | 19 | @pytest.fixture 20 | def patch_engine(monkeypatch: pytest.MonkeyPatch, engine: db.engine.Engine): 21 | logger.debug('patch_engine fixture') 22 | monkeypatch.setattr(collector.session, 'engine', engine) 23 | 24 | 25 | @pytest.fixture # (scope="session") 26 | def setup_database(engine: db.engine.Engine, patch_engine): 27 | logger.debug('setup_database fixture') 28 | 29 | models.Base.metadata.drop_all(engine) # clear leftovers from previous broken tests 30 | models.Base.metadata.create_all(engine) 31 | yield 32 | logger.debug(engine.pool.status()) 33 | models.Base.metadata.drop_all(engine) 34 | 35 | 36 | @pytest.fixture 37 | def session_class(): 38 | logger.debug('session_class fixture') 39 | return orm.Session # return the default Session, not one from orm.sessionmaker (for now) 40 | 41 | 42 | @pytest.fixture 43 | def session(engine: db.engine.Engine, session_class: Type[orm.Session]): 44 | logger.debug('opened session fixture') 45 | with session_class(engine) as session: 46 | yield session 47 | session.commit() 48 | -------------------------------------------------------------------------------- /tests/fixtures/fixture_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pydantic 4 | import pytest 5 | 6 | from collector.configurations import ( 7 | CONFIG, 8 | CollectorConfig, 9 | DatabaseConfig, 10 | SQLiteDatabaseConfig, 11 | ) 12 | from tests import logger 13 | 14 | TEST_CITIES_FILE = os.path.join(os.path.dirname(__file__), 'testcities.json') 15 | TEST_DB_FILE = os.path.join(os.path.dirname(__file__), 'testdb.sqlite3') 16 | 17 | 18 | @pytest.fixture( 19 | scope="session", 20 | params=[ 21 | pytest.param(SQLiteDatabaseConfig(path=TEST_DB_FILE), id='sqlite'), 22 | pytest.param( 23 | DatabaseConfig( 24 | user='test', 25 | password='test', 26 | host='localhost', 27 | database='test', 28 | ), 29 | id='postgres', 30 | marks=pytest.mark.postgres, 31 | ), 32 | ], 33 | ) 34 | def config(request: pytest.FixtureRequest): 35 | db_config: pydantic.BaseSettings = request.param 36 | config = CollectorConfig( 37 | debug=False, 38 | cities_amount=20, 39 | cities_file=TEST_CITIES_FILE, 40 | collect_weather_delay=0.5, 41 | retry_collect_delay=1, 42 | db=db_config.dict(), 43 | ) 44 | logger.debug(f'Running tests under these configurations: {config}') 45 | return config 46 | 47 | 48 | @pytest.fixture 49 | def mock_config(monkeypatch: pytest.MonkeyPatch, config: CollectorConfig): 50 | """ 51 | Patch the collector config and clean up test files ('cities.json') afterwards. 52 | """ 53 | cities = config.cities_file 54 | if os.path.isfile(cities): 55 | logger.warning(f'Test begins with already existing {cities}. ') 56 | 57 | for field in CollectorConfig.__fields__: 58 | monkeypatch.setattr(CONFIG, field, getattr(config, field)) 59 | yield 60 | 61 | if os.path.isfile(cities): 62 | os.remove(cities) 63 | -------------------------------------------------------------------------------- /alembic/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | from sqlalchemy import engine_from_config, pool 4 | 5 | from alembic import context 6 | from collector.configurations import CONFIG 7 | from collector.models import Base 8 | 9 | # this is the Alembic Config object, which provides 10 | # access to the values within the .ini file in use. 
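# Note: the placeholder sqlalchemy.url value from alembic.ini is overridden just below with the URL built from the collector's own CONFIG object. 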
11 | config = context.config 12 | config.set_main_option('sqlalchemy.url', CONFIG.db.url) 13 | 14 | # Interpret the config file for Python logging. 15 | # This line sets up loggers basically. 16 | if config.config_file_name is not None: 17 | fileConfig(config.config_file_name) 18 | 19 | # add your model's MetaData object here 20 | # for 'autogenerate' support 21 | # from myapp import mymodel 22 | # target_metadata = mymodel.Base.metadata 23 | target_metadata = Base.metadata 24 | 25 | # other values from the config, defined by the needs of env.py, 26 | # can be acquired: 27 | # my_important_option = config.get_main_option("my_important_option") 28 | # ... etc. 29 | 30 | 31 | def run_migrations_offline() -> None: 32 | """Run migrations in 'offline' mode. 33 | 34 | This configures the context with just a URL 35 | and not an Engine, though an Engine is acceptable 36 | here as well. By skipping the Engine creation 37 | we don't even need a DBAPI to be available. 38 | 39 | Calls to context.execute() here emit the given string to the 40 | script output. 41 | 42 | """ 43 | url = config.get_main_option("sqlalchemy.url") 44 | context.configure( 45 | url=url, 46 | target_metadata=target_metadata, 47 | literal_binds=True, 48 | dialect_opts={"paramstyle": "named"}, 49 | render_as_batch=True, # special option for sqlite alter column migration 50 | ) 51 | 52 | with context.begin_transaction(): 53 | context.run_migrations() 54 | 55 | 56 | def run_migrations_online() -> None: 57 | """Run migrations in 'online' mode. 58 | 59 | In this scenario we need to create an Engine 60 | and associate a connection with the context. 61 | 62 | """ 63 | connectable = engine_from_config( 64 | config.get_section(config.config_ini_section), 65 | prefix="sqlalchemy.", 66 | poolclass=pool.NullPool, 67 | ) 68 | 69 | with connectable.connect() as connection: 70 | context.configure( 71 | connection=connection, 72 | target_metadata=target_metadata, 73 | render_as_batch=True, # special option for sqlite alter column migration 74 | ) 75 | 76 | with context.begin_transaction(): 77 | context.run_migrations() 78 | 79 | 80 | if context.is_offline_mode(): 81 | run_migrations_offline() 82 | else: 83 | run_migrations_online() 84 | -------------------------------------------------------------------------------- /collector/services/base.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import argparse 4 | from http import HTTPStatus 5 | from typing import Generic, Iterable, Type, TypeVar 6 | 7 | import requests 8 | from pydantic import BaseModel, ValidationError, parse_obj_as 9 | 10 | from collector.configurations import CONFIG, logger 11 | from collector.exceptions import ResponseError, ResponseSchemaError 12 | 13 | _SchemaType = TypeVar('_SchemaType', bound=BaseModel | Iterable[BaseModel]) 14 | """ 15 | Bounded TypeVar for Generic classes that take any subtype of the BaseModel class. 16 | Also bound to Iterable, because a JSON response could be a `list[BaseModel]`. 17 | """ 18 | 19 | 20 | class BaseService: 21 | command: str = 'service_name' 22 | "Command name to run the service from the command line. " 23 | 24 | def __init__(self, **kwargs) -> None: 25 | self.init_kwargs = kwargs 26 | 27 | @classmethod 28 | def manage_services(cls, argv: list[str]): 29 | """ 30 | Parse command line args and return a service initialized with those args. 31 | """ 32 | parser = argparse.ArgumentParser(description='Weather Collector. 
') 33 | parser.add_argument( 34 | 'service', 35 | type=str, 36 | help='service to proceed', 37 | choices=[service.command for service in cls.__subclasses__()], 38 | ) 39 | for service in cls.__subclasses__(): 40 | service.add_argument(parser) 41 | 42 | args = parser.parse_args(argv) 43 | service_class = cls.get_service(command=args.service) 44 | return service_class(**dict(args._get_kwargs())) 45 | 46 | @classmethod 47 | def get_service(cls, *, command: str): 48 | """ 49 | Get the collector service for the provided command name. 50 | """ 51 | filtered = filter( 52 | lambda service: service.command == command, cls.__subclasses__() 53 | ) 54 | try: 55 | return next(filtered) 56 | except StopIteration: 57 | raise ValueError(f'No service with this command: {command}. ') 58 | 59 | @classmethod 60 | def get_descriptions(cls): 61 | return 'Collect Weather services description: \n' + '\n'.join( 62 | [ 63 | f'{service.command}:\t{service.__doc__}' 64 | for service in cls.__subclasses__() 65 | ] 66 | ) 67 | 68 | @classmethod 69 | def add_argument(cls, parser: argparse.ArgumentParser): 70 | pass 71 | 72 | def execute(self): 73 | logger.info(f'{self} is running. ') 74 | 75 | def __str__(self) -> str: 76 | return f'<{self.__class__.__name__}>' 77 | 78 | 79 | class FetchServiceMixin(Generic[_SchemaType]): 80 | url: str = '' 81 | params: dict = { 82 | "appid": CONFIG.open_weather_key, 83 | "units": "metric", 84 | } 85 | schema: Type[_SchemaType] 86 | "Pydantic model to parse response JSON data. Must be defined in inherited classes. " 87 | 88 | def fetch(self) -> _SchemaType: 89 | self.response = requests.get(self.url, self.params) 90 | 91 | if self.response.status_code != HTTPStatus.OK: 92 | raise ResponseError(self.response, self.response.json()) 93 | if not getattr(self, 'schema', None): 94 | return self.response.json() 95 | 96 | try: 97 | instance = parse_obj_as(self.schema, self.response.json()) 98 | except ValidationError as e: 99 | raise ResponseSchemaError(e) 100 | 101 | return instance 102 | -------------------------------------------------------------------------------- /alembic.ini: -------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = alembic 6 | 7 | # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s 8 | # Uncomment the line below if you want the files to be prepended with date and time 9 | # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file 10 | # for all available tokens 11 | # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s 12 | 13 | # sys.path path, will be prepended to sys.path if present. 14 | # defaults to the current working directory. 15 | prepend_sys_path = . 16 | 17 | # timezone to use when rendering the date within the migration file 18 | # as well as the filename. 
19 | # If specified, requires the python-dateutil library that can be 20 | # installed by adding `alembic[tz]` to the pip requirements 21 | # string value is passed to dateutil.tz.gettz() 22 | # leave blank for localtime 23 | # timezone = 24 | 25 | # max length of characters to apply to the 26 | # "slug" field 27 | # truncate_slug_length = 40 28 | 29 | # set to 'true' to run the environment during 30 | # the 'revision' command, regardless of autogenerate 31 | # revision_environment = false 32 | 33 | # set to 'true' to allow .pyc and .pyo files without 34 | # a source .py file to be detected as revisions in the 35 | # versions/ directory 36 | # sourceless = false 37 | 38 | # version location specification; This defaults 39 | # to alembic/versions. When using multiple version 40 | # directories, initial revisions must be specified with --version-path. 41 | # The path separator used here should be the separator specified by "version_path_separator" below. 42 | # version_locations = %(here)s/bar:%(here)s/bat:alembic/versions 43 | 44 | # version path separator; As mentioned above, this is the character used to split 45 | # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. 46 | # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. 47 | # Valid values for version_path_separator are: 48 | # 49 | # version_path_separator = : 50 | # version_path_separator = ; 51 | # version_path_separator = space 52 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects. 53 | 54 | # the output encoding used when revision files 55 | # are written from script.py.mako 56 | # output_encoding = utf-8 57 | 58 | 59 | sqlalchemy.url = ... # defined at env.py 60 | 61 | [post_write_hooks] 62 | # post_write_hooks defines scripts or Python functions that are run 63 | # on newly generated revision scripts. 
See the documentation for further 64 | # detail and examples 65 | 66 | # format using "black" - use the console_scripts runner, against the "black" entrypoint 67 | # hooks = black 68 | # black.type = console_scripts 69 | # black.entrypoint = black 70 | # black.options = -l 79 REVISION_SCRIPT_FILENAME 71 | 72 | # Logging configuration 73 | [loggers] 74 | keys = root,sqlalchemy,alembic 75 | 76 | [handlers] 77 | keys = console 78 | 79 | [formatters] 80 | keys = generic 81 | 82 | [logger_root] 83 | level = WARN 84 | handlers = console 85 | qualname = 86 | 87 | [logger_sqlalchemy] 88 | level = WARN 89 | handlers = 90 | qualname = sqlalchemy.engine 91 | 92 | [logger_alembic] 93 | level = INFO 94 | handlers = 95 | qualname = alembic 96 | 97 | [handler_console] 98 | class = StreamHandler 99 | args = (sys.stderr,) 100 | level = NOTSET 101 | formatter = generic 102 | 103 | [formatter_generic] 104 | format = %(levelname)-5.5s [%(name)s] %(message)s 105 | datefmt = %H:%M:%S 106 | -------------------------------------------------------------------------------- /collector/session.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import functools 4 | import inspect 5 | from typing import Callable, Type 6 | 7 | import pydantic 8 | import sqlalchemy as db 9 | import sqlalchemy.orm as orm 10 | 11 | from collector.configurations import CONFIG, logger 12 | from collector.models import BaseModel 13 | 14 | try: 15 | logger.debug(f'Establishing (lazy) connection to database: {CONFIG.db.url}') 16 | engine = db.create_engine(CONFIG.db.url, future=True, echo=CONFIG.db.echo) 17 | except Exception as e: 18 | logger.critical(f'Connection failed. Check that your database is running: {CONFIG.db}') 19 | raise e 20 | 21 | 22 | def session_enter(wrapped: Callable): 23 | @functools.wraps(wrapped) 24 | def wrapper(self: DBSessionMixin, *args, **kwargs): 25 | self.session = orm.Session(engine) 26 | logger.debug(f'Session is open with {engine=}. ') 27 | return wrapped(self, *args, **kwargs) 28 | 29 | return wrapper 30 | 31 | 32 | def session_exit(wrapped: Callable): 33 | @functools.wraps(wrapped) 34 | def wrapper(self: DBSessionMixin, *args, **kwargs): 35 | result = wrapped(self, *args, **kwargs) 36 | self.session.commit() 37 | self.session.close() 38 | logger.debug('Session is closed. ') 39 | return result 40 | 41 | return wrapper 42 | 43 | 44 | def safe_transaction(wrapped: Callable): 45 | @functools.wraps(wrapped) 46 | def wrapper(self: DBSessionMixin, *args, **kwargs): 47 | try: 48 | return wrapped(self, *args, **kwargs) 49 | except Exception as e: 50 | logger.debug(f'Transaction is rolling back. Exception: {e}') 51 | self.session.rollback() 52 | raise e 53 | 54 | return wrapper 55 | 56 | 57 | class DBSessionMeta(type): 58 | """ 59 | Creates classes that operate as session context managers. 60 | 61 | The metaclass wraps all class methods with the `safe_transaction` decorator, 62 | wraps the enter method to open a `Session()`, and wraps the exit method to 63 | call `session.commit()` and `session.close()`. 
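A minimal usage sketch (a hypothetical service, for illustration only; `CityModel` import assumed): class TrackCity(DBSessionMixin): def __init__(self): super().__init__() # wrapped by `session_enter`: opens orm.Session(engine) def execute(self): self.create(CityModel(name='Moscow')) # wrapped by `session_exit`: commits and closes on return 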
64 | """ 65 | 66 | session_enter_method = '__init__' 67 | session_exit_method = 'execute' 68 | 69 | def __new__(cls, clsname: str, bases: tuple, attrs: dict): 70 | for key, value in attrs.items(): 71 | if inspect.isfunction(value): 72 | attrs[key] = safe_transaction(value) 73 | 74 | if key == cls.session_enter_method: 75 | attrs[key] = session_enter(attrs[key]) 76 | if key == cls.session_exit_method: 77 | attrs[key] = session_exit(attrs[key]) 78 | 79 | return type.__new__(cls, clsname, bases, attrs) 80 | 81 | 82 | class DBSessionMixin(metaclass=DBSessionMeta): 83 | """ 84 | Mixin for handling usual CRUD operations with database. 85 | Session is opening at class init and closing when `save()` is called. For commit any 86 | changes `save()` method must by called. 87 | """ 88 | 89 | session: orm.Session 90 | 91 | def query(self, model_class: Type[BaseModel]): 92 | return self.session.query(model_class) 93 | 94 | def create(self, *instances: BaseModel): 95 | self.session.add_all(instances) 96 | 97 | def create_from_schema( 98 | self, model_class: Type[BaseModel], *instances: pydantic.BaseModel 99 | ): 100 | self.create(*[model_class(**instance.dict()) for instance in instances]) 101 | 102 | def delete(self, obj: BaseModel | Type[BaseModel]): 103 | """ 104 | Delete one model instance. Or all records at table if `obj` is a Model Class. 105 | """ 106 | if isinstance(obj, BaseModel): 107 | return self.session.delete(obj) 108 | if isinstance(obj, type): 109 | return self.session.query(obj).delete() 110 | raise ValueError 111 | -------------------------------------------------------------------------------- /collector/configurations.py: -------------------------------------------------------------------------------- 1 | from pprint import pformat 2 | 3 | import pydantic 4 | 5 | from collector.functools import init_logger 6 | 7 | logger = init_logger('weather-collector', 'INFO') 8 | 9 | 10 | class DatabaseConfig(pydantic.BaseModel): 11 | dialect: str = 'postgresql' 12 | driver: str | None = 'psycopg2' 13 | user: str 14 | password: str 15 | host: str = 'db' 16 | port: int = 5432 17 | database: str = 'default' 18 | echo: bool = False 19 | 20 | @property 21 | def url(self): 22 | driver = f'+{self.driver}' if self.driver else '' 23 | return ( 24 | f'{self.dialect}{driver}:' 25 | f'//{self.user}:{self.password}@{self.host}:{self.port}/{self.database}' 26 | ) 27 | 28 | 29 | class SQLiteDatabaseConfig(pydantic.BaseModel): 30 | path: str = 'db.sqlite3' 31 | echo: bool = False 32 | 33 | @property 34 | def url(self): 35 | return f'sqlite:///{self.path}' 36 | 37 | 38 | class CollectorConfig(pydantic.BaseSettings): 39 | """ 40 | debug: `bool` 41 | true: force using SQLite instead of postgreSQL (even if it defined at .env) 42 | cities_amount: `int` = 50 43 | Amount for auto-initial cities list by fetching them from GeoDB. 44 | cities_file: `str` = 'cities.json' 45 | File to describe which cities weather collector fetching data for. 46 | collect_weather_delay: `float` = 1 * 60 * 60 47 | Delay between every weather measurement. Seconds. Default: 1 hour. 48 | open_weather_key: `str` 49 | Open Weather API key. Open Weather could be used under FREE plan. 
Restrictions: 50 | - 60 calls/minute 51 | - 1,000,000 calls/month 52 | """ 53 | 54 | debug: bool 55 | 56 | cities_amount: int = 50 57 | cities_file: str = 'cities.json' 58 | collect_weather_delay: float = 1 * 60 * 60 59 | retry_collect_delay: float = 3 60 | open_weather_key: str 61 | 62 | POSTGRES_USER: str | None = None 63 | POSTGRES_PASSWORD: str | None = None 64 | POSTGRES_DB: str | None = None 65 | POSTGRES_HOST: str | None = None 66 | 67 | db: DatabaseConfig | SQLiteDatabaseConfig = SQLiteDatabaseConfig() 68 | 69 | @pydantic.validator('db', pre=True) 70 | def debug_mode_database_sqlite(cls, db: dict, values: dict): 71 | if not isinstance(db, dict): 72 | return values 73 | 74 | # at this point we cannot check whether the postgres variables were loaded, 75 | # because they could come from 'prod.env' while we are using 'debug.env'. 76 | # therefore sqlite is always used in debug mode. 77 | if values.get('debug'): 78 | for field in DatabaseConfig.__fields__: 79 | db.pop(field, None) 80 | return db 81 | 82 | @pydantic.root_validator(pre=True) 83 | def make_config_fields_equal_to_postgres_variables(cls, values: dict): 84 | db: dict = values.get('db', {}) 85 | if not isinstance(db, dict): 86 | return values 87 | 88 | for postgres_field, config_field in zip( 89 | ['POSTGRES_USER', 'POSTGRES_PASSWORD', 'POSTGRES_DB', 'POSTGRES_HOST'], 90 | ['user', 'password', 'database', 'host'], 91 | ): 92 | if values.get(postgres_field): 93 | db.setdefault(config_field, values.get(postgres_field)) 94 | 95 | values['db'] = db 96 | return values 97 | 98 | def __str__(self) -> str: 99 | return '\n' + pformat(self.dict()) 100 | 101 | class Config: 102 | # debug.env and .env have higher priority than prod.env 103 | # describe the production build at prod.env or .env (debug.env is in .dockerignore) 104 | env_file = 'prod.env', 'debug.env', '.env' 105 | env_nested_delimiter = '__' 106 | 107 | 108 | try: 109 | CONFIG = CollectorConfig() 110 | logger.debug(f'Running collector under these configurations: {CONFIG}') 111 | except Exception as e: 112 | raise RuntimeError( 113 | f'Initializing configurations failed. Ensure a ".env" file exists. Details: {e}' 114 | ) 115 | -------------------------------------------------------------------------------- /alembic/versions/0f1755b76fbf_initial.py: -------------------------------------------------------------------------------- 1 | """initial 2 | 3 | Revision ID: 0f1755b76fbf 4 | Revises: 5 | Create Date: 2022-11-24 11:12:04.162960 6 | 7 | """ 8 | import sqlalchemy as sa 9 | 10 | from alembic import op 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = '0f1755b76fbf' 14 | down_revision = None 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade() -> None: 20 | # ### commands auto generated by Alembic - please adjust! 
### 21 | op.create_table('city', 22 | sa.Column('id', sa.Integer(), autoincrement=True, nullable=False), 23 | sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('(CURRENT_TIMESTAMP)'), nullable=True), 24 | sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True), 25 | sa.Column('name', sa.String(length=50), nullable=False), 26 | sa.Column('is_tracked', sa.Boolean(), nullable=False), 27 | sa.Column('country', sa.String(length=50), nullable=True), 28 | sa.Column('countryCode', sa.String(length=3), nullable=True), 29 | sa.Column('latitude', sa.Float(), nullable=True), 30 | sa.Column('longitude', sa.Float(), nullable=True), 31 | sa.Column('population', sa.Integer(), nullable=True), 32 | sa.PrimaryKeyConstraint('id'), 33 | sa.UniqueConstraint('id') 34 | ) 35 | op.create_table('weather_measurement', 36 | sa.Column('id', sa.Integer(), autoincrement=True, nullable=False), 37 | sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('(CURRENT_TIMESTAMP)'), nullable=True), 38 | sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True), 39 | sa.Column('city_id', sa.Integer(), nullable=False), 40 | sa.Column('measure_at', sa.DateTime(), nullable=False, comment='Time of data forecasted. UTC. Do not confuse with base model `created_at` field.'), 41 | sa.ForeignKeyConstraint(['city_id'], ['city.id'], onupdate='CASCADE', ondelete='CASCADE'), 42 | sa.PrimaryKeyConstraint('id'), 43 | sa.UniqueConstraint('id') 44 | ) 45 | op.create_table('extra_weather_data', 46 | sa.Column('id', sa.Integer(), autoincrement=True, nullable=False), 47 | sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('(CURRENT_TIMESTAMP)'), nullable=True), 48 | sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True), 49 | sa.Column('measurement_id', sa.Integer(), nullable=True), 50 | sa.Column('data', sa.JSON(), nullable=True), 51 | sa.ForeignKeyConstraint(['measurement_id'], ['weather_measurement.id'], ), 52 | sa.PrimaryKeyConstraint('id'), 53 | sa.UniqueConstraint('id') 54 | ) 55 | op.create_table('main_weather_measurement', 56 | sa.Column('id', sa.Integer(), autoincrement=True, nullable=False), 57 | sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('(CURRENT_TIMESTAMP)'), nullable=True), 58 | sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True), 59 | sa.Column('measurement_id', sa.Integer(), nullable=True), 60 | sa.Column('temp', sa.Float(), nullable=True, comment='Temperature. Celsius.'), 61 | sa.Column('feels_like', sa.Float(), nullable=True, comment='This temperature parameter accounts for the human perception of weather. Celsius.'), 62 | sa.Column('temp_min', sa.Float(), nullable=True, comment='Minimum temperature at the moment. This is minimal currently observed temperature (within large megalopolises and urban areas). Celsius.'), 63 | sa.Column('temp_max', sa.Float(), nullable=True, comment='Maximum temperature at the moment. This is maximal currently observed temperature (within large megalopolises and urban areas). Celsius.'), 64 | sa.Column('pressure', sa.Integer(), nullable=True, comment='Atmospheric pressure (on the sea level, if there is no sea_level or grnd_level). hPa.'), 65 | sa.Column('humidity', sa.Integer(), nullable=True, comment='Humidity. %'), 66 | sa.Column('sea_level', sa.Integer(), nullable=True, comment='Atmospheric pressure on the sea level. hPa.'), 67 | sa.Column('grnd_level', sa.Integer(), nullable=True, comment='Atmospheric pressure on the ground level. 
hPa.'), 68 | sa.ForeignKeyConstraint(['measurement_id'], ['weather_measurement.id'], ), 69 | sa.PrimaryKeyConstraint('id'), 70 | sa.UniqueConstraint('id') 71 | ) 72 | # ### end Alembic commands ### 73 | 74 | 75 | def downgrade() -> None: 76 | # ### commands auto generated by Alembic - please adjust! ### 77 | op.drop_table('main_weather_measurement') 78 | op.drop_table('extra_weather_data') 79 | op.drop_table('weather_measurement') 80 | op.drop_table('city') 81 | # ### end Alembic commands ### 82 | -------------------------------------------------------------------------------- /collector/models.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import datetime 4 | from typing import TypeAlias 5 | 6 | import sqlalchemy as db 7 | from sqlalchemy import orm, sql 8 | 9 | Base: TypeAlias = orm.declarative_base() # type: ignore 10 | 11 | 12 | class BaseModel(Base): 13 | __abstract__ = True 14 | 15 | id: int = db.Column( 16 | db.Integer, nullable=False, unique=True, primary_key=True, autoincrement=True 17 | ) 18 | created_at: datetime = db.Column( 19 | db.DateTime(timezone=True), server_default=sql.func.now() 20 | ) 21 | updated_at: datetime = db.Column( 22 | db.DateTime(timezone=True), onupdate=sql.func.now() 23 | ) 24 | 25 | def __repr__(self): 26 | return f'<{self.__class__.__name__}({self.id=})>' 27 | 28 | 29 | class CityModel(BaseModel): 30 | """ 31 | City representation. Name is required, the rest are optional. 32 | """ 33 | 34 | __tablename__ = 'city' 35 | 36 | name: str = db.Column(db.String(50), nullable=False) 37 | is_tracked: bool = db.Column(db.Boolean, nullable=False, default=True) 38 | country: str = db.Column(db.String(50)) 39 | countryCode: str = db.Column(db.String(3)) 40 | latitude: float = db.Column(db.Float) 41 | longitude: float = db.Column(db.Float) 42 | population: int = db.Column(db.Integer) 43 | 44 | measurements: list[MeasurementModel] = orm.relationship( 45 | 'MeasurementModel', 46 | backref='city', 47 | cascade='all, delete-orphan', 48 | ) 49 | 50 | def __str__(self) -> str: 51 | return self.name 52 | 53 | 54 | class MeasurementModel(BaseModel): 55 | """ 56 | The Open Weather API provides a lot of information about the current city weather. Depending 57 | on location and the current weather situation some fields may appear and others may 58 | not. For that situation we decided to store the root fields in separate tables. 59 | 60 | Why `MainWeatherDataModel`? 61 | The basic reason for collecting weather data is understanding how to cool down the company's 62 | servers. Therefore, we parse and store the `main` field that contains the current 63 | temperature. All other data is stored as JSON at `ExtraWeatherDataModel` for any 64 | future purposes. 65 | 66 | We may describe other tables to store all the data in a relational (SQL) way later, if 67 | we need it. 68 | """ 69 | 70 | __tablename__ = 'weather_measurement' 71 | 72 | city_id: int = db.Column( 73 | db.Integer, 74 | db.ForeignKey('city.id', onupdate='CASCADE', ondelete='CASCADE'), 75 | nullable=False, 76 | ) 77 | measure_at: datetime = db.Column( 78 | db.DateTime, 79 | nullable=False, 80 | comment='Time of data forecasted. UTC. 
Do not confuse with base model `created_at` field.', 81 | ) 82 | main: MainWeatherDataModel = orm.relationship( 83 | 'MainWeatherDataModel', 84 | uselist=False, 85 | backref='measurement', 86 | cascade='all, delete-orphan', 87 | ) 88 | 89 | # [NOTE] 90 | # Other fields can be handled here as one-to-one relations to separate tables, 91 | # if the need appears in the future. 92 | ... 93 | 94 | extra: ExtraWeatherDataModel = orm.relationship( 95 | 'ExtraWeatherDataModel', 96 | uselist=False, 97 | backref='measurement', 98 | cascade='all, delete-orphan', 99 | ) 100 | 101 | 102 | class MainWeatherDataModel(BaseModel): 103 | """ 104 | Data from the `main` field of the measurement response. 105 | """ 106 | 107 | __tablename__ = 'main_weather_measurement' 108 | 109 | measurement_id = db.Column(db.Integer, db.ForeignKey('weather_measurement.id')) 110 | 111 | temp: float = db.Column(db.Float, comment='Temperature. Celsius.') 112 | feels_like: float = db.Column( 113 | db.Float, 114 | comment='This temperature parameter accounts for the human perception of weather. Celsius.', 115 | ) 116 | temp_min: float = db.Column( 117 | db.Float, 118 | comment='Minimum temperature at the moment. This is minimal currently observed temperature (within large megalopolises and urban areas). Celsius.', 119 | ) 120 | temp_max: float = db.Column( 121 | db.Float, 122 | comment='Maximum temperature at the moment. This is maximal currently observed temperature (within large megalopolises and urban areas). Celsius.', 123 | ) 124 | pressure: int = db.Column( 125 | db.Integer, 126 | comment='Atmospheric pressure (on the sea level, if there is no sea_level or grnd_level). hPa.', 127 | ) 128 | humidity: int = db.Column(db.Integer, comment='Humidity. %') 129 | sea_level: int = db.Column( 130 | db.Integer, comment='Atmospheric pressure on the sea level. hPa.' 131 | ) 132 | grnd_level: int = db.Column( 133 | db.Integer, comment='Atmospheric pressure on the ground level. hPa.' 134 | ) 135 | 136 | 137 | class ExtraWeatherDataModel(BaseModel): 138 | """ 139 | Additional data from a weather measurement. 
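Everything from the API response that is not parsed into `MainWeatherDataModel` is kept here as raw JSON in the `data` column, so it stays available for any future purposes. 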
140 | """ 141 | 142 | __tablename__ = 'extra_weather_data' 143 | 144 | measurement_id = db.Column(db.Integer, db.ForeignKey('weather_measurement.id')) 145 | data: dict = db.Column(db.JSON) 146 | -------------------------------------------------------------------------------- /tests/test_services.py: -------------------------------------------------------------------------------- 1 | import pydantic 2 | import pytest 3 | import sqlalchemy.orm as orm 4 | 5 | from collector.configurations import CONFIG, CollectorConfig 6 | from collector.exceptions import NoDataError 7 | from collector.models import CityModel, MeasurementModel 8 | from collector.services.cities import ( 9 | CitySchema, 10 | FetchCities, 11 | FetchCoordinates, 12 | InitCities, 13 | ) 14 | from collector.services.weather import CollectScheduler, FetchWeather, ReportWeather 15 | 16 | 17 | @pytest.mark.usefixtures('mock_config', 'setup_database') 18 | class TestServices: 19 | 20 | #################################################################################### 21 | # Init Cities Service 22 | #################################################################################### 23 | 24 | def test_init_cities_no_file_rises(self): 25 | with pytest.raises(NoDataError): 26 | InitCities().execute() 27 | 28 | def test_init_cities_broken_file_rises(self, broken_cities_file): 29 | with pytest.raises(pydantic.ValidationError): 30 | InitCities().execute() 31 | 32 | def test_init_cities_(self, session: orm.Session, cities_file: list): 33 | InitCities().execute() 34 | assert session.query(CityModel).count() == len(cities_file) 35 | 36 | #################################################################################### 37 | # Fetch Cities Service 38 | #################################################################################### 39 | 40 | @pytest.mark.parametrize( 41 | 'cities_names', 42 | [ 43 | pytest.param(['Moscow', 'Tokyo', 'Shanghai', 'Istanbul']), 44 | ], 45 | ) 46 | def test_fetch_cities_assert_cities_list( 47 | self, 48 | session: orm.Session, 49 | cities_names: list[str], 50 | ): 51 | """ 52 | Test that biggest world cities appear in DB. 
53 | """ 54 | FetchCities().execute() 55 | for city in cities_names: 56 | assert session.query(CityModel).filter(CityModel.name == city).all() 57 | 58 | @pytest.mark.parametrize( 59 | 'amount', 60 | [ 61 | pytest.param(1), 62 | pytest.param(17), 63 | pytest.param(100, marks=pytest.mark.slow), 64 | ], 65 | ) 66 | def test_fetch_cities_assert_amounts( 67 | self, 68 | session: orm.Session, 69 | monkeypatch: pytest.MonkeyPatch, 70 | config: CollectorConfig, 71 | amount: int, 72 | ): 73 | monkeypatch.setattr(CONFIG, 'cities_amount', amount) 74 | 75 | FetchCities().execute() 76 | 77 | assert session.query(CityModel).count() == amount 78 | cities_from_file = pydantic.parse_file_as(list[CitySchema], config.cities_file) 79 | assert len(cities_from_file) == amount 80 | 81 | def test_fetch_cities_zero_cities_amount_rises( 82 | self, monkeypatch: pytest.MonkeyPatch 83 | ): 84 | monkeypatch.setattr(CONFIG, 'cities_amount', 0) 85 | 86 | # the same as InitCities - FetchCities will rise NoDataError for 0 cities amount 87 | with pytest.raises(NoDataError): 88 | FetchCities().execute() 89 | 90 | #################################################################################### 91 | # Fetch Coordinates Service 92 | #################################################################################### 93 | 94 | def test_fetch_coordinates(self, seed_cities_to_database, session: orm.Session): 95 | cites: list[CityModel] = session.query(CityModel).all() 96 | for city in cites: 97 | FetchCoordinates(city).execute() 98 | assert city.latitude and city.longitude 99 | 100 | #################################################################################### 101 | # Fetch Weather Service 102 | #################################################################################### 103 | 104 | def test_fetch_weather_rises(self): 105 | with pytest.raises(NoDataError): 106 | FetchWeather() 107 | 108 | def test_fetch_weather(self, seed_cities_to_database, session: orm.Session): 109 | FetchWeather().execute() 110 | measures: list[MeasurementModel] = session.query(MeasurementModel).all() 111 | for measure in measures: 112 | assert measure.main 113 | assert measure.main.temp 114 | assert measure.extra 115 | assert measure.extra.data 116 | 117 | #################################################################################### 118 | # Collect Weather Service 119 | #################################################################################### 120 | 121 | def test_collect_weather_initial( 122 | self, 123 | session: orm.Session, 124 | monkeypatch: pytest.MonkeyPatch, 125 | ): 126 | repeats = 2 127 | cities_amount = 3 128 | monkeypatch.setattr(CONFIG, 'cities_amount', cities_amount) 129 | 130 | CollectScheduler(repeats=repeats, initial=True).execute() 131 | assert session.query(MeasurementModel).count() == cities_amount * repeats 132 | 133 | def test_collect_weather_initial_many_cities( 134 | self, 135 | session: orm.Session, 136 | monkeypatch: pytest.MonkeyPatch, 137 | ): 138 | repeats = 1 139 | cities_amount = 50 140 | monkeypatch.setattr(CONFIG, 'cities_amount', cities_amount) 141 | 142 | CollectScheduler(repeats=repeats, initial=True).execute() 143 | assert session.query(MeasurementModel).count() == cities_amount * repeats 144 | 145 | def test_collect_weather_with_cities_at_db( 146 | self, 147 | cities_list: list, 148 | seed_cities_to_database, 149 | session: orm.Session, 150 | monkeypatch: pytest.MonkeyPatch, 151 | ): 152 | repeats = 2 153 | CollectScheduler(repeats=repeats).execute() 154 | assert 
session.query(MeasurementModel).count() == len(cities_list) * repeats 155 | 156 | #################################################################################### 157 | # Report Weather Service 158 | #################################################################################### 159 | 160 | def test_report_weather(self, seed_cities_to_database, session: orm.Session): 161 | CollectScheduler(repeats=1).execute() 162 | ReportWeather(average=True, latest=True).execute() 163 | ... 164 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Aa][Rr][Mm]/ 27 | [Aa][Rr][Mm]64/ 28 | bld/ 29 | [Bb]in/ 30 | [Oo]bj/ 31 | [Ll]og/ 32 | [Ll]ogs/ 33 | 34 | # Visual Studio 2015/2017 cache/options directory 35 | .vs/ 36 | # Uncomment if you have tasks that create the project's static files in wwwroot 37 | #wwwroot/ 38 | 39 | # Visual Studio 2017 auto generated files 40 | Generated\ Files/ 41 | 42 | # MSTest test Results 43 | [Tt]est[Rr]esult*/ 44 | [Bb]uild[Ll]og.* 45 | 46 | # NUnit 47 | *.VisualState.xml 48 | TestResult.xml 49 | nunit-*.xml 50 | 51 | # Build Results of an ATL Project 52 | [Dd]ebugPS/ 53 | [Rr]eleasePS/ 54 | dlldata.c 55 | 56 | # Benchmark Results 57 | BenchmarkDotNet.Artifacts/ 58 | 59 | # .NET Core 60 | project.lock.json 61 | project.fragment.lock.json 62 | artifacts/ 63 | 64 | # StyleCop 65 | StyleCopReport.xml 66 | 67 | # Files built by Visual Studio 68 | *_i.c 69 | *_p.c 70 | *_h.h 71 | *.ilk 72 | *.meta 73 | *.obj 74 | *.iobj 75 | *.pch 76 | *.pdb 77 | *.ipdb 78 | *.pgc 79 | *.pgd 80 | *.rsp 81 | *.sbr 82 | *.tlb 83 | *.tli 84 | *.tlh 85 | *.tmp 86 | *.tmp_proj 87 | *_wpftmp.csproj 88 | *.log 89 | *.vspscc 90 | *.vssscc 91 | .builds 92 | *.pidb 93 | *.svclog 94 | *.scc 95 | 96 | # Chutzpah Test files 97 | _Chutzpah* 98 | 99 | # Visual C++ cache files 100 | ipch/ 101 | *.aps 102 | *.ncb 103 | *.opendb 104 | *.opensdf 105 | *.sdf 106 | *.cachefile 107 | *.VC.db 108 | *.VC.VC.opendb 109 | 110 | # Visual Studio profiler 111 | *.psess 112 | *.vsp 113 | *.vspx 114 | *.sap 115 | 116 | # Visual Studio Trace Files 117 | *.e2e 118 | 119 | # TFS 2012 Local Workspace 120 | $tf/ 121 | 122 | # Guidance Automation Toolkit 123 | *.gpState 124 | 125 | # ReSharper is a .NET coding add-in 126 | _ReSharper*/ 127 | *.[Rr]e[Ss]harper 128 | *.DotSettings.user 129 | 130 | # TeamCity is a build add-in 131 | _TeamCity* 132 | 133 | # DotCover is a Code Coverage Tool 134 | *.dotCover 135 | 136 | # AxoCover is a Code Coverage Tool 137 | .axoCover/* 138 | !.axoCover/settings.json 139 | 140 | # Visual Studio code coverage results 141 | *.coverage 142 | *.coveragexml 143 | 144 | # NCrunch 145 | _NCrunch_* 146 | .*crunch*.local.xml 147 | nCrunchTemp_* 148 | 149 | # MightyMoose 150 | *.mm.* 151 | AutoTest.Net/ 152 | 153 | # Web workbench (sass) 154 | .sass-cache/ 155 | 156 | # Installshield output folder 157 | 
[Ee]xpress/ 158 | 159 | # DocProject is a documentation generator add-in 160 | DocProject/buildhelp/ 161 | DocProject/Help/*.HxT 162 | DocProject/Help/*.HxC 163 | DocProject/Help/*.hhc 164 | DocProject/Help/*.hhk 165 | DocProject/Help/*.hhp 166 | DocProject/Help/Html2 167 | DocProject/Help/html 168 | 169 | # Click-Once directory 170 | publish/ 171 | 172 | # Publish Web Output 173 | *.[Pp]ublish.xml 174 | *.azurePubxml 175 | # Note: Comment the next line if you want to checkin your web deploy settings, 176 | # but database connection strings (with potential passwords) will be unencrypted 177 | *.pubxml 178 | *.publishproj 179 | 180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 181 | # checkin your Azure Web App publish settings, but sensitive information contained 182 | # in these scripts will be unencrypted 183 | PublishScripts/ 184 | 185 | # NuGet Packages 186 | *.nupkg 187 | # NuGet Symbol Packages 188 | *.snupkg 189 | # The packages folder can be ignored because of Package Restore 190 | **/[Pp]ackages/* 191 | # except build/, which is used as an MSBuild target. 192 | !**/[Pp]ackages/build/ 193 | # Uncomment if necessary however generally it will be regenerated when needed 194 | #!**/[Pp]ackages/repositories.config 195 | # NuGet v3's project.json files produces more ignorable files 196 | *.nuget.props 197 | *.nuget.targets 198 | 199 | # Microsoft Azure Build Output 200 | csx/ 201 | *.build.csdef 202 | 203 | # Microsoft Azure Emulator 204 | ecf/ 205 | rcf/ 206 | 207 | # Windows Store app package directories and files 208 | AppPackages/ 209 | BundleArtifacts/ 210 | Package.StoreAssociation.xml 211 | _pkginfo.txt 212 | *.appx 213 | *.appxbundle 214 | *.appxupload 215 | 216 | # Visual Studio cache files 217 | # files ending in .cache can be ignored 218 | *.[Cc]ache 219 | # but keep track of directories ending in .cache 220 | !?*.[Cc]ache/ 221 | 222 | # Others 223 | ClientBin/ 224 | ~$* 225 | *~ 226 | *.dbmdl 227 | *.dbproj.schemaview 228 | *.jfm 229 | *.pfx 230 | *.publishsettings 231 | orleans.codegen.cs 232 | 233 | # Including strong name files can present a security risk 234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 235 | #*.snk 236 | 237 | # Since there are multiple workflows, uncomment next line to ignore bower_components 238 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 239 | #bower_components/ 240 | 241 | # RIA/Silverlight projects 242 | Generated_Code/ 243 | 244 | # Backup & report files from converting an old project file 245 | # to a newer Visual Studio version. Backup files are not needed, 246 | # because we have git ;-) 247 | _UpgradeReport_Files/ 248 | Backup*/ 249 | UpgradeLog*.XML 250 | UpgradeLog*.htm 251 | ServiceFabricBackup/ 252 | *.rptproj.bak 253 | 254 | # SQL Server files 255 | *.mdf 256 | *.ldf 257 | *.ndf 258 | 259 | # Business Intelligence projects 260 | *.rdl.data 261 | *.bim.layout 262 | *.bim_*.settings 263 | *.rptproj.rsuser 264 | *- [Bb]ackup.rdl 265 | *- [Bb]ackup ([0-9]).rdl 266 | *- [Bb]ackup ([0-9][0-9]).rdl 267 | 268 | # Microsoft Fakes 269 | FakesAssemblies/ 270 | 271 | # GhostDoc plugin setting file 272 | *.GhostDoc.xml 273 | 274 | # Node.js Tools for Visual Studio 275 | .ntvs_analysis.dat 276 | node_modules/ 277 | 278 | # Visual Studio 6 build log 279 | *.plg 280 | 281 | # Visual Studio 6 workspace options file 282 | *.opt 283 | 284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
285 | *.vbw 286 | 287 | # Visual Studio LightSwitch build output 288 | **/*.HTMLClient/GeneratedArtifacts 289 | **/*.DesktopClient/GeneratedArtifacts 290 | **/*.DesktopClient/ModelManifest.xml 291 | **/*.Server/GeneratedArtifacts 292 | **/*.Server/ModelManifest.xml 293 | _Pvt_Extensions 294 | 295 | # Paket dependency manager 296 | .paket/paket.exe 297 | paket-files/ 298 | 299 | # FAKE - F# Make 300 | .fake/ 301 | 302 | # CodeRush personal settings 303 | .cr/personal 304 | 305 | # Python Tools for Visual Studio (PTVS) 306 | __pycache__/ 307 | *.pyc 308 | 309 | # Cake - Uncomment if you are using it 310 | # tools/** 311 | # !tools/packages.config 312 | 313 | # Tabs Studio 314 | *.tss 315 | 316 | # Telerik's JustMock configuration file 317 | *.jmconfig 318 | 319 | # BizTalk build output 320 | *.btp.cs 321 | *.btm.cs 322 | *.odx.cs 323 | *.xsd.cs 324 | 325 | # OpenCover UI analysis results 326 | OpenCover/ 327 | 328 | # Azure Stream Analytics local run output 329 | ASALocalRun/ 330 | 331 | # MSBuild Binary and Structured Log 332 | *.binlog 333 | 334 | # NVidia Nsight GPU debugger configuration file 335 | *.nvuser 336 | 337 | # MFractors (Xamarin productivity tool) working folder 338 | .mfractor/ 339 | 340 | # Local History for Visual Studio 341 | .localhistory/ 342 | 343 | # BeatPulse healthcheck temp database 344 | healthchecksdb 345 | 346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 347 | MigrationBackup/ 348 | 349 | # Ionide (cross platform F# VS Code tools) working folder 350 | .ionide/ 351 | 352 | # venv 353 | venv* 354 | *.env 355 | 356 | # macOS system files 357 | .DS_Store 358 | 359 | # IDE settings 360 | .vscode* 361 | 362 | # local storage 363 | *.sqlite3 364 | *.sqlite3* 365 | cities.json 366 | 367 | # tmp files 368 | *Untitled* 369 | *untitled* 370 | *tmp* 371 | 372 | 373 | htmlcov -------------------------------------------------------------------------------- /collector/services/cities.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import argparse 4 | import json 5 | import os 6 | import unicodedata 7 | 8 | import pydantic 9 | 10 | from collector.configurations import CONFIG, logger 11 | from collector.exceptions import NoDataError 12 | from collector.models import CityModel 13 | from collector.services.base import BaseService, FetchServiceMixin 14 | from collector.session import DBSessionMixin 15 | 16 | ######################################################################################## 17 | # Cities Schemas 18 | ######################################################################################## 19 | 20 | 21 | class CitySchema(pydantic.BaseModel): 22 | name: str 23 | country: str | None 24 | countryCode: str | None 25 | latitude: float | None 26 | longitude: float | None 27 | population: int | None 28 | 29 | @pydantic.validator('name') 30 | def clean_name_unicode(cls, value): 31 | return str( 32 | unicodedata.normalize('NFKD', value) 33 | .encode('ascii', 'ignore') 34 | .decode("utf-8") 35 | ) 36 | 37 | 38 | class CitiesListSchema(pydantic.BaseModel): 39 | data: list[CitySchema] 40 | 41 | 42 | class CityCoordinatesSchema(pydantic.BaseModel): 43 | name: str 44 | lat: float 45 | lon: float 46 | country: str | None 47 | state: str | None 48 | 49 | # [FIXME] parsing response falls down because of unicode symbols 50 | # local_names: list[str] | None 51 | 52 | 53 | ######################################################################################## 54 | # 
Init Cities Service
55 | ########################################################################################
56 | 
57 | 
58 | class InitCities(BaseService, DBSessionMixin):
59 |     """
60 |     Load the cities list from a JSON file and append the cities to the database.
61 |     If `predefined` is provided, that list will be used instead.
62 |     """
63 | 
64 |     command = 'init_cities'
65 | 
66 |     def __init__(
67 |         self, *, override: bool = False, predefined: list[CitySchema] = [], **kwargs
68 |     ) -> None:
69 |         self.predefined = predefined
70 |         self.override = override
71 |         super().__init__(**kwargs)
72 | 
73 |     @classmethod
74 |     def add_argument(cls, parser: argparse.ArgumentParser):
75 |         parser.add_argument(
76 |             '-o',
77 |             '--override',
78 |             action='store_true',
79 |             help='mark all other cities in the DB as not tracked for weather collecting',
80 |         )
81 | 
82 |     def execute(self):
83 |         super().execute()
84 |         cities = self.predefined or self.load_from_file()
85 |         if not cities:
86 |             raise NoDataError(f'{CONFIG.cities_file} has no cities to initialize. ')
87 | 
88 |         if self.override:
89 |             previous: list[CityModel] = self.query(CityModel).all()
90 |             for city in previous:
91 |                 city.is_tracked = False
92 |             logger.info(f'{len(previous)} cities are not tracked anymore. ')
93 | 
94 |         self.create_from_schema(CityModel, *cities)
95 |         logger.info(f'Added {len(cities)} new records to {CityModel}. ')
96 | 
97 |     def load_from_file(self):
98 |         try:
99 |             return pydantic.parse_file_as(list[CitySchema], CONFIG.cities_file)
100 |         except FileNotFoundError as e:
101 |             raise NoDataError(e, msg='Init cities from file failed. ')
102 | 
103 | 
104 | ########################################################################################
105 | # Fetch Cities Service
106 | ########################################################################################
107 | 
108 | 
109 | class FetchCities(BaseService, FetchServiceMixin[CitiesListSchema]):
110 |     """
111 |     Fetch the cities list from the GeoDB API, save it to a JSON file for future custom
112 |     configuration, and call the `InitCities` service to store all new cities in the database.
113 | 
114 |     Endpoint detail information: http://geodb-cities-api.wirefreethought.com/
115 |     """
116 | 
117 |     command = 'fetch_cities'
118 |     url = 'http://geodb-free-service.wirefreethought.com/v1/geo/cities'
119 | 
120 |     # [NOTE]
121 |     # We are using the GeoDB API service under the FREE plan provided at the specified url.
122 |     # Unfortunately, in that case the `limit` param is restricted to 10. So, for
123 |     # instance, we need to make 5 requests to get 50 cities.
124 |     restricted_limit = 10
125 |     params = {
126 |         'sort': '-population',
127 |         'types': 'CITY',
128 |         'limit': restricted_limit,
129 |     }
130 |     schema = CitiesListSchema
131 | 
132 |     def execute(self):
133 |         super().execute()
134 |         cities = self.fetch()
135 |         self.append_to_file(cities)
136 |         logger.info(
137 |             f'Successfully fetched {CONFIG.cities_amount} cities and stored them at '
138 |             f'{CONFIG.cities_file} file. Go there to confirm results. You can make any '
139 |             'changes and commit them by calling for `init_cities` with --override flag.'
140 | ) 141 | 142 | InitCities(predefined=cities, **self.init_kwargs).execute() 143 | 144 | def fetch(self): 145 | cities: list[CitySchema] = [] 146 | repeats = CONFIG.cities_amount // self.restricted_limit 147 | remains = CONFIG.cities_amount % self.restricted_limit 148 | 149 | for i in range(repeats + int(bool(remains))): 150 | if i == repeats: 151 | self.params['limit'] = remains # for final fetching 152 | 153 | offset = i * self.restricted_limit 154 | self.params['offset'] = offset 155 | 156 | logger.info(f'Fetching cities: {offset}/{CONFIG.cities_amount}') 157 | 158 | # `data` is a core field at response json with list of cities 159 | cities += super().fetch().data 160 | 161 | self.params['limit'] = self.restricted_limit 162 | return cities 163 | 164 | def append_to_file(self, cities: list[CitySchema]): 165 | if os.path.isfile(CONFIG.cities_file): 166 | logger.warning( 167 | f'{CONFIG.cities_file} already exists. All data will be overridden. ' 168 | ) 169 | 170 | with open(CONFIG.cities_file, 'w+', encoding='utf-8') as file: 171 | json.dump([city.dict() for city in cities], file) 172 | 173 | 174 | ######################################################################################## 175 | # Fetch Coordinates Service 176 | ######################################################################################## 177 | 178 | 179 | class FetchCoordinates( 180 | BaseService, 181 | DBSessionMixin, 182 | FetchServiceMixin[list[CityCoordinatesSchema]], 183 | ): 184 | """ 185 | If city object doesn't have coordinates, we should get them by calling for 186 | Open Weather Geocoding API. The API documentation says: 187 | 188 | `Please use Geocoder API if you need automatic convert city names and zip-codes to 189 | geo coordinates and the other way around. Please note that API requests by city 190 | name, zip-codes and city id have been deprecated.` 191 | 192 | Endpoint detail information: https://openweathermap.org/api/geocoding-api 193 | """ 194 | 195 | command = 'fetch_coordinates' 196 | url = 'http://api.openweathermap.org/geo/1.0/direct' 197 | schema = list[CityCoordinatesSchema] 198 | params = { 199 | "appid": CONFIG.open_weather_key, 200 | "limit": 10, 201 | } 202 | 203 | def __init__(self, city: CityModel | str, **kwargs) -> None: 204 | if isinstance(city, str): 205 | self.city: CityModel = ( 206 | self.query(CityModel).filter(CityModel.name == city).one() 207 | ) 208 | else: 209 | self.city = city 210 | 211 | self.params['q'] = f'{self.city.name},{self.city.countryCode}' 212 | super().__init__(**kwargs) 213 | 214 | def execute(self): 215 | super().execute() 216 | 217 | geo_list = self.fetch() 218 | if not geo_list: 219 | raise NoDataError( 220 | 'Getting coordinates failed. ' 221 | f'Geocoding has no information about {self.city}. ' 222 | ) 223 | if len(geo_list) > 1: 224 | logger.warning( 225 | f'Geocoding has many records for {self.city}. Taking the first.' 226 | ) 227 | 228 | coordinates = geo_list[0] 229 | self.city.latitude = coordinates.lat 230 | self.city.longitude = coordinates.lon 231 | 232 | def fetch(self): 233 | logger.info(f'Fetching coordinates for {self.city}. ') 234 | return super().fetch() 235 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 
5 | <!-- logo image -->
6 | 
7 | <h3 align="center">Weather Collector</h3>
8 | 
9 | <p align="center">
10 |   Python package for collecting weather measurements.
11 |   <br />
12 |   <a href="#usage">Explore Usage topic »</a>
13 | </p>
14 | 
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
22 | <details>
23 |   <summary>Table of Contents</summary>
24 |   <ol>
25 |     <li><a href="#features">Features</a></li>
26 |     <li><a href="#built-with">Built With</a></li>
27 |     <li><a href="#getting-started">Getting Started</a></li>
28 |     <li><a href="#usage">Usage</a></li>
29 |     <li><a href="#explanation">Explanation</a></li>
30 |     <li><a href="#restrictions">Restrictions</a></li>
31 |     <li><a href="#appreciations">Appreciations</a></li>
32 |     <li><a href="#contacts">Contacts</a></li>
33 |   </ol>
34 | </details>
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | ## Features
45 | - Fetching the world's largest cities from a third-party API.
46 | - Fetching weather for the provided cities from a third-party API and storing the data in a database.
47 | - Easy city configuration via the `cities.json` file; only the name is required.
48 | - Reporting the latest and average temperature for every city.
49 | 
50 | ## Built With
51 | ![](https://img.shields.io/badge/python-3.10.4-blue)
52 | ![](https://img.shields.io/badge/SQL_Alchemy-1.4-blue)
53 | ![](https://img.shields.io/badge/alembic-1.8-blue)
54 | ![](https://img.shields.io/badge/pydantic-1.10-blue)
55 | ![](https://img.shields.io/badge/pytest-7.2-blue)
56 | 
57 | 
58 | ![](https://img.shields.io/badge/mypy-0.97-blue)
59 | ![](https://img.shields.io/badge/black-22.6-blue)
60 | ![](https://img.shields.io/badge/flake8-5.0-blue)
61 | 
62 | 
63 | 
64 | # Getting Started
65 | 
66 | ## Run with Docker Compose.
67 | ### Prerequisites
68 | * Docker **20.10.21**
69 | 
70 | 1. Clone the repo.
71 | ```sh
72 | $ git clone git@github.com:MishaVyb/weather-collector.git
73 | ```
74 | 2. Define environment variables.
75 | ```sh
76 | $ cd weather-collector
77 | $ nano prod.env
78 | ```
79 | ```env
80 | debug = false
81 | open_weather_key = ...
82 | POSTGRES_USER = vybornyy
83 | POSTGRES_PASSWORD = vybornyy
84 | POSTGRES_DB = default
85 | ```
86 | 
87 | 3. Build and run Docker Compose via the predefined `make` command.
88 | ```sh
89 | $ make app
90 | ```
91 | > WARNING!
92 | > If the database connection fails, try again in a few seconds; the Postgres server may not be ready yet.
93 | 
94 | 
95 | ## Run as a Python script.
96 | ### Prerequisites
97 | 
98 | * python **3.10.4**
99 | * pip
100 | 
101 | 
102 | ### Installation
103 | 
104 | 1. Clone the repo.
105 | ```sh
106 | $ git clone git@github.com:MishaVyb/weather-collector.git
107 | ```
108 | 2. Activate the virtual environment.
109 | ```sh
110 | $ cd weather-collector
111 | $ python3.10 -m venv venv
112 | $ source venv/bin/activate
113 | ```
114 | 3. Install requirements.
115 | ```sh
116 | (venv) $ pip install -r requirements.txt
117 | ```
118 | 
119 | 4. Migrate the database.
120 | ```sh
121 | (venv) $ alembic upgrade head
122 | ```
123 | 5. Define environment variables.
124 | ```sh
125 | $ nano debug.env
126 | ```
127 | ```env
128 | debug = true
129 | open_weather_key = ...
130 | ```
131 | 
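6. (Optional) Smoke-test the setup. Both commands are described in Usage below; `--repeats 1` performs a single collecting run instead of scheduling forever.
```sh
(venv) $ python3 manage.py fetch_cities
(venv) $ python3 manage.py collect --repeats 1
```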

(back to top)

132 | 
133 | ## Usage
134 | 
135 | 1. Init cities and run continuous weather collecting.
136 | ```sh
137 | $ python3 manage.py collect --initial
138 | ```
139 | 
140 | To begin with, the collector looks for a `cities.json` file where the list of cities is described.
141 | ```json
142 | [
143 |     {"name": "Moscow"},
144 |     {"name": "Istanbul"}
145 | ]
146 | ```
147 | 
148 | If that file does not exist, the collector fetches the most populated cities from the GeoDB API.
149 | > WARNING!
150 | > Weather Collector does **not** guarantee that the received cities are *really* the most populated cities on Earth at the current moment. It's better to fill the `cities.json` file manually.
151 | 
152 | After that, the collector begins collecting weather every hour and stores all the data in the database.
153 | 
154 | 2. Change tracked cities.
155 | 
156 | Describe cities in the `cities.json` file and call the `InitCities` service.
157 | ```sh
158 | $ python3 manage.py init_cities
159 | ```
160 | By default, cities are appended to the already handled ones. If you want to track only the new list of cities, use the `--override` flag. It marks all cities already in the database as no longer tracked for weather collecting.
161 | ```sh
162 | $ python3 manage.py init_cities --override
163 | ```
164 | Or re-fetch cities. This line invokes `InitCities` with the `--override` flag after fetching.
165 | ```sh
166 | $ python3 manage.py fetch_cities --override
167 | ```
168 | Or re-init and run collecting in one line.
169 | ```sh
170 | $ python3 manage.py collect --initial --override
171 | ```
172 | 
173 | 3. Get a weather report.
174 | ```sh
175 | $ python3 manage.py report
176 | $ python3 manage.py report --average
177 | $ python3 manage.py report --latest
178 | ```
179 | 
180 | 4. More options.
181 | ```sh
182 | $ python3 manage.py --help
183 | ```
184 | 
185 | 
186 | 
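For example, to take a few measurements and then summarize them in one session (`-r`/`--repeats` is the `CollectScheduler` flag that limits the number of collecting runs):
```sh
$ python3 manage.py collect --repeats 3
$ python3 manage.py report --average --latest
```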

(back to top)

187 | 188 | ## Explanation 189 | 190 | 1. Database Structure. 191 | ![Untitled (3)](https://user-images.githubusercontent.com/103563736/202989181-cb714940-7df3-4a67-880c-048acd2bf571.jpg) 192 | 193 | `CityModel`
194 | Contains the cities that weather is collected for.
195 | 
196 | `MeasurementModel`
197 | Stores one record per weather measurement (one API request), associated with its city.
198 | 
199 | The Open Weather API provides a lot of information about the current city weather. Depending on the location and the current weather situation, some fields may appear and others may not. For that reason we decided to store each root field of the response in a separate table.
200 | 
201 | Why the `MainWeatherDataModel` table?
202 | The basic reason for collecting weather is understanding how to cool down the company's servers. Therefore, we parse and store the `main` field that contains the current temperature. All other data is stored as JSON at `ExtraWeatherDataModel` for any future purposes.
203 | 
204 | We may describe other tables to store all the data in a relational (SQL) way later, if we need it.
205 | 
206 | 2. Services Structure.
207 | ![Untitled (2)](https://user-images.githubusercontent.com/103563736/202989192-42b7c2cc-f939-46fc-8630-06cb9e6fee1a.jpg)
208 | 
209 | `BaseService` presents the basic definition for all other services.
210 | `DBSessionMixin` for making operations with the database.
211 | `FetchServiceMixin` for handling HTTP requests. A sketch of how these pieces compose is shown below.
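As a sketch of how a new command could be assembled, here is a hypothetical `ping` service. It is not part of the package; it only mirrors the `command`/`url`/`schema` class attributes and the `execute`/`fetch` hooks used by the bundled services such as `FetchCities`.
```python
import pydantic

from collector.services.base import BaseService, FetchServiceMixin


class PingSchema(pydantic.BaseModel):
    # the only response field we care about; Open Weather returns it on
    # both success ("cod": 200) and error ("cod": "400") responses
    cod: int


class PingWeatherAPI(BaseService, FetchServiceMixin[PingSchema]):
    """Hypothetical service: check that the weather endpoint responds at all."""

    command = 'ping'  # would be exposed as `python3 manage.py ping`
    url = 'https://api.openweathermap.org/data/2.5/weather'
    schema = PingSchema

    def execute(self):
        super().execute()
        # FetchServiceMixin performs the HTTP round-trip and schema parsing
        self.fetch()
```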
212 | 
213 | 
214 | ## Restrictions
215 | 1. Not async.
216 | When executing services, all HTTP requests run synchronously. That takes a lot of time and blocks execution. It would be better to make them async to gain speed.
217 | 
218 | 2. No unique constraint on city names.
219 | When calling the `InitCities` service, all cities described in `cities.json` are appended to the database with no check for duplicates, so the database may contain several cities with the same name and location.
220 | 
221 | To specify a city explicitly, provide location coordinates or a country code.
222 | ```json
223 | [
224 |     {"name": "..", "latitude": 31.1, "longitude": 121.4},
225 |     {"name": "..", "countryCode": "BR"}
226 | ]
227 | ```
228 | 
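Until such a constraint exists, duplicates can be filtered out of `cities.json` by hand before calling `init_cities` (a minimal sketch; the field names are the ones from `CitySchema`):
```python
import json

with open('cities.json') as file:
    cities = json.load(file)

# keep only the first occurrence of each (name, countryCode) pair
seen = set()
unique = []
for city in cities:
    key = (city['name'], city.get('countryCode'))
    if key not in seen:
        seen.add(key)
        unique.append(city)

with open('cities.json', 'w') as file:
    json.dump(unique, file)
```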

(back to top)

229 | 
230 | ## Appreciations
231 | 
232 | Great thanks to the third-party API services that this package relies on.
233 | - [GeoDB](http://geodb-cities-api.wirefreethought.com/) - for providing the list of the most populated cities.
234 | - [Open Weather](https://openweathermap.org/) - for providing city locations and their weather.
235 | 
236 | ## Contacts
237 | 
238 | Misha Vybornyy
239 | 
240 | [![Telegram Badge](https://img.shields.io/badge/-mishaviborniy-blue?style=social&logo=telegram&link=https://t.me/mishaviborniy)](https://t.me/mishaviborniy)
241 | [![Gmail Badge](https://img.shields.io/badge/-misha.vybornyy@gmail.com-c14438?style=flat&logo=Gmail&logoColor=white&link=mailto:vbrn.mv@gmail.com)](mailto:vbrn.mv@gmail.com) 242 |

243 | 244 |

(back to top)

-------------------------------------------------------------------------------- /collector/services/weather.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import argparse 4 | import sys 5 | from datetime import datetime, timedelta 6 | 7 | import pydantic 8 | from apscheduler.schedulers.blocking import BlockingScheduler 9 | 10 | from collector.configurations import CONFIG, logger 11 | from collector.exceptions import CollectorBaseException, NoDataError 12 | from collector.models import ( 13 | CityModel, 14 | ExtraWeatherDataModel, 15 | MainWeatherDataModel, 16 | MeasurementModel, 17 | ) 18 | from collector.services.base import BaseService, FetchServiceMixin 19 | from collector.services.cities import FetchCities, FetchCoordinates, InitCities 20 | from collector.session import DBSessionMixin 21 | 22 | ######################################################################################## 23 | # Weather Schemas 24 | ######################################################################################## 25 | 26 | 27 | class MainWeatherSchema(pydantic.BaseModel): 28 | """ 29 | Schema for parsing `main` field from Open Weather response. 30 | 31 | For more information see `MainWeatherDataModel` where we store all these values. 32 | """ 33 | 34 | temp: float 35 | feels_like: float 36 | temp_min: float 37 | temp_max: float 38 | pressure: int 39 | humidity: int 40 | sea_level: int | None 41 | grnd_level: int | None 42 | 43 | 44 | class WeatherMeasurementSchema(pydantic.BaseModel): 45 | """ 46 | Schema for parsing data from Open Weather API. We only ensure to have `main` field 47 | and `dt` field. Other is optional and will be stored at extra data table. 48 | 49 | For more information see `MeasurementModel` where we store all these values. 50 | """ 51 | 52 | main: MainWeatherSchema 53 | dt: int 54 | "Time of data forecasted, Unix, UTC (timestamp). `measure_at` field at model." 55 | 56 | 57 | ######################################################################################## 58 | # Fetch Weather Service 59 | ######################################################################################## 60 | 61 | 62 | class FetchWeather( 63 | BaseService, DBSessionMixin, FetchServiceMixin[WeatherMeasurementSchema] 64 | ): 65 | """ 66 | Fetch weather for cities and store data into database. 67 | By default fetching weather for all cities from database. 68 | 69 | Endpoint detail information: https://openweathermap.org/current 70 | """ 71 | 72 | command = 'fetch_weather' 73 | url = 'https://api.openweathermap.org/data/2.5/weather' 74 | schema = WeatherMeasurementSchema 75 | 76 | def __init__(self, **kwargs) -> None: 77 | self.cities: list[CityModel] = ( 78 | self.query(CityModel).filter(CityModel.is_tracked).all() 79 | ) 80 | if not self.cities: 81 | raise NoDataError( 82 | 'No cities at database to be tracked. ' 83 | f'Call for {FetchCities.command} or {InitCities.command} before. ' 84 | ) 85 | 86 | super().__init__(**kwargs) 87 | 88 | def execute(self): 89 | super().execute() 90 | for city in self.cities: 91 | if not all([city.longitude, city.latitude]): 92 | try: 93 | FetchCoordinates(city, **self.init_kwargs).execute() 94 | except NoDataError as e: 95 | logger.warning(f'Can not get weather for {city}: {e}. Continue. 
')
96 |                     continue
97 | 
98 |             measure, extra = self.fetch(city)
99 |             model = MeasurementModel(
100 |                 city=city,
101 |                 measure_at=datetime.utcfromtimestamp(measure.dt),
102 |                 main=MainWeatherDataModel(**measure.main.dict()),
103 |                 extra=ExtraWeatherDataModel(data=extra),
104 |             )
105 |             self.create(model)
106 | 
107 |     def fetch(self, city: CityModel):  # type: ignore
108 |         logger.info(f'Fetching weather for {city}. ')
109 | 
110 |         self.params['lat'] = str(city.latitude)
111 |         self.params['lon'] = str(city.longitude)
112 |         measure = super().fetch()
113 | 
114 |         extra: dict = self.response.json()  # everything beyond the parsed schema fields
115 |         for field in self.schema.__fields__:
116 |             extra.pop(field)
117 | 
118 |         return measure, extra
119 | 
120 | 
121 | ########################################################################################
122 | # Collect Weather Service
123 | ########################################################################################
124 | 
125 | 
126 | class CollectScheduler(BaseService):
127 |     """
128 |     Fetch weather measurements continuously.
129 |     """
130 | 
131 |     command = 'collect'
132 | 
133 |     def __init__(
134 |         self, *, repeats: int | None = None, initial: bool = False, **kwargs
135 |     ) -> None:
136 |         self.counter = 0
137 |         self.repeats = repeats
138 |         self.scheduler = BlockingScheduler()
139 | 
140 |         if initial:
141 |             try:
142 |                 InitCities(**kwargs).execute()
143 |             except NoDataError as e:
144 |                 logger.warning(f'{e}. Handling by calling for {FetchCities()}.')
145 |                 FetchCities(**kwargs).execute()
146 | 
147 |         super().__init__(**kwargs)
148 | 
149 |     @classmethod
150 |     def add_argument(cls, parser: argparse.ArgumentParser):
151 |         parser.add_argument(
152 |             '-r',
153 |             '--repeats',
154 |             metavar='',
155 |             type=int,
156 |             help='collecting repeats amount. Default: infinity',
157 |         )
158 |         parser.add_argument(
159 |             '-i',
160 |             '--initial',
161 |             action='store_true',
162 |             help='init cities before collecting. Useful with the -o (override) flag',
163 |         )
164 | 
165 |     def execute(self):
166 |         super().execute()
167 |         self.scheduler.add_job(
168 |             self._worker, 'interval', seconds=CONFIG.collect_weather_delay
169 |         )
170 | 
171 |         for job in self.scheduler.get_jobs():
172 |             job.modify(next_run_time=datetime.now())  # run the first job immediately
173 | 
174 |         self.scheduler.start()
175 | 
176 |     def _worker(self):
177 |         try:
178 |             logger.info(f'\n\n\t Starting collecting weather ({self.counter}).\n')
179 |             FetchWeather(**self.init_kwargs).execute()
180 |             logger.info('Collected successfully. ')
181 |             logger.info(f'Next collecting runs in {CONFIG.collect_weather_delay} sec. ')
182 | 
183 |         except CollectorBaseException as e:
184 |             # log the error and try again in a while
185 |             #
186 |             # [NOTE]
187 |             # Custom exceptions are raised when the response is broken or when the db
188 |             # does not have the necessary data. While this thread is waiting for the
189 |             # next job execution, the cause of the error could be fixed by others.
190 |             # Therefore, we add a new retry job to the scheduler.
191 |             logger.error(
192 |                 'Collecting fails. '
193 |                 f'Try again in {CONFIG.retry_collect_delay}. Detail: {e}. 
' 194 | ) 195 | retry_at = datetime.now() + timedelta(seconds=CONFIG.retry_collect_delay) 196 | self.scheduler.add_job(self._worker, 'date', run_date=retry_at) 197 | 198 | finally: 199 | self.counter += 1 200 | if self.repeats and self.counter >= self.repeats: 201 | self.scheduler.shutdown(wait=True) 202 | 203 | 204 | ######################################################################################## 205 | # Report Weather Service 206 | ######################################################################################## 207 | 208 | 209 | class ReportWeather(BaseService, DBSessionMixin): 210 | """ 211 | Get report about all weather measurements records. Default output is `sys.stdout`. 212 | """ 213 | 214 | command = 'report' 215 | output = sys.stdout 216 | 217 | def __init__(self, average: bool = False, latest: bool = False, **kwargs) -> None: 218 | self.methods = [self.get_basic] 219 | if average: 220 | self.methods.append(self.get_average) 221 | if latest: 222 | self.methods.append(self.get_latest) 223 | 224 | super().__init__(**kwargs) 225 | 226 | @classmethod 227 | def add_argument(cls, parser: argparse.ArgumentParser): 228 | parser.add_argument( 229 | '--average', 230 | action='store_true', 231 | help='report average temperature for all cities', 232 | ) 233 | parser.add_argument( 234 | '--latest', 235 | action='store_true', 236 | help='report latest measured temperature for all cities', 237 | ) 238 | 239 | def execute(self): 240 | super().execute() 241 | for method in self.methods: 242 | self.output.write(method()) 243 | self.output.write('\n') 244 | 245 | def get_basic(self): 246 | n_cites = self.query(CityModel).count() 247 | n_measure = self.query(MeasurementModel).count() 248 | return ( 249 | '\n' 250 | f'Collector storing {n_measure} weather measurements for {n_cites} cities.' 251 | ) 252 | 253 | def get_average(self): 254 | report = '' 255 | cites: list[CityModel] = self.query(CityModel).all() 256 | for city in cites: 257 | measurements: list[MeasurementModel] = ( 258 | self.query(MeasurementModel) 259 | .filter(MeasurementModel.city_id == city.id) 260 | .all() 261 | ) 262 | if not measurements: 263 | continue 264 | 265 | n_measure = len(measurements) 266 | first = measurements[0] 267 | last = measurements[-1] 268 | average = sum([measure.main.temp for measure in measurements]) / n_measure 269 | report += ( 270 | '\n' 271 | f'Average temperature at {city.name} is {average} C. ' 272 | f'({n_measure} measurements {first.measure_at} ... {last.measure_at})' 273 | ) 274 | return report 275 | 276 | def get_latest(self): 277 | report = '' 278 | cites: list[CityModel] = self.query(CityModel).all() 279 | for city in cites: 280 | measure: MeasurementModel = ( 281 | self.query(MeasurementModel) 282 | .filter(MeasurementModel.city_id == city.id) 283 | .order_by(MeasurementModel.id.desc()) 284 | .first() 285 | ) 286 | if not measure: 287 | continue 288 | 289 | report += ( 290 | '\n' 291 | f'Last measured temperature at {city.name} is {measure.main.temp} C. ' 292 | f'({measure.measure_at})' 293 | ) 294 | return report 295 | --------------------------------------------------------------------------------