├── .dockerignore
├── alembic
│   ├── README
│   ├── script.py.mako
│   ├── env.py
│   └── versions
│       └── 0f1755b76fbf_initial.py
├── tests
│   ├── __init__.py
│   ├── conftest.py
│   ├── fixtures
│   │   ├── fixture_cities.py
│   │   ├── fixture_db.py
│   │   └── fixture_config.py
│   └── test_services.py
├── Dockerfile
├── collector
│   ├── __init__.py
│   ├── services
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── cities.py
│   │   └── weather.py
│   ├── functools.py
│   ├── exceptions.py
│   ├── session.py
│   ├── configurations.py
│   └── models.py
├── docker-compose.yaml
├── cspell.config.yaml
├── .pre-commit-config.yaml
├── manage.py
├── pytest.ini
├── LICENSE
├── requirements.txt
├── makefile
├── pyproject.toml
├── alembic.ini
├── .gitignore
└── README.md
/.dockerignore:
--------------------------------------------------------------------------------
1 | debug.env
2 | cities.json
3 | db.sqlite3
--------------------------------------------------------------------------------
/alembic/README:
--------------------------------------------------------------------------------
1 | Generic single-database configuration.
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['logger']
2 |
3 | from .conftest import logger
4 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # syntax=docker/dockerfile:1
2 |
3 | FROM python:3.10-slim-buster
4 |
5 | WORKDIR /
6 |
7 | COPY requirements.txt requirements.txt
8 |
9 | RUN pip3 install -r requirements.txt
10 |
11 | COPY . .
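12 |
13 | # Build and run sketch (the makefile's `app` target wraps these steps;
14 | # a prod.env file is expected by docker-compose.yaml):
15 | #     docker-compose build
16 | #     docker-compose up -d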
--------------------------------------------------------------------------------
/collector/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['configurations', 'session', 'exceptions', 'functools', 'models']
2 |
3 | from . import configurations, exceptions, functools, models, session
4 | from .configurations import logger
5 |
--------------------------------------------------------------------------------
/collector/services/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 | 'BaseService',
3 | 'FetchCities',
4 | 'InitCities',
5 | 'CollectScheduler',
6 | 'FetchWeather',
7 | ]
8 |
9 | from .base import BaseService
10 | from .cities import FetchCities, InitCities
11 | from .weather import CollectScheduler, FetchWeather
12 |
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | version: '3.8'
2 | name: weather-collector-docker-compose
3 | services:
4 |
5 | db:
6 | image: postgres:13.0-alpine
7 | volumes:
8 | - postgres:/var/lib/postgresql/data/
9 | env_file:
10 | - ./prod.env
11 |
12 | worker:
13 | build: .
14 | stdin_open: true
15 | tty: true
16 | restart: on-failure
17 | depends_on:
18 | - db
19 | env_file:
20 | - ./prod.env
21 |
22 | volumes:
23 | postgres:
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | """
2 | Main config file for pytest.
3 | """
4 |
5 | import pytest
6 |
7 | from collector.functools import init_logger
8 |
9 | pytest_plugins = [
10 | 'tests.fixtures.fixture_db',
11 | 'tests.fixtures.fixture_config',
12 | 'tests.fixtures.fixture_cities',
13 | ]
14 |
15 | logger = init_logger('pytest', 'DEBUG')
16 |
17 |
18 | @pytest.fixture(autouse=True)
19 | def new_line():
20 | """
21 |     Fixture simply prints a blank line to separate each test's logging output.
22 | """
23 | print('\n')
24 |
--------------------------------------------------------------------------------
/cspell.config.yaml:
--------------------------------------------------------------------------------
1 | version: "0.2"
2 | ignorePaths:
3 | - .gitignore
4 | - requirements.txt
5 | - alembic*
6 | allowCompoundWords: true
7 | dictionaries:
8 | - python
9 | words:
10 | - apscheduler
11 | - clsname
12 | - grnd
13 | - isort
14 | - isready
15 | - Misha
16 | - mypy
17 | - ondelete
18 | - onupdate
19 | - psycopg
20 | - pydantic
21 | - pytest
22 | - sqlalchemy
23 | - testdb
24 | - venv
25 | - vybornyy
26 | ignoreWords: []
27 | import: []
28 | enableFiletypes:
29 | - dockercompose
30 | - ini
31 | - makefile
32 | - markdown
33 | - toml
34 |
--------------------------------------------------------------------------------
/collector/functools.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import Literal
3 |
4 |
5 | def init_logger(
6 | name: str, level: int | Literal['DEBUG', 'INFO', 'WARNING', 'ERROR'] = logging.INFO
7 | ):
8 | """
9 | Configure and get logger by provided name.
10 | """
11 | if isinstance(level, str):
12 | level = getattr(logging, level)
13 |
14 | logger = logging.getLogger(name)
15 | logger.setLevel(level)
16 | handler = logging.StreamHandler()
17 | handler.setFormatter(logging.Formatter('%(levelname)s - %(message)s'))
18 | logger.addHandler(handler)
19 | return logger
20 |
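21 |
22 | # Usage sketch (illustrative only):
23 | #
24 | #     logger = init_logger('demo', 'DEBUG')
25 | #     logger.debug('message')       # emitted as "DEBUG - message"
26 | #     logger = init_logger('quiet')  # level defaults to logging.INFO
27 | #     logger.debug('message')       # suppressed: DEBUG is below INFO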
--------------------------------------------------------------------------------
/alembic/script.py.mako:
--------------------------------------------------------------------------------
1 | """${message}
2 |
3 | Revision ID: ${up_revision}
4 | Revises: ${down_revision | comma,n}
5 | Create Date: ${create_date}
6 |
7 | """
8 | from alembic import op
9 | import sqlalchemy as sa
10 | ${imports if imports else ""}
11 |
12 | # revision identifiers, used by Alembic.
13 | revision = ${repr(up_revision)}
14 | down_revision = ${repr(down_revision)}
15 | branch_labels = ${repr(branch_labels)}
16 | depends_on = ${repr(depends_on)}
17 |
18 |
19 | def upgrade() -> None:
20 | ${upgrades if upgrades else "pass"}
21 |
22 |
23 | def downgrade() -> None:
24 | ${downgrades if downgrades else "pass"}
25 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | default_language_version:
2 | python: python3.10.4
3 | repos:
4 | - repo: https://github.com/PyCQA/autoflake
5 | rev: v2.0.0
6 | hooks:
7 | - id: autoflake
8 | exclude: alembic/
9 |
10 | - repo: https://github.com/pycqa/isort
11 | rev: 5.10.1
12 | hooks:
13 | - id: isort
14 | name: isort (python)
15 |
16 | - repo: https://github.com/psf/black
17 | rev: 22.10.0
18 | hooks:
19 | - id: black
20 | exclude: alembic/
21 |
22 | - repo: https://github.com/streetsidesoftware/cspell-cli
23 | rev: v6.2.0
24 | hooks:
25 | - id: cspell
--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from collector.configurations import logger
4 | from collector.services import BaseService
5 |
6 |
7 | def main():
8 |     options = sys.argv[1:]  # the first arg is 'manage.py'; skip it
9 |     try:
10 |         service = BaseService.manage_services(options)
11 |     except (KeyboardInterrupt, SystemExit) as e:
12 |         if '--help' in options or '-h' in options:
13 |             logger.info(BaseService.get_descriptions())
14 |         raise e
15 |
16 |     service.execute()
17 |
18 |
19 | if __name__ == '__main__':
20 |     main()
21 |
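22 | # Example invocations (the `fetch_cities` and `collect` commands are the ones
23 | # used by the makefile):
24 | #
25 | #     python manage.py --help          # print the services descriptions
26 | #     python manage.py fetch_cities    # fetch the initial cities list
27 | #     python manage.py collect         # run the weather collection scheduler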
--------------------------------------------------------------------------------
/collector/exceptions.py:
--------------------------------------------------------------------------------
1 | class CollectorBaseException(Exception):
2 | message: str = ''
3 |
4 | def __init__(self, *args: object, msg: str = '') -> None:
5 | self.message = msg or self.message
6 | super().__init__(*args)
7 |
8 | def __str__(self) -> str:
9 | return self.message + f'Details: {self.args}'
10 |
11 |
12 | class ResponseError(CollectorBaseException):
13 | message = 'Unexpected response. '
14 |
15 |
16 | class ResponseSchemaError(ResponseError):
17 | message = 'Unexpected response data schema. '
18 |
19 |
20 | class NoDataError(CollectorBaseException):
21 | message = 'No data provided. '
22 |
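23 |
24 | # Sketch of how these exceptions render (hypothetical values, for illustration):
25 | #
26 | #     raise ResponseError(404, {'cod': '404'})
27 | #     # str(e) -> "Unexpected response. Details: (404, {'cod': '404'})"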
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | python_files =
3 | test_*.py
4 |
5 | ; run pytest with "-m marker_name" to run only the marked tests
6 | ; deselect with '-m "not marker_name"'
7 | markers =
8 |     debug_this
9 |     slow
10 |     postgres
11 |
12 | ; apply docstring tests
13 | ; ignore tests with the "slow" marker and the alembic directory
14 | addopts = --doctest-modules -m "postgres and not slow" --ignore="alembic"
15 | ; --ignore-glob="*base.py" ; for regex patterns
16 |
17 | ; specify the source directory and append it to the python path
18 | ; (the same configuration could be applied at the main 'config.py' file)
19 | ; pythonpath = . src
20 |
21 | # pytest configuration for Django project:
22 | #
23 | # ; pytest-django settings:
24 | # DJANGO_SETTINGS_MODULE = main.settings
25 |
26 | # ; pytest-django cannot find the django project automatically because of the nested folder structure,
27 | # ; so specify the path to the manage.py file. see the docs for details about pythonpath:
28 | # ; https://pytest-django.readthedocs.io/en/latest/managing_python_path.html
29 | # django_find_project = false
30 | # pythonpath = . apps
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Misha Vybornyy
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohttp==3.8.3
2 | aiosignal==1.3.1
3 | alembic==1.8.1
4 | APScheduler==3.9.1.post1
5 | async-timeout==4.0.2
6 | attrs==22.1.0
7 | autoflake==1.7.7
8 | black==22.6.0
9 | certifi==2022.9.24
10 | charset-normalizer==2.1.1
11 | click==8.1.3
12 | coverage==6.5.0
13 | exceptiongroup==1.0.2
14 | flake8==5.0.4
15 | frozenlist==1.3.3
16 | greenlet==2.0.1
17 | idna==3.4
18 | importlib-metadata==3.4.0
19 | iniconfig==1.1.1
20 | isort==5.10.1
21 | Mako==1.2.4
22 | MarkupSafe==2.1.1
23 | mccabe==0.7.0
24 | multidict==6.0.2
25 | mypy==0.971
26 | mypy-extensions==0.4.3
27 | packaging==21.3
28 | pathspec==0.9.0
29 | platformdirs==2.5.2
30 | pluggy==1.0.0
31 | psycopg2-binary==2.9.5
32 | pycodestyle==2.9.1
33 | pydantic==1.10.2
34 | pyflakes==2.5.0
35 | pyparsing==3.0.9
36 | pytest==7.2.0
37 | pytest-cov==4.0.0
38 | python-dotenv==0.21.0
39 | pytz==2022.6
40 | pytz-deprecation-shim==0.1.0.post0
41 | requests==2.28.1
42 | six==1.16.0
43 | SQLAlchemy==1.4.44
44 | tomli==2.0.1
45 | typed-ast==1.5.4
46 | types-pytz==2022.6.0.1
47 | types-requests==2.28.11.5
48 | types-urllib3==1.26.25.4
49 | typing_extensions==4.3.0
50 | tzdata==2022.6
51 | tzlocal==4.2
52 | urllib3==1.26.12
53 | yarl==1.8.1
54 | zipp==3.8.1
55 |
--------------------------------------------------------------------------------
/tests/fixtures/fixture_cities.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | import pytest
4 | import sqlalchemy.orm as orm
5 |
6 | from collector import models
7 | from collector.configurations import CollectorConfig
8 |
9 |
10 | @pytest.fixture
11 | def cities_list():
12 | return [
13 | {'name': 'Shanghai'},
14 | {'name': 'Istanbul'},
15 | {'name': 'Tokyo'},
16 | {'name': 'Moscow'},
17 | {'name': 'Entebbe'}, # small African city
18 | ]
19 |
20 |
21 | @pytest.fixture
22 | def broken_cities_file(config: CollectorConfig):
23 | invalid_schema = [
24 | {'name': 'Moscow'},
25 | {'no_name': 'no_city'},
26 | ]
27 | with open(config.cities_file, 'w+', encoding='utf-8') as file:
28 | json.dump(invalid_schema, file)
29 |
30 |
31 | @pytest.fixture
32 | def cities_file(config: CollectorConfig, cities_list: list[dict]):
33 | with open(config.cities_file, 'w+', encoding='utf-8') as file:
34 | json.dump(cities_list, file)
35 | return cities_list
36 |
37 |
38 | @pytest.fixture
39 | def seed_cities_to_database(cities_list, session: orm.Session):
40 | session.add_all([models.CityModel(**city) for city in cities_list])
41 | session.commit()
42 |
--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
1 | # This file provides CLI shortcuts.
2 | # See https://makefiletutorial.com/#variables for more details.
3 |
4 | migrations:
5 | docker-compose exec worker alembic upgrade head
6 |
7 | initialization:
8 | docker-compose exec worker python manage.py fetch_cities
9 |
10 | collecting:
11 | docker-compose exec worker python manage.py collect
12 |
13 | app:
14 | docker-compose build
15 | docker-compose up -d
16 | make migrations
17 | make initialization
18 | make collecting
19 |
20 |
21 | ######################################################################
22 | # development tools
23 | ######################################################################
24 |
25 |
26 | format:
27 | @autoflake --remove-all-unused-imports -vv --ignore-init-module-imports -r .
28 | 	@echo "make format calls autoflake, which will remove all the unused imports listed above. Are you sure?"
29 | 	@echo "Enter to proceed. Ctrl-C to abort."
30 | @read
31 | autoflake --in-place --remove-all-unused-imports --ignore-init-module-imports -r .
32 | black .
33 | isort .
34 | mypy .
35 | flake8 .
36 |
37 |
38 | push:
39 | @git status
40 | 	@echo "All files listed above will be added to the commit. Enter a commit message to proceed. Ctrl-C to abort."
41 | @read -p "Commit message: " COMMIT_MESSAGE; git add . ; git commit -m "$$COMMIT_MESSAGE"
42 | @git push
43 |
44 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.mypy]
2 | ignore_missing_imports = true
3 |
4 | [tool.black]
5 | # line-length = 79 # max line length
6 | skip-string-normalization = 1 # disable converting single quote to double
7 | # by default black excludes everything listed in .gitignore. here is a regex pattern to extend this list
8 | # NOTE! a list of file names is not supported, only regex patterns
9 | # WARNING! also pass this list of files to pre-commit-config.
10 | extend-exclude = 'alembic/'
11 |
12 |
13 | [tool.isort]
14 | skip_glob = '*tmp*' # ignoring tmp files
15 | profile = 'black'
16 |
17 |
18 | [tool.autoflake]
19 | exclude = ['venv', 'alembic', '*tmp*'] # WARNING! also pass this list of files to pre-commit-config.
20 |
21 |
22 | [tool.flake8]
23 | max-line-length = 88 # PEP8: 79 | black: 88 | google: 120
24 | extend-ignore = [
25 |     'E731', # assignment of a lambda to a variable - okay
26 |     'E501', # line too long
27 |     # (black already enforces line length; when it can't be done, we leave the line as is)
28 |     'E203', # whitespace before a slice colon: seq[1 :2]
29 |     'N805', # first argument of a method should be named 'self' (to allow pydantic validator methods)
30 |     'N815', # because of the countryCode field
31 |     'R504', # unnecessary variable assignment before return statement
32 | ]
33 | per-file-ignores = [
34 | '*/__init__.py:F401', # imported but unused
35 | ]
36 | exclude = [
37 | 'venv/*',
38 | 'alembic/*',
39 | '*tmp*',
40 | '*untitled*',
41 | ]
--------------------------------------------------------------------------------
/tests/fixtures/fixture_db.py:
--------------------------------------------------------------------------------
1 | from typing import Type
2 |
3 | import pytest
4 | import sqlalchemy as db
5 | import sqlalchemy.orm as orm
6 |
7 | import collector
8 | from collector import models
9 | from collector.configurations import CollectorConfig
10 | from tests import logger
11 |
12 |
13 | @pytest.fixture # (scope="session")
14 | def engine(config: CollectorConfig):
15 | logger.debug(f'engine fixture. bind to: {config.db.url}')
16 | return db.create_engine(config.db.url, future=True, echo=False)
17 |
18 |
19 | @pytest.fixture
20 | def patch_engine(monkeypatch: pytest.MonkeyPatch, engine: db.engine.Engine):
21 | logger.debug('patch_engine fixture')
22 | monkeypatch.setattr(collector.session, 'engine', engine)
23 |
24 |
25 | @pytest.fixture # (scope="session")
26 | def setup_database(engine: db.engine.Engine, patch_engine):
27 | logger.debug('setup_database fixture')
28 |
29 | models.Base.metadata.drop_all(engine) # clear leftovers from previous broken tests
30 | models.Base.metadata.create_all(engine)
31 | yield
32 | logger.debug(engine.pool.status())
33 | models.Base.metadata.drop_all(engine)
34 |
35 |
36 | @pytest.fixture
37 | def session_class():
38 | logger.debug('session_class fixture')
39 |     return orm.Session  # return the default Session, not one from orm.sessionmaker (for now)
40 |
41 |
42 | @pytest.fixture
43 | def session(engine: db.engine.Engine, session_class: Type[orm.Session]):
44 | logger.debug('opened session fixture')
45 | with session_class(engine) as session:
46 | yield session
47 | session.commit()
48 |
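49 |
50 | # How these fixtures chain together (a reading aid, not executed):
51 | #     session        -> engine + session_class: yields an open orm.Session
52 | #     setup_database -> engine + patch_engine: recreates all tables around a test
53 | #     engine         -> config: builds a fresh Engine bound to the test database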
--------------------------------------------------------------------------------
/tests/fixtures/fixture_config.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pydantic
4 | import pytest
5 |
6 | from collector.configurations import (
7 | CONFIG,
8 | CollectorConfig,
9 | DatabaseConfig,
10 | SQLiteDatabaseConfig,
11 | )
12 | from tests import logger
13 |
14 | TEST_CITIES_FILE = os.path.join(os.path.dirname(__file__), 'testcities.json')
15 | TEST_DB_FILE = os.path.join(os.path.dirname(__file__), 'testdb.sqlite3')
16 |
17 |
18 | @pytest.fixture(
19 | scope="session",
20 | params=[
21 | pytest.param(SQLiteDatabaseConfig(path=TEST_DB_FILE), id='sqlite'),
22 | pytest.param(
23 | DatabaseConfig(
24 | user='test',
25 | password='test',
26 | host='localhost',
27 | database='test',
28 | ),
29 | id='postgres',
30 | marks=pytest.mark.postgres,
31 | ),
32 | ],
33 | )
34 | def config(request: pytest.FixtureRequest):
35 | db_config: pydantic.BaseSettings = request.param
36 | config = CollectorConfig(
37 | debug=False,
38 | cities_amount=20,
39 | cities_file=TEST_CITIES_FILE,
40 | collect_weather_delay=0.5,
41 | retry_collect_delay=1,
42 | db=db_config.dict(),
43 | )
44 | logger.debug(f'Running tests under those configurations: {config}')
45 | return config
46 |
47 |
48 | @pytest.fixture
49 | def mock_config(monkeypatch: pytest.MonkeyPatch, config: CollectorConfig):
50 | """
51 |     Patch the collector config and restore test files ('cities.json') afterwards.
52 | """
53 | cities = config.cities_file
54 | if os.path.isfile(cities):
55 | logger.warning(f'Test begins with already existing {cities}. ')
56 |
57 | for field in CollectorConfig.__fields__:
58 | monkeypatch.setattr(CONFIG, field, getattr(config, field))
59 | yield
60 |
61 | if os.path.isfile(cities):
62 | os.remove(cities)
63 |
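64 |
65 | # Since `config` is parametrized, every test that uses it runs once per database
66 | # backend; the postgres variant can be (de)selected via its marker, e.g.:
67 | #
68 | #     pytest -m "not postgres"   # sqlite backend only
69 | #     pytest -m "postgres"       # postgres backend only (needs a running test DB)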
--------------------------------------------------------------------------------
/alembic/env.py:
--------------------------------------------------------------------------------
1 | from logging.config import fileConfig
2 |
3 | from sqlalchemy import engine_from_config, pool
4 |
5 | from alembic import context
6 | from collector.configurations import CONFIG
7 | from collector.models import Base
8 |
9 | # this is the Alembic Config object, which provides
10 | # access to the values within the .ini file in use.
11 | config = context.config
12 | config.set_main_option('sqlalchemy.url', CONFIG.db.url)
13 |
14 | # Interpret the config file for Python logging.
15 | # This line sets up loggers basically.
16 | if config.config_file_name is not None:
17 | fileConfig(config.config_file_name)
18 |
19 | # add your model's MetaData object here
20 | # for 'autogenerate' support
21 | # from myapp import mymodel
22 | # target_metadata = mymodel.Base.metadata
23 | target_metadata = Base.metadata
24 |
25 | # other values from the config, defined by the needs of env.py,
26 | # can be acquired:
27 | # my_important_option = config.get_main_option("my_important_option")
28 | # ... etc.
29 |
30 |
31 | def run_migrations_offline() -> None:
32 | """Run migrations in 'offline' mode.
33 |
34 | This configures the context with just a URL
35 | and not an Engine, though an Engine is acceptable
36 | here as well. By skipping the Engine creation
37 | we don't even need a DBAPI to be available.
38 |
39 | Calls to context.execute() here emit the given string to the
40 | script output.
41 |
42 | """
43 | url = config.get_main_option("sqlalchemy.url")
44 | context.configure(
45 | url=url,
46 | target_metadata=target_metadata,
47 | literal_binds=True,
48 | dialect_opts={"paramstyle": "named"},
49 | render_as_batch=True, # special option for sqlite alter column migration
50 | )
51 |
52 | with context.begin_transaction():
53 | context.run_migrations()
54 |
55 |
56 | def run_migrations_online() -> None:
57 | """Run migrations in 'online' mode.
58 |
59 | In this scenario we need to create an Engine
60 | and associate a connection with the context.
61 |
62 | """
63 | connectable = engine_from_config(
64 | config.get_section(config.config_ini_section),
65 | prefix="sqlalchemy.",
66 | poolclass=pool.NullPool,
67 | )
68 |
69 | with connectable.connect() as connection:
70 | context.configure(
71 | connection=connection,
72 | target_metadata=target_metadata,
73 | render_as_batch=True, # special option for sqlite alter column migration
74 | )
75 |
76 | with context.begin_transaction():
77 | context.run_migrations()
78 |
79 |
80 | if context.is_offline_mode():
81 | run_migrations_offline()
82 | else:
83 | run_migrations_online()
84 |
--------------------------------------------------------------------------------
/collector/services/base.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import argparse
4 | from http import HTTPStatus
5 | from typing import Generic, Iterable, Type, TypeVar
6 |
7 | import requests
8 | from pydantic import BaseModel, ValidationError, parse_obj_as
9 |
10 | from collector.configurations import CONFIG, logger
11 | from collector.exceptions import ResponseError, ResponseSchemaError
12 |
13 | _SchemaType = TypeVar('_SchemaType', bound=BaseModel | Iterable[BaseModel])
14 | """
15 | Bounded TypeVar for generic classes that take any subtype of the BaseModel class.
16 | Also bound to Iterable, because a JSON response could be a `list[BaseModel]`.
17 | """
18 |
19 |
20 | class BaseService:
21 | command: str = 'service_name'
22 |     "Command name used to run the service from the command line. "
23 |
24 | def __init__(self, **kwargs) -> None:
25 | self.init_kwargs = kwargs
26 |
27 | @classmethod
28 | def manage_services(cls, argv: list[str]):
29 |         """
30 |         Parse command line args and return a service initialized with those args.
31 |         """
32 | parser = argparse.ArgumentParser(description='Weather Collector. ')
33 | parser.add_argument(
34 | 'service',
35 | type=str,
36 | help='service to proceed',
37 | choices=[service.command for service in cls.__subclasses__()],
38 | )
39 | for service in cls.__subclasses__():
40 | service.add_argument(parser)
41 |
42 | args = parser.parse_args(argv)
43 | service_class = cls.get_service(command=args.service)
44 | return service_class(**dict(args._get_kwargs()))
45 |
46 | @classmethod
47 | def get_service(cls, *, command: str):
48 | """
49 | Get collector service by provided command name.
50 | """
51 | filtered = filter(
52 | lambda service: service.command == command, cls.__subclasses__()
53 | )
54 | try:
55 | return next(filtered)
56 | except StopIteration:
57 | raise ValueError(f'No service with this command: {command}. ')
58 |
59 | @classmethod
60 | def get_descriptions(cls):
61 | return 'Collect Weather services description: \n' + '\n'.join(
62 | [
63 | f'{service.command}:\t{service.__doc__}'
64 | for service in cls.__subclasses__()
65 | ]
66 | )
67 |
68 | @classmethod
69 | def add_argument(cls, parser: argparse.ArgumentParser):
70 | pass
71 |
72 | def execute(self):
73 | logger.info(f'{self} is running. ')
74 |
75 | def __str__(self) -> str:
76 | return f'<{self.__class__.__name__}>'
77 |
78 |
79 | class FetchServiceMixin(Generic[_SchemaType]):
80 | url: str = ''
81 | params: dict = {
82 | "appid": CONFIG.open_weather_key,
83 | "units": "metric",
84 | }
85 | schema: Type[_SchemaType]
86 |     "Pydantic model used to parse response JSON data. Must be defined on inherited classes. "
87 |
88 | def fetch(self) -> _SchemaType:
89 | self.response = requests.get(self.url, self.params)
90 |
91 | if self.response.status_code != HTTPStatus.OK:
92 | raise ResponseError(self.response, self.response.json())
93 | if not getattr(self, 'schema', None):
94 | return self.response.json()
95 |
96 | try:
97 | instance = parse_obj_as(self.schema, self.response.json())
98 | except ValidationError as e:
99 | raise ResponseSchemaError(e)
100 |
101 | return instance
102 |
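103 |
104 | # Sketch of a concrete service (hypothetical names, for illustration only):
105 | #
106 | #     class PingWeather(BaseService, FetchServiceMixin[WeatherSchema]):
107 | #         command = 'ping_weather'
108 | #         url = 'https://api.openweathermap.org/data/2.5/weather'
109 | #         schema = WeatherSchema
110 | #
111 | #         def execute(self):
112 | #             super().execute()
113 | #             data = self.fetch()  # a WeatherSchema instance, or ResponseSchemaError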
--------------------------------------------------------------------------------
/alembic.ini:
--------------------------------------------------------------------------------
1 | # A generic, single database configuration.
2 |
3 | [alembic]
4 | # path to migration scripts
5 | script_location = alembic
6 |
7 | # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
8 | # Uncomment the line below if you want the files to be prepended with date and time
9 | # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
10 | # for all available tokens
11 | # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
12 |
13 | # sys.path path, will be prepended to sys.path if present.
14 | # defaults to the current working directory.
15 | prepend_sys_path = .
16 |
17 | # timezone to use when rendering the date within the migration file
18 | # as well as the filename.
19 | # If specified, requires the python-dateutil library that can be
20 | # installed by adding `alembic[tz]` to the pip requirements
21 | # string value is passed to dateutil.tz.gettz()
22 | # leave blank for localtime
23 | # timezone =
24 |
25 | # max length of characters to apply to the
26 | # "slug" field
27 | # truncate_slug_length = 40
28 |
29 | # set to 'true' to run the environment during
30 | # the 'revision' command, regardless of autogenerate
31 | # revision_environment = false
32 |
33 | # set to 'true' to allow .pyc and .pyo files without
34 | # a source .py file to be detected as revisions in the
35 | # versions/ directory
36 | # sourceless = false
37 |
38 | # version location specification; This defaults
39 | # to alembic/versions. When using multiple version
40 | # directories, initial revisions must be specified with --version-path.
41 | # The path separator used here should be the separator specified by "version_path_separator" below.
42 | # version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
43 |
44 | # version path separator; As mentioned above, this is the character used to split
45 | # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
46 | # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
47 | # Valid values for version_path_separator are:
48 | #
49 | # version_path_separator = :
50 | # version_path_separator = ;
51 | # version_path_separator = space
52 | version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
53 |
54 | # the output encoding used when revision files
55 | # are written from script.py.mako
56 | # output_encoding = utf-8
57 |
58 |
59 | sqlalchemy.url = ... # defined in env.py
60 |
61 | [post_write_hooks]
62 | # post_write_hooks defines scripts or Python functions that are run
63 | # on newly generated revision scripts. See the documentation for further
64 | # detail and examples
65 |
66 | # format using "black" - use the console_scripts runner, against the "black" entrypoint
67 | # hooks = black
68 | # black.type = console_scripts
69 | # black.entrypoint = black
70 | # black.options = -l 79 REVISION_SCRIPT_FILENAME
71 |
72 | # Logging configuration
73 | [loggers]
74 | keys = root,sqlalchemy,alembic
75 |
76 | [handlers]
77 | keys = console
78 |
79 | [formatters]
80 | keys = generic
81 |
82 | [logger_root]
83 | level = WARN
84 | handlers = console
85 | qualname =
86 |
87 | [logger_sqlalchemy]
88 | level = WARN
89 | handlers =
90 | qualname = sqlalchemy.engine
91 |
92 | [logger_alembic]
93 | level = INFO
94 | handlers =
95 | qualname = alembic
96 |
97 | [handler_console]
98 | class = StreamHandler
99 | args = (sys.stderr,)
100 | level = NOTSET
101 | formatter = generic
102 |
103 | [formatter_generic]
104 | format = %(levelname)-5.5s [%(name)s] %(message)s
105 | datefmt = %H:%M:%S
106 |
--------------------------------------------------------------------------------
/collector/session.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import functools
4 | import inspect
5 | from typing import Callable, Type
6 |
7 | import pydantic
8 | import sqlalchemy as db
9 | import sqlalchemy.orm as orm
10 |
11 | from collector.configurations import CONFIG, logger
12 | from collector.models import BaseModel
13 |
14 | try:
15 | logger.debug(f'Establishing (lazy) connection to database: {CONFIG.db.url}')
16 | engine = db.create_engine(CONFIG.db.url, future=True, echo=CONFIG.db.echo)
17 | except Exception as e:
18 | logger.critical(f'Connection failed. Check your database is running: {CONFIG.db}')
19 | raise e
20 |
21 |
22 | def session_enter(wrapped: Callable):
23 | @functools.wraps(wrapped)
24 | def wrapper(self: DBSessionMixin, *args, **kwargs):
25 | self.session = orm.Session(engine)
26 | logger.debug(f'Session is open with {engine=}. ')
27 | return wrapped(self, *args, **kwargs)
28 |
29 | return wrapper
30 |
31 |
32 | def session_exit(wrapped: Callable):
33 | @functools.wraps(wrapped)
34 | def wrapper(self: DBSessionMixin, *args, **kwargs):
35 | result = wrapped(self, *args, **kwargs)
36 | self.session.commit()
37 | self.session.close()
38 | logger.debug('Session is closed. ')
39 | return result
40 |
41 | return wrapper
42 |
43 |
44 | def safe_transaction(wrapped: Callable):
45 | @functools.wraps(wrapped)
46 | def wrapper(self: DBSessionMixin, *args, **kwargs):
47 | try:
48 | return wrapped(self, *args, **kwargs)
49 | except Exception as e:
50 | logger.debug(f'Transaction is rolling back. Exception: {e}')
51 | self.session.rollback()
52 | raise e
53 |
54 | return wrapper
55 |
56 |
57 | class DBSessionMeta(type):
58 | """
59 |     Metaclass that makes a class operate as a session context manager.
60 |
61 |     It wraps all class methods in the `safe_transaction` decorator, wraps the
62 |     enter method to open a `Session()`, and wraps the exit method to call
63 |     `session.commit()` and `session.close()`.
64 | """
65 |
66 | session_enter_method = '__init__'
67 | session_exit_method = 'execute'
68 |
69 | def __new__(cls, clsname: str, bases: tuple, attrs: dict):
70 | for key, value in attrs.items():
71 | if inspect.isfunction(value):
72 | attrs[key] = safe_transaction(value)
73 |
74 | if key == cls.session_enter_method:
75 | attrs[key] = session_enter(attrs[key])
76 | if key == cls.session_exit_method:
77 | attrs[key] = session_exit(attrs[key])
78 |
79 | return type.__new__(cls, clsname, bases, attrs)
80 |
81 |
82 | class DBSessionMixin(metaclass=DBSessionMeta):
83 | """
84 |     Mixin for handling the usual CRUD operations with the database.
85 |     A session opens at class init and closes after `execute()` returns; any
86 |     changes are committed at that point (see `DBSessionMeta`).
87 | """
88 |
89 | session: orm.Session
90 |
91 | def query(self, model_class: Type[BaseModel]):
92 | return self.session.query(model_class)
93 |
94 | def create(self, *instances: BaseModel):
95 | self.session.add_all(instances)
96 |
97 | def create_from_schema(
98 | self, model_class: Type[BaseModel], *instances: pydantic.BaseModel
99 | ):
100 | self.create(*[model_class(**instance.dict()) for instance in instances])
101 |
102 | def delete(self, obj: BaseModel | Type[BaseModel]):
103 | """
104 | Delete one model instance. Or all records at table if `obj` is a Model Class.
105 | """
106 | if isinstance(obj, BaseModel):
107 | return self.session.delete(obj)
108 | if isinstance(obj, type):
109 | return self.session.query(obj).delete()
110 | raise ValueError
111 |
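112 |
113 | # Sketch of the mixin in use (hypothetical service, for illustration):
114 | #
115 | #     class StoreCity(DBSessionMixin):
116 | #         def __init__(self, name: str):    # wrapped by session_enter: opens the session
117 | #             self.name = name
118 | #
119 | #         def execute(self):                # wrapped by session_exit: commit + close
120 | #             self.create(CityModel(name=self.name))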
--------------------------------------------------------------------------------
/collector/configurations.py:
--------------------------------------------------------------------------------
1 | from pprint import pformat
2 |
3 | import pydantic
4 |
5 | from collector.functools import init_logger
6 |
7 | logger = init_logger('weather-collector', 'INFO')
8 |
9 |
10 | class DatabaseConfig(pydantic.BaseModel):
11 | dialect: str = 'postgresql'
12 | driver: str | None = 'psycopg2'
13 | user: str
14 | password: str
15 | host: str = 'db'
16 | port: int = 5432
17 | database: str = 'default'
18 | echo: bool = False
19 |
20 | @property
21 | def url(self):
22 | driver = f'+{self.driver}' if self.driver else ''
23 | return (
24 | f'{self.dialect}{driver}:'
25 | f'//{self.user}:{self.password}@{self.host}:{self.port}/{self.database}'
26 | )
27 |
28 |
29 | class SQLiteDatabaseConfig(pydantic.BaseModel):
30 | path: str = 'db.sqlite3'
31 | echo: bool = False
32 |
33 | @property
34 | def url(self):
35 | return f'sqlite:///{self.path}'
36 |
37 |
38 | class CollectorConfig(pydantic.BaseSettings):
39 |     """
40 |     debug: `bool`
41 |         true: force using SQLite instead of PostgreSQL (even if one is defined in .env)
42 |     cities_amount: `int` = 50
43 |         Number of cities for the auto-initialized cities list, fetched from GeoDB.
44 |     cities_file: `str` = 'cities.json'
45 |         File describing which cities the weather collector fetches data for.
46 |     collect_weather_delay: `float` = 1 * 60 * 60
47 |         Delay between weather measurements. Seconds. Default: 1 hour.
48 |     open_weather_key: `str`
49 |         Open Weather API key. Open Weather can be used under the FREE plan. Restrictions:
50 |         - 60 calls/minute
51 |         - 1,000,000 calls/month
52 |     """
53 |
54 | debug: bool
55 |
56 | cities_amount: int = 50
57 | cities_file: str = 'cities.json'
58 | collect_weather_delay: float = 1 * 60 * 60
59 | retry_collect_delay: float = 3
60 | open_weather_key: str
61 |
62 | POSTGRES_USER: str | None = None
63 | POSTGRES_PASSWORD: str | None = None
64 | POSTGRES_DB: str | None = None
65 | POSTGRES_HOST: str | None = None
66 |
67 | db: DatabaseConfig | SQLiteDatabaseConfig = SQLiteDatabaseConfig()
68 |
69 | @pydantic.validator('db', pre=True)
70 | def debug_mode_database_sqlite(cls, db: dict, values: dict):
71 |         if not isinstance(db, dict):
72 |             return db
73 |
74 |         # at this point we cannot check whether the postgres variables were loaded,
75 |         # because they could come from 'prod.env' while we are using 'debug.env'.
76 |         # therefore, in debug mode sqlite is always used.
77 | if values.get('debug'):
78 | for field in DatabaseConfig.__fields__:
79 | db.pop(field, None)
80 | return db
81 |
82 | @pydantic.root_validator(pre=True)
83 | def make_config_fields_equal_to_postgres_variables(cls, values: dict):
84 | db: dict = values.get('db', {})
85 | if not isinstance(db, dict):
86 | return values
87 |
88 | for postgres_field, config_field in zip(
89 | ['POSTGRES_USER', 'POSTGRES_PASSWORD', 'POSTGRES_DB', 'POSTGRES_HOST'],
90 | ['user', 'password', 'database', 'host'],
91 | ):
92 | if values.get(postgres_field):
93 | db.setdefault(config_field, values.get(postgres_field))
94 |
95 | values['db'] = db
96 | return values
97 |
98 | def __str__(self) -> str:
99 | return '\n' + pformat(self.dict())
100 |
101 | class Config:
102 |         # debug.env and .env have higher priority than prod.env
103 |         # describe the production build in prod.env or .env (debug.env is in .dockerignore)
104 | env_file = 'prod.env', 'debug.env', '.env'
105 | env_nested_delimiter = '__'
106 |
107 |
108 | try:
109 | CONFIG = CollectorConfig()
110 |     logger.debug(f'Running collector under these configurations: {CONFIG}')
111 | except Exception as e:
112 |     raise RuntimeError(
113 |         f'Initializing configurations failed. Ensure an ".env" file exists. Details: {e}'
114 |     )
115 |
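116 |
117 | # URL construction sketch (illustrative values):
118 | #
119 | #     DatabaseConfig(user='u', password='p', database='weather').url
120 | #     # -> 'postgresql+psycopg2://u:p@db:5432/weather'
121 | #     SQLiteDatabaseConfig(path='db.sqlite3').url
122 | #     # -> 'sqlite:///db.sqlite3'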
--------------------------------------------------------------------------------
/alembic/versions/0f1755b76fbf_initial.py:
--------------------------------------------------------------------------------
1 | """initial
2 |
3 | Revision ID: 0f1755b76fbf
4 | Revises:
5 | Create Date: 2022-11-24 11:12:04.162960
6 |
7 | """
8 | import sqlalchemy as sa
9 |
10 | from alembic import op
11 |
12 | # revision identifiers, used by Alembic.
13 | revision = '0f1755b76fbf'
14 | down_revision = None
15 | branch_labels = None
16 | depends_on = None
17 |
18 |
19 | def upgrade() -> None:
20 | # ### commands auto generated by Alembic - please adjust! ###
21 | op.create_table('city',
22 | sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
23 | sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('(CURRENT_TIMESTAMP)'), nullable=True),
24 | sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
25 | sa.Column('name', sa.String(length=50), nullable=False),
26 | sa.Column('is_tracked', sa.Boolean(), nullable=False),
27 | sa.Column('country', sa.String(length=50), nullable=True),
28 | sa.Column('countryCode', sa.String(length=3), nullable=True),
29 | sa.Column('latitude', sa.Float(), nullable=True),
30 | sa.Column('longitude', sa.Float(), nullable=True),
31 | sa.Column('population', sa.Integer(), nullable=True),
32 | sa.PrimaryKeyConstraint('id'),
33 | sa.UniqueConstraint('id')
34 | )
35 | op.create_table('weather_measurement',
36 | sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
37 | sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('(CURRENT_TIMESTAMP)'), nullable=True),
38 | sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
39 | sa.Column('city_id', sa.Integer(), nullable=False),
40 | sa.Column('measure_at', sa.DateTime(), nullable=False, comment='Time of data forecasted. UTC. Do not confuse with base model `created_at` field.'),
41 | sa.ForeignKeyConstraint(['city_id'], ['city.id'], onupdate='CASCADE', ondelete='CASCADE'),
42 | sa.PrimaryKeyConstraint('id'),
43 | sa.UniqueConstraint('id')
44 | )
45 | op.create_table('extra_weather_data',
46 | sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
47 | sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('(CURRENT_TIMESTAMP)'), nullable=True),
48 | sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
49 | sa.Column('measurement_id', sa.Integer(), nullable=True),
50 | sa.Column('data', sa.JSON(), nullable=True),
51 | sa.ForeignKeyConstraint(['measurement_id'], ['weather_measurement.id'], ),
52 | sa.PrimaryKeyConstraint('id'),
53 | sa.UniqueConstraint('id')
54 | )
55 | op.create_table('main_weather_measurement',
56 | sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
57 | sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('(CURRENT_TIMESTAMP)'), nullable=True),
58 | sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
59 | sa.Column('measurement_id', sa.Integer(), nullable=True),
60 | sa.Column('temp', sa.Float(), nullable=True, comment='Temperature. Celsius.'),
61 | sa.Column('feels_like', sa.Float(), nullable=True, comment='This temperature parameter accounts for the human perception of weather. Celsius.'),
62 | sa.Column('temp_min', sa.Float(), nullable=True, comment='Minimum temperature at the moment. This is minimal currently observed temperature (within large megalopolises and urban areas). Celsius.'),
63 | sa.Column('temp_max', sa.Float(), nullable=True, comment='Maximum temperature at the moment. This is maximal currently observed temperature (within large megalopolises and urban areas). Celsius.'),
64 | sa.Column('pressure', sa.Integer(), nullable=True, comment='Atmospheric pressure (on the sea level, if there is no sea_level or grnd_level). hPa.'),
65 | sa.Column('humidity', sa.Integer(), nullable=True, comment='Humidity. %'),
66 | sa.Column('sea_level', sa.Integer(), nullable=True, comment='Atmospheric pressure on the sea level. hPa.'),
67 | sa.Column('grnd_level', sa.Integer(), nullable=True, comment='Atmospheric pressure on the ground level. hPa.'),
68 | sa.ForeignKeyConstraint(['measurement_id'], ['weather_measurement.id'], ),
69 | sa.PrimaryKeyConstraint('id'),
70 | sa.UniqueConstraint('id')
71 | )
72 | # ### end Alembic commands ###
73 |
74 |
75 | def downgrade() -> None:
76 | # ### commands auto generated by Alembic - please adjust! ###
77 | op.drop_table('main_weather_measurement')
78 | op.drop_table('extra_weather_data')
79 | op.drop_table('weather_measurement')
80 | op.drop_table('city')
81 | # ### end Alembic commands ###
82 |
--------------------------------------------------------------------------------
/collector/models.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from datetime import datetime
4 | from typing import TypeAlias
5 |
6 | import sqlalchemy as db
7 | from sqlalchemy import orm, sql
8 |
9 | Base: TypeAlias = orm.declarative_base() # type: ignore
10 |
11 |
12 | class BaseModel(Base):
13 | __abstract__ = True
14 |
15 | id: int = db.Column(
16 | db.Integer, nullable=False, unique=True, primary_key=True, autoincrement=True
17 | )
18 | created_at: datetime = db.Column(
19 | db.DateTime(timezone=True), server_default=sql.func.now()
20 | )
21 | updated_at: datetime = db.Column(
22 | db.DateTime(timezone=True), onupdate=sql.func.now()
23 | )
24 |
25 | def __repr__(self):
26 | return f'<{self.__class__.__name__}({self.id=})>'
27 |
28 |
29 | class CityModel(BaseModel):
30 | """
31 |     City representation. Name is required; the other fields are optional.
32 | """
33 |
34 | __tablename__ = 'city'
35 |
36 | name: str = db.Column(db.String(50), nullable=False)
37 | is_tracked: bool = db.Column(db.Boolean, nullable=False, default=True)
38 | country: str = db.Column(db.String(50))
39 | countryCode: str = db.Column(db.String(3))
40 | latitude: float = db.Column(db.Float)
41 | longitude: float = db.Column(db.Float)
42 | population: int = db.Column(db.Integer)
43 |
44 | measurements: list[MeasurementModel] = orm.relationship(
45 | 'MeasurementModel',
46 | backref='city',
47 | cascade='all, delete-orphan',
48 | )
49 |
50 | def __str__(self) -> str:
51 | return self.name
52 |
53 |
54 | class MeasurementModel(BaseModel):
55 | """
56 |     The Open Weather API provides a lot of information about current city weather.
57 |     Depending on the location and the weather situation, some fields may appear and
58 |     others may not. For that reason we decided to store the root fields in separate
59 |     tables.
60 |
61 |     Why `MainWeatherDataModel`?
62 |     The basic reason for collecting weather is understanding how to cool down the
63 |     company's servers. Therefore, we parse and store the `main` field, which contains
64 |     the current temperature. All other data is stored as JSON on
65 |     `ExtraWeatherDataModel` for any future purposes.
66 |
67 |     We may describe other tables to store all the data in a relational (SQL) way later, if we need it.
68 | """
69 |
70 | __tablename__ = 'weather_measurement'
71 |
72 | city_id: int = db.Column(
73 | db.Integer,
74 | db.ForeignKey('city.id', onupdate='CASCADE', ondelete='CASCADE'),
75 | nullable=False,
76 | )
77 | measure_at: datetime = db.Column(
78 | db.DateTime,
79 | nullable=False,
80 | comment='Time of data forecasted. UTC. Do not confuse with base model `created_at` field.',
81 | )
82 | main: MainWeatherDataModel = orm.relationship(
83 | 'MainWeatherDataModel',
84 | uselist=False,
85 | backref='measurement',
86 | cascade='all, delete-orphan',
87 | )
88 |
89 | # [NOTE]
90 | # Other fields can be handled here as one-to-one relation to separate table, if
91 | # the reason is appear in a future.
92 | ...
93 |
94 | extra: ExtraWeatherDataModel = orm.relationship(
95 | 'ExtraWeatherDataModel',
96 | uselist=False,
97 | backref='measurement',
98 | cascade='all, delete-orphan',
99 | )
100 |
101 |
102 | class MainWeatherDataModel(BaseModel):
103 | """
104 | Data at `main` field from measurement response.
105 | """
106 |
107 | __tablename__ = 'main_weather_measurement'
108 |
109 | measurement_id = db.Column(db.Integer, db.ForeignKey('weather_measurement.id'))
110 |
111 | temp: float = db.Column(db.Float, comment='Temperature. Celsius.')
112 | feels_like: float = db.Column(
113 | db.Float,
114 | comment='This temperature parameter accounts for the human perception of weather. Celsius.',
115 | )
116 | temp_min: float = db.Column(
117 | db.Float,
118 | comment='Minimum temperature at the moment. This is minimal currently observed temperature (within large megalopolises and urban areas). Celsius.',
119 | )
120 | temp_max: float = db.Column(
121 | db.Float,
122 | comment='Maximum temperature at the moment. This is maximal currently observed temperature (within large megalopolises and urban areas). Celsius.',
123 | )
124 | pressure: int = db.Column(
125 | db.Integer,
126 | comment='Atmospheric pressure (on the sea level, if there is no sea_level or grnd_level). hPa.',
127 | )
128 | humidity: int = db.Column(db.Integer, comment='Humidity. %')
129 | sea_level: int = db.Column(
130 | db.Integer, comment='Atmospheric pressure on the sea level. hPa.'
131 | )
132 | grnd_level: int = db.Column(
133 | db.Integer, comment='Atmospheric pressure on the ground level. hPa.'
134 | )
135 |
136 |
137 | class ExtraWeatherDataModel(BaseModel):
138 | """
139 | Additional data from weather measurement.
140 | """
141 |
142 | __tablename__ = 'extra_weather_data'
143 |
144 | measurement_id = db.Column(db.Integer, db.ForeignKey('weather_measurement.id'))
145 | data: dict = db.Column(db.JSON)
146 |
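147 |
148 | # Relationship sketch (illustrative, not executed anywhere in the project):
149 | #
150 | #     city = CityModel(name='Oslo')
151 | #     measurement = MeasurementModel(measure_at=datetime.utcnow())
152 | #     measurement.main = MainWeatherDataModel(temp=5.0)
153 | #     measurement.extra = ExtraWeatherDataModel(data={'wind': {'speed': 4.1}})
154 | #     city.measurements.append(measurement)  # backref sets measurement.city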
--------------------------------------------------------------------------------
/tests/test_services.py:
--------------------------------------------------------------------------------
1 | import pydantic
2 | import pytest
3 | import sqlalchemy.orm as orm
4 |
5 | from collector.configurations import CONFIG, CollectorConfig
6 | from collector.exceptions import NoDataError
7 | from collector.models import CityModel, MeasurementModel
8 | from collector.services.cities import (
9 | CitySchema,
10 | FetchCities,
11 | FetchCoordinates,
12 | InitCities,
13 | )
14 | from collector.services.weather import CollectScheduler, FetchWeather, ReportWeather
15 |
16 |
17 | @pytest.mark.usefixtures('mock_config', 'setup_database')
18 | class TestServices:
19 |
20 | ####################################################################################
21 | # Init Cities Service
22 | ####################################################################################
23 |
24 | def test_init_cities_no_file_rises(self):
25 | with pytest.raises(NoDataError):
26 | InitCities().execute()
27 |
28 | def test_init_cities_broken_file_rises(self, broken_cities_file):
29 | with pytest.raises(pydantic.ValidationError):
30 | InitCities().execute()
31 |
32 | def test_init_cities_(self, session: orm.Session, cities_file: list):
33 | InitCities().execute()
34 | assert session.query(CityModel).count() == len(cities_file)
35 |
36 | ####################################################################################
37 | # Fetch Cities Service
38 | ####################################################################################
39 |
40 | @pytest.mark.parametrize(
41 | 'cities_names',
42 | [
43 | pytest.param(['Moscow', 'Tokyo', 'Shanghai', 'Istanbul']),
44 | ],
45 | )
46 | def test_fetch_cities_assert_cities_list(
47 | self,
48 | session: orm.Session,
49 | cities_names: list[str],
50 | ):
51 | """
52 |         Test that the biggest world cities appear in the DB.
53 | """
54 | FetchCities().execute()
55 | for city in cities_names:
56 | assert session.query(CityModel).filter(CityModel.name == city).all()
57 |
58 | @pytest.mark.parametrize(
59 | 'amount',
60 | [
61 | pytest.param(1),
62 | pytest.param(17),
63 | pytest.param(100, marks=pytest.mark.slow),
64 | ],
65 | )
66 | def test_fetch_cities_assert_amounts(
67 | self,
68 | session: orm.Session,
69 | monkeypatch: pytest.MonkeyPatch,
70 | config: CollectorConfig,
71 | amount: int,
72 | ):
73 | monkeypatch.setattr(CONFIG, 'cities_amount', amount)
74 |
75 | FetchCities().execute()
76 |
77 | assert session.query(CityModel).count() == amount
78 | cities_from_file = pydantic.parse_file_as(list[CitySchema], config.cities_file)
79 | assert len(cities_from_file) == amount
80 |
81 | def test_fetch_cities_zero_cities_amount_rises(
82 | self, monkeypatch: pytest.MonkeyPatch
83 | ):
84 | monkeypatch.setattr(CONFIG, 'cities_amount', 0)
85 |
86 |         # like InitCities, FetchCities will raise NoDataError for a cities amount of 0
87 | with pytest.raises(NoDataError):
88 | FetchCities().execute()
89 |
90 | ####################################################################################
91 | # Fetch Coordinates Service
92 | ####################################################################################
93 |
94 | def test_fetch_coordinates(self, seed_cities_to_database, session: orm.Session):
95 | cites: list[CityModel] = session.query(CityModel).all()
96 | for city in cites:
97 | FetchCoordinates(city).execute()
98 | assert city.latitude and city.longitude
99 |
100 | ####################################################################################
101 | # Fetch Weather Service
102 | ####################################################################################
103 |
104 | def test_fetch_weather_rises(self):
105 | with pytest.raises(NoDataError):
106 | FetchWeather()
107 |
108 | def test_fetch_weather(self, seed_cities_to_database, session: orm.Session):
109 | FetchWeather().execute()
110 | measures: list[MeasurementModel] = session.query(MeasurementModel).all()
111 | for measure in measures:
112 | assert measure.main
113 | assert measure.main.temp
114 | assert measure.extra
115 | assert measure.extra.data
116 |
117 | ####################################################################################
118 | # Collect Weather Service
119 | ####################################################################################
120 |
121 | def test_collect_weather_initial(
122 | self,
123 | session: orm.Session,
124 | monkeypatch: pytest.MonkeyPatch,
125 | ):
126 | repeats = 2
127 | cities_amount = 3
128 | monkeypatch.setattr(CONFIG, 'cities_amount', cities_amount)
129 |
130 | CollectScheduler(repeats=repeats, initial=True).execute()
131 | assert session.query(MeasurementModel).count() == cities_amount * repeats
132 |
133 | def test_collect_weather_initial_many_cities(
134 | self,
135 | session: orm.Session,
136 | monkeypatch: pytest.MonkeyPatch,
137 | ):
138 | repeats = 1
139 | cities_amount = 50
140 | monkeypatch.setattr(CONFIG, 'cities_amount', cities_amount)
141 |
142 | CollectScheduler(repeats=repeats, initial=True).execute()
143 | assert session.query(MeasurementModel).count() == cities_amount * repeats
144 |
145 | def test_collect_weather_with_cities_at_db(
146 | self,
147 | cities_list: list,
148 | seed_cities_to_database,
149 | session: orm.Session,
150 | monkeypatch: pytest.MonkeyPatch,
151 | ):
152 | repeats = 2
153 | CollectScheduler(repeats=repeats).execute()
154 | assert session.query(MeasurementModel).count() == len(cities_list) * repeats
155 |
156 | ####################################################################################
157 | # Report Weather Service
158 | ####################################################################################
159 |
160 | def test_report_weather(self, seed_cities_to_database, session: orm.Session):
161 | CollectScheduler(repeats=1).execute()
162 | ReportWeather(average=True, latest=True).execute()
163 | ...
164 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Mono auto generated files
17 | mono_crash.*
18 |
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Aa][Rr][Mm]/
27 | [Aa][Rr][Mm]64/
28 | bld/
29 | [Bb]in/
30 | [Oo]bj/
31 | [Ll]og/
32 | [Ll]ogs/
33 |
34 | # Visual Studio 2015/2017 cache/options directory
35 | .vs/
36 | # Uncomment if you have tasks that create the project's static files in wwwroot
37 | #wwwroot/
38 |
39 | # Visual Studio 2017 auto generated files
40 | Generated\ Files/
41 |
42 | # MSTest test Results
43 | [Tt]est[Rr]esult*/
44 | [Bb]uild[Ll]og.*
45 |
46 | # NUnit
47 | *.VisualState.xml
48 | TestResult.xml
49 | nunit-*.xml
50 |
51 | # Build Results of an ATL Project
52 | [Dd]ebugPS/
53 | [Rr]eleasePS/
54 | dlldata.c
55 |
56 | # Benchmark Results
57 | BenchmarkDotNet.Artifacts/
58 |
59 | # .NET Core
60 | project.lock.json
61 | project.fragment.lock.json
62 | artifacts/
63 |
64 | # StyleCop
65 | StyleCopReport.xml
66 |
67 | # Files built by Visual Studio
68 | *_i.c
69 | *_p.c
70 | *_h.h
71 | *.ilk
72 | *.meta
73 | *.obj
74 | *.iobj
75 | *.pch
76 | *.pdb
77 | *.ipdb
78 | *.pgc
79 | *.pgd
80 | *.rsp
81 | *.sbr
82 | *.tlb
83 | *.tli
84 | *.tlh
85 | *.tmp
86 | *.tmp_proj
87 | *_wpftmp.csproj
88 | *.log
89 | *.vspscc
90 | *.vssscc
91 | .builds
92 | *.pidb
93 | *.svclog
94 | *.scc
95 |
96 | # Chutzpah Test files
97 | _Chutzpah*
98 |
99 | # Visual C++ cache files
100 | ipch/
101 | *.aps
102 | *.ncb
103 | *.opendb
104 | *.opensdf
105 | *.sdf
106 | *.cachefile
107 | *.VC.db
108 | *.VC.VC.opendb
109 |
110 | # Visual Studio profiler
111 | *.psess
112 | *.vsp
113 | *.vspx
114 | *.sap
115 |
116 | # Visual Studio Trace Files
117 | *.e2e
118 |
119 | # TFS 2012 Local Workspace
120 | $tf/
121 |
122 | # Guidance Automation Toolkit
123 | *.gpState
124 |
125 | # ReSharper is a .NET coding add-in
126 | _ReSharper*/
127 | *.[Rr]e[Ss]harper
128 | *.DotSettings.user
129 |
130 | # TeamCity is a build add-in
131 | _TeamCity*
132 |
133 | # DotCover is a Code Coverage Tool
134 | *.dotCover
135 |
136 | # AxoCover is a Code Coverage Tool
137 | .axoCover/*
138 | !.axoCover/settings.json
139 |
140 | # Visual Studio code coverage results
141 | *.coverage
142 | *.coveragexml
143 |
144 | # NCrunch
145 | _NCrunch_*
146 | .*crunch*.local.xml
147 | nCrunchTemp_*
148 |
149 | # MightyMoose
150 | *.mm.*
151 | AutoTest.Net/
152 |
153 | # Web workbench (sass)
154 | .sass-cache/
155 |
156 | # Installshield output folder
157 | [Ee]xpress/
158 |
159 | # DocProject is a documentation generator add-in
160 | DocProject/buildhelp/
161 | DocProject/Help/*.HxT
162 | DocProject/Help/*.HxC
163 | DocProject/Help/*.hhc
164 | DocProject/Help/*.hhk
165 | DocProject/Help/*.hhp
166 | DocProject/Help/Html2
167 | DocProject/Help/html
168 |
169 | # Click-Once directory
170 | publish/
171 |
172 | # Publish Web Output
173 | *.[Pp]ublish.xml
174 | *.azurePubxml
175 | # Note: Comment the next line if you want to checkin your web deploy settings,
176 | # but database connection strings (with potential passwords) will be unencrypted
177 | *.pubxml
178 | *.publishproj
179 |
180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
181 | # checkin your Azure Web App publish settings, but sensitive information contained
182 | # in these scripts will be unencrypted
183 | PublishScripts/
184 |
185 | # NuGet Packages
186 | *.nupkg
187 | # NuGet Symbol Packages
188 | *.snupkg
189 | # The packages folder can be ignored because of Package Restore
190 | **/[Pp]ackages/*
191 | # except build/, which is used as an MSBuild target.
192 | !**/[Pp]ackages/build/
193 | # Uncomment if necessary however generally it will be regenerated when needed
194 | #!**/[Pp]ackages/repositories.config
195 | # NuGet v3's project.json files produces more ignorable files
196 | *.nuget.props
197 | *.nuget.targets
198 |
199 | # Microsoft Azure Build Output
200 | csx/
201 | *.build.csdef
202 |
203 | # Microsoft Azure Emulator
204 | ecf/
205 | rcf/
206 |
207 | # Windows Store app package directories and files
208 | AppPackages/
209 | BundleArtifacts/
210 | Package.StoreAssociation.xml
211 | _pkginfo.txt
212 | *.appx
213 | *.appxbundle
214 | *.appxupload
215 |
216 | # Visual Studio cache files
217 | # files ending in .cache can be ignored
218 | *.[Cc]ache
219 | # but keep track of directories ending in .cache
220 | !?*.[Cc]ache/
221 |
222 | # Others
223 | ClientBin/
224 | ~$*
225 | *~
226 | *.dbmdl
227 | *.dbproj.schemaview
228 | *.jfm
229 | *.pfx
230 | *.publishsettings
231 | orleans.codegen.cs
232 |
233 | # Including strong name files can present a security risk
234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
235 | #*.snk
236 |
237 | # Since there are multiple workflows, uncomment next line to ignore bower_components
238 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
239 | #bower_components/
240 |
241 | # RIA/Silverlight projects
242 | Generated_Code/
243 |
244 | # Backup & report files from converting an old project file
245 | # to a newer Visual Studio version. Backup files are not needed,
246 | # because we have git ;-)
247 | _UpgradeReport_Files/
248 | Backup*/
249 | UpgradeLog*.XML
250 | UpgradeLog*.htm
251 | ServiceFabricBackup/
252 | *.rptproj.bak
253 |
254 | # SQL Server files
255 | *.mdf
256 | *.ldf
257 | *.ndf
258 |
259 | # Business Intelligence projects
260 | *.rdl.data
261 | *.bim.layout
262 | *.bim_*.settings
263 | *.rptproj.rsuser
264 | *- [Bb]ackup.rdl
265 | *- [Bb]ackup ([0-9]).rdl
266 | *- [Bb]ackup ([0-9][0-9]).rdl
267 |
268 | # Microsoft Fakes
269 | FakesAssemblies/
270 |
271 | # GhostDoc plugin setting file
272 | *.GhostDoc.xml
273 |
274 | # Node.js Tools for Visual Studio
275 | .ntvs_analysis.dat
276 | node_modules/
277 |
278 | # Visual Studio 6 build log
279 | *.plg
280 |
281 | # Visual Studio 6 workspace options file
282 | *.opt
283 |
284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
285 | *.vbw
286 |
287 | # Visual Studio LightSwitch build output
288 | **/*.HTMLClient/GeneratedArtifacts
289 | **/*.DesktopClient/GeneratedArtifacts
290 | **/*.DesktopClient/ModelManifest.xml
291 | **/*.Server/GeneratedArtifacts
292 | **/*.Server/ModelManifest.xml
293 | _Pvt_Extensions
294 |
295 | # Paket dependency manager
296 | .paket/paket.exe
297 | paket-files/
298 |
299 | # FAKE - F# Make
300 | .fake/
301 |
302 | # CodeRush personal settings
303 | .cr/personal
304 |
305 | # Python Tools for Visual Studio (PTVS)
306 | __pycache__/
307 | *.pyc
308 |
309 | # Cake - Uncomment if you are using it
310 | # tools/**
311 | # !tools/packages.config
312 |
313 | # Tabs Studio
314 | *.tss
315 |
316 | # Telerik's JustMock configuration file
317 | *.jmconfig
318 |
319 | # BizTalk build output
320 | *.btp.cs
321 | *.btm.cs
322 | *.odx.cs
323 | *.xsd.cs
324 |
325 | # OpenCover UI analysis results
326 | OpenCover/
327 |
328 | # Azure Stream Analytics local run output
329 | ASALocalRun/
330 |
331 | # MSBuild Binary and Structured Log
332 | *.binlog
333 |
334 | # NVidia Nsight GPU debugger configuration file
335 | *.nvuser
336 |
337 | # MFractors (Xamarin productivity tool) working folder
338 | .mfractor/
339 |
340 | # Local History for Visual Studio
341 | .localhistory/
342 |
343 | # BeatPulse healthcheck temp database
344 | healthchecksdb
345 |
346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
347 | MigrationBackup/
348 |
349 | # Ionide (cross platform F# VS Code tools) working folder
350 | .ionide/
351 |
352 | # venv
353 | venv*
354 | *.env
355 |
356 | # macOS system files
357 | .DS_Store
358 |
359 | # IDE settings
360 | .vscode*
361 |
362 | # local storage
363 | *.sqlite3
364 | *.sqlite3*
365 | cities.json
366 |
367 | # tmp files
368 | *Untitled*
369 | *untitled*
370 | *tmp*
371 |
372 |
373 | htmlcov
--------------------------------------------------------------------------------
/collector/services/cities.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import argparse
4 | import json
5 | import os
6 | import unicodedata
7 |
8 | import pydantic
9 |
10 | from collector.configurations import CONFIG, logger
11 | from collector.exceptions import NoDataError
12 | from collector.models import CityModel
13 | from collector.services.base import BaseService, FetchServiceMixin
14 | from collector.session import DBSessionMixin
15 |
16 | ########################################################################################
17 | # Cities Schemas
18 | ########################################################################################
19 |
20 |
21 | class CitySchema(pydantic.BaseModel):
22 | name: str
23 | country: str | None
24 | countryCode: str | None
25 | latitude: float | None
26 | longitude: float | None
27 | population: int | None
28 |
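    |     # The validator below strips diacritics so names are ASCII-safe,
    |     # e.g. 'São Paulo' -> 'Sao Paulo'.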
29 | @pydantic.validator('name')
30 | def clean_name_unicode(cls, value):
31 | return str(
32 | unicodedata.normalize('NFKD', value)
33 | .encode('ascii', 'ignore')
34 | .decode("utf-8")
35 | )
36 |
37 |
38 | class CitiesListSchema(pydantic.BaseModel):
39 | data: list[CitySchema]
40 |
41 |
42 | class CityCoordinatesSchema(pydantic.BaseModel):
43 | name: str
44 | lat: float
45 | lon: float
46 | country: str | None
47 | state: str | None
48 |
49 |     # [FIXME] parsing the response fails because of unicode symbols
50 | # local_names: list[str] | None
51 |
52 |
53 | ########################################################################################
54 | # Init Cities Service
55 | ########################################################################################
56 |
57 |
58 | class InitCities(BaseService, DBSessionMixin):
59 | """
60 |     Load the cities list from a JSON file and append those cities to the database.
61 |     If `predefined` is provided, that list is used instead.
62 | """
63 |
64 | command = 'init_cities'
65 |
66 | def __init__(
67 | self, *, override: bool = False, predefined: list[CitySchema] = [], **kwargs
68 | ) -> None:
69 | self.predefined = predefined
70 | self.override = override
71 | super().__init__(**kwargs)
72 |
73 | @classmethod
74 | def add_argument(cls, parser: argparse.ArgumentParser):
75 | parser.add_argument(
76 | '-o',
77 | '--override',
78 | action='store_true',
79 |             help='mark all cities already in the DB as not tracked for weather collecting',
80 | )
81 |
82 | def execute(self):
83 | super().execute()
84 | cities = self.predefined or self.load_from_file()
85 | if not cities:
86 | raise NoDataError(f'{CONFIG.cities_file} has no cities to initialize. ')
87 |
88 | if self.override:
89 | previous: list[CityModel] = self.query(CityModel).all()
90 | for city in previous:
91 | city.is_tracked = False
92 | logger.info(f'{len(previous)} cities are not tracked anymore. ')
93 |
94 | self.create_from_schema(CityModel, *cities)
95 |         logger.info(f'Added {len(cities)} new records to {CityModel}. ')
96 |
97 | def load_from_file(self):
98 | try:
99 | return pydantic.parse_file_as(list[CitySchema], CONFIG.cities_file)
100 | except FileNotFoundError as e:
101 | raise NoDataError(e, msg='Init cities from file failed. ')
102 |
103 |
104 | ########################################################################################
105 | # Fetch Cities Service
106 | ########################################################################################
107 |
108 |
109 | class FetchCities(BaseService, FetchServiceMixin[CitiesListSchema]):
110 | """
111 |     Fetch the cities list from the GeoDB API, save it to a JSON file for later manual
112 |     adjustment, and call the `InitCities` service to store all new cities in the database.
113 |
114 | Endpoint detail information: http://geodb-cities-api.wirefreethought.com/
115 | """
116 |
117 | command = 'fetch_cities'
118 | url = 'http://geodb-free-service.wirefreethought.com/v1/geo/cities'
119 |
120 | # [NOTE]
121 |     # We are using the GeoDB API service under its FREE plan at the URL above.
122 |     # That plan restricts the `limit` param to 10, so, for instance, we have to
123 |     # make 5 requests to fetch 50 cities.
124 | restricted_limit = 10
125 | params = {
126 | 'sort': '-population',
127 | 'types': 'CITY',
128 | 'limit': restricted_limit,
129 | }
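    |     # Resulting request, e.g.: GET <url>?sort=-population&types=CITY&limit=10&offset=20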
130 | schema = CitiesListSchema
131 |
132 | def execute(self):
133 | super().execute()
134 | cities = self.fetch()
135 | self.append_to_file(cities)
136 | logger.info(
137 | f'Successfully fetched {CONFIG.cities_amount} cities and stored them at '
138 |             f'{CONFIG.cities_file}. Review the file to confirm the results; make any '
139 |             'changes and apply them by calling `init_cities` with the --override flag.'
140 | )
141 |
142 | InitCities(predefined=cities, **self.init_kwargs).execute()
143 |
144 | def fetch(self):
145 | cities: list[CitySchema] = []
146 | repeats = CONFIG.cities_amount // self.restricted_limit
147 | remains = CONFIG.cities_amount % self.restricted_limit
148 |
149 |         for i in range(repeats + int(bool(remains))):  # extra pass for a partial page
150 | if i == repeats:
151 |                 self.params['limit'] = remains  # final page: fetch only the remainder
152 |
153 | offset = i * self.restricted_limit
154 | self.params['offset'] = offset
155 |
156 | logger.info(f'Fetching cities: {offset}/{CONFIG.cities_amount}')
157 |
158 |             # `data` is the field in the response JSON that holds the list of cities
159 | cities += super().fetch().data
160 |
161 |         self.params['limit'] = self.restricted_limit  # restore limit for later calls
162 | return cities
163 |
164 | def append_to_file(self, cities: list[CitySchema]):
165 | if os.path.isfile(CONFIG.cities_file):
166 | logger.warning(
167 |                 f'{CONFIG.cities_file} already exists. Its content will be overwritten. '
168 | )
169 |
170 | with open(CONFIG.cities_file, 'w+', encoding='utf-8') as file:
171 | json.dump([city.dict() for city in cities], file)
172 |
173 |
174 | ########################################################################################
175 | # Fetch Coordinates Service
176 | ########################################################################################
177 |
178 |
179 | class FetchCoordinates(
180 | BaseService,
181 | DBSessionMixin,
182 | FetchServiceMixin[list[CityCoordinatesSchema]],
183 | ):
184 | """
185 |     If a city object has no coordinates, get them by calling the Open Weather
186 |     Geocoding API. The API documentation says:
187 |
188 | `Please use Geocoder API if you need automatic convert city names and zip-codes to
189 | geo coordinates and the other way around. Please note that API requests by city
190 | name, zip-codes and city id have been deprecated.`
191 |
192 | Endpoint detail information: https://openweathermap.org/api/geocoding-api
193 | """
194 |
195 | command = 'fetch_coordinates'
196 | url = 'http://api.openweathermap.org/geo/1.0/direct'
197 | schema = list[CityCoordinatesSchema]
198 | params = {
199 | "appid": CONFIG.open_weather_key,
200 | "limit": 10,
201 | }
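    |     # Illustrative response entry (trimmed to the schema fields):
    |     # [{"name": "London", "lat": 51.5073, "lon": -0.1276, "country": "GB", "state": "England"}]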
202 |
203 | def __init__(self, city: CityModel | str, **kwargs) -> None:
204 | if isinstance(city, str):
205 | self.city: CityModel = (
206 | self.query(CityModel).filter(CityModel.name == city).one()
207 | )
208 | else:
209 | self.city = city
210 |
211 | self.params['q'] = f'{self.city.name},{self.city.countryCode}'
212 | super().__init__(**kwargs)
213 |
214 | def execute(self):
215 | super().execute()
216 |
217 | geo_list = self.fetch()
218 | if not geo_list:
219 | raise NoDataError(
220 | 'Getting coordinates failed. '
221 | f'Geocoding has no information about {self.city}. '
222 | )
223 | if len(geo_list) > 1:
224 | logger.warning(
225 |                 f'Geocoding returned multiple records for {self.city}. Taking the first. '
226 | )
227 |
228 | coordinates = geo_list[0]
229 | self.city.latitude = coordinates.lat
230 | self.city.longitude = coordinates.lon
231 |
232 | def fetch(self):
233 | logger.info(f'Fetching coordinates for {self.city}. ')
234 | return super().fetch()
235 |
--------------------------------------------------------------------------------
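A minimal, self-contained sketch of the offset/limit paging that `FetchCities.fetch` performs (illustrative names only; this function is not part of the package):

```python
# The free GeoDB plan caps `limit` at 10, so the requested total is split into
# full pages plus an optional final partial page with a shrunken limit.
RESTRICTED_LIMIT = 10


def pages(total: int, page_size: int = RESTRICTED_LIMIT) -> list[tuple[int, int]]:
    """Return (offset, limit) pairs that cover `total` items."""
    full, remainder = divmod(total, page_size)
    result = [(i * page_size, page_size) for i in range(full)]
    if remainder:
        result.append((full * page_size, remainder))
    return result


assert pages(50) == [(0, 10), (10, 10), (20, 10), (30, 10), (40, 10)]
assert pages(25) == [(0, 10), (10, 10), (20, 5)]
```

Assuming `manage.py` maps each service's `command` attribute onto an argparse sub-command (which the `add_argument` hooks suggest), a typical flow would be `python manage.py fetch_cities`, then a manual review of the cities file, then `python manage.py init_cities --override`.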
/README.md:
--------------------------------------------------------------------------------
12 | Python package for collecting weather measurements.
13 |
14 | Explore Usage topic »
--------------------------------------------------------------------------------
/collector/services/weather.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import argparse
4 | import sys
5 | from datetime import datetime, timedelta
6 |
7 | import pydantic
8 | from apscheduler.schedulers.blocking import BlockingScheduler
9 |
10 | from collector.configurations import CONFIG, logger
11 | from collector.exceptions import CollectorBaseException, NoDataError
12 | from collector.models import (
13 |     CityModel,
14 |     ExtraWeatherDataModel,
15 |     MainWeatherDataModel,
16 |     MeasurementModel,
17 | )
18 | from collector.services.base import BaseService, FetchServiceMixin
19 | from collector.services.cities import FetchCities, FetchCoordinates, InitCities
20 | from collector.session import DBSessionMixin
21 |
22 | ########################################################################################
23 | # Weather Schemas
24 | ########################################################################################
25 |
26 |
27 | class MainWeatherSchema(pydantic.BaseModel):
28 |     """
29 |     Schema for parsing the `main` field from an Open Weather response.
30 |
31 |     For more information see `MainWeatherDataModel` where we store all these values.
32 |     """
33 |
34 |     temp: float
35 |     feels_like: float
36 |     temp_min: float
37 |     temp_max: float
38 |     pressure: int
39 |     humidity: int
40 |     sea_level: int | None
41 |     grnd_level: int | None
42 |
43 |
44 | class WeatherMeasurementSchema(pydantic.BaseModel):
45 |     """
46 |     Schema for parsing data from the Open Weather API. We only require the `main`
47 |     and `dt` fields. Everything else is optional and is stored in the extra data table.
48 |
49 |     For more information see `MeasurementModel` where we store all these values.
50 |     """
51 |
52 |     main: MainWeatherSchema
53 |     dt: int
54 |     "Time of data calculation, Unix, UTC (timestamp). `measure_at` field at the model."
55 |
56 |
57 | ########################################################################################
58 | # Fetch Weather Service
59 | ########################################################################################
60 |
61 |
62 | class FetchWeather(
63 |     BaseService, DBSessionMixin, FetchServiceMixin[WeatherMeasurementSchema]
64 | ):
65 |     """
66 |     Fetch weather for cities and store the data in the database.
67 |     By default, fetches weather for all tracked cities from the database.
68 |
69 |     Endpoint detail information: https://openweathermap.org/current
70 |     """
71 |
72 |     command = 'fetch_weather'
73 |     url = 'https://api.openweathermap.org/data/2.5/weather'
74 |     schema = WeatherMeasurementSchema
75 |
76 |     def __init__(self, **kwargs) -> None:
77 |         self.cities: list[CityModel] = (
78 |             self.query(CityModel).filter(CityModel.is_tracked).all()
79 |         )
80 |         if not self.cities:
81 |             raise NoDataError(
82 |                 'No cities in the database to be tracked. '
83 |                 f'Call for {FetchCities.command} or {InitCities.command} before. '
84 |             )
85 |
86 |         super().__init__(**kwargs)
87 |
88 |     def execute(self):
89 |         super().execute()
90 |         for city in self.cities:
91 |             if not all([city.longitude, city.latitude]):
92 |                 try:
93 |                     FetchCoordinates(city, **self.init_kwargs).execute()
94 |                 except NoDataError as e:
95 |                     logger.warning(f'Cannot get weather for {city}: {e}. Skipping. ')
96 |                     continue
97 |
98 |             measure, extra = self.fetch(city)
99 |             model = MeasurementModel(
100 |                 city=city,
101 |                 measure_at=datetime.utcfromtimestamp(measure.dt),
102 |                 main=MainWeatherDataModel(**measure.main.dict()),
103 |                 extra=ExtraWeatherDataModel(data=extra),
104 |             )
105 |             self.create(model)
106 |
107 |     def fetch(self, city: CityModel):  # type: ignore
108 |         logger.info(f'Fetching weather for {city}. ')
109 |
110 |         self.params['lat'] = str(city.latitude)
111 |         self.params['lon'] = str(city.longitude)
112 |         measure = super().fetch()
113 |
114 |         extra: dict = self.response.json()
115 |         for field in self.schema.__fields__:
116 |             extra.pop(field)  # already parsed into `measure`; keep only unparsed extras
117 |
118 |         return measure, extra
119 |
120 |
121 | ########################################################################################
122 | # Collect Weather Service
123 | ########################################################################################
124 |
125 |
126 | class CollectScheduler(BaseService):
127 |     """
128 |     Fetch weather measurements continuously on a schedule.
129 |     """
130 |
131 |     command = 'collect'
132 |
133 |     def __init__(
134 |         self, *, repeats: int | None = None, initial: bool = False, **kwargs
135 |     ) -> None:
136 |         self.counter = 0
137 |         self.repeats = repeats
138 |         self.scheduler = BlockingScheduler()
139 |
140 |         if initial:
141 |             try:
142 |                 InitCities(**kwargs).execute()
143 |             except NoDataError as e:
144 |                 logger.warning(f'{e}. Handling by calling for {FetchCities.command}.')
145 |                 FetchCities(**kwargs).execute()
146 |
147 |         super().__init__(**kwargs)
148 |
149 |     @classmethod
150 |     def add_argument(cls, parser: argparse.ArgumentParser):
151 |         parser.add_argument(
152 |             '-r',
153 |             '--repeats',
154 |             metavar='