├── .dockerignore ├── .env.example ├── .github └── workflows │ └── tests.yaml ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── TODO.md ├── data-migrate.sh ├── docker-compose.dev.yml ├── docker-compose.yml ├── docs ├── diagrams │ └── agile_metrics.drawio └── images │ ├── agile_metrics.png │ ├── agile_metrics_cloud.png │ ├── datalens_example.png │ ├── etl_metrics.jpeg │ └── logs.png ├── examples ├── extended_model │ └── main.py └── serverless │ ├── main.py │ └── requirements.txt ├── migrations ├── clickhouse │ ├── 000001_create_table_issues.down.sql │ ├── 000001_create_table_issues.up.sql │ ├── 000002_create_table_issue_metrics.down.sql │ ├── 000002_create_table_issue_metrics.up.sql │ ├── 000003_create_table_issues_changelog.down.sql │ ├── 000003_create_table_issues_changelog.up.sql │ ├── 000004_create_view_issues_view.down.sql │ └── 000004_create_view_issues_view.up.sql └── v0.1.x │ ├── 000001_create_table_issues.down.sql │ ├── 000001_create_table_issues.up.sql │ ├── 000002_create_table_issue_metrics.down.sql │ ├── 000002_create_table_issue_metrics.up.sql │ ├── 000003_create_view_issues_view.down.sql │ └── 000003_create_view_issues_view.up.sql ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── conftest.py ├── test_config.py ├── test_etl.py ├── test_helpers.py └── test_state.py └── tracker_exporter ├── __init__.py ├── _meta.py ├── _typing.py ├── config.py ├── etl.py ├── exceptions.py ├── main.py ├── models ├── __init__.py ├── base.py └── issue.py ├── services ├── __init__.py ├── clickhouse.py ├── monitoring.py └── tracker.py ├── state ├── __init__.py ├── backends.py ├── factory.py ├── managers.py └── serializers.py └── utils ├── __init__.py └── helpers.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | # env/ 12 | # !/**/env 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | .venv 85 | venv* 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | .idea 94 | tests 95 | .git 96 | 97 | 98 | # Project 99 | 100 | .env 101 | .env.example 102 | data-migrate.sh 103 | Makefile 104 | docker-compose.* 105 | state.* 106 | TODO.* 107 | migrate 108 | examples 109 | docs 110 | clickhouse 111 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | EXPORTER_MONITORING__METRICS_ENABLED=false 2 | EXPORTER_MONITORING__METRICS_HOST=localhost 3 | EXPORTER_MONITORING__METRICS_PORT=8125 4 | EXPORTER_MONITORING__SENTRY_ENABLED=false 5 | EXPORTER_MONITORING__SENTRY_DSN=https://xxxxxxxxxxx@sentry.io/1 6 | 7 | EXPORTER_CLICKHOUSE__ENABLE_UPLOAD=true 8 | EXPORTER_CLICKHOUSE__HOST=localhost 9 | EXPORTER_CLICKHOUSE__PORT=8443 10 | EXPORTER_CLICKHOUSE__PROTO=https 11 | EXPORTER_CLICKHOUSE__CACERT_PATH=/etc/ssl/ca.pem 12 | EXPORTER_CLICKHOUSE__USERNAME=tracker 13 | EXPORTER_CLICKHOUSE__PASSWORD=mypassword 14 | EXPORTER_CLICKHOUSE__DATABASE=tracker 15 | EXPORTER_CLICKHOUSE__SERVERLESS_PROXY_ID=xxxxxxxxxxxxxxxx 16 | EXPORTER_CLICKHOUSE__ISSUES_TABLE=issues 17 | EXPORTER_CLICKHOUSE__ISSUE_METRICS_TABLE=issue_metrics 18 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | paths: 6 | - "tracker_exporter/**" 7 | - "tests/**" 8 | branches: 9 | - master 10 | push: 11 | paths: 12 | - "tracker_exporter/**" 13 | - "tests/**" 14 | branches: 15 | - master 16 | schedule: 17 | - cron: '20 4 * * 6' 18 | 19 | env: 20 | EXPORTER_TRACKER__TOKEN: ${{ secrets.EXPORTER_TRACKER__TOKEN }} 21 | EXPORTER_TRACKER__CLOUD_ORG_ID: ${{ secrets.EXPORTER_TRACKER__CLOUD_ORG_ID }} 22 | 23 | jobs: 24 | pytest: 25 | name: pytest 26 | runs-on: ${{matrix.os}} 27 | strategy: 28 | matrix: 29 | python-version: 30 | - "3.10" 31 | os: 32 | - ubuntu-latest 33 | - windows-latest 34 | - macos-latest 35 | fail-fast: false 36 | steps: 37 | - uses: actions/checkout@v4 38 | - name: Set up Python ${{ matrix.python-version }} 39 | uses: actions/setup-python@v4 40 | with: 41 | python-version: ${{ matrix.python-version }} 42 | cache: 'pip' 43 | cache-dependency-path: '**/requirements*.txt' 44 | - name: Install dependencies 45 | run: | 46 | python -W ignore -m pip install --upgrade pip 47 | python -W ignore -m pip install -U pytest-cov 48 | python -W ignore -m pip install -r requirements.txt 49 | python -W ignore -m pip install -r requirements-dev.txt 50 | python -W ignore -m pip 
install pytest-xdist[psutil] 51 | 52 | - name: Test with pytest 53 | run: | 54 | pytest -vv --cov=tracker_exporter --cov-append -n auto --junit-xml=.test_report.xml 55 | env: 56 | JOB_INDEX: ${{ strategy.job-index }} 57 | 58 | - name: Test Summary 59 | id: test_summary 60 | uses: test-summary/action@v2.1 61 | if: always() # always run, even if tests fail 62 | with: 63 | paths: | 64 | .test_report.xml 65 | 66 | # FIXME: not works 67 | # - name: Submit coverage 68 | # uses: codecov/codecov-action@v3 69 | # env: 70 | # CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 71 | # with: 72 | # env_vars: OS,PYTHON 73 | # name: ${{ matrix.os }}-${{ matrix.python-version }} 74 | # fail_ci_if_error: true 75 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | scripts/* 2 | /clickhouse 3 | migrate 4 | test.json 5 | state.json 6 | .ruff* 7 | exporter 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | pip-wheel-metadata/ 32 | share/python-wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 103 | __pypackages__/ 104 | 105 | # Celery stuff 106 | celerybeat-schedule 107 | celerybeat.pid 108 | 109 | # SageMath parsed files 110 | *.sage.py 111 | 112 | # Environments 113 | .env 114 | .venv 115 | env/ 116 | venv/ 117 | ENV/ 118 | env.bak/ 119 | venv.bak/ 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # mypy 132 | .mypy_cache/ 133 | .dmypy.json 134 | dmypy.json 135 | 136 | # Pyre type checker 137 | .pyre/ 138 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim as builder 2 | 3 | WORKDIR /usr/src/app 4 | COPY ./requirements.txt ./ 5 | RUN apt-get update && apt-get install -y --no-install-recommends build-essential \ 6 | && pip install --no-cache-dir --prefix=/usr/src/app/dist -r requirements.txt \ 7 | && apt-get purge -y --auto-remove build-essential \ 8 | && rm -rf /var/lib/apt/lists/* 9 | 10 | 11 | FROM python:3.10-slim 12 | 13 | COPY --from=builder /usr/src/app/dist /usr/local 14 | WORKDIR /opt/exporter 15 | 16 | COPY . . 17 | RUN pip install --no-cache-dir . 18 | RUN rm -rf /opt/exporter 19 | 20 | ENV TZ=Europe/Moscow 21 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \ 22 | echo $TZ > /etc/timezone && \ 23 | apt-get update && \ 24 | apt-get install -y --no-install-recommends tzdata && \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | WORKDIR /opt/exporter 28 | CMD ["tracker-exporter"] 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Akim Faskhutdinov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-build clean-pyc dist help clickhouse test tests migration docs 2 | .DEFAULT_GOAL := help 3 | 4 | help: 5 | @echo "🪄 PREPARE ENVIRONMENT" 6 | @echo "---------------------------------------------------------------------" 7 | @echo " init Install all python requirements" 8 | @echo " pre-commit Install pre-commit hooks" 9 | @echo "" 10 | @echo "👀 CHECK" 11 | @echo "---------------------------------------------------------------------" 12 | @echo " test Run tests (pytest)" 13 | @echo " test-no-cov Run tests (pytest) without coverage report" 14 | @echo " pylint Check python syntax & style by pylint" 15 | @echo " lint Check python syntax via Flake8" 16 | @echo " black Check python syntax & style by black" 17 | @echo " black-apply Apply black linter (autoformat)" 18 | @echo " sec Security linter (bandit)" 19 | @echo "" 20 | @echo "🛠 INSTALL & RELEASE" 21 | @echo "---------------------------------------------------------------------" 22 | @echo " install Install library to site-packages" 23 | @echo " build Build package" 24 | @echo " build-docker Build docker image" 25 | @echo " release Build & push package to PyPI" 26 | @echo " clean Clean build/install artifacts" 27 | @echo "" 28 | @echo "🐳 DEV & RUN" 29 | @echo "---------------------------------------------------------------------" 30 | @echo " up Up docker composition with app & clickhouse" 31 | @echo " up-clickhouse Up docker clickhouse" 32 | @echo " down Down docker composition (full)" 33 | @echo " down-clickhouse Down docker clickhouse" 34 | @echo " clickhouse Clickhouse CLI" 35 | @echo " migration Run clickhouse migration" 36 | @echo " run Run ETL" 37 | 38 | clean: clean-build clean-pyc 39 | 40 | clean-build: 41 | rm -rf build/ 42 | rm -rf dist/ 43 | rm -rf .eggs/ 44 | find . -name '*.egg-info' -exec rm -rf {} + 45 | find . -name '*.egg' -exec rm -rf {} + 46 | find . -name '.DS_Store' -exec rm -f {} + 47 | 48 | clean-pyc: 49 | find . -name '*.pyc' -exec rm -f {} + 50 | find . -name '*.pyo' -exec rm -f {} + 51 | find . -name '*~' -exec rm -f {} + 52 | find . -name '__pycache__' -exec rm -rf {} + 53 | 54 | test: 55 | @pytest -vv --cov=tracker_exporter 56 | 57 | tests: test 58 | 59 | test-no-cov: 60 | @pytest -v 61 | 62 | lint: 63 | @flake8 --config=setup.cfg --max-line=119 64 | 65 | pylint: 66 | @pylint --max-line-length=120 --rcfile=setup.cfg tracker_exporter 67 | 68 | black: 69 | @black tracker_exporter/* --color --diff --check 70 | 71 | black-apply: 72 | @black tracker_exporter/* 73 | 74 | sec: 75 | @bandit -r tracker_exporter 76 | 77 | build: 78 | @python3 setup.py sdist bdist_wheel 79 | 80 | build-docker: 81 | @docker build . 
-t tracker_exporter:dev

release: clean build
	@make clean
	@make build
	@python3 -m twine upload --repository pypi dist/*
	@make clean

install: clean
	@python3 setup.py install

init:
	@pip3 install -r requirements.txt
	@pip3 install -r requirements-dev.txt

up:
	@docker compose -f docker-compose.dev.yml up -d

up-clickhouse:
	@docker compose -f docker-compose.dev.yml up -d clickhouse

down:
	@docker compose -f docker-compose.dev.yml down

down-clickhouse:
	@docker compose -f docker-compose.dev.yml down clickhouse

clickhouse:
	@docker exec -it clickhouse clickhouse-client

run:
	@tracker-exporter --env-file .env

migration:
	@./data-migrate.sh
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/tracker-exporter.svg)](https://pypi.org/project/tracker-exporter/)
[![PyPi Package](https://img.shields.io/pypi/v/tracker-exporter.svg)](https://pypi.org/project/tracker-exporter/)
[![Tests](https://github.com/akimrx/yandex-tracker-exporter/workflows/Tests/badge.svg)](https://github.com/akimrx/yandex-tracker-exporter)

# Yandex.Tracker ETL

Exports issue metadata & agile metrics, transforms them and loads the result into an OLAP data storage. Metrics are based on the issue changelog.

⚠️ **Important**
**Versions 1.x.x are incompatible with 0.1.x. New versions work only on Python >= 3.10**

> You can fork this repository and refine the tool the way you want. Or use it as is - this will allow you to build basic analytics on the tasks from Yandex.Tracker.

**Requires:**

- Python `>=3.10.*`
- Clickhouse + specific [tables](/migrations/clickhouse/) (how to run a [migration](#migration))

**Collects:**

- Issue metadata (i.e. title, author, assignee, components, tags, status, etc)
- Issue changelog (i.e. the history of all events that occurred in the issue)
- Issue metrics calculated per status (i.e. the time spent in a particular status), such as Cycle & Lead time

## Datalens Demo

**[Deploy this demo dashboard to your Datalens instance](https://datalens.yandex.ru/marketplace/f2ejcgrg2h910r7cc93u)**

## What does this tool do?

**ETL** – Export, transform, load.

It's simple. It doesn't do anything supernatural and there is no rocket science in it.
It is a simple ant with some mathematical abilities: it takes data from one place, sorts/transforms/adapts/calculates it and puts it into another place.
Sometimes it has to visit a lot of endpoints to collect everything that needs to be taken to the storage (that's just how the Yandex.Tracker API works).

**Important.**

By default, the exporter processes only those tasks that were changed during the sliding window specified in the `EXPORTER_TRACKER__SEARCH__RANGE` parameter.
So, all tasks that have activity (changes) will be uploaded to the storage - something like eventual consistency.

If you need to upload historical data that will never be updated again, you can flexibly control the behavior through the [environment variables described below](#general-settings).

Here are some recipes for a one-shot export:
1. Launch the exporter with the `EXPORTER_TRACKER__SEARCH__RANGE` parameter set to a large window, for example, one year.
2. More specifically: describe the query in the Tracker QL format using the `EXPORTER_TRACKER__SEARCH__QUERY` environment variable. This way you can export targeted bundles of tasks and work around the [Tracker's strict limit of 10,000 tasks](https://github.com/yandex/yandex_tracker_client/issues/13).

Finally, run the exporter with the `--run-once` flag.

### Stateful mode

By default, the exporter does not store any state and, as described above, works within the sliding window. This behavior is not optimal, because the exporter repeatedly re-processes previously handled tasks.

The behavior can be changed by enabling stateful mode, which supports 3 backends:

- Local JSON file
- Remote JSON file (S3 object storage)
- Redis

#### Local JSON file

```ini
EXPORTER_STATEFUL=true

# used for the first run to capture historical issues
# when no previous state exists.
EXPORTER_STATEFUL_INITIAL_RANGE=7d  # this is the default value

EXPORTER_STATE__STORAGE=jsonfile  # this is the default value
EXPORTER_STATE__JSONFILE_STRATEGY=local  # this is the default value

...
```

#### Remote JSON file (S3)

```ini
EXPORTER_STATEFUL=true
EXPORTER_STATEFUL_INITIAL_RANGE=7d

EXPORTER_STATE__STORAGE=jsonfile
EXPORTER_STATE__JSONFILE_STRATEGY=s3

EXPORTER_STATE__JSONFILE_S3_BUCKET=tracker-exporter-state
EXPORTER_STATE__JSONFILE_S3_ACCESS_KEY=YCAxxxxxxxx
EXPORTER_STATE__JSONFILE_S3_SECRET_KEY=YCxxx-xxxxxxxxxxxxxxx
EXPORTER_STATE__JSONFILE_S3_ENDPOINT=https://storage.yandexcloud.net

...
```

#### Redis

```ini
EXPORTER_STATEFUL=true
EXPORTER_STATEFUL_INITIAL_RANGE=7d

EXPORTER_STATE__STORAGE=redis
EXPORTER_STATE__REDIS_DSN=redis://localhost:6379

...
```

### Cycle time calculation algorithm

Currently, status metrics are calculated based on transitions between statuses (using the issue changelog). The counting algorithm will be improved.

Let's imagine that a task can only be in 5 statuses:

- Open
- In progress
- Testing
- Ready for release
- Closed

Employees start working on the task; their actions and the task history are described below, correlated with what the exporter counts.

1. A new task is created with the initial status `Open`; no metrics are counted yet.
2. The developer takes the task into work, the transition is `Open -> In progress`; the metric for the `Open` status is calculated, while the current status `In progress` is not counted yet.
3. The developer submits the task to testing, the transition is `In progress -> Testing`; the metric for the `In progress` status is calculated, while the current status `Testing` is not counted yet.
4. The QA engineer returns the task for revision, the transition is `Testing -> In progress`; the time in the `Testing` status is calculated, while the `In progress` metric keeps its previous value for now.
5. The task is finalized and re-submitted to testing, the transition is `In progress -> Testing`; the delta of this transition is added incrementally to the previous value of the `In progress` metric, but `Testing` has not changed yet.
6. The task is tested and submitted for release, the transition is `Testing -> Ready for release`; the delta of this transition is added incrementally to the previous value of the `Testing` metric, while the `Ready for release` status is not counted yet.
7. The release is completed and the task is closed, the transition is `Ready for release -> Closed`; the metric for the `Ready for release` status is calculated. **The metric of the final status of this task (`Closed`) will not be (re)calculated.**

A minimal code sketch of this accounting is shown at the end of this section.

#### Planned improvements

Count the status metric as soon as a transition into it has been made, even if that status is still current and no further transition out of it has happened yet. Final statuses will be excluded from this change.

## Tech stats

> Metrics based on 100,000+ constantly changing production issues

- **CPU usage:** from `2%` to `10%`
- **Memory usage (RSS):** from `48MB` to `256MB`
- **Average processing time per issue (metrics + issue metadata):** 1.5 seconds
- **Average processing time per issue (with full changelog export):** 7 seconds

### Why is it taking so long?

This is how the Tracker API and the client library work. To get additional information about an issue (for example, a status name or an employee's name), you need to make a subquery to the API. Collecting data about a single issue can take several dozen HTTP requests.

This is also the answer to why the tool is not asynchronous: the API limits would not allow effective use of concurrency.

The processing speed of a single issue depends on how many changes there are in its history. More changes mean longer processing.
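To make the status-time accounting from the cycle time section above concrete, here is a minimal sketch of the counting idea. It is not the exporter's actual implementation (see `tracker_exporter/etl.py` and `tracker_exporter/models/issue.py` for that); the changelog tuples and field names are illustrative assumptions, and business-hours filtering is omitted:

```python
from collections import defaultdict
from datetime import datetime

# Hypothetical, simplified changelog: (timestamp, from_status, to_status),
# ordered by time. The real exporter reads this from the Yandex.Tracker API.
transitions = [
    (datetime(2024, 1, 1, 10, 0), "Open", "In progress"),
    (datetime(2024, 1, 2, 12, 0), "In progress", "Testing"),
    (datetime(2024, 1, 3, 9, 0), "Testing", "In progress"),
    (datetime(2024, 1, 3, 15, 0), "In progress", "Testing"),
    (datetime(2024, 1, 4, 11, 0), "Testing", "Ready for release"),
    (datetime(2024, 1, 5, 18, 0), "Ready for release", "Closed"),
]


def status_durations(created_at: datetime, events: list) -> dict:
    """Accumulate seconds per status; the current (final) status is never counted."""
    durations = defaultdict(float)
    entered_at = created_at  # when the issue entered the status it is currently in
    for moment, from_status, _to_status in events:
        # A transition closes the interval of `from_status`, so only that status
        # accumulates time; the new status opens a fresh, not-yet-counted interval.
        durations[from_status] += (moment - entered_at).total_seconds()
        entered_at = moment
    return dict(durations)


print(status_durations(datetime(2024, 1, 1, 9, 0), transitions))
# {'Open': 3600.0, 'In progress': ..., 'Testing': ..., 'Ready for release': ...}
```

Repeated visits to a status (steps 4-5 in the list above) simply add to the same counter, which is exactly the incremental behavior described there.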
## Extend exported issue data with your custom fields

Just declare your own `main.py` module in which you extend the [TrackerIssue](tracker_exporter/models/issue.py#L65) model, like:

```python
from tracker_exporter.models.issue import TrackerIssue
from tracker_exporter.utils.helpers import validate_resource
from tracker_exporter import run_etl

from yandex_tracker_client.collections import Issues


class ExtendedTrackerIssue(TrackerIssue):
    def __init__(self, issue: Issues) -> None:
        super().__init__(issue)

        self.foo_custom_field = validate_resource(issue, "fooCustomField")
        self.bar_custom_field = validate_resource(issue, "barCustomField")


run_etl(issue_model=ExtendedTrackerIssue)
```

**Don't forget to add the new fields to the Clickhouse migration.**

See the full example with a mixin [here](examples/extended_model/main.py).

## Usage

### Native

#### Install from source

```bash
# prepare virtual environment
python3 -m venv venv
source venv/bin/activate
make install

# configure environment variables
export EXPORTER_TRACKER__TOKEN=your_oauth_token  # or EXPORTER_TRACKER__IAM_TOKEN
export EXPORTER_TRACKER__CLOUD_ORG_ID=your_cloud_org_id  # or EXPORTER_TRACKER__ORG_ID for yandex360
export EXPORTER_CLICKHOUSE__HOST=localhost
export EXPORTER_CLICKHOUSE__PORT=8123
export EXPORTER_CLICKHOUSE__USERNAME=agile
export EXPORTER_CLICKHOUSE__PASSWORD=agile

# run
tracker-exporter
```

#### Install from PyPI

```bash
pip3 install tracker-exporter
tracker-exporter
```

#### Configure via .env file

Read about the settings [here](#configuration-via-environment-variables)

```bash
tracker-exporter --env-file /home/akimrx/tracker/.settings
```

### Docker

```bash
cd yandex-tracker-exporter
docker-compose up -d --build

# Run clickhouse migrations

docker logs tracker-exporter -f
```

## On-premise arch example

![](/docs/images/agile_metrics.png)

### On-premise Clickhouse

You can install Clickhouse with a proxy via the [Ansible role inside the project (previous versions)](https://github.com/akimrx/yandex-tracker-exporter/tree/v0.1.19/ansible).
Edit the inventory file `ansible/inventory/hosts.yml` and just run the playbook.

> **Attention:**
> For the role to work correctly, Docker must be installed on the target server.
239 | 240 | Example Clickhouse installation: 241 | 242 | ```bash 243 | git clone https://github.com/akimrx/yandex-tracker-exporter.git 244 | cd yandex-tracker-exporter 245 | git checkout v0.1.19 246 | python3 -m venv venv && source venv/bin/activate 247 | pip3 install -r requirements-dev.txt 248 | cd ansible 249 | ansible-playbook -i inventory/hosts.yml playbooks/clickhouse.yml --limit agile 250 | ``` 251 | 252 | Also, you can use [this extended Clickhouse role](https://github.com/akimrx/ansible-clickhouse-role) 253 | 254 | ## Yandex.Cloud – Cloud Functions 255 | 256 | ![](/docs/images/agile_metrics_cloud.png) 257 | 258 | ### Create a Managed Clickhouse cluster 259 | 260 | > How to: https://cloud.yandex.com/en/docs/managed-clickhouse/operations/cluster-create 261 | 262 | - Set user for exporter, example: `agile` 263 | - Set a database name, example: `agile` 264 | - Enable `Serverless access` flag 265 | - For testing enable host public access 266 | - Enable `Access from the management console` flag 267 | - Run migration or manual create tables (see migration block [here](#migration), see [sql](/migrations/clickhouse/)) 268 | 269 | ### Create Cloud Function 270 | 271 | > How to: https://cloud.yandex.com/en/docs/functions/quickstart/create-function/python-function-quickstart 272 | 273 | - Use Python >= 3.10 274 | - Copy/paste example content from `examples/serverless` ([code](/examples/serverless/)) 275 | - Set entrypoint: `main.handler` (for code from examples) 276 | - Set function timeout to `600`, because the launch can be long if there are a lot of updated issues during the collection period 277 | - Set memory to `512MB` or more 278 | - Add environment variables (see variables block [here](#configuration-via-environment-variables)) 279 | 280 | ```ini 281 | EXPORTER_TRACKER__TOKEN=XXXXXXXXXXXXXXXX 282 | EXPORTER_TRACKER__CLOUD_ORG_ID=123456 283 | EXPORTER_TRACKER__SEARCH__RANGE=2h 284 | EXPORTER_CLICKHOUSE__ENABLE_UPLOAD="true" 285 | EXPORTER_CLICKHOUSE__PROTO=https 286 | EXPORTER_CLICKHOUSE__CACERT_PATH=/etc/ssl/certs/ca-certificates.crt 287 | EXPORTER_CLICKHOUSE__PORT=8443 288 | EXPORTER_CLICKHOUSE__HOST=rc1b-xxxxxx.mdb.yandexcloud.net 289 | EXPORTER_CLICKHOUSE__USERNAME=agile 290 | EXPORTER_CLICKHOUSE__PASSWORD=xxxx 291 | EXPORTER_CHANGELOG_EXPORT_ENABLED="false" 292 | ``` 293 | 294 | - Release function 295 | - Run test 296 | - See logs 297 | 298 | ![](/docs/images/logs.png) 299 | 300 | ##### Serverless database connection without public access 301 | 302 | If you don't want to enable clickhouse public access, use service account with such permissions - `serverless.mdbProxies.user` and set environment variables below: 303 | 304 | ```bash 305 | EXPORTER_CLICKHOUSE__HOST=akfd3bhqk3xxxxxxxxxxx.clickhouse-proxy.serverless.yandexcloud.net 306 | EXPORTER_CLICKHOUSE__SERVERLESS_PROXY_ID=akfd3bhqk3xxxxxxxxxxxxx 307 | ``` 308 | 309 | > How to create database connection: https://cloud.yandex.com/en/docs/functions/operations/database-connection 310 | 311 | Also, the `EXPORTER_CLICKHOUSE__PASSWORD` variable with service account must be replaced by IAM-token. Keep this in mind. 312 | Probably, you should get it in the function code, because the IAM-token works for a limited period of time. 313 | 314 | ### Create Trigger 315 | 316 | > How to: https://cloud.yandex.com/en/docs/functions/quickstart/create-trigger/timer-quickstart 317 | 318 | - Create new trigger 319 | - Choose type `Timer` 320 | - Set interval every hour: `0 * ? 
* * *` 321 | - Select your function 322 | - Create serverless service account or use an existing one 323 | - Save trigger 324 | 325 | # Visualization 326 | 327 | You can use any BI/observability tool for visualization, for example: 328 | 329 | - Yandex DataLens (btw, this is [opensource](https://github.com/datalens-tech/datalens)). Also see [demo set](https://datalens.yandex.ru/marketplace/f2ejcgrg2h910r7cc93u) 330 | - Apache Superset 331 | - PowerBI 332 | - Grafana 333 | 334 | ![](/docs/images/datalens_example.png) 335 | 336 | # Migration 337 | 338 | Based on [go-migrate](https://github.com/golang-migrate/migrate) tool. 339 | 340 | ## Download and install go-migrate tool 341 | 342 | ### macOS 343 | 344 | ```shell 345 | wget https://github.com/golang-migrate/migrate/releases/download/v4.15.2/migrate.darwin-amd64.tar.gz -O migrate.tar.gz 346 | 347 | tar xvf migrate.tar.gz 348 | mv migrate ~/bin 349 | ``` 350 | 351 | ### Linux 352 | 353 | ```shell 354 | wget https://github.com/golang-migrate/migrate/releases/download/v4.15.2/migrate.linux-amd64.tar.gz -O migrate.tar.gz 355 | 356 | tar -xvf migrate.tar.gz 357 | mv migrate /usr/local/bin 358 | ``` 359 | 360 | ## Run migration 361 | 362 | Example bash script below. 363 | See full example script [here](/data-migrate.sh) 364 | 365 | ```bash 366 | #!/usr/bin/env bash 367 | 368 | set -Eeuo pipefail 369 | 370 | CLICKHOUSE_HOST="localhost" 371 | CLICKHOUSE_TCP_PORT=9000 372 | CLICKHOUSE_HTTP_PORT=8123 373 | CLICKHOUSE_USER="default" 374 | CLICKHOUSE_PASSWORD="strongpassword" 375 | 376 | MIGRATION_SOURCE_PATH="file://${PWD}/../migrations/clickhouse" 377 | MIGRATION_HISTORY_TABLE="ci_gomigrate_migrations" 378 | MIGRATION_DATABASE="agile" 379 | 380 | MIGRATION_CLICKHOUSE_DSN="clickhouse://${CLICKHOUSE_HOST}:${CLICKHOUSE_TCP_PORT}?username=${CLICKHOUSE_USER}&password=${CLICKHOUSE_PASSWORD}&database=${MIGRATION_DATABASE}&x-multi-statement=true&x-migrations-table=${MIGRATION_HISTORY_TABLE}" 381 | 382 | prepare_migration() { 383 | echo "CREATE DATABASE IF NOT EXISTS ${MIGRATION_DATABASE}" | \ 384 | curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}&password=${CLICKHOUSE_PASSWORD}" --data-binary @- 385 | 386 | } 387 | 388 | run_migration() { 389 | migrate -verbose \ 390 | -source $MIGRATION_SOURCE_PATH \ 391 | -database $MIGRATION_CLICKHOUSE_DSN \ 392 | up 393 | 394 | } 395 | 396 | prepare_migration 397 | run_migration 398 | ``` 399 | 400 | # Configuration via environment variables 401 | 402 | See config declaration [here](/tracker_exporter/config.py) 403 | 404 | ## General settings 405 | 406 | | variable | description | 407 | | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | 408 | | `EXPORTER_STATEFUL` | Enable stateful mode. Required `EXPORTER_STATE__*` params. Default is `False` | 409 | | `EXPORTER_STATEFUL_INITIAL_RANGE` | Initial search range when unknown last state. Default: `1w` | 410 | | `EXPORTER_CHANGELOG_EXPORT_ENABLED` | Enable export all issues changelog to Clickhouse. **Can greatly slow down exports** (x5 - x10). Default is `False` | 411 | | `EXPORTER_LOGLEVEL` | ETL log level. Default: `info` | 412 | | `EXPORTER_LOG_ETL_STATS` | Enable logging transform stats every N iteration. Default is `True` | 413 | | `EXPORTER_LOG_ETL_STATS_EACH_N_ITER` | How many iterations must pass to log stats. Default is `100` | 414 | | `EXPORTER_WORKDAYS` | Workdays for calculate business time. 0 - mon, 6 - sun. 
Default: `[0,1,2,3,4]` | 415 | | `EXPORTER_BUSINESS_HOURS_START` | Business hours start for calculate business time. Default: `09:00:00` | 416 | | `EXPORTER_BUSINESS_HOURS_END` | Business hours end for calculate business time. Default: `22:00:00` | 417 | | `EXPORTER_DATETIME_RESPONSE_FORMAT` | Yandex.Tracker datetime format in responses. Default: `%Y-%m-%dT%H:%M:%S.%f%z` | 418 | | `EXPORTER_DATETIME_QUERY_FORMAT` | Datetime format for search queries. Default: `%Y-%m-%d %H:%M:%S` | 419 | | `EXPORTER_DATETIME_CLICKHOUSE_FORMAT` | Datetime format for Clickhouse. Default: `%Y-%m-%dT%H:%M:%S.%f` | 420 | | `EXPORTER_ETL_INTERVAL_MINUTES` | Interval between run ETL. Default: `30` (minutes) | 421 | | `EXPORTER_CLOSED_ISSUE_STATUSES` | Statuses for mark issue as closed. Default: `closed,rejected,resolved,cancelled,released` | 422 | | `EXPORTER_NOT_NULLABLE_FIELDS` | Fields that should never be null (e.g. dates). Default: all datetime fields | 423 | 424 | ## Tracker settings 425 | 426 | | variable | description | 427 | | ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------ | 428 | | `EXPORTER_TRACKER__LOGLEVEL` | Log level for Yandex.Tracker SDK. Default: `warning` | 429 | | `EXPORTER_TRACKER__TOKEN` | OAuth2 token. Required if `EXPORTER_TRACKER__IAM_TOKEN` is not passed | 430 | | `EXPORTER_TRACKER__ORG_ID` | Yandex360 organization ID. Required if `EXPORTER_TRACKER__CLOUD_ORG_ID` is not passed | 431 | | `EXPORTER_TRACKER__IAM_TOKEN` | Yandex.Cloud IAM token. Required if `EXPORTER_TRACKER__TOKEN` is not passed | 432 | | `EXPORTER_TRACKER__CLOUD_ORG_ID` | Yandex.Cloud organization ID. Required if `EXPORTER_TRACKER__ORG_ID` is not passed | 433 | | `EXPORTER_TRACKER__TIMEOUT` | Yandex.Tracker HTTP requests timeout. Default: `10` (sec) | 434 | | `EXPORTER_TRACKER__MAX_RETRIES` | Yandex.Tracker HTTP requests max retries. Default: `10` | 435 | | `EXPORTER_TRACKER__LANGUAGE` | Yandex.Tracker language. Default: `en` | 436 | | `EXPORTER_TRACKER__TIMEZONE` | Yandex.Tracker timezone. Default: `Europe/Moscow` | 437 | | `EXPORTER_TRACKER__SEARCH__QUERY` | Custom query for search issues. This variable has the highest priority and overrides other search parameters. Default is empty | 438 | | `EXPORTER_TRACKER__SEARCH__RANGE` | Search issues window. Has no effect in stateful mode. Default: `2h` | 439 | | `EXPORTER_TRACKER__SEARCH__QUEUES` | Include or exclude queues in search. Example: `DEV,SRE,!TEST,!TRASH` Default is empty (i.e. all queues) | 440 | | `EXPORTER_TRACKER__SEARCH__PER_PAGE_LIMIT` | Search results per page. Default: `100` | 441 | 442 | ## Clickhouse settings 443 | 444 | | variable | description | 445 | | --------------------------------------------- | ------------------------------------------------------------------ | 446 | | `EXPORTER_CLICKHOUSE__ENABLE_UPLOAD` | Enable upload data to Clickhouse. Default is `True` | 447 | | `EXPORTER_CLICKHOUSE__HOST` | Clickhouse host. Default: `localhost` | 448 | | `EXPORTER_CLICKHOUSE__PROTO` | Clickhouse protocol: http or https. Default: `http` | 449 | | `EXPORTER_CLICKHOUSE__PORT` | Clickhouse HTTP(S) port. Default: `8123` | 450 | | `EXPORTER_CLICKHOUSE__CACERT_PATH` | Path to CA cert. Only for HTTPS proto. Default is empty | 451 | | `EXPORTER_CLICKHOUSE__SERVERLESS_PROXY_ID` | Yandex Cloud Functions proxy ID. Default is empty | 452 | | `EXPORTER_CLICKHOUSE__USERNAME` | Clickhouse username. 
Default: `default` | 453 | | `EXPORTER_CLICKHOUSE__PASSWORD` | Clickhouse password. Can be empty. Default is empty | 454 | | `EXPORTER_CLICKHOUSE__DATABASE` | Clickhouse database. Default: `agile` | 455 | | `EXPORTER_CLICKHOUSE__ISSUES_TABLE` | Clickhouse table for issues metadata. Default: `issues` | 456 | | `EXPORTER_CLICKHOUSE__ISSUE_METRICS_TABLE` | Clickhouse table for issue metrics. Default: `issue_metrics` | 457 | | `EXPORTER_CLICKHOUSE__ISSUES_CHANGELOG_TABLE` | Clickhouse table for issues changelog. Default: `issues_changelog` | 458 | | `EXPORTER_CLICKHOUSE__AUTO_DEDUPLICATE` | Execute `OPTIMIZE` after each `INSERT`. Default is `True` | 459 | | `EXPORTER_CLICKHOUSE__BACKOFF_BASE_DELAY` | Base delay for backoff strategy. Default: `0.5` (sec) | 460 | | `EXPORTER_CLICKHOUSE__BACKOFF_EXPO_FACTOR` | Exponential factor for multiply every try. Default: `2.5` (sec) | 461 | | `EXPORTER_CLICKHOUSE__BACKOFF_MAX_TRIES` | Max tries for backoff strategy. Default: `3` | 462 | | `EXPORTER_CLICKHOUSE__BACKOFF_JITTER` | Enable jitter (randomize delay) for retries. Default: `True` | 463 | 464 | ## State settings 465 | 466 | | variable | description | 467 | | ---------------------------------------- | ----------------------------------------------------------------------------------------- | 468 | | `EXPORTER_STATE__STORAGE` | Storage type for StateKeeper. Can be: `jsonfile`, `redis`, `custom`. Default: `jsonfile` | 469 | | `EXPORTER_STATE__REDIS_DSN` | Connection string for Redis state storage when storage type is `redis`. Default is empty. | 470 | | `EXPORTER_STATE__JSONFILE_STRATEGY` | File store strategy for `jsonfile` storage type. Can be `s3` or `local`. Default: `local` | 471 | | `EXPORTER_STATE__JSONFILE_PATH` | Path to JSON state file. Default: `./state.json` | 472 | | `EXPORTER_STATE__JSONFILE_S3_BUCKET` | Bucket for `s3` strategy. Default is empty | 473 | | `EXPORTER_STATE__JSONFILE_S3_REGION` | Region for `s3` strategy. Default is `us-east-1` | 474 | | `EXPORTER_STATE__JSONFILE_S3_ENDPOINT` | Endpoint URL for `s3` strategy. Default is empty | 475 | | `EXPORTER_STATE__JSONFILE_S3_ACCESS_KEY` | AWS access key id for `s3` strategy. Default is empty | 476 | | `EXPORTER_STATE__JSONFILE_S3_SECRET_KEY` | AWS secret key for `s3` strategy. Default is empty | 477 | | `EXPORTER_STATE__CUSTOM_STORAGE_PARAMS` | Settings for custom storage params as `dict`. Default: `{}` | 478 | 479 | ## Observability settings 480 | 481 | | variable | description | 482 | | ------------------------------------------ | ---------------------------------------------------------------------------- | 483 | | `EXPORTER_MONITORING__METRICS_ENABLED` | Enable send statsd tagged metrics. Default is `False` | 484 | | `EXPORTER_MONITORING__METRICS_HOST` | DogStatsD / statsd host. Default: `localhost` | 485 | | `EXPORTER_MONITORING__METRICS_PORT` | DogStatsD / statsd port. Default: `8125` | 486 | | `EXPORTER_MONITORING__METRICS_BASE_PREFIX` | Prefix for metrics name. Default: `tracker_exporter` | 487 | | `EXPORTER_MONITORING__METRICS_BASE_LABELS` | List of tags for metrics. Example: `["project:internal",]`. Default is empty | 488 | | `EXPORTER_MONITORING__SENTRY_ENABLED` | Enable send exception stacktrace to Sentry. Default is `False` | 489 | | `EXPORTER_MONITORING__SENTRY_DSN` | Sentry DSN. Default is empty | 490 | 491 | # Monitoring 492 | 493 | Based on DogStatsD tagged format. VictoriaMetrics compatible. 
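The metrics listed below are plain DogStatsD-tagged metrics, so any StatsD/DogStatsD-compatible backend can receive them. For reference, a minimal sketch of emitting metrics in this shape with the `datadog` Python client (whether the exporter uses exactly this client internally is an assumption here; the commented values mirror the `EXPORTER_MONITORING__*` settings above):

```python
from datadog.dogstatsd import DogStatsd

# Connection values mirror the EXPORTER_MONITORING__* settings described above.
statsd = DogStatsd(
    host="localhost",                     # EXPORTER_MONITORING__METRICS_HOST
    port=8125,                            # EXPORTER_MONITORING__METRICS_PORT
    namespace="tracker_exporter",         # EXPORTER_MONITORING__METRICS_BASE_PREFIX
    constant_tags=["project:internal"],   # EXPORTER_MONITORING__METRICS_BASE_LABELS
)

# Counter, gauge and timing examples in the same shape as the table below.
statsd.increment("issues_total_processed_count")
statsd.gauge("etl_upload_status", 1)  # 1 - success, 2 - fail
statsd.timing("clickhouse_insert_time_seconds", 0.42, tags=["database:agile", "table:issues"])
```

Note that the metric names in the table already include the `tracker_exporter` base prefix, which corresponds to the client namespace configured above.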
494 | 495 | | Metric name | Metric type | Labels | Description | 496 | | ------------------------------------------------------ | ----------- | --------------- | ---------------------------------------------------------- | 497 | | `tracker_exporter_issue_transform_time_seconds` | time | - | Duration of transform per task (data packing to the model) | 498 | | `tracker_exporter_issues_total_processed_count` | count | - | Total issues processed | 499 | | `tracker_exporter_issues_search_time_seconds` | time | - | Yandex.Tracker search duration time in seconds | 500 | | `tracker_exporter_issues_without_metrics` | count | - | Issues with empty metrics (no changelog) | 501 | | `tracker_exporter_issue_prefetch_seconds` | time | - | Pre-transform data duration in seconds | 502 | | `tracker_exporter_comments_fetch_seconds` | time | - | Comments fetch duration in seconds | 503 | | `tracker_exporter_etl_duration_seconds` | time | - | ETL full pipeline duration in seconds | 504 | | `tracker_exporter_etl_upload_status` | gauge | - | Last upload status, 1 - success, 2 - fail | 505 | | `tracker_exporter_export_and_transform_time_seconds` | time | - | Overall export and transform duration in seconds | 506 | | `tracker_exporter_upload_to_storage_time_seconds` | time | - | Overall insert duration time in seconds | 507 | | `tracker_exporter_last_update_timestamp` | gauge | - | Last data update timestamp | 508 | | `tracker_exporter_clickhouse_insert_time_seconds` | time | database, table | Insert per table duration time in seconds | 509 | | `tracker_exporter_clickhouse_inserted_rows` | count | database, table | Inserted rows per table | 510 | | `tracker_exporter_clickhouse_deduplicate_time_seconds` | time | database, table | Optimize execute time duration in seconds | 511 | 512 | ### Metrics on dashboard demo 513 | 514 | ![](/docs/images/etl_metrics.jpeg) 515 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | 2 | # Project roadmap 3 | 4 | - [x] refactoring code 5 | - [x] CI 6 | - [x] export full issue changelog 7 | - [x] docker image 8 | - [x] update serverless instruction 9 | - [x] stateful mode (local json) 10 | - [x] stateful mode (s3 json) 11 | - [x] stateful mode (redis) 12 | - [ ] helm chart 13 | - [ ] replace asserts in code to if/raise 14 | - [ ] pytest 15 | -------------------------------------------------------------------------------- /data-migrate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -Eeuo pipefail 4 | 5 | SYSTEM=$(uname -s) 6 | ARCH=$(uname -p) 7 | 8 | CLICKHOUSE_HOST=localhost 9 | CLICKHOUSE_TCP_PORT=9000 10 | CLICKHOUSE_HTTP_PORT=8123 11 | CLICKHOUSE_USER=default 12 | 13 | GO_MIGRATE_VERSION="v4.16.2" 14 | MIGRATION_SOURCE_PATH="file://${PWD}/migrations/clickhouse" 15 | MIGRATION_HISTORY_TABLE="ci_gomigrate_migrations" 16 | MIGRATION_DATABASE="agile" 17 | 18 | MIGRATION_CLICKHOUSE_DSN="clickhouse://${CLICKHOUSE_HOST}:${CLICKHOUSE_TCP_PORT}?username=${CLICKHOUSE_USER}&database=${MIGRATION_DATABASE}&x-multi-statement=true&x-migrations-table=${MIGRATION_HISTORY_TABLE}" 19 | 20 | install_go_migrate() { 21 | echo "System is ${SYSTEM} (${ARCH})" 22 | if command -v ./migrate >/dev/null 2>&1; then 23 | echo "Tool for migration already installed, skipping installation" 24 | else 25 | echo "Installing go migrate tool..." 
    # pick the go-migrate build matching the OS and CPU architecture
    if [ "${SYSTEM}" = "Darwin" ]; then
      if [ "${ARCH}" = "arm" ]; then
        wget https://github.com/golang-migrate/migrate/releases/download/${GO_MIGRATE_VERSION}/migrate.darwin-arm64.tar.gz -O migrate.tar.gz
      else
        wget https://github.com/golang-migrate/migrate/releases/download/${GO_MIGRATE_VERSION}/migrate.darwin-amd64.tar.gz -O migrate.tar.gz
      fi
      tar xvf migrate.tar.gz migrate
    elif [ "${SYSTEM}" = "Linux" ]; then
      wget https://github.com/golang-migrate/migrate/releases/download/${GO_MIGRATE_VERSION}/migrate.linux-amd64.tar.gz -O migrate.tar.gz
      tar -xvf migrate.tar.gz migrate
    fi
    chmod +x migrate
    rm ./migrate.tar.gz
  fi
}


prepare_database() {
  echo "CREATE DATABASE IF NOT EXISTS ${MIGRATION_DATABASE}" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
}


prepare_migration() {
  echo "CREATE DATABASE IF NOT EXISTS ${MIGRATION_HISTORY_TABLE}" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
}


run_migration() {
  ./migrate -verbose \
    -source $MIGRATION_SOURCE_PATH \
    -database $MIGRATION_CLICKHOUSE_DSN \
    up
}

recreate_views() {
  echo "DROP VIEW IF EXISTS ${MIGRATION_DATABASE}.issues_view" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
  echo "DROP VIEW IF EXISTS ${MIGRATION_DATABASE}.issue_metrics_view" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
  echo "DROP VIEW IF EXISTS ${MIGRATION_DATABASE}.issues_changelog_view" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-

  echo "CREATE VIEW IF NOT EXISTS ${MIGRATION_DATABASE}.issues_view AS SELECT * FROM ${MIGRATION_DATABASE}.issues FINAL" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
  echo "CREATE VIEW IF NOT EXISTS ${MIGRATION_DATABASE}.issue_metrics_view AS SELECT * FROM ${MIGRATION_DATABASE}.issue_metrics FINAL" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
  echo "CREATE VIEW IF NOT EXISTS ${MIGRATION_DATABASE}.issues_changelog_view AS SELECT * FROM ${MIGRATION_DATABASE}.issues_changelog FINAL" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
}


install_go_migrate
prepare_database
prepare_migration
run_migration
recreate_views
-------------------------------------------------------------------------------- /docker-compose.dev.yml: --------------------------------------------------------------------------------

version: '3.1'

services:
  tracker-exporter:
    container_name: tracker-exporter
    hostname: tracker-exporter
    restart: unless-stopped
    build: .
9 | volumes: 10 | - ./.env:/opt/exporter/.env:ro 11 | command: | 12 | tracker-exporter --env-file /opt/exporter/.env 13 | 14 | clickhouse: 15 | image: clickhouse/clickhouse-server:23.3 16 | container_name: clickhouse 17 | hostname: clickhouse 18 | restart: unless-stopped 19 | volumes: 20 | - "./clickhouse:/var/lib/clickhouse" 21 | ports: 22 | - "9000:9000" 23 | - "8123:8123" 24 | 25 | clickhouse-migrator: 26 | image: busybox 27 | container_name: migrator 28 | restart: no 29 | volumes: 30 | - ./data-migrate.sh:/opt/data-migrate.sh 31 | command: | 32 | /opt/data-migrate.sh 33 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.1' 2 | 3 | services: 4 | clickhouse: 5 | image: clickhouse/clickhouse-server:23.8 6 | container_name: clickhouse 7 | hostname: clickhouse 8 | restart: unless-stopped 9 | volumes: 10 | - "./clickhouse:/var/lib/clickhouse" 11 | ports: 12 | - "9000:9000" 13 | - "8123:8123" 14 | 15 | tracker-exporter: 16 | build: . 17 | container_name: tracker-exporter 18 | hostname: tracker-exporter 19 | restart: unless-stopped 20 | environment: 21 | EXPORTER_CHANGELOG_EXPORT_ENABLED: "false" 22 | EXPORTER_TRACKER__TOKEN: ${TRACKER_TOKEN} 23 | EXPORTER_TRACKER__CLOUD_ORG_ID: ${TRACKER_ORG_ID} 24 | EXPORTER_CLICKHOUSE__HOST: clickhouse 25 | EXPORTER_CLICKHOUSE__PORT: 8123 26 | EXPORTER_CLICKHOUSE__ENABLE_UPLOAD: "true" 27 | EXPORTER_STATEFUL: "true" 28 | EXPORTER_STATEFUL_INITIAL_RANGE: "1d" 29 | EXPORTER_STATE__STORAGE: jsonfile 30 | EXPORTER_STATE__JSONFILE_STRATEGY: local 31 | EXPORTER_STATE__JSONFILE_PATH: /opt/exporter/state.json 32 | volumes: 33 | - "./exporter:/opt/exporter:rw" 34 | depends_on: 35 | - clickhouse 36 | -------------------------------------------------------------------------------- /docs/diagrams/agile_metrics.drawio: -------------------------------------------------------------------------------- 1 | 
QK/CxChs8ehYdrwbBc9B03KRrOWyruw9zTSlcxL+bAfrL57TgVu2RFpPTcnOghZ1BpngNLQWjJdyP2RY8wBkP4wMdIT2LzQ4osHPdMAf9251JbYjIAz03PtVUyo+NW8WDBzy/TLf8cWs3cUprFiLNKD/IFczCBj7YNrP/QGI8rz07ymKqqJw+PGNeP+MTz1Ef+aTn9fHIwF3DJzvXz/A4n2bF+KRn/5t1MIep6PXAq1kcZB5rLZmbJZe5/FO5CXr43OVuAHLvY//1LNBdcKUvx77ee570u+gu1TN32d4yE26ztKz+SbnL39KferYLc0l/r5S9wOPxT5LpWOsPu2Xjfw== -------------------------------------------------------------------------------- /docs/images/agile_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/docs/images/agile_metrics.png -------------------------------------------------------------------------------- /docs/images/agile_metrics_cloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/docs/images/agile_metrics_cloud.png -------------------------------------------------------------------------------- /docs/images/datalens_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/docs/images/datalens_example.png -------------------------------------------------------------------------------- /docs/images/etl_metrics.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/docs/images/etl_metrics.jpeg -------------------------------------------------------------------------------- /docs/images/logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/docs/images/logs.png -------------------------------------------------------------------------------- /examples/extended_model/main.py: -------------------------------------------------------------------------------- 1 | from tracker_exporter.models.issue import TrackerIssue 2 | from tracker_exporter.utils.helpers import to_snake_case, validate_resource 3 | from tracker_exporter import configure_sentry, run_etl 4 | 5 | from yandex_tracker_client.collections import Issues 6 | 7 | 8 | class CustomIssueFieldsMixin: 9 | """ 10 | Additional custom fields for Yandex Tracker issue. 11 | Must be created in the Clickhouse issue table. 
12 | """ 13 | 14 | def __init__(self, issue: Issues) -> None: 15 | self.foo_custom_field = to_snake_case(validate_resource(issue, "fooCustomField")) 16 | self.bar_custom_field = validate_resource(issue, "barCustomField") 17 | self.baz = True if "baz" in issue.tags else False 18 | 19 | 20 | class ExtendedTrackerIssue(CustomIssueFieldsMixin, TrackerIssue): 21 | """Extended Yandex Tracker issue model with custom fields.""" 22 | 23 | def __init__(self, issue: Issues) -> None: 24 | super().__init__(issue) 25 | 26 | 27 | def main() -> None: 28 | """Entry point.""" 29 | run_etl(ignore_exceptions=False, issue_model=ExtendedTrackerIssue) 30 | 31 | 32 | if __name__ == "__main__": 33 | configure_sentry() 34 | main() 35 | -------------------------------------------------------------------------------- /examples/serverless/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from tracker_exporter import run_etl 3 | 4 | logging.getLogger().setLevel(logging.INFO) 5 | 6 | 7 | def handler(event, context): 8 | try: 9 | run_etl(ignore_exceptions=False) 10 | response = {"statusCode": 200, "message": "success"} 11 | except Exception as exc: 12 | response = {"statusCode": 500, "message": exc} 13 | finally: 14 | return response 15 | -------------------------------------------------------------------------------- /examples/serverless/requirements.txt: -------------------------------------------------------------------------------- 1 | tracker-exporter 2 | -------------------------------------------------------------------------------- /migrations/clickhouse/000001_create_table_issues.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS `issues`; 2 | -------------------------------------------------------------------------------- /migrations/clickhouse/000001_create_table_issues.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS `issues` 2 | ( 3 | `version` DateTime64(3, 'UTC') DEFAULT now() COMMENT 'Row version', 4 | 5 | `queue` LowCardinality(String) COMMENT 'Queue key', 6 | `title` String DEFAULT '' COMMENT 'Issue summary', 7 | `issue_key` String COMMENT 'Unique issue key like TEST-1', 8 | `issue_type` LowCardinality(String) COMMENT 'Issue type', 9 | `priority` LowCardinality(String) COMMENT 'Issue priority', 10 | `status` LowCardinality(String) COMMENT 'Last issue status', 11 | `resolution` LowCardinality(String) DEFAULT '' COMMENT 'Issue resolution', 12 | 13 | `assignee` String DEFAULT '' COMMENT 'Issue assignee', 14 | `author` String DEFAULT '' COMMENT 'Issue creator', 15 | `qa_engineer` String DEFAULT '' COMMENT 'QA engineer who conducted the testing', 16 | 17 | `tags` Array(String) COMMENT 'Issue labels', 18 | `components` Array(String) COMMENT 'Issue components', 19 | `project` LowCardinality(String) DEFAULT '' COMMENT 'Related project', 20 | 21 | `created_at` DateTime64(3, 'UTC') COMMENT 'Issue creation date', 22 | `updated_at` DateTime64(3, 'UTC') COMMENT 'Date of the last update of the issue', 23 | `deadline` Date DEFAULT 0 COMMENT 'Deadline for completing the issue', 24 | `closed_at` DateTime64(3, 'UTC') DEFAULT 0 COMMENT 'Closing date of the issue without resolution, based on custom closing statuses', 25 | `resolved_at` DateTime64(3, 'UTC') DEFAULT 0 COMMENT 'Closing date of the issue with the resolution', 26 | `start_date` Date DEFAULT 0 COMMENT 'Start date (fact, manual field, gantt)', 27 | `end_date` Date 
DEFAULT 0 COMMENT 'End date (fact, manual field, gantt)', 28 | 29 | `is_subtask` UInt8 DEFAULT 0 COMMENT 'Subtask flag', 30 | `is_closed` UInt8 DEFAULT 0 COMMENT 'Issue completion flag (based on custom closing statuses)', 31 | `is_resolved` UInt8 DEFAULT 0 COMMENT 'Issue completion flag (with resolution)', 32 | 33 | `story_points` Float32 DEFAULT 0.0 COMMENT 'Estimating the cost of the issue', 34 | `sprints` Array(String) COMMENT 'Sprints in which the issue participated', 35 | `parent_issue_key` String DEFAULT '' COMMENT 'The key of the parent issue, like TEST-1', 36 | `epic_issue_key` String DEFAULT '' COMMENT 'Epic key, like GOAL-1', 37 | 38 | `aliases` Array(String) COMMENT 'All previous issue keys', 39 | `was_moved` UInt8 DEFAULT 0 COMMENT 'Has the task been moved from another queue', 40 | `moved_at` DateTime64(3, 'UTC') DEFAULT 0 COMMENT 'The date the queue was changed if the task was moved', 41 | `moved_by` String DEFAULT '' COMMENT 'The employee who moved the task' 42 | ) 43 | ENGINE = ReplacingMergeTree(version) 44 | PARTITION BY toYYYYMM(updated_at) 45 | ORDER BY issue_key 46 | -------------------------------------------------------------------------------- /migrations/clickhouse/000002_create_table_issue_metrics.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS `issue_metrics`; 2 | -------------------------------------------------------------------------------- /migrations/clickhouse/000002_create_table_issue_metrics.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS `issue_metrics` 2 | ( 3 | `version` DateTime64(3, 'UTC') DEFAULT now(), 4 | `last_seen` DateTime64(3, 'UTC') COMMENT 'The date when the issue was last in this status', 5 | 6 | `issue_key` String COMMENT 'Issue key', 7 | `status_name` LowCardinality(String) COMMENT 'Status name', 8 | `status_transitions_count` UInt8 COMMENT 'The number of transitions to this status', 9 | 10 | `duration` UInt32 COMMENT 'Time spent in the status in seconds (for all time)', 11 | `human_readable_duration` String DEFAULT '' COMMENT 'Human - readable format for duration', 12 | `busdays_duration` UInt32 COMMENT 'Time spent in the status in seconds (busdays only)', 13 | `human_readable_busdays_duration` String DEFAULT '' COMMENT 'Human - readable format for busdays_duration' 14 | ) 15 | ENGINE = ReplacingMergeTree(version) 16 | PARTITION BY toYYYYMM(last_seen) 17 | ORDER BY (issue_key, status_name, last_seen) 18 | -------------------------------------------------------------------------------- /migrations/clickhouse/000003_create_table_issues_changelog.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS `issues_changelog`; 2 | -------------------------------------------------------------------------------- /migrations/clickhouse/000003_create_table_issues_changelog.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS `issues_changelog` 2 | ( 3 | `version` DateTime64(3, 'UTC') DEFAULT now(), 4 | `event_time` DateTime64(3, 'UTC') COMMENT 'Changelog event time', 5 | 6 | `issue_key` String COMMENT 'Issue key', 7 | `queue` LowCardinality(String) COMMENT 'Queue', 8 | `event_type` LowCardinality(String) COMMENT 'Event type', 9 | `transport` LowCardinality(String) COMMENT 'Event source, i.e. api, front, etc', 10 | `actor` String DEFAULT '' COMMENT 'Event initiator, i.e. 
employee name, robot name, etc', 11 | 12 | `changed_field` String COMMENT 'The field that was changed', 13 | `changed_from` String DEFAULT '' COMMENT 'Previous field value', 14 | `changed_to` String COMMENT 'New field value' 15 | ) 16 | ENGINE = ReplacingMergeTree(version) 17 | PARTITION BY toYYYYMM(event_time) 18 | ORDER BY (issue_key, event_time, event_type, changed_field) 19 | -------------------------------------------------------------------------------- /migrations/clickhouse/000004_create_view_issues_view.down.sql: -------------------------------------------------------------------------------- 1 | DROP VIEW IF EXISTS `issues_view`; 2 | DROP VIEW IF EXISTS `issue_metrics_view`; 3 | DROP VIEW IF EXISTS `issues_changelog_view`; 4 | -------------------------------------------------------------------------------- /migrations/clickhouse/000004_create_view_issues_view.up.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW IF NOT EXISTS `issues_view` AS 2 | SELECT * 3 | FROM `issues` 4 | FINAL; 5 | 6 | CREATE VIEW IF NOT EXISTS `issue_metrics_view` AS 7 | SELECT * 8 | FROM `issue_metrics` 9 | FINAL; 10 | 11 | CREATE VIEW IF NOT EXISTS `issues_changelog_view` AS 12 | SELECT * 13 | FROM `issues_changelog` 14 | FINAL; 15 | -------------------------------------------------------------------------------- /migrations/v0.1.x/000001_create_table_issues.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS `issues`; 2 | -------------------------------------------------------------------------------- /migrations/v0.1.x/000001_create_table_issues.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS `issues` 2 | ( 3 | `version` DateTime DEFAULT now(), 4 | 5 | `queue` LowCardinality(String) COMMENT 'Queue key', 6 | `title` String DEFAULT '' COMMENT 'Issue summary', 7 | `issue_key` String COMMENT 'Unique issue key like TEST-1', 8 | `issue_type` LowCardinality(String) COMMENT 'Issue type', 9 | `priority` LowCardinality(String) COMMENT 'Issue priority', 10 | `status` LowCardinality(String) COMMENT 'Last issue status', 11 | `resolution` LowCardinality(String) DEFAULT '' COMMENT 'Issue resolution', 12 | 13 | `assignee` String DEFAULT '' COMMENT 'Issue assignee', 14 | `author` String DEFAULT '' COMMENT 'Issue creator', 15 | `qa_engineer` String DEFAULT '' COMMENT 'QA engineer who conducted the testing', 16 | 17 | `tags` Array(String) COMMENT 'Issue labels', 18 | `components` Array(String) COMMENT 'Issue components', 19 | 20 | `created_at` Date COMMENT 'Issue creation date', 21 | `updated_at` Date COMMENT 'Date of the last update of the issue', 22 | `deadline` Date DEFAULT toDate('1970-01-01') COMMENT 'Deadline for completing the issue', 23 | `closed_at` Date DEFAULT toDate('1970-01-01') COMMENT 'Closing date of the issue without resolution, based on custom closing statuses', 24 | `resolved_at` Date DEFAULT toDate('1970-01-01') COMMENT 'Closing date of the issue with the resolution', 25 | `start_date` Date DEFAULT toDate('1970-01-01') COMMENT 'Start date (fact, manual field, gantt)', 26 | `end_date` Date DEFAULT toDate('1970-01-01') COMMENT 'End date (fact, manual field, gantt)', 27 | 28 | `is_subtask` UInt8 DEFAULT 0 COMMENT 'Subtask flag', 29 | `is_closed` UInt8 DEFAULT 0 COMMENT 'Issue completion flag (based on custom closing statuses)', 30 | `is_resolved` UInt8 DEFAULT 0 COMMENT 'Issue completion flag (with resolution)', 31 | 32 
| `story_points` Float32 DEFAULT 0.0 COMMENT 'Estimating the cost of the issue', 33 | `sprints` Array(String) COMMENT 'Sprints in which the issue participated', 34 | `parent_issue_key` String DEFAULT '' COMMENT 'The key of the parent issue, like TEST-1', 35 | `epic_issue_key` String DEFAULT '' COMMENT 'Epic key, like GOAL-1' 36 | ) 37 | ENGINE = ReplacingMergeTree(version) 38 | PARTITION BY toYYYYMM(updated_at) 39 | ORDER BY issue_key 40 | -------------------------------------------------------------------------------- /migrations/v0.1.x/000002_create_table_issue_metrics.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS `issue_metrics`; 2 | -------------------------------------------------------------------------------- /migrations/v0.1.x/000002_create_table_issue_metrics.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS `issue_metrics` 2 | ( 3 | `version` DateTime DEFAULT now(), 4 | `last_seen` DateTime COMMENT 'The date when the issue was last in this status', 5 | 6 | `issue_key` String COMMENT 'Issue key', 7 | `status_name` LowCardinality(String) COMMENT 'Status name', 8 | `status_transitions_count` UInt8 COMMENT 'The number of transitions to this status', 9 | 10 | `duration` UInt32 COMMENT 'Time spent in the status in seconds (for all time)', 11 | `human_readable_duration` String DEFAULT '' COMMENT 'Human - readable format for duration', 12 | `busdays_duration` UInt32 COMMENT 'Time spent in the status in seconds (busdays only)', 13 | `human_readable_busdays_duration` String DEFAULT '' COMMENT 'Human - readable format for busdays_duration' 14 | ) 15 | ENGINE = ReplacingMergeTree(version) 16 | PARTITION BY toYYYYMM(last_seen) 17 | ORDER BY (issue_key, status_name, last_seen) 18 | -------------------------------------------------------------------------------- /migrations/v0.1.x/000003_create_view_issues_view.down.sql: -------------------------------------------------------------------------------- 1 | DROP VIEW IF EXISTS `issues_view`; -------------------------------------------------------------------------------- /migrations/v0.1.x/000003_create_view_issues_view.up.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW IF NOT EXISTS `issues_view` AS 2 | SELECT * 3 | FROM `issues` 4 | FINAL 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py310'] 4 | include = '\.pyi?$' 5 | 6 | 7 | [tool.pytest.ini_options] 8 | pythonpath = [ 9 | ".", "tracker_exporter", 10 | ] -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | wheel 2 | twine 3 | pytest 4 | pytest-cov 5 | bandit 6 | settings_doc 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | yandex_tracker_client==2.* 2 | boto3==1.34.* 3 | redis==5.0.* 4 | datadog==0.47.* 5 | APScheduler==3.10.* 6 | requests==2.31.* 7 | numpy==1.26.0 8 | pandas==2.1.1 9 | businesstimedelta==1.0.1 10 | holidays==0.34 11 | sentry-sdk==1.32.* 12 | python-dotenv 13 | pydantic==2.4.* 14 | pydantic-settings==2.0.* 15 | 
psutil==5.9.* 16 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = tracker-exporter 3 | description-file = README 4 | 5 | [flake8] 6 | extend-ignore = E203 7 | ignore = 8 | D203, 9 | W503, 10 | E722, 11 | W605, 12 | E402 13 | exclude = 14 | ansible, 15 | scripts, 16 | docs, 17 | migrations, 18 | .git, 19 | .env, 20 | build, 21 | dist, 22 | venv, 23 | .eggs, 24 | tests, 25 | scripts 26 | setup.py, 27 | .example, 28 | .yaml, 29 | .vscode 30 | max-complexity = 15 31 | max-line-length = 120 32 | 33 | [pylint.message-control] 34 | disable = 35 | W0511, 36 | C0114, 37 | C0115, 38 | C0116, 39 | W1203, 40 | W0703, 41 | R0903, 42 | C0116, 43 | R0913, 44 | R0902, 45 | R1719 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from os import path, environ 4 | from setuptools import find_packages, setup 5 | 6 | 7 | def readme(): 8 | with open("README.md", "r") as fh: 9 | long_description = fh.read() 10 | 11 | return long_description 12 | 13 | cwd = path.abspath(path.dirname(__file__)) 14 | 15 | 16 | def metadata(): 17 | meta = {} 18 | with open(path.join(cwd, "tracker_exporter", "_meta.py"), "r") as fh: 19 | exec(fh.read(), meta) # nosec 20 | return meta 21 | 22 | 23 | def requirements(): 24 | requirements_list = [] 25 | 26 | with open("requirements.txt") as requirements: 27 | for install in requirements: 28 | requirements_list.append(install.strip()) 29 | 30 | return requirements_list 31 | 32 | metadata = metadata() 33 | readme = readme() 34 | packages = find_packages() 35 | requirements = requirements() 36 | 37 | if environ.get("PYPI_FROM_GITHUB", 0) == 1: 38 | version = "{{PKG_VERSION}}" 39 | else: 40 | version = metadata.get("version") 41 | 42 | 43 | def main(): 44 | setup( 45 | name="tracker-exporter", 46 | version=version, 47 | author=metadata.get("author"), 48 | author_email=metadata.get("author_email"), 49 | license=metadata.get("license"), 50 | description=metadata.get("description"), 51 | long_description=readme, 52 | long_description_content_type="text/markdown", 53 | url=metadata.get("url"), 54 | download_url=metadata.get("download_url"), 55 | keywords=["yandex tracker exporter", "yandex", "tracker", "etl", "agile", "cycle time"], 56 | platforms=["osx", "linux"], 57 | packages=packages, 58 | classifiers = [ 59 | "Programming Language :: Python :: 3.10", 60 | ], 61 | install_requires=requirements, 62 | include_package_data=True, 63 | python_requires=">=3.10", 64 | entry_points={ 65 | "console_scripts": [ 66 | "tracker-exporter=tracker_exporter.main:main" 67 | ] 68 | }, 69 | zip_safe=False 70 | ) 71 | 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | 4 | from tracker_exporter.config import Settings 5 | from tracker_exporter.etl import YandexTrackerETL 6 | from tracker_exporter.services.clickhouse import ClickhouseClient 7 | from tracker_exporter.services.tracker import YandexTrackerClient 8 | 9 | # Token & OrgID from Github 10 | os.environ["EXPORTER_TRACKER__SEARCH__QUEUES"] = "OSSPYTEST" 11 | os.environ["EXPORTER_CLICKHOUSE__ENABLE_UPLOAD"] = "false" 12 | 13 | 14 | 
@pytest.fixture(scope="function") 15 | def etl() -> YandexTrackerETL: 16 | """Returns YandexTrackerETL for tests.""" 17 | return YandexTrackerETL( 18 | tracker_client=YandexTrackerClient(), 19 | clickhouse_client=ClickhouseClient(), 20 | ) 21 | 22 | 23 | @pytest.fixture(scope="function") 24 | def config() -> Settings: 25 | return Settings() 26 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class test_valid_config(): 4 | pass 5 | 6 | 7 | 8 | class test_invalid_config(): 9 | pass 10 | -------------------------------------------------------------------------------- /tests/test_etl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import pytest 4 | from tracker_exporter.etl import YandexTrackerETL 5 | 6 | NEW_ISSUE = None 7 | 8 | 9 | @pytest.mark.skip("Later") 10 | def test_prepare_issue(etl: YandexTrackerETL): 11 | global NEW_ISSUE 12 | n = str(time.time()).split(".")[0] 13 | new_issue = etl.tracker.client.issues.create(queue="OSSPYTEST", summary=f"TEST-{n}") 14 | NEW_ISSUE = new_issue.key 15 | # os.environ["EXPORTER_TRACKER__SEARCH_QUERY"] = f"Issue: {new_issue.key}" 16 | # time.sleep(5) 17 | # etl.tracker.client.issues[new_issue.key] # todo status change to In Progress 18 | # time.sleep(5) 19 | 20 | 21 | def test_query_builder(etl: YandexTrackerETL): 22 | pass 23 | 24 | 25 | def test_issue_transform(etl: YandexTrackerETL): 26 | pass 27 | 28 | 29 | def test_export_and_transform(etl: YandexTrackerETL): 30 | pass 31 | 32 | 33 | def test_upload_to_storage(etl: YandexTrackerETL): 34 | pass 35 | 36 | 37 | def test_full_run(etl: YandexTrackerETL): 38 | pass 39 | -------------------------------------------------------------------------------- /tests/test_helpers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tracker_exporter.utils.helpers as helpers 3 | 4 | from tracker_exporter.config import Settings 5 | from datetime import datetime 6 | from contextlib import nullcontext as does_not_raise 7 | 8 | 9 | class StringTestObject: 10 | def __init__(self): 11 | self.name = "stringTestObject" 12 | 13 | 14 | class IntTestObject: 15 | def __init__(self): 16 | self.name = 1 17 | 18 | 19 | @pytest.mark.parametrize( 20 | "end_time, start_time, unit, expected, expectation", 21 | [ 22 | ( 23 | datetime(2023, 1, 1, 10, 1, 0), 24 | datetime(2023, 1, 1, 10, 0, 0), 25 | helpers.TimeDeltaOut.SECONDS, 26 | 60, 27 | does_not_raise() 28 | ), 29 | ( 30 | datetime(2023, 1, 1, 10, 1, 0), 31 | datetime(2023, 1, 1, 10, 0, 0), 32 | helpers.TimeDeltaOut.MINUTES, 33 | 1, 34 | does_not_raise() 35 | ), 36 | ( 37 | "2023-01-01 10:01:00", 38 | "2023-01-01 10:00:00", 39 | helpers.TimeDeltaOut.MINUTES, 40 | 1, 41 | pytest.raises(AssertionError) 42 | ), 43 | ] 44 | ) 45 | def test_get_timedelta(end_time, start_time, unit, expected, expectation): 46 | with expectation: 47 | assert expected == helpers.get_timedelta(end_time, start_time, unit) 48 | 49 | 50 | @pytest.mark.parametrize( 51 | "start_date, end_date, busdays_only, expected", 52 | [ 53 | ( 54 | datetime(2023, 1, 1, 10, 0, 0), 55 | datetime(2023, 1, 1, 10, 30, 0), 56 | True, 57 | 0 58 | ), 59 | ( 60 | datetime(2023, 1, 1, 10, 0, 0), 61 | datetime(2023, 1, 1, 10, 30, 0), 62 | False, 63 | 30 * 60 64 | ), 65 | ( 66 | "2023-01-01 10:00:00", 67 | "2023-01-01 10:30:00", 68 | True, 69 | 0 70 | ), 71 | ( 
72 | "2023-01-01 10:00:00", 73 | "2023-01-01 10:30:00", 74 | False, 75 | 30 * 60 76 | ), 77 | ( 78 | "2023-10-16 10:00:00", 79 | "2023-10-16 23:00:00", 80 | True, 81 | 12 * 60 * 60 82 | ), 83 | ] 84 | ) 85 | def test_calculate_time_spent(start_date, end_date, busdays_only, expected): 86 | assert expected == helpers.calculate_time_spent(start_date, end_date, busdays_only) 87 | 88 | 89 | def test_fix_null_dates(config: Settings): 90 | data = {"a": "b"} 91 | for i in range(0, len(config.not_nullable_fields)): 92 | data[config.not_nullable_fields[i]] = None 93 | assert data[config.not_nullable_fields[i]] is None 94 | 95 | cleaned_data = helpers.fix_null_dates(data) 96 | assert data == cleaned_data 97 | 98 | 99 | @pytest.mark.parametrize( 100 | "resource, attribute, low, expected", 101 | [ 102 | ( 103 | StringTestObject(), 104 | "name", 105 | True, 106 | "stringtestobject", 107 | ), 108 | ( 109 | StringTestObject(), 110 | "name", 111 | False, 112 | "stringTestObject", 113 | ), 114 | ( 115 | StringTestObject(), 116 | "age", 117 | False, 118 | None, 119 | ), 120 | ( 121 | IntTestObject(), 122 | "name", 123 | True, 124 | 1, 125 | ), 126 | ( 127 | IntTestObject(), 128 | "age", 129 | False, 130 | None, 131 | ), 132 | ] 133 | ) 134 | def test_validate_resource(resource, attribute, low, expected): 135 | assert expected == helpers.validate_resource(resource, attribute, low) 136 | 137 | 138 | @pytest.mark.parametrize( 139 | "text, expected", 140 | [ 141 | ("русскаястрока", "русскаястрока"), 142 | ("РусскийВерблюд", "русский_верблюд"), 143 | ("русскийВерблюд2", "русский_верблюд_2"), 144 | ("Русские пробелы", "русские_пробелы"), 145 | ("русский-кебаб", "русский_кебаб"), 146 | ("РУССКИЕ_БОЛЬШИЕ", "русские_большие"), 147 | ("русская_змея", "русская_змея"), 148 | ("РусскийДлинныйВерблюдПлюсЧисло1", "русский_длинный_верблюд_плюс_число_1"), 149 | ("singlestring", "singlestring"), 150 | ("camelCase", "camel_case"), 151 | ("longCamelCase", "long_camel_case"), 152 | ("longCamelCaseWithNumber1", "long_camel_case_with_number_1"), 153 | ("PascalCase", "pascal_case"), 154 | ("LongPascalCase", "long_pascal_case"), 155 | ("LongPascalCaseWithNumber1", "long_pascal_case_with_number_1"), 156 | ("snake_case", "snake_case"), 157 | ("kebab-case", "kebab_case"), 158 | ("CONSTANT_CASE", "constant_case"), 159 | ("camelCase-kebab_snakePascalCaseCONSTANT_case", "camel_case_kebab_snake_pascal_case_constant_case"), 160 | ("separated string case", "separated_string_case"), 161 | (None, None), 162 | (" ", ""), 163 | ] 164 | ) 165 | def test_to_snake_case(text, expected): 166 | assert expected == helpers.to_snake_case(text) 167 | 168 | 169 | @pytest.mark.parametrize( 170 | "dtime, date_only, timezone, expected", 171 | [ 172 | ( 173 | "2023-01-01T10:00:00.123+0000", 174 | True, 175 | "UTC", 176 | "2023-01-01" 177 | ), 178 | ( 179 | "2023-01-01T10:00:00.123+0300", 180 | False, 181 | "UTC", 182 | "2023-01-01T07:00:00.123" 183 | ), 184 | ( 185 | "2023-01-01T10:00:00.123+0000", 186 | False, 187 | "Europe/Moscow", 188 | "2023-01-01T13:00:00.123" 189 | ), 190 | ( 191 | None, 192 | False, 193 | "UTC", 194 | None 195 | ), 196 | ] 197 | ) 198 | def test_convert_datetime(dtime, date_only, timezone, expected): 199 | assert expected == helpers.convert_datetime(dtime, date_only=date_only, timezone=timezone) 200 | 201 | 202 | @pytest.mark.skip("Later") 203 | def test_backoff(exceptions, base_delay, expo_factor, max_tries, jitter): 204 | pass 205 | 206 | 207 | @pytest.mark.parametrize( 208 | "seconds, verbosity, expected", 209 | [ 210 | (60, 2, 
"1m"), 211 | (300, 2, "5m"), 212 | (320, 2, "5m 20s"), 213 | (86700, 2, "1d 5m"), 214 | (3200400, 3, "1mo 1w 1h") 215 | ] 216 | ) 217 | def test_to_human_time(seconds, verbosity, expected): 218 | assert expected == helpers.to_human_time(seconds, verbosity) 219 | 220 | 221 | @pytest.mark.parametrize( 222 | "timestr, expected", 223 | [ 224 | ("1m", 60), 225 | ("5m", 300), 226 | ("5m 20s", 320), 227 | ("1d 5m", 86700), 228 | ("1mo 1w 1h", 3200400), 229 | ] 230 | ) 231 | def test_from_human_time(timestr, expected): 232 | assert expected == helpers.from_human_time(timestr) 233 | 234 | 235 | @pytest.mark.parametrize( 236 | "text, expected", 237 | [ 238 | ("normalized", "normalized"), 239 | ("emoji😎", "emoji"), 240 | ] 241 | ) 242 | def test_string_normalize(text, expected): 243 | assert expected == helpers.string_normalize(text) 244 | 245 | -------------------------------------------------------------------------------- /tests/test_state.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class TestJSONFileStateStorage: 4 | pass 5 | 6 | 7 | class TestRedisStateStorage: 8 | pass 9 | -------------------------------------------------------------------------------- /tracker_exporter/__init__.py: -------------------------------------------------------------------------------- 1 | from tracker_exporter.main import ( 2 | run_etl, 3 | configure_sentry, 4 | configure_state_manager, 5 | ) 6 | from tracker_exporter.etl import YandexTrackerETL 7 | from tracker_exporter.services.clickhouse import ClickhouseClient 8 | from tracker_exporter.services.tracker import YandexTrackerClient 9 | 10 | __all__ = [ 11 | "ClickhouseClient", 12 | "YandexTrackerClient", 13 | "YandexTrackerETL", 14 | "run_etl", 15 | "configure_sentry", 16 | "configure_state_manager", 17 | ] 18 | -------------------------------------------------------------------------------- /tracker_exporter/_meta.py: -------------------------------------------------------------------------------- 1 | version = "2.0.0" 2 | url = "https://github.com/akimrx/yandex-tracker-exporter" 3 | download_url = "https://pypi.org/project/tracker-exporter/" 4 | appname = "yandex_tracker_exporter" 5 | description = "Yandex.Tracker issue metrics exporter" 6 | author = "Akim Faskhutdinov" 7 | author_email = "akimstrong@yandex.ru" 8 | license = "MIT" 9 | -------------------------------------------------------------------------------- /tracker_exporter/_typing.py: -------------------------------------------------------------------------------- 1 | from typing import TypeVar, Union, Sequence 2 | 3 | T = TypeVar("T") 4 | DateTimeISO8601Str = Union[str, int] 5 | DateStr = str 6 | _Sequence = Union[T, Sequence[T]] 7 | -------------------------------------------------------------------------------- /tracker_exporter/config.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | 4 | from functools import lru_cache 5 | from typing import Literal, Optional, Union 6 | from pydantic import validator, root_validator 7 | from pydantic_settings import BaseSettings 8 | 9 | from tracker_exporter.models.base import YandexTrackerLanguages, LogLevels 10 | from tracker_exporter.exceptions import ConfigurationError 11 | from tracker_exporter.services.monitoring import DogStatsdClient 12 | 13 | YANDEX_TRACKER_API_SEARCH_HARD_LIMIT = 10000 14 | YANDEX_TRACKER_HARD_LIMIT_ISSUE_URL = "https://github.com/yandex/yandex_tracker_client/issues/13" 15 | 16 | logger = 
logging.getLogger(__name__) 17 | 18 | 19 | class MonitoringSettings(BaseSettings): 20 | """Observability settings.""" 21 | 22 | metrics_enabled: Optional[bool] = False 23 | metrics_host: Optional[str] = "localhost" 24 | metrics_port: Optional[int] = 8125 25 | metrics_base_prefix: Optional[str] = "tracker_exporter" 26 | metrics_base_labels: Optional[list[str]] = [] 27 | sentry_enabled: Optional[bool] = False 28 | sentry_dsn: Optional[str] = None 29 | 30 | @validator("sentry_dsn", pre=True, always=True) 31 | def validate_sentry_dsn(cls, value: str | None, values: dict) -> str: 32 | sentry_enabled = values.get("sentry_enabled") 33 | if sentry_enabled and not value: 34 | raise ConfigurationError("Sentry DSN must not be empty when Sentry is enabled") 35 | return value 36 | 37 | class Config: 38 | extra = "ignore" 39 | 40 | 41 | class ClickhouseSettings(BaseSettings): 42 | """Settings for Clickhouse storage.""" 43 | 44 | enable_upload: Optional[bool] = True 45 | host: Optional[str] = "localhost" 46 | proto: Optional[str] = "http" 47 | port: Optional[int] = 8123 48 | cacert_path: Optional[str] = None 49 | serverless_proxy_id: str | None = None 50 | username: Optional[str] = "default" 51 | password: Optional[str] = None 52 | database: Optional[str] = "agile" 53 | issues_table: Optional[str] = "issues" 54 | issue_metrics_table: Optional[str] = "issue_metrics" 55 | issues_changelog_table: Optional[str] = "issues_changelog" 56 | auto_deduplicate: Optional[bool] = True 57 | backoff_base_delay: Optional[Union[int, float]] = 0.5 58 | backoff_expo_factor: Optional[Union[int, float]] = 2.5 59 | backoff_max_tries: Optional[int] = 3 60 | backoff_jitter: Optional[bool] = True 61 | 62 | @validator("serverless_proxy_id", pre=True, always=True) 63 | def validate_serverless_proxy_id(cls, value: str | None, values: dict) -> str: 64 | http = values.get("proto") == "http" 65 | if http and value is not None: 66 | raise ConfigurationError("Clickhouse proto must be HTTPS when serverless used") 67 | return value 68 | 69 | @validator("cacert_path", pre=True, always=True) 70 | def validate_cacert_path(cls, value: str | None, values: dict) -> str: 71 | https = values.get("proto") == "https" 72 | if https and not value: 73 | raise ConfigurationError("CA cert path must not be empty when Clickhouse proto is HTTPS") 74 | return value 75 | 76 | class Config: 77 | extra = "ignore" 78 | 79 | 80 | class IssuesSearchSettings(BaseSettings): 81 | """Settings for search & export.""" 82 | 83 | query: Optional[str] = None 84 | range: Optional[str] = "2h" 85 | queues: Optional[Union[str, list[str]]] = None 86 | per_page_limit: Optional[int] = 100 87 | 88 | @validator("queues", pre=True, always=True) 89 | def validate_queues(cls, value: str) -> list: 90 | if value is None: 91 | return None 92 | 93 | if not isinstance(value, (str, list)): 94 | raise ConfigurationError("Invalid QUEUES. Example: TEST,TRASH. 
Received: %s", value) 95 | 96 | queues = value.split(",") if isinstance(value, str) else value 97 | return ", ".join([f"{q.upper()}" for q in queues]) 98 | 99 | class Config: 100 | extra = "ignore" 101 | 102 | 103 | class TrackerSettings(BaseSettings): 104 | """Settings for Yandex.Tracker client.""" 105 | 106 | loglevel: Optional[LogLevels] = LogLevels.warning 107 | token: Optional[str] = None 108 | org_id: Optional[str] = None 109 | iam_token: Optional[str] = None 110 | cloud_org_id: Optional[str] = None 111 | timeout: Optional[int] = 10 112 | max_retries: Optional[int] = 10 113 | language: Optional[YandexTrackerLanguages] = YandexTrackerLanguages.en 114 | timezone: Optional[str] = "Europe/Moscow" 115 | search: IssuesSearchSettings = IssuesSearchSettings() 116 | 117 | @root_validator(pre=True) 118 | def validate_tokens_and_orgs(cls, values) -> str: 119 | token = values.get("token") 120 | iam_token = values.get("iam_token") 121 | org_id = values.get("org_id") 122 | cloud_org_id = values.get("cloud_org_id") 123 | 124 | if all((token, iam_token)): 125 | raise ConfigurationError("Two tokens passed. Please use one of: TOKEN or IAM_TOKEN") 126 | elif not any((token, iam_token)): 127 | raise ConfigurationError("Empty tokens. Please use one of: TOKEN or IAM_TOKEN") 128 | 129 | if all((cloud_org_id, org_id)): 130 | raise ConfigurationError("Two orgs id passed. Please use one of: ORG_ID or CLOUD_ORG_ID") 131 | elif not any((cloud_org_id, org_id)): 132 | raise ConfigurationError("Empty orgs id. Please use one of: ORG_ID or CLOUD_ORG_ID") 133 | 134 | return values 135 | 136 | class Config: 137 | extra = "ignore" 138 | 139 | 140 | class StateSettings(BaseSettings): 141 | """Settings for stateful mode.""" 142 | 143 | storage: Optional[Literal["redis", "jsonfile", "custom"]] = "jsonfile" 144 | redis_dsn: Optional[str] = "redis://localhost:6379" 145 | jsonfile_strategy: Optional[Literal["s3", "local"]] = "local" 146 | jsonfile_path: Optional[str] = "state.json" 147 | jsonfile_s3_bucket: Optional[str] = None 148 | jsonfile_s3_region: Optional[str] = "us-east-1" 149 | jsonfile_s3_endpoint: Optional[str] = None 150 | jsonfile_s3_access_key: Optional[str] = None 151 | jsonfile_s3_secret_key: Optional[str] = None 152 | custom_storage_params: Optional[dict] = {} 153 | 154 | @root_validator(pre=True) 155 | def validate_state(cls, values) -> str: 156 | jsonfile_strategy = values.get("jsonfile_strategy") 157 | jsonfile_s3_bucket = values.get("jsonfile_s3_bucket") 158 | jsonfile_s3_endpoint = values.get("jsonfile_s3_endpoint") 159 | jsonfile_s3_access_key = values.get("jsonfile_s3_access_key") 160 | jsonfile_s3_secret_key = values.get("jsonfile_s3_secret_key") 161 | s3_is_configured = all( 162 | ( 163 | jsonfile_s3_bucket, 164 | jsonfile_s3_endpoint, 165 | jsonfile_s3_access_key, 166 | jsonfile_s3_secret_key, 167 | ) 168 | ) 169 | 170 | if jsonfile_strategy == "s3" and not s3_is_configured: 171 | raise ConfigurationError("S3 must be configured for JSONFileStorage with S3 strategy.") 172 | 173 | return values 174 | 175 | class Config: 176 | extra = "ignore" 177 | 178 | 179 | class Settings(BaseSettings): 180 | """Global merged config.""" 181 | 182 | monitoring: MonitoringSettings = MonitoringSettings() 183 | clickhouse: ClickhouseSettings = ClickhouseSettings() 184 | tracker: TrackerSettings = TrackerSettings # TODO (akimrx): research, called class not see TOKEN's 185 | state: StateSettings = StateSettings() 186 | stateful: Optional[bool] = False 187 | stateful_initial_range: Optional[str] = "1w" 188 | 
changelog_export_enabled: Optional[bool] = False 189 | log_etl_stats: Optional[bool] = True 190 | log_etl_stats_each_n_iter: Optional[int] = 100 191 | 192 | loglevel: Optional[LogLevels] = LogLevels.info 193 | workdays: Optional[list[int]] = [0, 1, 2, 3, 4] 194 | business_hours_start: Optional[datetime.time] = datetime.time(9) 195 | business_hours_end: Optional[datetime.time] = datetime.time(22) 196 | datetime_response_format: Optional[str] = "%Y-%m-%dT%H:%M:%S.%f%z" 197 | datetime_query_format: Optional[str] = "%Y-%m-%d %H:%M:%S" 198 | datetime_clickhouse_format: Optional[str] = "%Y-%m-%dT%H:%M:%S.%f" 199 | 200 | etl_interval_minutes: Optional[int] = 30 201 | closed_issue_statuses: Optional[Union[str, list]] = "closed,rejected,resolved,cancelled,released" 202 | not_nullable_fields: Optional[Union[tuple, list, str]] = ( 203 | "created_at", 204 | "resolved_at", 205 | "closed_at", 206 | "updated_at", 207 | "released_at", 208 | "deadline", 209 | "start_date", 210 | "end_date", 211 | "start_time", 212 | "end_time", 213 | "moved_at", 214 | ) 215 | 216 | @validator("closed_issue_statuses", pre=True, always=True) 217 | def validate_closed_issue_statuses(cls, value: str) -> list: 218 | if not isinstance(value, (str, list)): 219 | raise ConfigurationError( 220 | "Invalid CLOSED_ISSUES_STATUSES. Example: closed,released,cancelled. Received: %s", 221 | value, 222 | ) 223 | 224 | if isinstance(value, str): 225 | return value.split(",") 226 | return value 227 | 228 | @validator("not_nullable_fields", pre=True, always=True) 229 | def validate_not_nullable_fields(cls, value: str) -> list: 230 | if not isinstance(value, (str, list, tuple)): 231 | raise ConfigurationError( 232 | "Invalid NOT_NULLABLE_FIELDS. Example: created_at,deadline,updated_at. Received: %s", 233 | value, 234 | ) 235 | 236 | if isinstance(value, str): 237 | return value.split(",") 238 | return value 239 | 240 | class Config: 241 | env_prefix = "EXPORTER_" 242 | case_sensitive = False 243 | env_nested_delimiter = "__" 244 | env_file = ".env" 245 | extra = "ignore" 246 | 247 | 248 | @lru_cache 249 | def _get_settings(): 250 | cfg = Settings() 251 | return cfg 252 | 253 | 254 | config = _get_settings() 255 | monitoring = DogStatsdClient( 256 | host=config.monitoring.metrics_host, 257 | port=config.monitoring.metrics_port, 258 | base_labels=config.monitoring.metrics_base_labels, 259 | metric_name_prefix=config.monitoring.metrics_base_prefix, 260 | use_ms=True, 261 | enabled=config.monitoring.metrics_enabled, 262 | ) 263 | -------------------------------------------------------------------------------- /tracker_exporter/etl.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | from datetime import datetime, timedelta 4 | from typing import Tuple, List, Optional 5 | from yandex_tracker_client.collections import Issues 6 | from yandex_tracker_client.objects import SeekablePaginatedList 7 | from yandex_tracker_client.exceptions import Forbidden 8 | 9 | from tracker_exporter.config import config, monitoring 10 | from tracker_exporter.models.issue import TrackerIssue 11 | from tracker_exporter.models.base import ClickhousePayload 12 | from tracker_exporter.state.managers import AbstractStateManager 13 | from tracker_exporter.services.tracker import YandexTrackerClient 14 | from tracker_exporter.services.clickhouse import ClickhouseClient 15 | from tracker_exporter.exceptions import ConfigurationError, UploadError, ExportOrTransformError 16 | from tracker_exporter.utils.helpers 
import ( 17 | fix_null_dates, 18 | from_human_time, 19 | convert_datetime, 20 | log_etl_stats, 21 | ) 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | 26 | class YandexTrackerETL: 27 | """Export, transform, load facade.""" 28 | 29 | def __init__( 30 | self, 31 | *, 32 | tracker_client: YandexTrackerClient, 33 | clickhouse_client: ClickhouseClient, 34 | state_manager: Optional[AbstractStateManager] = None, 35 | issue_model: TrackerIssue = TrackerIssue, 36 | database: str = config.clickhouse.database, 37 | issues_table: str = config.clickhouse.issues_table, 38 | metrics_table: str = config.clickhouse.issue_metrics_table, 39 | changelogs_table: str = config.clickhouse.issues_changelog_table, 40 | upload_to_storage: bool = config.clickhouse.enable_upload, 41 | state_key: str = "tracker_etl_default", 42 | ) -> None: 43 | self.tracker = tracker_client 44 | self.clickhouse = clickhouse_client 45 | self.state = state_manager 46 | self.issue_model = issue_model 47 | self.database = database 48 | self.issues_table = issues_table 49 | self.metrics_table = metrics_table 50 | self.changelogs_table = changelogs_table 51 | self.upload_to_storage = upload_to_storage 52 | self.state_key = state_key 53 | 54 | def _get_possible_new_state(self, issue: TrackerIssue | ClickhousePayload): 55 | try: 56 | last_state = issue.updated_at 57 | except AttributeError: 58 | last_state = issue.issue.updated_at 59 | return convert_datetime( 60 | last_state, 61 | source_dt_format=config.datetime_clickhouse_format, 62 | output_format=config.datetime_query_format, 63 | timezone=config.tracker.timezone, 64 | ) 65 | 66 | def _build_search_query( 67 | self, 68 | stateful: bool = False, 69 | queues: str | None = None, 70 | search_query: str | None = None, 71 | search_range: str | None = None, 72 | ) -> str | dict: 73 | """Prepare search query for Yandex.Tracker.""" 74 | default_order = ["updated"] 75 | sort_by_updated_asc = ' "Sort by": Updated ASC' 76 | 77 | def append_sort_by(query: str, sort_by: str) -> str: 78 | return f"{query} {sort_by}" if "ort by" not in query else query 79 | 80 | def build_stateful_query() -> str: 81 | if self.state is None: 82 | raise ConfigurationError("StateKeeper is not configured for stateful ETL mode.") 83 | queue_query = f"Queue: {queues} and " if queues else "" 84 | if (last_state := self.state.get(self.state_key)) is None: 85 | last_state = ( 86 | datetime.now() - timedelta(seconds=from_human_time(config.stateful_initial_range)) 87 | ).strftime(config.datetime_query_format) 88 | updated_query = f'Updated: >= "{last_state}"' 89 | return f"{queue_query} {updated_query} {sort_by_updated_asc}".strip() 90 | 91 | def build_query_from_filters() -> str: 92 | queue_query = f"Queue: {queues}" if queues else "" 93 | from_ = datetime.now() - timedelta(seconds=from_human_time(search_range)) 94 | updated_query = f'Updated: >= "{from_.strftime(config.datetime_query_format)}"' if search_range else "" 95 | and_ = " and" if all((queues, search_range)) else "" 96 | return f"{queue_query}{and_} {updated_query} {sort_by_updated_asc}".strip() 97 | 98 | params = {"query": None, "filter": {}, "order": default_order} 99 | if search_query: 100 | logger.info("Search query received, ignoring other filter params") 101 | params["query"] = append_sort_by(search_query, sort_by_updated_asc) 102 | elif stateful: 103 | params["query"] = build_stateful_query() 104 | elif queues or search_range: 105 | params["query"] = build_query_from_filters() 106 | else: 107 | raise ConfigurationError( 108 | "Pass one of param: 
search_query, queues, search_range. Or run ETL in stateful mode." 109 | ) 110 | logger.debug(f"Builded search query: {params}") 111 | return params 112 | 113 | @monitoring.send_time_metric("issue_transform_time_seconds") 114 | def _transform(self, issue: Issues) -> ClickhousePayload: 115 | """Transform issue to storage-compatible payload format.""" 116 | _issue = self.issue_model(issue) 117 | changelog = _issue._changelog_events 118 | metrics = _issue.metrics() 119 | 120 | return ClickhousePayload( 121 | issue=fix_null_dates(_issue.to_dict()), 122 | changelog=[c.model_dump() for c in changelog] if changelog else [], 123 | metrics=[m.to_dict() for m in metrics] if metrics else [], 124 | ) 125 | 126 | @monitoring.send_time_metric("export_and_transform_time_seconds") 127 | def _export_and_transform( 128 | self, 129 | query: str | None = None, 130 | filter: dict | list | None = None, 131 | order: dict | list | None = None, 132 | limit: int = 100, 133 | ) -> Tuple[List[dict], List[dict], List[dict], str | None]: 134 | """Collects and transforms metrics for found tasks.""" 135 | issues = [] 136 | metrics = [] 137 | changelog_events = [] 138 | issues_without_metrics = 0 139 | possible_new_state = None 140 | logger.info("Searching, exporting and transform issues...") 141 | 142 | found_issues = self.tracker.search_issues(query=query, filter=filter, order=order, limit=limit) 143 | if len(found_issues) == 0: 144 | logger.info("Nothing to export. Skipping ETL") 145 | return issues, changelog_events, metrics, possible_new_state 146 | 147 | if isinstance(found_issues, SeekablePaginatedList): 148 | pagination = True 149 | logger.info("Paginated list received, possible new state will be calculated later") 150 | else: 151 | pagination = False 152 | possible_new_state = self._get_possible_new_state(self.issue_model(found_issues[-1])) 153 | 154 | et_start_time = time.time() 155 | for i, tracker_issue in enumerate(found_issues): 156 | if config.log_etl_stats: 157 | if i == 0: 158 | pass 159 | elif i % config.log_etl_stats_each_n_iter == 0: 160 | elapsed_time = time.time() - et_start_time 161 | log_etl_stats(iteration=i, remaining=len(found_issues), elapsed=elapsed_time) 162 | 163 | try: 164 | issue, changelog, issue_metrics = self._transform(tracker_issue).model_dump().values() 165 | 166 | if pagination and i == len(found_issues) - 1: 167 | logger.info("Trying to get new state from last iteration") 168 | possible_new_state = self._get_possible_new_state(self.issue_model(tracker_issue)) 169 | 170 | issues.append(issue) 171 | changelog_events.extend(changelog) 172 | 173 | if not issue_metrics: 174 | logger.debug(f"Ignore {tracker_issue.key} because metrics is empty") 175 | issues_without_metrics += 1 176 | else: 177 | metrics.extend(issue_metrics) 178 | 179 | monitoring.send_count_metric("issues_total_processed_count", 1) 180 | except Forbidden as forbidden: 181 | logger.warning(f"Can't read {tracker_issue.key}, permission denied. 
Details: {forbidden}") 182 | except Exception as exc: 183 | logger.exception(f"Issue {tracker_issue.key} can't be transformed, details: {exc}") 184 | 185 | monitoring.send_gauge_metric("issues_without_metrics", value=issues_without_metrics) 186 | logger.info( 187 | f"Total issues: {len(issues)}, total metrics: {len(metrics)}, " 188 | f"total changelog events: {len(changelog_events)}, " 189 | f"ignored issues with empty metrics: {issues_without_metrics}" 190 | ) 191 | return issues, changelog_events, metrics, possible_new_state 192 | 193 | @monitoring.send_time_metric("upload_to_storage_time_seconds") 194 | def _load_to_storage(self, database: str, table: str, payload: list, deduplicate: bool = True) -> dict: 195 | """Load transformed payload to storage.""" 196 | logger.info(f"Inserting batch ({len(payload)}) to {database}.{table}...") 197 | self.clickhouse.insert_batch(database, table, payload) 198 | if deduplicate: 199 | logger.info(f"Optimizing {database}.{table} for deduplication...") 200 | self.clickhouse.deduplicate(database, table) 201 | 202 | @monitoring.send_time_metric("etl_duration_seconds") 203 | def run( 204 | self, 205 | *, 206 | stateful: bool = False, 207 | queues: str | None = None, 208 | search_query: str | None = None, 209 | search_range: str | None = None, 210 | limit: int = 100, 211 | ignore_exceptions: bool = True, 212 | auto_deduplicate: bool = True, 213 | ) -> None: 214 | """Runs main ETL process.""" 215 | query = self._build_search_query(stateful, queues, search_query, search_range) 216 | try: 217 | issues, changelogs, metrics, possible_new_state = self._export_and_transform(**query, limit=limit) 218 | if stateful and possible_new_state is not None: 219 | logger.info(f"Stateful mode enabled, fetching possible new state: {possible_new_state}") 220 | last_saved_state = self.state.get(self.state_key) 221 | if last_saved_state == possible_new_state and len(issues) <= 1 and len(metrics) <= 1: 222 | logger.info("Data already is up-to-date, skipping upload stage") 223 | return 224 | except Exception as exc: 225 | logger.error(f"An error occured in ETL while exporting and transform: {exc}") 226 | if not ignore_exceptions: 227 | raise ExportOrTransformError(str(exc)) 228 | 229 | if self.upload_to_storage and (issues or metrics or changelogs): 230 | try: 231 | if issues: 232 | self._load_to_storage(self.database, self.issues_table, issues, deduplicate=auto_deduplicate) 233 | if metrics: 234 | self._load_to_storage(self.database, self.metrics_table, metrics, deduplicate=auto_deduplicate) 235 | if changelogs: 236 | self._load_to_storage( 237 | self.database, 238 | self.changelogs_table, 239 | changelogs, 240 | deduplicate=auto_deduplicate, 241 | ) 242 | success = True 243 | except Exception as exc: 244 | logger.error(f"An exception occured in ETL while uploading: {exc}") 245 | success = False 246 | if not ignore_exceptions: 247 | raise UploadError(str(exc)) 248 | else: 249 | if all((stateful, self.state, possible_new_state)): 250 | logger.info(f"Saving last ETL timestamp {possible_new_state}") 251 | self.state.set(self.state_key, possible_new_state) 252 | else: 253 | logger.info( 254 | "The state snapshot will not be saved. 
Not all conditions are met " 255 | f"{stateful=} {self.state=} {possible_new_state=}" 256 | ) 257 | monitoring.send_gauge_metric("last_update_timestamp", value=int(time.time())) 258 | finally: 259 | monitoring.send_gauge_metric("etl_upload_status", value=1 if success else 2) 260 | else: 261 | logger.info("The state snapshot will not be saved because the upload to the storage is disabled.") 262 | print(issues if issues else "Empty issues") 263 | print(metrics if metrics else "Empty metrics") 264 | print(changelogs if changelogs else "Empty changelogs") 265 | -------------------------------------------------------------------------------- /tracker_exporter/exceptions.py: -------------------------------------------------------------------------------- 1 | class TrackerExporterError(Exception): 2 | pass 3 | 4 | 5 | class ClickhouseError(TrackerExporterError): 6 | pass 7 | 8 | 9 | class TrackerError(TrackerExporterError): 10 | pass 11 | 12 | 13 | class ExportOrTransformError(TrackerExporterError): 14 | pass 15 | 16 | 17 | class UploadError(TrackerExporterError): 18 | pass 19 | 20 | 21 | class ConfigurationError(Exception): 22 | pass 23 | 24 | 25 | class JsonFileNotFound(Exception): 26 | pass 27 | 28 | 29 | class InvalidJsonFormat(Exception): 30 | pass 31 | 32 | 33 | class SerializerError(Exception): 34 | pass 35 | -------------------------------------------------------------------------------- /tracker_exporter/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import signal 6 | import logging 7 | import warnings 8 | import argparse 9 | 10 | from datetime import datetime, timedelta 11 | from dotenv import load_dotenv, find_dotenv 12 | 13 | import sentry_sdk 14 | from apscheduler.schedulers.background import BackgroundScheduler 15 | 16 | parser = argparse.ArgumentParser("tracker-exporter") 17 | parser.add_argument( 18 | "-e", 19 | "--env-file", 20 | metavar="file", 21 | dest="env_file", 22 | type=str, 23 | required=False, 24 | help="Path to .env file", 25 | ) 26 | parser.add_argument("--run-once", dest="run_once", action="store_true", help="Run ETL once.") 27 | args, _ = parser.parse_known_args() 28 | warnings.filterwarnings("ignore") 29 | 30 | if args.env_file: 31 | load_dotenv(args.env_file) 32 | else: 33 | load_dotenv(find_dotenv()) 34 | 35 | # pylint: disable=C0413 36 | from tracker_exporter.services.monitoring import sentry_events_filter 37 | from tracker_exporter.state.managers import AbstractStateManager 38 | from tracker_exporter.state.factory import StateManagerFactory, IObjectStorageProps 39 | from tracker_exporter.models.issue import TrackerIssue 40 | from tracker_exporter.etl import YandexTrackerETL 41 | from tracker_exporter.services.tracker import YandexTrackerClient 42 | from tracker_exporter.services.clickhouse import ClickhouseClient 43 | from tracker_exporter._meta import appname, version 44 | from tracker_exporter.config import config 45 | 46 | logging.basicConfig( 47 | level=config.loglevel.upper(), 48 | datefmt="%Y-%m-%d %H:%M:%S", 49 | format="%(asctime)s.%(msecs)03d [%(levelname)s] [%(name)s.%(funcName)s] %(message)s", 50 | ) 51 | logging.getLogger("yandex_tracker_client").setLevel(config.tracker.loglevel.upper()) 52 | logger = logging.getLogger(__name__) 53 | logger.debug(f"Environment: {os.environ.items()}") 54 | logger.debug(f"Configuration dump: {config.model_dump()}") 55 | 56 | scheduler = BackgroundScheduler() 57 | 58 | 59 | def signal_handler(sig, frame) -> None: # 
pylint: disable=W0613 60 | """Graceful shutdown.""" 61 | if sig in ( 62 | signal.SIGINT, 63 | signal.SIGTERM, 64 | ): 65 | logger.warning(f"Received {signal.Signals(sig).name}, graceful shutdown...") 66 | scheduler.shutdown() 67 | sys.exit(0) 68 | 69 | 70 | def configure_sentry() -> None: 71 | """Configure Sentry client for send exception stacktraces.""" 72 | if config.monitoring.sentry_enabled: 73 | assert config.monitoring.sentry_dsn is not None 74 | sentry_sdk.init( 75 | dsn=config.monitoring.sentry_dsn, 76 | traces_sample_rate=1.0, 77 | release=f"{appname}@{version}", 78 | before_send=sentry_events_filter, 79 | ) 80 | logger.info(f"Sentry send traces is {'enabled' if config.monitoring.sentry_enabled else 'disabled'}") 81 | 82 | 83 | def configure_state_manager() -> AbstractStateManager | None: 84 | """Configure StateKeeper for ETL stateful mode.""" 85 | if not config.stateful: 86 | return 87 | 88 | match config.state.storage: 89 | case "jsonfile": 90 | s3_props: IObjectStorageProps = IObjectStorageProps( 91 | bucket_name=config.state.jsonfile_s3_bucket, 92 | access_key_id=config.state.jsonfile_s3_access_key, 93 | secret_key=config.state.jsonfile_s3_secret_key, 94 | endpoint_url=config.state.jsonfile_s3_endpoint, 95 | region=config.state.jsonfile_s3_region, 96 | ) 97 | return StateManagerFactory.create_file_state_manager( 98 | strategy=config.state.jsonfile_strategy, filename=config.state.jsonfile_path, **s3_props 99 | ) 100 | case "redis": 101 | return StateManagerFactory.create_redis_state_manager(config.state.redis_dsn) 102 | case "custom": 103 | raise NotImplementedError 104 | case _: 105 | raise ValueError 106 | 107 | 108 | def run_etl(ignore_exceptions: bool = False, issue_model: TrackerIssue = TrackerIssue) -> None: 109 | """Start ETL process.""" 110 | etl = YandexTrackerETL( 111 | tracker_client=YandexTrackerClient(), 112 | clickhouse_client=ClickhouseClient(), 113 | state_manager=configure_state_manager(), 114 | issue_model=issue_model, 115 | ) 116 | etl.run( 117 | stateful=config.stateful, 118 | queues=config.tracker.search.queues, 119 | search_query=config.tracker.search.query, 120 | search_range=config.tracker.search.range, 121 | limit=config.tracker.search.per_page_limit, 122 | ignore_exceptions=ignore_exceptions, 123 | auto_deduplicate=config.clickhouse.auto_deduplicate, 124 | ) 125 | 126 | 127 | def main() -> None: 128 | """Entry point for CLI command.""" 129 | configure_sentry() 130 | 131 | if args.run_once: 132 | logger.info("A one-time launch command is received, the scheduler setting will be skipped") 133 | run_etl() 134 | sys.exit(0) 135 | 136 | signal.signal(signal.SIGINT, signal_handler) 137 | signal.signal(signal.SIGTERM, signal_handler) 138 | scheduler.start() 139 | scheduler.add_job( 140 | run_etl, 141 | trigger="interval", 142 | name="tracker_etl_default", 143 | minutes=int(config.etl_interval_minutes), 144 | max_instances=1, 145 | next_run_time=datetime.now() + timedelta(seconds=5), 146 | ) 147 | signal.pause() 148 | 149 | 150 | if __name__ == "__main__": 151 | main() 152 | -------------------------------------------------------------------------------- /tracker_exporter/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/tracker_exporter/models/__init__.py -------------------------------------------------------------------------------- /tracker_exporter/models/base.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | from abc import ABCMeta 3 | from enum import Enum 4 | from typing import Any 5 | 6 | from pydantic import BaseModel 7 | 8 | 9 | class ClickhousePayload(BaseModel): 10 | issue: dict 11 | changelog: list 12 | metrics: list 13 | 14 | 15 | class LogLevels(str, Enum): 16 | debug = "debug" 17 | info = "info" 18 | warning = "warning" 19 | error = "error" 20 | critical = "critical" 21 | 22 | 23 | class TrackerChangelogEvents: 24 | ISSUE_WORKFLOW = "IssueWorkflow" 25 | ISSUE_MOVED = "IssueMoved" 26 | 27 | 28 | class TrackerWorkflowTypes: 29 | TRANSITION = "status" 30 | RESOLVE_ISSUE = "resolution" 31 | 32 | 33 | class YandexTrackerLanguages(str, Enum): 34 | ru = "ru" 35 | en = "en" 36 | 37 | 38 | class TimeDeltaOut: 39 | SECONDS = "seconds" 40 | MINUTES = "minutes" 41 | 42 | 43 | class ClickhouseProto: 44 | HTTPS = "https" 45 | HTTP = "http" 46 | 47 | 48 | class Base: 49 | """Base class for objects.""" 50 | 51 | __metaclass__ = ABCMeta 52 | 53 | def __str__(self) -> str: 54 | return str(self.to_dict()) 55 | 56 | def __repr__(self) -> str: 57 | return str(self) 58 | 59 | def __getitem__(self, item): 60 | return self.__dict__[item] 61 | 62 | @classmethod 63 | def de_json(cls, data) -> dict: 64 | """Deserialize object.""" 65 | if not data: 66 | return None 67 | 68 | data = data.copy() 69 | return data 70 | 71 | def to_json(self) -> dict: 72 | """Serialize object to json.""" 73 | return json.dumps(self.to_dict()) 74 | 75 | def to_dict(self) -> dict: 76 | """Recursive serialize object.""" 77 | 78 | def null_cleaner(value: Any): 79 | if value is None: 80 | return "" 81 | return value 82 | 83 | def parse(val): 84 | if isinstance(val, list): 85 | return [parse(it) for it in val] 86 | if isinstance(val, dict): 87 | return {key: null_cleaner(parse(value)) for key, value in val.items() if not key.startswith("_")} 88 | return val 89 | 90 | data = self.__dict__.copy() 91 | return parse(data) 92 | -------------------------------------------------------------------------------- /tracker_exporter/models/issue.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from pydantic import BaseModel 4 | from typing import List, Any 5 | from tracker_exporter._typing import DateTimeISO8601Str, DateStr 6 | 7 | from yandex_tracker_client.collections import Issues, IssueChangelog 8 | from yandex_tracker_client.exceptions import NotFound 9 | 10 | from tracker_exporter.models.base import Base 11 | from tracker_exporter.models.base import ( 12 | TrackerChangelogEvents, 13 | TrackerWorkflowTypes, 14 | ) 15 | from tracker_exporter.utils.helpers import ( 16 | calculate_time_spent, 17 | string_normalize, 18 | validate_resource, 19 | extract_changelog_field, 20 | convert_datetime, 21 | to_snake_case, 22 | to_human_time, 23 | ) 24 | from tracker_exporter.config import config 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | class TrackerIssueChangelog(BaseModel): 30 | """This object represents a issue changelog events.""" 31 | 32 | issue_key: str 33 | queue: str 34 | event_time: DateTimeISO8601Str 35 | event_type: str 36 | transport: str 37 | actor: str 38 | changed_field: Any 39 | changed_from: Any 40 | changed_to: Any 41 | 42 | 43 | class TrackerIssueMetric(Base): 44 | """This object represents a issue metrics.""" 45 | 46 | def __init__( 47 | self, 48 | issue_key: str, 49 | status_name: str, 50 | status_transitions_count: int, 51 | duration: int, 52 | 
busdays_duration: int, 53 | last_seen: str, 54 | ) -> None: 55 | self.issue_key = issue_key 56 | self.status_name = status_name 57 | self.status_transitions_count = status_transitions_count 58 | self.duration = duration 59 | self.human_readable_duration = to_human_time(self.duration) 60 | self.busdays_duration = busdays_duration 61 | self.human_readable_busdays_duration = to_human_time(self.busdays_duration) 62 | self.last_seen = last_seen 63 | 64 | 65 | class TrackerIssue(Base): 66 | """This object represents a issue from Yandex.Tracker.""" 67 | 68 | def __init__(self, issue: Issues) -> None: 69 | self._changelog_events: List[TrackerIssueChangelog] = [] 70 | self._issue: Issues = issue 71 | self._metrics: dict = {} 72 | self._transform(self._issue) 73 | 74 | def _transform(self, issue: Issues) -> None: 75 | """Transformation of a issue into useful data.""" 76 | logger.debug(f"Transforming issue {issue.key}...") 77 | 78 | self.queue: str = issue.queue.key 79 | self.issue_key: str = issue.key 80 | self.title: str = string_normalize(issue.summary) 81 | self.issue_type: str = to_snake_case(validate_resource(issue.type, "name")) 82 | self.priority: str = validate_resource(issue.priority, "name") 83 | self.assignee: str = validate_resource(issue.assignee, "email") 84 | self.author: str = validate_resource(issue.createdBy, "email") 85 | self.status: str = to_snake_case(validate_resource(issue.status, "name")) 86 | self.resolution: str = to_snake_case(validate_resource(issue.resolution, "name")) 87 | self.tags: list = issue.tags or [] 88 | self.components: list = [c.name for c in issue.components if issue.components] 89 | self.is_resolved: bool = True if self.resolution is not None else False 90 | self.is_closed: bool = True if self.status in config.closed_issue_statuses or self.is_resolved else False 91 | self.created_at: DateTimeISO8601Str = convert_datetime(issue.createdAt) 92 | self.updated_at: DateTimeISO8601Str = convert_datetime(issue.updatedAt) 93 | self.resolved_at: DateTimeISO8601Str = convert_datetime(issue.resolvedAt) 94 | self.closed_at: DateTimeISO8601Str = self.resolved_at if self.is_resolved else None 95 | self.start_date: DateStr = validate_resource(issue, "start") 96 | self.end_date: DateStr = validate_resource(issue, "end") 97 | self.deadline: DateStr = validate_resource(issue, "deadline") 98 | self.story_points: int = validate_resource(issue, "storyPoints") or 0 99 | self.parent_issue_key: str = validate_resource(issue.parent, "key", low=False) 100 | self.epic_issue_key: str = validate_resource(issue.epic, "key", low=False) 101 | self.is_subtask: bool = True if any((self.parent_issue_key,)) else False 102 | self.qa_engineer: str = validate_resource(issue.qaEngineer, "email") 103 | self.aliases: list = validate_resource(issue, "aliases") or [] 104 | self.was_moved: bool = False 105 | self.moved_at: DateTimeISO8601Str = None 106 | self.moved_by: str = None 107 | self._handle_strange_tracker_artifacts(self._issue) 108 | 109 | def _handle_strange_tracker_artifacts(self, issue: Issues): 110 | """ 111 | Handling strange artifacts in the Yandex.Tracker. 112 | For some reason, the tracker can't find the project or sprint specified in the issue, 113 | like yandex_tracker_client.exceptions.NotFound: Sprint does not exist. 114 | """ 115 | try: 116 | self.project = validate_resource(issue.project, "name") 117 | except NotFound as exc: 118 | logger.warning(f"Can't get info about specified project for issue {self.issue_key}. 
Details: {exc}") 119 | self.project = "" 120 | try: 121 | self.sprints: list = [s.name for s in issue.sprint if issue.sprint] 122 | except NotFound as exc: 123 | logger.warning(f"Can't get info about specified sprint for issue {self.issue_key}. Details: {exc}") 124 | self.sprints = [] 125 | 126 | def _convert_and_save_changelog(self, event: IssueChangelog) -> None: 127 | """Convert issue changelog events to compatible format.""" 128 | metadata = { 129 | "issue_key": event.issue.key, 130 | "queue": event.issue.queue.key, 131 | "event_time": convert_datetime(event.updatedAt), 132 | "event_type": event.type, 133 | "transport": event.transport, 134 | "actor": validate_resource(event.updatedBy, "email") or validate_resource(event.updatedBy, "name") or "", 135 | } 136 | 137 | for change in event.fields: 138 | try: # Ah shit, here we go again 139 | changed_field = extract_changelog_field(change.get("field")) 140 | changed_from = extract_changelog_field(change.get("from")) 141 | changed_to = extract_changelog_field(change.get("to")) 142 | except NotFound as exc: 143 | logger.warning( 144 | f"Tracker BUG, can't get info about '{changed_field}' in " 145 | f"{self.issue_key}, the entity may have been deleted. Details: {exc}" 146 | ) 147 | continue 148 | 149 | if changed_field is None or not any((changed_from, changed_to)): 150 | logger.debug(f"Skipping bad changelog event for {self.issue_key} ({changed_field}): {change}") 151 | continue 152 | 153 | self._changelog_events.append( 154 | TrackerIssueChangelog( 155 | **metadata, 156 | changed_field=changed_field, 157 | changed_from=changed_from, 158 | changed_to=changed_to, 159 | ) 160 | ) 161 | 162 | def _on_changelog_issue_moved(self, event: IssueChangelog) -> None: 163 | """Actions whe 'issue moved' event triggered.""" 164 | logger.debug(f"Moved issue found: {self.issue_key}") 165 | self.was_moved = True 166 | self.moved_by = validate_resource(event.updatedBy, "email") 167 | self.moved_at = convert_datetime(event.updatedAt) 168 | 169 | def _on_changelog_issue_workflow(self, event: IssueChangelog) -> None: 170 | """Actions whe 'issue wofklow' event triggered.""" 171 | logger.debug(f"Issue workflow fields found: {event.fields}") 172 | 173 | if len(event.fields) < 2: 174 | logger.debug(f"Not interesting event, skipping: {event.fields}") 175 | return 176 | 177 | # Keep only status transition events 178 | worklow_type = event.fields[0].get("field").id 179 | if worklow_type != TrackerWorkflowTypes.TRANSITION: 180 | logger.debug(f"Skipping {event.fields[0].get('field').id} for {self.issue_key}") 181 | return 182 | 183 | # Find datetimes between transition from status A to status B 184 | status = to_snake_case(event.fields[0].get("from").name.lower()) 185 | event_start_time = event.fields[1].get("from") or self._issue.createdAt # transition from the initial status 186 | event_end_time = event.fields[1].get("to") 187 | 188 | if event_start_time is None or event_end_time is None: 189 | logger.warning( 190 | f"Found corrupted changelog event with bad datetime range. " 191 | f"Perhaps this field is not a status. See details: " 192 | f"{self.issue_key}: {event.fields[1]}. All fields: {event.fields}" 193 | ) 194 | return 195 | 196 | # Calculation of the time spent in the status 197 | start_time = convert_datetime(event_start_time) 198 | end_time = convert_datetime(event_end_time) 199 | total_status_time = calculate_time_spent(start_time, end_time) 200 | # TODO (akimrx): get workhours from queue settings? 
201 | busdays_status_time = calculate_time_spent(start_time, end_time, busdays_only=True) 202 |  203 | # Custom logic for calculating the finish date of the issue, 204 | # because not everyone uses resolutions, sadly 205 | # Also, resolved issues are flagged as is_closed with closed_at equal to the resolution time 206 | transition_status = to_snake_case(event.fields[0].get("to").name.lower()) 207 | if self.is_resolved and self.resolved_at: 208 | self.closed_at = self.resolved_at 209 | elif transition_status in config.closed_issue_statuses and self.status in config.closed_issue_statuses: 210 | self.closed_at = convert_datetime(event_end_time) 211 |  212 | try: 213 | self._metrics[status]["duration"] += total_status_time 214 | self._metrics[status]["busdays_duration"] += busdays_status_time 215 | self._metrics[status]["status_transitions_count"] += 1 216 | except (KeyError, AttributeError): 217 | self._metrics[status] = { 218 | "issue_key": self.issue_key, 219 | "status_name": status, 220 | "status_transitions_count": 1, 221 | "duration": total_status_time, 222 | "busdays_duration": busdays_status_time, 223 | "last_seen": convert_datetime(event_end_time), 224 | } 225 |  226 | def metrics(self) -> List[TrackerIssueMetric]: 227 | """ 228 | All metrics are based on status change events in the issue history. 229 |  230 | Time spent in a status is recorded only after 231 | the issue has left that status. 232 |  233 | For example, when an issue moves from the status "Open" 234 | to the status "In progress", only the metric 235 | for "Open" is recorded. 236 | As soon as "In progress" changes to any other status, 237 | the time spent in "In progress" is recorded as well. 238 |  239 | In other words, time spent in the issue's current status is not 240 | counted. 
241 | """ 242 | for event in self._issue.changelog: 243 | if config.changelog_export_enabled: 244 | self._convert_and_save_changelog(event) 245 | match event.type: 246 | case TrackerChangelogEvents.ISSUE_MOVED: 247 | self._on_changelog_issue_moved(event) 248 | 249 | case TrackerChangelogEvents.ISSUE_WORKFLOW: 250 | self._on_changelog_issue_workflow(event) 251 | 252 | case _: # not interesting event 253 | pass 254 | 255 | logger.debug(f"Metrics for {self.issue_key}: {self._metrics}") 256 | metrics = [TrackerIssueMetric(**metric) for _, metric in self._metrics.items()] 257 | 258 | return metrics 259 | -------------------------------------------------------------------------------- /tracker_exporter/services/__init__.py: -------------------------------------------------------------------------------- 1 | from tracker_exporter.services.clickhouse import ClickhouseClient 2 | from tracker_exporter.services.monitoring import DogStatsdClient 3 | from tracker_exporter.services.tracker import YandexTrackerClient 4 | 5 | __all__ = [ 6 | "ClickhouseClient", 7 | "DogStatsdClient", 8 | "YandexTrackerClient", 9 | ] 10 | -------------------------------------------------------------------------------- /tracker_exporter/services/clickhouse.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | 4 | from typing import List, Dict 5 | 6 | import requests 7 | from requests import Response, ConnectionError, Timeout 8 | 9 | from tracker_exporter.exceptions import ClickhouseError 10 | from tracker_exporter.utils.helpers import backoff 11 | from tracker_exporter.models.base import ClickhouseProto 12 | from tracker_exporter.config import config, monitoring 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class ClickhouseClient: 18 | """This class provide simple facade interface for Clickhouse.""" 19 | 20 | def __init__( 21 | self, 22 | host: str = config.clickhouse.host, 23 | port: int = config.clickhouse.port, 24 | username: str = config.clickhouse.username, 25 | password: str = config.clickhouse.password, 26 | proto: ClickhouseProto = config.clickhouse.proto, 27 | cacert: str = config.clickhouse.cacert_path, 28 | serverless_proxy_id: str = config.clickhouse.serverless_proxy_id, 29 | params: dict = {}, 30 | http_timeout: int = 10, 31 | ) -> None: 32 | self.host = host 33 | self.port = port 34 | self.username = username 35 | self.password = password 36 | self.proto = proto 37 | self.cacert = cacert 38 | self.serverless_proxy_id = serverless_proxy_id 39 | self.params = params 40 | self.timeout = int(http_timeout) 41 | self.headers = {} 42 | 43 | self._prepare_headers() 44 | if self.proto == ClickhouseProto.HTTPS: 45 | assert self.cacert is not None 46 | 47 | def _prepare_headers(self): 48 | # fmt: off 49 | self.headers = { 50 | "Content-Type": "application/json", 51 | "X-Clickhouse-User": self.username 52 | } # fmt: on 53 | if self.password is not None: 54 | self.headers["X-Clickhouse-Key"] = self.password 55 | 56 | def _prepare_query_params(self): 57 | params = self.params.copy() 58 | 59 | if params.get("user") is not None: 60 | logger.warning("Removed 'user' key:value from params, please pass 'user' via arg") 61 | del params["user"] 62 | 63 | if params.get("password") is not None: 64 | logger.warning("Removed 'password' key:value from params, please pass 'password' via arg") 65 | del params["password"] 66 | 67 | if self.serverless_proxy_id: 68 | self.params["database"] = self.serverless_proxy_id 69 | 70 | return params 71 | 72 | 
@backoff( 73 | exceptions=(ConnectionError, Timeout), 74 | base_delay=config.clickhouse.backoff_base_delay, 75 | expo_factor=config.clickhouse.backoff_expo_factor, 76 | max_tries=config.clickhouse.backoff_max_tries, 77 | jitter=config.clickhouse.backoff_jitter, 78 | ) 79 | def execute(self, query: str) -> Response | None: 80 | url = f"{self.proto}://{self.host}:{self.port}" 81 | params = self._prepare_query_params() 82 | 83 | try: 84 | if self.proto == ClickhouseProto.HTTPS: 85 | response = requests.post( 86 | url=url, 87 | headers=self.headers, 88 | params=params, 89 | data=query, 90 | timeout=self.timeout, 91 | verify=self.cacert, 92 | ) 93 | else: 94 | response = requests.post( 95 | url=url, headers=self.headers, params=params, data=query, timeout=self.timeout 96 | ) 97 | except (Timeout, ConnectionError): 98 | raise 99 | except Exception as exc: 100 | logger.exception(f"Could not execute query in Clickhouse: {exc}") 101 | raise ClickhouseError(exc) from exc 102 | else: 103 | if not response.ok: 104 | msg = f"Could not execute query in Clickhouse. Status: {response.status_code}. {response.text}" 105 | logger.error(msg) 106 | raise ClickhouseError(msg) 107 | return response 108 | 109 | # TODO (akimrx): add sort by partition key (i.e. `updated_at`)? for best insert perfomance 110 | def insert_batch(self, database: str, table: str, payload: List[Dict]) -> Response | None: 111 | if not isinstance(payload, list): 112 | raise ClickhouseError("Payload must be list") 113 | 114 | tags = [f"database:{database}", f"table:{table}"] 115 | batch_size = len(payload) 116 | data = " ".join([json.dumps(row) for row in payload]) 117 | logger.debug(f"Inserting batch ({batch_size}): {data}") 118 | 119 | with monitoring.send_time_metric("clickhouse_insert_time_seconds", tags): 120 | query_result = self.execute(f"INSERT INTO {database}.{table} FORMAT JSONEachRow {data}") 121 | 122 | monitoring.send_gauge_metric("clickhouse_inserted_rows", batch_size, tags) 123 | return query_result 124 | 125 | def deduplicate(self, database: str, table: str) -> None: 126 | tags = [f"database:{database}", f"table:{table}"] 127 | with monitoring.send_time_metric("clickhouse_deduplicate_time_seconds", tags): 128 | self.execute(f"OPTIMIZE TABLE {database}.{table} FINAL") 129 | -------------------------------------------------------------------------------- /tracker_exporter/services/monitoring.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=W0102 2 | import logging 3 | 4 | from contextlib import contextmanager 5 | from typing import Callable, ContextManager 6 | from functools import wraps 7 | from datadog import DogStatsd 8 | 9 | from yandex_tracker_client.exceptions import ( 10 | TrackerError, 11 | TrackerServerError, 12 | TrackerRequestError, 13 | TrackerClientError, 14 | ) 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class DogStatsdClient: 20 | """This class represents interface for DataDog statsd UDP client.""" 21 | 22 | def __init__( 23 | self, 24 | host: str, 25 | port: int, 26 | base_labels: list = [], # pylint: disable=W0102 27 | metric_name_prefix: str = "tracker_exporter", 28 | use_ms: bool = True, 29 | enabled: bool = True, 30 | ) -> None: 31 | self.host = host 32 | self.port = port 33 | self.base_labels = base_labels 34 | self.prefix = metric_name_prefix 35 | self._enabled = enabled 36 | self._use_ms = use_ms 37 | 38 | if self._enabled: 39 | assert self.host is not None 40 | assert self.port is not None 41 | 42 | self.client = 
DogStatsd(host=self.host, port=self.port, use_ms=self._use_ms, constant_tags=self.base_labels) 43 | 44 | def send_count_metric(self, name: str, value: int, tags: list = []) -> Callable: 45 | metric = f"{self.prefix}_{name}" 46 | 47 | def metric_wrapper(func): 48 | @wraps(func) 49 | def wrapper(*args, **kwargs): 50 | if not self._enabled: 51 | return func(*args, **kwargs) 52 | 53 | self.client.increment(metric, value, tags=tags) 54 | logger.debug(f"Success sent count metric: {metric}") 55 | return func(*args, **kwargs) 56 | 57 | return wrapper 58 | 59 | return metric_wrapper 60 | 61 | def send_gauge_metric(self, name: str, value: int, tags: list = []) -> None: 62 | if not self._enabled: 63 | return 64 | 65 | metric = f"{self.prefix}_{name}" 66 | self.client.gauge(metric, value, tags=tags) 67 | logger.debug(f"Success sent gauge metric: {metric}") 68 | 69 | @contextmanager 70 | def _dummy_send_time_metric(self): 71 | yield 72 | 73 | def send_time_metric(self, name: str, tags: list = [], **kwargs) -> Callable | ContextManager: 74 | metric = f"{self.prefix}_{name}" 75 | if self._enabled: 76 | return self.client.timed(metric, tags=tags, **kwargs) 77 | return self._dummy_send_time_metric() 78 | 79 | 80 | def sentry_events_filter(event, hint): # pylint: disable=R1710 81 | # Drop all events without exception trace 82 | if "exc_info" not in hint: 83 | return 84 | 85 | exception = hint["exc_info"][1] 86 | if isinstance(exception, (TrackerError, TrackerClientError, TrackerRequestError, TrackerServerError)): 87 | event["fingerprint"] = ["tracker-error"] 88 | 89 | return event 90 | -------------------------------------------------------------------------------- /tracker_exporter/services/tracker.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import List 4 | from yandex_tracker_client import TrackerClient 5 | from yandex_tracker_client.collections import Issues, IssueComments 6 | 7 | from tracker_exporter.models.base import YandexTrackerLanguages 8 | from tracker_exporter.config import ( 9 | config, 10 | monitoring, 11 | YANDEX_TRACKER_API_SEARCH_HARD_LIMIT, 12 | YANDEX_TRACKER_HARD_LIMIT_ISSUE_URL, 13 | ) 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class YandexTrackerClient: 19 | """This class provide simple wrapper over default Yandex.Tracker client.""" 20 | 21 | def __init__( 22 | self, 23 | *, 24 | token: str | None = config.tracker.token, 25 | iam_token: str | None = config.tracker.iam_token, 26 | org_id: str | None = config.tracker.org_id, 27 | cloud_org_id: str | None = config.tracker.cloud_org_id, 28 | timeout: int = config.tracker.timeout, 29 | retries: int = config.tracker.max_retries, 30 | lang: YandexTrackerLanguages = config.tracker.language, 31 | ) -> None: 32 | self.client = TrackerClient( 33 | token=token, 34 | iam_token=iam_token, 35 | org_id=org_id, 36 | cloud_org_id=cloud_org_id, 37 | timeout=timeout, 38 | retries=retries, 39 | headers={"Accept-Language": lang}, 40 | ) 41 | 42 | @monitoring.send_time_metric("issue_prefetch_seconds") 43 | def get_issue(self, issue_key: str) -> Issues: 44 | return self.client.issues[issue_key] 45 | 46 | @monitoring.send_time_metric("comments_fetch_seconds") 47 | def get_comments(self, issue_key: str) -> IssueComments: 48 | return self.client.issues[issue_key].comments.get_all() 49 | 50 | @monitoring.send_time_metric("issues_search_time_seconds") 51 | def search_issues( 52 | self, 53 | query: str | None = None, 54 | filter: dict | list | None = None, 55 | order: 
dict | list | None = None, 56 | limit: int = 100, 57 | ) -> List[Issues]: 58 | # https://github.com/yandex/yandex_tracker_client/issues/13 59 | issues_count = self.client.issues.find(query=query, filter=filter, order=order, count_only=True) 60 | if issues_count > YANDEX_TRACKER_API_SEARCH_HARD_LIMIT: 61 | logger.warning( 62 | f"The number of issues found ({issues_count}) exceeds the hard limit " 63 | f"({YANDEX_TRACKER_API_SEARCH_HARD_LIMIT}) of the Yandex.Tracker API. " 64 | f"Issue on GitHub - {YANDEX_TRACKER_HARD_LIMIT_ISSUE_URL}" 65 | ) 66 | logger.info(f"Found {issues_count} issues by query: {query} | filter: {filter} | order: {order}") 67 | return self.client.issues.find(query=query, filter=filter, order=order, per_page=limit) 68 | -------------------------------------------------------------------------------- /tracker_exporter/state/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/tracker_exporter/state/__init__.py -------------------------------------------------------------------------------- /tracker_exporter/state/backends.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 |  4 | from abc import ABC, abstractmethod 5 | from typing import Any, ContextManager 6 |  7 | import boto3 8 |  9 | from tracker_exporter.state.serializers import AbstractSerializer, JsonSerializer 10 |  11 | logger = logging.getLogger(__name__) 12 |  13 |  14 | class AbstractFileStorageBackend(ABC): 15 | """ 16 | An abstract base class for file storage systems, enforcing a common interface for file operations. 17 |  18 | :param serializer: The serializer instance used for serializing and deserializing data. 19 | :param raise_if_not_exists: Raise :exc:`FileNotFoundError` if the file does not exist. Defaults to ``True``. 20 | :param auto_sub_ext_by_serializer: Automatically substitute the file extension based on the serializer. Defaults to ``False``. 
21 | 22 | """ 23 | 24 | def __init__( 25 | self, 26 | serializer: AbstractSerializer, 27 | raise_if_not_exists: bool = True, 28 | auto_sub_ext_by_serializer: bool = False, 29 | ) -> None: 30 | self.serializer = serializer if hasattr(serializer, "is_initialized") else serializer() 31 | self.raise_if_not_exists = raise_if_not_exists 32 | self.auto_sub_ext_by_serializer = auto_sub_ext_by_serializer 33 | 34 | def path_with_ext(self, path: str) -> str: 35 | """Appends the file extension from the serializer if not present in the path.""" 36 | if not path.endswith(f".{self.serializer.ext}"): 37 | return f"{path}.{self.serializer.ext}" 38 | return path 39 | 40 | @abstractmethod 41 | def read(self, path: str, deserialize: bool = False) -> Any: 42 | """Abstract method for reading data from a given file path.""" 43 | 44 | @abstractmethod 45 | def write(self, path: str, data: Any) -> None: 46 | """Abstract method for writing data to a given file path.""" 47 | 48 | 49 | class AbstractKeyValueStorageBackend(ABC): 50 | """An abstract base class for key value storage backends like Redis, Consul, etc.""" 51 | 52 | @abstractmethod 53 | def client(self, *args, **kwargs) -> ContextManager: 54 | """An abstract method that returns client context manager.""" 55 | 56 | @abstractmethod 57 | def get(self, key: str | list, *args, **kwargs) -> Any: 58 | """An abstract method for get value(s) by key from storage.""" 59 | 60 | @abstractmethod 61 | def set(self, key: str, value: Any, *args, **kwargs) -> None: 62 | """An abstract method for save key:value pair to storage.""" 63 | 64 | @abstractmethod 65 | def delete(self, key: str | list, *args, **kwargs) -> None: 66 | """An abstract method for deletes key(s) from storage.""" 67 | 68 | 69 | class LocalFileStorageBackend(AbstractFileStorageBackend): 70 | """ 71 | A concrete synchronous implementation of AbstractFileStorage for local file storage operations. 72 | Overrides the read and write asynchronous methods for file operations using the aiofiles package. 73 | 74 | :param serializer: The serializer instance used for serializing and deserializing data. 75 | :param raise_if_not_exists: Raise :exc:`FileNotFound` if file not exists. Defaults to True. 76 | :param auto_sub_ext_by_serializer: Automatically substitute the file extension based on the serializer. Defaults is ``False``. 77 | 78 | Default serializer: :class:`JsonSerializer` 79 | 80 | Usage:: 81 | 82 | storage = LocalFileStorage() 83 | 84 | storage.write("myfile.json", data={"foo": "bar"}) 85 | r = storage.read("myfile.json", deserialize=True) 86 | 87 | print(r) # {"foo": "bar"} 88 | 89 | """ 90 | 91 | def __init__( 92 | self, 93 | serializer: AbstractSerializer | None = None, 94 | raise_if_not_exists: bool = True, 95 | auto_sub_ext_by_serializer: bool = False, 96 | ) -> None: 97 | super().__init__( 98 | serializer or JsonSerializer, 99 | raise_if_not_exists=raise_if_not_exists, 100 | auto_sub_ext_by_serializer=auto_sub_ext_by_serializer, 101 | ) 102 | 103 | def read(self, path: str, deserialize: bool = False) -> Any: 104 | """ 105 | Reads data from a local file, deserializes it using the provided serializer, 106 | and returns the deserialized data. 107 | 108 | :param path: A local file path for read content from. 109 | :param deserialize: Deserialize readed file content via serializer. 
110 |  111 | """ 112 | if self.auto_sub_ext_by_serializer: 113 | path = self.path_with_ext(path) 114 |  115 | if not os.path.isfile(path) and not os.path.exists(path): 116 | if self.raise_if_not_exists: 117 | raise FileNotFoundError(f"File with name {path} not found") 118 | logger.debug(f"File with name '{path}' not found") 119 | return {} 120 |  121 | with open(path, "r") as file: 122 | data = file.read() 123 |  124 | if deserialize: 125 | return self.serializer.deserialize(data) 126 | return data 127 |  128 | def write(self, path: str, data: Any) -> None: 129 | """ 130 | Serializes the given data using the provided serializer and writes it to a local file. 131 |  132 | :param path: A local path to write content to. 133 | :param data: Content that will be written to the file. 134 |  135 | """ 136 |  137 | if self.auto_sub_ext_by_serializer: 138 | path = self.path_with_ext(path) 139 |  140 | with open(path, "w") as file: 141 | file.write(self.serializer.serialize(data)) 142 |  143 |  144 | class S3FileStorageBackend(AbstractFileStorageBackend): 145 | """ 146 | A concrete synchronous implementation of AbstractFileStorageBackend for S3 object storage operations. 147 | Initializes a boto3 session and provides read and write operations for files stored in an S3 bucket. 148 |  149 | Default serializer: :class:`JsonSerializer` 150 |  151 | :param bucket_name: The name of the S3 bucket. 152 | :param access_key_id: Service account key ID; if empty, the ``AWS_ACCESS_KEY_ID`` environment variable is used. 153 | :param secret_key: Secret key for the service account; if empty, the ``AWS_SECRET_ACCESS_KEY`` environment variable is used. 154 | :param endpoint_url: S3 endpoint for use with Yandex.Cloud, Minio and other providers. 155 | :param region: S3 region. Default: ``us-east1`` 156 | :param serializer: The serializer instance used for serializing and deserializing data. 157 | :param raise_if_not_exists: Raise ``FileNotFoundError`` if the file does not exist. Defaults to ``True``. 158 | :param auto_sub_ext_by_serializer: Automatically substitute the file extension based on the serializer. Defaults to ``False``. 
159 | 160 | Usage:: 161 | 162 | storage = S3FileStorage( 163 | bucket_name="my-bucket", 164 | access_key_id="XXXX", 165 | secret_key="XXXX", 166 | endpoint_url="https://storage.yandexcloud.net", 167 | region="ru-central1" 168 | ) 169 | 170 | storage.write("myfile.json", data={"foo": "bar"}) 171 | r = storage.read("myfile.json", deserialize=True) 172 | 173 | print(r) # {"foo": "bar"} 174 | 175 | """ 176 | 177 | def __init__( 178 | self, 179 | bucket_name: str, 180 | serializer: AbstractSerializer | None = None, 181 | raise_if_not_exists: bool = True, 182 | auto_sub_ext_by_serializer: bool = False, 183 | access_key_id: str | None = None, 184 | secret_key: str | None = None, 185 | region: str | None = None, 186 | endpoint_url: str | None = None, 187 | **kwargs, 188 | ) -> None: 189 | super().__init__( 190 | serializer or JsonSerializer, 191 | raise_if_not_exists=raise_if_not_exists, 192 | auto_sub_ext_by_serializer=auto_sub_ext_by_serializer, 193 | ) 194 | self.bucket_name = bucket_name 195 | self.endpoint_url = endpoint_url 196 | self.session = boto3.Session( 197 | aws_access_key_id=access_key_id, 198 | aws_secret_access_key=secret_key, 199 | region_name=region or "us-east1", 200 | **kwargs, 201 | ) 202 | 203 | @property 204 | def client(self): 205 | """Returns a resource client for S3 operations.""" 206 | return self.session.client("s3", endpoint_url=self.endpoint_url) 207 | 208 | def read(self, path: str, deserialize: bool = False) -> Any: 209 | """ 210 | Reads data from an S3 object, deserializes it using the provided serializer, 211 | and returns the deserialized data. 212 | 213 | :param path: A local file path for read content from. 214 | :param deserialize: Deserialize readed file content via serializer. 215 | 216 | """ 217 | if self.auto_sub_ext_by_serializer: 218 | path = self.path_with_ext(path) 219 | 220 | try: 221 | response = self.client.get_object(Bucket=self.bucket_name, Key=path) 222 | except Exception as exc: 223 | error_msg = f"Exception while reading file '{path}'. Possible file not exists. Error: {exc}" 224 | 225 | if self.raise_if_not_exists: 226 | raise FileNotFoundError(error_msg) from exc 227 | 228 | logger.debug(error_msg) 229 | return {} 230 | 231 | with response["Body"] as stream: 232 | data = stream.read() 233 | 234 | if deserialize: 235 | return self.serializer.deserialize(data.decode()) 236 | return data.decode() 237 | 238 | def write(self, path: str, data: Any) -> None: 239 | """ 240 | Serializes the given data using the provided serializer and writes it to an S3 object. 241 | 242 | :param path: An local path for write content to. 243 | :param data: Content that will be written to file. 
244 | 245 | """ 246 | if self.auto_sub_ext_by_serializer: 247 | path = self.path_with_ext(path) 248 | 249 | self.client.put_object(Bucket=self.bucket_name, Key=path, Body=self.serializer.serialize(data).encode()) 250 | 251 | 252 | __all__ = [ 253 | "AbstractFileStorageBackend", 254 | "LocalFileStorageBackend", 255 | "S3FileStorageBackend", 256 | ] 257 | -------------------------------------------------------------------------------- /tracker_exporter/state/factory.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Type, TypedDict, Optional 2 | 3 | from redis import Redis 4 | 5 | from tracker_exporter.state.serializers import AbstractSerializer, JsonSerializer 6 | from tracker_exporter.state.backends import S3FileStorageBackend, LocalFileStorageBackend 7 | from tracker_exporter.state.managers import FileStateManager, RedisStateManager 8 | 9 | 10 | class IObjectStorageProps(TypedDict): 11 | bucket_name: str 12 | access_key_id: str 13 | secret_key: str 14 | region: Optional[str] 15 | endpoint_url: Optional[str] 16 | 17 | 18 | class StateManagerFactory: 19 | """Factory for easy way to create StateManager.""" 20 | 21 | @staticmethod 22 | def create_file_state_manager( 23 | strategy: Literal["local", "s3"], 24 | filename: str = "state.json", 25 | serializer: Type[AbstractSerializer] = JsonSerializer, 26 | **s3_props: Optional[IObjectStorageProps], 27 | ) -> FileStateManager: 28 | match strategy: 29 | case "local": 30 | backend = LocalFileStorageBackend(serializer=serializer, raise_if_not_exists=False) 31 | case "s3": 32 | bucket_name = s3_props["bucket_name"] 33 | del s3_props["bucket_name"] 34 | 35 | backend = S3FileStorageBackend( 36 | bucket_name, serializer=serializer, raise_if_not_exists=False, **s3_props 37 | ) 38 | case _: 39 | raise ValueError("Invalid jsonfile strategy, allowed: s3, local") 40 | 41 | return FileStateManager(backend, state_file_name=filename) 42 | 43 | @staticmethod 44 | def create_redis_state_manager( 45 | url: str, 46 | namespace: str = "tracker_exporter_default", 47 | serializer: Type[AbstractSerializer] = JsonSerializer, 48 | ) -> RedisStateManager: 49 | backend = Redis.from_url(url, decode_responses=True) 50 | return RedisStateManager(backend, namespace=namespace, serializer=serializer) 51 | -------------------------------------------------------------------------------- /tracker_exporter/state/managers.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from contextlib import suppress 3 | from typing import Any, Type 4 | 5 | from tracker_exporter.state.backends import AbstractFileStorageBackend, AbstractKeyValueStorageBackend 6 | from tracker_exporter.state.serializers import AbstractSerializer, JsonSerializer 7 | from tracker_exporter.exceptions import SerializerError 8 | 9 | 10 | class AbstractStateManager(ABC): 11 | """ 12 | Abstract class for state storage. 13 | 14 | Allows user to async save, receive, delete and flush the state. 
15 | """ 16 | 17 | @abstractmethod 18 | async def set(self, key: str, value: Any) -> None: 19 | """Abstract method for save key:value pair to storage.""" 20 | 21 | @abstractmethod 22 | async def get(self, key: str, default: Any = None) -> Any: 23 | """Abstract method for get value by key from storage.""" 24 | 25 | @abstractmethod 26 | async def delete(self, key: str) -> None: 27 | """Abstract method for delete value by key from storage.""" 28 | 29 | @abstractmethod 30 | async def flush(self) -> None: 31 | """Abstract method for flush (drop) state from storage.""" 32 | 33 | 34 | class FileStateManager(AbstractStateManager): 35 | """ 36 | A state manager for handling state persistence in file storage (local, s3 or other). 37 | 38 | This class provides an abstraction for managing application state data stored within a file. 39 | It supports basic CRUD operations such as setting, getting, and deleting state information, 40 | utilizing an abstract file storage mechanism. 41 | 42 | :param storage: The file storage provider for persisting state data. 43 | :param state_file_name: The name of the file where state data is stored. Defaults to ``state``. 44 | 45 | Usage:: 46 | 47 | from datetime import datetime 48 | 49 | storage_backend = LocalFileStorage() # also, you can use S3FileStorage 50 | state = FileStateManager(storage_backend, state_file_name="my_state") 51 | 52 | 53 | def my_function() -> None: 54 | ... 55 | last_state = state.get("my_function", default={}) 56 | 57 | if last_state.get("last_run") is None: 58 | new_state = {"last_run": datetime.now().strftime("%Y-%M-%d %H:%M:%S")} 59 | state.set("myfunction", new_state) 60 | 61 | ... 62 | 63 | .. note:: 64 | The state data is managed as a dictionary (JSON-compatible), allowing for key-value pair manipulation. 65 | Other data formats is NOT SUPPORTED. 66 | 67 | """ 68 | 69 | def __init__(self, storage: AbstractFileStorageBackend, state_file_name: str = "state") -> None: 70 | self.storage = storage 71 | self.state_file_name = state_file_name 72 | self.state = {} 73 | 74 | self.storage.auto_sub_ext_by_serializer = True 75 | self.storage.raise_if_not_exists = False 76 | 77 | def get(self, key: str, default: Any = None) -> Any: 78 | """ 79 | Get state value by key. 80 | 81 | :param key: State key. 82 | :param default: Default value if specified key not found. 83 | 84 | """ 85 | self.state = self.storage.read(self.state_file_name, deserialize=True) 86 | return self.state.get(key, default) 87 | 88 | def set(self, key: str, value: str) -> None: 89 | """ 90 | Set state an value for the key. 91 | 92 | :param key: State key. 93 | :param value: Value to be saved assotiated with key. 94 | 95 | """ 96 | self.state = self.storage.read(self.state_file_name, deserialize=True) 97 | self.state[key] = value 98 | self.storage.write(self.state_file_name, self.state) 99 | 100 | def delete(self, key: str) -> None: 101 | """ 102 | 103 | Deletes state (value) by key. 104 | 105 | :param key: State key to be deleted. 106 | """ 107 | self.state = self.storage.read(self.state_file_name, deserialize=True) 108 | if self.state.get(key) is not None: 109 | del self.state[key] 110 | self.storage.write(self.state_file_name, self.state) 111 | 112 | def flush(self): 113 | """Drop all data from state.""" 114 | self.state = {} 115 | self.storage.write(self.state_file_name, self.state) 116 | 117 | 118 | class RedisStateManager(AbstractStateManager): 119 | """ 120 | A state manager for handling state persistence in the Redis storage. 
121 | 122 | This class provides an abstraction layer over a Redis storage mechanism, allowing 123 | for easy setting, getting, and deletion of state information with optional serialization 124 | support. It uses an underlying key-value storage provider and supports namespacing to 125 | segregate different state data. 126 | 127 | It is recommended to use a JSON-compatible state format, such as a dict, to maintain portability 128 | between other state managers. 129 | 130 | :param storage: The storage provider for persisting state data. 131 | :param serializer: An optional serializer for converting 132 | data to and from the storage format. Defaults to JsonSerializer if not provided. 133 | :param namespace: A namespace prefix for all keys managed by this instance. 134 | Helps in avoiding key collisions. Defaults to ``tracker_exporter_default``. 135 | 136 | Usage:: 137 | 138 | from datetime import datetime 139 | from redis import Redis 140 | 141 | redis = Redis.from_url("redis://localhost:6379", decode_responses=True) 142 | state = RedisStateManager(redis, namespace="my_namespace") 143 | 144 | 145 | def my_function() -> None: 146 | ... 147 | last_state = state.get("my_function", default={}) 148 | 149 | if last_state.get("last_run") is None: 150 | new_state = {"last_run": datetime.now().strftime("%Y-%M-%d %H:%M:%S")} 151 | state.set("myfunction", new_state) 152 | 153 | ... 154 | 155 | """ 156 | 157 | def __init__( 158 | self, 159 | storage: AbstractKeyValueStorageBackend, 160 | serializer: Type[AbstractSerializer] | None = None, 161 | namespace: str = "tracker_exporter_default", 162 | ) -> None: 163 | self.storage = storage 164 | self.serializer = serializer() or JsonSerializer() 165 | self.namespace = namespace 166 | 167 | def _rkey(self, key: str) -> str: 168 | """Resolve full key path with namespace.""" 169 | return f"{self.namespace}:{key}" 170 | 171 | def set(self, key: str, value: Any) -> None: 172 | """ 173 | Set an value for the state key. 174 | 175 | :param key: State key. 176 | :param value: Value to be saved assotiated with key. 177 | 178 | """ 179 | if isinstance(value, dict): 180 | value = self.serializer.serialize(value) 181 | 182 | with self.storage.client() as session: 183 | session.set(self._rkey(key), value) 184 | 185 | def get(self, key: str) -> Any: 186 | """ 187 | Get state value by key from Redis. 188 | 189 | :param key: Key state. 190 | :param default: Default value if specified key not found. 191 | 192 | """ 193 | with self.storage.client() as session: 194 | value = session.get(self._rkey(key)) 195 | 196 | with suppress(SerializerError): 197 | value = self.serializer.deserialize(value) 198 | return value 199 | 200 | def delete(self, key: str) -> None: 201 | """ 202 | Deletes state (value) by key if exists. 203 | 204 | :param key: State key to be deleted. 205 | """ 206 | with self.storage.client() as session: 207 | session.delete(self._rkey(key)) 208 | 209 | def flush(self) -> None: 210 | """Flush all data in the namespace.""" 211 | raise NotImplementedError 212 | 213 | def execute(self, cmd: str, *args, **kwargs) -> Any: 214 | """ 215 | Common method for execute any Redis supported command. 216 | 217 | :param cmd: Redis command to execute. 
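Usage (illustrative example; the Redis command, key and TTL value here are arbitrary)::

    state.execute("EXPIRE", "tracker_exporter_default:my_function", 3600)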
218 | """ 219 | with self.storage.client() as session: 220 | return session.execute_command(cmd, *args, **kwargs) 221 | 222 | 223 | __all__ = ["AbstractStateManager", "FileStateManager", "RedisStateManager"] 224 | -------------------------------------------------------------------------------- /tracker_exporter/state/serializers.py: -------------------------------------------------------------------------------- 1 | """This module contains content serializers.""" 2 | 3 | import json 4 | import yaml 5 | 6 | from abc import ABC, abstractmethod 7 | from typing import Any 8 | 9 | from tracker_exporter.exceptions import SerializerError 10 | 11 | 12 | class AbstractSerializer(ABC): 13 | """ 14 | An abstract serializer like JSON, YAML, etc. 15 | 16 | All (de)serialize errors must be raise `SerializerError`. 17 | """ 18 | 19 | def __init__(self) -> None: 20 | self.is_initialized = True 21 | 22 | @property 23 | @abstractmethod 24 | def ext(self) -> str: 25 | """Abstract property for returns serializer file extension.""" 26 | 27 | @abstractmethod 28 | def serialize(self, data: Any, *args, **kwargs) -> str: 29 | """Abstract method for serialize data.""" 30 | 31 | @abstractmethod 32 | def deserialize(self, data: str, **kwargs) -> Any: 33 | """Abstract method for deserialize data.""" 34 | 35 | 36 | class JsonSerializer(AbstractSerializer): 37 | """ 38 | Serializer for converting between JSON and Python objects. 39 | 40 | This serializer handles serialization (Python object to JSON format) 41 | and deserialization (JSON format to Python object) processes, 42 | ensuring that data is correctly transformed for JSON storage or 43 | retrieval while maintaining the Python object's structure. 44 | 45 | :raises SerializerError: If an error occurs during the JSON (de)serialization process. 46 | """ 47 | 48 | @property 49 | def ext(self) -> str: 50 | return "json" 51 | 52 | def serialize(self, data: Any, ensure_ascii: bool = False, indent: int = 2, **kwargs) -> str: 53 | """ 54 | Serialize data to JSON format (str). 55 | 56 | :param data: Data that will be serialized to JSON. 57 | :param ensure_ascii: If ``False``, then the return value can contain non-ASCII characters if they appear in strings contained in obj. 58 | Otherwise, all such characters are escaped in JSON strings. 59 | :param indent: Spaces indent. Defaults: ``2``. 60 | 61 | :raises SerializerError: If an error occurs during the JSON serialization process. 62 | """ 63 | try: 64 | return json.dumps(data, ensure_ascii=ensure_ascii, indent=indent, **kwargs) 65 | except (json.JSONDecodeError, TypeError) as exc: 66 | raise SerializerError(exc) from exc 67 | 68 | def deserialize(self, data: str, **kwargs) -> Any: 69 | """ 70 | Derialize JSON data to Python object format. 71 | 72 | :param data: Data that will be deserialized from JSON. 73 | 74 | :raises SerializerError: If an error occurs during the JSON deserialization process. 
75 | """ 76 | try: 77 | return json.loads(data, **kwargs) 78 | except (json.JSONDecodeError, TypeError) as exc: 79 | raise SerializerError(exc) from exc 80 | 81 | 82 | __all__ = ["AbstractSerializer", "JsonSerializer"] 83 | -------------------------------------------------------------------------------- /tracker_exporter/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/tracker_exporter/utils/__init__.py -------------------------------------------------------------------------------- /tracker_exporter/utils/helpers.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import time 4 | import json 5 | import logging 6 | import random 7 | import pytz 8 | import psutil 9 | 10 | from functools import wraps 11 | from typing import Union, Tuple, Type, Callable, Any 12 | from datetime import datetime, timezone as dt_timezone 13 | 14 | import holidays 15 | import pandas as pd 16 | import businesstimedelta 17 | 18 | 19 | from yandex_tracker_client.objects import Reference 20 | from tracker_exporter._typing import DateTimeISO8601Str, DateStr, _Sequence 21 | from tracker_exporter.models.base import TimeDeltaOut 22 | from tracker_exporter.config import config 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | def get_timedelta(end_time: datetime, start_time: datetime, out: TimeDeltaOut = TimeDeltaOut.SECONDS) -> int: 28 | """Simple timedelta between dates.""" 29 | assert isinstance(start_time, datetime) 30 | assert isinstance(end_time, datetime) 31 | 32 | delta = int((end_time - start_time).total_seconds()) 33 | if out == TimeDeltaOut.MINUTES: 34 | return delta // 60 35 | if out == TimeDeltaOut.SECONDS: 36 | return delta 37 | return delta 38 | 39 | 40 | def calculate_time_spent( 41 | start_date: datetime, 42 | end_date: datetime, 43 | busdays_only: bool = False, 44 | workdays: list = config.workdays, 45 | business_hours: Tuple = ( 46 | config.business_hours_start, 47 | config.business_hours_end, 48 | ), 49 | ) -> int: 50 | """ 51 | Calculate timedelta between dates with business days support. 52 | Weekdays: Monday is 0, Sunday is 6, so weekends (5, 6) mean (Sat, Sun). 53 | Returns: seconds 54 | """ 55 | if not isinstance(start_date, datetime): 56 | start_date = pd.to_datetime(start_date) 57 | if not isinstance(end_date, datetime): 58 | end_date = pd.to_datetime(end_date) 59 | 60 | holiday_rules = businesstimedelta.HolidayRule(holidays.RU()) 61 | workday_rules = businesstimedelta.WorkDayRule( 62 | start_time=business_hours[0], end_time=business_hours[1], working_days=workdays 63 | ) 64 | 65 | if busdays_only: 66 | logger.debug(f"Calculating workhours. Business hours: {business_hours}. 
{start_date}, {end_date}") 67 | bt = businesstimedelta.Rules([workday_rules, holiday_rules]) 68 | result = bt.difference(start_date, end_date).timedelta.total_seconds() 69 | else: 70 | logger.debug("Calculating regular hours") 71 | result = (end_date - start_date).total_seconds() 72 | 73 | return abs(int(result)) 74 | 75 | 76 | def fix_null_dates(data: dict) -> dict: 77 | """Clean keys with None values from dict.""" 78 | to_remove = [] 79 | 80 | for key, value in data.items(): 81 | if key in config.not_nullable_fields and (value is None or value == ""): 82 | to_remove.append(key) 83 | 84 | for key in to_remove: 85 | del data[key] 86 | 87 | return data 88 | 89 | 90 | # pylint: disable=R1710 91 | def validate_resource(resource: object, attribute: str, low: bool = True) -> Any | None: 92 | """Validate Yandex.Tracker object attribute and return it if exists.""" 93 | if hasattr(resource, attribute): 94 | _attr = getattr(resource, attribute) 95 | if isinstance(_attr, str): 96 | if low: 97 | return _attr.lower() 98 | return _attr 99 | return _attr 100 | 101 | 102 | def to_snake_case(text: str) -> str: 103 | """Convert any string to `snake_case` format.""" 104 | if text is None: 105 | return None 106 | if not isinstance(text, str): 107 | raise ValueError(f"Expected string, received: {type(text)}") 108 | if text.strip() == "": 109 | return text.strip() 110 | 111 | text = re.sub(r"(?<=[a-zа-яё])(?=[A-ZА-ЯЁ])", "_", text) 112 | text = re.sub(r"(?<=[a-zа-яё])(?=\d)", "_", text) 113 | text = re.sub(r"(?<=\d)(?=[a-zа-яё])", "_", text) 114 | text = re.sub(r"[^a-zA-Zа-яёА-ЯЁ0-9_]", "_", text) 115 | 116 | 117 | return text.lower() 118 | 119 | 120 | def convert_datetime( 121 | dtime: str, 122 | source_dt_format: str = config.datetime_response_format, 123 | output_format: str = config.datetime_clickhouse_format, 124 | date_only: bool = False, 125 | timezone: str = "UTC", 126 | ) -> DateTimeISO8601Str | DateStr: 127 | """ 128 | Returns ISO8601 datetime (UTC). 129 | Or date format `YYYY-MM-DD` from original datetime when date_only passed. 130 | """ 131 | logger.debug(f"Timezone set to {timezone}") 132 | if dtime is None: 133 | return None 134 | 135 | dt = datetime.strptime(dtime, source_dt_format) 136 | if dt.tzinfo is None: 137 | logger.debug("Replacing datetime tzinfo to UTC") 138 | dt = dt.replace(tzinfo=dt_timezone.utc) 139 | 140 | output_datetime = dt.astimezone(pytz.timezone(timezone)) 141 | if date_only: 142 | return output_datetime.date().strftime("%Y-%d-%m") 143 | 144 | if output_format.endswith("%f"): 145 | return output_datetime.strftime(output_format)[:-3] 146 | return output_datetime.strftime(output_format) 147 | 148 | 149 | def backoff( 150 | exceptions: _Sequence[Type[Exception]], 151 | base_delay: int | float = 0.5, 152 | expo_factor: int | float = 2.5, 153 | max_tries: int = 3, 154 | jitter: bool = False, 155 | ) -> Callable: 156 | """Decorator for backoff retry function/method calls.""" 157 | 158 | def retry_decorator(func: Callable): 159 | @wraps(func) 160 | def func_retry(*args, **kwargs): 161 | logger.debug(f"Start func {func.__qualname__} with {max_tries} tries") 162 | tries, delay = max_tries, base_delay 163 | counter = 0 164 | while tries > 0: 165 | try: 166 | counter += 1 167 | return func(*args, **kwargs) 168 | except exceptions as err: 169 | tries -= 1 170 | if tries == 0: 171 | logger.error(f"{func.__qualname__} has failed {counter} times") 172 | raise 173 | logger.warning( 174 | f"Error in func {func.__qualname__}, cause: {err}. 
" 175 | f"Retrying ({counter}/{max_tries - 1}) in {delay:.2f}s..." 176 | ) 177 | if jitter: 178 | delay = random.uniform(delay / 2, delay * expo_factor) # nosec 179 | time.sleep(delay) 180 | else: 181 | time.sleep(delay) 182 | delay *= expo_factor 183 | 184 | return func_retry 185 | 186 | return retry_decorator 187 | 188 | 189 | def to_human_time(seconds: Union[int, float], verbosity: int = 2) -> str: 190 | """Convert seconds to human readable timedelta like a `2w 3d 1h 20m`.""" 191 | seconds = int(seconds) 192 | if seconds == 0: 193 | return "0s" 194 | 195 | negative = False 196 | if seconds < 0: 197 | negative = True 198 | seconds = abs(seconds) 199 | 200 | result = [] 201 | intervals = ( 202 | ("y", 31104000), 203 | ("mo", 2592000), 204 | ("w", 604800), 205 | ("d", 86400), 206 | ("h", 3600), 207 | ("m", 60), 208 | ("s", 1), 209 | ) 210 | for name, count in intervals: 211 | value = seconds // count 212 | if value: 213 | seconds -= value * count 214 | result.append(f"{value}{name}") 215 | delta = " ".join(result[:verbosity]) 216 | return f"-{delta}" if negative else delta 217 | 218 | 219 | def from_human_time(timestr: str) -> int: 220 | """Convert a duration string like `2w 3d 1h 20m` to seconds.""" 221 | 222 | logger.debug(f"Received human time: {timestr}") 223 | total_seconds = 0 224 | patterns = [ 225 | (r"(\d+)y", 365 * 24 * 60 * 60), # years 226 | (r"(\d+)mo", 30 * 24 * 60 * 60), # months 227 | (r"(\d+)w", 7 * 24 * 60 * 60), # weeks 228 | (r"(\d+)d", 24 * 60 * 60), # days 229 | (r"(\d+)h", 60 * 60), # hours 230 | (r"(\d+)m", 60), # minutes 231 | (r"(\d+)s", 1), # seconds 232 | ] 233 | 234 | for pattern, multiplier in patterns: 235 | matches = re.search(pattern, timestr) 236 | if matches: 237 | total_seconds += int(matches.group(1)) * multiplier 238 | timestr = re.sub(pattern, "", timestr) 239 | 240 | timestr = timestr.strip() 241 | if timestr: 242 | raise ValueError(f"Invalid format detected in the string: '{timestr}'") 243 | 244 | return total_seconds 245 | 246 | 247 | def string_normalize(text: str) -> str: 248 | """Remove all incompatible symbols.""" 249 | emoji_pattern = re.compile( 250 | "[" 251 | "\U0001F600-\U0001F64F" # emoticons 252 | "\U0001F300-\U0001F5FF" # symbols & pictographs 253 | "\U0001F680-\U0001F6FF" # transport & map symbols 254 | "\U0001F1E0-\U0001F1FF" # flags (iOS) 255 | "]+", 256 | flags=re.UNICODE, 257 | ) 258 | return emoji_pattern.sub(r"", text) 259 | 260 | 261 | def extract_changelog_field(value: Any) -> Any: 262 | """Extractor for Yandex.Tracker issue changelog.""" 263 | match value: 264 | case list(): 265 | logger.debug(f"Changelog field is list: {value}") 266 | return ", ".join(extract_changelog_field(i) for i in value) 267 | case str(): 268 | logger.debug(f"Changelog field is string: {value}") 269 | try: 270 | dtime = convert_datetime(value) 271 | except Exception: 272 | if len(value) > 100: 273 | return "text too long, see history in UI" 274 | return value 275 | else: 276 | return dtime 277 | case dict(): 278 | logger.debug(f"Changelog field is dict, dumping: {value}") 279 | return json.dumps(value, ensure_ascii=False) 280 | case None: 281 | logger.debug(f"Changelog field is None, fixing: {value}") 282 | return "" 283 | case int(): 284 | logger.debug(f"Changelog field is integer: {value}") 285 | return str(value) 286 | case float(): 287 | logger.debug(f"Changelog field is float: {value}") 288 | return str(value) 289 | case Reference(): 290 | logger.debug(f"Changelog field is Reference to object: {value}. 
Extracting...") 291 | return ( 292 | validate_resource(value, "key", low=False) 293 | or validate_resource(value, "email") 294 | or validate_resource(value, "name", low=False) 295 | or validate_resource(value, "id", low=False) 296 | ) 297 | case _: 298 | logger.warning(f"Unknown type of changelog field received: {type(value)}: {value}") 299 | 300 | 301 | def bytes_to_human(data: int, granularity=2): 302 | """Convert bytes to human format with binary prefix.""" 303 | _bytes = int(data) 304 | result = [] 305 | sizes = ( # fmt: off 306 | ("TB", 1024**4), 307 | ("GB", 1024**3), 308 | ("MB", 1024**2), 309 | ("KB", 1024), 310 | ("B", 1), 311 | ) # fmt: on 312 | if _bytes == 0: 313 | return 0 314 | else: 315 | for name, count in sizes: 316 | value = _bytes // count 317 | if value: 318 | _bytes -= value * count 319 | result.append(f"{value}{name}") 320 | return ", ".join(result[:granularity]) 321 | 322 | 323 | def log_etl_stats(iteration: int, remaining: int, elapsed: float, entity: str = "issues"): # pragma: no cover 324 | """Logging resources usage.""" 325 | process = psutil.Process(os.getpid()) 326 | memory = process.memory_info() 327 | memory_rss_usage = bytes_to_human(memory.rss, granularity=1) 328 | elapsed_time = to_human_time(elapsed) 329 | 330 | try: 331 | avg_time = elapsed / iteration 332 | avg_task_transform = f"{avg_time:.2f}ms" if avg_time < 1 else to_human_time(avg_time) 333 | except ZeroDivisionError: 334 | avg_task_transform = "calculating.." 335 | 336 | logger.info( 337 | f"Processed {iteration} of ~{remaining} {entity}. Avg time per issue: {avg_task_transform}. " 338 | f"Elapsed time: {elapsed_time}. MEM_RSS_USED: {memory_rss_usage}" 339 | ) 340 | --------------------------------------------------------------------------------